Commit f4ecc7c7 by Ting PAN

Change the code structure

1 parent d3ed62db
Showing with 4157 additions and 3067 deletions
------------------------------------------------------------------------
The list of the most significant changes made over time in SeetaDet.
SeetaDet 0.4.0 (20200408)
Dragon Minimum Required (Version 0.3.0.dev20200408)
Changes:
Preview Features:
- Optimize the code structure.
- DALI support for SSD, RetinaNet, and Faster-RCNN.
- Use KPLRecord instead of SeetaRecord.
Bugs fixed:
- Fix the frozen Affine issue.
------------------------------------------------------------------------
SeetaDet 0.3.0 (20191121)
Dragon Minimum Required (Version 0.3.0.dev20191121)
......
......@@ -2,8 +2,8 @@
## WHAT's SeetaDet?
SeetaDet contains many useful object detectors, including R-CNN series, SSD,
and the recent RetinaNet.
SeetaDet is a platform implementing popular object detection algorithms,
including R-CNN series, SSD, and RetinaNet.
We have achieved the same or higher performance than the baselines reported in the original papers.
......@@ -14,22 +14,33 @@ The torch-style codes help us to simplify the hierarchical pipeline of modern de
## Requirements
seeta-dragon >= 0.3.0.dev20191121
seeta-dragon >= 0.3.0.dev20200408
## Installation
#### 1. Install the required python packages
#### Build From Source
If you prefer to develop modules as well as run experiments,
the following commands will build but not install to ***site-packages***:
```bash
pip install cython pyyaml matplotlib
pip install opencv-python Pillow
cd SeetaDet && python setup.py build
```
#### 2. Compile the C Extensions
#### Install From Source
Clone this repository to local disk and install:
```bash
cd SeetaDet && python setup.py install
```
#### Install From Git
You can also install it from the remote repository:
```bash
cd SeetaDet/compile
bash ./make.sh
pip install git+https://gitlab.seetatech.com/seetaresearch/SeetaDet.git@master
```
## Quick Start
......@@ -37,7 +48,7 @@ bash ./make.sh
#### Train a detection model
```bash
cd SeetaDet/tools
cd tools
python train.py --cfg <MODEL_YAML>
```
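Here ``<MODEL_YAML>`` is the path to one of the solver/model configurations, such as
those under ``SeetaDet/configs``. For example (the file name below is illustrative;
check your checkout for the actual ones):

```bash
cd tools
python train.py --cfg ../configs/faster_rcnn/voc_faster_rcnn_R-50.yml
```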
......@@ -46,20 +57,20 @@ We have provided the default YAML examples into ``SeetaDet/configs``.
#### Test a detection model
```bash
cd SeetaDet/tools
cd tools
python test.py --cfg <MODEL_YAML> --exp_dir <EXP_DIR> --iter <ITERATION>
```
Or
```bash
cd SeetaDet/tools
cd tools
python test_all.py --cfg <MODEL_YAML> --exp_dir <EXP_DIR>
```
#### Export a detection model to ONNX
```bash
cd SeetaDet/tools
cd tools
python export.py --cfg <MODEL_YAML> --exp_dir <EXP_DIR> --iter <ITERATION>
```
......
CMAKE_MINIMUM_REQUIRED(VERSION 3.0.2)
PROJECT(gpu_nms)
# ---------------- User Config ----------------
# Set your Python interpreter if necessary;
# otherwise, a default interpreter will be used.
# Several examples:
# set(PYTHON_EXECUTABLE /usr/bin/python) # Linux & OSX, Builtin Python
# set(PYTHON_EXECUTABLE /X/anaconda/bin/python) # Linux & OSX, Anaconda
# set(PYTHON_EXECUTABLE X:/Anaconda/python) # Win, Anaconda
# Set the CUDA compute architectures to compile for
# Remove "compute_70/sm_70" if using CUDA 8.0
set(CUDA_ARCH -gencode arch=compute_30,code=sm_30
-gencode arch=compute_35,code=sm_35
-gencode arch=compute_50,code=sm_50
-gencode arch=compute_60,code=sm_60
-gencode arch=compute_70,code=sm_70)
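# Note: each "-gencode arch=compute_XX,code=sm_XX" pair embeds device code
# for one GPU generation; e.g. sm_70 targets Volta and requires CUDA >= 9.0.
# For newer GPUs you may need to append entries, such as compute_75/sm_75
# for Turing (CUDA >= 10.0).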
# ---------------- User Config ----------------
# ---[ Dependencies
include(${PROJECT_SOURCE_DIR}/cmake/FindPythonLibs.cmake)
include(${PROJECT_SOURCE_DIR}/cmake/FindNumPy.cmake)
FIND_PACKAGE(CUDA REQUIRED)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
message(STATUS "C++11 support has been enabled by default.")
# ---[ Config types
set(CMAKE_BUILD_TYPE Release CACHE STRING "set build type to release")
set(CMAKE_CONFIGURATION_TYPES Release CACHE STRING "set build type to release" FORCE)
# ---[ Includes
set(INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
include_directories(${INCLUDE_DIR})
include_directories(${PROJECT_SOURCE_DIR}/src)
include_directories(${PYTHON_INCLUDE_DIRS})
include_directories(${NUMPY_INCLUDE_DIR})
include_directories(${CUDA_INCLUDE_DIRS})
# ---[ libs
link_directories(${PYTHON_LIBRARIES})
# ---[ Install
set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR} CACHE STRING "set install prefix" FORCE)
set(CMAKE_SHARED_LIBRARY_PREFIX "")
# ---[ Flags
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${CUDA_ARCH}")
if(WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP /O2 /Oi /GL /Ot /Gy")
endif()
if(UNIX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -s -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s -w -fPIC -O3 -m64 -std=c++11")
endif()
# ---[ Files
set(HEADER_FILES gpu_nms.h)
set(SRC_FILES gpu_nms.cpp nms_kernel.cu)
# ---[ Add Target
CUDA_ADD_LIBRARY(${PROJECT_NAME} SHARED ${HEADER_FILES} ${SRC_FILES})
# ---[ Link Libs
TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${CUDA_LIBRARIES} ${CUDA_cublas_LIBRARY} ${CUDA_curand_LIBRARY})
if(WIN32)
TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${PYTHON_LIBRARIES})
endif()
# ---[ Install Target
set_target_properties(${PROJECT_NAME} PROPERTIES OUTPUT_NAME "gpu_nms")
install (TARGETS ${PROJECT_NAME} DESTINATION ${PROJECT_BINARY_DIR}/../install/lib/nms)
# - Find the NumPy libraries
# This module finds if NumPy is installed, and sets the following variables
# indicating where it is.
#
# TODO: Update to provide the libraries and paths for linking npymath lib.
#
# NUMPY_FOUND - was NumPy found
# NUMPY_VERSION - the version of NumPy found as a string
# NUMPY_VERSION_MAJOR - the major version number of NumPy
# NUMPY_VERSION_MINOR - the minor version number of NumPy
# NUMPY_VERSION_PATCH - the patch version number of NumPy
# NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601
# NUMPY_INCLUDE_DIR - path to the NumPy include files
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
if(PYTHONINTERP_FOUND)
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import numpy as n; print(n.__version__); print(n.get_include());"
RESULT_VARIABLE __result
OUTPUT_VARIABLE __output
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(__result MATCHES 0)
string(REGEX REPLACE ";" "\\\\;" __values ${__output})
string(REGEX REPLACE "\r?\n" ";" __values ${__values})
list(GET __values 0 NUMPY_VERSION)
list(GET __values 1 NUMPY_INCLUDE_DIR)
string(REGEX MATCH "^([0-9])+\\.([0-9])+\\.([0-9])+" __ver_check "${NUMPY_VERSION}")
if(NOT "${__ver_check}" STREQUAL "")
set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1})
set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2})
set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3})
math(EXPR NUMPY_VERSION_DECIMAL
"(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR})
else()
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
message(STATUS "Requested NumPy version and include path, but got instead:\n${__output}\n")
endif()
endif()
else()
message("Can not find Python interpretator.")
message(FATAL_ERROR "Do you set PYTHON_EXECUTABLE correctly?")
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NumPy REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION
VERSION_VAR NUMPY_VERSION)
if(NUMPY_FOUND)
message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})")
endif()
\ No newline at end of file
# - Find python libraries
# This module finds the libraries corresponding to the Python interpreter
# FindPythonInterp provides.
# This code sets the following variables:
#
# PYTHONLIBS_FOUND - have the Python libs been found
# PYTHON_PREFIX - path to the Python installation
# PYTHON_LIBRARIES - path to the python library
# PYTHON_INCLUDE_DIRS - path to where Python.h is found
# PYTHON_MODULE_EXTENSION - lib extension, e.g. '.so' or '.pyd'
# PYTHON_MODULE_PREFIX - lib name prefix: usually an empty string
# PYTHON_SITE_PACKAGES - path to installation site-packages
# PYTHON_IS_DEBUG - whether the Python interpreter is a debug build
#
# Thanks to talljimbo for the patch adding the 'LDVERSION' config
# variable usage.
#=============================================================================
# Copyright 2001-2009 Kitware, Inc.
# Copyright 2012 Continuum Analytics, Inc.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# * Neither the names of Kitware, Inc., the Insight Software Consortium,
# nor the names of their contributors may be used to endorse or promote
# products derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#=============================================================================
# Checking for the extension makes sure that `LibsNew` was found and not just `Libs`.
if(PYTHONLIBS_FOUND AND PYTHON_MODULE_EXTENSION)
return()
endif()
# Use the Python interpreter to find the libs.
if(PythonLibsNew_FIND_REQUIRED)
find_package(PythonInterp ${PythonLibsNew_FIND_VERSION} REQUIRED)
else()
find_package(PythonInterp ${PythonLibsNew_FIND_VERSION})
endif()
if(NOT PYTHONINTERP_FOUND)
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# According to http://stackoverflow.com/questions/646518/python-how-to-detect-debug-interpreter
# testing whether sys has the gettotalrefcount function is a reliable, cross-platform
# way to detect a CPython debug interpreter.
#
# The library suffix is from the config var LDVERSION sometimes, otherwise
# VERSION. VERSION will typically be like "2.7" on unix, and "27" on windows.
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"from distutils import sysconfig as s;import sys;import struct;
print('.'.join(str(v) for v in sys.version_info));
print(sys.prefix);
print(s.get_python_inc(plat_specific=True));
print(s.get_python_lib(plat_specific=True));
print(s.get_config_var('SO'));
print(hasattr(sys, 'gettotalrefcount')+0);
print(struct.calcsize('@P'));
print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
print(s.get_config_var('LIBDIR') or '');
print(s.get_config_var('MULTIARCH') or '');
"
RESULT_VARIABLE _PYTHON_SUCCESS
OUTPUT_VARIABLE _PYTHON_VALUES
ERROR_VARIABLE _PYTHON_ERROR_VALUE)
if(NOT _PYTHON_SUCCESS MATCHES 0)
if(PythonLibsNew_FIND_REQUIRED)
message(FATAL_ERROR
"Python config failure:\n${_PYTHON_ERROR_VALUE}")
endif()
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# Convert the process output into a list
string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES})
string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
list(GET _PYTHON_VALUES 0 _PYTHON_VERSION_LIST)
list(GET _PYTHON_VALUES 1 PYTHON_PREFIX)
list(GET _PYTHON_VALUES 2 PYTHON_INCLUDE_DIR)
list(GET _PYTHON_VALUES 3 PYTHON_SITE_PACKAGES)
list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION)
list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG)
list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P)
list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX)
list(GET _PYTHON_VALUES 8 PYTHON_LIBDIR)
list(GET _PYTHON_VALUES 9 PYTHON_MULTIARCH)
# Make sure Python has the same pointer size as the chosen compiler
# Skip if CMAKE_SIZEOF_VOID_P is not defined
if(CMAKE_SIZEOF_VOID_P AND (NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}"))
if(PythonLibsNew_FIND_REQUIRED)
math(EXPR _PYTHON_BITS "${PYTHON_SIZEOF_VOID_P} * 8")
math(EXPR _CMAKE_BITS "${CMAKE_SIZEOF_VOID_P} * 8")
message(FATAL_ERROR
"Python config failure: Python is ${_PYTHON_BITS}-bit, "
"chosen compiler is ${_CMAKE_BITS}-bit")
endif()
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# The built-in FindPython didn't always give the version numbers
string(REGEX REPLACE "\\." ";" _PYTHON_VERSION_LIST ${_PYTHON_VERSION_LIST})
list(GET _PYTHON_VERSION_LIST 0 PYTHON_VERSION_MAJOR)
list(GET _PYTHON_VERSION_LIST 1 PYTHON_VERSION_MINOR)
list(GET _PYTHON_VERSION_LIST 2 PYTHON_VERSION_PATCH)
# Make sure all directory separators are '/'
string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})
string(REGEX REPLACE "\\\\" "/" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDE_DIR})
string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES ${PYTHON_SITE_PACKAGES})
if(CMAKE_HOST_WIN32)
set(PYTHON_LIBRARY
"${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
# when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the
# original python installation. They may be found relative to PYTHON_INCLUDE_DIR.
if(NOT EXISTS "${PYTHON_LIBRARY}")
get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY)
set(PYTHON_LIBRARY
"${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
endif()
# raise an error if the python libs are still not found.
if(NOT EXISTS "${PYTHON_LIBRARY}")
message(FATAL_ERROR "Python libraries not found")
endif()
else()
if(PYTHON_MULTIARCH)
set(_PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}/${PYTHON_MULTIARCH}" "${PYTHON_LIBDIR}")
else()
set(_PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}")
endif()
#message(STATUS "Searching for Python libs in ${_PYTHON_LIBS_SEARCH}")
# Probably this needs to be more involved. It would be nice if the config
# information the python interpreter itself gave us were more complete.
find_library(PYTHON_LIBRARY
NAMES "python${PYTHON_LIBRARY_SUFFIX}"
PATHS ${_PYTHON_LIBS_SEARCH}
NO_DEFAULT_PATH)
# If all else fails, just set the name/version and let the linker figure out the path.
if(NOT PYTHON_LIBRARY)
set(PYTHON_LIBRARY python${PYTHON_LIBRARY_SUFFIX})
endif()
endif()
MARK_AS_ADVANCED(
PYTHON_LIBRARY
PYTHON_INCLUDE_DIR
)
# We use PYTHON_INCLUDE_DIR, PYTHON_LIBRARY and PYTHON_DEBUG_LIBRARY for the
# cache entries because they are meant to specify the location of a single
# library. We now set the variables listed by the documentation for this
# module.
SET(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}")
SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
SET(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}")
find_package_message(PYTHON
"Found PythonLibs: ${PYTHON_LIBRARY}"
"${PYTHON_EXECUTABLE}${PYTHON_VERSION}")
set(PYTHONLIBS_FOUND TRUE)
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
int boxes_dim, float nms_overlap_thresh, int device_id);
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import numpy as np
cimport numpy as np
assert sizeof(int) == sizeof(np.int32_t)
cdef extern from "gpu_nms.h":
void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh, int device_id=0):
cdef int boxes_num = dets.shape[0]
cdef int boxes_dim = dets.shape[1]
cdef int num_out
cdef np.ndarray[np.int32_t, ndim=1] \
keep = np.zeros(boxes_num, dtype=np.int32)
cdef np.ndarray[np.float32_t, ndim=1] \
scores = dets[:, 4]
cdef np.ndarray[np.intp_t, ndim=1] \
order = scores.argsort()[::-1]
cdef np.ndarray[np.float32_t, ndim=2] \
sorted_dets = dets[order, :]
_nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
keep = keep[:num_out]
return list(order[keep])
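# A minimal usage sketch (an illustration, not part of the module): "dets"
# is a float32 array of shape (N, 5) holding [x1, y1, x2, y2, score] rows;
# the call returns the indices of the kept boxes, highest score first.
#
#   import numpy as np
#   dets = np.array([[0., 0., 99., 99., 0.9],
#                    [5., 5., 104., 104., 0.8],
#                    [200., 200., 250., 250., 0.7]], dtype=np.float32)
#   keep = gpu_nms(dets, thresh=0.7, device_id=0)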
#!/bin/sh
# Delete cached build artifacts (-f: do not fail on a clean tree)
rm -rf build install *.c *.cpp
# Compile cpp modules
python setup.py build_ext --inplace
# Compile cuda modules
cd build && cmake .. && make install && cd ..
# Copy to the library root
cp -r install/lib ../
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#include <vector>
#include "gpu_nms.h"
#include <cstdio>
#include <cstdlib>
#define CUDA_CHECK(condition) \
/* Code block avoids redefinition of cudaError_t error */ \
do { \
cudaError_t error = condition; \
if (error != cudaSuccess) { \
fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(error)); \
exit(1); \
} \
} while (0)
void SetDevice(int device_id) {
int current_device;
CUDA_CHECK(cudaGetDevice(&current_device));
if (current_device == device_id) return;
CUDA_CHECK(cudaSetDevice(device_id));
}
#define DIV_UP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define NMS_BLOCK_SIZE 64
template <typename T>
__device__ T iou(const T* A, const T* B) {
const T x1 = max(A[0], B[0]);
const T y1 = max(A[1], B[1]);
const T x2 = min(A[2], B[2]);
const T y2 = min(A[3], B[3]);
const T width = max((T)0, x2 - x1 + 1);
const T height = max((T)0, y2 - y1 + 1);
const T area = width * height;
const T A_area = (A[2] - A[0] + 1) * (A[3] - A[1] + 1);
const T B_area = (B[2] - B[0] + 1) * (B[3] - B[1] + 1);
return area / (A_area + B_area - area);
}
template <typename T>
__global__ void nms_mask(const int num_boxes, const T nms_thresh,
const T* boxes, unsigned long long* mask) {
const int i_start = blockIdx.x * NMS_BLOCK_SIZE;
const int di_end = min(num_boxes - i_start, NMS_BLOCK_SIZE);
const int j_start = blockIdx.y * NMS_BLOCK_SIZE;
const int dj_end = min(num_boxes - j_start, NMS_BLOCK_SIZE);
const int num_blocks = DIV_UP(num_boxes, NMS_BLOCK_SIZE);
const int bid = blockIdx.x;
const int tid = threadIdx.x;
__shared__ T boxes_i[NMS_BLOCK_SIZE * 4];
if (tid < di_end) {
boxes_i[tid * 4 + 0] = boxes[(i_start + tid) * 5 + 0];
boxes_i[tid * 4 + 1] = boxes[(i_start + tid) * 5 + 1];
boxes_i[tid * 4 + 2] = boxes[(i_start + tid) * 5 + 2];
boxes_i[tid * 4 + 3] = boxes[(i_start + tid) * 5 + 3];
}
__syncthreads();
if (tid < dj_end) {
const T* const box_j = boxes + (j_start + tid) * 5;
unsigned long long mask_j = 0;
const int di_start = (i_start == j_start) ? (tid + 1) : 0;
for (int di = di_start; di < di_end; ++di)
if (iou(box_j, boxes_i + di * 4) > nms_thresh)
mask_j |= 1ULL << di;
mask[(j_start + tid) * num_blocks + bid] = mask_j;
}
}
template <typename T>
void ApplyNMS(const int num_boxes, const int max_keeps, const float thresh,
const T* boxes, int* keep_indices, int& num_keep) {
const int num_blocks = DIV_UP(num_boxes, NMS_BLOCK_SIZE);
const dim3 blocks(num_blocks, num_blocks);
size_t mask_nbytes = num_boxes * num_blocks * sizeof(unsigned long long);
size_t boxes_nbytes = num_boxes * 5 * sizeof(T);
void* boxes_dev, *mask_dev;
CUDA_CHECK(cudaMalloc(&boxes_dev, boxes_nbytes));
CUDA_CHECK(cudaMalloc(&mask_dev, mask_nbytes));
CUDA_CHECK(cudaMemcpy(boxes_dev, boxes, boxes_nbytes, cudaMemcpyHostToDevice));
nms_mask<T> << <blocks, NMS_BLOCK_SIZE >> > (num_boxes, thresh,
(T*)boxes_dev,
(unsigned long long*)mask_dev);
CUDA_CHECK(cudaPeekAtLastError());
std::vector<unsigned long long> mask_host(num_boxes * num_blocks);
CUDA_CHECK(cudaMemcpy(&mask_host[0], mask_dev, mask_nbytes, cudaMemcpyDeviceToHost));
std::vector<unsigned long long> dead_bit(num_blocks);
memset(&dead_bit[0], 0, sizeof(unsigned long long) * num_blocks);
int num_selected = 0;
for (int i = 0; i < num_boxes; ++i) {
const int nblock = i / NMS_BLOCK_SIZE;
const int inblock = i % NMS_BLOCK_SIZE;
if (!(dead_bit[nblock] & (1ULL << inblock))) {
keep_indices[num_selected++] = i;
unsigned long long* mask_i = &mask_host[0] + i * num_blocks;
for (int j = nblock; j < num_blocks; ++j) dead_bit[j] |= mask_i[j];
if (num_selected == max_keeps) break;
}
}
num_keep = num_selected;
CUDA_CHECK(cudaFree(mask_dev));
CUDA_CHECK(cudaFree(boxes_dev));
}
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
int boxes_dim, float nms_overlap_thresh, int device_id) {
// set the device to use
SetDevice(device_id);
// apply gpu nms
ApplyNMS<float>(boxes_num, boxes_num, nms_overlap_thresh,
boxes_host, keep_out, *num_out);
}
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from distutils.extension import Extension
from distutils.core import setup
from Cython.Distutils import build_ext
import numpy as np
numpy_include = np.get_include()
ext_modules = [
Extension(
"install.lib.utils.cython_bbox",
["bbox.pyx"],
extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
include_dirs=[numpy_include]),
Extension(
"install.lib.nms.cpu_nms",
["cpu_nms.pyx"],
extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
include_dirs=[numpy_include]),
Extension(
"install.deprecated.gpu_nms",
["gpu_nms.pyx"],
extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
language='c++',
include_dirs=[numpy_include]),
Extension(
'install.lib.pycocotools._mask',
['../lib/pycocotools/maskApi.c', '../lib/pycocotools/_mask.pyx'],
include_dirs=[numpy_include, 'pycocotools'],
extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99']),
]
setup(name='Detectron', ext_modules=ext_modules, cmdclass={'build_ext': build_ext})
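# This script is invoked by make.sh above via "python setup.py build_ext
# --inplace"; the resulting modules are then collected under install/.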
......@@ -31,14 +31,14 @@ FRCNN:
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '/model/R-101.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k'
IMS_PER_BATCH: 2
DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
IMS_PER_BATCH: 2
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
TEST:
DATABASE: '/data/coco_2014_minival'
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
......
......@@ -31,14 +31,14 @@ FRCNN:
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '/model/R-101.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k'
IMS_PER_BATCH: 2
DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
IMS_PER_BATCH: 2
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
TEST:
DATABASE: '/data/coco_2014_minival'
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
......
......@@ -22,13 +22,13 @@ FRCNN:
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '/model/R-50.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
DATASET: '/data/voc_0712_trainval'
IMS_PER_BATCH: 2
BATCH_SIZE: 128
SCALES: [600]
MAX_SIZE: 1000
TEST:
DATABASE: '/data/voc_2007_test'
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [600]
......
......@@ -28,14 +28,14 @@ FRCNN:
MLP_HEAD_DIM: 4096
TRAIN:
WEIGHTS: '/model/VGG16.RCNN.pth'
DATABASE: '/data/voc_0712_trainval'
DATASET: '/data/voc_0712_trainval'
RPN_MIN_SIZE: 16
IMS_PER_BATCH: 2
BATCH_SIZE: 128
SCALES: [600]
MAX_SIZE: 1000
TEST:
DATABASE: '/data/voc_2007_test'
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_MIN_SIZE: 16
RPN_POST_NMS_TOP_N: 300
......
......@@ -21,9 +21,9 @@ MODEL:
'teddy bear', 'hair drier', 'toothbrush']
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
DECAY_STEPS: [30000, 40000]
MAX_STEPS: 45000
BASE_LR: 0.01
DECAY_STEPS: [60000, 80000]
MAX_STEPS: 90000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_retinanet_400
FPN:
......@@ -31,12 +31,15 @@ FPN:
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/model/R-50.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k'
DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
USE_COLOR_JITTER: True
IMS_PER_BATCH: 8
SCALES: [400]
MAX_SIZE: 666
RANDOM_SCALES: [0.75, 1.0]
TEST:
DATABASE: '/data/coco_2014_minival'
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
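# Note (an assumption based on the Detectron-style conventions this config
# mirrors): SCALES is the target length of the shorter image side, MAX_SIZE
# caps the longer side, and RANDOM_SCALES samples a training-time jitter
# factor from the given range.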
......
NUM_GPUS: 4
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: retinanet
BACKBONE: resnet50.fpn
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench',
'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush']
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
WARM_UP_STEPS: 2000 # default: 500
DECAY_STEPS: [120000, 160000]
MAX_STEPS: 180000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_retinanet_400
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
DROPBLOCK:
DROP_ON: True
DECREMENT: 0.000005 # * 20000 = 0.1
TRAIN:
WEIGHTS: '/model/R-50.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k'
IMS_PER_BATCH: 8
SCALES: [400]
MAX_SIZE: 666
USE_SCALE_JITTER: True
USE_COLOR_JITTER: True
SCALE_JITTER_RANGE: [0.75, 1.33]
TEST:
DATABASE: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
SCALES: [400]
MAX_SIZE: 666
NMS: 0.5
\ No newline at end of file
NUM_GPUS: 1
VIS: False
VIS_ON_FILE: False
MODEL:
TYPE: retinanet
BACKBONE: resnet18.fpn
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor']
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.01
DECAY_STEPS: [40000, 50000, 60000]
WARM_UP_STEPS: 2000
MAX_STEPS: 60000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/model/R-18.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
SCALE_JITTER_RANGE: [0.5, 2.0]
USE_SCALE_JITTER: True
USE_COLOR_JITTER: True
TEST:
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 1
SCALES: [300]
MAX_SIZE: 500
NMS: 0.45
\ No newline at end of file
......@@ -12,27 +12,24 @@ MODEL:
'sheep', 'sofa', 'train', 'tvmonitor']
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.02
BASE_LR: 0.01
DECAY_STEPS: [40000, 50000, 60000]
MAX_STEPS: 60000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_retinanet_300
SNAPSHOT_PREFIX: voc_retinanet_320
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/model/AirNet.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
SCALE_JITTER_RANGE: [0.5, 2.0]
USE_SCALE_JITTER: True
DATASET: '/data/voc_0712_trainval'
USE_COLOR_JITTER: True
IMS_PER_BATCH: 32
SCALES: [320]
RANDOM_SCALES: [0.5, 1.0]
TEST:
DATABASE: '/data/voc_2007_test'
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 1
SCALES: [300]
MAX_SIZE: 500
SCALES: [320]
NMS: 0.45
\ No newline at end of file
......@@ -17,23 +17,20 @@ SOLVER:
WARM_UP_STEPS: 2000
MAX_STEPS: 60000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_retinanet_300
SNAPSHOT_PREFIX: voc_retinanet_320
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/model/R-34.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
SCALE_JITTER_RANGE: [0.5, 2.0]
USE_SCALE_JITTER: True
WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/voc_0712_trainval'
USE_COLOR_JITTER: True
IMS_PER_BATCH: 32
SCALES: [320]
RANDOM_SCALES: [0.5, 2.0]
TEST:
DATABASE: '/data/voc_2007_test'
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 1
SCALES: [300]
MAX_SIZE: 500
SCALES: [320]
NMS: 0.45
\ No newline at end of file
......@@ -16,24 +16,25 @@ SOLVER:
DECAY_STEPS: [80000, 100000, 120000]
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_ssd_300
SNAPSHOT_PREFIX: voc_ssd_320
SSD:
RESIZE:
HEIGHT: 300
WIDTH: 300
NUM_CONVS: 2
MULTIBOX:
STRIDES: [8, 16, 32]
MIN_SIZES: [30, 90, 150]
MAX_SIZES: [90, 150, 210]
STRIDES: [8, 16, 32]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
WEIGHTS: '/model/AirNet.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
DATASET: '/data/voc_0712_trainval'
SCALES: [320]
RANDOM_SCALES: [0.25, 1.00]
IMS_PER_BATCH: 32
TEST:
DATABASE: '/data/voc_2007_test'
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 8
SCALES: [320]
NMS_TOP_K: 400
NMS: 0.45
SCORE_THRESH: 0.01
......
......@@ -14,30 +14,35 @@ MODEL:
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.001
WARM_UP_FACTOR: 0.
WEIGHT_DECAY: 0.0005
DECAY_STEPS: [80000, 100000, 120000]
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_ssd_300
SSD:
RESIZE:
HEIGHT: 300
WIDTH: 300
MULTIBOX:
STRIDES: [8, 16, 32, 64, 100, 300]
MIN_SIZES: [30, 60, 110, 162, 213, 264]
MAX_SIZES: [60, 110, 162, 213, 264, 315]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5, 3, 0.33], [1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33], [1, 2, 0.5], [1, 2, 0.5]]
ASPECT_RATIOS: [
[1, 2, 0.5],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5],
[1, 2, 0.5]
]
TRAIN:
WEIGHTS: '/model/VGG16.SSD.pth'
DATABASE: '/data/voc_0712_trainval'
DATASET: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [300]
RANDOM_SCALES: [0.25, 1.00]
TEST:
DATABASE: '/data/voc_2007_test'
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 8
SCALES: [300]
NMS_TOP_K: 400
NMS: 0.45
SCORE_THRESH: 0.01
......
......@@ -22,23 +22,29 @@ SOLVER:
SNAPSHOT_PREFIX: voc_ssd_320
SSD:
NUM_CONVS: 2
RESIZE:
HEIGHT: 320
WIDTH: 320
MULTIBOX:
STRIDES: [8, 16, 32, 64, 100, 300]
MIN_SIZES: [30, 60, 110, 162, 213, 264]
MAX_SIZES: [60, 110, 162, 213, 264, 315]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5, 3, 0.33], [1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33], [1, 2, 0.5], [1, 2, 0.5]]
ASPECT_RATIOS: [
[1, 2, 0.5],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5],
[1, 2, 0.5]
]
TRAIN:
WEIGHTS: '/model/R-50.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
DATASET: '/data/voc_0712_trainval'
SCALES: [320]
RANDOM_SCALES: [0.25, 1.00]
IMS_PER_BATCH: 32
TEST:
DATABASE: '/data/voc_2007_test'
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 8
SCALES: [320]
NMS_TOP_K: 400
NMS: 0.45
SCORE_THRESH: 0.01
......
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h"
#include "nms_op.h"
namespace dragon {
template <class Context> template <typename T>
void NonMaxSuppressionOp<Context>::DoRunWithType() {
int num_selected;
utils::detection::ApplyNMS(
Output(0)->count(),
Output(0)->count(),
iou_threshold_,
Input(0).template mutable_data<T, Context>(),
Output(0)->template mutable_data<int64_t, CPUContext>(),
num_selected, ctx()
);
Output(0)->Reshape({ num_selected });
}
template <class Context>
void NonMaxSuppressionOp<Context>::RunOnDevice() {
CHECK(Input(0).ndim() == 2 && Input(0).dim(1) == 5)
<< "\nThe dimensions of boxes should be (num_boxes, 5).";
Output(0)->Reshape({ Input(0).dim(0) });
DispatchHelper<TensorTypes<float>>::Call(this, Input(0));
}
DEPLOY_CPU(NonMaxSuppression);
#ifdef USE_CUDA
DEPLOY_CUDA(NonMaxSuppression);
#endif
OPERATOR_SCHEMA(NonMaxSuppression).NumInputs(1).NumOutputs(1);
NO_GRADIENT(NonMaxSuppression);
} // namespace dragon
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef SEETADET_CXX_OPERATORS_NMS_OP_H_
#define SEETADET_CXX_OPERATORS_NMS_OP_H_
#include "dragon/core/operator.h"
namespace dragon {
template <class Context>
class NonMaxSuppressionOp final : public Operator<Context> {
public:
NonMaxSuppressionOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
iou_threshold_(OpArg<float>("iou_threshold", 0.5f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DoRunWithType();
protected:
float iou_threshold_;
};
} // namespace dragon
#endif // SEETADET_CXX_OPERATORS_NMS_OP_H_
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h"
#include "retinanet_decoder_op.h"
namespace dragon {
template <class Context> template <typename T>
void RetinaNetDecoderOp<Context>::DoRunWithType() {
using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox
int feat_h, feat_w;
int C = Input(-3).dim(2), A, K;
int total_proposals = 0;
int num_candidates, num_boxes, num_proposals;
auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>();
auto* y = Output(0)->template mutable_data<BT, BC>();
for (int n = 0; n < num_images_; ++n) {
BT im_h = im_info[0];
BT im_w = im_info[1];
BT im_scale_h = im_info[2];
BT im_scale_w = im_info[2];
if (Input(-1).dim(1) == 4) im_scale_w = im_info[3];
auto* scores = batch_scores + n * Input(-3).stride(0);
auto* deltas = batch_deltas + n * Input(-2).stride(0);
CHECK_EQ(strides_.size(), InputSize() - 3)
<< "\nGiven " << strides_.size() << " strides "
<< "and " << InputSize() - 3 << " features";
// Select the top-k candidates as proposals
num_boxes = Input(-3).dim(1);
num_candidates = Input(-3).count(1);
roi_indices_.resize(num_candidates);
num_candidates = 0;
for (int i = 0; i < roi_indices_.size(); ++i)
if (scores[i] > score_thr_)
roi_indices_[num_candidates++] = i;
scores_.resize(num_candidates);
for (int i = 0; i < num_candidates; ++i)
scores_[i] = scores[roi_indices_[i]];
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true,
scores_.data(),
indices_
);
for (int i = 0; i < num_proposals; ++i)
indices_[i] = roi_indices_[indices_[i]];
// Decode the candidates
int base_offset = 0;
for (int i = 0; i < strides_.size(); i++) {
feat_h = Input(i).dim(2);
feat_w = Input(i).dim(3);
K = feat_h * feat_w;
A = int(ratios_.size() * scales_.size());
anchors_.resize((size_t)(A * 4));
utils::detection::GenerateAnchors(
strides_[i],
(int)ratios_.size(),
(int)scales_.size(),
ratios_.data(),
scales_.data(),
anchors_.data()
);
utils::detection::GenerateGridAnchors(
num_proposals, C, A,
feat_h, feat_w,
strides_[i],
base_offset,
anchors_.data(),
indices_.data(),
y
);
base_offset += (A * K);
}
utils::detection::GenerateMCProposals(
num_proposals,
num_boxes, C,
n,
im_h,
im_w,
im_scale_h,
im_scale_w,
scores,
deltas,
indices_.data(),
y
);
total_proposals += num_proposals;
y += (num_proposals * 7);
im_info += Input(-1).dim(1);
}
Output(0)->Reshape({ total_proposals, 7 });
}
template <class Context>
void RetinaNetDecoderOp<Context>::RunOnDevice() {
num_images_ = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_
<< " groups info, got "
<< Input(-1).dim(0) << ".";
Output(0)->Reshape({ num_images_ * pre_nms_topn_, 7 });
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
}
DEPLOY_CPU(RetinaNetDecoder);
#ifdef USE_CUDA
DEPLOY_CUDA(RetinaNetDecoder);
#endif
OPERATOR_SCHEMA(RetinaNetDecoder)
.NumInputs(3, INT_MAX)
.NumOutputs(1, INT_MAX);
} // namespace dragon
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef SEETADET_CXX_OPERATORS_RETINANET_DECODER_OP_H_
#define SEETADET_CXX_OPERATORS_RETINANET_DECODER_OP_H_
#include "dragon/core/operator.h"
namespace dragon {
template <class Context>
class RetinaNetDecoderOp final : public Operator<Context> {
public:
RetinaNetDecoderOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
strides_(OpArgs<int64_t>("strides")),
ratios_(OpArgs<float>("ratios")),
scales_(OpArgs<float>("scales")),
pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
score_thr_(OpArg<float>("score_thresh", 0.05f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DoRunWithType();
protected:
float score_thr_;
vec64_t strides_, indices_, roi_indices_;
vector<float> ratios_, scales_, scores_, anchors_;
int64_t num_images_, pre_nms_topn_;
};
} // namespace dragon
#endif // SEETADET_CXX_OPERATORS_RETINANET_DECODER_OP_H_
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h"
#include "rpn_decoder_op.h"
namespace dragon {
template <class Context> template <typename T>
void RPNDecoderOp<Context>::DoRunWithType() {
using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox
int feat_h, feat_w, K, A;
int total_rois = 0, num_rois;
int num_candidates, num_proposals;
auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>();
auto* y = Output(0)->template mutable_data<BT, BC>();
for (int n = 0; n < num_images_; ++n) {
const BT im_h = im_info[0];
const BT im_w = im_info[1];
const BT scale = im_info[2];
const BT min_box_h = min_size_ * scale;
const BT min_box_w = min_size_ * scale;
auto* scores = batch_scores + n * Input(-3).stride(0);
auto* deltas = batch_deltas + n * Input(-2).stride(0);
if (strides_.size() == 1) {
// Case 1: single stride
feat_h = Input(0).dim(2);
feat_w = Input(0).dim(3);
K = feat_h * feat_w;
A = int(ratios_.size() * scales_.size());
// Select the Top-K candidates as proposals
num_candidates = A * K;
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true, scores, indices_
);
// Decode the candidates
anchors_.resize((size_t)(A * 4));
proposals_.Reshape({ num_proposals, 5 });
utils::detection::GenerateAnchors(
strides_[0],
(int)ratios_.size(),
(int)scales_.size(),
ratios_.data(),
scales_.data(),
anchors_.data()
);
utils::detection::GenerateGridAnchors(
num_proposals, A,
feat_h, feat_w,
strides_[0],
0,
anchors_.data(),
indices_.data(),
proposals_.template mutable_data<BT, BC>()
);
utils::detection::GenerateSSProposals(
K, num_proposals,
im_h, im_w,
min_box_h, min_box_w,
scores,
deltas,
indices_.data(),
proposals_.template mutable_data<BT, BC>()
);
// Sort, NMS and Retrieve
utils::detection::SortProposals(
0,
num_proposals - 1,
num_proposals,
proposals_.template mutable_data<BT, BC>()
);
utils::detection::ApplyNMS(
num_proposals,
post_nms_topn_,
nms_thr_,
proposals_.template mutable_data<BT, Context>(),
roi_indices_.data(),
num_rois, ctx()
);
utils::detection::RetrieveRoIs(
num_rois,
n,
proposals_.template data<BT, BC>(),
roi_indices_.data(),
y
);
} else if (strides_.size() > 1) {
// Case 2: multiple strides
CHECK_EQ(strides_.size(), InputSize() - 3)
<< "\nGiven " << strides_.size() << " strides "
<< "and " << InputSize() - 3 << " feature inputs";
CHECK_EQ(strides_.size(), scales_.size())
<< "\nGiven " << strides_.size() << " strides "
<< "and " << scales_.size() << " scales";
// Select the top-k candidates as proposals
num_candidates = Input(-3).dim(1);
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true, scores, indices_
);
// Decode the candidates
int base_offset = 0;
proposals_.Reshape({ num_proposals, 5 });
auto* proposals = proposals_
.template mutable_data<BT, BC>();
for (int i = 0; i < strides_.size(); i++) {
feat_h = Input(i).dim(2);
feat_w = Input(i).dim(3);
K = feat_h * feat_w;
A = (int)ratios_.size();
anchors_.resize((size_t)(A * 4));
utils::detection::GenerateAnchors(
strides_[i],
(int)ratios_.size(),
1,
ratios_.data(),
scales_.data(),
anchors_.data()
);
utils::detection::GenerateGridAnchors(
num_proposals, A,
feat_h, feat_w,
strides_[i],
base_offset,
anchors_.data(),
indices_.data(),
proposals
);
base_offset += (A * K);
}
utils::detection::GenerateMSProposals(
num_candidates,
num_proposals,
im_h, im_w,
min_box_h, min_box_w,
scores,
deltas,
&indices_[0],
proposals
);
// Sort, NMS and Retrieve
utils::detection::SortProposals(
0,
num_proposals - 1,
num_proposals,
proposals
);
utils::detection::ApplyNMS(
num_proposals,
post_nms_topn_,
nms_thr_,
proposals_.template mutable_data<BT, Context>(),
roi_indices_.data(),
num_rois, ctx()
);
utils::detection::RetrieveRoIs(
num_rois,
n,
proposals,
roi_indices_.data(),
y
);
} else {
LOG(FATAL) << "Expected at least one stride for proposals.";
}
total_rois += num_rois;
y += (num_rois * 5);
im_info += Input(-1).dim(1);
}
Output(0)->Reshape({ total_rois, 5 });
// Distribute rois into K bins
if (OutputSize() > 1) {
CHECK_EQ(max_level_ - min_level_ + 1, OutputSize())
<< "\nExcepted " << OutputSize() << " outputs for levels "
"between [" << min_level_ << ", " << max_level_ << "].";
vector<BT*> ys(OutputSize());
vector<vec64_t> bins(OutputSize());
Tensor RoIs; RoIs.ReshapeLike(*Output(0));
auto* rois = RoIs.template mutable_data<BT, BC>();
ctx()->template Copy<BT, BC, BC>(
Output(0)->count(),
rois, Output(0)->template data<BT, BC>()
);
utils::detection::CollectRoIs(
total_rois,
min_level_,
max_level_,
canonical_level_,
canonical_scale_,
rois, bins
);
for (int i = 0; i < OutputSize(); i++) {
Output(i)->Reshape({ std::max((int)bins[i].size(), 1), 5 });
ys[i] = Output(i)->template mutable_data<BT, BC>();
}
utils::detection::DistributeRoIs(bins, rois, ys);
}
}
template <class Context>
void RPNDecoderOp<Context>::RunOnDevice() {
num_images_ = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_
<< " groups info, got "
<< Input(-1).dim(0) << ".";
roi_indices_.resize(post_nms_topn_);
Output(0)->Reshape({ num_images_ * post_nms_topn_, 5 });
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
}
DEPLOY_CPU(RPNDecoder);
#ifdef USE_CUDA
DEPLOY_CUDA(RPNDecoder);
#endif
OPERATOR_SCHEMA(RPNDecoder)
.NumInputs(3, INT_MAX)
.NumOutputs(1, INT_MAX);
} // namespace dragon
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef SEETADET_CXX_OPERATORS_RPN_DECODER_OP_H_
#define SEETADET_CXX_OPERATORS_RPN_DECODER_OP_H_
#include "dragon/core/operator.h"
namespace dragon {
template <class Context>
class RPNDecoderOp final : public Operator<Context> {
public:
RPNDecoderOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
strides_(OpArgs<int64_t>("strides")),
ratios_(OpArgs<float>("ratios")),
scales_(OpArgs<float>("scales")),
pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
post_nms_topn_(OpArg<int64_t>("post_nms_top_n", 300)),
nms_thr_(OpArg<float>("nms_thresh", 0.7f)),
min_size_(OpArg<int64_t>("min_size", 16)),
min_level_(OpArg<int64_t>("min_level", 2)),
max_level_(OpArg<int64_t>("max_level", 5)),
canonical_level_(OpArg<int64_t>("canonical_level", 4)),
canonical_scale_(OpArg<int64_t>("canonical_scale", 224)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DoRunWithType();
protected:
float nms_thr_;
vec64_t strides_, indices_, roi_indices_;
vector<float> ratios_, scales_, scores_, anchors_;
int64_t min_size_, pre_nms_topn_, post_nms_topn_;
int64_t num_images_, min_level_, max_level_;
int64_t canonical_level_, canonical_scale_;
Tensor proposals_;
};
} // namespace dragon
#endif // SEETADET_CXX_OPERATORS_RPN_DECODER_OP_H_
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Build cxx sources."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from distutils.core import setup
from dragon.tools import cpp_extension
if cpp_extension.CUDA_HOME is not None and \
cpp_extension._cuda.is_available():
Extension = cpp_extension.CUDAExtension
else:
Extension = cpp_extension.CppExtension
ext_modules = [
Extension(
name='install.lib.modules._C',
sources=[
'utils/detection_utils.cc',
'utils/detection_utils.cu',
'operators/nms_op.cc',
'operators/retinanet_decoder_op.cc',
'operators/rpn_decoder_op.cc',
],
),
]
setup(
name='SeetaDet',
ext_modules=ext_modules,
cmdclass={'build_ext': cpp_extension.BuildExtension}
)
#include <dragon/core/context.h>
#include "detection_utils.h"
namespace dragon {
namespace utils {
namespace detection {
template <typename T>
T IoU(const T A[], const T B[]) {
if (A[0] > B[2] || A[1] > B[3] ||
A[2] < B[0] || A[3] < B[1]) return 0;
const T x1 = std::max(A[0], B[0]);
const T y1 = std::max(A[1], B[1]);
const T x2 = std::min(A[2], B[2]);
const T y2 = std::min(A[3], B[3]);
const T width = std::max((T)0, x2 - x1 + 1);
const T height = std::max((T)0, y2 - y1 + 1);
const T area = width * height;
const T A_area = (A[2] - A[0] + 1) * (A[3] - A[1] + 1);
const T B_area = (B[2] - B[0] + 1) * (B[3] - B[1] + 1);
return area / (A_area + B_area - area);
}
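// Worked example (with the +1 "pixel" convention above): A = [0, 0, 9, 9]
// and B = [5, 5, 14, 14] are both 10x10 boxes of area 100; the intersection
// [5, 5, 9, 9] covers 5 * 5 = 25, so IoU = 25 / (100 + 100 - 25) ~= 0.143.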
template <> void ApplyNMS<float, CPUContext>(
const int num_boxes,
const int max_keeps,
const float thresh,
const float* boxes,
int64_t* keep_indices,
int& num_keep,
CPUContext* ctx) {
int count = 0;
std::vector<char> is_dead(num_boxes);
for (int i = 0; i < num_boxes; ++i) is_dead[i] = 0;
for (int i = 0; i < num_boxes; ++i) {
if (is_dead[i]) continue;
keep_indices[count++] = i;
if (count == max_keeps) break;
for (int j = i + 1; j < num_boxes; ++j)
if (!is_dead[j] && IoU(&boxes[i * 5],
&boxes[j * 5]) > thresh)
is_dead[j] = 1;
}
num_keep = count;
}
} // namespace detection
} // namespace utils
} // namespace dragon
#ifdef USE_CUDA
#include <dragon/core/context_cuda.h>
#include "detection_utils.h"
namespace dragon {
namespace utils {
namespace detection {
#define DIV_UP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define NUM_THREADS 64
namespace {
template <typename T>
__device__ bool _CheckIoU(
const T* a,
const T* b,
const float thresh) {
const T x1 = max(a[0], b[0]);
const T y1 = max(a[1], b[1]);
const T x2 = min(a[2], b[2]);
const T y2 = min(a[3], b[3]);
const T width = max(T(0), x2 - x1 + 1);
const T height = max(T(0), y2 - y1 + 1);
const T inter = width * height;
const T Sa = (a[2] - a[0] + T(1)) * (a[3] - a[1] + T(1));
const T Sb = (b[2] - b[0] + T(1)) * (b[3] - b[1] + T(1));
return inter > thresh * (Sa + Sb - inter);
}
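// The kernel below fills a pairwise suppression bitmask: boxes are tiled
// into NUM_THREADS-sized groups, each thread owns one "row" box, and bit i
// of its 64-bit word is set if "column" box i overlaps it above the
// threshold. The greedy selection over this mask runs on the host below.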
template <typename T>
__global__ void _NonMaxSuppression(
const int num_blocks,
const int num_boxes,
const T thresh,
const T* dev_boxes,
uint64_t* dev_mask) {
const int row_start = blockIdx.y;
const int col_start = blockIdx.x;
if (row_start > col_start) return;
const int row_size = min(num_boxes - row_start * NUM_THREADS, NUM_THREADS);
const int col_size = min(num_boxes - col_start * NUM_THREADS, NUM_THREADS);
__shared__ T block_boxes[NUM_THREADS * 4];
if (threadIdx.x < col_size) {
const int c1 = threadIdx.x * 4;
const int c2 = (col_start * NUM_THREADS + threadIdx.x) * 5;
block_boxes[c1] = dev_boxes[c2];
block_boxes[c1 + 1] = dev_boxes[c2 + 1];
block_boxes[c1 + 2] = dev_boxes[c2 + 2];
block_boxes[c1 + 3] = dev_boxes[c2 + 3];
}
__syncthreads();
if (threadIdx.x < row_size) {
const int index = row_start * NUM_THREADS + threadIdx.x;
const T* dev_box = dev_boxes + index * 5;
unsigned long long val = 0;
const int start = (row_start == col_start) ? (threadIdx.x + 1) : 0;
for (int i = start; i < col_size; ++i) {
if (_CheckIoU(dev_box, block_boxes + i * 4, thresh)) {
val |= 1ULL << i;
}
}
dev_mask[index * num_blocks + col_start] = val;
}
}
} // namespace
template <> void ApplyNMS<float, CUDAContext>(
const int num_boxes,
const int max_keeps,
const float thresh,
const float* boxes,
int64_t* keep_indices,
int& num_keep,
CUDAContext* ctx) {
const int num_blocks = DIV_UP(num_boxes, NUM_THREADS);
vector<uint64_t> mask_host(num_boxes * num_blocks);
auto* mask_dev = (uint64_t*)ctx->New(mask_host.size() * sizeof(uint64_t));
_NonMaxSuppression
<<< dim3(num_blocks, num_blocks), NUM_THREADS,
0, ctx->cuda_stream() >>>(
num_blocks,
num_boxes,
thresh,
boxes,
mask_dev
);
CUDA_CHECK(cudaMemcpyAsync(
mask_host.data(),
mask_dev,
mask_host.size() * sizeof(uint64_t),
cudaMemcpyDeviceToHost,
ctx->cuda_stream()
));
ctx->FinishDeviceComputation();
vector<uint64_t> dead_bit(num_blocks);
memset(&dead_bit[0], 0, sizeof(uint64_t) * num_blocks);
int num_selected = 0;
for (int i = 0; i < num_boxes; ++i) {
const int nblock = i / NUM_THREADS;
const int inblock = i % NUM_THREADS;
if (!(dead_bit[nblock] & (1ULL << inblock))) {
keep_indices[num_selected++] = i;
auto* mask_i = &mask_host[0] + i * num_blocks;
for (int j = nblock; j < num_blocks; ++j) dead_bit[j] |= mask_i[j];
if (num_selected == max_keeps) break;
}
}
num_keep = num_selected;
ctx->Delete(mask_dev);
}
} // namespace detection
} // namespace utils
} // namespace dragon
#endif // USE_CUDA
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef SEETADET_CXX_UTILS_DETECTION_UTILS_H_
#define SEETADET_CXX_UTILS_DETECTION_UTILS_H_
#include "dragon/core/context.h"
#include "dragon/core/operator.h"
namespace dragon {
namespace utils {
namespace detection {
#define ROUND(x) ((int)((x) + (T)0.5))
/******************** BBox ********************/
template <typename T>
inline int FilterBoxes(
const T dx,
const T dy,
const T d_log_w,
const T d_log_h,
const T im_w,
const T im_h,
const T min_box_w,
const T min_box_h,
T* bbox) {
const T w = bbox[2] - bbox[0] + 1;
const T h = bbox[3] - bbox[1] + 1;
const T ctr_x = bbox[0] + (T)0.5 * w;
const T ctr_y = bbox[1] + (T)0.5 * h;
const T pred_ctr_x = dx * w + ctr_x;
const T pred_ctr_y = dy * h + ctr_y;
const T pred_w = exp(d_log_w) * w;
const T pred_h = exp(d_log_h) * h;
bbox[0] = pred_ctr_x - (T)0.5 * pred_w;
bbox[1] = pred_ctr_y - (T)0.5 * pred_h;
bbox[2] = pred_ctr_x + (T)0.5 * pred_w;
bbox[3] = pred_ctr_y + (T)0.5 * pred_h;
bbox[0] = std::max((T)0, std::min(bbox[0], im_w - 1));
bbox[1] = std::max((T)0, std::min(bbox[1], im_h - 1));
bbox[2] = std::max((T)0, std::min(bbox[2], im_w - 1));
bbox[3] = std::max((T)0, std::min(bbox[3], im_h - 1));
const T bbox_w = bbox[2] - bbox[0] + 1;
const T bbox_h = bbox[3] - bbox[1] + 1;
return (bbox_w >= min_box_w) * (bbox_h >= min_box_h);
}
template <typename T>
inline void BBoxTransform(
const T dx,
const T dy,
const T d_log_w,
const T d_log_h,
const T im_w,
const T im_h,
const T im_scale_h,
const T im_scale_w,
T* bbox) {
const T w = bbox[2] - bbox[0] + 1;
const T h = bbox[3] - bbox[1] + 1;
const T ctr_x = bbox[0] + (T)0.5 * w;
const T ctr_y = bbox[1] + (T)0.5 * h;
const T pred_ctr_x = dx * w + ctr_x;
const T pred_ctr_y = dy * h + ctr_y;
const T pred_w = exp(d_log_w) * w;
const T pred_h = exp(d_log_h) * h;
bbox[0] = pred_ctr_x - (T)0.5 * pred_w;
bbox[1] = pred_ctr_y - (T)0.5 * pred_h;
bbox[2] = pred_ctr_x + (T)0.5 * pred_w;
bbox[3] = pred_ctr_y + (T)0.5 * pred_h;
bbox[0] = std::max((T)0, std::min(bbox[0], im_w - 1)) / im_scale_w;
bbox[1] = std::max((T)0, std::min(bbox[1], im_h - 1)) / im_scale_h;
bbox[2] = std::max((T)0, std::min(bbox[2], im_w - 1)) / im_scale_w;
bbox[3] = std::max((T)0, std::min(bbox[3], im_h - 1)) / im_scale_h;
}
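// Worked example of the decoding above: a 10x10 box [0, 0, 9, 9] has
// ctr = (5, 5); deltas (dx, dy, d_log_w, d_log_h) = (0.1, 0, log(2), 0)
// move the center to (6, 5) and double the width to 20, giving
// [-4, 0, 16, 10] before clipping to the image and dividing by the scale.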
/******************** Anchor ********************/
template <typename T>
inline void GenerateAnchors(
int base_size,
const int num_ratios,
const int num_scales,
const T* ratios,
const T* scales,
T* anchors) {
const T base_area = (T)(base_size * base_size);
const T center = (T)0.5 * (base_size - (T)1);
T* offset_anchors = anchors;
for (int i = 0; i < num_ratios; ++i) {
const T ratio_w = (T)ROUND(sqrt(base_area / ratios[i]));
const T ratio_h = (T)ROUND(ratio_w * ratios[i]);
for (int j = 0; j < num_scales; ++j) {
const T scale_w = (T)0.5 * (ratio_w * scales[j] - (T)1);
const T scale_h = (T)0.5 * (ratio_h * scales[j] - (T)1);
offset_anchors[0] = center - scale_w;
offset_anchors[1] = center - scale_h;
offset_anchors[2] = center + scale_w;
offset_anchors[3] = center + scale_h;
offset_anchors += 4;
}
}
}
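// Example: base_size = 16, ratios = {1}, scales = {8} yields the classic
// Faster R-CNN 128x128 anchor [-56, -56, 71, 71]: center = 7.5 and
// half extent = 0.5 * (16 * 8 - 1) = 63.5.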
template <typename T>
inline void GenerateGridAnchors(
const int num_proposals,
const int num_anchors,
const int feat_h,
const int feat_w,
const int stride,
const int base_offset,
const T* anchors,
const int64_t* indices,
T* proposals) {
T x, y;
int idx_3d, a, h, w;
int idx_range = num_anchors * feat_h * feat_w;
for (int i = 0; i < num_proposals; ++i) {
idx_3d = (int)indices[i] - base_offset;
if (idx_3d >= 0 && idx_3d < idx_range) {
w = idx_3d % feat_w;
h = (idx_3d / feat_w) % feat_h;
a = idx_3d / feat_w / feat_h;
x = (T)w * stride, y = (T)h * stride;
auto* A = anchors + a * 4;
auto* P = proposals + i * 5;
P[0] = x + A[0], P[1] = y + A[1];
P[2] = x + A[2], P[3] = y + A[3];
}
}
}
template <typename T>
inline void GenerateGridAnchors(
const int num_proposals,
const int num_classes,
const int num_anchors,
const int feat_h,
const int feat_w,
const int stride,
const int base_offset,
const T* anchors,
const int64_t* indices,
T* proposals) {
T x, y;
int idx_4d, a, h, w;
int lr = num_classes * base_offset;
int rr = num_classes * (num_anchors * feat_h * feat_w);
for (int i = 0; i < num_proposals; ++i) {
idx_4d = (int)indices[i] - lr;
if (idx_4d >= 0 && idx_4d < rr) {
idx_4d /= num_classes;
w = idx_4d % feat_w;
h = (idx_4d / feat_w) % feat_h;
a = idx_4d / feat_w / feat_h;
x = (T)w * stride, y = (T)h * stride;
auto* A = anchors + a * 4;
auto* P = proposals + i * 7 + 1;
P[0] = x + A[0], P[1] = y + A[1];
P[2] = x + A[2], P[3] = y + A[3];
}
}
}
/******************** Proposal ********************/
template <typename T>
void GenerateSSProposals(
const int K,
const int num_proposals,
const float im_h,
const float im_w,
const float min_box_h,
const float min_box_w,
const T* scores,
const T* deltas,
const int64_t* indices,
T* proposals) {
int64_t index, a, k;
const float* delta;
float* proposal = proposals;
float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) {
index = indices[i];
a = index / K, k = index % K;
delta = deltas + k;
dx = delta[(a * 4 + 0) * K];
dy = delta[(a * 4 + 1) * K];
d_log_w = delta[(a * 4 + 2) * K];
d_log_h = delta[(a * 4 + 3) * K];
proposal[4] = FilterBoxes(
dx, dy,
d_log_w, d_log_h,
im_w, im_h,
min_box_w, min_box_h,
proposal
) * scores[index];
proposal += 5;
}
}
template <typename T>
void GenerateMSProposals(
const int num_candidates,
const int num_proposals,
const float im_h,
const float im_w,
const float min_box_h,
const float min_box_w,
const T* scores,
const T* deltas,
const int64_t* indices,
T* proposals) {
int64_t index;
int64_t num_candidates_2x = 2 * num_candidates;
int64_t num_candidates_3x = 3 * num_candidates;
float* proposal = proposals;
float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) {
index = indices[i];
dx = deltas[index];
dy = deltas[num_candidates + index];
d_log_w = deltas[num_candidates_2x + index];
d_log_h = deltas[num_candidates_3x + index];
proposal[4] = FilterBoxes(
dx, dy,
d_log_w, d_log_h,
im_w, im_h,
min_box_w, min_box_h,
proposal
) * scores[index];
proposal += 5;
}
}
template <typename T>
void GenerateMCProposals(
const int num_proposals,
const int num_boxes,
const int num_classes,
const int im_idx,
const float im_h,
const float im_w,
const float im_scale_h,
const float im_scale_w,
const T* scores,
const T* deltas,
const int64_t* indices,
T* proposals) {
int64_t index, cls;
int64_t num_boxes_2x = 2 * num_boxes;
int64_t num_boxes_3x = 3 * num_boxes;
float* proposal = proposals;
float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) {
cls = indices[i] % num_classes;
index = indices[i] / num_classes;
dx = deltas[index];
dy = deltas[num_boxes + index];
d_log_w = deltas[num_boxes_2x + index];
d_log_h = deltas[num_boxes_3x + index];
proposal[0] = im_idx;
BBoxTransform(
dx, dy,
d_log_w, d_log_h,
im_w, im_h,
im_scale_h, im_scale_w,
proposal + 1
);
proposal[5] = scores[indices[i]];
proposal[6] = cls + 1;
proposal += 7;
}
}
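// Partially order proposals by descending score so that the first
// num_top entries are the top-scoring ones; a quickselect-style
// recursion that leaves the tail beyond num_top unsorted.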
template <typename T>
inline void SortProposals(
const int start,
const int end,
const int num_top,
T* proposals) {
const T pivot_score = proposals[start * 5 + 4];
int left = start + 1, right = end;
while (left <= right) {
while (left <= end && proposals[left * 5 + 4] >= pivot_score) ++left;
while (right > start && proposals[right * 5 + 4] <= pivot_score) --right;
if (left <= right) {
for (int i = 0; i < 5; ++i)
std::swap(proposals[left * 5 + i], proposals[right * 5 + i]);
++left;
--right;
}
}
if (right > start) {
for (int i = 0; i < 5; ++i)
std::swap(proposals[start * 5 + i], proposals[right * 5 + i]);
}
if (start < right - 1) SortProposals(start, right - 1, num_top, proposals);
if (right + 1 < num_top && right + 1 < end)
SortProposals(right + 1, end, num_top, proposals);
}
template <typename T>
inline void RetrieveRoIs(
const int num_rois,
const int roi_batch_ind,
const T* proposals,
const int64_t* roi_indices,
T* rois) {
for (int i = 0; i < num_rois; ++i) {
const T* proposal = proposals + roi_indices[i] * 5;
rois[i * 5 + 0] = (T)roi_batch_ind;
rois[i * 5 + 1] = proposal[0];
rois[i * 5 + 2] = proposal[1];
rois[i * 5 + 3] = proposal[2];
rois[i * 5 + 4] = proposal[3];
}
}
template <typename T>
inline int roi_level(
const int min_level,
const int max_level,
const int canonical_level,
const int canonical_scale,
T* roi) {
T w = roi[3] - roi[1] + 1;
T h = roi[4] - roi[2] + 1;
// Follow the level assignment rule of the FPN paper
int level = canonical_level + std::log2(
std::max(std::sqrt(w * h), (T)1) / (T)canonical_scale);
return std::min(max_level, std::max(min_level, level));
}
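For reference, `roi_level` follows the level-assignment rule of the FPN paper (Lin et al., 2017), with the cast to `int` truncating the logarithm; writing $k_0$ for `canonical_level` and $s_0$ for `canonical_scale` (4 and 224 in the paper):

$$k = \mathrm{clamp}\big(k_0 + \log_2(\max(\sqrt{wh},\,1)\,/\,s_0),\ k_{\min},\ k_{\max}\big)$$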
template <typename T>
inline void CollectRoIs(
const int num_rois,
const int min_level,
const int max_level,
const int canonical_level,
const int canonical_scale,
const T* rois,
vector<vec64_t>& roi_bins) {
const T* roi = rois;
for (int i = 0; i < num_rois; ++i) {
int bin_idx = roi_level(min_level, max_level,
canonical_level, canonical_scale, roi);
bin_idx = std::max(bin_idx - min_level, 0);
roi_bins[bin_idx].push_back(i);
roi += 5;
}
}
template <typename T>
inline void DistributeRoIs(
const vector<vec64_t>& roi_bins,
const T* rois,
vector<T*> outputs) {
for (size_t i = 0; i < roi_bins.size(); ++i) {
auto* y = outputs[i];
if (roi_bins[i].size() == 0) {
// Fake a tiny roi to avoid empty roi pooling
y[0] = 0, y[1] = 0, y[2] = 0, y[3] = 1, y[4] = 1;
} else {
for (size_t j = 0; j < roi_bins[i].size(); ++j) {
const T* roi = rois + roi_bins[i][j] * 5;
for (int k = 0; k < 5; ++k) y[k] = roi[k];
y += 5;
}
}
}
}
/******************** NMS ********************/
template <typename T, class Context>
void ApplyNMS(
const int num_boxes,
const int max_keeps,
const T thresh,
const T* boxes,
int64_t* keep_indices,
int& num_keep,
Context* ctx);
} // namespace detection
} // namespace utils
} // namespace dragon
#endif // SEETADET_CXX_UTILS_DETECTION_UTILS_H_
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Compile the cython extensions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from distutils.extension import Extension
from distutils.core import setup
import os
from Cython.Distutils import build_ext
import numpy as np
ext_modules = [
Extension(
'install.lib.utils.cython_bbox',
['cython_bbox.pyx'],
extra_compile_args=['-w'],
include_dirs=[np.get_include()]
),
Extension(
'install.lib.utils.cython_nms',
['cython_nms.pyx'],
extra_compile_args=['-w'],
include_dirs=[np.get_include()]
),
Extension(
'install.lib.pycocotools._mask',
['maskApi.c', '_mask.pyx'],
include_dirs=[np.get_include(), os.path.dirname(os.path.abspath(__file__))],
extra_compile_args=['-w']
),
]
setup(
name='SeetaDet',
ext_modules=ext_modules,
cmdclass={'build_ext': build_ext},
)
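For reference, this script follows the standard distutils/Cython build flow; a hedged sketch of driving the same build programmatically, equivalent to running ``python setup.py build_ext --inplace`` from the directory holding the ``.pyx`` sources:

```python
# Standard distutils API: run setup.py with the given arguments;
# "--inplace" drops the built extensions next to their sources.
from distutils.core import run_setup

run_setup('setup.py', script_args=['build_ext', '--inplace'])
```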
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.data_transformer import DataTransformer
from lib.datasets.factory import get_imdb
from lib.utils import logger
from lib.utils.blob import im_list_to_blob
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def __call__(self):
outputs = self.data_batch.get()
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
dataset : lambda
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q21 = mp.Queue(num_batches * self._batch_size)
self.Q22 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1
transformer.q1_out, transformer.q2_out = self.Q21, self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q3.get()
def run(self):
"""Start the process to produce batches."""
def produce(q_in):
processed_ims, ims_info, all_boxes = [], [], []
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[:2]) + [im_scale])
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, image_index
all_boxes.append(im_boxes)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, dtype=np.float32),
'gt_boxes': np.concatenate(all_boxes, axis=0),
}
# Two queues to implement aspect grouping.
# This is necessary to reduce the GPU memory wasted
# on fetching a huge square batch blob.
q1, q2 = self.Q21, self.Q22
# Main prefetch loop
while True:
if q1.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q1))
elif q2.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q2))
q1, q2 = q2, q1 # Swap so both aspect groups are drained uniformly
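The alternating-queue logic above is the whole aspect-grouping scheme: transformers route landscape and portrait images into separate queues, and a batch is assembled only from a single queue. A standalone sketch of the same idea, with illustrative names and ``mp.Queue``-like objects passed in:

```python
def route(sample, height, width, q_landscape, q_portrait):
    # Transformer side: send each sample to the queue of its orientation.
    (q_landscape if width >= height else q_portrait).put(sample)

def next_batch(q1, q2, ims_per_batch):
    # Producer side: drain whichever queue can fill a whole batch,
    # then swap priority so both orientations are consumed evenly.
    batch = None
    if q1.qsize() >= ims_per_batch:
        batch = [q1.get() for _ in range(ims_per_batch)]
    elif q2.qsize() >= ims_per_batch:
        batch = [q2.get() for _ in range(ims_per_batch)]
    return batch, (q2, q1)
```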
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.mask_rcnn.data_transformer import DataTransformer
from lib.datasets.factory import get_imdb
from lib.utils import logger
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import mask_list_to_blob
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def __call__(self):
outputs = self.data_batch.get()
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
dataset : lambda
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q21 = mp.Queue(num_batches * self._batch_size)
self.Q22 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1
transformer.q1_out, transformer.q2_out = self.Q21, self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q3.get()
def run(self):
"""Start the process to produce batches."""
def produce(q_in):
processed_ims, ims_info = [], []
packed_boxes, packed_masks = [], []
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes, gt_masks = q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[:2]) + [im_scale])
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, image_index
packed_boxes.append(im_boxes)
packed_masks.append(gt_masks)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, 'float32'),
'gt_boxes': np.concatenate(packed_boxes, 0),
'gt_masks': mask_list_to_blob(packed_masks),
}
# Two queues to implement aspect grouping.
# This is necessary to reduce the GPU memory wasted
# on fetching a huge square batch blob.
q1, q2 = self.Q21, self.Q22
# Main prefetch loop
while True:
if q1.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q1))
elif q2.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q2))
q1, q2 = q2, q1 # Swap so both aspect groups are drained uniformly
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import importlib
_STORE = collections.defaultdict(dict)
###########################################
# #
# Body #
# #
###########################################
# ResNet
for D in [18, 34, 50, 101, 152, 200, 269]:
_STORE['BODY']['resnet{}'.format(D)] = \
'lib.modeling.resnet.make_resnet_{}'.format(D)
# VGG
for D in [16, 19]:
for T in ['', '_reduced_300', '_reduced_512']:
_STORE['BODY']['vgg{}{}'.format(D, T)] = \
'lib.modeling.vgg.make_vgg_{}{}'.format(D, T)
# AirNet
for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D)
# MobileNet
for D in ['a1', 'v2']:
_STORE['BODY']['mobilenet_{}'.format(D)] = \
'lib.modeling.mobilenet.make_mobilenet_{}'.format(D)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
'The {} for {} was not registered.\n'
'Registered modules: [{}]'
.format(name, desc, ', '.join(sets.keys()))
)
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
module = importlib.import_module(module_name)
return getattr(module, func_name)
except ImportError:
raise ValueError('Cannot import module: ' + module_name)
def get_body_func(name):
return get_template_func(
name, _STORE['BODY'], 'Body')
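A hedged usage sketch of this registry; the ``'resnet50'`` key is registered by the loop above, while the signature of the resolved builder lives in ``lib.modeling.resnet`` and is assumed here to take no arguments:

```python
# Resolve the body-building function registered under 'resnet50'.
make_body = get_body_func('resnet50')
body = make_body()  # illustrative call; see lib.modeling.resnet for the real signature
```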
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define some basic structures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch import nn
from lib.core.config import cfg
class Affine(object):
"""Affine transformation with weight and bias fixed."""
def __new__(cls, dim_in, bias=True, inplace=True):
return nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
class Conv1x1(object):
"""1x1 convolution."""
def __new__(cls, dim_in, dim_out, stride=1, bias=False):
return nn.Conv2d(
dim_in,
dim_out,
kernel_size=1,
stride=stride,
bias=bias,
)
class Conv3x3(object):
"""3x3 convolution."""
def __new__(cls, dim_in, dim_out, stride=1, dilation=1, bias=False):
return nn.Conv2d(
dim_in,
dim_out,
kernel_size=3,
stride=stride,
padding=1 * dilation,
dilation=dilation,
bias=bias,
)
class CrossEntropyLoss(object):
"""Cross entropy loss."""
def __new__(cls):
return nn.CrossEntropyLoss(ignore_index=-1)
class Identity(nn.Module):
"""Pass input to the output."""
def __init__(self, *args, **kwargs):
super(Identity, self).__init__()
_, _ = args, kwargs
def forward(self, x):
return x
class SigmoidFocalLoss(object):
"""Sigmoid focal loss."""
def __new__(cls):
return nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
)
class SmoothL1Loss(object):
"""Smoothed l1 loss."""
def __new__(cls, beta=1.):
return nn.SmoothL1Loss(
beta=beta,
reduction='batch_size',
)
def is_conv2d(module):
"""Return a bool indicating the module is a Conv2d."""
return isinstance(module, (nn.Conv2d, nn.DepthwiseConv2d))
AvgPool2d = nn.AvgPool2d
BatchNorm2d = nn.BatchNorm2d
BCEWithLogitsLoss = nn.BCEWithLogitsLoss
Conv2d = nn.Conv2d
ConvTranspose2d = nn.ConvTranspose2d
DepthwiseConv2d = nn.DepthwiseConv2d
Linear = nn.Linear
MaxPool2d = nn.MaxPool2d
Module = nn.Module
ModuleList = nn.ModuleList
Sequential = nn.Sequential
ReLU = nn.ReLU
Sigmoid = nn.Sigmoid
Softmax = nn.Softmax
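Note the pattern used throughout this file: each wrapper's ``__new__`` returns a configured ``nn`` module directly, so the wrappers act as layer constructors rather than classes to instantiate. A short sketch:

```python
# Each call yields a plain nn.Conv2d configured by the wrapper.
conv1 = Conv1x1(64, 128)             # kernel 1, stride 1, no bias
conv2 = Conv3x3(128, 128, stride=2)  # kernel 3, padding 1
assert is_conv2d(conv1) and is_conv2d(conv2)
```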
Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The views and conclusions contained in the software and documentation are those
of the authors and should not be interpreted as representing official policies,
either expressed or implied, of the FreeBSD Project.
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.ssd.data_transformer import DataTransformer
from lib.utils import logger
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def __call__(self):
outputs = self.data_batch.get()
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
dataset : lambda
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 32)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q2 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._seed += (i + rank * self._num_transformers)
transformer.q_in, transformer.q_out = self.Q1, self.Q2
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q3.get()
def run(self):
"""Start the process to produce batches."""
image_batch_shape = (
cfg.TRAIN.IMS_PER_BATCH,
cfg.SSD.RESIZE.HEIGHT,
cfg.SSD.RESIZE.WIDTH, 3,
)
# Main prefetch loop
while True:
boxes_to_pack = []
img, gt_boxes = self.Q2.get()
ims_blob = np.zeros(image_batch_shape, img.dtype)
for i in range(cfg.TRAIN.IMS_PER_BATCH):
ims_blob[i] = img
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
boxes[:, :gt_boxes.shape[1]], boxes[:, -1] = gt_boxes, i
boxes_to_pack.append(boxes)
if i != (cfg.TRAIN.IMS_PER_BATCH - 1):
img, gt_boxes = self.Q2.get()
self.Q3.put({
'data': ims_blob,
'gt_boxes': np.concatenate(boxes_to_pack),
})
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/ppwwyyxx/tensorpack/blob/master/examples/FasterRCNN/utils/np_box_ops.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from lib.utils import cython_bbox
def intersection(boxes1, boxes2):
"""Compute pairwise intersection areas between boxes.
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes
boxes2: a numpy array with shape [M, 4] holding M boxes
Returns:
a numpy array with shape [N, M] representing pairwise intersection areas
"""
[y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1)
[y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1)
all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2))
all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2))
intersect_heights = np.maximum(
np.zeros(all_pairs_max_ymin.shape),
all_pairs_min_ymax - all_pairs_max_ymin)
all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2))
all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2))
intersect_widths = np.maximum(
np.zeros(all_pairs_max_xmin.shape),
all_pairs_min_xmax - all_pairs_max_xmin)
return intersect_heights * intersect_widths
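A worked example of ``intersection`` under the ``[y_min, x_min, y_max, x_max]`` layout documented above (widths here use the plain ``y2 - y1`` convention, without the ``+ 1`` used by ``boxes_area`` below):

```python
import numpy as np

b1 = np.array([[0., 0., 10., 10.]])  # a 10 x 10 box at the origin
b2 = np.array([[5., 5., 15., 15.]])  # the same box shifted by (5, 5)
print(intersection(b1, b2))          # [[25.]] -- the 5 x 5 overlap
```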
def iou(boxes1, boxes2):
"""Computes pairwise intersection-over-union between box collections.
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise iou scores.
"""
intersect = intersection(boxes1, boxes2)
area1 = boxes_area(boxes1)
area2 = boxes_area(boxes2)
union = \
np.expand_dims(area1, axis=1) + \
np.expand_dims(area2, axis=0) - intersect
return intersect / union
def ioa1(boxes1, boxes2):
"""Computes pairwise intersection-over-area between box collections.
Intersection-over-area (ioa) between two boxes box1 and box2 is defined
here as their intersection area over box1's area. Note that ioa is not symmetric,
that is, IOA(box1, box2) != IOA(box2, box1).
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
"""
intersect = intersection(boxes1, boxes2)
areas = np.expand_dims(boxes_area(boxes1), axis=1)
return intersect / areas
def ioa2(boxes1, boxes2):
"""Computes pairwise intersection-over-area between box collections.
Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
their intersection area over box2's area. Note that ioa is not symmetric,
that is, IOA(box1, box2) != IOA(box2, box1).
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
"""
intersect = intersection(boxes1, boxes2)
areas = np.expand_dims(boxes_area(boxes2), axis=0)
return intersect / areas
def bbox_overlaps(boxes1, boxes2):
"""Compute the overlaps between two group of boxes."""
return cython_bbox.bbox_overlaps(
np.ascontiguousarray(boxes1, dtype=np.float),
np.ascontiguousarray(boxes2, dtype=np.float),
)
def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1.)):
"""Transform the boxes to the regression targets."""
ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.
ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.
ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.
gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.
gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
wx, wy, ww, wh = weights
targets = [wx * (gt_ctr_x - ex_ctr_x) / ex_widths]
targets += [wy * (gt_ctr_y - ex_ctr_y) / ex_heights]
targets += [ww * np.log(gt_widths / ex_widths)]
targets += [wh * np.log(gt_heights / ex_heights)]
return np.vstack(targets).transpose()
def bbox_transform_inv(boxes, deltas, weights=(1., 1., 1., 1.)):
"""Decode the final boxes according to the deltas."""
if boxes.shape[0] == 0:
return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
boxes = boxes.astype(deltas.dtype, copy=False)
widths = boxes[:, 2] - boxes[:, 0] + 1.
heights = boxes[:, 3] - boxes[:, 1] + 1.
ctr_x = boxes[:, 0] + 0.5 * widths
ctr_y = boxes[:, 1] + 0.5 * heights
wx, wy, ww, wh = weights
dx = deltas[:, 0::4] / wx
dy = deltas[:, 1::4] / wy
dw = deltas[:, 2::4] / ww
dh = deltas[:, 3::4] / wh
pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
pred_w = np.exp(dw) * widths[:, np.newaxis]
pred_h = np.exp(dh) * heights[:, np.newaxis]
pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w # x1
pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h # y1
pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w # x2
pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h # y2
return pred_boxes
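A round-trip sketch of the encode/decode pair above, with the inclusive ``+ 1`` width convention in mind:

```python
import numpy as np

anchors = np.array([[0., 0., 15., 15.]], 'float32')
gt = np.array([[2., 2., 12., 14.]], 'float32')
deltas = bbox_transform(anchors, gt)           # (1, 4): dx, dy, dw, dh
decoded = bbox_transform_inv(anchors, deltas)  # (1, 4): x1, y1, x2, y2
# x1/y1 invert exactly; x2/y2 come back one pixel larger because the
# decoder omits the "- 1" of the inclusive width used by the encoder
# (the reference py-faster-rcnn code behaves the same way).
assert np.allclose(decoded[0, :2], gt[0, :2], atol=1e-3)
assert np.allclose(decoded[0, 2:], gt[0, 2:] + 1, atol=1e-3)
```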
def boxes_area(boxes):
"""Compute the area of an array of boxes."""
w = (boxes[:, 2] - boxes[:, 0] + 1)
h = (boxes[:, 3] - boxes[:, 1] + 1)
areas = w * h
assert np.all(areas >= 0), 'Negative areas found'
return areas
def clip_boxes(boxes, im_shape):
# x1 >= 0
boxes[:, 0] = np.maximum(np.minimum(boxes[:, 0], im_shape[1] - 1), 0)
# y1 >= 0
boxes[:, 1] = np.maximum(np.minimum(boxes[:, 1], im_shape[0] - 1), 0)
# x2 < im_shape[1]
boxes[:, 2] = np.maximum(np.minimum(boxes[:, 2], im_shape[1] - 1), 0)
# y2 < im_shape[0]
boxes[:, 3] = np.maximum(np.minimum(boxes[:, 3], im_shape[0] - 1), 0)
return boxes
def clip_tiled_boxes(boxes, im_shape):
# x1 >= 0
boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
# y1 >= 0
boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
# x2 < im_shape[1]
boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
# y2 < im_shape[0]
boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
return boxes
def expand_boxes(boxes, scale):
"""Expand an array of boxes by a given scale."""
w_half = (boxes[:, 2] - boxes[:, 0]) * .5
h_half = (boxes[:, 3] - boxes[:, 1]) * .5
x_c = (boxes[:, 2] + boxes[:, 0]) * .5
y_c = (boxes[:, 3] + boxes[:, 1]) * .5
w_half *= scale
h_half *= scale
boxes_exp = np.zeros(boxes.shape)
boxes_exp[:, 0] = x_c - w_half
boxes_exp[:, 2] = x_c + w_half
boxes_exp[:, 1] = y_c - h_half
boxes_exp[:, 3] = y_c + h_half
return boxes_exp
def flip_boxes(boxes, width):
"""Flip the boxes horizontally."""
boxes_flipped = boxes.copy()
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
boxes_flipped[:, 0] = width - old_x2 - 1
boxes_flipped[:, 2] = width - old_x1 - 1
return boxes_flipped
def filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min size."""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
def dismantle_boxes(gt_boxes, num_images):
"""Dismantle the packed ground-truth boxes."""
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == i)[0]
][:, :-1] for i in range(num_images)
]
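A quick sketch of ``dismantle_boxes``: the packed array carries the image index in its last column, and the helper splits it back per image:

```python
import numpy as np

packed = np.array([
    [0., 0., 10., 10., 1., 0.],   # a box with class 1 in image 0
    [5., 5., 20., 20., 2., 1.],   # a box with class 2 in image 1
], 'float32')
per_image = dismantle_boxes(packed, num_images=2)
# per_image[0] -> [[0., 0., 10., 10., 1.]]
# per_image[1] -> [[5., 5., 20., 20., 2.]]
```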
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon
from dragon.core.framework import tensor_util
from dragon.core.util import six
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
def feed_tensor(tensor, array):
tensor_util.set_array(tensor, array)
def get_param_groups(module, bias_lr=1., bias_decay=0.):
"""Separate weight and bias into parameters groups.
Parameters
----------
module : dragon.vm.torch.nn.Module
The module to collect parameters.
bias_lr : float, optional, default=1.
The lr multiplier of bias.
bias_decay : float, optional, default=0.
The decay multiplier of bias.
Returns
-------
Sequence[ParamGroup]
The parameter groups.
"""
param_groups = [
{
'params': [],
'lr_mult': 1.,
'decay_mult': 1.,
},
{
'params': [],
'lr_mult': bias_lr,
'decay_mult': bias_decay,
}
]
for name, param in module.named_parameters():
gi = 0 if 'weight' in name and param.dim() > 1 else 1
param_groups[gi]['params'].append(param)
if len(param_groups[1]['params']) == 0:
param_groups.pop() # Remove empty group
return param_groups
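A hedged usage sketch of ``get_param_groups``; the SGD constructor below follows the usual torch-style signature, and whether dragon's optimizer consumes the ``lr_mult``/``decay_mult`` keys exactly this way is an assumption:

```python
model = torch.nn.Conv2d(3, 8, kernel_size=3, padding=1)  # any module works
groups = get_param_groups(model, bias_lr=2., bias_decay=0.)
optimizer = torch.optim.SGD(groups, lr=0.01, momentum=0.9, weight_decay=1e-4)
```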
def get_workspace():
"""Return the current default workspace.
Returns
-------
dragon.Workspace
The default workspace.
"""
return dragon.get_workspace()
def new_placeholder(device=None):
"""Create a new tensor to feed data.
Parameters
----------
device : int, optional
The device index.
Returns
-------
dragon.vm.torch.Tensor
The placeholder tensor.
"""
value = torch.zeros(1)
if device is not None:
return value.cuda(device)
return value
def new_tensor(data, enforce_cpu=False):
"""Create a new tensor from the data.
Parameters
----------
data : array_like
The data value.
enforce_cpu : bool, optional, default=False
**True** to enforce the cpu storage.
Returns
-------
dragon.vm.torch.Tensor
The tensor holding the data.
"""
if isinstance(data, np.ndarray):
tensor = torch.from_numpy(data)
elif isinstance(data, torch.Tensor):
tensor = data
else:
tensor = torch.tensor(data)
if not enforce_cpu:
tensor = tensor.cuda(cfg.GPU_ID)
return tensor
def new_workspace(merge_default=True):
"""Create a new workspace.
Parameters
----------
merge_default : bool, optional, default=True
**True** to merge tensors from default workspace.
Returns
-------
dragon.Workspace
The new workspace.
"""
workspace = dragon.Workspace()
if merge_default:
workspace.merge_from(get_workspace())
return workspace
def reset_workspace(workspace=None, merge_default=True):
"""Reset a workspace and return a new one.
Parameters
----------
workspace : dragon.Workspace, optional
The workspace to reset.
merge_default : bool, optional, default=True
**True** to merge tensors from default workspace.
Returns
-------
dragon.Workspace
The new workspace.
"""
if workspace is not None:
workspace.Clear() # Block the GIL
return new_workspace(merge_default)
class Graph(object):
"""Simple sequential graph to accelerate inference.
Graph reduces the overhead of calling python functions
under eager execution. That cost is at least 15ms for
common backbones, which limits inference to about 60 FPS.
For more details, see the eager mechanism of Dragon.
"""
def __init__(self, inputs, outputs, constants=None):
def canonicalize(input_dict):
if input_dict is None:
return {}
for k, v in input_dict.items():
input_dict[k] = v.name if hasattr(v, 'name') else v
return input_dict
self.placeholders = {}
self._inputs = canonicalize(inputs)
self._outputs = canonicalize(outputs)
self._constants = canonicalize(constants)
self._workspace = get_workspace()
self._tracer = torch.jit.get_tracer()
@property
def workspace(self):
return self._workspace
@workspace.setter
def workspace(self, value):
self._workspace = value
def forward(self, **kwargs):
# Assign inputs
for name, tensor in self._inputs.items():
value = kwargs.get(name, None)
tensor_util.set_array(tensor, value)
# Replay the traced expressions
self._tracer.replay()
# Collect outputs
# 1) Target results
# 2) Constant values
outputs = collections.OrderedDict()
for name, tensor in self._outputs.items():
outputs[name] = tensor_util.to_array(tensor, True)
for name, value in self._constants.items():
outputs[name] = value
return outputs
def __call__(self, **kwargs):
with self._workspace.as_default():
return self.forward(**kwargs)
# Aliases
pickle = six.moves.pickle
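A minimal sketch of the intended ``Graph`` workflow, pieced together from ``forward`` above: run the network once under the tracer, wrap its input/output tensors, then replay with new data. The module and tensor names here are illustrative assumptions:

```python
# Hypothetical: `model` is a torch-style module whose expressions the
# JIT tracer records on the first call; `im` is the traced input.
im = new_placeholder(device=cfg.GPU_ID)
outputs = model(im)  # executed once so the tracer captures the graph
graph = Graph(inputs={'data': im}, outputs={'cls_score': outputs})
results = graph(data=np.zeros((1, 3, 600, 800), 'float32'))
```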
......@@ -11,6 +11,10 @@
"""Make record file for COCO dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil
......@@ -37,8 +41,8 @@ if __name__ == '__main__':
record_file=os.path.join(COCO_ROOT, 'coco_2014_trainval35k'),
images_path=[os.path.join(COCO_ROOT, 'images/train2014'),
os.path.join(COCO_ROOT, 'images/val2014')],
splits_path=[os.path.join(COCO_ROOT, 'ImageSets'),
os.path.join(COCO_ROOT, 'ImageSets')],
splits_path=[os.path.join(COCO_ROOT, 'splits'),
os.path.join(COCO_ROOT, 'splits')],
mask_file='build/coco_2014_trainval35k_mask.pkl',
splits=['train', 'valminusminival'],
)
......@@ -48,7 +52,7 @@ if __name__ == '__main__':
record_file=os.path.join(COCO_ROOT, 'coco_2014_minival'),
images_path=os.path.join(COCO_ROOT, 'images/val2014'),
mask_file='build/coco_2014_minival_mask.pkl',
splits_path=os.path.join(COCO_ROOT, 'ImageSets'),
splits_path=os.path.join(COCO_ROOT, 'splits'),
splits=['minival'],
)
......
......@@ -86,7 +86,7 @@ def make_record(
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
writer = dragon.io.SeetaRecordWriter(
writer = dragon.io.KPLRecordWriter(
path=record_file,
protocol={
'id': 'string',
......@@ -133,6 +133,6 @@ def make_record(
writer.close()
end_time = time.time()
data_size = os.path.getsize(record_file + '/data.data') * 1e-6
data_size = os.path.getsize(record_file + '/root.data') * 1e-6
print('{} images take {:.2f} MB in {:.2f} sec.'
.format(total_line, data_size, end_time - start_time))
......@@ -20,11 +20,11 @@ except:
import pickle as cPickle
sys.path.insert(0, '../..')
from lib.pycocotools.coco import COCO
from lib.pycocotools import mask_utils
from seetadet.pycocotools.coco import COCO
from seetadet.pycocotools import mask_utils
class imdb(object):
class COCOWrapper(object):
def __init__(self, image_set, year, data_dir):
self._year = year
self._image_set = image_set
......@@ -120,8 +120,6 @@ class imdb(object):
# running out of the image bound
# Do not use them or decoding error is inevitable
mask_bytes = mask_utils.poly2bytes(obj['segmentation'], height, width)
if not isinstance(mask_bytes, bytes):
print(type(mask_bytes))
if obj['area'] > 0 and x2 > x1 and y2 > y1:
obj['clean_bbox'] = [x1, y1, x2, y2]
valid_objects.append({
......@@ -146,10 +144,11 @@ class imdb(object):
def make_mask(split, year, data_dir):
coco = imdb(split, year, data_dir)
print('Preparing to make split: {}, total {} images'.format(split, coco.num_images))
if not osp.exists(osp.join(coco._data_path, 'ImageSets')):
os.makedirs(osp.join(coco._data_path, 'ImageSets'))
coco = COCOWrapper(split, year, data_dir)
print('Preparing to make split: {}, total {} images'
.format(split, coco.num_images))
if not osp.exists(osp.join(coco._data_path, 'splits')):
os.makedirs(osp.join(coco._data_path, 'splits'))
gt_recs = OrderedDict()
for i in range(coco.num_images):
......@@ -157,14 +156,14 @@ def make_mask(split, year, data_dir):
h, w, objects = coco.annotation_at(i)
gt_recs[filename] = objects
with open(osp.join('build',
'coco_' + year + '_' + split + '_mask.pkl'), 'wb') as f:
with open(osp.join('build', 'coco_' + year + '_' + split + '_mask.pkl'), 'wb') as f:
cPickle.dump(gt_recs, f, cPickle.HIGHEST_PROTOCOL)
with open(osp.join(coco._data_path, 'ImageSets', split + '.txt'), 'w') as f:
with open(osp.join(coco._data_path, 'splits', split + '.txt'), 'w') as f:
for i in range(coco.num_images):
filename = (coco.image_path_at(i).split('/')[-1]).split('.')[0]
if i != coco.num_images - 1: filename += '\n'
if i != coco.num_images - 1:
filename += '\n'
f.write(filename)
......
......@@ -26,6 +26,6 @@ if __name__ == '__main__':
record_file=osp.join(data_root, 'rotated_train'),
images_path=[osp.join(data_root, 'JPEGImages')],
annotations_path=[osp.join(data_root, 'Annotations')],
imagesets_path=[osp.join(data_root, 'ImageSets')],
splits_path=[osp.join(data_root, 'ImageSets')],
splits=['train']
)
......@@ -57,7 +57,7 @@ def make_record(
record_file,
images_path,
annotations_path,
imagesets_path,
splits_path,
splits
):
if os.path.exists(record_file):
......@@ -68,15 +68,15 @@ def make_record(
images_path = [images_path]
if not isinstance(annotations_path, list):
annotations_path = [annotations_path]
if not isinstance(imagesets_path, list):
imagesets_path = [imagesets_path]
assert len(splits) == len(imagesets_path)
if not isinstance(splits_path, list):
splits_path = [splits_path]
assert len(splits) == len(splits_path)
assert len(splits) == len(images_path)
assert len(splits) == len(annotations_path)
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
writer = dragon.io.SeetaRecordWriter(
writer = dragon.io.KPLRecordWriter(
path=record_file,
protocol={
'id': 'string',
......@@ -99,31 +99,37 @@ def make_record(
}
)
count, total_line = 0, 0
start_time = time.time()
for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
assert os.path.exists(split_file)
# Scan all available entries
print('Scan entries...')
entries = []
for i, split in enumerate(splits):
split_file = os.path.join(splits_path[i], split + '.txt')
with open(split_file, 'r') as f:
lines = f.readlines()
total_line += len(lines)
for line in lines:
count += 1
if count % 2000 == 0:
filename = line.strip()
img_file = os.path.join(images_path[i], filename + '.jpg')
ann_file = os.path.join(annotations_path[i], filename + '.xml')
entries.append((img_file, ann_file))
# Parse and write into record file
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
start_time = time.time()
for i, (img_file, ann_file) in enumerate(entries):
if i > 0 and i % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
count, total_line, now_time - start_time))
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
writer.write(make_example(image_file, xml_file))
i, len(entries), now_time - start_time))
writer.write(make_example(img_file, ann_file))
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time))
print('{} / {} in {:.2f} sec'.format(
len(entries), len(entries), now_time - start_time))
writer.close()
end_time = time.time()
data_size = os.path.getsize(record_file + '/data.data') * 1e-6
data_size = os.path.getsize(record_file + '/root.data') * 1e-6
print('{} images take {:.2f} MB in {:.2f} sec.'
.format(total_line, data_size, end_time - start_time))
.format(len(entries), data_size, end_time - start_time))
......@@ -28,7 +28,7 @@ if __name__ == '__main__':
osp.join(voc_root, 'VOCdevkit2012/VOC2012/JPEGImages')],
annotations_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/Annotations')],
imagesets_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
splits_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
splits=['trainval', 'trainval']
)
......@@ -37,6 +37,6 @@ if __name__ == '__main__':
record_file=osp.join(voc_root, 'voc_2007_test'),
images_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
annotations_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
imagesets_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
splits_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
splits=['test']
)
......@@ -26,10 +26,16 @@ def make_example(image_file, xml_file):
tree = ET.parse(xml_file)
filename = os.path.split(xml_file)[-1]
objs = tree.findall('object')
size = tree.find('size')
example = {'id': filename.split('.')[0], 'object': []}
with open(image_file, 'rb') as f:
img_bytes = bytes(f.read())
img = cv2.imdecode(np.frombuffer(img_bytes, 'uint8'), 1)
if size is not None:
example['height'] = int(size.find('height').text)
example['width'] = int(size.find('width').text)
example['depth'] = int(size.find('depth').text)
else:
img = cv2.imdecode(np.frombuffer(img_bytes, 'uint8'), 3)
example['height'], example['width'], example['depth'] = img.shape
example['content'] = img_bytes
for ix, obj in enumerate(objs):
......@@ -53,7 +59,7 @@ def make_record(
record_file,
images_path,
annotations_path,
imagesets_path,
splits_path,
splits
):
if os.path.exists(record_file):
......@@ -64,15 +70,13 @@ def make_record(
images_path = [images_path]
if not isinstance(annotations_path, list):
annotations_path = [annotations_path]
if not isinstance(imagesets_path, list):
imagesets_path = [imagesets_path]
assert len(splits) == len(imagesets_path)
if not isinstance(splits_path, list):
splits_path = [splits_path]
assert len(splits) == len(splits_path)
assert len(splits) == len(images_path)
assert len(splits) == len(annotations_path)
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
writer = dragon.io.SeetaRecordWriter(
writer = dragon.io.KPLRecordWriter(
path=record_file,
protocol={
'id': 'string',
......@@ -91,31 +95,36 @@ def make_record(
}
)
count, total_line = 0, 0
start_time = time.time()
for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
assert os.path.exists(split_file)
# Scan all available entries
print('Scan entries...')
entries = []
for i, split in enumerate(splits):
split_file = os.path.join(splits_path[i], split + '.txt')
with open(split_file, 'r') as f:
lines = f.readlines()
total_line += len(lines)
for line in lines:
count += 1
if count % 2000 == 0:
filename = line.strip()
img_file = os.path.join(images_path[i], filename + '.jpg')
ann_file = os.path.join(annotations_path[i], filename + '.xml')
entries.append((img_file, ann_file))
# Parse and write into record file
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
start_time = time.time()
for i, (img_file, ann_file) in enumerate(entries):
if i > 0 and i % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
count, total_line, now_time - start_time))
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
writer.write(make_example(image_file, xml_file))
i, len(entries), now_time - start_time))
writer.write(make_example(img_file, ann_file))
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time))
print('{} / {} in {:.2f} sec'.format(
len(entries), len(entries), now_time - start_time))
writer.close()
end_time = time.time()
data_size = os.path.getsize(record_file + '/data.data') * 1e-6
data_size = os.path.getsize(record_file + '/root.data') * 1e-6
print('{} images take {:.2f} MB in {:.2f} sec.'
.format(total_line, data_size, end_time - start_time))
.format(len(entries), data_size, end_time - start_time))
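Pieced together from the hunks above, the writer flow is: construct a ``KPLRecordWriter`` with a path and a field protocol, call ``write`` once per example dict, then ``close``. The protocol below keeps only the field visible in the diff; the remaining fields are elided there and stay elided here:

```python
import dragon

writer = dragon.io.KPLRecordWriter(
    path='/data/voc_0712_trainval',  # illustrative path
    protocol={'id': 'string'},       # ...remaining fields as in the source
)
writer.write(make_example(img_file, ann_file))  # one example dict per image
writer.close()
```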
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from seetadet.algo.faster_rcnn.anchor_target import AnchorTarget
from seetadet.algo.faster_rcnn.data_loader import DataLoader
from seetadet.algo.faster_rcnn.proposal import Proposal
from seetadet.algo.faster_rcnn.proposal_target import ProposalTarget
from seetadet.algo.faster_rcnn.utils import generate_grid_anchors
from seetadet.algo.faster_rcnn.utils import map_blobs_by_levels
from seetadet.algo.faster_rcnn.utils import map_rois_to_levels
from seetadet.algo.faster_rcnn.utils import map_returns_to_blobs
......@@ -16,11 +16,11 @@ from __future__ import print_function
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.utils import boxes as box_util
from lib.utils.framework import new_tensor
from seetadet.algo.faster_rcnn.generate_anchors import generate_anchors
from seetadet.algo.faster_rcnn.utils import generate_grid_anchors
from seetadet.core.config import cfg
from seetadet.utils import boxes as box_util
from seetadet.utils.env import new_tensor
class AnchorTarget(object):
......@@ -62,9 +62,7 @@ class AnchorTarget(object):
# Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
labels_wide = -np.ones((num_images, num_anchors,), 'float32')
bbox_targets_wide = np.zeros((num_images, num_anchors, 4), 'float32')
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
bbox_indices_wide, bbox_anchors_wide, bbox_targets_wide = [], [], []
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, ...)
......@@ -95,13 +93,13 @@ class AnchorTarget(object):
np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap
# Foreground: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
# Foreground: above threshold IoU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU
# Background: below threshold IoU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
......@@ -112,6 +110,11 @@ class AnchorTarget(object):
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Restore the best-matching anchors if subsampling removed all foreground
if len(fg_inds) == 0:
labels[gt_argmax_overlaps] = 1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
......@@ -119,51 +122,27 @@ class AnchorTarget(object):
disable_inds = npr.choice(bg_inds, len(bg_inds) - num_bg, False)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), 'float32')
bbox_targets[fg_inds, :] = \
labels_wide[ix, inds_inside] = labels
bbox_anchors_wide.append(anchors[fg_inds])
bbox_indices_wide.append(inds_inside[fg_inds] + (num_anchors * ix))
bbox_targets_wide.append(
box_util.bbox_transform(
anchors[fg_inds, :],
anchors[fg_inds],
gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), 'float32')
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), 'float32')
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
if self.num_strides > 1:
labels = labels_wide.reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
else:
)
if self.num_strides == 1:
A = self.base_anchors[0].shape[0]
height, width = features[0].shape[-2:]
labels = labels_wide \
labels_wide = labels_wide \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = bbox_inside_weights_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = bbox_outside_weights_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': new_tensor(labels),
'bbox_targets': new_tensor(bbox_targets),
'bbox_inside_weights': new_tensor(bbox_inside_weights),
'bbox_outside_weights': new_tensor(bbox_outside_weights),
'labels': new_tensor(labels_wide),
'bbox_indices': new_tensor(np.concatenate(bbox_indices_wide)),
'bbox_targets': new_tensor(np.concatenate(bbox_targets_wide).astype('float32')),
'bbox_anchors': new_tensor(np.concatenate(bbox_anchors_wide).astype('float32')),
}
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from seetadet.algo.faster_rcnn import data_transformer
from seetadet.core.config import cfg
from seetadet.datasets.factory import get_dataset
from seetadet.utils import logger
from seetadet.utils.blob import im_list_to_blob
class DataLoader(object):
"""Load mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
dataset = get_dataset(cfg.TRAIN.DATASET)
if cfg.USE_DALI:
from seetadet.dali import rcnn_pipeline as pipe
self.iterator = pipe.new_iterator(dataset.source)
else:
self.iterator = Iterator(**{
'dataset': dataset.cls,
'source': dataset.source,
'classes': dataset.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_THREADS - 1,
})
def __call__(self):
outputs = self.iterator.next()
if isinstance(outputs['data'], np.ndarray):
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class Iterator(mp.Process):
"""Iterator to return the batch of data."""
def __init__(self, **kwargs):
super(Iterator, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and \
kwargs.get('phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.q_in = mp.Queue(num_batches * self._batch_size)
self.q1_out = mp.Queue(num_batches * self._batch_size)
self.q2_out = mp.Queue(num_batches * self._batch_size)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
part_idx=part_idx, num_parts=num_parts, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.q_in
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
p = data_transformer.DataTransformer(**kwargs)
p._seed += (i + rank * self._num_transformers)
p.q_in = self.q_in
p.q1_out, p.q2_out = self.q1_out, self.q2_out
p.start()
self._transformers.append(p)
time.sleep(0.1)
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for p in processes:
p.terminate()
p.join()
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def next(self):
"""Return the next batch of data."""
return self.__next__()
def __iter__(self):
"""Return the iterator self."""
return self
def __next__(self):
"""Return the next batch of data."""
q_out = None
# Two queues to implement aspect grouping.
# This is necessary to reduce the GPU memory wasted
# on fetching a huge square batch blob.
while q_out is None:
if self.q1_out.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
q_out = self.q1_out
elif self.q2_out.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
q_out = self.q2_out
self.q1_out, self.q2_out = self.q2_out, self.q1_out
images, images_info, boxes_to_pack = [], [], []
for i in range(cfg.TRAIN.IMS_PER_BATCH):
image, image_scale, boxes = q_out.get()
images.append(image)
images_info.append(list(image.shape[:2]) + [image_scale])
gt_boxes = np.zeros((boxes.shape[0], boxes.shape[1] + 1), 'float32')
gt_boxes[:, :boxes.shape[1]], gt_boxes[:, -1] = boxes, i
boxes_to_pack.append(gt_boxes)
return {
'data': im_list_to_blob(images),
'ims_info': np.array(images_info, dtype=np.float32),
'gt_boxes': np.concatenate(boxes_to_pack),
}
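The two-queue trick above is easiest to see in isolation; a self-contained toy (standard library only, no SeetaDet dependencies) of the same aspect-grouping policy:

```python
# Toy of the two-queue aspect-grouping strategy: portrait and landscape
# samples go to separate queues, and a batch is drawn only from a queue
# that can fill it entirely, so every batch shares one aspect group.
import queue
import random

IMS_PER_BATCH = 2
q1, q2 = queue.Queue(), queue.Queue()  # portrait / landscape

for _ in range(8):
    h, w = random.choice([(600, 800), (800, 600)])
    (q1 if h > w else q2).put((h, w))

def next_batch():
    q = q1 if q1.qsize() >= IMS_PER_BATCH else q2
    return [q.get() for _ in range(IMS_PER_BATCH)]

print(next_batch())  # all shapes in a batch share one aspect group
```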
......@@ -15,19 +15,19 @@ from __future__ import print_function
import multiprocessing
import cv2
import numpy as np
from lib.core.config import cfg
from lib.datasets.example import Example
from lib.utils import boxes as box_util
from lib.utils.blob import prep_im_for_blob
from lib.utils.image import get_image_with_target_size
from seetadet.core.config import cfg
from seetadet.datasets.example import Example
from seetadet.utils import boxes as box_util
from seetadet.utils.blob import prep_im_for_blob
class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._scales = cfg.TRAIN.SCALES
self._max_size = cfg.TRAIN.MAX_SIZE
self._seed = cfg.RNG_SEED
self._use_flipped = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF
......@@ -37,13 +37,7 @@ class DataTransformer(multiprocessing.Process):
self.q_in = self.q1_out = self.q2_out = None
self.daemon = True
def make_roi_dict(
self,
example,
im_scale,
apply_flip=False,
offsets=None,
):
def make_roi_dict(self, example, im_scale, apply_flip=False):
objects, n_objects = example.objects, 0
height, width = example.height, example.width
if not self._use_diff:
......@@ -86,15 +80,6 @@ class DataTransformer(multiprocessing.Process):
# Scale the boxes to the detecting scale
roi_dict['boxes'] *= im_scale
# Apply the offsets from scale jitter
if offsets is not None:
roi_dict['boxes'][:, 0::2] += offsets[0]
roi_dict['boxes'][:, 1::2] += offsets[1]
roi_dict['boxes'][:, :] = np.minimum(
np.maximum(roi_dict['boxes'][:, :], 0),
[offsets[2][1] - 1, offsets[2][0] - 1] * 2,
)
return roi_dict
def get(self, example):
......@@ -102,9 +87,8 @@ class DataTransformer(multiprocessing.Process):
img = example.image
# Scale
max_size = cfg.TRAIN.MAX_SIZE
target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))]
img, im_scale, jitter = prep_im_for_blob(img, target_size, max_size)
target_size = self._scales[np.random.randint(len(self._scales))]
img, im_scale = prep_im_for_blob(img, target_size, self._max_size)
# Flip
apply_flip = False
......@@ -113,19 +97,8 @@ class DataTransformer(multiprocessing.Process):
img = img[:, ::-1]
apply_flip = True
# Random Crop or RandomPad
offsets = None
if cfg.TRAIN.MAX_SIZE > 0:
if jitter != 1:
# To a rectangle (scale, max_size)
target_size = (np.array(img.shape[:2]) / jitter).astype(np.int32)
img, offsets = get_image_with_target_size(target_size, img)
else:
# To a square (target_size, target_size)
img, offsets = get_image_with_target_size([target_size] * 2, img)
# Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets)
roi_dict = self.make_roi_dict(example, im_scale, apply_flip)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
......
......@@ -17,11 +17,11 @@ import collections
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.nms import nms_wrapper
from lib.utils import boxes as box_util
from seetadet.algo.faster_rcnn.generate_anchors import generate_anchors
from seetadet.algo.faster_rcnn.utils import generate_grid_anchors
from seetadet.core.config import cfg
from seetadet.utils import boxes as box_util
from seetadet.utils import nms
class Proposal(object):
......@@ -67,8 +67,8 @@ class Proposal(object):
# Prepare for the outputs
batch_rois = []
cls_prob = cls_prob.numpy(True)
bbox_pred = bbox_pred.numpy(True)
cls_prob = cls_prob.numpy()
bbox_pred = bbox_pred.numpy()
if self.num_strides > 1:
# (?, 4, A * K) -> (?, A * K, 4)
bbox_pred = bbox_pred.transpose((0, 2, 1))
......@@ -113,7 +113,7 @@ class Proposal(object):
# Apply nms (e.g. threshold = 0.7)
# Take after_nms_topN (e.g. 300)
# Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
keep = nms.gpu_nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
......
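For reference, a self-contained NumPy sketch of the take-topk, NMS, keep-topk flow this hunk implements; the greedy IoU NMS below is a plain-CPU stand-in for `nms.gpu_nms`, not the project's kernel:

```python
import numpy as np

def iou_nms(dets, thresh):
    """Greedy IoU-based NMS over [x1, y1, x2, y2, score] rows."""
    x1, y1, x2, y2, scores = dets.T
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # descending by score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0., xx2 - xx1 + 1)
        h = np.maximum(0., yy2 - yy1 + 1)
        ovr = w * h / (areas[i] + areas[order[1:]] - w * h)
        order = order[1:][ovr <= thresh]  # drop heavily-overlapping boxes
    return keep

proposals = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [50, 50, 60, 60]], 'float32')
scores = np.array([[0.9], [0.8], [0.7]], 'float32')
keep = iou_nms(np.hstack((proposals, scores)), 0.7)
print(proposals[keep][:300])  # keep at most post_nms_top_n (e.g. 300)
```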
......@@ -18,12 +18,10 @@ import collections
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_returns_to_blobs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.utils import boxes as box_util
from lib.utils.framework import new_tensor
from seetadet.algo.faster_rcnn import utils as rcnn_util
from seetadet.core.config import cfg
from seetadet.utils import boxes as box_util
from seetadet.utils.env import new_tensor
class ProposalTarget(object):
......@@ -35,10 +33,8 @@ class ProposalTarget(object):
self.num_classes = cfg.MODEL.NUM_CLASSES
self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
('labels', np.array([-1], 'float32')),
('bbox_targets', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_inside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_outside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('labels', np.array([-1], 'int64')),
('bbox_targets', np.zeros((1, 4), 'float32')),
])
def __call__(self, rpn_rois, gt_boxes):
......@@ -63,85 +59,64 @@ class ProposalTarget(object):
# Sample a batch of RoIs for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
map_returns_to_blobs(
rcnn_util.map_returns_to_blobs(
sample_rois(
rois,
gt_boxes,
rois_per_image,
fg_rois_per_image,
self.num_classes,
), blobs, keys,
)
# Stack into continuous blobs
for k, v in blobs.items():
blobs[k] = np.concatenate(blobs[k], 0)
blobs = dict((k, np.concatenate(blobs[k])) for k in blobs.keys())
if self.num_strides > 1:
# Distribute RoIs into pyramids
min_lvl = cfg.FPN.ROI_MIN_LEVEL
max_lvl = cfg.FPN.ROI_MAX_LEVEL
k = max_lvl - min_lvl + 1
levels = map_rois_to_levels(blobs['rois'], min_lvl, max_lvl)
outputs = map_blobs_to_outputs(
num_levels = max_lvl - min_lvl + 1
levels = rcnn_util.map_rois_to_levels(blobs['rois'], min_lvl, max_lvl)
lvl_blobs = rcnn_util.map_blobs_by_levels(
blobs,
self.defaults,
[np.where(levels == (i + min_lvl))[0] for i in range(k)],
[np.where(levels == (i + min_lvl))[0] for i in range(num_levels)],
)
return {
'rois': [new_tensor(outputs['rois'][i]) for i in range(k)],
'labels': new_tensor(np.concatenate(outputs['labels'], 0)),
'bbox_targets': new_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': new_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': new_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
blobs = dict((k, np.concatenate(lvl_blobs[k])) for k in blobs.keys())
rois_wide = [lvl_blobs['rois'][i] for i in range(num_levels)]
else:
# Return RoIs directly for CX-stride
# Return RoIs directly for the specified stride
rois_wide = [blobs['rois']]
# Select the foreground RoIs only for bbox branch
fg_inds = np.where(blobs['labels'] > 0)[0]
cls_inds = np.arange(len(blobs['rois'])) * self.num_classes
return {
'rois': [new_tensor(blobs['rois'])],
'rois': [new_tensor(rois) for rois in rois_wide],
'labels': new_tensor(blobs['labels']),
'bbox_targets': new_tensor(blobs['bbox_targets']),
'bbox_inside_weights': new_tensor(blobs['bbox_inside_weights']),
'bbox_outside_weights': new_tensor(blobs['bbox_outside_weights']),
'bbox_indices': new_tensor(cls_inds[fg_inds] + blobs['labels'][fg_inds]),
'bbox_targets': new_tensor(blobs['bbox_targets'][fg_inds].astype('float32')),
'bbox_anchors': new_tensor(blobs['rois'][fg_inds, 1:].astype('float32')),
}
def get_targets(ex_rois, gt_rois, gt_labels, num_classes):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
# Compute bbox regression targets
fg_inds = np.where(gt_labels > 0)[0]
targets = box_util.bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
bbox_targets = np.zeros((ex_rois.shape[0], 4 * num_classes), 'float32')
inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
for i in fg_inds:
start = int(4 * gt_labels[i])
bbox_targets[i, start:start + 4] = targets[i]
inside_weights[i, start:start + 4] = (1., 1., 1., 1.)
outside_weights = np.array(inside_weights > 0).astype('float32')
return bbox_targets, inside_weights, outside_weights
def sample_rois(
all_rois,
gt_boxes,
num_rois,
num_fg_rois,
num_classes,
):
def sample_rois(all_rois, gt_boxes, num_rois, num_fg_rois):
"""Sample a batch of RoIs comprising foreground and background examples."""
overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
labels = gt_boxes[gt_assignment, 4].astype('int64')
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
fg_rois_per_this_image = int(min(num_fg_rois, fg_inds.size))
fg_thresh = cfg.TRAIN.FG_THRESH
fg_inds = np.where(max_overlaps >= fg_thresh)[0]
while fg_inds.size == 0:
fg_thresh -= 0.01
fg_inds = np.where(max_overlaps >= fg_thresh)[0]
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_rois_per_this_image = int(min(num_fg_rois, fg_inds.size))
fg_inds = npr.choice(fg_inds, fg_rois_per_this_image, False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
......@@ -160,15 +135,14 @@ def sample_rois(
rois, labels = all_rois[keep_inds], labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
# Clamp the image indices for the background RoIs to -1
rois[fg_rois_per_this_image:, 0] = -1
# Compute the target from RoIs
outputs = [rois, labels]
outputs += get_targets(
return [
rois,
labels,
box_util.bbox_transform(
rois[:, 1:5],
gt_boxes[gt_assignment[keep_inds], :4],
labels,
num_classes,
cfg.BBOX_REG_WEIGHTS,
)
return outputs
]
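The `box_util.bbox_transform` call presumably computes the standard Faster R-CNN box encoding; a minimal NumPy sketch under that assumption (the helper name and default weights here are illustrative, not the project's API):

```python
import numpy as np

def bbox_transform_sketch(ex_rois, gt_rois, weights=(1., 1., 1., 1.)):
    """Standard Faster R-CNN box encoding: (dx, dy, dw, dh) deltas."""
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h
    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h
    wx, wy, ww, wh = weights
    return np.stack([wx * (gt_cx - ex_cx) / ex_w,
                     wy * (gt_cy - ex_cy) / ex_h,
                     ww * np.log(gt_w / ex_w),
                     wh * np.log(gt_h / ex_h)], axis=1)

ex = np.array([[0., 0., 9., 9.]])
gt = np.array([[1., 1., 12., 12.]])
print(bbox_transform_sketch(ex, gt))  # small translation + scale deltas
```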
......@@ -13,17 +13,18 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import types
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.modeling.detector import new_detector
from lib.nms import nms_wrapper
from lib.utils import boxes as box_util
from lib.utils import framework
from lib.utils import time_util
from lib.utils.blob import im_list_to_blob
from lib.utils.image import scale_image
from seetadet.core.config import cfg
from seetadet.modeling.detector import new_detector
from seetadet.utils import boxes as box_util
from seetadet.utils import nms as nms_util
from seetadet.utils import time_util
from seetadet.utils.blob import im_list_to_blob
from seetadet.utils.image import scale_image
def im_detect(detector, raw_image):
......@@ -31,49 +32,41 @@ def im_detect(detector, raw_image):
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale
], dtype=np.float32)
data = im_list_to_blob(ims)
ims_info = np.array([list(data.shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
# Do Forward
if not hasattr(detector, 'graph'):
with framework.new_workspace().as_default():
data = torch.from_numpy(blobs['data'])
ims_info = torch.from_numpy(blobs['ims_info'])
with torch.no_grad():
with torch.jit.Tracer(retain_ops=True):
inputs = {'data': data, 'ims_info': ims_info}
outputs = detector.forward(inputs)
detector.graph = \
framework.Graph(inputs, {
'rois': outputs['rois'],
'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']
})
outputs = detector.graph(**blobs)
data = torch.from_numpy(data)
ims_info = torch.from_numpy(ims_info)
if not hasattr(detector, 'script_forward'):
def script_forward(self, data, ims_info):
return self.forward({'data': data, 'ims_info': ims_info})
detector.script_forward = torch.jit.trace(
func=types.MethodType(script_forward, detector),
example_inputs=[data, ims_info],
)
outputs = detector.script_forward(data, ims_info)
outputs = dict((k, outputs[k].numpy()) for k in outputs.keys())
# Decode results
rois = outputs['rois']
scores, boxes, batch_inds = [], [], []
all_scores, all_boxes = [], []
pred_boxes = \
box_util.bbox_transform_inv(
rois[:, 1:5],
outputs['rois'][:, 1:5],
outputs['bbox_pred'],
cfg.BBOX_REG_WEIGHTS,
)
for i in range(len(ims)):
inds = np.where(rois[:, 0].astype(np.int32) == i)[0]
im_boxes = pred_boxes[inds] / ims_scale[i]
scores.append(outputs['cls_prob'][inds])
boxes.append(box_util.clip_tiled_boxes(im_boxes, raw_image.shape))
return (
np.vstack(scores) if len(ims) > 0 else scores[0],
np.vstack(boxes) if len(ims) > 0 else boxes[0],
)
inds = np.where(outputs['rois'][:, 0].astype(np.int32) == i)[0]
boxes = pred_boxes[inds] / ims_scale[i]
all_scores.append(outputs['cls_prob'][inds])
all_boxes.append(box_util.clip_tiled_boxes(boxes, raw_image.shape))
return np.vstack(all_scores), np.vstack(all_boxes)
def test_net(weights, num_classes, q_in, q_out, device):
......@@ -84,7 +77,7 @@ def test_net(weights, num_classes, q_in, q_out, device):
while True:
idx, raw_image = q_in.get()
if raw_image is None:
if idx < 0:
break
boxes_this_image = [[]]
......@@ -101,17 +94,16 @@ def test_net(weights, num_classes, q_in, q_out, device):
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = nms_wrapper.soft_nms(
keep = nms_util.soft_nms(
cls_detections,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms_wrapper.nms(
keep = nms_util.nms(
cls_detections,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
boxes_this_image.append(cls_detections)
......@@ -119,11 +111,8 @@ def test_net(weights, num_classes, q_in, q_out, device):
q_out.put((
idx,
{
'im_detect': _t['im_detect'].average_time,
'misc': _t['misc'].average_time,
},
{
'boxes': boxes_this_image,
},
dict([('im_detect', _t['im_detect'].average_time),
('misc', _t['misc'].average_time)]),
dict([('boxes', boxes_this_image)]),
))
......@@ -16,7 +16,7 @@ from __future__ import print_function
import collections
import numpy as np
from lib.core.config import cfg
from seetadet.core.config import cfg
def generate_grid_anchors(features, base_anchors, strides):
......@@ -75,7 +75,7 @@ def map_rois_to_levels(rois, k_min, k_max):
return np.clip(target_levels, k_min, k_max)
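The `target_levels` clipped above typically follow the FPN assignment heuristic k = floor(k0 + log2(sqrt(w * h) / 224)); a sketch under that assumption — the 224 canonical size and k0 = 4 come from the FPN paper, not from this diff:

```python
import numpy as np

def map_rois_to_levels_sketch(rois, k_min, k_max,
                              canonical_size=224, canonical_level=4):
    """FPN heuristic: k = floor(k0 + log2(sqrt(area) / 224))."""
    ws = rois[:, 3] - rois[:, 1] + 1  # rois are [batch_idx, x1, y1, x2, y2]
    hs = rois[:, 4] - rois[:, 2] + 1
    scales = np.sqrt(ws * hs)
    levels = np.floor(canonical_level + np.log2(scales / canonical_size + 1e-6))
    return np.clip(levels, k_min, k_max).astype('int64')

rois = np.array([[0, 0, 0, 63, 63], [0, 0, 0, 511, 511]], 'float32')
print(map_rois_to_levels_sketch(rois, 2, 5))  # small box -> low level: [2 5]
```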
def map_blobs_to_outputs(blobs, defaults, lvl_inds):
def map_blobs_by_levels(blobs, defaults, lvl_inds):
"""Map blobs to outputs according to fpn indices."""
outputs = collections.defaultdict(list)
for inds in lvl_inds:
......
......@@ -13,10 +13,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Import custom modules
from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.fpn import FPN
from lib.modeling.mask_rcnn import MaskRCNN
from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD
from seetadet.algo.faster_rcnn.anchor_target import AnchorTarget
from seetadet.algo.faster_rcnn.proposal import Proposal
from seetadet.algo.mask_rcnn.data_loader import DataLoader
from seetadet.algo.mask_rcnn.proposal_target import ProposalTarget
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from seetadet.algo.mask_rcnn import data_transformer
from seetadet.core.config import cfg
from seetadet.datasets.factory import get_dataset
from seetadet.utils import logger
from seetadet.utils.blob import im_list_to_blob
from seetadet.utils.blob import mask_list_to_blob
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
dataset = get_dataset(cfg.TRAIN.DATASET)
self.iterator = Iterator(**{
'dataset': dataset.cls,
'source': dataset.source,
'classes': dataset.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_THREADS - 1,
})
def __call__(self):
outputs = self.iterator.next()
if isinstance(outputs['data'], np.ndarray):
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class Iterator(mp.Process):
"""Iterator to return the batch of data."""
def __init__(self, **kwargs):
super(Iterator, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and \
kwargs.get('phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.q_in = mp.Queue(num_batches * self._batch_size)
self.q1_out = mp.Queue(num_batches * self._batch_size)
self.q2_out = mp.Queue(num_batches * self._batch_size)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
part_idx=part_idx, num_parts=num_parts, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.q_in
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
p = data_transformer.DataTransformer(**kwargs)
p._seed += (i + rank * self._num_transformers)
p.q_in = self.q_in
p.q1_out, p.q2_out = self.q1_out, self.q2_out
p.start()
self._transformers.append(p)
time.sleep(0.1)
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for p in processes:
p.terminate()
p.join()
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def next(self):
"""Return the next batch of data."""
return self.__next__()
def __iter__(self):
"""Return the iterator self."""
return self
def __next__(self):
"""Return the next batch of data."""
q_out = None
# Two queues are used to implement aspect grouping,
# which reduces GPU memory by avoiding the fetch of
# a huge square batch blob
while q_out is None:
if self.q1_out.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
q_out = self.q1_out
elif self.q2_out.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
q_out = self.q2_out
self.q1_out, self.q2_out = self.q2_out, self.q1_out
images, images_info = [], []
boxes_to_pack, masks_to_pack = [], []
for i in range(cfg.TRAIN.IMS_PER_BATCH):
image, image_scale, boxes, masks = q_out.get()
images.append(image)
images_info.append(list(image.shape[:2]) + [image_scale])
gt_boxes = np.zeros((boxes.shape[0], boxes.shape[1] + 1), 'float32')
gt_boxes[:, :boxes.shape[1]], gt_boxes[:, -1] = boxes, i
boxes_to_pack.append(gt_boxes)
masks_to_pack.append(masks)
return {
'data': im_list_to_blob(images),
'ims_info': np.array(images_info, 'float32'),
'gt_boxes': np.concatenate(boxes_to_pack),
'gt_masks': mask_list_to_blob(masks_to_pack),
}
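The image-index packing used for `gt_boxes` can be checked in isolation; a toy showing that per-image boxes remain recoverable from the last column after concatenation:

```python
import numpy as np

# Toy of the gt_boxes packing above: the image index is appended as a
# last column so per-image boxes can be recovered after concatenation.
boxes_im0 = np.array([[0., 0., 10., 10., 1.]])   # x1, y1, x2, y2, cls
boxes_im1 = np.array([[5., 5., 20., 20., 2.]])
packed = []
for i, b in enumerate([boxes_im0, boxes_im1]):
    gt = np.zeros((b.shape[0], b.shape[1] + 1), 'float32')
    gt[:, :b.shape[1]], gt[:, -1] = b, i
    packed.append(gt)
gt_boxes = np.concatenate(packed)
print(gt_boxes[gt_boxes[:, -1] == 1])  # boxes belonging to image 1
```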
......@@ -17,17 +17,18 @@ import multiprocessing
import numpy as np
from lib.core.config import cfg
from lib.datasets.example import Example
from lib.pycocotools import mask_utils
from lib.utils import boxes as box_util
from lib.utils.blob import prep_im_for_blob
from lib.utils.image import get_image_with_target_size
from seetadet.core.config import cfg
from seetadet.datasets.example import Example
from seetadet.pycocotools import mask_utils
from seetadet.utils import boxes as box_util
from seetadet.utils.blob import prep_im_for_blob
class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._scales = cfg.TRAIN.SCALES
self._max_size = cfg.TRAIN.MAX_SIZE
self._seed = cfg.RNG_SEED
self._use_flipped = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF
......@@ -98,9 +99,8 @@ class DataTransformer(multiprocessing.Process):
img = example.image
# Scale
max_size = cfg.TRAIN.MAX_SIZE
target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))]
img, im_scale, jitter = prep_im_for_blob(img, target_size, max_size)
target_size = self._scales[np.random.randint(len(self._scales))]
img, im_scale = prep_im_for_blob(img, target_size, self._max_size)
# Flip
apply_flip = False
......
......@@ -18,13 +18,11 @@ import collections
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_returns_to_blobs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.utils import boxes as box_util
from lib.utils import mask as mask_util
from lib.utils.framework import new_tensor
from seetadet.algo.faster_rcnn import utils as rcnn_util
from seetadet.core.config import cfg
from seetadet.utils import boxes as box_util
from seetadet.utils import mask as mask_util
from seetadet.utils.env import new_tensor
class ProposalTarget(object):
......@@ -36,10 +34,8 @@ class ProposalTarget(object):
self.num_classes = cfg.MODEL.NUM_CLASSES
self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
('labels', np.array([-1], 'float32')),
('bbox_targets', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_inside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_outside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('labels', np.array([-1], 'int64')),
('bbox_targets', np.zeros((1, 4), 'float32')),
('mask_targets', -np.ones((1, self.resolution, self.resolution), 'float32')),
])
......@@ -72,67 +68,75 @@ class ProposalTarget(object):
# Sample a batch of RoIs for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
map_returns_to_blobs(
rcnn_util.map_returns_to_blobs(
sample_rois(
rois,
gt_boxes,
gt_masks,
rois_per_image,
fg_rois_per_image,
self.num_classes,
ims_info[ix][2],
), blobs, keys,
)
# Stack into continuous blobs
for k, v in blobs.items():
blobs[k] = np.concatenate(blobs[k], 0)
blobs = dict((k, np.concatenate(blobs[k])) for k in blobs.keys())
# Distribute rois into pyramids
k_min = cfg.FPN.ROI_MIN_LEVEL
k_max = cfg.FPN.ROI_MAX_LEVEL
k = k_max - k_min + 1
levels = map_rois_to_levels(blobs['rois'], k_min, k_max)
outputs = \
map_blobs_to_outputs(
num_levels = k_max - k_min + 1
levels = rcnn_util.map_rois_to_levels(blobs['rois'], k_min, k_max)
lvl_blobs = rcnn_util.map_blobs_by_levels(
blobs,
self.defaults,
[np.where(levels == (i + k_min))[0] for i in range(k)],
[np.where(levels == (i + k_min))[0] for i in range(num_levels)],
)
# Select the foreground RoIs only for mask branch
for i in range(k):
inds = np.where(outputs['labels'][i] > 0)[0]
inds = inds if len(inds) > 0 else np.array([0], 'int64')
outputs['mask_rois'].append(outputs['rois'][i][inds])
outputs['mask_targets'][i] = outputs['mask_targets'][i][inds]
outputs['mask_labels'].append(outputs['labels'][i][inds].astype('int64') - 1)
# Use the sparse indices to select logits
# Reduce the overhead on feeding dense class-specific targets
mask_labels = np.concatenate(outputs['mask_labels'], 0)
mask_indices = np.arange(len(mask_labels)) * (self.num_classes - 1)
rois_wide = [lvl_blobs['rois'][i] for i in range(num_levels)]
mask_rois_wide, mask_labels_wide = [], []
# Select the foreground RoIs only for bbox/mask branch
for i in range(num_levels):
inds = np.where(lvl_blobs['labels'][i] > 0)[0]
if len(inds) > 0:
mask_rois_wide.append(lvl_blobs['rois'][i][inds])
mask_labels_wide.append(lvl_blobs['labels'][i][inds] - 1)
lvl_blobs['mask_targets'][i] = lvl_blobs['mask_targets'][i][inds]
else:
mask_rois_wide.append(self.defaults['rois'])
mask_labels_wide.append(np.array([0], 'int64'))
lvl_blobs['mask_targets'][i] = self.defaults['mask_targets']
blobs = dict((k, np.concatenate(lvl_blobs[k])) for k in blobs.keys())
mask_labels = np.concatenate(mask_labels_wide)
fg_inds = np.where(blobs['labels'] > 0)[0]
bbox_cls_inds = np.arange(len(blobs['rois'])) * self.num_classes
mask_cls_inds = np.arange(len(mask_labels)) * (self.num_classes - 1)
# Sample a proposal randomly to avoid memory issues
if len(fg_inds) == 0:
fg_inds = np.random.randint(len(blobs['labels']), size=[1])
return {
'rois': [new_tensor(outputs['rois'][i]) for i in range(k)],
'labels': new_tensor(np.concatenate(outputs['labels'], 0)),
'bbox_targets': new_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': new_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': new_tensor(np.vstack(outputs['bbox_outside_weights'])),
'mask_rois': [new_tensor(outputs['mask_rois'][i]) for i in range(k)],
'mask_targets': new_tensor(np.vstack(outputs['mask_targets'])),
'mask_indices': new_tensor(mask_indices + mask_labels),
'rois': [new_tensor(rois_wide[i]) for i in range(num_levels)],
'mask_rois': [new_tensor(mask_rois_wide[i]) for i in range(num_levels)],
'labels': new_tensor(blobs['labels']),
'bbox_indices': new_tensor(bbox_cls_inds[fg_inds] + blobs['labels'][fg_inds]),
'bbox_targets': new_tensor(blobs['bbox_targets'][fg_inds].astype('float32')),
'bbox_anchors': new_tensor(blobs['rois'][fg_inds, 1:].astype('float32')),
'mask_indices': new_tensor(mask_cls_inds + mask_labels),
'mask_targets': new_tensor(blobs['mask_targets']),
}
def get_targets(
def compute_targets(
ex_rois,
gt_rois,
gt_labels,
gt_masks,
mask_flags,
mask_size,
num_classes,
im_scale,
):
"""Compute the bounding-box regression targets."""
......@@ -141,14 +145,8 @@ def get_targets(
assert gt_rois.shape[1] == 4
# Compute bbox regression targets
fg_inds = np.where(gt_labels > 0)[0]
targets = box_util.bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
bbox_targets = np.zeros((ex_rois.shape[0], 4 * num_classes), 'float32')
inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
for i in fg_inds:
start = int(4 * gt_labels[i])
bbox_targets[i, start:start + 4] = targets[i]
inside_weights[i, start:start + 4] = (1., 1., 1., 1.)
outside_weights = np.array(inside_weights > 0).astype('float32')
bbox_targets = box_util.bbox_transform(
ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
# Compute mask classification targets
mask_shape = [mask_size] * 2
ex_rois_ori = np.round(ex_rois / im_scale).astype(int)
......@@ -168,7 +166,7 @@ def get_targets(
mask=box_mask,
size=mask_shape,
)
return bbox_targets, inside_weights, outside_weights, mask_targets
return bbox_targets, mask_targets
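Mask targets are conventionally built by cropping the ground-truth mask to the (original-scale) RoI and resizing it to the output resolution; a minimal cv2-based sketch of that step, standing in for the project's `mask_utils` helpers:

```python
import cv2
import numpy as np

def mask_target_sketch(gt_mask, roi, mask_size=28):
    """Crop a full-image binary mask to an RoI and resize to mask_size."""
    x1, y1, x2, y2 = [int(round(v)) for v in roi]
    crop = gt_mask[y1:y2 + 1, x1:x2 + 1].astype('float32')
    resized = cv2.resize(crop, (mask_size, mask_size))
    return (resized >= 0.5).astype('float32')  # re-binarize after resize

gt_mask = np.zeros((100, 100), 'uint8')
gt_mask[20:60, 20:60] = 1
print(mask_target_sketch(gt_mask, (10, 10, 70, 70)).mean())  # fg fraction
```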
def sample_rois(
......@@ -177,14 +175,13 @@ def sample_rois(
gt_masks,
num_rois,
num_fg_rois,
num_classes,
im_scale,
):
"""Sample a batch of RoIs comprising foreground and background examples."""
overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
labels = gt_boxes[gt_assignment, 4].astype('int64')
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
......@@ -209,19 +206,16 @@ def sample_rois(
rois, labels = all_rois[keep_inds], labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
# Clamp the image indices for the background RoIs to -1
rois[fg_rois_per_this_image:, 0] = -1
# Compute the target from RoIs
outputs = [rois, labels]
outputs += get_targets(
outputs += compute_targets(
rois[:, 1:5],
gt_boxes[gt_assignment[keep_inds], :4],
labels,
gt_masks[gt_assignment[fg_inds]],
gt_boxes[gt_assignment[fg_inds], 5],
cfg.MRCNN.RESOLUTION,
num_classes,
im_scale,
)
return outputs
......@@ -13,19 +13,20 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import types
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn import map_rois_to_levels
from lib.faster_rcnn import map_blobs_to_outputs
from lib.modeling.detector import new_detector
from lib.nms import nms_wrapper
from lib.utils import framework
from lib.utils import time_util
from lib.utils import boxes as box_util
from lib.utils.blob import im_list_to_blob
from lib.utils.image import scale_image
from seetadet.algo.faster_rcnn import utils as rcnn_util
from seetadet.core.config import cfg
from seetadet.modeling.detector import new_detector
from seetadet.utils import env
from seetadet.utils import nms as nms_util
from seetadet.utils import time_util
from seetadet.utils import boxes as box_util
from seetadet.utils.blob import im_list_to_blob
from seetadet.utils.image import scale_image
def im_detect(detector, raw_image):
......@@ -33,50 +34,46 @@ def im_detect(detector, raw_image):
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale
], dtype=np.float32)
data = im_list_to_blob(ims)
ims_info = np.array([list(data.shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
# Do Forward
if not hasattr(detector, 'graph'):
with framework.new_workspace().as_default():
data = torch.from_numpy(blobs['data'])
ims_info = torch.from_numpy(blobs['ims_info'])
with torch.no_grad():
with torch.jit.Tracer(retain_ops=True):
inputs = {'data': data, 'ims_info': ims_info}
outputs = detector.forward(inputs)
detector.graph = \
framework.Graph(inputs, {
'rois': outputs['rois'],
'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']
})
outputs = detector.graph(**blobs)
data = torch.from_numpy(data)
ims_info = torch.from_numpy(ims_info)
if not hasattr(detector, 'script_forward'):
def script_forward(self, data, ims_info):
return self.forward({'data': data, 'ims_info': ims_info})
detector.script_forward = torch.jit.trace(
func=types.MethodType(script_forward, detector),
example_inputs=[data, ims_info],
)
outputs = detector.script_forward(data, ims_info)
outputs = dict((k, outputs[k].numpy()) for k in outputs.keys())
# Decode results
rois = outputs['rois']
scores, boxes, batch_inds = [], [], []
all_scores, all_boxes, batch_inds = [], [], []
pred_boxes = \
box_util.bbox_transform_inv(
rois[:, 1:5],
outputs['rois'][:, 1:5],
outputs['bbox_pred'],
cfg.BBOX_REG_WEIGHTS,
)
for i in range(len(ims)):
inds = np.where(rois[:, 0].astype(np.int32) == i)[0]
im_boxes = pred_boxes[inds] / ims_scale[i]
scores.append(outputs['cls_prob'][inds])
boxes.append(box_util.clip_tiled_boxes(im_boxes, raw_image.shape))
inds = np.where(outputs['rois'][:, 0].astype(np.int32) == i)[0]
boxes = pred_boxes[inds] / ims_scale[i]
all_scores.append(outputs['cls_prob'][inds])
all_boxes.append(box_util.clip_tiled_boxes(boxes, raw_image.shape))
batch_inds.append(np.ones((len(inds), 1), 'int32') * i)
return (
np.vstack(scores) if len(ims) > 0 else scores[0],
np.vstack(boxes) if len(ims) > 0 else boxes[0],
np.vstack(batch_inds) if len(ims) > 0 else batch_inds[0],
np.vstack(all_scores),
np.vstack(all_boxes),
np.vstack(batch_inds),
np.array(ims_scale, 'float64'),
)
......@@ -85,25 +82,15 @@ def mask_detect(detector, rois):
k_min = cfg.FPN.ROI_MIN_LEVEL
k_max = cfg.FPN.ROI_MAX_LEVEL
k = k_max - k_min + 1
levels = map_rois_to_levels(rois, k_min, k_max)
levels = rcnn_util.map_rois_to_levels(rois, k_min, k_max)
level_inds = [np.where(levels == (i + k_min))[0] for i in range(k)]
fpn_rois = map_blobs_to_outputs(
fpn_rois = rcnn_util.map_blobs_by_levels(
{'rois': rois[:, :5]},
{'rois': np.array([[-1, 0, 0, 1, 1]], 'float32')},
level_inds)['rois']
workspace = detector.graph.workspace
placeholders = detector.graph.placeholders
score_fn = detector.rcnn.compute_mask_score
with workspace.as_default():
if 'rois' not in placeholders:
placeholders['rois'] = \
[framework.new_placeholder(cfg.GPU_ID) for _ in range(k)]
placeholders['mask_inds'] = \
framework.new_placeholder(cfg.GPU_ID)
for i, v in enumerate(fpn_rois):
framework.feed_tensor(placeholders['rois'][i], v.astype('float32'))
with torch.no_grad():
mask_score = score_fn(rois=placeholders['rois'])
mask_score = detector.rcnn.compute_mask_score(
rois=[env.new_tensor(r.astype('float32')) for r in fpn_rois])
nc, i = mask_score.shape[1], 0
mask_inds = {}
for inds in level_inds:
......@@ -114,14 +101,10 @@ def mask_detect(detector, rois):
if len(inds) == 0:
i += 1
mask_inds = list(map(mask_inds.get, sorted(mask_inds)))
framework.feed_tensor(
placeholders['mask_inds'],
np.array(mask_inds, 'int64'),
)
mask_inds = env.new_tensor(np.array(mask_inds, 'int64'))
with torch.no_grad():
mask_pred = mask_score.index_select(
(0, 1), placeholders['mask_inds'])
return detector.rcnn.sigmoid(mask_pred).numpy(True).copy()
mask_pred = mask_score.index_select((0, 1), mask_inds)
return detector.rcnn.sigmoid(mask_pred).numpy().copy()
def test_net(weights, num_classes, q_in, q_out, device):
......@@ -132,7 +115,7 @@ def test_net(weights, num_classes, q_in, q_out, device):
while True:
idx, raw_image = q_in.get()
if raw_image is None:
if idx < 0:
break
rois_this_image = []
......@@ -153,17 +136,16 @@ def test_net(weights, num_classes, q_in, q_out, device):
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = nms_wrapper.soft_nms(
keep = nms_util.soft_nms(
cls_detections,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms_wrapper.nms(
keep = nms_util.nms(
cls_detections,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
cls_batch_inds = cls_batch_inds[keep]
......@@ -190,13 +172,9 @@ def test_net(weights, num_classes, q_in, q_out, device):
q_out.put((
idx,
{
'im_detect': _t['im_detect'].average_time,
'mask_detect': _t['mask_detect'].average_time,
'misc': _t['misc'].average_time,
},
{
'boxes': boxes_this_image,
'masks': masks_this_image,
},
dict([('im_detect', _t['im_detect'].average_time),
('mask_detect', _t['mask_detect'].average_time),
('misc', _t['misc'].average_time)]),
dict([('boxes', boxes_this_image),
('masks', masks_this_image)]),
))
......@@ -13,7 +13,5 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.faster_rcnn.anchor_target import AnchorTarget
from lib.faster_rcnn.proposal import Proposal
from lib.mask_rcnn.data_loader import DataLoader
from lib.mask_rcnn.proposal_target import ProposalTarget
from seetadet.algo.retinanet.anchor_target import AnchorTarget
from seetadet.algo.retinanet.data_loader import DataLoader
......@@ -15,12 +15,12 @@ from __future__ import print_function
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
from lib.faster_rcnn import generate_grid_anchors
from lib.utils import boxes as box_util
from lib.utils import logger
from lib.utils.framework import new_tensor
from seetadet.core.config import cfg
from seetadet.algo.faster_rcnn.generate_anchors import generate_anchors_v2
from seetadet.algo.faster_rcnn.utils import generate_grid_anchors
from seetadet.utils import boxes as box_util
from seetadet.utils import logger
from seetadet.utils.env import new_tensor
class AnchorTarget(object):
......@@ -47,7 +47,7 @@ class AnchorTarget(object):
sizes=sizes,
))
def __call__(self, features, gt_boxes, ims_info):
def __call__(self, features, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
......@@ -67,10 +67,8 @@ class AnchorTarget(object):
num_anchors = all_anchors.shape[0]
# Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
labels_wide = -np.ones((num_images, num_anchors,), 'float32')
bbox_targets_wide = np.zeros((num_images, num_anchors, 4), 'float32')
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
labels_wide = -np.ones((num_images, num_anchors,), 'int64')
bbox_indices_wide, bbox_anchors_wide, bbox_targets_wide = [], [], []
# Different from R-CNN, all anchors will be used
inds_inside, anchors = np.arange(num_anchors), all_anchors
......@@ -81,7 +79,7 @@ class AnchorTarget(object):
gt_boxes = gt_boxes_wide[ix]
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels = np.empty((num_inside,), dtype='int64')
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
......@@ -89,48 +87,41 @@ class AnchorTarget(object):
argmax_overlaps = overlaps.argmax(1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
# fg label: for each gt, anchor with highest overlap
# Foreground: for each gt, anchor with highest overlap
gt_argmax_overlaps = overlaps.argmax(0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
gt_inds = argmax_overlaps[gt_argmax_overlaps]
labels[gt_argmax_overlaps] = gt_boxes[gt_inds, 4]
# fg label: above threshold IOU
# Foreground: above threshold IoU
inds = max_overlaps >= cfg.RETINANET.POSITIVE_OVERLAP
gt_inds = argmax_overlaps[inds]
labels[inds] = gt_boxes[gt_inds, 4]
fg_inds = np.where(labels > 0)[0]
# bg label: below threshold IOU
# Background: below threshold IoU
labels[max_overlaps < cfg.RETINANET.NEGATIVE_OVERLAP] = 0
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = \
# Undo the background clamping if no foreground anchor remains
if len(fg_inds) == 0:
gt_inds = argmax_overlaps[gt_argmax_overlaps]
labels[gt_argmax_overlaps] = gt_boxes[gt_inds, 4]
fg_inds = np.where(labels > 0)[0]
labels_wide[ix, inds_inside] = labels
bbox_anchors_wide.append(anchors[fg_inds])
bbox_indices_wide.append(fg_inds + (num_anchors * ix))
bbox_targets_wide.append(
box_util.bbox_transform(
anchors[fg_inds, :],
anchors[fg_inds],
gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[fg_inds, :] = np.array((1., 1., 1., 1.))
bbox_reg_weight = float(cfg.RETINANET.BBOX_REG_WEIGHT)
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[fg_inds, :] = bbox_reg_weight / max(len(fg_inds), 1)
labels_wide[ix, inds_inside] = labels
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
)
return {
'labels': new_tensor(labels),
'bbox_targets': new_tensor(bbox_targets),
'bbox_inside_weights': new_tensor(bbox_inside_weights),
'bbox_outside_weights': new_tensor(bbox_outside_weights),
'labels': new_tensor(labels_wide),
'bbox_indices': new_tensor(np.concatenate(bbox_indices_wide)),
'bbox_anchors': new_tensor(np.concatenate(bbox_anchors_wide).astype('float32')),
'bbox_targets': new_tensor(np.concatenate(bbox_targets_wide).astype('float32')),
}
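A toy of how the sparse `bbox_indices`/`bbox_targets` outputs above can be consumed; the flattened prediction tensor and the smooth-L1 reduction are illustrative assumptions, not code from this diff:

```python
import numpy as np

# Gather only the foreground anchors from a flattened prediction tensor
# using bbox_indices = fg_inds + num_anchors * image_index.
num_images, num_anchors = 2, 5
bbox_pred = np.random.randn(num_images * num_anchors, 4).astype('float32')
bbox_indices = np.array([1, 7])            # (image 0, anchor 1), (image 1, anchor 2)
bbox_targets = np.zeros((2, 4), 'float32')  # matching regression targets

fg_pred = bbox_pred[bbox_indices]           # sparse gather, no dense masks
diff = np.abs(fg_pred - bbox_targets)
loss = np.where(diff < 1., 0.5 * diff ** 2, diff - 0.5).sum() / len(bbox_indices)
print(loss)
```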
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from seetadet.algo import faster_rcnn
from seetadet.algo import ssd
from seetadet.core.config import cfg
class DataLoader(object):
"""Provide mini-batches of data."""
def __new__(cls):
if cfg.TRAIN.MAX_SIZE > 0:
return faster_rcnn.DataLoader()
else:
return ssd.DataLoader()
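The `__new__` dispatch above returns an instance of whichever loader matches the configuration; a tiny standalone illustration of that pattern:

```python
# When __new__ returns an object that is not an instance of the class,
# Python skips __init__ and hands back the foreign instance directly.
class _A:
    pass

class _B:
    pass

class Dispatch:
    def __new__(cls, use_a):
        return _A() if use_a else _B()

print(type(Dispatch(True)).__name__, type(Dispatch(False)).__name__)  # _A _B
```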
......@@ -13,66 +13,59 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import types
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.modeling.detector import new_detector
from lib.nms import nms_wrapper
from lib.utils import framework
from lib.utils import time_util
from lib.utils.blob import im_list_to_blob
from lib.utils.image import scale_image
from seetadet.core.config import cfg
from seetadet.modeling.detector import new_detector
from seetadet.utils import nms as nms_util
from seetadet.utils import time_util
from seetadet.utils.blob import im_list_to_blob
from seetadet.utils.image import scale_image
def ims_detect(detector, raw_images):
"""Detect images, with single or multiple scales."""
ims, ims_scale = scale_image(raw_images[0])
num_scales = len(ims_scale)
ims_shape = [im.shape for im in raw_images]
for item_idx in range(1, len(raw_images)):
ims_ext, ims_scale_ext = scale_image(raw_images[item_idx])
ims += ims_ext
ims_scale += ims_scale_ext
ims, ims_scale = [], []
for i in range(len(raw_images)):
im, im_scale = scale_image(raw_images[i])
ims += im
ims_scale += im_scale
num_scales = len(ims_scale) // len(raw_images)
ims_shape = np.array([im.shape[:2] for im in ims])
ims_scale = np.array(ims_scale).reshape((len(ims), -1))
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale
], dtype=np.float32)
data = im_list_to_blob(ims)
ims_info = np.hstack([ims_shape, ims_scale]).astype('float32')
# Do Forward
if not hasattr(detector, 'graph'):
with framework.new_workspace().as_default():
data = torch.from_numpy(blobs['data'])
ims_info = torch.from_numpy(blobs['ims_info'])
with torch.no_grad():
with torch.jit.Tracer(retain_ops=True):
inputs = {'data': data, 'ims_info': ims_info}
outputs = detector.forward(inputs)
detector.graph = \
framework.Graph({
'data': inputs['data'],
'ims_info': inputs['ims_info']
}, {'detections': outputs['detections']})
outputs = detector.graph(**blobs)
data = torch.from_numpy(data)
ims_info = torch.from_numpy(ims_info)
if not hasattr(detector, 'script_forward'):
def script_forward(self, data, ims_info):
return self.forward({'data': data, 'ims_info': ims_info})
detector.script_forward = torch.jit.trace(
func=types.MethodType(script_forward, detector),
example_inputs=[data, ims_info],
)
outputs = detector.script_forward(data, ims_info)
outputs = dict((k, outputs[k].numpy()) for k in outputs.keys())
# Unpack results
results = outputs['detections']
detections = [[] for _ in range(len(ims_shape))]
detections = [[] for _ in range(len(raw_images))]
for i in range(len(ims)):
inds = np.where(results[:, 0].astype(np.int32) == i)[0]
detections[i // num_scales].append(results[inds, 1:])
for i in range(len(ims_shape)):
detections[i] = \
np.vstack(detections[i]) \
if len(detections[i]) > 1 \
else detections[i][0]
return detections
return [np.vstack(detections[i]) for i in range(len(raw_images))]
def test_net(weights, num_classes, q_in, q_out, device):
......@@ -88,7 +81,7 @@ def test_net(weights, num_classes, q_in, q_out, device):
indices, raw_images = [], []
for i in range(cfg.TEST.IMS_PER_BATCH):
idx, raw_image = q_in.get()
if raw_image is None:
if idx < 0:
must_stop = True
break
indices.append(idx)
......@@ -115,17 +108,16 @@ def test_net(weights, num_classes, q_in, q_out, device):
cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = nms_wrapper.soft_nms(
keep = nms_util.soft_nms(
cls_detections,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms_wrapper.nms(
keep = nms_util.nms(
cls_detections,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
boxes_this_image.append(cls_detections)
......@@ -133,11 +125,7 @@ def test_net(weights, num_classes, q_in, q_out, device):
q_out.put((
indices[i],
{
'im_detect': _t['im_detect'].average_time,
'misc': _t['misc'].average_time,
},
{
'boxes': boxes_this_image,
},
dict([('im_detect', _t['im_detect'].average_time),
('misc', _t['misc'].average_time)]),
dict([('boxes', boxes_this_image)]),
))
......@@ -13,11 +13,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.faster_rcnn.anchor_target import AnchorTarget
from lib.faster_rcnn.data_loader import DataLoader
from lib.faster_rcnn.proposal import Proposal
from lib.faster_rcnn.proposal_target import ProposalTarget
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.faster_rcnn.utils import map_returns_to_blobs
from seetadet.algo.ssd.data_loader import DataLoader
from seetadet.algo.ssd.hard_mining import HardMining
from seetadet.algo.ssd.multibox import MultiBoxMatch
from seetadet.algo.ssd.multibox import MultiBoxTarget
from seetadet.algo.ssd.priorbox import PriorBox
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from seetadet.algo.ssd import data_transformer
from seetadet.core.config import cfg
from seetadet.datasets.factory import get_dataset
from seetadet.utils import logger
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
dataset = get_dataset(cfg.TRAIN.DATASET)
if cfg.USE_DALI:
from seetadet.dali import ssd_pipeline as pipe
self.iterator = pipe.new_iterator(dataset.source)
else:
self.iterator = Iterator(**{
'dataset': dataset.cls,
'source': dataset.source,
'classes': dataset.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_THREADS - 1,
})
def __call__(self):
outputs = self.iterator.next()
if isinstance(outputs['data'], np.ndarray):
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class Iterator(object):
"""Iterator to return the batch of data."""
def __init__(self, **kwargs):
super(Iterator, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and \
kwargs.get('phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 32)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.q_in = mp.Queue(num_batches * self._batch_size)
self.q_out = mp.Queue(num_batches * self._batch_size)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
part_idx=part_idx, num_parts=num_parts, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.q_in
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
p = data_transformer.DataTransformer(**kwargs)
p._seed += (i + rank * self._num_transformers)
p.q_in, p.q_out = self.q_in, self.q_out
p.start()
self._transformers.append(p)
time.sleep(0.1)
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for p in processes:
p.terminate()
p.join()
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def next(self):
"""Return the next batch of data."""
return self.__next__()
def __iter__(self):
"""Return the iterator self."""
return self
def __next__(self):
"""Return the next batch of data."""
n = cfg.TRAIN.IMS_PER_BATCH
h = w = cfg.TRAIN.SCALES[0]
boxes_to_pack = []
image, boxes = self.q_out.get()
images = np.zeros((n, h, w, 3), image.dtype)
for i in range(n):
images[i] = image
gt_boxes = np.zeros((boxes.shape[0], boxes.shape[1] + 1), 'float32')
gt_boxes[:, :boxes.shape[1]], gt_boxes[:, -1] = boxes, i
boxes_to_pack.append(gt_boxes)
if i != (cfg.TRAIN.IMS_PER_BATCH - 1):
image, boxes = self.q_out.get()
boxes_to_pack = np.concatenate(boxes_to_pack)
return {'data': images, 'gt_boxes': boxes_to_pack}
......@@ -14,19 +14,18 @@ from __future__ import division
from __future__ import print_function
import multiprocessing
import cv2
import numpy as np
from lib.core.config import cfg
from lib.datasets.example import Example
from lib.ssd import transforms
from lib.utils import boxes as box_util
from seetadet.algo.ssd import transforms
from seetadet.core.config import cfg
from seetadet.datasets.example import Example
from seetadet.utils import boxes as box_util
class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._scale = cfg.TRAIN.SCALES[0]
self._seed = cfg.RNG_SEED
self._mirror = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF
......@@ -107,14 +106,15 @@ class DataTransformer(multiprocessing.Process):
gt_boxes = np.empty((roi_dict['gt_classes'].size, 5), 'float32')
gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
if len(gt_boxes) == 0:
# Return early for images without objects
return img, gt_boxes
# Distort => Expand => Sample => Resize
img, gt_boxes = self.augment_image(img, gt_boxes)
# Restore to the blob scale
gt_boxes[:, 0] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 1] *= cfg.SSD.RESIZE.HEIGHT
gt_boxes[:, 2] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 3] *= cfg.SSD.RESIZE.HEIGHT
gt_boxes[:, :4] *= self._scale
# Post-Process for image
if img.dtype == 'uint16':
......
......@@ -15,29 +15,25 @@ from __future__ import print_function
import numpy as np
from lib.core.config import cfg
from lib.utils.framework import new_tensor
from seetadet.core.config import cfg
from seetadet.utils.env import new_tensor
class HardMining(object):
def __call__(self, prob_wide, labels_wide, overlaps_wide):
prob_wide = prob_wide.numpy(True)
def __call__(self, prob, labels, overlaps):
label_shape, label_size = labels.shape, labels.size
prob = prob.numpy().reshape((label_size, -1))
labels, overlaps = labels.flatten(), overlaps.flatten()
neg_ovr = cfg.SSD.OHEM.NEG_OVERLAP
neg_ratio = cfg.SSD.OHEM.NEG_POS_RATIO
# label ``-1`` will be ignored
new_labels_wide = -np.ones(labels_wide.shape, 'int64')
for ix in range(labels_wide.shape[0]):
labels = labels_wide[ix]
overlaps = overlaps_wide[ix]
prob = prob_wide[ix]
loss = np.zeros(labels.shape, 'float32')
inds = np.where(labels >= 0)[0]
loss[inds] = -np.log(
new_labels = -np.ones(labels.shape, 'int64')
cls_loss = -np.log(
np.maximum(
prob[inds, labels[inds]],
prob[np.arange(label_size), labels],
np.finfo(float).eps,
)
)
......@@ -50,12 +46,12 @@ class HardMining(object):
neg_inds = neg_inds[eligible_neg_inds]
# Apply mining on negatives
neg_loss = loss[neg_inds]
neg_cls_loss = cls_loss[neg_inds]
num_pos, num_neg = len(fg_inds), len(neg_inds)
num_bg = min(int(num_pos * neg_ratio), num_neg)
bg_inds = neg_inds[np.argsort(-neg_loss)][:num_bg]
new_labels_wide[ix][fg_inds] = labels[fg_inds] # Keep fg indices
new_labels_wide[ix][bg_inds] = 0 # Use hard negatives as bg indices
bg_inds = neg_inds[np.argsort(-neg_cls_loss)][:num_bg]
new_labels[fg_inds] = labels[fg_inds] # Keep fg indices
new_labels[bg_inds] = 0 # Use hard negatives as bg indices
# Feed labels to compute cls loss
return {'labels': new_tensor(new_labels_wide)}
return {'labels': new_tensor(new_labels.reshape(label_shape))}
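The mining policy reads more clearly on a toy batch; a self-contained NumPy sketch of the same keep-positives, top-loss-negatives selection:

```python
import numpy as np

# Toy of the OHEM selection above: keep all positives, plus the
# highest-loss negatives up to NEG_POS_RATIO times the positive count.
np.random.seed(3)
labels = np.array([2, 0, 0, 0, 0, 1])      # 2 positives, 4 negatives
cls_loss = np.random.rand(6)               # per-anchor classification loss
neg_ratio = 1.0

fg_inds = np.where(labels > 0)[0]
neg_inds = np.where(labels == 0)[0]
num_bg = min(int(len(fg_inds) * neg_ratio), len(neg_inds))
bg_inds = neg_inds[np.argsort(-cls_loss[neg_inds])][:num_bg]

new_labels = -np.ones_like(labels)         # -1 is ignored by the loss
new_labels[fg_inds] = labels[fg_inds]      # keep foreground labels
new_labels[bg_inds] = 0                    # hard negatives become background
print(new_labels)
```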
......@@ -15,9 +15,9 @@ from __future__ import print_function
import numpy as np
from lib.core.config import cfg
from lib.utils import boxes as box_util
from lib.utils.framework import new_tensor
from seetadet.core.config import cfg
from seetadet.utils import boxes as box_util
from seetadet.utils.env import new_tensor
class MultiBoxMatch(object):
......@@ -47,8 +47,8 @@ class MultiBoxMatch(object):
# Bipartite matching and assignments
bipartite_inds = overlaps.argmax(0)
class_assignment = gt_boxes[:, -1]
match_inds_wide[ix][bipartite_inds] = np.arange(num_gt, dtype='int32')
match_labels_wide[ix][bipartite_inds] = class_assignment
match_inds_wide[ix, bipartite_inds] = np.arange(num_gt, dtype='int32')
match_labels_wide[ix, bipartite_inds] = class_assignment
# Per prediction matching and assignments
# Note that SSD matches each prior box only once
......@@ -56,8 +56,8 @@ class MultiBoxMatch(object):
per_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
gt_assignment = argmax_overlaps[per_inds]
class_assignment = gt_boxes[gt_assignment, -1]
match_inds_wide[ix][per_inds] = gt_assignment
match_labels_wide[ix][per_inds] = class_assignment
match_inds_wide[ix, per_inds] = gt_assignment
match_labels_wide[ix, per_inds] = class_assignment
return {
'match_inds': match_inds_wide,
......@@ -82,15 +82,7 @@ class MultiBoxTarget(object):
num_priors, box_dim = prior_boxes.shape[:]
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
bbox_targets_wide = np.zeros((num_images, num_priors, box_dim), 'float32')
bbox_inside_weights_wide = np.zeros(bbox_targets_wide.shape, 'float32')
bbox_outside_weights_wide = np.zeros(bbox_targets_wide.shape, 'float32')
# Number of matched boxes (#positive)
n_pos = float(max(len(np.where(match_labels_wide > 0)[0]), 1))
# Multiply by the number of images to compensate for the smooth L1 loss
bbox_reg_weight = cfg.SSD.BBOX_REG_WEIGHT * num_images / n_pos
bbox_indices_wide, bbox_anchors_wide, bbox_targets_wide = [], [], []
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
......@@ -106,17 +98,18 @@ class MultiBoxTarget(object):
gt_rois = gt_boxes[gt_assignment]
# Assign bbox targets
bbox_targets_wide[ix][ex_inds] = \
bbox_anchors_wide.append(ex_rois)
bbox_indices_wide.append(ex_inds + (num_priors * ix))
bbox_targets_wide.append(
box_util.bbox_transform(
ex_rois,
gt_rois,
cfg.BBOX_REG_WEIGHTS,
)
bbox_inside_weights_wide[ix, :] = 1.
bbox_outside_weights_wide[ix][ex_inds] = bbox_reg_weight
)
return {
'bbox_targets': new_tensor(bbox_targets_wide),
'bbox_inside_weights': new_tensor(bbox_inside_weights_wide),
'bbox_outside_weights': new_tensor(bbox_outside_weights_wide),
'bbox_indices': new_tensor(np.concatenate(bbox_indices_wide)),
'bbox_anchors': new_tensor(np.concatenate(bbox_anchors_wide).astype('float32')),
'bbox_targets': new_tensor(np.concatenate(bbox_targets_wide).astype('float32')),
}
......@@ -15,9 +15,8 @@ from __future__ import print_function
import numpy as np
from lib.core.config import cfg
from lib.ssd.generate_anchors import generate_anchors
from lib.utils import logger
from seetadet.algo.ssd.generate_anchors import generate_anchors
from seetadet.core.config import cfg
class PriorBox(object):
......@@ -29,8 +28,10 @@ class PriorBox(object):
max_sizes = cfg.SSD.MULTIBOX.MAX_SIZES
if len(max_sizes) > 0:
if len(min_sizes) != len(max_sizes):
logger.fatal('Got {} min sizes and {} max sizes.'.format(
len(min_sizes), len(max_sizes)))
raise ValueError(
'Got {} min sizes and {} max sizes.'
.format(len(min_sizes), len(max_sizes))
)
self.strides = cfg.SSD.MULTIBOX.STRIDES
aspect_ratios = cfg.SSD.MULTIBOX.ASPECT_RATIOS
self.base_anchors = []
......@@ -44,9 +45,14 @@ class PriorBox(object):
aspect_ratios[i],
)
)
self.grid_anchors = None
def __call__(self, features):
all_anchors = []
if self.grid_anchors is not None:
return self.grid_anchors
self.grid_anchors = []
for i in range(len(self.strides)):
# 1. Generate base grids
height, width = features[i].shape[-2:]
......@@ -61,26 +67,17 @@ class PriorBox(object):
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
D = self.base_anchors[i].shape[1]
if D == 4:
shifts = np.vstack((
shift_x.ravel(),
shift_y.ravel(),
shift_x.ravel(),
shift_y.ravel())
).transpose()
elif D == 5:
shifts = np.vstack((
shift_x.ravel(),
shift_y.ravel(),
shift_x.ravel() * 0,
shift_y.ravel() * 0,
shift_y.ravel() * 0)
).transpose()
else:
raise ValueError('Expected anchor4d or anchor5d.')
K = shifts.shape[0] # K = map_h * map_w
anchors = (self.base_anchors[i].reshape((1, A, D)) +
shifts.reshape((1, K, D)).transpose((1, 0, 2)))
anchors = anchors.reshape((K * A, D)).astype(np.float32)
all_anchors.append(anchors)
return np.concatenate(all_anchors, axis=0)
self.grid_anchors.append(anchors)
self.grid_anchors = np.concatenate(self.grid_anchors)
return self.grid_anchors
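Since SSD always resizes to a fixed square input, the grid anchors are now computed once and cached in self.grid_anchors. A small numpy sketch of the shift trick above, assuming one level with stride 8, a 2x3 feature map, and a single 4-d base anchor:

```python
import numpy as np

stride, height, width = 8, 2, 3
base_anchors = np.array([[-8., -8., 8., 8.]], 'float32')  # A=1, D=4

shift_x = np.arange(width) * stride
shift_y = np.arange(height) * stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()

A, D = base_anchors.shape
K = shifts.shape[0]  # K = height * width grid positions
anchors = (base_anchors.reshape((1, A, D)) +
           shifts.reshape((1, K, D)).transpose((1, 0, 2)))
anchors = anchors.reshape((K * A, D)).astype('float32')
print(anchors.shape)  # (6, 4): one anchor per grid cell
```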
......@@ -13,26 +13,30 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import types
import cv2
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.modeling.detector import new_detector
from lib.nms import nms_wrapper
from lib.utils import boxes as box_util
from lib.utils import framework
from lib.utils import time_util
from seetadet.core.config import cfg
from seetadet.modeling.detector import new_detector
from seetadet.utils import boxes as box_util
from seetadet.utils import nms as nms_util
from seetadet.utils import time_util
def get_images(ims):
target_h = cfg.SSD.RESIZE.HEIGHT
target_w = cfg.SSD.RESIZE.WIDTH
out_size = cfg.TEST.SCALES[0]
processed_ims, im_scales = [], []
for im in ims:
im_scales.append((float(target_h) / im.shape[0],
float(target_w) / im.shape[1]))
processed_ims.append(cv2.resize(im, (target_w, target_h)))
im_scales.append((float(out_size) / im.shape[0],
float(out_size) / im.shape[1]))
processed_ims.append(
cv2.resize(
im, (out_size, out_size),
interpolation=cv2.INTER_AREA,
))
if ims[0].dtype == 'uint16':
ims_blob = np.array(processed_ims, dtype='float32') / 256.
else:
......@@ -45,34 +49,33 @@ def ims_detect(detector, ims):
data, im_scales = get_images(ims)
# Do Forward
if not hasattr(detector, 'graph'):
with framework.new_workspace().as_default():
with torch.no_grad():
with torch.jit.Tracer(retain_ops=True):
inputs = {'data': torch.from_numpy(data)}
outputs = detector.forward(inputs)
detector.graph = \
framework.Graph(inputs, {
'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']
}, {'prior_boxes': outputs['prior_boxes']})
outputs = detector.graph(data=data)
data = torch.from_numpy(data)
if not hasattr(detector, 'script_forward'):
def script_forward(self, data):
return self.forward({'data': data})
detector.script_forward = torch.jit.trace(
func=types.MethodType(script_forward, detector),
example_inputs=[data],
)
outputs = detector.script_forward(data)
cls_prob = outputs['cls_prob'].numpy()
bbox_pred = outputs['bbox_pred'].numpy()
# Decode results
batch_boxes = []
for i in range(len(im_scales)):
boxes = box_util.bbox_transform_inv(
outputs['prior_boxes'],
outputs['bbox_pred'][i],
bbox_pred[i],
cfg.BBOX_REG_WEIGHTS,
)
boxes[:, 0] /= im_scales[i][1]
boxes[:, 1] /= im_scales[i][0]
boxes[:, 2] /= im_scales[i][1]
boxes[:, 3] /= im_scales[i][0]
boxes[:, 0::2] /= im_scales[i][1]
boxes[:, 1::2] /= im_scales[i][0]
batch_boxes.append(box_util.clip_boxes(boxes, ims[i].shape))
return outputs['cls_prob'], batch_boxes
return cls_prob, batch_boxes
def test_net(weights, num_classes, q_in, q_out, device):
......@@ -88,7 +91,7 @@ def test_net(weights, num_classes, q_in, q_out, device):
indices, raw_images = [], []
for i in range(cfg.TEST.IMS_PER_BATCH):
idx, raw_image = q_in.get()
if raw_image is None:
if idx < 0:
must_stop = True
break
indices.append(idx)
......@@ -116,17 +119,16 @@ def test_net(weights, num_classes, q_in, q_out, device):
(cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = nms_wrapper.soft_nms(
keep = nms_util.soft_nms(
cls_detections,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms_wrapper.nms(
keep = nms_util.nms(
cls_detections,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
boxes_this_image.append(cls_detections)
......@@ -134,11 +136,7 @@ def test_net(weights, num_classes, q_in, q_out, device):
q_out.put((
indices[i],
{
'im_detect': _t['im_detect'].average_time,
'misc': _t['misc'].average_time,
},
{
'boxes': boxes_this_image,
},
dict([('im_detect', _t['im_detect'].average_time),
('misc', _t['misc'].average_time)]),
dict([('boxes', boxes_this_image)]),
))
......@@ -22,9 +22,10 @@ import PIL.ImageEnhance
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils import boxes as box_util
from lib.utils import logger
from seetadet.core.config import cfg
from seetadet.utils import boxes as box_util
from seetadet.utils import boxes_v2 as box_util_v2
from seetadet.utils import logger
class Compose(object):
......@@ -40,43 +41,35 @@ class Compose(object):
class Distort(object):
def __init__(self):
self._brightness_prob = cfg.SSD.DISTORT.BRIGHTNESS_PROB
self._contrast_prob = cfg.SSD.DISTORT.CONTRAST_PROB
self._saturation_prob = cfg.SSD.DISTORT.SATURATION_PROB
self._prob = 0.5
self._transforms = [
(PIL.ImageEnhance.Brightness, self._prob),
(PIL.ImageEnhance.Contrast, self._prob),
(PIL.ImageEnhance.Color, self._prob),
]
def apply(self, img, boxes=None):
if self._prob > 0:
img = PIL.Image.fromarray(img)
transforms = [
(PIL.ImageEnhance.Brightness, self._brightness_prob),
(PIL.ImageEnhance.Contrast, self._contrast_prob),
(PIL.ImageEnhance.Color, self._saturation_prob),
]
np.random.shuffle(transforms)
for transform_fn, prob in transforms:
for transform_fn, prob in self._transforms:
if npr.uniform() < prob:
img = transform_fn(img)
img = img.enhance(1. + npr.uniform(-.4, .4))
return np.array(img), boxes
return img, boxes
class Expand(object):
def __init__(self):
self._expand_prob = cfg.SSD.EXPAND.PROB
self._max_ratio = cfg.SSD.EXPAND.MAX_RATIO
if self._max_ratio < 1.0:
logger.fatal(
'The max expand ratio must be >= 1, got {}'
.format(self._max_ratio)
)
self._max_ratio = 1. / cfg.TRAIN.RANDOM_SCALES[0]
self._expand_prob = 0.5 if self._max_ratio > 1 else 0
def apply(self, img, boxes=None):
prob = npr.uniform()
if prob > self._expand_prob:
return img, boxes
ratio = npr.uniform(1., self._max_ratio)
if ratio == 1:
return img, boxes
ratio = npr.uniform(1., self._max_ratio)
im_h, im_w = img.shape[:2]
expand_h, expand_w = int(im_h * ratio), int(im_w * ratio)
h_off = int(math.floor(npr.uniform(0., expand_h - im_h)))
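A runnable sketch of the expansion above, assuming boxes normalized to [0, 1] and a canvas filled with hypothetical mean-pixel values:

```python
import math
import numpy as np
import numpy.random as npr

img = np.zeros((300, 300, 3), 'uint8')
boxes = np.array([[0.2, 0.2, 0.6, 0.6]], 'float32')

ratio = npr.uniform(1., 4.)  # 4 = 1 / RANDOM_SCALES[0] for scales [0.25, 1]
im_h, im_w = img.shape[:2]
expand_h, expand_w = int(im_h * ratio), int(im_w * ratio)
h_off = int(math.floor(npr.uniform(0., expand_h - im_h)))
w_off = int(math.floor(npr.uniform(0., expand_w - im_w)))

canvas = np.empty((expand_h, expand_w, 3), 'uint8')
canvas[:] = (102, 115, 122)  # stand-in for cfg.PIXEL_MEANS
canvas[h_off:h_off + im_h, w_off:w_off + im_w] = img

# Re-normalize the boxes against the expanded canvas.
boxes[:, 0::2] = (boxes[:, 0::2] * im_w + w_off) / expand_w
boxes[:, 1::2] = (boxes[:, 1::2] * im_h + h_off) / expand_h
```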
......@@ -99,19 +92,14 @@ class Expand(object):
class Resize(object):
def __init__(self):
self._target_size = (
cfg.SSD.RESIZE.WIDTH,
cfg.SSD.RESIZE.HEIGHT,
)
interp_list = {
'LINEAR': cv2.INTER_LINEAR,
'AREA': cv2.INTER_AREA,
'NEAREST': cv2.INTER_NEAREST,
'CUBIC': cv2.INTER_CUBIC,
'LANCZOS4': cv2.INTER_LANCZOS4,
}
interp_mode = cfg.SSD.RESIZE.INTERP_MODE
self._interp_mode = [interp_list[key] for key in interp_mode]
self._target_size = (cfg.TRAIN.SCALES[0],) * 2
self._interp_mode = [
cv2.INTER_LINEAR,
cv2.INTER_AREA,
cv2.INTER_NEAREST,
cv2.INTER_CUBIC,
cv2.INTER_LANCZOS4,
]
def apply(self, img, boxes):
rand = npr.randint(len(self._interp_mode))
......@@ -144,7 +132,10 @@ class Sample(object):
@classmethod
def _compute_overlaps(cls, rand_box, gt_boxes):
return box_util.iou(np.expand_dims(rand_box, 0), gt_boxes[:, 0:4])
return box_util_v2.iou(
np.expand_dims(rand_box, 0),
gt_boxes[:, 0:4],
)
@classmethod
def _generate_sample(cls, sample_param):
......@@ -162,18 +153,27 @@ class Sample(object):
h_off = npr.uniform(0., 1. - bbox_h)
return np.array([w_off, h_off, w_off + bbox_w, h_off + bbox_h])
def _check_satisfy(self, sample_box, gt_boxes, constraint):
def _check_center(self, sample_box, gt_boxes):
ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
# Keep the ground-truth box whose center is in the sample box
# Implement ``EmitConstraint.CENTER`` in the original SSD
keep_inds = np.where((ctr_x >= sample_box[0]) & (ctr_x <= sample_box[2]) &
(ctr_y >= sample_box[1]) & (ctr_y <= sample_box[3]))[0]
return len(keep_inds) > 0
def _check_overlap(self, sample_box, gt_boxes, constraint):
min_overlap = constraint.get('min_overlap', None)
max_overlap = constraint.get('max_overlap', None)
if min_overlap is None and \
max_overlap is None:
return True
max_overlap = self._compute_overlaps(sample_box, gt_boxes).max()
ovr = self._compute_overlaps(sample_box, gt_boxes).max()
if min_overlap is not None:
if max_overlap < min_overlap:
if ovr < min_overlap:
return False
if max_overlap is not None:
if max_overlap > max_overlap:
if ovr > max_overlap:
return False
return True
......@@ -187,8 +187,9 @@ class Sample(object):
sample_box = self._generate_sample(sampler)
if sampler['min_overlap'] != 0. or \
sampler['max_overlap'] != 1.:
ok = self._check_satisfy(sample_box, gt_boxes, sampler)
if not ok:
if not self._check_overlap(sample_box, gt_boxes, sampler):
continue
if not self._check_center(sample_box, gt_boxes):
continue
found += 1
sample_boxes.append(sample_box)
......@@ -206,8 +207,6 @@ class Sample(object):
if gt_boxes is not None:
ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
# Keep the ground-truth box whose center is in the sample box
# Implement ``EmitConstraint.CENTER`` in the original SSD
keep_inds = np.where((ctr_x >= rand_box[0]) & (ctr_x <= rand_box[2]) &
(ctr_y >= rand_box[1]) & (ctr_y <= rand_box[3]))[0]
gt_boxes = gt_boxes[keep_inds]
......
......@@ -19,11 +19,14 @@ sys.path.append('../../')
import cv2
import numpy as np
from lib.ssd import transforms
from seetadet.algo.ssd import transforms
from seetadet.core.config import cfg
if __name__ == '__main__':
np.random.seed(3)
cfg.TRAIN.SCALES = [300]
cfg.TRAIN.RANDOM_SCALES = [0.25, 1.00]
augmentor = transforms.Compose(
transforms.Distort(),
......@@ -36,8 +39,6 @@ if __name__ == '__main__':
img = cv2.imread('cat.jpg')
boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
img, boxes = augmentor(img, boxes)
if len(boxes) < 1:
continue
for box in boxes:
x1 = int(box[0] * img.shape[1])
y1 = int(box[1] * img.shape[0])
......
......@@ -20,7 +20,7 @@ from __future__ import print_function
import os.path as osp
import numpy as np
from lib.utils.attrdict import AttrDict
from seetadet.utils.attrdict import AttrDict
cfg = __C = AttrDict()
......@@ -38,41 +38,27 @@ __C.TRAIN = AttrDict()
# Initialize network with weights from this file
__C.TRAIN.WEIGHTS = ''
# Database to train
__C.TRAIN.DATABASE = ''
# Dataset to train
__C.TRAIN.DATASET = ''
# The number of workers to transform data
__C.TRAIN.NUM_WORKERS = 3
# The number of threads to load train data
__C.TRAIN.NUM_THREADS = 4
# Scales to use during training (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,)
__C.TRAIN.SCALES = (300,)
# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TRAIN.MAX_SIZE = 1000
__C.TRAIN.MAX_SIZE = 0
# Images to use per mini-batch
__C.TRAIN.IMS_PER_BATCH = 1
# Minibatch size (number of regions of interest [ROIs])
__C.TRAIN.BATCH_SIZE = 128
# Fraction of minibatch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25
# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5
# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.0
# Use shuffle after each epoch
# Use shuffled images during training?
__C.TRAIN.USE_SHUFFLE = True
# The number of chunks to shuffle
__C.TRAIN.NUM_SHUFFLE_CHUNKS = 0
# The number of shuffle chunks
__C.TRAIN.SHUFFLE_CHUNKS = 0
# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = True
......@@ -80,17 +66,25 @@ __C.TRAIN.USE_FLIPPED = True
# Use the difficult(under occlusion) objects
__C.TRAIN.USE_DIFF = True
# Overlap required between a ROI and ground-truth box in order for that ROI to
# be used as a bounding-box regression training example
__C.TRAIN.BBOX_THRESH = 0.5
# If True, randomly scale the image by scale range
__C.TRAIN.USE_SCALE_JITTER = False
__C.TRAIN.SCALE_JITTER_RANGE = [0.75, 1.0]
# Range to jitter the image scales
__C.TRAIN.RANDOM_SCALES = [1., 1.]
# If True, randomly distort the image by brightness, contrast, and saturation
__C.TRAIN.USE_COLOR_JITTER = False
# Mini-batch size (#RoIs) for two stage detector
__C.TRAIN.BATCH_SIZE = 128
# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5
# Fraction of mini-batch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25
# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.0
# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
......@@ -123,20 +117,19 @@ __C.TRAIN.RPN_STRADDLE_THRESH = 0
__C.TEST = AttrDict()
# Database to test
__C.TEST.DATABASE = ''
# Dataset to test
__C.TEST.DATASET = ''
# Original json ground-truth file to use
# If not set, records in the dataset file will be used instead
__C.TEST.JSON_FILE = ''
# Scales to use during testing (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TEST.SCALES = (600,)
__C.TEST.SCALES = (300,)
# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TEST.MAX_SIZE = 1000
__C.TEST.MAX_SIZE = 0
# Images to use per mini-batch
__C.TEST.IMS_PER_BATCH = 1
......@@ -217,10 +210,20 @@ __C.MODEL.CLASSES = ['__background__']
# The value of ``K`` is usually set to 2
__C.MODEL.FREEZE_AT = 2
# The variant of ReLU activation
# ('ReLU', 'ReLU6')
__C.MODEL.RELU_VARIANT = 'ReLU'
# Setting of focal loss
__C.MODEL.FOCAL_LOSS_ALPHA = 0.25
__C.MODEL.FOCAL_LOSS_GAMMA = 2.0
# The optional loss for bbox regression
# ('NORM', 'IOU')
__C.MODEL.REG_LOSS_TYPE = 'NORM'
# Weight for bbox regression loss
__C.MODEL.REG_LOSS_WEIGHT = 1.
# Stride of the coarsest Feature level
# This is needed so the input can be padded properly
__C.MODEL.COARSEST_STRIDE = 32
......@@ -268,9 +271,6 @@ __C.RETINANET.ANCHOR_SCALE = 4
# NOTE: this doesn't include the last conv for logits
__C.RETINANET.NUM_CONVS = 4
# Weight for bbox regression loss
__C.RETINANET.BBOX_REG_WEIGHT = 1.
# During inference, #locs to select based on cls score before NMS is performed
__C.RETINANET.PRE_NMS_TOP_N = 5000
......@@ -362,9 +362,6 @@ __C.SSD = AttrDict()
# NOTE: this doesn't include the last conv for logits
__C.SSD.NUM_CONVS = 0
# Weight for bbox regression loss
__C.SSD.BBOX_REG_WEIGHT = 1.
# MultiBox configs
__C.SSD.MULTIBOX = AttrDict()
__C.SSD.MULTIBOX.STRIDES = []
......@@ -379,23 +376,6 @@ __C.SSD.OHEM.NEG_OVERLAP = 0.5
# The ratio used in hard example mining
__C.SSD.OHEM.NEG_POS_RATIO = 3.0
# Distort the image?
__C.SSD.DISTORT = AttrDict()
__C.SSD.DISTORT.BRIGHTNESS_PROB = 0.5
__C.SSD.DISTORT.CONTRAST_PROB = 0.5
__C.SSD.DISTORT.SATURATION_PROB = 0.5
# Expand the image?
__C.SSD.EXPAND = AttrDict()
__C.SSD.EXPAND.PROB = 0.5
__C.SSD.EXPAND.MAX_RATIO = 4.0
# Resize the image?
__C.SSD.RESIZE = AttrDict()
__C.SSD.RESIZE.HEIGHT = 300
__C.SSD.RESIZE.WIDTH = 300
__C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']
# Samplers
# Format as (min_scale, max_scale,
# min_aspect_ratio, max_aspect_ratio,
......@@ -486,7 +466,7 @@ __C.SOLVER.LR_POLICY = 'steps_with_decay'
# Momentum to use with SGD
__C.SOLVER.MOMENTUM = 0.9
# L2 regularization hyper parameters
# L2 regularization for weight parameters
__C.SOLVER.WEIGHT_DECAY = 0.0001
# L2 norm factor for clipping gradients
__C.SOLVER.CLIP_NORM = -1.0
......@@ -505,6 +485,9 @@ __C.NUM_GPUS = 1
# Use NCCL for all reduce, otherwise use cuda-aware mpi
__C.USE_NCCL = True
# Use DALI to load the batch of data instead of the original pipeline
__C.USE_DALI = False
# Hosts for Inter-Machine communication
__C.HOSTS = []
......@@ -531,9 +514,6 @@ __C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))
# Place outputs under an experiments directory
__C.EXP_DIR = ''
# Use GPU implementation of non-maximum suppression
__C.USE_GPU_NMS = True
# Default GPU device id
__C.GPU_ID = 0
......
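A hedged sketch of wiring the renamed and newly-added keys from Python; real experiments would set them through a YAML file under SeetaDet/configs, and the dataset path below is hypothetical:

```python
from seetadet.core.config import cfg

cfg.TRAIN.DATASET = 'default:///data/voc_trainval'  # hypothetical record path
cfg.TRAIN.SCALES = (300,)              # square SSD input, with MAX_SIZE = 0
cfg.TRAIN.RANDOM_SCALES = [0.25, 1.0]  # replaces SCALE_JITTER_RANGE
cfg.MODEL.REG_LOSS_TYPE = 'IOU'        # or 'NORM' for smooth L1
cfg.USE_DALI = True                    # switch to the DALI data pipeline
```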
......@@ -18,8 +18,8 @@ import shutil
import time
import numpy as np
from lib.core.config import cfg
from lib.core.config import cfg_from_file
from seetadet.core.config import cfg
from seetadet.core.config import cfg_from_file
class Coordinator(object):
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import functools
class Registry(object):
"""The base registry class."""
def __init__(self, name):
self._name = name
self._registry = collections.OrderedDict()
def has(self, key):
return key in self._registry
def register(self, name, func=None, **kwargs):
def decorated(inner_function):
for key in (name if isinstance(
name, (tuple, list)) else [name]):
if self.has(key):
raise KeyError(
'`%s` has been registered in %s.'
% (key, self._name)
)
self._registry[key] = functools.partial(
inner_function, **kwargs)
# Return the function so decorator usage keeps the name bound
return inner_function
if func is not None:
return decorated(func)
return decorated
def get(self, name):
if not self.has(name):
raise KeyError(
"`%s` is not registered in <%s>."
% (name, self._name)
)
return self._registry[name]
def try_get(self, name):
if self.has(name):
return self.get(name)
return None
backbones = Registry('backbones')
models = Registry('models')
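A small usage sketch for the registry; the names below are illustrative, mirroring how backbones are registered elsewhere in this commit:

```python
@backbones.register('toy_net')
def toy_net():
    return 'built-toy-net'

# Aliases can share one builder via a list of keys.
backbones.register(['toy18', 'toy_18'], func=toy_net)

assert backbones.has('toy_net')
print(backbones.get('toy18')())  # -> 'built-toy-net'
```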
......@@ -20,9 +20,9 @@ import os
import cv2
import dragon
from lib.core.config import cfg
from lib.datasets.example import Example
from lib.datasets.factory import get_imdb
from seetadet.core.config import cfg
from seetadet.datasets.example import Example
from seetadet.datasets.factory import get_dataset
class _Server(object):
......@@ -50,13 +50,13 @@ class _Server(object):
class TestServer(_Server):
def __init__(self, output_dir):
super(TestServer, self).__init__(output_dir)
self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.classes = self.imdb.classes
self.num_images = self.imdb.num_images
self.num_classes = self.imdb.num_classes
self.dataset = get_dataset(cfg.TEST.DATASET)
self.dataset.competition_mode(cfg.TEST.COMPETITION_MODE)
self.classes = self.dataset.classes
self.num_images = self.dataset.num_images
self.num_classes = self.dataset.num_classes
self.data_reader = dragon.io.DataReader(
dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
dataset=self.dataset.cls, source=self.dataset.source)
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH * 5)
self.data_reader.start()
self.gt_recs = collections.OrderedDict()
......@@ -81,16 +81,16 @@ class TestServer(_Server):
def evaluate_detections(self, all_boxes):
if cfg.TEST.PROTOCOL == 'dump':
self.imdb.dump_detections(all_boxes, self.output_dir)
self.dataset.dump_detections(all_boxes, self.output_dir)
else:
self.imdb.evaluate_detections(
self.dataset.evaluate_detections(
all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
self.dataset.evaluate_segmentations(
all_boxes,
all_masks,
self.get_records(),
......@@ -101,7 +101,7 @@ class TestServer(_Server):
class InferServer(_Server):
def __init__(self, output_dir):
super(InferServer, self).__init__(output_dir)
self.images_dir = cfg.TEST.DATABASE
self.images_dir = cfg.TEST.DATASET
self.images = os.listdir(self.images_dir)
self.classes = cfg.MODEL.CLASSES
self.num_images = len(self.images)
......
......@@ -18,9 +18,9 @@ import multiprocessing
import numpy as np
from lib.core.config import cfg
from lib.utils import time_util
from lib.utils.vis import vis_one_image
from seetadet.core.config import cfg
from seetadet.utils import time_util
from seetadet.utils.vis import vis_one_image
def run_test_net(checkpoint, server, devices):
......@@ -30,8 +30,8 @@ def run_test_net(checkpoint, server, devices):
devices = devices if devices else [cfg.GPU_ID]
num_workers = len(devices)
test_fn = importlib.import_module(
'lib.%s.test' % cfg.MODEL.TYPE).test_net
test_module = 'seetadet.algo.%s.test' % cfg.MODEL.TYPE
test_fn = importlib.import_module(test_module).test_net
_t = time_util.new_timers('im_detect', 'mask_detect', 'misc')
......
......@@ -22,11 +22,11 @@ import os
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.solver.sgd import SGDSolver
from lib.utils import logger
from lib.utils import time_util
from lib.utils.stats import SmoothedValue
from seetadet.core.config import cfg
from seetadet.solver.sgd import SGDSolver
from seetadet.utils import logger
from seetadet.utils import time_util
from seetadet.utils.stats import SmoothedValue
class SolverWrapper(object):
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.dali as dali
import numpy as np
from seetadet.core.config import cfg
class DataReader(dali.ops.KPLRecordReader):
def __init__(
self,
path,
features,
pipeline,
shard_id=0,
num_shards=1,
shuffle_after_epoch=False,
shuffle_chunks=0,
aspect_grouping=False,
):
super(DataReader, self).__init__(
path=path,
features=features,
pipeline=pipeline,
shard_id=shard_id,
num_shards=num_shards,
shuffle_after_epoch=shuffle_after_epoch,
shuffle_chunks=shuffle_chunks,
)
self._aspect_grouping = aspect_grouping
self._class_to_ind = dict(zip(
cfg.MODEL.CLASSES,
range(len(cfg.MODEL.CLASSES))
))
self._queue1, self._queue2 = [], []
def feed_inputs(self):
if not self._aspect_grouping:
feed_dict = collections.defaultdict(list)
for i in range(self._pipe.batch_size):
while True:
example = self._buffer.get()
if len(example['object']) > 0:
break
data = self.example_to_data(example)
for k, v in data.items():
feed_dict[k].append(v)
for k, v in self.features.items():
self._pipe.feed_input(self.features[k], feed_dict[k])
else:
batch_size = self._pipe.batch_size
while True:
batch_data = None
if len(self._queue1) >= batch_size:
batch_data = self._queue1[:batch_size]
self._queue1 = self._queue1[batch_size:]
elif len(self._queue2) >= batch_size:
batch_data = self._queue2[:batch_size]
self._queue2 = self._queue2[batch_size:]
if batch_data is not None:
feed_dict = collections.defaultdict(list)
for data in batch_data:
for k, v in data.items():
feed_dict[k].append(v)
for k, v in self.features.items():
self._pipe.feed_input(self.features[k], feed_dict[k])
break
while True:
example = self._buffer.get()
if len(example['object']) > 0:
break
data = self.example_to_data(example)
ratio = float(data['shape'][0]) / data['shape'][1]
if ratio > 1:
self._queue1.append(data)
else:
self._queue2.append(data)
def example_to_data(self, example):
bbox_data, bbox_ratio, bbox_label = [], [], []
h, w, c = example['height'], example['width'], example['depth']
for obj in example['object']:
x1 = float(max(obj['xmin'], 0))
y1 = float(max(obj['ymin'], 0))
x2 = float(min(obj['xmax'], w - 1))
y2 = float(min(obj['ymax'], h - 1))
bbox_data.append([x1, y1, x2, y2])
bbox_ratio.append([x1 / w, y1 / h, x2 / w, y2 / h])
bbox_label.append(self._class_to_ind[obj['name']])
return {
'image': example['content'],
'shape': np.array([h, w, c], 'int64'),
'bbox/data': np.array(bbox_data, 'float32'),
'bbox/ratio': np.array(bbox_ratio, 'float32'),
'bbox/label': np.array(bbox_label, 'int32')
}
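The aspect grouping above buffers portrait (h/w > 1) and landscape examples in separate queues and emits a batch only when one queue is full, so every batch shares a single orientation. A toy illustration of the queueing rule:

```python
queue1, queue2, batch_size = [], [], 2
for ratio in (1.4, 0.7, 1.2, 0.6):
    (queue1 if ratio > 1 else queue2).append(ratio)
    if len(queue1) >= batch_size:
        print('portrait batch:', queue1[:batch_size])
        queue1 = queue1[batch_size:]
    elif len(queue2) >= batch_size:
        print('landscape batch:', queue2[:batch_size])
        queue2 = queue2[batch_size:]
```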
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from dragon.vm import dali
from dragon.vm.dali.plugin.pytorch import DALIGenericIterator
from seetadet.core.config import cfg
from seetadet.dali.data_reader import DataReader
class Pipeline(dali.Pipeline):
def __init__(self, source):
super(Pipeline, self).__init__(
batch_size=cfg.TRAIN.IMS_PER_BATCH,
num_threads=cfg.TRAIN.NUM_THREADS,
)
random_scales = cfg.TRAIN.RANDOM_SCALES
if random_scales[1] > 1:
raise ValueError('The max scale range should be <= 1.')
mean_values = np.array(cfg.PIXEL_MEANS, 'int64').tolist()
self.max_size = cfg.TRAIN.MAX_SIZE
self.reader = DataReader(
path=source,
features=['image', 'shape', 'bbox/data', 'bbox/label'],
pipeline=self,
shard_id=dali.get_distributed_info()[0],
num_shards=dali.get_distributed_info()[1],
shuffle_after_epoch=cfg.TRAIN.USE_SHUFFLE,
shuffle_chunks=cfg.TRAIN.SHUFFLE_CHUNKS,
aspect_grouping=True,
)
self.decode = dali.ops.ImageDecoder()
self.resize = dali.ops.Resize(max_size=self.max_size)
self.brightness_contrast = dali.ops.BrightnessContrast()
self.hsv = dali.ops.Hsv()
self.cmn = dali.ops.CropMirrorNormalize(
mean=np.array(mean_values, 'int64').tolist(),
std=[1., 1., 1.],
)
self.pad = dali.ops.Pad(
axes=[1, 2],
align=cfg.MODEL.COARSEST_STRIDE
if cfg.MODEL.COARSEST_STRIDE > 0 else None,
)
with dali.device('cpu'):
self.resize_rng = dali.ops.Uniform([
cfg.TRAIN.SCALES[0] * random_scales[0],
cfg.TRAIN.SCALES[0] * random_scales[1],
])
self.twist_rng = dali.ops.Uniform([0.6, 1.4])
self.flip_rng = dali.ops.CoinFlip(0.5 if cfg.TRAIN.USE_FLIPPED else 0.)
def iter_setup(self):
self.reader.feed_inputs()
def define_graph(self):
# Read inputs from file
inputs = self.reader()
shape = inputs['shape']
bbox = inputs['bbox/data']
label = inputs['bbox/label']
# Decode image
image = self.decode(inputs['image'])
# Augment the color space
if cfg.TRAIN.USE_COLOR_JITTER:
image = self.hsv(
self.brightness_contrast(
image,
brightness=self.twist_rng(),
contrast=self.twist_rng(),
),
saturation=self.twist_rng()
)
# Resize to the target size
target_size = self.resize_rng()
image = self.resize(image, resize_shorter=target_size)
# Normalize and pad to blob shape
apply_flip = self.flip_rng()
image = self.cmn(image, mirror=apply_flip)
image = self.pad(image)
return image, bbox, label, target_size, shape, apply_flip
class Iterator(DALIGenericIterator):
def __init__(self, pipeline):
super(Iterator, self).__init__(pipeline)
@property
def handlers(self):
return ([0], self.copy_handler,), \
([1, 2, 3, 4, 5], self.gt_handler)
def next(self):
(images,), (gt_boxes, ims_info) = self.__next__()
return {'data': images, 'gt_boxes': gt_boxes, 'ims_info': ims_info}
def gt_handler(self, tensors):
def impl(box_list, labels, im_shape, target_size, max_size, flip):
num_images = len(box_list)
im_size_min = np.min(im_shape[:, :2], axis=1).astype('float32')
im_size_max = np.max(im_shape[:, :2], axis=1).astype('float32')
im_scales = target_size / im_size_min
inds = np.where(np.round(im_scales * im_size_max) > max_size)[0]
im_scales[inds] = max_size / im_size_max[inds]
box_list = [box_list[i] * im_scales[i] for i in range(num_images)]
for i in (np.where(flip > 0)[0]):
boxes = box_list[i]
boxes_flipped = box_list[i].copy()
width = im_shape[i, 1] * im_scales[i]
boxes_flipped[:, 0] = width - boxes[:, 2] - 1
boxes_flipped[:, 2] = width - boxes[:, 0] - 1
box_list[i] = boxes_flipped
im_scales = np.expand_dims(im_scales, 1)
batch_inds = [np.ones([e.size, 1]) * i for i, e in enumerate(labels)]
boxes = np.concatenate(box_list)
labels = np.expand_dims(np.concatenate(labels), axis=1)
batch_inds = np.concatenate(batch_inds)
gt_boxes = np.hstack([boxes, labels, batch_inds])
ims_info = np.hstack([im_shape[:, :2] * im_scales, im_scales])
return gt_boxes.astype('float32'), ims_info.astype('float32')
bbox, label, target_size, shape, flip = tensors
shape = shape.as_array()
return impl(
box_list=[bbox.at(i) for i in range(len(shape))],
labels=[label.at(i) for i in range(len(shape))],
im_shape=shape,
target_size=target_size.as_array().squeeze(),
max_size=self._pipe.max_size,
flip=flip.as_array()
)
def new_iterator(source):
with dali.device('cuda', cfg.GPU_ID):
return Iterator(Pipeline(source))
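Hypothetical usage of the iterator above; the record path is illustrative:

```python
iterator = new_iterator('/data/train_record')
blobs = iterator.next()  # {'data': ..., 'gt_boxes': ..., 'ims_info': ...}
```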
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from dragon.vm import dali
from dragon.vm.dali.plugin.pytorch import DALIGenericIterator
from seetadet.core.config import cfg
from seetadet.dali.data_reader import DataReader
class Pipeline(dali.Pipeline):
def __init__(self, source):
super(Pipeline, self).__init__(
batch_size=cfg.TRAIN.IMS_PER_BATCH,
num_threads=cfg.TRAIN.NUM_THREADS,
)
paste_ratio = 1. / cfg.TRAIN.RANDOM_SCALES[0]
mean_values = np.array(cfg.PIXEL_MEANS, 'int64').tolist()
self.target_size = cfg.TRAIN.SCALES[0]
self.reader = DataReader(
path=source,
features=['image', 'bbox/ratio', 'bbox/label'],
pipeline=self,
shard_id=dali.get_distributed_info()[0],
num_shards=dali.get_distributed_info()[1],
shuffle_after_epoch=cfg.TRAIN.USE_SHUFFLE,
shuffle_chunks=cfg.TRAIN.SHUFFLE_CHUNKS,
)
self.decode = dali.ops.ImageDecoder()
self.brightness_contrast = dali.ops.BrightnessContrast()
self.hsv = dali.ops.Hsv()
self.paste = dali.ops.Paste(fill_value=mean_values)
self.slice = dali.ops.Slice()
self.resize = dali.ops.Resize(self.target_size, self.target_size)
self.cmn = dali.ops.CropMirrorNormalize(mean=mean_values, std=[1., 1., 1.])
with dali.device('cpu'):
self.bbox_paste = dali.ops.BBoxPaste()
self.bbox_crop = dali.ops.RandomBBoxCrop()
self.bbox_flip = dali.ops.BbFlip()
self.twist_rng = dali.ops.Uniform([0.6, 1.4])
self.paste_pos = dali.ops.Uniform((0., 1.))
self.paste_ratio = dali.ops.Uniform((0., paste_ratio - 1))
self.flip_rng = dali.ops.CoinFlip(0.5 if cfg.TRAIN.USE_FLIPPED else 0.)
def iter_setup(self):
self.reader.feed_inputs()
def define_graph(self):
# Read inputs from file
inputs = self.reader()
bbox = inputs['bbox/ratio']
label = inputs['bbox/label']
# Decode image
image = self.decode(inputs['image'])
# Augment the color space
image = self.hsv(
self.brightness_contrast(
image,
brightness=self.twist_rng(),
contrast=self.twist_rng(),
), saturation=self.twist_rng()
)
# Expand randomly to get smaller objects
pr = self.paste_ratio() * self.flip_rng() + 1.
px, py = self.paste_pos(), self.paste_pos()
image = self.paste(image, paste_x=px, paste_y=py, ratio=pr)
bbox = self.bbox_paste(bbox, paste_x=px, paste_y=py, ratio=pr)
# Sample RoIs with IoU constraint
crop_begin, crop_size, bbox, label = self.bbox_crop(bbox, label)
image = self.slice(image, crop_begin, crop_size)
# Resize image to a fixed size
image = self.resize(image)
# Normalize
apply_flip = self.flip_rng()
image = self.cmn(image, mirror=apply_flip)
bbox = self.bbox_flip(bbox, horizontal=apply_flip)
return image, bbox, label
class Iterator(DALIGenericIterator):
def __init__(self, pipeline):
super(Iterator, self).__init__(pipeline)
@property
def handlers(self):
return ([0], self.copy_handler,), ([1, 2], self.gt_handler)
def next(self):
(images,), gt_boxes = self.__next__()
return {'data': images, 'gt_boxes': gt_boxes}
def gt_handler(self, tensors):
bbox, label = tensors
num_images = self._pipe.batch_size
boxes = np.concatenate([bbox.at(i) for i in range(num_images)])
boxes[:, 0::2] *= self._pipe.target_size
boxes[:, 1::2] *= self._pipe.target_size
labels = [label.at(i) for i in range(num_images)]
batch_inds = [np.ones_like(e) * i for i, e in enumerate(labels)]
labels, batch_inds = np.concatenate(labels), np.concatenate(batch_inds)
return np.hstack([boxes, labels, batch_inds])
def new_iterator(source):
with dali.device('cuda', cfg.GPU_ID):
return Iterator(Pipeline(source))
......@@ -19,11 +19,11 @@ import sys
import numpy as np
from lib.core.config import cfg
from lib.pycocotools import mask as mask_tools
from lib.pycocotools.coco import COCO
from lib.pycocotools.cocoeval import COCOeval
from lib.utils import mask as mask_util
from seetadet.core.config import cfg
from seetadet.pycocotools import mask as mask_tools
from seetadet.pycocotools.coco import COCO
from seetadet.pycocotools.cocoeval import COCOeval
from seetadet.utils import mask as mask_util
class COCOEvaluator(object):
......
......@@ -20,12 +20,14 @@ from __future__ import print_function
import os
import uuid
from lib.core.config import cfg
from lib.datasets.coco_evaluator import COCOEvaluator
from lib.datasets.voc_evaluator import VOCEvaluator
from seetadet.core.config import cfg
from seetadet.datasets.coco_evaluator import COCOEvaluator
from seetadet.datasets.voc_evaluator import VOCEvaluator
class imdb(object):
class Dataset(object):
"""The base dataset class."""
def __init__(self, source):
self._source = source
self._num_images = 0
......@@ -51,6 +53,10 @@ class imdb(object):
return self._class_to_ind
@property
def cls(self):
return type(self)
@property
def comp_id(self):
return '_' + self._salt if self.config['use_salt'] else ''
......
......@@ -16,7 +16,7 @@ from __future__ import print_function
import cv2
import numpy as np
from lib.pycocotools import mask_utils
from seetadet.pycocotools import mask_utils
class Example(object):
......
......@@ -18,27 +18,29 @@ from __future__ import division
from __future__ import print_function
import os
from lib.datasets.taas import TaaS
from seetadet.datasets import kpl_record
# TaaS DataSet
_GLOBAL_DATA_SETS = {'taas': lambda source: TaaS(source)}
def get_imdb(name):
"""Get an imdb (image database) by name."""
keys = name.split(':')
def get_dataset(name):
"""Get a dataset by name."""
keys = name.split('://')
if len(keys) >= 2:
cls, source = keys[0], ':'.join(keys[1:])
if cls not in _GLOBAL_DATA_SETS:
raise KeyError('Unknown DataSet: {}'.format(cls))
return _GLOBAL_DATA_SETS[cls](source)
cls, source = keys
if cls not in _GLOBAL_REGISTERED_DATASET:
raise KeyError('Unknown dataset: ' + cls)
return _GLOBAL_REGISTERED_DATASET[cls](source)
elif os.path.exists(name):
return _GLOBAL_DATA_SETS['taas'](name)
return _GLOBAL_REGISTERED_DATASET['default'](name)
else:
raise ValueError('Illegal Database: {}' + name)
raise ValueError('Illegal dataset: ' + name)
def list_dataset():
"""List all registered dataset."""
return _GLOBAL_REGISTERED_DATASET.keys()
def list_imdbs():
"""List all registered imdbs."""
return _GLOBAL_DATA_SETS.keys()
_GLOBAL_REGISTERED_DATASET = {
'default': lambda source:
kpl_record.KPLRecordDataset(source),
}
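A usage sketch for the factory above; the paths are hypothetical:

```python
from seetadet.datasets.factory import get_dataset, list_dataset

print(list_dataset())                      # -> dict_keys(['default'])
ds = get_dataset('default:///data/train')  # explicit '<cls>://<source>' form
ds = get_dataset('/data/train')            # falls back to 'default' when the path exists
```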
......@@ -21,23 +21,26 @@ import os
import dragon
from lib.core.config import cfg
from lib.datasets.imdb import imdb
from seetadet.core.config import cfg
from seetadet.datasets.dataset import Dataset
class TaaS(imdb):
class KPLRecordDataset(Dataset):
def __init__(self, source):
imdb.__init__(self, source)
self._dataset = dragon.io.SeetaRecordDataset
self._num_images = self._dataset(self.source).size
super(KPLRecordDataset, self).__init__(source)
self._num_images = self.cls(self.source).size
@property
def cls(self):
return dragon.io.KPLRecordDataset
def dump_detections(self, all_boxes, output_dir):
dataset = self._dataset(self.source)
dataset = self.cls(self.source)
for file in ('data.data', 'data.index', 'data.meta'):
file = os.path.join(output_dir, file)
if os.path.exists(file):
os.remove(file)
writer = dragon.io.SeetaRecordWriter(output_dir, dataset.protocol)
writer = dragon.io.KPLRecordWriter(output_dir, dataset.protocol)
for i in range(len(dataset)):
example = dataset.get()
example['object'] = []
......
......@@ -20,11 +20,11 @@ from __future__ import print_function
import cv2
import numpy as np
from lib.core.config import cfg
from lib.pycocotools import mask_utils
from lib.utils import boxes as box_util
from lib.utils.framework import pickle
from lib.utils.mask import mask_overlap
from seetadet.core.config import cfg
from seetadet.pycocotools import mask_utils
from seetadet.utils import boxes as box_util
from seetadet.utils.env import pickle
from seetadet.utils.mask import mask_overlap
def voc_ap(rec, prec, use_07_metric=False):
......
......@@ -16,8 +16,8 @@ from __future__ import print_function
import os
import numpy as np
from lib.datasets import voc_eval
from lib.utils.framework import pickle
from seetadet.datasets import voc_eval
from seetadet.utils.env import pickle
class VOCEvaluator(object):
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Backbones
import seetadet.modeling.airnet
import seetadet.modeling.mobilenet
import seetadet.modeling.resnet
import seetadet.modeling.vgg
# Custom modules
from seetadet.modeling.fast_rcnn import FastRCNN
from seetadet.modeling.fpn import FPN
from seetadet.modeling.mask_rcnn import MaskRCNN
from seetadet.modeling.retinanet import RetinaNet
from seetadet.modeling.rpn import RPN
from seetadet.modeling.ssd import SSD
......@@ -15,17 +15,18 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.modules import init
from lib.modules import nn
from seetadet.core.registry import backbones
from seetadet.modules import init
from seetadet.modules import nn
class WideResBlock(nn.Module):
def __init__(self, dim_in, dim_out, stride=1, downsample=None):
super(WideResBlock, self).__init__()
self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = nn.Affine(dim_out)
self.bn1 = nn.FrozenAffine(dim_out)
self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = nn.Affine(dim_out)
self.bn2 = nn.FrozenAffine(dim_out)
self.downsample = downsample
self.relu = nn.ReLU(inplace=True)
......@@ -51,15 +52,15 @@ class InceptionBlock(nn.Module):
def __init__(self, dim_in, dim_out):
super(InceptionBlock, self).__init__()
self.conv1 = nn.Conv1x1(dim_in, dim_out)
self.bn1 = nn.Affine(dim_out)
self.bn1 = nn.FrozenAffine(dim_out)
self.conv2 = nn.Conv3x3(dim_out, dim_out // 2)
self.bn2 = nn.Affine(dim_out // 2)
self.bn2 = nn.FrozenAffine(dim_out // 2)
self.conv3a = nn.Conv3x3(dim_out // 2, dim_out)
self.bn3a = nn.Affine(dim_out)
self.bn3a = nn.FrozenAffine(dim_out)
self.conv3b = nn.Conv3x3(dim_out, dim_out)
self.bn3b = nn.Affine(dim_out)
self.bn3b = nn.FrozenAffine(dim_out)
self.conv4 = nn.Conv3x3(dim_out * 3, dim_out)
self.bn4 = nn.Affine(dim_out)
self.bn4 = nn.FrozenAffine(dim_out)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
......@@ -103,7 +104,7 @@ class AirNet(nn.Module):
padding=3,
bias=False,
)
self.bn1 = nn.Affine(self.dim_in)
self.bn1 = nn.FrozenAffine(self.dim_in)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(
kernel_size=2,
......@@ -127,7 +128,7 @@ class AirNet(nn.Module):
def make_blocks(self, dim_out, blocks, stride=1):
downsample = nn.Sequential(
nn.Conv1x1(self.dim_in, dim_out, stride=stride),
nn.Affine(dim_out),
nn.FrozenAffine(dim_out),
)
layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)]
self.dim_in = dim_out
......@@ -164,13 +165,7 @@ def airnet(num_stages):
return AirNet(blocks, num_stages)
def make_airnet_(): return airnet(5)
def make_airnet_3b(): return airnet(3)
def make_airnet_4b(): return airnet(4)
def make_airnet_5b(): return airnet(5)
backbones.register('airnet', func=airnet, num_stages=5)
backbones.register('airnet_3b', func=airnet, num_stages=3)
backbones.register('airnet_4b', func=airnet, num_stages=4)
backbones.register('airnet_5b', func=airnet, num_stages=5)
......@@ -17,17 +17,12 @@ import collections
import importlib
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import FPN
from lib.modeling import RPN
from lib.modeling import FastRCNN
from lib.modeling import MaskRCNN
from lib.modeling import RetinaNet
from lib.modeling import SSD
from lib.modeling.factory import get_body_func
from lib.modules import nn
from lib.modules import vision
from lib.utils import logger
from seetadet import modeling as models
from seetadet.core.config import cfg
from seetadet.core.registry import backbones
from seetadet.modules import nn
from seetadet.modules import vision
from seetadet.utils import logger
class Detector(nn.Module):
......@@ -46,18 +41,17 @@ class Detector(nn.Module):
# + DataLoader
self.data_loader_cls = importlib.import_module(
'lib.{}'.format(model)).DataLoader
'seetadet.algo.{}'.format(model)).DataLoader
self.bootstrap = vision.Bootstrap()
# + FeatureExtractor
self.body = get_body_func(body)()
self.body = backbones.get(body)()
feature_dims = self.body.feature_dims
# + FeatureEnhancer
if 'fpn' in modules:
self.fpn = FPN(feature_dims)
self.fpn = models.FPN(feature_dims)
feature_dims = self.fpn.feature_dims
elif 'mbox' in modules:
pass # Placeholder
else:
......@@ -65,17 +59,17 @@ class Detector(nn.Module):
# + Detection Modules
if 'rcnn' in model:
self.rpn = RPN(feature_dims[0])
self.rpn = models.RPN(feature_dims[0])
if 'faster' in model:
self.rcnn = FastRCNN(feature_dims[0])
self.rcnn = models.FastRCNN(feature_dims[0])
elif 'mask' in model:
self.rcnn = MaskRCNN(feature_dims[0])
self.rcnn = models.MaskRCNN(feature_dims[0])
if 'retinanet' in model:
self.retinanet = RetinaNet(feature_dims[0])
self.retinanet = models.RetinaNet(feature_dims[0])
if 'ssd' in model:
self.ssd = SSD(feature_dims)
self.ssd = models.SSD(feature_dims)
def load_weights(self, weights):
"""Load the state dict of this detector.
......@@ -171,13 +165,11 @@ class Detector(nn.Module):
return outputs
def optimize_for_inference(self):
"""Optimize the graph for the inference.
"""Optimize the graph for the inference."""
It usually involves the removing of BN or Affine.
"""
##################################
###################################
# Merge Affine into Convolution #
##################################
###################################
last_module = None
for e in self.modules():
if isinstance(e, nn.Affine) and \
......@@ -195,7 +187,7 @@ class Detector(nn.Module):
last_module = None
for e in self.modules():
if isinstance(e, nn.BatchNorm2d) and \
nn.is_conv2d(last_module):
isinstance(last_module, nn.Conv2d):
if last_module.bias is None:
delattr(last_module, 'bias')
e.forward = lambda x: x
......
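A minimal numpy sketch of the folding performed above: a frozen per-channel affine y = alpha * x + beta after a convolution is absorbed by rescaling the conv weights and bias (shapes illustrative):

```python
import numpy as np

w = np.random.randn(8, 4, 3, 3).astype('float32')  # conv weight (out, in, kh, kw)
b = np.zeros(8, 'float32')                         # conv bias
alpha = np.random.randn(8).astype('float32')       # affine scale
beta = np.random.randn(8).astype('float32')        # affine shift

# conv(x) * alpha + beta == conv'(x) with the folded parameters:
w_folded = w * alpha.reshape(8, 1, 1, 1)
b_folded = b * alpha + beta
```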
......@@ -18,12 +18,12 @@ import functools
import dragon.vm.torch as torch
from lib import faster_rcnn
from lib.core.config import cfg
from lib.modules import det
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
from seetadet.algo import faster_rcnn
from seetadet.core.config import cfg
from seetadet.modules import det
from seetadet.modules import init
from seetadet.modules import nn
from seetadet.modules import vision
class FastRCNN(nn.Module):
......@@ -54,7 +54,11 @@ class FastRCNN(nn.Module):
'RoIAlign': vision.roi_align
}[cfg.FRCNN.ROI_XFORM_METHOD], size=cfg.FRCNN.ROI_XFORM_RESOLUTION)
self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = nn.SmoothL1Loss()
if 'IOU' in cfg.MODEL.REG_LOSS_TYPE.upper():
self.bbox_loss = nn.IoULoss(
delta_weights=cfg.BBOX_REG_WEIGHTS)
else:
self.bbox_loss = nn.SmoothL1Loss(reduction='sum')
# Compute spatial scales according to strides
self.spatial_scales = [
1. / (2 ** lvl)
......@@ -124,15 +128,22 @@ class FastRCNN(nn.Module):
if self.training:
# Compute rcnn losses
bbox_pred = outputs['bbox_pred'].view(0, -1, 4) \
.index_select((0, 1), self.data['bbox_indices'])
bbox_loss_weight = \
cfg.MODEL.REG_LOSS_WEIGHT / (
roi_features.shape[0] if isinstance(
self.bbox_loss, nn.SmoothL1Loss
) else 1.
)
outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss(
cls_score, self.data['labels'])),
('bbox_loss', self.bbox_loss(
outputs['bbox_pred'],
bbox_pred,
self.data['bbox_targets'],
self.data['bbox_inside_weights'],
self.data['bbox_outside_weights'],
)),
self.data['bbox_anchors'],
) * bbox_loss_weight),
]))
else:
# Return the rois to decode the refine boxes
......
......@@ -13,11 +13,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from dragon.vm.torch.nn import functional as nn_funcs
from lib.core.config import cfg
from lib.modules import init
from lib.modules import nn
from seetadet.core.config import cfg
from seetadet.modules import init
from seetadet.modules import nn
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
......@@ -36,7 +36,7 @@ class FPN(nn.Module):
self.P.append(nn.Conv3x3(dim, dim, bias=True))
if 'rcnn' in cfg.MODEL.TYPE:
self.apply_func = self.apply_on_rcnn
self.maxpool = nn.MaxPool2d(1, 2, ceil_mode=True)
self.maxpool = nn.MaxPool2d(kernel_size=1, stride=2)
else:
self.apply_func = self.apply_on_generic
self.relu = nn.ReLU(inplace=False)
......@@ -44,6 +44,7 @@ class FPN(nn.Module):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else dim
self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True))
self.feature_dims = [dim]
self.coarsest_stride = cfg.MODEL.COARSEST_STRIDE
self.reset_parameters()
def reset_parameters(self):
......@@ -56,14 +57,18 @@ class FPN(nn.Module):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Apply MaxPool for higher features
# Apply max pool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1]))
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
# Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=None, fx=2., fy=2.)
if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample(
fpn_input, scale_factor=2)
else:
upscale_output = nn_funcs.upsample(
fpn_input, size=lateral_output.shape[2:])
fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
......@@ -78,11 +83,15 @@ class FPN(nn.Module):
outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl:
extra_input = self.relu(outputs[-1])
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
# Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=None, fx=2., fy=2.)
if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample(
fpn_input, scale_factor=2)
else:
upscale_output = nn_funcs.upsample(
fpn_input, size=lateral_output.shape[2:])
fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
......
......@@ -18,12 +18,12 @@ import functools
import dragon.vm.torch as torch
from lib import mask_rcnn
from lib.core.config import cfg
from lib.modules import det
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
from seetadet.algo import mask_rcnn
from seetadet.core.config import cfg
from seetadet.modules import det
from seetadet.modules import init
from seetadet.modules import nn
from seetadet.modules import vision
class MaskRCNN(nn.Module):
......@@ -65,7 +65,7 @@ class MaskRCNN(nn.Module):
'RoIAlign': vision.roi_align,
}[cfg.MRCNN.ROI_XFORM_METHOD], size=cfg.MRCNN.ROI_XFORM_RESOLUTION)
self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = nn.SmoothL1Loss()
self.bbox_loss = nn.SmoothL1Loss(reduction='sum')
self.mask_loss = nn.BCEWithLogitsLoss()
# Compute spatial scales according to strides
self.spatial_scales = [
......@@ -146,15 +146,14 @@ class MaskRCNN(nn.Module):
if self.training:
# Compute the loss of bbox branch
bbox_pred = outputs['bbox_pred'].view(0, -1, 4) \
.index_select((0, 1), self.data['bbox_indices'])
outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss(
cls_score, self.data['labels'])),
('bbox_loss', self.bbox_loss(
outputs['bbox_pred'],
self.data['bbox_targets'],
self.data['bbox_inside_weights'],
self.data['bbox_outside_weights'],
)),
bbox_pred, self.data['bbox_targets'],
) / roi_features.shape[0]),
]))
# Compute the loss of mask branch
mask_score = self.get_mask_score(
......@@ -171,7 +170,7 @@ class MaskRCNN(nn.Module):
outputs['rois'] = self.data['rois'][0]
# Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score)
# Set a callback to decode mask from refine RoIs
# Set a callback to decode mask from refined RoIs
self.compute_mask_score = \
functools.partial(
self.get_mask_score,
......
......@@ -17,17 +17,18 @@ import functools
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
from seetadet.core.config import cfg
from seetadet.core.registry import backbones
from seetadet.modules import init
from seetadet.modules import nn
from seetadet.modules import vision
def conv_triplet(dim_in, dim_out):
"""1x1 convolution + BN + ReLU."""
return [
nn.Conv2d(dim_in, dim_out, 1, bias=False),
nn.Affine(dim_out),
nn.FrozenAffine(dim_out),
nn.ReLU(True),
]
......@@ -42,10 +43,10 @@ def conv_quintet(dim_in, dim_out, ks, stride):
padding=ks // 2,
bias=False,
),
nn.Affine(dim_in),
nn.FrozenAffine(dim_in),
nn.ReLU(True),
nn.Conv1x1(dim_in, dim_out),
nn.Affine(dim_out),
nn.FrozenAffine(dim_out),
]
......@@ -76,7 +77,7 @@ def Stem(dim_out, stride=1):
padding=1,
bias=False,
),
nn.Affine(dim_out),
nn.FrozenAffine(dim_out),
nn.ReLU(True),
)
......@@ -197,7 +198,8 @@ class NASMobileNet(nn.Module):
return outputs
def make_mobilenet_a1():
@backbones.register('mobilenet_a1')
def mobilenet_a1():
return NASMobileNet([
4, 6, 6, 6,
3, 3, 4, 6,
......@@ -207,7 +209,8 @@ def make_mobilenet_a1():
], Setting.PROXYLESS_MOBILE)
def make_mobilenet_v2():
@backbones.register('mobilenet_v2')
def mobilenet_v2():
return NASMobileNet([
1, 1,
1, 1, 1,
......
......@@ -19,9 +19,10 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modules import nn
from lib.modules import init
from seetadet.core.config import cfg
from seetadet.core.registry import backbones
from seetadet.modules import nn
from seetadet.modules import init
class BasicBlock(nn.Module):
......@@ -35,10 +36,10 @@ class BasicBlock(nn.Module):
):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = nn.Affine(dim_out)
self.bn1 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = nn.Affine(dim_out)
self.bn2 = nn.FrozenAffine(dim_out)
self.downsample = downsample
self.dropblock = dropblock
......@@ -83,11 +84,11 @@ class Bottleneck(torch.nn.Module):
super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction)
self.conv1 = nn.Conv1x1(dim_in, dim)
self.bn1 = nn.Affine(dim)
self.bn1 = nn.FrozenAffine(dim)
self.conv2 = nn.Conv3x3(dim, dim, stride=stride)
self.bn2 = nn.Affine(dim)
self.bn2 = nn.FrozenAffine(dim)
self.conv3 = nn.Conv1x1(dim, dim_out)
self.bn3 = nn.Affine(dim_out)
self.bn3 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.downsample = downsample
self.dropblock = dropblock
......@@ -132,7 +133,7 @@ class ResNet(torch.nn.Module):
padding=3,
bias=False,
)
self.bn1 = nn.Affine(self.dim_in)
self.bn1 = nn.FrozenAffine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d(
kernel_size=3,
......@@ -181,7 +182,7 @@ class ResNet(torch.nn.Module):
if stride != 1 or self.dim_in != dim_out:
downsample = nn.Sequential(
nn.Conv1x1(self.dim_in, dim_out, stride=stride),
nn.Affine(dim_out),
nn.FrozenAffine(dim_out),
)
layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)]
self.dim_in = dim_out
......@@ -194,11 +195,17 @@ class ResNet(torch.nn.Module):
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
outputs = [x]
outputs += [self.layer1(outputs[-1])]
outputs += [self.layer2(outputs[-1])]
outputs += [self.layer3(outputs[-1])]
outputs += [self.layer4(outputs[-1])]
if self.training:
# Hold the frozen outputs if necessary
self.last_outputs = outputs
return outputs
......@@ -225,16 +232,8 @@ def resnet(depth):
return ResNet(block, units, filters)
def make_resnet_18(): return resnet(18)
def make_resnet_34(): return resnet(34)
def make_resnet_50(): return resnet(50)
def make_resnet_101(): return resnet(101)
def make_resnet_152(): return resnet(152)
backbones.register(['res18', 'resnet18', 'resnet_18'], func=resnet, depth=18)
backbones.register(['res34', 'resnet34', 'resnet_34'], func=resnet, depth=34)
backbones.register(['res50', 'resnet50', 'resnet_50'], func=resnet, depth=50)
backbones.register(['res101', 'resnet101', 'resnet_101'], func=resnet, depth=101)
backbones.register(['res152', 'resnet152', 'resnet_152'], func=resnet, depth=152)
......@@ -17,11 +17,11 @@ import collections
import math
import dragon.vm.torch as torch
from lib import retinanet
from lib.core.config import cfg
from lib.modules import det
from lib.modules import init
from lib.modules import nn
from seetadet.algo import retinanet
from seetadet.core.config import cfg
from seetadet.modules import det
from seetadet.modules import init
from seetadet.modules import nn
class RetinaNet(nn.Module):
......@@ -56,7 +56,11 @@ class RetinaNet(nn.Module):
self.anchor_target = retinanet.AnchorTarget()
self.cls_loss = nn.SigmoidFocalLoss()
if 'IOU' in cfg.MODEL.REG_LOSS_TYPE.upper():
self.bbox_loss = nn.IoULoss()
else:
self.bbox_loss = nn.SmoothL1Loss(0.1111)
self.centerness_loss = nn.BCEWithLogitsLoss(reduction='valid')
self.reset_parameters()
def reset_parameters(self):
......@@ -71,7 +75,8 @@ class RetinaNet(nn.Module):
# For details, See the official codes:
# https://github.com/facebookresearch/Detectron
self.cls_score.bias.fill_(
-math.log((1 - cfg.PRIOR_PROB) / cfg.PRIOR_PROB))
-math.log((1 - cfg.PRIOR_PROB) / cfg.PRIOR_PROB)
)
def compute_outputs(self, features):
"""Compute the RetinaNet logits.
......@@ -97,48 +102,44 @@ class RetinaNet(nn.Module):
return torch.cat(cls_score_wide, dim=2), \
torch.cat(bbox_pred_wide, dim=2)
else:
return cls_score_wide[0], bbox_pred_wide[0]
def compute_losses(
self,
features,
cls_score,
bbox_pred,
gt_boxes,
ims_info,
):
def compute_losses(self, features, cls_score, bbox_pred, gt_boxes):
"""Compute the RetinaNet classification loss and regression loss.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
features : Sequence[dragon.vm.torch.Tensor]
The features of specific conv layers.
cls_score : dragon.vm.torch.Tensor
The classification logits.
bbox_pred : dragon.vm.torch.Tensor
The bbox regression logits.
centerness : dragon.vm.torch.Tensor
The centerness logits.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
self.retinanet_data = \
self.data = \
self.anchor_target(
features=features,
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return collections.OrderedDict([
bbox_pred = bbox_pred.permute(0, 2, 1) \
.index_select((0, 1), self.data['bbox_indices'])
outputs = collections.OrderedDict([
('cls_loss', self.cls_loss(
cls_score, self.retinanet_data['labels'])),
cls_score, self.data['labels'])),
('bbox_loss', self.bbox_loss(
bbox_pred,
self.retinanet_data['bbox_targets'],
self.retinanet_data['bbox_inside_weights'],
self.retinanet_data['bbox_outside_weights'],
)),
self.data['bbox_targets'],
self.data['bbox_anchors'],
))
])
return outputs
def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
......@@ -149,19 +150,17 @@ class RetinaNet(nn.Module):
if self.training:
outputs.update(
self.compute_losses(
kwargs['features'],
cls_score,
bbox_pred,
kwargs['gt_boxes'],
kwargs['ims_info'],
features=kwargs['features'],
cls_score=cls_score,
bbox_pred=bbox_pred,
gt_boxes=kwargs['gt_boxes'],
)
)
else:
outputs['detections'] = \
self.decoder(
kwargs['features'],
self.cls_prob(cls_score)
.permute(0, 2, 1),
self.cls_prob(cls_score).permute(0, 2, 1),
bbox_pred,
kwargs['ims_info'],
)
......
......@@ -16,10 +16,10 @@ from __future__ import print_function
import collections
import dragon.vm.torch as torch
from lib import faster_rcnn
from lib.core.config import cfg
from lib.modules import init
from lib.modules import nn
from seetadet.algo import faster_rcnn
from seetadet.core.config import cfg
from seetadet.modules import init
from seetadet.modules import nn
class RPN(nn.Module):
......@@ -45,7 +45,8 @@ class RPN(nn.Module):
self.anchor_target = faster_rcnn.AnchorTarget()
self.cls_loss = nn.BCEWithLogitsLoss()
self.bbox_loss = nn.SmoothL1Loss(0.1111)
self.bbox_loss = nn.SmoothL1Loss(
beta=0.1111, reduction='sum')
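# beta = 0.1111 ~= 1/9, i.e. sigma = 3 in the smooth-L1
# formulation used by Faster R-CNN.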
self.reset_parameters()
def reset_parameters(self):
......@@ -108,21 +109,26 @@ class RPN(nn.Module):
The information of input images.
"""
self.rpn_data = \
self.data = \
self.anchor_target(
features=features,
gt_boxes=gt_boxes,
ims_info=ims_info,
)
bbox_pred = bbox_pred.permute(0, 2, 1) \
.index_select((0, 1), self.data['bbox_indices'])
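# With reduction='sum', normalize the regression loss by the
# total number of anchors sampled over the whole batch.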
bbox_loss_weight = 1. / (
cfg.TRAIN.RPN_BATCHSIZE *
cfg.TRAIN.IMS_PER_BATCH
)
return collections.OrderedDict([
('rpn_cls_loss', self.cls_loss(
cls_score, self.rpn_data['labels'])),
cls_score, self.data['labels'])),
('rpn_bbox_loss', self.bbox_loss(
bbox_pred,
self.rpn_data['bbox_targets'],
self.rpn_data['bbox_inside_weights'],
self.rpn_data['bbox_outside_weights'],
)),
self.data['bbox_targets'],
self.data['bbox_anchors'],
) * bbox_loss_weight),
])
def forward(self, *args, **kwargs):
......
......@@ -16,10 +16,10 @@ from __future__ import print_function
import collections
import dragon.vm.torch as torch
from lib import ssd
from lib.core.config import cfg
from lib.modules import init
from lib.modules import nn
from seetadet.algo import ssd
from seetadet.core.config import cfg
from seetadet.modules import init
from seetadet.modules import nn
class SSD(nn.Module):
......@@ -66,6 +66,10 @@ class SSD(nn.Module):
self.hard_mining = ssd.HardMining()
self.box_target = ssd.MultiBoxTarget()
self.cls_loss = nn.CrossEntropyLoss()
if 'IOU' in cfg.MODEL.REG_LOSS_TYPE.upper():
self.bbox_loss = nn.IoULoss(
delta_weights=cfg.BBOX_REG_WEIGHTS)
else:
self.bbox_loss = nn.SmoothL1Loss()
self.reset_parameters()
......@@ -110,8 +114,7 @@ class SSD(nn.Module):
# Concat them if necessary
return \
torch.cat(cls_score_wide, dim=1) \
.view(0, -1, cfg.MODEL.NUM_CLASSES), \
torch.cat(cls_score_wide, dim=1).view(0, -1, cfg.MODEL.NUM_CLASSES), \
torch.cat(bbox_pred_wide, dim=1).view(0, -1, self.box_dim)
def compute_losses(
......@@ -160,6 +163,8 @@ class SSD(nn.Module):
gt_boxes,
)
)
bbox_pred = bbox_pred.index_select(
(0, 1), self.data['bbox_indices'])
return collections.OrderedDict([
# A compensating factor of 4.0 is used,
# as we normalize over both the positive and negative samples.
......@@ -169,9 +174,8 @@ class SSD(nn.Module):
('bbox_loss', self.bbox_loss(
bbox_pred,
self.data['bbox_targets'],
self.data['bbox_inside_weights'],
self.data['bbox_outside_weights'],
)),
self.data['bbox_anchors'],
) * cfg.MODEL.REG_LOSS_WEIGHT)
])
def forward(self, *args, **kwargs):
......
......@@ -13,9 +13,10 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.core.config import cfg
from lib.modules import init
from lib.modules import nn
from seetadet.core.config import cfg
from seetadet.core.registry import backbones
from seetadet.modules import init
from seetadet.modules import nn
class VGG(nn.Module):
......@@ -41,14 +42,14 @@ class VGG(nn.Module):
if j == 0:
dim_in = filter_list[i]
if reduced:
# L2Norm is redundant from the observation
# We just keep a trainable scale
self.conv4_3_norm = nn.Affine(filter_list[3], bias=False)
self.conv4_3_norm.weight.zero_() # Zero-Init
self.conv4_3_norm = nn.L2Normalize(filter_list[3], init=20.)
self.fc6 = nn.Conv2d(
filter_list[-1], 1024,
kernel_size=3, padding=6,
stride=1, dilation=6,
in_channels=filter_list[-1],
out_channels=1024,
kernel_size=3,
padding=6,
stride=1,
dilation=6,
)
self.fc7 = nn.Conv1x1(1024, 1024, bias=True)
self.feature_dims = [filter_list[-2], 1024]
......@@ -142,14 +143,18 @@ class VGG(nn.Module):
else:
outputs.append(x)
if self.training:
# Hold the frozen outputs if necessary
self.last_outputs = outputs
return outputs
def make_vgg_16():
return VGG(([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]))
def vgg_16(**kwargs):
return VGG(([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]), **kwargs)
def make_vgg_16_reduced(scale=300):
def vgg_16_reduced(scale=300):
if scale == 300:
extra_arch = (
[2, 2, 1, 1],
......@@ -164,11 +169,9 @@ def make_vgg_16_reduced(scale=300):
)
else:
raise ValueError('Unsupported scale: {}'.format(scale))
return VGG(([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]),
extra_arch=extra_arch, reduced=True)
def make_vgg_16_reduced_300(): return make_vgg_16_reduced(300)
return vgg_16(extra_arch=extra_arch, reduced=True)
def make_vgg_16_reduced_512(): return make_vgg_16_reduced(512)
backbones.register('vgg16', func=vgg_16)
backbones.register('vgg16_reduced_300', func=vgg_16_reduced, scale=300)
backbones.register('vgg16_reduced_512', func=vgg_16_reduced, scale=512)
......@@ -13,8 +13,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.ssd.data_loader import DataLoader
from lib.ssd.hard_mining import HardMining
from lib.ssd.multibox import MultiBoxMatch
from lib.ssd.multibox import MultiBoxTarget
from lib.ssd.priorbox import PriorBox
import os
from seetadet.utils import env
env.load_library(os.path.join(os.path.dirname(__file__), '_C'))
......@@ -14,21 +14,40 @@ from __future__ import division
from __future__ import print_function
from dragon.vm.torch import nn
from dragon.vm.torch.autograd import function
from dragon.vm.torch.autograd.function import Function
from lib.core.config import cfg
from seetadet.core.config import cfg
class _RetinaNetDecoder(function.Function):
class _NonMaxSuppression(Function):
"""Filter out boxes that have high IoU with selected ones."""
def __init__(self, key, dev, **kwargs):
super(_NonMaxSuppression, self).__init__(key, dev, **kwargs)
self.iou_threshold = kwargs.get('iou_threshold', 0.5)
def attributes(self):
return {
'op_type': 'NonMaxSuppression',
'arguments': {'iou_threshold': self.iou_threshold}
}
def forward(self, dets):
return self.dispatch([dets], [self.alloc()])
class _RetinaNetDecoder(Function):
"""Decode predictions from RetinaNet."""
def __init__(self, key, dev, **kwargs):
super(_RetinaNetDecoder, self).__init__(key, dev, **kwargs)
self.args = kwargs
def register_operator(self):
def attributes(self):
return {
'op_type': 'Proposal',
'op_type': 'RetinaNetDecoder',
'arguments': {
'det_type': 'RETINANET',
'strides': self.args['strides'],
'ratios': self.args['ratios'],
'scales': self.args['scales'],
......@@ -39,20 +58,21 @@ class _RetinaNetDecoder(function.Function):
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, ims_info]
self._unify_devices(inputs[:-1]) # Skip <ims_info>
return self.run(inputs, [self.alloc()], unify_devices=False)
self._check_device(inputs[:-1]) # Skip <ims_info>
return self.dispatch(inputs, [self.alloc()], check_device=False)
class _RPNDecoder(function.Function):
class _RPNDecoder(Function):
"""Decode proposal regions from RPN."""
def __init__(self, key, dev, **kwargs):
super(_RPNDecoder, self).__init__(key, dev, **kwargs)
self.args = kwargs
def register_operator(self):
def attributes(self):
return {
'op_type': 'Proposal',
'op_type': 'RPNDecoder',
'arguments': {
'det_type': 'RCNN',
'strides': self.args['strides'],
'ratios': self.args['ratios'],
'scales': self.args['scales'],
......@@ -69,9 +89,9 @@ class _RPNDecoder(function.Function):
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, ims_info]
self._unify_devices(inputs[:-1]) # Skip <ims_info>
self._check_device(inputs[:-1]) # Skip <ims_info>
outputs = [self.alloc() for _ in range(self.args['K'])]
return self.run(inputs, outputs, unify_devices=False)
return self.dispatch(inputs, outputs, check_device=False)
def decode_retinanet(
......@@ -85,8 +105,8 @@ def decode_retinanet(
pre_nms_top_n,
score_thresh,
):
return function.get(
_RetinaNetDecoder,
return _RetinaNetDecoder \
.instantiate(
cls_prob.device,
strides=strides,
ratios=ratios,
......@@ -114,8 +134,8 @@ def decode_rpn(
canonical_scale,
canonical_level,
):
return function.get(
_RPNDecoder,
return _RPNDecoder \
.instantiate(
cls_prob.device,
K=num_outputs,
strides=strides,
......@@ -132,8 +152,16 @@ def decode_rpn(
).apply(features, cls_prob, bbox_pred, ims_info)
def nms(dets, iou_threshold=0.5):
return _NonMaxSuppression \
.instantiate(
dets.device,
iou_threshold=iou_threshold,
).apply(dets)
class RetinaNetDecoder(nn.Module):
"""Generate pred regions from retinanet."""
"""Decode predictions from retinanet."""
def __init__(self):
super(RetinaNetDecoder, self).__init__()
......@@ -154,7 +182,7 @@ class RetinaNetDecoder(nn.Module):
ratios=[float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
scales=self.scales,
pre_nms_top_n=cfg.RETINANET.PRE_NMS_TOP_N,
score_thresh=cfg.TEST.SCORE_THRESH,
score_thresh=float(cfg.TEST.SCORE_THRESH),
)
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define some basic structures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon
from dragon.vm import torch
from dragon.vm.torch import nn
from dragon.vm.torch.nn import functional
from seetadet.core.config import cfg
class FrozenAffine(object):
"""Affine transformation with weight and bias fixed."""
def __new__(cls, dim_in, bias=True, inplace=True):
return nn.Affine(
num_features=dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
class Conv1x1(object):
"""1x1 convolution."""
def __new__(cls, dim_in, dim_out, stride=1, bias=False):
return nn.Conv2d(
in_channels=dim_in,
out_channels=dim_out,
kernel_size=1,
stride=stride,
bias=bias,
)
class Conv3x3(object):
"""3x3 convolution."""
def __new__(cls, dim_in, dim_out, stride=1, dilation=1, bias=False):
return nn.Conv2d(
in_channels=dim_in,
out_channels=dim_out,
kernel_size=3,
stride=stride,
padding=1 * dilation,
bias=bias,
)
class CrossEntropyLoss(object):
"""Cross entropy loss."""
def __new__(cls):
return nn.CrossEntropyLoss(ignore_index=-1)
class IoULoss(nn.Module):
def __init__(self, reduction='mean', delta_weights=None):
super(IoULoss, self).__init__()
self.data = {} # Store the detached tensors
self.reduction = reduction
self.delta_weights = delta_weights
def transform_inv(self, boxes, deltas, name=None):
widths = boxes[:, 2] - boxes[:, 0]
heights = boxes[:, 3] - boxes[:, 1]
ctr_x = boxes[:, 0] + 0.5 * widths
ctr_y = boxes[:, 1] + 0.5 * heights
if name is not None:
self.data[name + '/widths'] = widths
self.data[name + '/heights'] = heights
dx, dy, dw, dh = torch.chunk(deltas, chunks=4, dim=1)
if self.delta_weights is not None:
wx, wy, ww, wh = self.delta_weights
dx, dy, dw, dh = dx / wx, dy / wy, dw / ww, dh / wh
pred_ctr_x = dx * widths + ctr_x
pred_ctr_y = dy * heights + ctr_y
pred_w = torch.exp(dw) * widths
pred_h = torch.exp(dh) * heights
x1 = pred_ctr_x - 0.5 * pred_w
y1 = pred_ctr_y - 0.5 * pred_h
x2 = pred_ctr_x + 0.5 * pred_w
y2 = pred_ctr_y + 0.5 * pred_h
return x1, y1, x2, y2
def forward_impl(self, input, target, anchor):
x1, y1, x2, y2 = self.transform_inv(
anchor, input, name='logits')
self.x1, self.y1, self.x2, self.y2 = \
self.transform_inv(anchor, target)
# Compute the independent area
pred_area = (x2 - x1) * (y2 - y1)
target_area = (self.x2 - self.x1) * (self.y2 - self.y1)
# Compute the intersecting area
x1_inter = torch.maximum(x1, self.x1)
y1_inter = torch.maximum(y1, self.y1)
x2_inter = torch.minimum(x2, self.x2)
y2_inter = torch.minimum(y2, self.y2)
w_inter = torch.clamp(x2_inter - x1_inter, min=0)
h_inter = torch.clamp(y2_inter - y1_inter, min=0)
area_inter = w_inter * h_inter
# Compute the enclosing area
x1_enc = torch.minimum(x1, self.x1)
y1_enc = torch.minimum(y1, self.y1)
x2_enc = torch.maximum(x2, self.x2)
y2_enc = torch.maximum(y2, self.y2)
area_enc = (x2_enc - x1_enc) * (y2_enc - y1_enc) + 1.
# Compute the differentiable IoU metric
area_union = pred_area + target_area - area_inter
iou = area_inter / (area_union + 1.)
iou_metric = iou - (area_enc - area_union) / area_enc # GIoU
# Compute the reduced loss
if self.reduction == 'sum':
return (1 - iou_metric).sum()
else:
return (1 - iou_metric).mean()
def forward(self, *inputs, **kwargs):
# Enter a new detaching scope
with dragon.eager_scope('${IOU}'):
return self.forward_impl(*inputs, **kwargs)
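For intuition, here is a minimal NumPy sketch of the same GIoU metric on a single pair of corner-format boxes; the `giou` helper is illustrative only and omits the `+ 1.` smoothing terms used above:

```python
import numpy as np

def giou(b1, b2):
    """GIoU between two [x1, y1, x2, y2] boxes."""
    area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
    area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
    # Intersecting area
    iw = max(0., min(b1[2], b2[2]) - max(b1[0], b2[0]))
    ih = max(0., min(b1[3], b2[3]) - max(b1[1], b2[1]))
    inter, union = iw * ih, area1 + area2 - iw * ih
    # Smallest enclosing area
    enc = ((max(b1[2], b2[2]) - min(b1[0], b2[0])) *
           (max(b1[3], b2[3]) - min(b1[1], b2[1])))
    return inter / union - (enc - union) / enc

b = np.array([0., 0., 2., 2.])
print(1. - giou(b, b))                           # perfect overlap -> loss 0.0
print(1. - giou(b, np.array([4., 0., 6., 2.])))  # disjoint boxes -> loss > 1
```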
class Identity(nn.Module):
"""Pass input to the output."""
def __init__(self, *args, **kwargs):
super(Identity, self).__init__()
_, _ = args, kwargs
def forward(self, x):
return x
class L2Normalize(nn.Module):
"""Normalize the input using L2 norm."""
def __init__(self, num_features, init=20.):
super(L2Normalize, self).__init__()
self.weight = nn.Parameter(torch.Tensor(num_features).fill_(init))
def forward(self, input):
out = functional.normalize(input, p=2, dim=1, eps=1e-5)
out = functional.affine(out, self.weight)
return out
class ReLU(object):
"""The generic ReLU activation."""
def __new__(cls, inplace=False):
return getattr(torch.nn, cfg.MODEL.RELU_VARIANT)(inplace)
class SigmoidFocalLoss(object):
"""Sigmoid focal loss."""
def __new__(cls):
return nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
)
class SmoothL1Loss(nn.Module):
"""Smoothed l1 loss."""
def __init__(self, beta=1., reduction='batch_size'):
super(SmoothL1Loss, self).__init__()
self.beta = beta
self.reduction = reduction
def forward(self, input, target, *args):
return functional.smooth_l1_loss(
input, target,
beta=self.beta,
reduction=self.reduction,
)
Affine = nn.Affine
AvgPool2d = nn.AvgPool2d
BatchNorm2d = nn.BatchNorm2d
BCEWithLogitsLoss = nn.BCEWithLogitsLoss
Conv2d = nn.Conv2d
ConvTranspose2d = nn.ConvTranspose2d
DepthwiseConv2d = nn.DepthwiseConv2d
Linear = nn.Linear
MaxPool2d = nn.MaxPool2d
Module = nn.Module
ModuleList = nn.ModuleList
Sequential = nn.Sequential
Sigmoid = nn.Sigmoid
Softmax = nn.Softmax
......@@ -13,9 +13,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import dragon.vm.torch as torch
from lib.core.config import cfg
from seetadet.core.config import cfg
def roi_align(input, boxes, spatial_scale, size):
......@@ -35,12 +37,18 @@ def roi_pool(input, boxes, spatial_scale, size):
class Bootstrap(torch.nn.Module):
"""Extended operator to process the images."""
"""Process the input to match the computation."""
def __init__(self):
super(Bootstrap, self).__init__()
self.dtype = cfg.MODEL.PRECISION.lower()
self.mean_values = cfg.PIXEL_MEANS
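# Fuse the NHWC -> NCHW transpose, mean subtraction,
# and precision cast into a single normalization op.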
self.normalize_func = functools.partial(
torch.channel_normalize,
mean=cfg.PIXEL_MEANS,
std=[1., 1., 1.],
dim=1,
dims=(0, 3, 1, 2),
dtype=cfg.MODEL.PRECISION.lower(),
)
self.dummy_buffer = torch.ones(1)
def _apply(self, fn):
......@@ -57,12 +65,13 @@ class Bootstrap(torch.nn.Module):
return self.dummy_buffer.device
def forward(self, input):
if isinstance(input, torch.Tensor):
if input.size(1) <= 3:
return input
cur_device = self.device()
if input._device != cur_device:
if cur_device.type == 'cpu':
input = input.cpu()
else:
input = input.cuda(cur_device.index)
return torch.vision.ops.image_data(
input, self.dtype, self.mean_values,
)
return self.normalize_func(input)
......@@ -13,5 +13,4 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.faster_rcnn.data_loader import DataLoader
from lib.retinanet.anchor_target import AnchorTarget
from seetadet.onnx import nodes as _
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.onnx import exporter
from dragon.vm.onnx import helper
@exporter.register('RetinaNetDecoder')
def retinanet_decoder_exporter(op_def, shape_dict, ws):
node, const_tensors = exporter.translate(**locals())
node.op_type = 'ATen' # Currently not supported in ai.onnx
helper.add_attribute(node, 'op_type', 'RetinaNetDecoder')
for arg in op_def.arg:
if arg.name == 'strides':
helper.add_attribute(node, 'strides', arg.ints)
elif arg.name == 'ratios':
helper.add_attribute(node, 'ratios', arg.floats)
elif arg.name == 'scales':
helper.add_attribute(node, 'scales', arg.floats)
elif arg.name == 'pre_nms_top_n':
helper.add_attribute(node, 'pre_nms_top_n', arg.i)
elif arg.name == 'score_thresh':
helper.add_attribute(node, 'score_thresh', arg.f)
return node, const_tensors
@exporter.register('RPNDecoder')
def rpn_decoder_exporter(op_def, shape_dict, ws):
node, const_tensors = exporter.translate(**locals())
node.op_type = 'ATen' # Currently not supported in ai.onnx
helper.add_attribute(node, 'op_type', 'RPNDecoder')
for arg in op_def.arg:
if arg.name == 'strides':
helper.add_attribute(node, 'strides', arg.ints)
elif arg.name == 'ratios':
helper.add_attribute(node, 'ratios', arg.floats)
elif arg.name == 'scales':
helper.add_attribute(node, 'scales', arg.floats)
elif arg.name == 'pre_nms_top_n':
helper.add_attribute(node, 'pre_nms_top_n', arg.i)
elif arg.name == 'post_nms_top_n':
helper.add_attribute(node, 'post_nms_top_n', arg.i)
elif arg.name == 'nms_thresh':
helper.add_attribute(node, 'nms_thresh', arg.f)
elif arg.name == 'min_size':
helper.add_attribute(node, 'min_size', arg.i)
elif arg.name == 'min_level':
helper.add_attribute(node, 'min_level', arg.i)
elif arg.name == 'max_level':
helper.add_attribute(node, 'max_level', arg.i)
elif arg.name == 'canonical_scale':
helper.add_attribute(node, 'canonical_scale', arg.i)
elif arg.name == 'canonical_level':
helper.add_attribute(node, 'canonical_level', arg.i)
return node, const_tensors
__author__ = 'tsungyi'
import lib.pycocotools._mask as _mask
import seetadet.pycocotools._mask as _mask
# Interface for manipulating masks stored in RLE format.
#
......
......@@ -15,8 +15,8 @@ from __future__ import print_function
import numpy as np
from lib.pycocotools import mask as mask_tools
from lib.pycocotools.mask import frPyObjects
from seetadet.pycocotools import mask as mask_tools
from seetadet.pycocotools.mask import frPyObjects
def poly2rle(poly, height, width):
......
......@@ -15,7 +15,7 @@ from __future__ import print_function
import math
from lib.core.config import cfg
from seetadet.core.config import cfg
class _LRScheduler(object):
......
......@@ -15,11 +15,11 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling.detector import Detector
from lib.solver import lr_scheduler
from lib.utils import framework
from lib.utils import time_util
from seetadet.core.config import cfg
from seetadet.modeling.detector import Detector
from seetadet.solver import lr_scheduler
from seetadet.utils import env
from seetadet.utils import time_util
class SGDSolver(object):
......@@ -28,7 +28,7 @@ class SGDSolver(object):
self.detector = Detector()
# Define the optimizer and its arguments
self.optimizer = torch.optim.SGD(
framework.get_param_groups(self.detector),
env.get_param_groups(self.detector),
lr=cfg.SOLVER.BASE_LR,
momentum=cfg.SOLVER.MOMENTUM,
weight_decay=cfg.SOLVER.WEIGHT_DECAY,
......
......@@ -20,9 +20,9 @@ from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.image import distort_image
from lib.utils.image import resize_image
from seetadet.core.config import cfg
from seetadet.utils.image import distort_image
from seetadet.utils.image import resize_image
def im_list_to_blob(ims):
......@@ -56,7 +56,7 @@ def mask_list_to_blob(masks):
max_shape = np.array([mask.shape[1:] for mask in masks]).max(axis=0)
num_masks = np.array([mask.shape[0] for mask in masks]).sum()
blob_shape = ((num_masks, max_shape[0], max_shape[1]))
blob_shape = (num_masks, max_shape[0], max_shape[1])
blob = np.zeros(blob_shape, 'uint8')
count = 0
......@@ -89,9 +89,8 @@ def prep_im_for_blob(img, target_size, max_size):
im_size_max = np.max(im_shape[:2])
im_scale = float(target_size) / float(im_size_max)
if cfg.TRAIN.USE_SCALE_JITTER:
r = cfg.TRAIN.SCALE_JITTER_RANGE
r = cfg.TRAIN.RANDOM_SCALES
jitter = r[0] + np.random.rand() * (r[1] - r[0])
im_scale *= jitter
return resize_image(img, im_scale, im_scale), im_scale, jitter
return resize_image(img, im_scale, im_scale), im_scale
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# ------------------------------------------------------------
"""Box utilities for original coordinates."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from seetadet.utils import cython_bbox
def bbox_overlaps(boxes1, boxes2):
"""Compute the overlaps between two group of boxes."""
return cython_bbox.bbox_overlaps(
np.ascontiguousarray(boxes1, dtype=np.float),
np.ascontiguousarray(boxes2, dtype=np.float),
)
def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1.)):
"""Transform the boxes to the regression targets."""
ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.
ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.
ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.
gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.
gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
wx, wy, ww, wh = weights
targets = [wx * (gt_ctr_x - ex_ctr_x) / ex_widths]
targets += [wy * (gt_ctr_y - ex_ctr_y) / ex_heights]
targets += [ww * np.log(gt_widths / ex_widths)]
targets += [wh * np.log(gt_heights / ex_heights)]
return np.vstack(targets).transpose()
def bbox_centerness(ex_rois, gt_rois):
"""Compute centerness of the boxes to ground-truth."""
ex_ctr_x = (ex_rois[:, 2] + ex_rois[:, 0]) / 2
ex_ctr_y = (ex_rois[:, 3] + ex_rois[:, 1]) / 2
l = ex_ctr_x - gt_rois[:, 0]
t = ex_ctr_y - gt_rois[:, 1]
r = gt_rois[:, 2] - ex_ctr_x
b = gt_rois[:, 3] - ex_ctr_y
centerness = \
(np.minimum(l, r) / np.maximum(l, r)) * \
(np.minimum(t, b) / np.maximum(t, b))
min_dist = np.stack([l, t, r, b], axis=1).min(axis=1)
keep_inds = np.where(min_dist > 0.01)[0]
discard_inds = np.where(min_dist <= 0.01)[0]
centerness[keep_inds] = np.sqrt(centerness[keep_inds])
centerness[discard_inds] = -1
return centerness, keep_inds, discard_inds
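This is the FCOS-style centerness target. In math form, with (l, t, r, b) the distances from an anchor center to the left, top, right, and bottom edges of its matched ground-truth box:

$$\mathrm{centerness} = \sqrt{\frac{\min(l, r)}{\max(l, r)} \cdot \frac{\min(t, b)}{\max(t, b)}}$$

Centers deep inside the box score near 1, points close to an edge approach 0, and anchors whose minimum side distance falls below 0.01 are marked -1 and discarded by the caller.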
def bbox_transform_inv(boxes, deltas, weights=(1., 1., 1., 1.)):
"""Decode the final boxes according to the deltas."""
if boxes.shape[0] == 0:
return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
boxes = boxes.astype(deltas.dtype, copy=False)
widths = boxes[:, 2] - boxes[:, 0] + 1.
heights = boxes[:, 3] - boxes[:, 1] + 1.
ctr_x = boxes[:, 0] + 0.5 * widths
ctr_y = boxes[:, 1] + 0.5 * heights
wx, wy, ww, wh = weights
dx = deltas[:, 0::4] / wx
dy = deltas[:, 1::4] / wy
dw = deltas[:, 2::4] / ww
dh = deltas[:, 3::4] / wh
pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
pred_w = np.exp(dw) * widths[:, np.newaxis]
pred_h = np.exp(dh) * heights[:, np.newaxis]
pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w # x1
pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h # y1
pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 # x2
pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 # y2
return pred_boxes
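As a quick sanity check, the two transforms above are exact inverses under the same weights; a minimal sketch with toy boxes and default weights:

```python
import numpy as np
from seetadet.utils.boxes import bbox_transform, bbox_transform_inv

anchors = np.array([[10., 10., 50., 30.]])
gt = np.array([[12., 8., 60., 36.]])

deltas = bbox_transform(anchors, gt)           # encode boxes as regression targets
decoded = bbox_transform_inv(anchors, deltas)  # decode the targets back into boxes
assert np.allclose(decoded, gt)                # exact round trip
```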
def clip_boxes(boxes, im_shape):
# x1 >= 0
boxes[:, 0] = np.maximum(np.minimum(boxes[:, 0], im_shape[1] - 1), 0)
# y1 >= 0
boxes[:, 1] = np.maximum(np.minimum(boxes[:, 1], im_shape[0] - 1), 0)
# x2 < im_shape[1]
boxes[:, 2] = np.maximum(np.minimum(boxes[:, 2], im_shape[1] - 1), 0)
# y2 < im_shape[0]
boxes[:, 3] = np.maximum(np.minimum(boxes[:, 3], im_shape[0] - 1), 0)
return boxes
def clip_tiled_boxes(boxes, im_shape):
# x1 >= 0
boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
# y1 >= 0
boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
# x2 < im_shape[1]
boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
# y2 < im_shape[0]
boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
return boxes
def dismantle_boxes(gt_boxes, num_images):
"""Dismantle the packed ground-truth boxes."""
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == i)[0]
][:, :-1] for i in range(num_images)
]
def expand_boxes(boxes, scale):
"""Expand an array of boxes by a given scale."""
w_half = (boxes[:, 2] - boxes[:, 0]) * .5
h_half = (boxes[:, 3] - boxes[:, 1]) * .5
x_c = (boxes[:, 2] + boxes[:, 0]) * .5
y_c = (boxes[:, 3] + boxes[:, 1]) * .5
w_half *= scale
h_half *= scale
boxes_exp = np.zeros(boxes.shape)
boxes_exp[:, 0] = x_c - w_half
boxes_exp[:, 2] = x_c + w_half
boxes_exp[:, 1] = y_c - h_half
boxes_exp[:, 3] = y_c + h_half
return boxes_exp
def flip_boxes(boxes, width):
"""Flip the boxes horizontally."""
boxes_flipped = boxes.copy()
boxes_flipped[:, 0] = width - boxes[:, 2] - 1
boxes_flipped[:, 2] = width - boxes[:, 0] - 1
return boxes_flipped
def filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min size."""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# ------------------------------------------------------------
"""Box utilities for normalized coordinates."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
def boxes_area(boxes):
"""Compute the area of an array of boxes."""
w = (boxes[:, 2] - boxes[:, 0])
h = (boxes[:, 3] - boxes[:, 1])
area = w * h
assert np.all(area >= 0), 'Negative areas found'
return area
def intersection(boxes1, boxes2):
"""Compute pairwise intersection areas between boxes."""
[y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1)
[y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1)
all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2))
all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2))
all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2))
all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2))
inter_heights = np.maximum(
np.zeros(all_pairs_max_ymin.shape),
all_pairs_min_ymax - all_pairs_max_ymin
)
inter_widths = np.maximum(
np.zeros(all_pairs_max_xmin.shape),
all_pairs_min_xmax - all_pairs_max_xmin
)
return inter_heights * inter_widths
def ioa1(boxes1, boxes2):
"""Computes pairwise intersection-over-area between box collections."""
inter = intersection(boxes1, boxes2)
area = np.expand_dims(boxes_area(boxes1), axis=1)
return inter / area
def ioa2(boxes1, boxes2):
"""Computes pairwise intersection-over-area between box collections."""
inter = intersection(boxes1, boxes2)
area = np.expand_dims(boxes_area(boxes2), axis=0)
return inter / area
def iou(boxes1, boxes2):
"""Computes pairwise intersection-over-union between box collections."""
inter = intersection(boxes1, boxes2)
area1 = boxes_area(boxes1)
area2 = boxes_area(boxes2)
union = np.expand_dims(area1, axis=1) + \
np.expand_dims(area2, axis=0) - inter
return inter / union
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import importlib.machinery
import os
import dragon
from dragon.core.util import six
from dragon.vm import torch
import numpy as np
from seetadet.core.config import cfg
def get_param_groups(module):
"""Separate parameters according to weight decay.
Parameters
----------
module : dragon.vm.torch.nn.Module
The module to collect parameters.
Returns
-------
Sequence[ParamGroup]
The parameter groups.
"""
param_groups = [
{'params': []}, # Decayed always
{'params': [], 'weight_decay': -1.}
]
for name, param in module.named_parameters():
gi = 0 if 'weight' in name and param.dim() > 1 else 1
param_groups[gi]['params'].append(param)
if len(param_groups[1]['params']) == 0:
param_groups.pop() # Remove empty group
return param_groups
def load_library(library_prefix):
"""Load a shared library.
Parameters
----------
library_prefix : str
The prefix of library.
"""
loader_details = (
importlib.machinery.ExtensionFileLoader,
importlib.machinery.EXTENSION_SUFFIXES
)
library_prefix = os.path.abspath(library_prefix)
lib_dir, fullname = os.path.split(library_prefix)
finder = importlib.machinery.FileFinder(lib_dir, loader_details)
ext_specs = finder.find_spec(fullname)
if ext_specs is None:
raise ImportError(
'Could not find the pre-built library '
'for <%s>.' % library_prefix
)
dragon.load_library(ext_specs.origin)
def new_tensor(data, enforce_cpu=False):
"""Create a new tensor from the data.
Parameters
----------
data : array_like
The data value.
enforce_cpu : bool, optional, default=False
**True** to enforce the cpu storage.
Returns
-------
dragon.vm.torch.Tensor
The tensor taken with the data.
"""
if data is None:
return data
if isinstance(data, np.ndarray):
tensor = torch.from_numpy(data)
elif isinstance(data, torch.Tensor):
tensor = data
else:
tensor = torch.tensor(data)
if not enforce_cpu:
tensor = tensor.cuda(cfg.GPU_ID)
return tensor
# Aliases
pickle = six.moves.pickle
......@@ -18,7 +18,7 @@ import numpy as np
import PIL.Image
import PIL.ImageEnhance
from lib.core.config import cfg
from seetadet.core.config import cfg
def distort_image(img):
......@@ -28,7 +28,6 @@ def distort_image(img):
PIL.ImageEnhance.Contrast,
PIL.ImageEnhance.Color,
]
np.random.shuffle(transforms)
for transform in transforms:
if np.random.uniform() < 0.5:
img = transform(img)
......@@ -62,7 +61,7 @@ def get_image_with_target_size(target_size, img):
)
def resize_image(img, fx, fy):
def resize_image(img, fx=1, fy=1):
return cv2.resize(
img,
dsize=None,
......@@ -79,7 +78,6 @@ def scale_image(img):
im_size_max = np.max(img.shape[:2])
for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE
if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
processed_ims.append(
......@@ -91,17 +89,16 @@ def scale_image(img):
))
ims_scales.append(im_scale)
else:
# Scale image along the longest side
im_size_max = np.max(img.shape[:2])
# Scale image into a square
for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_max)
im_scale_h = float(target_size) / img.shape[0]
im_scale_w = float(target_size) / img.shape[1]
processed_ims.append(
cv2.resize(
img,
dsize=None,
fx=im_scale, fy=im_scale,
dsize=(target_size, target_size),
interpolation=cv2.INTER_LINEAR,
))
ims_scales.append(im_scale)
ims_scales.append([im_scale_h, im_scale_w])
return processed_ims, ims_scales
......@@ -42,7 +42,7 @@ def get_logger():
if _logger:
return _logger
logger = _logging.getLogger('detectron')
logger = _logging.getLogger('SeetaDet')
logger.setLevel('INFO')
logger.propagate = False
......
......@@ -21,7 +21,7 @@ import cv2
import numpy as np
import PIL.Image
from lib.utils import boxes as box_util
from seetadet.utils import boxes as box_util
def dismantle_masks(gt_boxes, gt_masks, num_images):
......
......@@ -17,44 +17,53 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.core.config import cfg
from seetadet.modules import det
from seetadet.utils import env
try:
from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms
except ImportError as e:
print('Failed to import cpu nms. Error: {0}'.format(str(e)))
from seetadet.utils.cython_nms import cpu_nms
from seetadet.utils.cython_nms import cpu_soft_nms
except ImportError:
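# Use ``print`` as a cheap sentinel; nms() and soft_nms()
# below detect it and raise a deferred ImportError.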
cpu_nms = cpu_soft_nms = print
try:
from lib.nms.gpu_nms import gpu_nms
except ImportError as e:
print('Failed to import gpu nms. Error: {0}'.format(str(e)))
def gpu_nms(dets, thresh):
"""Filter out the detections using GPU-NMS."""
if dets.shape[0] == 0:
return []
scores = dets[:, 4]
order = scores.argsort()[::-1]
sorted_dets = env.new_tensor(dets[order, :])
keep = det.nms(sorted_dets, iou_threshold=thresh).numpy()
return order[keep]
def nms(detections, thresh, force_cpu=False):
"""Perform either CPU or GPU Hard-NMS."""
if detections.shape[0] == 0:
def nms(dets, thresh):
"""Filter out the detections using NMS."""
if dets.shape[0] == 0:
return []
if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
else:
return cpu_nms(detections, thresh)
if cpu_nms is print:
raise ImportError('Failed to load <cython_nms> library.')
return cpu_nms(dets, thresh)
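For example, assuming the standard hard-NMS behavior of the compiled cython_nms routine:

```python
import numpy as np

# Each row is [x1, y1, x2, y2, score], in descending score order.
dets = np.array([[10., 10., 50., 50., 0.9],
                 [12., 12., 52., 52., 0.8],    # near-duplicate of the first box
                 [100., 100., 150., 150., 0.7]], dtype=np.float32)
keep = nms(dets, thresh=0.5)  # expected: [0, 2], the duplicate is suppressed
```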
def soft_nms(
detections,
dets,
thresh,
method='linear',
sigma=0.5,
score_thresh=0.001,
):
"""Perform CPU Soft-NMS."""
if detections.shape[0] == 0:
"""Filter out the detections using Soft-NMS."""
if dets.shape[0] == 0:
return []
if cpu_soft_nms is print:
raise ImportError('Failed to load <cython_nms> library.')
methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
if method not in methods:
raise ValueError('Unknown soft nms method:', method)
return cpu_soft_nms(
detections,
dets,
thresh,
methods[method],
sigma,
......
......@@ -30,8 +30,8 @@ import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
import numpy as np
from lib.utils.colormap import colormap
from lib.utils.boxes import expand_boxes
from seetadet.utils.colormap import colormap
from seetadet.utils.boxes import expand_boxes
plt.rcParams['pdf.fonttype'] = 42 # For editing in Adobe Illustrator
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil
import setuptools
import setuptools.command.install
import sys
import subprocess
def clean():
"""Remove the work directories."""
if os.path.exists('build'):
shutil.rmtree('build')
if os.path.exists('seeta_det.egg-info'):
shutil.rmtree('seeta_det.egg-info')
def configure():
"""Prepare the package files."""
# Compile cxx sources
py_exec = sys.executable
if subprocess.call(
'cd csrc/cxx && '
'{} setup.py build_ext -b ../ --no-python-abi-suffix=0 -j 4 &&'
'{} setup.py clean'.format(py_exec, py_exec), shell=True
) > 0:
raise RuntimeError('Failed to build the cxx sources.')
# Compile pyx sources
if subprocess.call(
'cd csrc/pyx && '
'{} setup.py build_ext -b ../ --cython-c-in-temp -j 4 &&'
'{} setup.py clean'.format(py_exec, py_exec), shell=True,
) > 0:
raise RuntimeError('Failed to build the pyx sources.')
# Copy the pre-built libraries
for root, _, files in os.walk('csrc/install'):
root = root[len('csrc/install/'):]
for file in files:
src = os.path.join(root, file)
dest = src.replace('lib', 'seetadet')
if os.path.exists(dest):
os.remove(dest)
shutil.copy(os.path.join('csrc/install', src), dest)
shutil.rmtree('csrc/install')
class install(setuptools.command.install.install):
"""Old-style command to prevent from installing egg."""
def run(self):
setuptools.command.install.install.run(self)
def find_packages():
"""Return the python sources installed to package."""
packages = []
for root, _, files in os.walk('seetadet'):
if os.path.exists(os.path.join(root, '__init__.py')):
packages.append(root)
return packages
def find_package_data():
"""Return the external data installed to package."""
libraries = []
for root, _, files in os.walk('seetadet'):
root = root[len('seetadet/'):]
for file in files:
if file.endswith('.so') or file.endswith('.pyd'):
libraries.append(os.path.join(root, file))
return libraries
configure()
setuptools.setup(
name='seeta-det',
version='0.4.0',
description='SeetaDet: A platform implementing popular object detection algorithms.',
url='https://gitlab.seetatech.com/seetaresearch/SeetaDet',
author='SeetaTech',
license='BSD 2-Clause',
packages=find_packages(),
package_data={'seetadet': find_package_data()},
package_dir={'seetadet': 'seetadet'},
cmdclass={'install': install},
install_requires=['opencv-python', 'Pillow'],
classifiers=[
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Developers',
'Intended Audience :: Education',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: BSD License',
'Programming Language :: C++',
'Programming Language :: Python',
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Mathematics',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Software Development',
'Topic :: Software Development :: Libraries',
'Topic :: Software Development :: Libraries :: Python Modules',
],
)
clean()
......@@ -21,10 +21,11 @@ import argparse
import dragon.vm.torch as torch
import pprint
from lib.core.config import cfg
from lib.core.coordinator import Coordinator
from lib.modeling.detector import new_detector
from lib.utils import logger
from seetadet import onnx as _
from seetadet.core.config import cfg
from seetadet.core.coordinator import Coordinator
from seetadet.modeling.detector import new_detector
from seetadet.utils import logger
def parse_args():
......@@ -71,8 +72,8 @@ if __name__ == '__main__':
.format(coordinator.exports_dir()))
detector = new_detector(cfg.GPU_ID, checkpoint)
data = torch.zeros(*args.input_shape).byte()
ims_info = torch.zeros(args.input_shape[0], 3).float()
data = torch.zeros(*args.input_shape, dtype='uint8')
ims_info = torch.zeros(args.input_shape[0], 3, dtype='float32')
torch.onnx.export(
model=detector,
......
......@@ -21,11 +21,11 @@ import argparse
import dragon
import numpy
from lib.core.config import cfg
from lib.core.coordinator import Coordinator
from lib.core.train import train_net
from lib.datasets.factory import get_imdb
from lib.utils import logger
from seetadet.core.config import cfg
from seetadet.core.coordinator import Coordinator
from seetadet.core.train import train_net
from seetadet.datasets.factory import get_dataset
from seetadet.utils import logger
def parse_args():
......@@ -79,12 +79,12 @@ if __name__ == '__main__':
# Fix the random seed for reproducibility
numpy.random.seed(cfg.RNG_SEED)
dragon.config.set_random_seed(cfg.RNG_SEED)
dragon.random.set_seed(cfg.RNG_SEED)
# Inspect the database
database = get_imdb(cfg.TRAIN.DATABASE)
logger.info('Database({}): {} images will be used to train.'
.format(cfg.TRAIN.DATABASE, database.num_images))
# Inspect the dataset
dataset = get_dataset(cfg.TRAIN.DATASET)
logger.info('Dataset({}): {} images will be used to train.'
.format(cfg.TRAIN.DATASET, dataset.num_images))
# Ready to train the network
logger.info('Output will be saved to `{:s}`'
......
......@@ -20,12 +20,12 @@ sys.path.insert(0, '..')
import argparse
import pprint
from lib.core import test_engine
from lib.core.config import cfg
from lib.core.coordinator import Coordinator
from lib.core.test import TestServer
from lib.datasets.factory import get_imdb
from lib.utils import logger
from seetadet.core import test_engine
from seetadet.core.config import cfg
from seetadet.core.coordinator import Coordinator
from seetadet.core.test import TestServer
from seetadet.datasets.factory import get_dataset
from seetadet.utils import logger
def parse_args():
......@@ -81,11 +81,11 @@ if __name__ == '__main__':
if checkpoint is None:
raise RuntimeError('The checkpoint of global step {} does not exist.'.format(args.iter))
# Inspect the database
database = get_imdb(cfg.TEST.DATABASE)
# Inspect the dataset
dataset = get_dataset(cfg.TEST.DATASET)
cfg.TEST.PROTOCOL = 'dump' if args.dump else cfg.TEST.PROTOCOL
logger.info('Database({}): {} images will be used to test.'
.format(cfg.TEST.DATABASE, database.num_images))
logger.info('Dataset({}): {} images will be used to test.'
.format(cfg.TEST.DATASET, dataset.num_images))
# Ready to test the network
output_dir = coordinator.results_dir(checkpoint, args.output_dir)
......
......@@ -20,8 +20,8 @@ sys.path.insert(0, '..')
import argparse
import numpy
from lib.core.coordinator import Coordinator
from lib.utils import logger
from seetadet.core.coordinator import Coordinator
from seetadet.utils import logger
def parse_args():
......
......@@ -22,11 +22,11 @@ import dragon
import numpy
import pprint
from lib.core.config import cfg
from lib.core.coordinator import Coordinator
from lib.core.train import train_net
from lib.datasets.factory import get_imdb
from lib.utils import logger
from seetadet.core.config import cfg
from seetadet.core.coordinator import Coordinator
from seetadet.core.train import train_net
from seetadet.datasets.factory import get_dataset
from seetadet.utils import logger
def parse_args():
......@@ -59,7 +59,7 @@ def mpi_train(cfg_file, exp_dir):
"""
import subprocess
args = 'mpirun --allow-run-as-root -n {} '.format(cfg.NUM_GPUS)
args = 'mpirun --allow-run-as-root -n {} --bind-to none '.format(cfg.NUM_GPUS)
args += '{} {} '.format(sys.executable, 'mpi_train.py')
args += '--cfg {} --exp_dir {} '.format(osp.abspath(cfg_file), exp_dir)
return subprocess.call(args, shell=True)
......@@ -84,12 +84,12 @@ if __name__ == '__main__':
# Fix the random seed for reproducibility
numpy.random.seed(cfg.RNG_SEED)
dragon.config.set_random_seed(cfg.RNG_SEED)
dragon.random.set_seed(cfg.RNG_SEED)
# Inspect the database
database = get_imdb(cfg.TRAIN.DATABASE)
logger.info('Database({}): {} images will be used to train.'
.format(cfg.TRAIN.DATABASE, database.num_images))
# Inspect the dataset
dataset = get_dataset(cfg.TRAIN.DATASET)
logger.info('Dataset({}): {} images will be used to train.'
.format(cfg.TRAIN.DATASET, dataset.num_images))
# Ready to train the network
logger.info('Output will be saved to `{:s}`'
......