Commit ca255ea0 by Ting PAN

Change to the PEP8 code style

1 parent 71593766
Showing with 2512 additions and 2579 deletions
## General
# Compiled Object files # Compiled Object files
*.slo *.slo
*.lo *.lo
...@@ -7,13 +5,15 @@ ...@@ -7,13 +5,15 @@
*.cuo *.cuo
# Compiled Dynamic libraries # Compiled Dynamic libraries
# *.so *.so
*.dll
*.dylib *.dylib
# Compiled Static libraries # Compiled Static libraries
*.lai *.lai
*.la *.la
#*.a *.a
*.lib
# Compiled python # Compiled python
*.pyc *.pyc
...@@ -40,6 +40,9 @@ __pycache__ ...@@ -40,6 +40,9 @@ __pycache__
# QtCreator files # QtCreator files
*.user *.user
# VSCode files
.vscode
# PyCharm files # PyCharm files
.idea .idea
......
------------------------------------------------------------------------ ------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet. The list of most significant changes made over time in SeetaDet.
SeetaDet 0.1.2 (20190723)
Dragon Minimum Required (Version 0.3.0.0)
Changes:
Preview Features:
- Change to the PEP8 code style.
- Adapt the new Dragon API.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.1.1 (20190409) SeetaDet 0.1.1 (20190409)
Dragon Minimum Required (Version 0.3.0.0) Dragon Minimum Required (Version 0.3.0.0)
...@@ -33,4 +51,4 @@ Preview Features: ...@@ -33,4 +51,4 @@ Preview Features:
Bugs fixed: Bugs fixed:
- None - None
\ No newline at end of file
...@@ -87,4 +87,4 @@ python export.py --cfg <MODEL_YAML> --exp_dir <EXP_DIR> --iter <ITERATION> ...@@ -87,4 +87,4 @@ python export.py --cfg <MODEL_YAML> --exp_dir <EXP_DIR> --iter <ITERATION>
[6] [Mask R-CNN](https://arxiv.org/abs/1703.06870). Kaiming He, Georgia Gkioxari, Piotr Dollár and Ross Girshick. ICCV, 2017. [6] [Mask R-CNN](https://arxiv.org/abs/1703.06870). Kaiming He, Georgia Gkioxari, Piotr Dollár and Ross Girshick. ICCV, 2017.
[7] [Detectron](https://github.com/facebookresearch/Detectron). Ross Girshick, Ilija Radosavovic, Georgia Gkioxari, Piotr Dollár and Kaiming He. 2018. [7] [Detectron](https://github.com/facebookresearch/Detectron). Ross Girshick, Ilija Radosavovic, Georgia Gkioxari, Piotr Dollár and Kaiming He. 2018.
\ No newline at end of file
...@@ -21,8 +21,8 @@ set(CUDA_ARCH -gencode arch=compute_30,code=sm_30 ...@@ -21,8 +21,8 @@ set(CUDA_ARCH -gencode arch=compute_30,code=sm_30
# ---------------- User Config ---------------- # ---------------- User Config ----------------
# ---[ Dependencies # ---[ Dependencies
include(${PROJECT_SOURCE_DIR}/CMake/FindPythonLibs.cmake) include(${PROJECT_SOURCE_DIR}/cmake/FindPythonLibs.cmake)
include(${PROJECT_SOURCE_DIR}/CMake/FindNumPy.cmake) include(${PROJECT_SOURCE_DIR}/cmake/FindNumPy.cmake)
FIND_PACKAGE(CUDA REQUIRED) FIND_PACKAGE(CUDA REQUIRED)
set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD 11)
......
# - Find the NumPy libraries
# This module finds if NumPy is installed, and sets the following variables
# indicating where it is.
#
# TODO: Update to provide the libraries and paths for linking npymath lib.
#
#  NUMPY_FOUND               - was NumPy found
#  NUMPY_VERSION             - the version of NumPy found as a string
#  NUMPY_VERSION_MAJOR       - the major version number of NumPy
#  NUMPY_VERSION_MINOR       - the minor version number of NumPy
#  NUMPY_VERSION_PATCH       - the patch version number of NumPy
#  NUMPY_VERSION_DECIMAL     - e.g. version 1.6.1 is 10601
#  NUMPY_INCLUDE_DIR         - path to the NumPy include files
#
# Inputs (must be set by the caller before including this module):
#  PYTHONINTERP_FOUND        - true when a Python interpreter was located
#  PYTHON_EXECUTABLE         - the interpreter used to query NumPy
#
# Configuration aborts with FATAL_ERROR when PYTHONINTERP_FOUND is false.

# Start from a clean slate so values left over from an earlier probe cannot
# be mistaken for the result of this one.
unset(NUMPY_VERSION)
unset(NUMPY_VERSION_MAJOR)
unset(NUMPY_VERSION_MINOR)
unset(NUMPY_VERSION_PATCH)
unset(NUMPY_VERSION_DECIMAL)
unset(NUMPY_INCLUDE_DIR)

if(PYTHONINTERP_FOUND)
  # Ask NumPy itself: first output line is the version, second the include dir.
  execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
    "import numpy as n; print(n.__version__); print(n.get_include());"
    RESULT_VARIABLE __result
    OUTPUT_VARIABLE __output
    OUTPUT_STRIP_TRAILING_WHITESPACE)

  # Numeric comparison: a regex MATCHES would also accept exit codes such as
  # 10, while a non-numeric launch error string simply compares unequal.
  if(__result EQUAL 0)
    # Escape literal semicolons first, then turn line breaks into list
    # separators. The expansions are quoted so that semicolons and empty
    # output reach string() intact instead of being split into arguments.
    string(REGEX REPLACE ";" "\\\\;" __values "${__output}")
    string(REGEX REPLACE "\r?\n" ";" __values "${__values}")

    # list(GET) is a hard error on a short list; fall through to the
    # diagnostic branch below instead when fewer than two lines were printed.
    list(LENGTH __values __num_values)
    if(__num_values GREATER 1)
      list(GET __values 0 NUMPY_VERSION)
      list(GET __values 1 NUMPY_INCLUDE_DIR)
    endif()

    # The '+' sits inside each group so that multi-digit components
    # (e.g. the 16 in 1.16.4) are captured whole, not just their last digit.
    string(REGEX MATCH "^([0-9]+)\\.([0-9]+)\\.([0-9]+)" __ver_check "${NUMPY_VERSION}")
    if(NOT "${__ver_check}" STREQUAL "")
      set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1})
      set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2})
      set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3})
      math(EXPR NUMPY_VERSION_DECIMAL
        "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
      # Normalize Windows backslashes to forward slashes.
      string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR "${NUMPY_INCLUDE_DIR}")
    else()
      unset(NUMPY_VERSION)
      unset(NUMPY_INCLUDE_DIR)
      message(STATUS "Requested NumPy version and include path, but got instead:\n${__output}\n")
    endif()
  endif()
else()
  message("Can not find Python interpretator.")
  message(FATAL_ERROR "Do you set PYTHON_EXECUTABLE correctly?")
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NumPy REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION
                                        VERSION_VAR NUMPY_VERSION)

if(NUMPY_FOUND)
  message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})")
endif()
\ No newline at end of file
...@@ -7,4 +7,4 @@ ...@@ -7,4 +7,4 @@
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
\ No newline at end of file
# -------------------------------------------------------- # ------------------------------------------------------------
# Detectron @ Dragon # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# Copyright(c) 2017 SeetaTech #
# Written by Ting Pan # Licensed under the BSD 2-Clause License.
# -------------------------------------------------------- # You should have received a copy of the BSD 2-Clause License
\ No newline at end of file # along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
...@@ -8,8 +8,3 @@ ...@@ -8,8 +8,3 @@
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from .distort import Distortor
from .expand import Expander
from .sample import Sampler
from .resize import Resizer
\ No newline at end of file
...@@ -38,4 +38,4 @@ if __name__ == '__main__': ...@@ -38,4 +38,4 @@ if __name__ == '__main__':
images_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/JPEGImages'), images_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/JPEGImages'),
annotations_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/Annotations'), annotations_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/Annotations'),
imagesets_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/ImageSets/Main'), imagesets_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
splits=['test']) splits=['test'])
\ No newline at end of file
...@@ -7,4 +7,4 @@ ...@@ -7,4 +7,4 @@
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
\ No newline at end of file
...@@ -16,6 +16,7 @@ from __future__ import print_function ...@@ -16,6 +16,7 @@ from __future__ import print_function
import os import os
import sys import sys
import time import time
import cv2 import cv2
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from dragon.tools.db import LMDB from dragon.tools.db import LMDB
...@@ -23,6 +24,7 @@ from dragon.tools.db import LMDB ...@@ -23,6 +24,7 @@ from dragon.tools.db import LMDB
sys.path.insert(0, '../../..') sys.path.insert(0, '../../..')
from lib.proto import anno_pb2 as pb from lib.proto import anno_pb2 as pb
ZFILL = 8 ZFILL = 8
ENCODE_QUALITY = 95 ENCODE_QUALITY = 95
...@@ -46,14 +48,23 @@ def make_datum(image_file, xml_file): ...@@ -46,14 +48,23 @@ def make_datum(image_file, xml_file):
datum = pb.Datum() datum = pb.Datum()
im = cv2.imread(image_file) im = cv2.imread(image_file)
if im is None or im.shape[0] == 0 or im.shape[1] == 0:
print("XML have not objects ignored: ", xml_file)
return None
datum.height, datum.width, datum.channels = im.shape datum.height, datum.width, datum.channels = im.shape
datum.encoded = ENCODE_QUALITY != 100 datum.encoded = ENCODE_QUALITY != 100
if datum.encoded: if datum.encoded:
result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY]) result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
if im is None or im.shape[0] == 0 or im.shape[1] == 0:
print("XML have not objects ignored: ", xml_file)
return None
datum.data = im.tostring() datum.data = im.tostring()
anno_datum.datum.CopyFrom(datum) anno_datum.datum.CopyFrom(datum)
anno_datum.filename = filename.split('.')[0] anno_datum.filename = filename.split('.')[0]
if len(objs) == 0:
return None
for ix, obj in enumerate(objs): for ix, obj in enumerate(objs):
anno = pb.Annotation() anno = pb.Annotation()
bbox = obj.find('bndbox') bbox = obj.find('bndbox')
...@@ -64,6 +75,7 @@ def make_datum(image_file, xml_file): ...@@ -64,6 +75,7 @@ def make_datum(image_file, xml_file):
cls = obj.find('name').text.strip() cls = obj.find('name').text.strip()
anno.x1, anno.y1, anno.x2, anno.y2 = (x1, y1, x2, y2) anno.x1, anno.y1, anno.x2, anno.y2 = (x1, y1, x2, y2)
anno.name = cls anno.name = cls
class_name_set.add(cls)
anno.difficult = False anno.difficult = False
if obj.find('difficult') is not None: if obj.find('difficult') is not None:
anno.difficult = int(obj.find('difficult').text) == 1 anno.difficult = int(obj.find('difficult').text) == 1
...@@ -72,13 +84,15 @@ def make_datum(image_file, xml_file): ...@@ -72,13 +84,15 @@ def make_datum(image_file, xml_file):
return anno_datum return anno_datum
def make_db(database_file, def make_db(
images_path, database_file,
annotations_path, images_path,
imagesets_path, annotations_path,
splits): imagesets_path,
splits,
):
if os.path.isdir(database_file) is True: if os.path.isdir(database_file) is True:
raise ValueError('The database path is already exist.') print('Warning: The database path is already exist.')
else: else:
root_dir = database_file[:database_file.rfind('/')] root_dir = database_file[:database_file.rfind('/')]
if not os.path.exists(root_dir): if not os.path.exists(root_dir):
...@@ -95,12 +109,12 @@ def make_db(database_file, ...@@ -95,12 +109,12 @@ def make_db(database_file,
print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime())) print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
db = LMDB(max_commit=10000) db = LMDB(max_commit=1000)
db.open(database_file, mode='w') db.open(database_file, mode='w')
count = 0 count = 0
total_line = 0 total_line = 0
start_time = time.time() start_time = time.time()
zfill_flag = '{0:0%d}' % (ZFILL) zfill_flag = '{0:0%d}' % ZFILL
for db_idx, split in enumerate(splits): for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt') split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
...@@ -109,18 +123,18 @@ def make_db(database_file, ...@@ -109,18 +123,18 @@ def make_db(database_file,
lines = f.readlines() lines = f.readlines()
total_line += len(lines) total_line += len(lines)
for line in lines: for line in lines:
count += 1
if count % 10000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
filename = line.strip() filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg') image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml') xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
datum = make_datum(image_file, xml_file) datum = make_datum(image_file, xml_file)
db.put(zfill_flag.format(count - 1), datum.SerializeToString()) if datum is not None:
count += 1
db.put(zfill_flag.format(count - 1), datum.SerializeToString())
if count % 1000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
now_time = time.time() now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time)) print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
...@@ -131,4 +145,4 @@ def make_db(database_file, ...@@ -131,4 +145,4 @@ def make_db(database_file,
print('{0} images have been stored in the database.'.format(total_line)) print('{0} images have been stored in the database.'.format(total_line))
print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time)) print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
print('The size of database is {0} MB.'.format( print('The size of database is {0} MB.'.format(
float(os.path.getsize(database_file + '/data.mdb') / 1000 / 1000))) float(os.path.getsize(database_file + '/data.mdb') / 1000 / 1000)))
\ No newline at end of file
# -------------------------------------------------------- # ------------------------------------------------------------
# Detectron # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# Copyright(c) 2017 SeetaTech #
# Written by Ting Pan # Licensed under the BSD 2-Clause License.
# -------------------------------------------------------- # You should have received a copy of the BSD 2-Clause License
\ No newline at end of file # along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
...@@ -155,11 +155,11 @@ __C.TEST.SCORE_THRESH = 0.05 ...@@ -155,11 +155,11 @@ __C.TEST.SCORE_THRESH = 0.05
# The threshold for predicting masks # The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5 __C.TEST.BINARY_THRESH = 0.5
## NMS threshold used on RPN proposals # NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7 __C.TEST.RPN_NMS_THRESH = 0.7
## Number of top scoring boxes to keep before apply NMS to RPN proposals # Number of top scoring boxes to keep before apply NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000 __C.TEST.RPN_PRE_NMS_TOP_N = 6000
## Number of top scoring boxes to keep after applying NMS to RPN proposals # Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300 __C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 0 __C.TEST.RPN_MIN_SIZE = 0
...@@ -199,7 +199,7 @@ __C.MODEL.TYPE = '' ...@@ -199,7 +199,7 @@ __C.MODEL.TYPE = ''
# The float precision for training and inference # The float precision for training and inference
# (FLOAT32, FLOAT16,) # (FLOAT32, FLOAT16,)
__C.MODEL.DATA_TYPE= 'FLOAT32' __C.MODEL.DATA_TYPE = 'FLOAT32'
# The backbone # The backbone
__C.MODEL.BACKBONE = '' __C.MODEL.BACKBONE = ''
...@@ -394,13 +394,13 @@ __C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4'] ...@@ -394,13 +394,13 @@ __C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']
# min_jaccard_overlap, max_jaccard_overlap, # min_jaccard_overlap, max_jaccard_overlap,
# max_trials, max_sample) # max_trials, max_sample)
__C.SSD.SAMPLERS = [ __C.SSD.SAMPLERS = [
(1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1, 1), # Entire image (1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1, 1), # Entire image
(0.3, 1.0, 0.5, 2.0, 0.1, 1.0, 10, 1), # IoU >= 0.1 (0.3, 1.0, 0.5, 2.0, 0.1, 1.0, 10, 1), # IoU >= 0.1
(0.3, 1.0, 0.5, 2.0, 0.3, 1.0, 10, 1), # IoU >= 0.3 (0.3, 1.0, 0.5, 2.0, 0.3, 1.0, 10, 1), # IoU >= 0.3
(0.3, 1.0, 0.5, 2.0, 0.5, 1.0, 5, 1), # IoU >= 0.5 (0.3, 1.0, 0.5, 2.0, 0.5, 1.0, 5, 1), # IoU >= 0.5
(0.3, 1.0, 0.5, 2.0, 0.7, 1.0, 5, 1), # IoU >= 0.7 (0.3, 1.0, 0.5, 2.0, 0.7, 1.0, 5, 1), # IoU >= 0.7
(0.3, 1.0, 0.5, 2.0, 0.9, 1.0, 5, 1), # IoU >= 0.9 (0.3, 1.0, 0.5, 2.0, 0.9, 1.0, 5, 1), # IoU >= 0.9
(0.3, 1.0, 0.5, 2.0, 0.0, 1.0, 1, 1), # Any patches (0.3, 1.0, 0.5, 2.0, 0.0, 1.0, 1, 1), # Any patches
] ]
...@@ -560,10 +560,11 @@ def _merge_a_into_b(a, b): ...@@ -560,10 +560,11 @@ def _merge_a_into_b(a, b):
"""Merge config dictionary a into config dictionary b, clobbering the """Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a. options in b whenever they are also specified in a.
""" """
if not isinstance(a, dict): return if not isinstance(a, dict):
return
for k, v in a.items(): for k, v in a.items():
# a must specify keys that are in b # a must specify keys that are in b
if not k in b: if k not in b:
raise KeyError('{} is not a valid config key'.format(k)) raise KeyError('{} is not a valid config key'.format(k))
# the types must match, too # the types must match, too
v = _check_and_coerce_cfg_value_type(v, b[k], k) v = _check_and_coerce_cfg_value_type(v, b[k], k)
...@@ -598,15 +599,15 @@ def cfg_from_list(cfg_list): ...@@ -598,15 +599,15 @@ def cfg_from_list(cfg_list):
assert d.has_key(subkey) assert d.has_key(subkey)
d = d[subkey] d = d[subkey]
subkey = key_list[-1] subkey = key_list[-1]
assert d.has_key(subkey) assert subkey in d
try: try:
value = literal_eval(v) value = literal_eval(v)
except: except:
# handle the case when v is a string literal # Handle the case when v is a string literal
value = v value = v
assert type(value) == type(d[subkey]), \ assert type(value) == type(d[subkey]), \
'type {} does not match original type {}'.format( 'type {} does not match original type {}'\
type(value), type(d[subkey])) .format(type(value), type(d[subkey]))
d[subkey] = value d[subkey] = value
...@@ -618,8 +619,10 @@ def _check_and_coerce_cfg_value_type(value_a, value_b, key): ...@@ -618,8 +619,10 @@ def _check_and_coerce_cfg_value_type(value_a, value_b, key):
# The types must match (with some exceptions) # The types must match (with some exceptions)
type_b = type(value_b) type_b = type(value_b)
type_a = type(value_a) type_a = type(value_a)
if type_a is type_b: return value_a if type_a is type_b:
if type_b is float and type_a is int: return float(value_a) return value_a
if type_b is float and type_a is int:
return float(value_a)
# Exceptions: numpy arrays, strings, tuple<->list # Exceptions: numpy arrays, strings, tuple<->list
if isinstance(value_b, np.ndarray): if isinstance(value_b, np.ndarray):
...@@ -635,4 +638,4 @@ def _check_and_coerce_cfg_value_type(value_a, value_b, key): ...@@ -635,4 +638,4 @@ def _check_and_coerce_cfg_value_type(value_a, value_b, key):
'Type mismatch ({} vs. {}) with values ({} vs. {}) for config ' 'Type mismatch ({} vs. {}) with values ({} vs. {}) for config '
'key: {}'.format(type_b, type_a, value_b, value_a, key) 'key: {}'.format(type_b, type_a, value_b, value_a, key)
) )
return value_a return value_a
\ No newline at end of file
...@@ -18,7 +18,8 @@ import shutil ...@@ -18,7 +18,8 @@ import shutil
import time import time
import numpy as np import numpy as np
from lib.core.config import cfg, cfg_from_file from lib.core.config import cfg
from lib.core.config import cfg_from_file
class Coordinator(object): class Coordinator(object):
...@@ -44,7 +45,8 @@ class Coordinator(object): ...@@ -44,7 +45,8 @@ class Coordinator(object):
def _path_at(self, file, auto_create=True): def _path_at(self, file, auto_create=True):
path = os.path.abspath(os.path.join(self.experiment_dir, file)) path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path): os.makedirs(path) if auto_create and not os.path.exists(path):
os.makedirs(path)
return path return path
def checkpoints_dir(self): def checkpoints_dir(self):
...@@ -67,8 +69,10 @@ class Coordinator(object): ...@@ -67,8 +69,10 @@ class Coordinator(object):
return os.path.join(self.checkpoints_dir(), files[ix]), step return os.path.join(self.checkpoints_dir(), files[ix]), step
steps.append(step) steps.append(step)
if global_step is None: if global_step is None:
if len(files) == 0: return None, 0 if len(files) == 0:
last_idx = int(np.argmax(steps)); last_step = steps[last_idx] return None, 0
last_idx = int(np.argmax(steps))
last_step = steps[last_idx]
return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step
return None, 0 return None, 0
result = locate() result = locate()
...@@ -80,4 +84,4 @@ class Coordinator(object): ...@@ -80,4 +84,4 @@ class Coordinator(object):
def delete_experiment(self): def delete_experiment(self):
if os.path.exists(self.experiment_dir): if os.path.exists(self.experiment_dir):
shutil.rmtree(self.experiment_dir) shutil.rmtree(self.experiment_dir)
\ No newline at end of file
...@@ -30,7 +30,8 @@ class Solver(object): ...@@ -30,7 +30,8 @@ class Solver(object):
self.opt_arguments = { self.opt_arguments = {
'scale_gradient': 1. / ( 'scale_gradient': 1. / (
cfg.SOLVER.LOSS_SCALING * cfg.SOLVER.LOSS_SCALING *
cfg.SOLVER.ITER_SIZE), cfg.SOLVER.ITER_SIZE
),
'clip_gradient': float(cfg.SOLVER.CLIP_NORM), 'clip_gradient': float(cfg.SOLVER.CLIP_NORM),
'weight_decay': cfg.SOLVER.WEIGHT_DECAY, 'weight_decay': cfg.SOLVER.WEIGHT_DECAY,
} }
...@@ -57,8 +58,10 @@ class Solver(object): ...@@ -57,8 +58,10 @@ class Solver(object):
} }
] ]
for name, param in self.detector.named_parameters(): for name, param in self.detector.named_parameters():
if 'bias' in name: param_groups[1]['params'].append(param) if 'bias' in name:
else: param_groups[0]['params'].append(param) param_groups[1]['params'].append(param)
else:
param_groups[0]['params'].append(param)
return param_groups return param_groups
def set_learning_rate(self): def set_learning_rate(self):
...@@ -67,8 +70,10 @@ class Solver(object): ...@@ -67,8 +70,10 @@ class Solver(object):
if self._current_step < len(cfg.SOLVER.STEPS) \ if self._current_step < len(cfg.SOLVER.STEPS) \
and self.iter >= cfg.SOLVER.STEPS[self._current_step]: and self.iter >= cfg.SOLVER.STEPS[self._current_step]:
self._current_step = self._current_step + 1 self._current_step = self._current_step + 1
logger.info('MultiStep Status: Iteration {}, step = {}' \ logger.info(
.format(self.iter, self._current_step)) 'MultiStep Status: Iteration {}, step = {}'
.format(self.iter, self._current_step)
)
new_lr = cfg.SOLVER.BASE_LR * ( new_lr = cfg.SOLVER.BASE_LR * (
cfg.SOLVER.GAMMA ** self._current_step) cfg.SOLVER.GAMMA ** self._current_step)
self.optimizer.param_groups[0]['lr'] = \ self.optimizer.param_groups[0]['lr'] = \
...@@ -77,13 +82,14 @@ class Solver(object): ...@@ -77,13 +82,14 @@ class Solver(object):
raise ValueError('Unknown lr policy: ' + policy) raise ValueError('Unknown lr policy: ' + policy)
def one_step(self): def one_step(self):
def add_loss(x, y):
return y if x is None else x + y
# Forward & Backward & Compute_loss # Forward & Backward & Compute_loss
iter_size = cfg.SOLVER.ITER_SIZE iter_size = cfg.SOLVER.ITER_SIZE
loss_scaling = cfg.SOLVER.LOSS_SCALING loss_scaling = cfg.SOLVER.LOSS_SCALING
run_time = 0.; stats = {'loss': {'total': 0.}, 'iter': self.iter} stats = {'loss': {'total': 0.}, 'iter': self.iter}
add_loss = lambda x, y: y if x is None else x + y
tic = time.time() run_time, tic = 0., time.time()
if iter_size > 1: if iter_size > 1:
# Dragon is designed for manual gradients accumulating # Dragon is designed for manual gradients accumulating
...@@ -99,10 +105,13 @@ class Solver(object): ...@@ -99,10 +105,13 @@ class Solver(object):
stats['loss'][k] = 0. stats['loss'][k] = 0.
total_loss = add_loss(total_loss, v) total_loss = add_loss(total_loss, v)
stats['loss'][k] += float(v) * loss_scaling stats['loss'][k] += float(v) * loss_scaling
if loss_scaling != 1.: total_loss *= loss_scaling if loss_scaling != 1.:
total_loss *= loss_scaling
stats['loss']['total'] += float(total_loss) stats['loss']['total'] += float(total_loss)
total_loss.backward() total_loss.backward()
if iter_size > 1: self.optimizer.accumulate_grad() if iter_size > 1:
self.optimizer.accumulate_grad()
run_time += (time.time() - tic) run_time += (time.time() - tic)
...@@ -190,5 +199,8 @@ def get_solver_func(type): ...@@ -190,5 +199,8 @@ def get_solver_func(type):
elif type == 'Adam': elif type == 'Adam':
return AdamSolver return AdamSolver
else: else:
raise ValueError('Unsupported solver type: {}.\n' raise ValueError(
'Excepted in (MomentumSGD, Nesterov, RMSProp, Adam)'.format(type)) 'Unsupported solver type: {}.\n'
\ No newline at end of file 'Excepted in (MomentumSGD, Nesterov, RMSProp, Adam).'
.format(type)
)
...@@ -33,25 +33,27 @@ class TestServer(object): ...@@ -33,25 +33,27 @@ class TestServer(object):
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = DataReader(**{'source': self.imdb.source}) self.data_reader = DataReader(**{'source': self.imdb.source})
self.data_transformer = DataTransformer() self.data_transformer = DataTransformer()
self.data_reader.Q_out = Queue(cfg.TEST.IMS_PER_BATCH) self.data_reader.q_out = Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.start() self.data_reader.start()
self.gt_recs = OrderedDict() self.gt_recs = OrderedDict()
self.output_dir = output_dir self.output_dir = output_dir
if cfg.VIS_ON_FILE: if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis') self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir): os.makedirs(self.vis_dir) if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls): def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls() self.data_transformer = transformer_cls()
def get_image(self): def get_image(self):
serialized = self.data_reader.Q_out.get() serialized = self.data_reader.q_out.get()
image = self.data_transformer.get_image(serialized) image = self.data_transformer.get_image(serialized)
image_id, objects = self.data_transformer.get_annotations(serialized) image_id, objects = self.data_transformer.get_annotations(serialized)
self.gt_recs[image_id] = { self.gt_recs[image_id] = {
'objects': objects, 'objects': objects,
'width': image.shape[1], 'width': image.shape[1],
'height': image.shape[0]} 'height': image.shape[0],
}
return image_id, image return image_id, image
def get_save_filename(self, image_id, ext='.jpg'): def get_save_filename(self, image_id, ext='.jpg'):
...@@ -60,9 +62,10 @@ class TestServer(object): ...@@ -60,9 +62,10 @@ class TestServer(object):
def get_records(self): def get_records(self):
if len(self.gt_recs) != self.num_images: if len(self.gt_recs) != self.num_images:
raise RuntimeError('Loading {} records, ' raise RuntimeError(
'while the specific database required {}'.format( 'Loading {} records, while {} required.'
len(self.gt_recs), self.num_images)) .format(len(self.gt_recs), self.num_images),
)
return self.gt_recs return self.gt_recs
def evaluate_detections(self, all_boxes): def evaluate_detections(self, all_boxes):
...@@ -87,7 +90,8 @@ class InferServer(object): ...@@ -87,7 +90,8 @@ class InferServer(object):
self.image_idx = 0 self.image_idx = 0
if cfg.VIS_ON_FILE: if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis') self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir): os.makedirs(self.vis_dir) if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls): def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls() self.data_transformer = transformer_cls()
...@@ -99,7 +103,8 @@ class InferServer(object): ...@@ -99,7 +103,8 @@ class InferServer(object):
self.image_idx = (self.image_idx + 1) % self.num_images self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = { self.gt_recs[image_id] = {
'width': image.shape[1], 'width': image.shape[1],
'height': image.shape[0]} 'height': image.shape[0],
}
return image_id, image return image_id, image
def get_save_filename(self, image_id, ext='.jpg'): def get_save_filename(self, image_id, ext='.jpg'):
...@@ -108,15 +113,23 @@ class InferServer(object): ...@@ -108,15 +113,23 @@ class InferServer(object):
def get_records(self): def get_records(self):
if len(self.gt_recs) != self.num_images: if len(self.gt_recs) != self.num_images:
raise RuntimeError('Loading {} records, ' raise RuntimeError(
'while the specific database required {}'.format( 'Loading {} records, while {} required.'
len(self.gt_recs), self.num_images)) .format(len(self.gt_recs), self.num_images),
)
return self.gt_recs return self.gt_recs
def evaluate_detections(self, all_boxes): def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections( self.imdb.evaluate_detections(
all_boxes, self.get_records(), self.output_dir) all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks): def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations( self.imdb.evaluate_segmentations(
all_boxes, all_masks, self.get_records(), self.output_dir) all_boxes,
\ No newline at end of file all_masks,
self.get_records(),
self.output_dir,
)
...@@ -17,17 +17,17 @@ from __future__ import absolute_import ...@@ -17,17 +17,17 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import collections
import datetime import datetime
from collections import OrderedDict import os
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.core.solver import get_solver_func from lib.core.solver import get_solver_func
from lib.utils.timer import Timer
from lib.utils.stats import SmoothedValue
from lib.utils import logger from lib.utils import logger
from lib.utils.stats import SmoothedValue
from lib.utils.timer import Timer
class SolverWrapper(object): class SolverWrapper(object):
...@@ -46,18 +46,19 @@ class SolverWrapper(object): ...@@ -46,18 +46,19 @@ class SolverWrapper(object):
# Mixed precision training? # Mixed precision training?
if cfg.MODEL.DATA_TYPE.lower() == 'float16': if cfg.MODEL.DATA_TYPE.lower() == 'float16':
self.solver.detector.half() # Powerful FP16 Support self.solver.detector.half() # Powerful FP16 Support
self.solver.detector.cuda(cfg.GPU_ID) self.solver.detector.cuda(cfg.GPU_ID)
# Plan the metrics # Plan the metrics
self.metrics = OrderedDict() self.metrics = collections.OrderedDict()
if cfg.ENABLE_TENSOR_BOARD: if cfg.ENABLE_TENSOR_BOARD:
from dragon.tools.tensorboard import TensorBoard from dragon.tools.tensorboard import TensorBoard
self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs') self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs')
def snapshot(self): def snapshot(self):
if not logger.is_root(): return None if not logger.is_root():
return None
filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}' filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}'
.format(self.solver.iter) + '.pth') .format(self.solver.iter) + '.pth')
filename = os.path.join(self.output_dir, filename) filename = os.path.join(self.output_dir, filename)
...@@ -77,19 +78,35 @@ class SolverWrapper(object): ...@@ -77,19 +78,35 @@ class SolverWrapper(object):
self.board.scalar_summary('time', stats['time'], stats['iter']) self.board.scalar_summary('time', stats['time'], stats['iter'])
for k, v in self.metrics.items(): for k, v in self.metrics.items():
if k == 'total': if k == 'total':
self.board.scalar_summary('total_loss', v.GetMedianValue(), stats['iter']) self.board.scalar_summary(
else: self.board.scalar_summary(k, v.GetMedianValue(), stats['iter']) 'total_loss',
v.GetMedianValue(),
stats['iter'],
)
else:
self.board.scalar_summary(
k,
v.GetMedianValue(),
stats['iter'],
)
def step(self, display=False):
    """Run one solver iteration and record its statistics.

    The statistics returned by ``self.solver.one_step()`` are passed to
    ``add_metrics`` and ``send_metrics``. When ``display`` is true, the
    iteration summary and every metric other than 'total' are logged.
    """
    stats = self.solver.one_step()
    self.add_metrics(stats)
    self.send_metrics(stats)
    if not display:
        return
    logger.info(
        'Iteration %d, lr = %.8f, loss = %f, time = %.2fs'
        % (
            stats['iter'],
            stats['lr'],
            self.metrics['total'].GetMedianValue(),
            stats['time'],
        )
    )
    indent = ' ' * 10
    for name, meter in self.metrics.items():
        if name != 'total':
            logger.info(indent + 'Train net output({}): {}'
                        .format(name, meter.GetMedianValue()))
def train_model(self): def train_model(self):
"""Network training loop.""" """Network training loop."""
...@@ -104,9 +121,8 @@ class SolverWrapper(object): ...@@ -104,9 +121,8 @@ class SolverWrapper(object):
start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha) start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha)
# Apply 1-step SGD update # Apply 1-step SGD update
timer.tic() with timer.tic_and_toc():
self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0) self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0)
timer.toc()
if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0: if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0:
average_time = timer.average_time average_time = timer.average_time
...@@ -114,8 +130,10 @@ class SolverWrapper(object): ...@@ -114,8 +130,10 @@ class SolverWrapper(object):
cfg.SOLVER.MAX_ITERS - self.solver.iter) cfg.SOLVER.MAX_ITERS - self.solver.iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds))) eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS
logger.info('< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >' logger.info(
.format(progress, timer.average_time, eta)) '< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta)
)
if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0: if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0:
last_snapshot_iter = self.solver.iter last_snapshot_iter = self.solver.iter
...@@ -131,4 +149,4 @@ def train_net(coordinator, start_iter=0): ...@@ -131,4 +149,4 @@ def train_net(coordinator, start_iter=0):
sw.solver.iter = start_iter sw.solver.iter = start_iter
logger.info('Solving...') logger.info('Solving...')
model_paths = sw.train_model() model_paths = sw.train_model()
return model_paths return model_paths
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
...@@ -13,6 +13,10 @@ ...@@ -13,6 +13,10 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os import os
from lib.datasets.taas import TaaS from lib.datasets.taas import TaaS
...@@ -37,4 +41,4 @@ def get_imdb(name): ...@@ -37,4 +41,4 @@ def get_imdb(name):
def list_imdbs():
    """Return the names of all registered imdbs."""
    registered_names = _GLOBAL_DATA_SETS.keys()
    return registered_names
\ No newline at end of file
...@@ -61,7 +61,7 @@ class imdb(object): ...@@ -61,7 +61,7 @@ class imdb(object):
return num_entries return num_entries
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
    """Evaluate detections; this default implementation does nothing.

    NOTE(review): presumably overridden by concrete datasets - confirm.
    """
    return None
def evaluate_masks(self, all_boxes, all_masks, output_dir):
    """Evaluate masks; this default implementation does nothing.

    NOTE(review): presumably overridden by concrete datasets - confirm.
    """
    return None
\ No newline at end of file
...@@ -62,10 +62,14 @@ class TaaS(imdb): ...@@ -62,10 +62,14 @@ class TaaS(imdb):
def _get_comp_id(self): def _get_comp_id(self):
return '_' + self._salt if self.config['use_salt'] else '' return '_' + self._salt if self.config['use_salt'] else ''
def _get_prefix(self, type='bbox'): @classmethod
if type == 'bbox': return 'detections_' def _get_prefix(cls, type='bbox'):
elif type == 'segm': return 'segmentations_' if type == 'bbox':
elif type == 'kpt': return 'keypoints_' return 'detections_'
elif type == 'segm':
return 'segmentations_'
elif type == 'kpt':
return 'keypoints_'
return '' return ''
def _get_voc_results_T(self, results_folder, type='bbox'): def _get_voc_results_T(self, results_folder, type='bbox'):
...@@ -76,19 +80,22 @@ class TaaS(imdb): ...@@ -76,19 +80,22 @@ class TaaS(imdb):
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.pkl' filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.pkl'
else: else:
raise ValueError('Type of results can be either bbox or segm.') raise ValueError('Type of results can be either bbox or segm.')
if not os.path.exists(results_folder): os.makedirs(results_folder) if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename) return os.path.join(results_folder, filename)
def _get_coco_annotations_T(self, results_folder, type='bbox'): def _get_coco_annotations_T(self, results_folder, type='bbox'):
# experiments/model_id/annotations/[GT]detections_taas_<comp_id>.json # experiments/model_id/annotations/[GT]detections_taas_<comp_id>.json
filename = '[GT]_' + self._get_prefix(type) + self._name + '.json' filename = '[GT]_' + self._get_prefix(type) + self._name + '.json'
if not os.path.exists(results_folder): os.makedirs(results_folder) if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename) return os.path.join(results_folder, filename)
def _get_coco_results_T(self, results_folder, type='bbox'): def _get_coco_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>.json # experiments/model_id/results/detections_taas_<comp_id>.json
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '.json' filename = self._get_prefix(type) + self._name + self._get_comp_id() + '.json'
if not os.path.exists(results_folder): os.makedirs(results_folder) if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename) return os.path.join(results_folder, filename)
############################################## ##############################################
...@@ -136,7 +143,8 @@ class TaaS(imdb): ...@@ -136,7 +143,8 @@ class TaaS(imdb):
with open(filename, 'wt') as f: with open(filename, 'wt') as f:
ix = 0 ix = 0
for image_id, rec in gt_recs.items(): for image_id, rec in gt_recs.items():
dets = all_boxes[cls_ind][ix]; ix += 1 dets = all_boxes[cls_ind][ix]
ix += 1
if len(dets) == 0: if len(dets) == 0:
continue continue
for k in range(dets.shape[0]): for k in range(dets.shape[0]):
...@@ -148,7 +156,8 @@ class TaaS(imdb): ...@@ -148,7 +156,8 @@ class TaaS(imdb):
def _write_voc_segm_results(self, all_boxes, all_masks, output_dir): def _write_voc_segm_results(self, all_boxes, all_masks, output_dir):
for cls_inds, cls in enumerate(self.classes): for cls_inds, cls in enumerate(self.classes):
if cls == '__background__': continue if cls == '__background__':
continue
print('Writing {} VOC format segm results'.format(cls)) print('Writing {} VOC format segm results'.format(cls))
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls) segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections') bbox_filename = segm_filename.replace('segmentations', 'detections')
...@@ -161,11 +170,15 @@ class TaaS(imdb): ...@@ -161,11 +170,15 @@ class TaaS(imdb):
aps = [] aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No')) print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self._classes): for i, cls in enumerate(self._classes):
if cls == '__background__': continue if cls == '__background__':
continue
det_file = self._get_voc_results_T(output_dir).format(cls) det_file = self._get_voc_results_T(output_dir).format(cls)
rec, prec, ap = voc_bbox_eval(det_file, gt_recs, cls, rec, prec, ap = voc_bbox_eval(
IoU=IoU, use_07_metric=use_07_metric) det_file, gt_recs, cls,
if ap > 0: aps += [ap] IoU=IoU, use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap)) print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps))) print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
...@@ -173,12 +186,16 @@ class TaaS(imdb): ...@@ -173,12 +186,16 @@ class TaaS(imdb):
aps = [] aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No')) print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.classes): for i, cls in enumerate(self.classes):
if cls == '__background__': continue if cls == '__background__':
continue
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls) segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections') bbox_filename = segm_filename.replace('segmentations', 'detections')
ap = voc_segm_eval(bbox_filename, segm_filename, gt_recs, cls, ap = voc_segm_eval(
IoU=IoU, use_07_metric=use_07_metric) bbox_filename, segm_filename, gt_recs, cls,
if ap > 0: aps += [ap] IoU=IoU, use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap)) print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps))) print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
...@@ -188,12 +205,16 @@ class TaaS(imdb): ...@@ -188,12 +205,16 @@ class TaaS(imdb):
# # # #
############################################## ##############################################
def _get_coco_image_id(self, image_name): @classmethod
def _get_coco_image_id(cls, image_name):
image_id = image_name.split('_')[-1].split('.')[0] image_id = image_name.split('_')[-1].split('.')[0]
try: return int(image_id) try:
except: return image_name return int(image_id)
except:
return image_name
def _encode_coco_masks(self, masks, boxes, im_h, im_w): @classmethod
def _encode_coco_masks(cls, masks, boxes, im_h, im_w):
num_pred = len(boxes) num_pred = len(boxes)
assert len(masks) == num_pred assert len(masks) == num_pred
mask_image = np.zeros((im_h, im_w, num_pred), dtype=np.uint8, order='F') mask_image = np.zeros((im_h, im_w, num_pred), dtype=np.uint8, order='F')
...@@ -216,26 +237,29 @@ class TaaS(imdb): ...@@ -216,26 +237,29 @@ class TaaS(imdb):
y1 = max(ref_box[1], 0) y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w) x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h) y2 = min(ref_box[3] + 1, im_h)
mask_image[y1 : y2, x1 : x2, i] = \ mask_image[y1:y2, x1:x2, i] = \
mask[(y1 - ref_box[1]) : (y2 - ref_box[1]), mask[(y1 - ref_box[1]):(y2 - ref_box[1]),
(x1 - ref_box[0]) : (x2 - ref_box[0])] (x1 - ref_box[0]):(x2 - ref_box[0])]
return encode_masks(mask_image) return encode_masks(mask_image)
def _write_coco_bbox_annotations(self, gt_recs, output_dir): def _write_coco_bbox_annotations(self, gt_recs, output_dir):
dataset = {}
# Build images # Build images
dataset['images'] = [] dataset = {'images': []}
for image_name, rec in gt_recs.items(): for image_name, rec in gt_recs.items():
dataset['images'].append({ dataset['images'].append({
'file_name': image_name + '.jpg', 'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name), 'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width']}) 'height': rec['height'], 'width': rec['width'],
})
# Build categories # Build categories
dataset['categories'] = [] dataset['categories'] = []
for cls in self._classes: for cls in self._classes:
if cls == '__background__': continue if cls == '__background__':
continue
dataset['categories'].append({ dataset['categories'].append({
'name': cls, 'id': self._class_to_ind[cls]}) 'name': cls,
'id': self._class_to_ind[cls],
})
# Build annotations # Build annotations
dataset['annotations'] = [] dataset['annotations'] = []
ann_id = 0 ann_id = 0
...@@ -249,27 +273,32 @@ class TaaS(imdb): ...@@ -249,27 +273,32 @@ class TaaS(imdb):
'area': w * h, 'area': w * h,
'iscrowd': obj['difficult'], 'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name), 'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']]}) 'category_id': self._class_to_ind[obj['name']],
})
ann_id += 1 ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='bbox') ann_file = self._get_coco_annotations_T(output_dir, type='bbox')
with open(ann_file, 'w') as f: json.dump(dataset, f) with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file return ann_file
def _write_coco_segm_annotations(self, gt_recs, output_dir): def _write_coco_segm_annotations(self, gt_recs, output_dir):
dataset = {}
# Build images # Build images
dataset['images'] = [] dataset = {'images': []}
for image_name, rec in gt_recs.items(): for image_name, rec in gt_recs.items():
dataset['images'].append({ dataset['images'].append({
'file_name': image_name + '.jpg', 'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name), 'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width']}) 'height': rec['height'], 'width': rec['width'],
})
# Build categories # Build categories
dataset['categories'] = [] dataset['categories'] = []
for cls in self._classes: for cls in self._classes:
if cls == '__background__': continue if cls == '__background__':
continue
dataset['categories'].append({ dataset['categories'].append({
'name': cls, 'id': self._class_to_ind[cls]}) 'name': cls,
'id': self._class_to_ind[cls],
})
# Build annotations # Build annotations
dataset['annotations'] = [] dataset['annotations'] = []
ann_id = 0 ann_id = 0
...@@ -283,20 +312,25 @@ class TaaS(imdb): ...@@ -283,20 +312,25 @@ class TaaS(imdb):
'area': w * h, 'area': w * h,
'segmentation': { 'segmentation': {
'size': [rec['height'], rec['width']], 'size': [rec['height'], rec['width']],
'counts': obj['mask']}, 'counts': obj['mask'],
},
'iscrowd': obj['difficult'], 'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name), 'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']]}) 'category_id': self._class_to_ind[obj['name']],
})
ann_id += 1 ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='segm') ann_file = self._get_coco_annotations_T(output_dir, type='segm')
with open(ann_file, 'w') as f: json.dump(dataset, f) with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file return ann_file
def _coco_bbox_results_one_category(self, boxes, cat_id, gt_recs): def _coco_bbox_results_one_category(self, boxes, cat_id, gt_recs):
ix, results = 0, [] ix, results = 0, []
for image_name, rec in gt_recs.items(): for image_name, rec in gt_recs.items():
dets = boxes[ix]; ix += 1 dets = boxes[ix]
if isinstance(dets, list) and len(dets) == 0: continue ix += 1
if isinstance(dets, list) and len(dets) == 0:
continue
dets = dets.astype(np.float) dets = dets.astype(np.float)
scores = dets[:, -1] scores = dets[:, -1]
xs = dets[:, 0] xs = dets[:, 0]
...@@ -307,7 +341,9 @@ class TaaS(imdb): ...@@ -307,7 +341,9 @@ class TaaS(imdb):
[{'image_id': self._get_coco_image_id(image_name), [{'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id, 'category_id': cat_id,
'bbox': [xs[k], ys[k], ws[k], hs[k]], 'bbox': [xs[k], ys[k], ws[k], hs[k]],
'score': scores[k]} for k in range(dets.shape[0])]) 'score': scores[k],
} for k in range(dets.shape[0])]
)
return results return results
def _coco_segm_results_one_category(self, boxes, masks, cat_id, gt_recs): def _coco_segm_results_one_category(self, boxes, masks, cat_id, gt_recs):
...@@ -321,7 +357,8 @@ class TaaS(imdb): ...@@ -321,7 +357,8 @@ class TaaS(imdb):
ix = 0 ix = 0
for image_name, rec in gt_recs.items(): for image_name, rec in gt_recs.items():
dets = boxes[ix].astype(np.float) dets = boxes[ix].astype(np.float)
msks = masks[ix]; ix += 1 msks = masks[ix]
ix += 1
keep = filter_boxes(dets) keep = filter_boxes(dets)
im_h, im_w = rec['height'], rec['width'] im_h, im_w = rec['height'], rec['width']
if len(keep) == 0: if len(keep) == 0:
...@@ -330,39 +367,47 @@ class TaaS(imdb): ...@@ -330,39 +367,47 @@ class TaaS(imdb):
mask_encode = self._encode_coco_masks( mask_encode = self._encode_coco_masks(
msks[keep], dets[keep, :4], im_h, im_w) msks[keep], dets[keep, :4], im_h, im_w)
for k in range(dets[keep].shape[0]): for k in range(dets[keep].shape[0]):
rle = mask_encode[k] rle = mask_encode[k]
if sys.version_info >= (3,0): rle['counts'] = rle['counts'].decode() if sys.version_info >= (3, 0):
rle['counts'] = rle['counts'].decode()
results.append({ results.append({
'image_id': self._get_coco_image_id(image_name), 'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id, 'category_id': cat_id,
'segmentation': rle, 'segmentation': rle,
'score': scores[k]}) 'score': scores[k],
})
return results return results
def _write_coco_bbox_results(self, all_boxes, gt_recs, output_dir): def _write_coco_bbox_results(self, all_boxes, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir) filename = self._get_coco_results_T(output_dir)
results = [] results = []
for cls_ind, cls in enumerate(self.classes): for cls_ind, cls in enumerate(self.classes):
if cls == '__background__': continue if cls == '__background__':
print('Collecting {} results ({:d}/{:d})'.format(cls, cls_ind, self.num_classes - 1)) continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls] cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_bbox_results_one_category( results.extend(self._coco_bbox_results_one_category(
all_boxes[cls_ind], cat_id, gt_recs)) all_boxes[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename)) print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid: json.dump(results, fid) with open(filename, 'w') as fid:
json.dump(results, fid)
return filename return filename
def _write_coco_segm_results(self, all_boxes, all_masks, gt_recs, output_dir): def _write_coco_segm_results(self, all_boxes, all_masks, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir, type='segm') filename = self._get_coco_results_T(output_dir, type='segm')
results = [] results = []
for cls_ind, cls in enumerate(self.classes): for cls_ind, cls in enumerate(self.classes):
if cls == '__background__': continue if cls == '__background__':
print('Collecting {} results ({:d}/{:d})'.format(cls, cls_ind, self.num_classes - 1)) continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls] cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_segm_results_one_category( results.extend(self._coco_segm_results_one_category(
all_boxes[cls_ind], all_masks[cls_ind], cat_id, gt_recs)) all_boxes[cls_ind], all_masks[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename)) print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid: json.dump(results, fid) with open(filename, 'w') as fid:
json.dump(results, fid)
return filename return filename
def _do_coco_bbox_eval(self, coco, res_file): def _do_coco_bbox_eval(self, coco, res_file):
...@@ -401,8 +446,8 @@ class TaaS(imdb): ...@@ -401,8 +446,8 @@ class TaaS(imdb):
precision = \ precision = \
coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2] coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
ap_default = np.mean(precision[precision > -1]) ap_default = np.mean(precision[precision > -1])
print ('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] ' print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
'~~~~'.format(IoU_lo_thresh, IoU_hi_thresh)) '~~~~'.format(IoU_lo_thresh, IoU_hi_thresh))
print('{:.1f}'.format(100 * ap_default)) print('{:.1f}'.format(100 * ap_default))
for cls_ind, cls in enumerate(self.classes): for cls_ind, cls in enumerate(self.classes):
if cls == '__background__': if cls == '__background__':
...@@ -460,7 +505,7 @@ class TaaS(imdb): ...@@ -460,7 +505,7 @@ class TaaS(imdb):
protocol = cfg.TEST.PROTOCOL protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol: if 'voc' in protocol:
self._write_voc_segm_results(all_boxes, all_masks, output_dir) self._write_voc_segm_results(all_boxes, all_masks, output_dir)
if not 'wo' in protocol: if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~') print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
self._do_voc_segm_eval( self._do_voc_segm_eval(
gt_recs, output_dir, IoU=0.5, gt_recs, output_dir, IoU=0.5,
...@@ -477,10 +522,12 @@ class TaaS(imdb): ...@@ -477,10 +522,12 @@ class TaaS(imdb):
cats = coco.loadCats(coco.getCatIds()) cats = coco.loadCats(coco.getCatIds())
self._class_to_cat_id = dict( self._class_to_cat_id = dict(
zip([c['name'] for c in cats], coco.getCatIds())) zip([c['name'] for c in cats], coco.getCatIds()))
else: coco = None else:
coco = None
res_file = self._write_coco_segm_results(all_boxes, all_masks, gt_recs, output_dir) res_file = self._write_coco_segm_results(all_boxes, all_masks, gt_recs, output_dir)
if not 'wo' in protocol: if 'wo' not in protocol:
if coco is None: coco = COCO(self._write_coco_segm_annotations(gt_recs, output_dir)) if coco is None:
coco = COCO(self._write_coco_segm_annotations(gt_recs, output_dir))
self._do_coco_segm_eval(coco, res_file) self._do_coco_segm_eval(coco, res_file)
def competition_mode(self, on): def competition_mode(self, on):
......
...@@ -19,16 +19,16 @@ from __future__ import print_function ...@@ -19,16 +19,16 @@ from __future__ import print_function
import cv2 import cv2
import numpy as np import numpy as np
try: try:
import cPickle import cPickle
except: except:
import pickle as cPickle import pickle as cPickle
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.mask_transform import mask_overlap
from lib.utils.boxes import expand_boxes
from lib.pycocotools.mask_utils import mask_rle2im from lib.pycocotools.mask_utils import mask_rle2im
from lib.utils.boxes import expand_boxes
from lib.utils.mask_transform import mask_overlap
def voc_ap(rec, prec, use_07_metric=False): def voc_ap(rec, prec, use_07_metric=False):
...@@ -65,8 +65,13 @@ def voc_ap(rec, prec, use_07_metric=False): ...@@ -65,8 +65,13 @@ def voc_ap(rec, prec, use_07_metric=False):
return ap return ap
def voc_bbox_eval(det_file, gt_recs, cls_name, def voc_bbox_eval(
IoU=0.5, use_07_metric=False): det_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
class_recs = {} class_recs = {}
n_pos = 0 n_pos = 0
for image_name, rec in gt_recs.items(): for image_name, rec in gt_recs.items():
...@@ -81,35 +86,35 @@ def voc_bbox_eval(det_file, gt_recs, cls_name, ...@@ -81,35 +86,35 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
'det': det 'det': det
} }
# read detections # Read detections
with open(det_file, 'r') as f: lines = f.readlines() with open(det_file, 'r') as f:
lines = f.readlines()
splitlines = [x.strip().split(' ') for x in lines] splitlines = [x.strip().split(' ') for x in lines]
image_ids = [x[0] for x in splitlines] image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines]) confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
# avoid IndexError if detecting nothing # Avoid IndexError if detecting nothing
if len(BB) == 0: return 0, 0, -1 if len(BB) == 0:
return 0, 0, -1
# sort by confidence # Sort by confidence
sorted_ind = np.argsort(-confidence) sorted_ind = np.argsort(-confidence)
BB = BB[sorted_ind, :] BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind] image_ids = [image_ids[x] for x in sorted_ind]
# go down dets and mark TPs and FPs # Go down detections and mark TPs and FPs
nd = len(image_ids) nd = len(image_ids)
tp = np.zeros(nd) tp, fp = np.zeros(nd), np.zeros(nd)
fp = np.zeros(nd)
for d in range(nd): for d in range(nd):
R = class_recs[image_ids[d]] R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float) bb = BB[d, :].astype(float)
ovmax = -np.inf ovmax, jmax = -np.inf, 0
BBGT = R['bbox'].astype(float) BBGT = R['bbox'].astype(float)
if BBGT.size > 0: if BBGT.size > 0:
# compute overlaps # Compute overlaps intersection
# intersection
ixmin = np.maximum(BBGT[:, 0], bb[0]) ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1]) iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2]) ixmax = np.minimum(BBGT[:, 2], bb[2])
...@@ -118,10 +123,10 @@ def voc_bbox_eval(det_file, gt_recs, cls_name, ...@@ -118,10 +123,10 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
ih = np.maximum(iymax - iymin + 1., 0.) ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih inters = iw * ih
# union # Union
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
(BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 2] - BBGT[:, 0] + 1.) *
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
overlaps = inters / uni overlaps = inters / uni
ovmax = np.max(overlaps) ovmax = np.max(overlaps)
...@@ -149,8 +154,14 @@ def voc_bbox_eval(det_file, gt_recs, cls_name, ...@@ -149,8 +154,14 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
return rec, prec, ap return rec, prec, ap
def voc_segm_eval(det_file, seg_file, gt_recs, cls_name, def voc_segm_eval(
IoU=0.5, use_07_metric=False): det_file,
seg_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
# 0. Constants # 0. Constants
M = cfg.MRCNN.RESOLUTION M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH binary_thresh = cfg.TEST.BINARY_THRESH
...@@ -175,8 +186,10 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name, ...@@ -175,8 +186,10 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
image_names.append(image_name) image_names.append(image_name)
# 2. Get predict pickle file for this class # 2. Get predict pickle file for this class
with open(det_file, 'rb') as f: boxes_pkl = cPickle.load(f) with open(det_file, 'rb') as f:
with open(seg_file, 'rb') as f: masks_pkl = cPickle.load(f) boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f)
# 3. Pre-compute number of total instances to allocate memory # 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs) num_images = len(gt_recs)
...@@ -185,7 +198,8 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name, ...@@ -185,7 +198,8 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
box_num += len(boxes_pkl[im_i]) box_num += len(boxes_pkl[im_i])
# avoid IndexError if detecting nothing # avoid IndexError if detecting nothing
if box_num == 0: return 0, 0, -1 if box_num == 0:
return 0, 0, -1
# 4. Re-organize all the predicted boxes # 4. Re-organize all the predicted boxes
new_boxes = np.zeros((box_num, 5)) new_boxes = np.zeros((box_num, 5))
...@@ -223,11 +237,10 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name, ...@@ -223,11 +237,10 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
fp[i] = 1 fp[i] = 1
continue continue
R = class_recs[image_name] R = class_recs[image_name]
im_h, im_w = \ im_h = gt_recs[image_name]['height']
gt_recs[image_name]['height'], \ im_w = gt_recs[image_name]['width']
gt_recs[image_name]['width']
# decode mask # Decode mask
ref_box = ref_boxes[i, :4] ref_box = ref_boxes[i, :4]
mask = new_masks[i] mask = new_masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :] padded_mask[1:-1, 1:-1] = mask[:, :]
...@@ -244,14 +257,14 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name, ...@@ -244,14 +257,14 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]), pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]),
(x1 - ref_box[0]): (x2 - ref_box[0])] (x1 - ref_box[0]): (x2 - ref_box[0])]
# calculate max region overlap # Calculate max region overlap
ovmax = -1; jmax = -1 ovmax, jmax = -1, -1
for j in range(len(R['det'])): for j in range(len(R['det'])):
gt_mask_bound = R['bbox'][j].astype(int) gt_mask_bound = R['bbox'][j].astype(int)
pred_mask_bound = new_boxes[i, :4].astype(int) pred_mask_bound = new_boxes[i, :4].astype(int)
crop_mask = R['mask'][j][gt_mask_bound[1] : gt_mask_bound[3] + 1, crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0] : gt_mask_bound[2] + 1] gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask) ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask)
...@@ -276,4 +289,4 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name, ...@@ -276,4 +289,4 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
# avoid divide by zero in case the first matches a difficult gt # avoid divide by zero in case the first matches a difficult gt
prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps) prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric=use_07_metric) ap = voc_ap(rec, prec, use_07_metric=use_07_metric)
return ap return ap
\ No newline at end of file
...@@ -13,7 +13,7 @@ from __future__ import absolute_import ...@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.faster_rcnn.layers.proposal_layer import ProposalLayer from lib.faster_rcnn.layers.proposal_layer import ProposalLayer
from lib.faster_rcnn.layers.proposal_target_layer import ProposalTargetLayer from lib.faster_rcnn.layers.proposal_target_layer import ProposalTargetLayer
\ No newline at end of file
...@@ -7,4 +7,4 @@ ...@@ -7,4 +7,4 @@
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
\ No newline at end of file
...@@ -13,21 +13,21 @@ from __future__ import absolute_import ...@@ -13,21 +13,21 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import multiprocessing
import numpy as np import numpy as np
from multiprocessing import Process
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import im_list_to_blob from lib.utils.blob import im_list_to_blob
class BlobFetcher(Process): class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(BlobFetcher, self).__init__() super(BlobFetcher, self).__init__()
self.Q1_in = self.Q2_in = self.Q_out = None self.q1_in = self.q2_in = self.q_out = None
self.daemon = True self.daemon = True
def get(self, Q_in): def get(self, Q_in):
processed_ims = []; ims_info = []; all_boxes = [] processed_ims, ims_info, all_boxes = [], [], []
for ix in range(cfg.TRAIN.IMS_PER_BATCH): for ix in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = Q_in.get() im, im_scale, gt_boxes = Q_in.get()
processed_ims.append(im) processed_ims.append(im)
...@@ -46,7 +46,7 @@ class BlobFetcher(Process): ...@@ -46,7 +46,7 @@ class BlobFetcher(Process):
def run(self): def run(self):
while True: while True:
if self.Q1_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH: if self.q1_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q_out.put(self.get(self.Q1_in)) self.q_out.put(self.get(self.q1_in))
elif self.Q2_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH: elif self.q2_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q_out.put(self.get(self.Q2_in)) self.q_out.put(self.get(self.q2_in))
\ No newline at end of file
...@@ -13,16 +13,17 @@ from __future__ import absolute_import ...@@ -13,16 +13,17 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from multiprocessing import Queue
import time import time
import dragon
import pprint import pprint
from multiprocessing import Queue
import dragon.core.mpi as mpi
from lib.core.config import cfg from lib.core.config import cfg
import lib.utils.logger as logger
from lib.faster_rcnn.data.data_reader import DataReader from lib.faster_rcnn.data.data_reader import DataReader
from lib.faster_rcnn.data.data_transformer import DataTransformer from lib.faster_rcnn.data.data_transformer import DataTransformer
from lib.faster_rcnn.data.blob_fetcher import BlobFetcher from lib.faster_rcnn.data.blob_fetcher import BlobFetcher
from lib.utils import logger
class DataBatch(object): class DataBatch(object):
...@@ -53,13 +54,14 @@ class DataBatch(object): ...@@ -53,13 +54,14 @@ class DataBatch(object):
super(DataBatch, self).__init__() super(DataBatch, self).__init__()
# Init mpi # Init mpi
global_rank, local_rank, group_size = 0, 0, 1 global_rank, local_rank, group_size = 0, 0, 1
if mpi.Is_Init(): if dragon.mpi.is_init():
idx, group = mpi.AllowParallel() group = dragon.mpi.is_parallel()
if idx != -1: # DataParallel if group is not None: # DataParallel
global_rank = mpi.Rank() global_rank = dragon.mpi.rank()
group_size = len(group) group_size = len(group)
for i, node in enumerate(group): for i, node in enumerate(group):
if global_rank == node: local_rank = i if global_rank == node:
local_rank = i
kwargs['group_size'] = group_size kwargs['group_size'] = group_size
# Configuration # Configuration
...@@ -89,7 +91,7 @@ class DataBatch(object): ...@@ -89,7 +91,7 @@ class DataBatch(object):
self._readers = [] self._readers = []
for i in range(self._num_readers): for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs)) self._readers.append(DataReader(**kwargs))
self._readers[-1].Q_out = self.Q1 self._readers[-1].q_out = self.Q1
for i in range(self._num_readers): for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers part_idx, num_parts = i, self._num_readers
...@@ -106,9 +108,9 @@ class DataBatch(object): ...@@ -106,9 +108,9 @@ class DataBatch(object):
for i in range(self._num_transformers): for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs) transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers) transformer._rng_seed += (i + local_rank * self._num_transformers)
transformer.Q_in = self.Q1 transformer.q_in = self.Q1
transformer.Q1_out = self.Q21 transformer.q1_out = self.Q21
transformer.Q2_out = self.Q22 transformer.q2_out = self.Q22
transformer.start() transformer.start()
self._transformers.append(transformer) self._transformers.append(transformer)
time.sleep(0.1) time.sleep(0.1)
...@@ -117,15 +119,17 @@ class DataBatch(object): ...@@ -117,15 +119,17 @@ class DataBatch(object):
self._fetchers = [] self._fetchers = []
for i in range(self._num_fetchers): for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs) fetcher = BlobFetcher(**kwargs)
fetcher.Q1_in = self.Q21 fetcher.q1_in = self.Q21
fetcher.Q2_in = self.Q22 fetcher.q2_in = self.Q22
fetcher.Q_out = self.Q3 fetcher.q_out = self.Q3
fetcher.start() fetcher.start()
self._fetchers.append(fetcher) self._fetchers.append(fetcher)
time.sleep(0.1) time.sleep(0.1)
# Prevent echoing on multiple nodes # Prevent echoing on multiple nodes
if local_rank == 0: self.echo() if local_rank == 0:
self.echo()
def cleanup(): def cleanup():
def terminate(processes): def terminate(processes):
for process in processes: for process in processes:
...@@ -137,6 +141,7 @@ class DataBatch(object): ...@@ -137,6 +141,7 @@ class DataBatch(object):
logger.info('Terminating DataTransformer ......') logger.info('Terminating DataTransformer ......')
terminate(self._readers) terminate(self._readers)
logger.info('Terminating DataReader......') logger.info('Terminating DataReader......')
import atexit import atexit
atexit.register(cleanup) atexit.register(cleanup)
......
...@@ -14,40 +14,42 @@ from __future__ import division ...@@ -14,40 +14,42 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import math import math
import numpy
import multiprocessing import multiprocessing
import numpy
from dragon import config as _cfg from dragon.tools import db
from dragon.tools import db as _db from lib.core.config import cfg
class DataReader(multiprocessing.Process): class DataReader(multiprocessing.Process):
"""DataReader is deployed to queue encoded str from `LMDB`_. """Collect encoded str from `LMDB`_.
It is supported to adaptively partition and shuffle records over all distributed nodes. Partition and shuffle records over distributed nodes.
""" Parameters
def __init__(self, **kwargs): ----------
"""Construct a ``DataReader``. source : str
The path of database.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=2048
The number of chunks to split.
Parameters """
----------
source : str
The path of database.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=2048
The number of chunks to split.
""" def __init__(self, **kwargs):
"""Create a DataReader."""
super(DataReader, self).__init__() super(DataReader, self).__init__()
self._source = kwargs.get('source', '') self._source = kwargs.get('source', '')
self._use_shuffle = kwargs.get('shuffle', False) self._use_shuffle = kwargs.get('shuffle', False)
self._num_chunks = kwargs.get('num_chunks', 2048) self._num_chunks = kwargs.get('num_chunks', 2048)
self._part_idx, self._num_parts = 0, 1 self._part_idx, self._num_parts = 0, 1
self._cursor, self._chunk_cursor = 0, 0 self._cursor, self._chunk_cursor = 0, 0
self._rng_seed = _cfg.GetRandomSeed() self._chunk_size, self._perm_size = 0, 0
self.Q_out = None self._head, self._tail, self._num_entries = 0, 0, 0
self._db, self._zfill, self._perm = None, None, None
self._rng_seed = cfg.RNG_SEED
self.q_out = None
self.daemon = True self.daemon = True
def element(self): def element(self):
...@@ -69,10 +71,6 @@ class DataReader(multiprocessing.Process): ...@@ -69,10 +71,6 @@ class DataReader(multiprocessing.Process):
target : int target : int
The key of the record. The key of the record.
Returns
-------
None
Notes Notes
----- -----
The redirection reopens the database. The redirection reopens the database.
...@@ -88,17 +86,12 @@ class DataReader(multiprocessing.Process): ...@@ -88,17 +86,12 @@ class DataReader(multiprocessing.Process):
self._db.set(str(target).zfill(self._zfill)) self._db.set(str(target).zfill(self._zfill))
def reset(self): def reset(self):
"""Reset the cursor and environment. """Reset the cursor and environment."""
Returns
-------
None
"""
if self._num_parts > 1 or self._use_shuffle: if self._num_parts > 1 or self._use_shuffle:
self._chunk_cursor = 0 self._chunk_cursor = 0
self._part_idx = (self._part_idx + 1) % self._num_parts self._part_idx = (self._part_idx + 1) % self._num_parts
if self._use_shuffle: self._perm = numpy.random.permutation(self._perm_size) if self._use_shuffle:
self._perm = numpy.random.permutation(self._perm_size)
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor] self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._tail = self._head * self._chunk_size self._tail = self._head * self._chunk_size
if self._head >= self._num_entries: self.next_chunk() if self._head >= self._num_entries: self.next_chunk()
...@@ -109,26 +102,15 @@ class DataReader(multiprocessing.Process): ...@@ -109,26 +102,15 @@ class DataReader(multiprocessing.Process):
self.redirect(self._head) self.redirect(self._head)
def next_record(self): def next_record(self):
"""Step the cursor of records. """Step the cursor of records."""
Returns
-------
None
"""
self._db.next() self._db.next()
self._cursor += 1 self._cursor += 1
def next_chunk(self): def next_chunk(self):
"""Step the cursor of shuffling chunks. """Step the cursor of chunks."""
Returns
-------
None
"""
self._chunk_cursor += 1 self._chunk_cursor += 1
if self._chunk_cursor >= self._perm_size: self.reset() if self._chunk_cursor >= self._perm_size:
self.reset()
else: else:
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor] self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._head = self._head * self._chunk_size self._head = self._head * self._chunk_size
...@@ -140,18 +122,12 @@ class DataReader(multiprocessing.Process): ...@@ -140,18 +122,12 @@ class DataReader(multiprocessing.Process):
self.redirect(self._head) self.redirect(self._head)
def run(self): def run(self):
"""Start the process. """Start the process."""
Returns
-------
None
"""
# Fix seed # Fix seed
numpy.random.seed(self._rng_seed) numpy.random.seed(self._rng_seed)
# Init db # Init db
self._db = _db.LMDB() self._db = db.LMDB()
self._db.open(self._source) self._db.open(self._source)
self._zfill = self._db.zfill() self._zfill = self._db.zfill()
self._num_entries = self._db.num_entries() self._num_entries = self._db.num_entries()
...@@ -166,14 +142,14 @@ class DataReader(multiprocessing.Process): ...@@ -166,14 +142,14 @@ class DataReader(multiprocessing.Process):
# Search an optimal chunk size (Chunk-Wise) # Search an optimal chunk size (Chunk-Wise)
min_size, max_size = \ min_size, max_size = \
1, self._db._total_size * 1.0 \ 1, self._db._total_size * 1.0 \
/ (self._num_chunks * (1 << 20)) / (self._num_chunks * (1 << 20))
while min_size * 2 < max_size: min_size *= 2 while min_size * 2 < max_size: min_size *= 2
self._perm_size = int(math.ceil( self._perm_size = int(math.ceil(
self._db._total_size * 1.1 / self._db._total_size * 1.1 /
(self._num_parts * min_size << 20))) (self._num_parts * min_size << 20)))
self._chunk_size = int( self._chunk_size = int(
self._num_entries * 1.0 / self._num_entries * 1.0 /
(self._perm_size * self._num_parts) + 1) (self._perm_size * self._num_parts) + 1)
limit = (self._num_parts - 0.5) * self._perm_size * self._chunk_size limit = (self._num_parts - 0.5) * self._perm_size * self._chunk_size
if self._num_entries <= limit: if self._num_entries <= limit:
# Roll back to Record-Wise shuffle # Roll back to Record-Wise shuffle
...@@ -189,9 +165,10 @@ class DataReader(multiprocessing.Process): ...@@ -189,9 +165,10 @@ class DataReader(multiprocessing.Process):
# Run! # Run!
while True: while True:
self.Q_out.put(self.element()) self.q_out.put(self.element())
self.next_record() self.next_record()
if self._cursor >= self._tail: if self._cursor >= self._tail:
if self._num_parts > 1 or self._use_shuffle: if self._num_parts > 1 or self._use_shuffle:
self.next_chunk() self.next_chunk()
else: self.reset() else:
\ No newline at end of file self.reset()
...@@ -13,7 +13,7 @@ from __future__ import absolute_import ...@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from multiprocessing import Process import multiprocessing
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
...@@ -28,11 +28,11 @@ except ImportError as e: ...@@ -28,11 +28,11 @@ except ImportError as e:
from lib.core.config import cfg from lib.core.config import cfg
from lib.proto import anno_pb2 as pb from lib.proto import anno_pb2 as pb
from lib.utils import logger
from lib.utils.blob import prep_im_for_blob from lib.utils.blob import prep_im_for_blob
import lib.utils.logger as logger
class DataTransformer(Process): class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(DataTransformer, self).__init__() super(DataTransformer, self).__init__()
self._rng_seed = cfg.RNG_SEED self._rng_seed = cfg.RNG_SEED
...@@ -42,60 +42,64 @@ class DataTransformer(Process): ...@@ -42,60 +42,64 @@ class DataTransformer(Process):
self._num_classes = len(self._classes) self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes))) self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self._queues = [] self._queues = []
self.Q_in = self.Q1_out = self.Q2_out = None self.q_in = self.q1_out = self.q2_out = None
self.daemon = True self.daemon = True
def make_record( def make_roi_dict(
self, self,
ann_datum, ann_datum,
im_scale, im_scale,
flip=False, apply_flip=False,
offsets=None, offsets=None,
): ):
annotations = ann_datum.annotation annotations = ann_datum.annotation
n_objects = 0 n_objects = 0
if not self._use_diff: if not self._use_diff:
for ann in annotations: for ann in annotations:
if not ann.difficult: n_objects += 1 if not ann.difficult:
else: n_objects = len(annotations) n_objects += 1
else:
n_objects = len(annotations)
record = { roi_dict = {
'width': ann_datum.datum.width, 'width': ann_datum.datum.width,
'height': ann_datum.datum.height, 'height': ann_datum.datum.height,
'gt_classes': np.zeros((n_objects,), dtype=np.int32), 'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), dtype=np.float32), 'boxes': np.zeros((n_objects, 4), 'float32'),
} }
# Filter the difficult instances # Filter the difficult instances
instance_idx = 0 rec_idx = 0
for ann in annotations: for ann in annotations:
if not self._use_diff and ann.difficult: continue if not self._use_diff and ann.difficult:
record['boxes'][instance_idx, :] = [ continue
max(0, ann.x1), roi_dict['boxes'][rec_idx, :] = [
max(0, ann.y1), max(0, ann.x1),
min(ann.x2, ann_datum.datum.width - 1), max(0, ann.y1),
min(ann.y2, ann_datum.datum.height - 1), min(ann.x2, ann_datum.datum.width - 1),
] min(ann.y2, ann_datum.datum.height - 1),
record['gt_classes'][instance_idx] = self._class_to_ind[ann.name] ]
instance_idx += 1 roi_dict['gt_classes'][rec_idx] = self._class_to_ind[ann.name]
rec_idx += 1
# Flip the boxes if necessary # Flip the boxes if necessary
if flip: if apply_flip:
record['boxes'] = _flip_boxes( roi_dict['boxes'] = _flip_boxes(
record['boxes'], record['width']) roi_dict['boxes'], roi_dict['width'])
# Scale the boxes to the detecting scale # Scale the boxes to the detecting scale
record['boxes'] *= im_scale roi_dict['boxes'] *= im_scale
# Apply the offsets from scale jitter # Apply the offsets from scale jitter
if offsets is not None: if offsets is not None:
record['boxes'][:, 0::2] += offsets[0] roi_dict['boxes'][:, 0::2] += offsets[0]
record['boxes'][:, 1::2] += offsets[1] roi_dict['boxes'][:, 1::2] += offsets[1]
record['boxes'][:, :] = np.minimum( roi_dict['boxes'][:, :] = np.minimum(
np.maximum(record['boxes'][:, :], 0), np.maximum(roi_dict['boxes'][:, :], 0),
[offsets[2][1] - 1, offsets[2][0] - 1] * 2) [offsets[2][1] - 1, offsets[2][0] - 1] * 2,
)
return record return roi_dict
@classmethod @classmethod
def get_image(cls, serialized): def get_image(cls, serialized):
...@@ -127,20 +131,23 @@ class DataTransformer(Process): ...@@ -127,20 +131,23 @@ class DataTransformer(Process):
datum.ParseFromString(serialized) datum.ParseFromString(serialized)
im_datum = datum.datum im_datum = datum.datum
im = np.fromstring(im_datum.data, np.uint8) im = np.fromstring(im_datum.data, np.uint8)
if im_datum.encoded is True: im = cv2.imdecode(im, -1) if im_datum.encoded is True:
else: im = im.reshape((im_datum.height, im_datum.width, im_datum.channels)) im = cv2.imdecode(im, -1)
else:
h, w = im_datum.height, im_datum.width
im = im.reshape((h, w, im_datum.channels))
# Scale # Scale
scale_indices = npr.randint(0, high=len(cfg.TRAIN.SCALES)) scale_indices = npr.randint(len(cfg.TRAIN.SCALES))
target_size = cfg.TRAIN.SCALES[scale_indices] target_size = cfg.TRAIN.SCALES[scale_indices]
im, im_scale, jitter = prep_im_for_blob(im, target_size, cfg.TRAIN.MAX_SIZE) im, im_scale, jitter = prep_im_for_blob(im, target_size, cfg.TRAIN.MAX_SIZE)
# Flip # Flip
flip = False apply_flip = False
if self._use_flipped: if self._use_flipped:
if npr.randint(0, 2) > 0: if npr.randint(0, 2) > 0:
im = im[:, ::-1, :] im = im[:, ::-1, :]
flip = True apply_flip = True
# Random Crop or RandomPad # Random Crop or RandomPad
offsets = None offsets = None
...@@ -153,57 +160,63 @@ class DataTransformer(Process): ...@@ -153,57 +160,63 @@ class DataTransformer(Process):
# To a square (target_size, target_size) # To a square (target_size, target_size)
im, offsets = _get_image_with_target_size([target_size] * 2, im) im, offsets = _get_image_with_target_size([target_size] * 2, im)
# Datum -> Record # Datum -> RoIDict
rec = self.make_record(datum, im_scale, flip, offsets) roi_dict = self.make_roi_dict(datum, im_scale, apply_flip, offsets)
# Post-Process for gt boxes # Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}] # Shape like: [num_objects, {x1, y1, x2, y2, cls}]
gt_boxes = np.empty((len(rec['gt_classes']), 5), dtype=np.float32) gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, 0:4], gt_boxes[:, 4] = rec['boxes'], rec['gt_classes'] gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
return im, im_scale, gt_boxes return im, im_scale, gt_boxes
def run(self): def run(self):
npr.seed(self._rng_seed) npr.seed(self._rng_seed)
while True: while True:
serialized = self.Q_in.get() serialized = self.q_in.get()
data = self.get(serialized) data = self.get(serialized)
# Ensure that there should be at least 1 ground-truth # Ensure that there should be at least 1 ground-truth
if len(data[2]) < 1: continue if len(data[2]) < 1:
continue
aspect_ratio = float(data[0].shape[0]) / data[0].shape[1] aspect_ratio = float(data[0].shape[0]) / data[0].shape[1]
if aspect_ratio > 1.0: self.Q1_out.put(data) if aspect_ratio > 1.0:
else: self.Q2_out.put(data) self.q1_out.put(data)
else:
self.q2_out.put(data)
def _flip_boxes(boxes, width): def _flip_boxes(boxes, width):
flip_boxes = boxes.copy() flip_boxes = boxes.copy()
oldx1 = boxes[:, 0].copy() old_x1 = boxes[:, 0].copy()
oldx2 = boxes[:, 2].copy() old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - oldx2 - 1 flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - oldx1 - 1 flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all(): if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.') logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes return flip_boxes
def _get_image_with_target_size(target_size, im): def _get_image_with_target_size(target_size, img):
im_shape = list(im.shape) im_shape = list(img.shape)
width_diff = target_size[1] - im_shape[1]
offset_crop_width = np.random.randint(0, max(-width_diff, 0) + 1)
offset_pad_width = np.random.randint(0, max(width_diff, 0) + 1)
height_diff = target_size[0] - im_shape[0] height_diff = target_size[0] - im_shape[0]
offset_crop_height = np.random.randint(0, max(-height_diff, 0) + 1) width_diff = target_size[1] - im_shape[1]
offset_pad_height = np.random.randint(0, max(height_diff, 0) + 1)
im_shape[0 : 2] = target_size
new_im = np.empty(im_shape, dtype=im.dtype)
new_im[:] = cfg.PIXEL_MEANS
new_im[offset_pad_height:offset_pad_height + im.shape[0],
offset_pad_width:offset_pad_width + im.shape[1]] = \
im[offset_crop_height:offset_crop_height + target_size[0],
offset_crop_width:offset_crop_width + target_size[1]]
return new_im, (offset_pad_width - offset_crop_width, ofs_crop_width = np.random.randint(max(-width_diff, 0) + 1)
offset_pad_height - offset_crop_height, target_size) ofs_pad_width = np.random.randint(max(width_diff, 0) + 1)
\ No newline at end of file ofs_crop_height = np.random.randint(max(-height_diff, 0) + 1)
ofs_pad_height = np.random.randint(max(height_diff, 0) + 1)
im_shape[:2] = target_size
new_img = np.empty(im_shape, dtype=img.dtype)
new_img[:] = cfg.PIXEL_MEANS
new_img[ofs_pad_height:ofs_pad_height + img.shape[0],
ofs_pad_width:ofs_pad_width + img.shape[1]] = \
img[ofs_crop_height:ofs_crop_height + target_size[0],
ofs_crop_width:ofs_crop_width + target_size[1]]
return new_img, (
ofs_pad_width - ofs_crop_width,
ofs_pad_height - ofs_crop_height,
target_size,
)
...@@ -32,7 +32,7 @@ import numpy as np ...@@ -32,7 +32,7 @@ import numpy as np
# -79 -167 96 184 # -79 -167 96 184
# -167 -343 184 360 # -167 -343 184 360
#array([[ -83., -39., 100., 56.], # array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.], # [-175., -87., 192., 104.],
# [-359., -183., 376., 200.], # [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.], # [ -55., -55., 72., 72.],
...@@ -42,8 +42,12 @@ import numpy as np ...@@ -42,8 +42,12 @@ import numpy as np
# [ -79., -167., 96., 184.], # [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]]) # [-167., -343., 184., 360.]])
def generate_anchors(base_size=16, ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6)): def generate_anchors(
base_size=16,
ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6),
):
""" """
Generate anchor (reference) windows by enumerating aspect ratios X Generate anchor (reference) windows by enumerating aspect ratios X
scales wrt a reference (0, 0, 15, 15) window. scales wrt a reference (0, 0, 15, 15) window.
...@@ -55,22 +59,25 @@ def generate_anchors(base_size=16, ratios=(0.5, 1, 2), ...@@ -55,22 +59,25 @@ def generate_anchors(base_size=16, ratios=(0.5, 1, 2),
return anchors return anchors
def generate_anchors_v2(stride=16, ratios=(0.5, 1, 2), def generate_anchors_v2(
sizes=(32, 64, 128, 256, 512)): stride=16,
ratios=(0.5, 1, 2),
sizes=(32, 64, 128, 256, 512),
):
""" """
Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
are centered on stride / 2, have (approximate) sqrt areas of the specified are centered on stride / 2, have (approximate) sqrt areas of the specified
sizes, and aspect ratios as given. sizes, and aspect ratios as given.
""" """
return generate_anchors(stride, ratios, return generate_anchors(
np.array(sizes, dtype=np.float) / stride) base_size=stride,
ratios=ratios,
scales=np.array(sizes, dtype=np.float) / stride,
)
def _whctrs(anchor): def _whctrs(anchor):
""" """Return width, height, x center, and y center for an anchor (window)."""
Return width, height, x center, and y center for an anchor (window).
"""
w = anchor[2] - anchor[0] + 1 w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1 h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1) x_ctr = anchor[0] + 0.5 * (w - 1)
...@@ -83,7 +90,6 @@ def _mkanchors(ws, hs, x_ctr, y_ctr): ...@@ -83,7 +90,6 @@ def _mkanchors(ws, hs, x_ctr, y_ctr):
Given a vector of widths (ws) and heights (hs) around a center Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows). (x_ctr, y_ctr), output a set of anchors (windows).
""" """
ws = ws[:, np.newaxis] ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis] hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws - 1), anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
...@@ -94,10 +100,7 @@ def _mkanchors(ws, hs, x_ctr, y_ctr): ...@@ -94,10 +100,7 @@ def _mkanchors(ws, hs, x_ctr, y_ctr):
def _ratio_enum(anchor, ratios): def _ratio_enum(anchor, ratios):
""" """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
Enumerate a set of anchors for each aspect ratio wrt an anchor.
"""
w, h, x_ctr, y_ctr = _whctrs(anchor) w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h size = w * h
size_ratios = size / ratios size_ratios = size / ratios
...@@ -108,10 +111,7 @@ def _ratio_enum(anchor, ratios): ...@@ -108,10 +111,7 @@ def _ratio_enum(anchor, ratios):
def _scale_enum(anchor, scales): def _scale_enum(anchor, scales):
""" """Enumerate a set of anchors for each scale wrt an anchor."""
Enumerate a set of anchors for each scale wrt an anchor.
"""
w, h, x_ctr, y_ctr = _whctrs(anchor) w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales ws = w * scales
hs = h * scales hs = h * scales
...@@ -120,4 +120,4 @@ def _scale_enum(anchor, scales): ...@@ -120,4 +120,4 @@ def _scale_enum(anchor, scales):
if __name__ == '__main__': if __name__ == '__main__':
print(generate_anchors()) print(generate_anchors())
\ No newline at end of file
...@@ -19,9 +19,10 @@ import dragon.vm.torch as torch ...@@ -19,9 +19,10 @@ import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
...@@ -32,10 +33,9 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -32,10 +33,9 @@ class AnchorTargetLayer(torch.nn.Module):
super(AnchorTargetLayer, self).__init__() super(AnchorTargetLayer, self).__init__()
# Load the basic configs # Load the basic configs
# C4 backbone takes the first stride # C4 backbone takes the first stride
self.scales, self.stride, self.ratios = \ self.scales = cfg.RPN.SCALES
cfg.RPN.SCALES, \ self.stride = cfg.RPN.STRIDES[0]
cfg.RPN.STRIDES[0], \ self.ratios = cfg.RPN.ASPECT_RATIOS
cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount # Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
...@@ -61,11 +61,13 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -61,11 +61,13 @@ class AnchorTargetLayer(torch.nn.Module):
""" """
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images: if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \ logger.fatal(
.format(num_images, len(gt_boxes_wide))) 'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
height, width = features[0].shape[-2:] height, width = features[0].shape[-2:]
...@@ -85,7 +87,7 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -85,7 +87,7 @@ class AnchorTargetLayer(torch.nn.Module):
all_anchors = all_anchors.reshape((K * A, 4)) all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A) total_anchors = int(K * A)
# label: 1 is positive, 0 is negative, -1 is dont care # label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32) all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32) all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32) all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
...@@ -101,8 +103,8 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -101,8 +103,8 @@ class AnchorTargetLayer(torch.nn.Module):
inds_inside = np.where( inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) & (all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) & (all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width (all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] # height (all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :] anchors = all_anchors[inds_inside, :]
else: else:
inds_inside = np.arange(all_anchors.shape[0]) inds_inside = np.arange(all_anchors.shape[0])
...@@ -143,7 +145,10 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -143,7 +145,10 @@ class AnchorTargetLayer(torch.nn.Module):
fg_inds = np.where(labels == 1)[0] fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg: if len(fg_inds) > num_fg:
disable_inds = npr.choice( disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False) fg_inds,
size=len(fg_inds) - num_fg,
replace=False,
)
labels[disable_inds] = -1 labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0] fg_inds = np.where(labels == 1)[0]
...@@ -152,12 +157,17 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -152,12 +157,17 @@ class AnchorTargetLayer(torch.nn.Module):
bg_inds = np.where(labels == 0)[0] bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg: if len(bg_inds) > num_bg:
disable_inds = npr.choice( disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False) bg_inds,
size=len(bg_inds) - num_bg,
replace=False,
)
labels[disable_inds] = -1 labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32) bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform( bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], 0:4]) ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], 0:4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0)) bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
...@@ -169,34 +179,26 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -169,34 +179,26 @@ class AnchorTargetLayer(torch.nn.Module):
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
# labels labels = all_labels \
labels = all_labels.reshape( .reshape((num_images, height, width, A)) \
(num_images, height, width, A)).transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2) \
labels = labels.reshape((num_images, total_anchors)) .reshape((num_images, total_anchors))
# bbox_targets bbox_targets = all_bbox_targets \
bbox_targets = all_bbox_targets.reshape( .reshape((num_images, height, width, A * 4)) \
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2)
# bbox_inside_weights bbox_inside_weights = all_bbox_inside_weights \
bbox_inside_weights = all_bbox_inside_weights.reshape( .reshape((num_images, height, width, A * 4)) \
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2)
# bbox_outside_weights bbox_outside_weights = all_bbox_outside_weights \
bbox_outside_weights = all_bbox_outside_weights.reshape( .reshape((num_images, height, width, A * 4)) \
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2)
return { return {
'labels': to_tensor(labels), 'labels': blob_to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets), 'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights), 'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights), 'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
} }
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
\ No newline at end of file
...@@ -28,15 +28,13 @@ class DataLayer(torch.nn.Module): ...@@ -28,15 +28,13 @@ class DataLayer(torch.nn.Module):
'source': database.source, 'source': database.source,
'classes': database.classes, 'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE, 'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': 0, # Record-Wise Shuffle 'num_chunks': 0, # Record-Wise Shuffle
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2, 'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
}) })
def forward(self): def forward(self):
# Get a mini-batch from the Queue # Get an array blob from the Queue
blobs = self.data_batch.get() outputs = self.data_batch.get()
# Zero-Copy from numpy # Zero-Copy the array to tensor
blobs['data'] = torch.from_numpy(blobs['data']) outputs['data'] = torch.from_numpy(outputs['data'])
# Switch the data to Device return outputs
blobs['data'].cuda(cfg.GPU_ID)
return blobs
\ No newline at end of file
...@@ -9,27 +9,35 @@ ...@@ -9,27 +9,35 @@
# #
# -------------------------------------------------------- # --------------------------------------------------------
import numpy as np from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.nms.nms_wrapper import nms
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils.bbox_transform import bbox_transform_inv, clip_boxes from lib.nms.nms_wrapper import nms
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module): class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box """
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors"). transformations to a set of regular boxes (called "anchors").
""" """
def __init__(self): def __init__(self):
super(ProposalLayer, self).__init__() super(ProposalLayer, self).__init__()
# Load the basic configs # Load the basic configs
self.scales, self.stride, self.ratios = \ self.scales = cfg.RPN.SCALES
cfg.RPN.SCALES, cfg.RPN.STRIDES[0], cfg.RPN.ASPECT_RATIOS self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Generate base anchors # Generate base anchors
self.base_anchors = generate_anchors( self.base_anchors = generate_anchors(
...@@ -40,10 +48,10 @@ class ProposalLayer(torch.nn.Module): ...@@ -40,10 +48,10 @@ class ProposalLayer(torch.nn.Module):
def forward(self, features, cls_prob, bbox_pred, ims_info): def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST' cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources # Get resources
num_images = ims_info.shape[0] num_images = ims_info.shape[0]
...@@ -61,22 +69,21 @@ class ProposalLayer(torch.nn.Module): ...@@ -61,22 +69,21 @@ class ProposalLayer(torch.nn.Module):
# Reshape to (K * A, 4) shifted anchors # Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0] A = self.base_anchors.shape[0]
K = shifts.shape[0] K = shifts.shape[0]
anchors = self.base_anchors.reshape((1, A, 4)) + \ anchors = \
shifts.reshape((1, K, 4)).transpose((1, 0, 2)) self.base_anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = anchors.reshape((K * A, 4)) all_anchors = anchors.reshape((K * A, 4))
# Prepare for the outputs # Prepare for the outputs
batch_rois = [] batch_rois = []
# scores & deltas are (1, A, H, W) format # scores & deltas are (1, A, H, W) format
# Transpose to (1, H, W, A) # Transpose to (1, H, W, A)
batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1)) batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1))
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1)) batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1))
# Extract RoIs separately # Extract RoIs separately
for ix in range(num_images): for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1] scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix].reshape((-1, 4)) deltas = batch_deltas[ix].reshape((-1, 4))
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores): if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
...@@ -95,11 +102,11 @@ class ProposalLayer(torch.nn.Module): ...@@ -95,11 +102,11 @@ class ProposalLayer(torch.nn.Module):
proposals = bbox_transform_inv(anchors, deltas) proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image # 2. Clip predicted boxes to image
proposals = clip_boxes(proposals, ims_info[ix, :2]) proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold # 3. remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2]) # (NOTE: convert min_size to input image scale stored in im_info[2])
keep = _filter_boxes(proposals, min_size * ims_info[ix, 2]) keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :] proposals = proposals[keep, :]
scores = scores[keep] scores = scores[keep]
...@@ -107,7 +114,8 @@ class ProposalLayer(torch.nn.Module): ...@@ -107,7 +114,8 @@ class ProposalLayer(torch.nn.Module):
# 7. Take after_nms_topN (e.g. 300) # 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top) # 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh) keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: keep = keep[:post_nms_topN] if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :] proposals = proposals[keep, :]
# Output rois blob # Output rois blob
...@@ -118,13 +126,7 @@ class ProposalLayer(torch.nn.Module): ...@@ -118,13 +126,7 @@ class ProposalLayer(torch.nn.Module):
# Merge RoIs into a blob # Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0) rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN': return rpn_rois if cfg_key == 'TRAIN':
else: return [to_tensor(rpn_rois)] return rpn_rois
else:
return [blob_to_tensor(rpn_rois)]
def _filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min_size."""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
...@@ -9,22 +9,24 @@ ...@@ -9,22 +9,24 @@
# #
# -------------------------------------------------------- # --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
class ProposalTargetLayer(torch.nn.Module): class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets. """Assign object detection proposals to ground-truth targets."""
Produces proposal classification labels and bounding-box regression targets.
"""
def __init__(self): def __init__(self):
super(ProposalTargetLayer, self).__init__() super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES self.num_classes = cfg.MODEL.NUM_CLASSES
...@@ -34,8 +36,8 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -34,8 +36,8 @@ class ProposalTargetLayer(torch.nn.Module):
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label, has_mask) # GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs # Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets', keys = ['labels', 'rois', 'bbox_targets',
...@@ -50,14 +52,12 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -50,14 +52,12 @@ class ProposalTargetLayer(torch.nn.Module):
# Include ground-truth boxes in the set of candidate rois # Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4])))) rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois( labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes) rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32) bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([ _fmap_batch([
labels, labels,
rois, rois,
...@@ -73,11 +73,11 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -73,11 +73,11 @@ class ProposalTargetLayer(torch.nn.Module):
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0) batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return { return {
'rois': [to_tensor(batch_outputs['rois'])], 'rois': [blob_to_tensor(batch_outputs['rois'])],
'labels': to_tensor(batch_outputs['labels']), 'labels': blob_to_tensor(batch_outputs['labels']),
'bbox_targets': to_tensor(batch_outputs['bbox_targets']), 'bbox_targets': blob_to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': to_tensor(batch_outputs['bbox_inside_weights']), 'bbox_inside_weights': blob_to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': to_tensor(batch_outputs['bbox_outside_weights']), 'bbox_outside_weights': blob_to_tensor(batch_outputs['bbox_outside_weights']),
} }
...@@ -109,7 +109,6 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes): ...@@ -109,7 +109,6 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
def _compute_targets(ex_rois, gt_rois, labels): def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image.""" """Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4 assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4 assert gt_rois.shape[1] == 4
...@@ -117,12 +116,18 @@ def _compute_targets(ex_rois, gt_rois, labels): ...@@ -117,12 +116,18 @@ def _compute_targets(ex_rois, gt_rois, labels):
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False) return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): def _sample_rois(
"""Generate a random sample of RoIs comprising foreground and background examples.""" all_rois,
gt_boxes,
fg_rois_per_image,
rois_per_image,
num_classes,
):
"""Generate a random sample of RoIs."""
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float),
)
gt_assignment = overlaps.argmax(axis=1) gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1) max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4] labels = gt_boxes[gt_assignment, 4]
...@@ -164,11 +169,6 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas ...@@ -164,11 +169,6 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas
return labels, rois, bbox_targets, bbox_inside_weights return labels, rois, bbox_targets, bbox_inside_weights
def _dismantle_gt_boxes(gt_boxes, num_images):
return [gt_boxes[np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]] \
for ix in range(num_images)]
def _fmap_batch(inputs, outputs, keys): def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys): for i, key in enumerate(keys):
outputs[key].append(inputs[i]) outputs[key].append(inputs[i])
\ No newline at end of file
...@@ -13,27 +13,23 @@ from __future__ import absolute_import ...@@ -13,27 +13,23 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
try:
import cPickle
except:
import pickle as cPickle
import numpy as np
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image from lib.utils.image import scale_image
from lib.utils.bbox_transform import clip_boxes, bbox_transform_inv
from lib.nms.nms_wrapper import nms, soft_nms
from lib.utils.timer import Timer from lib.utils.timer import Timer
from lib.utils.blob import im_list_to_blob, to_array
from lib.utils.vis import vis_one_image from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image): def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales. """Detect a image, with single or multiple scales."""
"""
# Prepare images # Prepare images
ims, ims_scale = scale_image(raw_image) ims, ims_scale = scale_image(raw_image)
...@@ -41,26 +37,31 @@ def im_detect(detector, raw_image): ...@@ -41,26 +37,31 @@ def im_detect(detector, raw_image):
blobs = {'data': im_list_to_blob(ims)} blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([ blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale] list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32) for im_scale in ims_scale], dtype=np.float32)
blobs['data'] = torch.from_numpy(blobs['data']).cuda(cfg.GPU_ID) blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward # Do Forward
with torch.no_grad(): with torch.no_grad():
outputs = detector.forward(inputs=blobs) outputs = detector.forward(inputs=blobs)
# Decode results # Decode results
batch_rois = to_array(outputs['rois']) batch_rois = tensor_to_blob(outputs['rois'])
batch_scores = to_array(outputs['cls_prob']) batch_scores = tensor_to_blob(outputs['cls_prob'])
batch_deltas = to_array(outputs['bbox_pred']) batch_deltas = tensor_to_blob(outputs['bbox_pred'])
batch_boxes = bbox_transform_inv( batch_boxes = bbox_transform_inv(
batch_rois[:, 1:5], batch_deltas, cfg.BBOX_REG_WEIGHTS) boxes=batch_rois[:, 1:5],
scores_wide = []; boxes_wide = [] deltas=batch_deltas,
weights=cfg.BBOX_REG_WEIGHTS,
)
scores_wide, boxes_wide = [], []
for im_idx in range(len(ims)): for im_idx in range(len(ims)):
indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0] indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0]
boxes = batch_boxes[indices] boxes = batch_boxes[indices]
boxes /= ims_scale[im_idx] boxes /= ims_scale[im_idx]
clip_boxes(boxes, raw_image.shape) clip_tiled_boxes(boxes, raw_image.shape)
scores_wide.append(batch_scores[indices]) scores_wide.append(batch_scores[indices])
boxes_wide.append(boxes) boxes_wide.append(boxes)
...@@ -69,12 +70,13 @@ def im_detect(detector, raw_image): ...@@ -69,12 +70,13 @@ def im_detect(detector, raw_image):
def test_net(detector, server): def test_net(detector, server):
classes, num_images, num_classes = \ # Load settings
server.classes, server.num_images, server.num_classes classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect' : Timer(), 'misc' : Timer()} _t = {'im_detect': Timer(), 'misc': Timer()}
for i in range(num_images): for i in range(num_images):
image_id, raw_image = server.get_image() image_id, raw_image = server.get_image()
...@@ -89,22 +91,27 @@ def test_net(detector, server): ...@@ -89,22 +91,27 @@ def test_net(detector, server):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0] inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j] cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4] cls_boxes = boxes[inds, j*4:(j+1)*4]
cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).\ cls_detections = np.hstack(
astype(np.float32, copy=False) (cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS: if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(cls_dets, cfg.TEST.NMS, keep = soft_nms(
cls_detections, cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD, method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA) sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else: else:
keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True) keep = nms(cls_detections, cfg.TEST.NMS, force_cpu=True)
cls_dets = cls_dets[keep, :] cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_dets all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_dets) boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE: if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_image, classes, boxes_this_image, vis_one_image(
raw_image, classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True, thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
filename=server.get_save_filename(image_id)) filename=server.get_save_filename(image_id),
)
# Limit to max_per_image detections *over all classes* # Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0: if cfg.TEST.DETECTIONS_PER_IM > 0:
...@@ -112,7 +119,8 @@ def test_net(detector, server): ...@@ -112,7 +119,8 @@ def test_net(detector, server):
for j in range(1, num_classes): for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue if len(all_boxes[j][i]) < 1: continue
image_scores.append(all_boxes[j][i][:, -1]) image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores) if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM: if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM] image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes): for j in range(1, num_classes):
...@@ -120,11 +128,11 @@ def test_net(detector, server): ...@@ -120,11 +128,11 @@ def test_net(detector, server):
all_boxes[j][i] = all_boxes[j][i][keep, :] all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc() _t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(i + 1, num_images, _t['im_detect'].average_time, .format(i + 1, num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='') _t['misc'].average_time), end='')
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<') print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
print('Evaluating detections') print('Evaluating detections')
server.evaluate_detections(all_boxes) server.evaluate_detections(all_boxes)
\ No newline at end of file
...@@ -15,4 +15,4 @@ from __future__ import print_function ...@@ -15,4 +15,4 @@ from __future__ import print_function
from lib.fpn.layers.anchor_target_layer import AnchorTargetLayer from lib.fpn.layers.anchor_target_layer import AnchorTargetLayer
from lib.fpn.layers.proposal_layer import ProposalLayer from lib.fpn.layers.proposal_layer import ProposalLayer
from lib.fpn.layers.proposal_target_layer import ProposalTargetLayer from lib.fpn.layers.proposal_target_layer import ProposalTargetLayer
\ No newline at end of file
...@@ -13,16 +13,18 @@ from __future__ import absolute_import ...@@ -13,16 +13,18 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
import lib.utils.logger as logger
from lib.utils.blob import to_tensor
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class AnchorTargetLayer(torch.nn.Module): class AnchorTargetLayer(torch.nn.Module):
...@@ -31,14 +33,14 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -31,14 +33,14 @@ class AnchorTargetLayer(torch.nn.Module):
def __init__(self): def __init__(self):
super(AnchorTargetLayer, self).__init__() super(AnchorTargetLayer, self).__init__()
# Load the basic configs # Load the basic configs
self.scales, self.strides, self.ratios = \ self.scales = cfg.RPN.SCALES
cfg.RPN.SCALES, \ self.strides = cfg.RPN.STRIDES
cfg.RPN.STRIDES, \ self.ratios = cfg.RPN.ASPECT_RATIOS
cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides): if len(self.scales) != len(self.strides):
logger.fatal( logger.fatal(
'Given {} scales and {} strides.' 'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))) .format(len(self.scales), len(self.strides))
)
# Allow boxes to sit over the edge by a small amount # Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
...@@ -46,9 +48,9 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -46,9 +48,9 @@ class AnchorTargetLayer(torch.nn.Module):
# Generate base anchors # Generate base anchors
self.base_anchors = [] self.base_anchors = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
base_size = self.strides[i] base_size, scale = self.strides[i], self.scales[i]
scale = self.scales[i] if not isinstance(scale, collections.Iterable):
if not isinstance(scale, list): scale = [scale] scale = [scale]
self.base_anchors.append( self.base_anchors.append(
generate_anchors( generate_anchors(
base_size=base_size, base_size=base_size,
...@@ -59,16 +61,17 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -59,16 +61,17 @@ class AnchorTargetLayer(torch.nn.Module):
def forward(self, features, gt_boxes, ims_info): def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.""" """Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images: if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \ logger.fatal(
.format(num_images, len(gt_boxes_wide))) 'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
all_anchors = []; total_anchors = 0 all_anchors, total_anchors = [], 0
for i in range(len(self.strides)): for i in range(len(self.strides)):
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i] shift_x = np.arange(0, width) * self.strides[i]
...@@ -107,8 +110,8 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -107,8 +110,8 @@ class AnchorTargetLayer(torch.nn.Module):
inds_inside = np.where( inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) & (all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) & (all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width (all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] # height (all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :] anchors = all_anchors[inds_inside, :]
else: else:
inds_inside = np.arange(all_anchors.shape[0]) inds_inside = np.arange(all_anchors.shape[0])
...@@ -180,16 +183,8 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -180,16 +183,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1)) bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return { return {
'labels': to_tensor(labels), 'labels': blob_to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets), 'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights), 'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights), 'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
} }
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
\ No newline at end of file
...@@ -9,39 +9,49 @@ ...@@ -9,39 +9,49 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
import numpy as np from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms from lib.nms.nms_wrapper import nms
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.bbox_transform import bbox_transform_inv, clip_boxes from lib.utils.boxes import bbox_transform_inv
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module): class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box. """
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors"). transformations to a set of regular boxes (called "anchors").
""" """
def __init__(self): def __init__(self):
super(ProposalLayer, self).__init__() super(ProposalLayer, self).__init__()
# Load the basic configs # Load the basic configs
self.scales, self.strides, self.ratios = \ self.scales = cfg.RPN.SCALES
cfg.RPN.SCALES, cfg.RPN.STRIDES, cfg.RPN.ASPECT_RATIOS self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides): if len(self.scales) != len(self.strides):
logger.fatal( logger.fatal(
'Given {} scales and {} strides.' 'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))) .format(len(self.scales), len(self.strides))
)
# Generate base anchors # Generate base anchors
self.base_anchors = [] self.base_anchors = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
base_size = self.strides[i] base_size, scale = self.strides[i], self.scales[i]
scale = self.scales[i] if not isinstance(scale, collections.Iterable):
if not isinstance(scale, list): scale = [scale] scale = [scale]
self.base_anchors.append( self.base_anchors.append(
generate_anchors( generate_anchors(
base_size=base_size, base_size=base_size,
...@@ -76,14 +86,14 @@ class ProposalLayer(torch.nn.Module): ...@@ -76,14 +86,14 @@ class ProposalLayer(torch.nn.Module):
def forward(self, features, cls_prob, bbox_pred, ims_info): def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST' cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources # Get resources
num_images = ims_info.shape[0] num_images = ims_info.shape[0]
all_anchors = self.generate_grid_anchors(features) # [n, 4] all_anchors = self.generate_grid_anchors(features) # [n, 4]
if cls_prob.shape[0] != num_images or \ if cls_prob.shape[0] != num_images or \
bbox_pred.shape[0] != num_images: bbox_pred.shape[0] != num_images:
...@@ -92,12 +102,13 @@ class ProposalLayer(torch.nn.Module): ...@@ -92,12 +102,13 @@ class ProposalLayer(torch.nn.Module):
# Prepare for the outputs # Prepare for the outputs
batch_rois = [] batch_rois = []
batch_scores = cls_prob.numpy(True) batch_scores = cls_prob.numpy(True)
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4] batch_deltas = bbox_pred.numpy(True) \
.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
# Extract RoIs separately # Extract RoIs separately
for ix in range(num_images): for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1] scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix] # [n, 4] deltas = batch_deltas[ix] # [n, 4]
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores): if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
order = np.argsort(-scores.squeeze()) order = np.argsort(-scores.squeeze())
...@@ -115,10 +126,10 @@ class ProposalLayer(torch.nn.Module): ...@@ -115,10 +126,10 @@ class ProposalLayer(torch.nn.Module):
proposals = bbox_transform_inv(anchors, deltas) proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image # 2. Clip predicted boxes to image
proposals = clip_boxes(proposals, ims_info[ix, :2]) proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold # 3. remove predicted boxes with either height or width < threshold
keep = _filter_boxes(proposals, min_size * ims_info[ix, 2]) keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :] proposals = proposals[keep, :]
scores = scores[keep] scores = scores[keep]
...@@ -126,7 +137,8 @@ class ProposalLayer(torch.nn.Module): ...@@ -126,7 +137,8 @@ class ProposalLayer(torch.nn.Module):
# 7. Take after_nms_topN (e.g. 300) # 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top) # 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh) keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: keep = keep[:post_nms_topN] if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :] proposals = proposals[keep, :]
# Output rois blob # Output rois blob
...@@ -151,32 +163,23 @@ class ProposalLayer(torch.nn.Module): ...@@ -151,32 +163,23 @@ class ProposalLayer(torch.nn.Module):
lv_indices = np.where(fpn_levels == (i + min_level))[0] lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0: if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling # Fake a tiny roi to avoid empty roi pooling
all_rois.append(to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32))) all_rois.append(blob_to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else: else:
all_rois.append(to_tensor(rpn_rois[lv_indices])) all_rois.append(blob_to_tensor(rpn_rois[lv_indices]))
return all_rois return all_rois
def _filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min_size.
"""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
def _map_rois_to_fpn_levels(rois, k_min, k_max): def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""Determine which FPN level each RoI in a set of RoIs should map to based
on the heuristic in the FPN paper.
""" """
if len(rois) == 0: return [] Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1 ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1 hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs) s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224 s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4 lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6)) target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max) return np.clip(target_levels, k_min, k_max)
\ No newline at end of file
...@@ -9,14 +9,19 @@ ...@@ -9,14 +9,19 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
class ProposalTargetLayer(torch.nn.Module): class ProposalTargetLayer(torch.nn.Module):
...@@ -36,26 +41,19 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -36,26 +41,19 @@ class ProposalTargetLayer(torch.nn.Module):
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32), 'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
} }
def _map_rois(self, inputs, fake_outputs, outputs, keys, levels):
f = lambda a, b, indices: a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(f(inputs[i], fake_outputs[key], inds))
def forward(self, rpn_rois, gt_boxes): def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label, has_mask) # GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs # Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets', keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights'] 'bbox_inside_weights', 'bbox_outside_weights']
outputs = dict(map(lambda a, b: (a, b), keys, [[] for key in keys])) outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for key in keys])) batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately # Generate targets separately
for ix in range(num_images): for ix in range(num_images):
...@@ -65,11 +63,9 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -65,11 +63,9 @@ class ProposalTargetLayer(torch.nn.Module):
# Include ground-truth boxes in the set of candidate rois # Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4])))) rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
# Sample rois with labels & bbox targets
labels, rois, bbox_targets, bbox_inside_weights = \ labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes) _sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32) bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
...@@ -94,14 +90,20 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -94,14 +90,20 @@ class ProposalTargetLayer(torch.nn.Module):
K = max_level - min_level + 1 K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level) fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)] lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)]
_fmap_rois([batch_outputs[key] for key in keys], self.fake_outputs, outputs, keys, lvs_indices) _fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
outputs=outputs,
keys=keys,
levels=lvs_indices,
)
return { return {
'rois': [to_tensor(outputs['rois'][i]) for i in range(K)], 'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': to_tensor(np.concatenate(outputs['labels'], axis=0)), 'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': to_tensor(np.vstack(outputs['bbox_targets'])), 'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': to_tensor(np.vstack(outputs['bbox_inside_weights'])), 'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': to_tensor(np.vstack(outputs['bbox_outside_weights'])), 'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])),
} }
...@@ -115,6 +117,7 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes): ...@@ -115,6 +117,7 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
Returns: Returns:
bbox_target (ndarray): N x 4K blob of regression targets bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights bbox_inside_weights (ndarray): N x 4K blob of loss weights
""" """
clss = bbox_target_data[:, 0] clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
...@@ -131,7 +134,6 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes): ...@@ -131,7 +134,6 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
def _compute_targets(ex_rois, gt_rois, labels): def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image.""" """Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4 assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4 assert gt_rois.shape[1] == 4
...@@ -140,10 +142,12 @@ def _compute_targets(ex_rois, gt_rois, labels): ...@@ -140,10 +142,12 @@ def _compute_targets(ex_rois, gt_rois, labels):
def _map_rois_to_fpn_levels(rois, k_min, k_max): def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""Determine which FPN level each RoI in a set of RoIs should map to based
on the heuristic in the FPN paper.
""" """
if len(rois) == 0: return [] Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1 ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1 hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs) s = np.sqrt(ws * hs)
...@@ -154,9 +158,7 @@ def _map_rois_to_fpn_levels(rois, k_min, k_max): ...@@ -154,9 +158,7 @@ def _map_rois_to_fpn_levels(rois, k_min, k_max):
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Generate a random sample of RoIs comprising foreground and background """Sample a batch of RoIs comprising foreground and background examples."""
examples.
"""
# overlaps: (rois x gt_boxes) # overlaps: (rois x gt_boxes)
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
...@@ -203,19 +205,15 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas ...@@ -203,19 +205,15 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas
return labels, rois, bbox_targets, bbox_inside_weights return labels, rois, bbox_targets, bbox_inside_weights
def _dismantle_gt_boxes(gt_boxes, num_images):
return [gt_boxes[np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]] \
for ix in range(num_images)]
def _fmap_batch(inputs, outputs, keys): def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys): for i, key in enumerate(keys):
outputs[key].append(inputs[i]) outputs[key].append(inputs[i])
def _fmap_rois(inputs, fake_outputs, outputs, keys, levels): def _fmap_rois(inputs, fake_outputs, outputs, keys, levels):
f = lambda a, b, indices: a[indices] if len(indices) > 0 else b def impl(a, b, indices):
return a[indices] if len(indices) > 0 else b
for k in range(len(levels)): for k in range(len(levels)):
inds = levels[k] inds = levels[k]
for i, key in enumerate(keys): for i, key in enumerate(keys):
outputs[key].append(f(inputs[i], fake_outputs[key], inds)) outputs[key].append(impl(inputs[i], fake_outputs[key], inds))
\ No newline at end of file
...@@ -9,13 +9,17 @@ ...@@ -9,13 +9,17 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Import custom modules # Import custom modules
from lib.modeling.base import Bootstarp from lib.modeling.base import affine
from lib.modeling.base import RPNDecoder from lib.modeling.base import bn
from lib.modeling.base import RetinaNetDecoder from lib.modeling.base import conv1x1
from lib.modeling.base import conv1x1, conv3x3, bn, affine from lib.modeling.base import conv3x3
from lib.modeling.fpn import FPN
from lib.modeling.rpn import RPN
from lib.modeling.fast_rcnn import FastRCNN from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.fpn import FPN
from lib.modeling.retinanet import RetinaNet from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD from lib.modeling.ssd import SSD
...@@ -15,7 +15,9 @@ from __future__ import print_function ...@@ -15,7 +15,9 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.modeling import conv1x1, conv3x3, bn, affine from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class WideResBlock(torch.nn.Module): class WideResBlock(torch.nn.Module):
...@@ -112,8 +114,10 @@ class AirNet(torch.nn.Module): ...@@ -112,8 +114,10 @@ class AirNet(torch.nn.Module):
) )
self.layer1 = self.make_blocks(filters[0], blocks[0]) self.layer1 = self.make_blocks(filters[0], blocks[0])
self.layer2 = self.make_blocks(filters[1], blocks[1], 2) self.layer2 = self.make_blocks(filters[1], blocks[1], 2)
if num_stages >= 4: self.layer3 = self.make_blocks(filters[2], blocks[2], 2) if num_stages >= 4:
if num_stages >= 5: self.layer4 = self.make_blocks(filters[3], blocks[3], 2) self.layer3 = self.make_blocks(filters[2], blocks[2], 2)
if num_stages >= 5:
self.layer4 = self.make_blocks(filters[3], blocks[3], 2)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -124,7 +128,7 @@ class AirNet(torch.nn.Module): ...@@ -124,7 +128,7 @@ class AirNet(torch.nn.Module):
m.weight, m.weight,
# Fix the gain for [-127, 127] # Fix the gain for [-127, 127]
a=1, a=1,
) # Xavier Initialization ) # Xavier Initialization
def make_blocks(self, dim_out, blocks, stride=1): def make_blocks(self, dim_out, blocks, stride=1):
downsample = torch.nn.Sequential( downsample = torch.nn.Sequential(
...@@ -158,14 +162,21 @@ class AirNet(torch.nn.Module): ...@@ -158,14 +162,21 @@ class AirNet(torch.nn.Module):
def airnet(num_stages): def airnet(num_stages):
blocks = ( blocks = (
('r', 'r'), # conv2 ('r', 'r'), # conv2
('r', 'i'), # conv3 ('r', 'i'), # conv3
('r', 'i'), # conv4 ('r', 'i'), # conv4
('r', 'i'), # conv5 ('r', 'i'), # conv5
) )
return AirNet(blocks, num_stages) return AirNet(blocks, num_stages)
def make_airnet_(): return airnet(5) def make_airnet_(): return airnet(5)
def make_airnet_3b(): return airnet(3) def make_airnet_3b(): return airnet(3)
def make_airnet_4b(): return airnet(4) def make_airnet_4b(): return airnet(4)
def make_airnet_5b(): return airnet(5)
\ No newline at end of file
def make_airnet_5b(): return airnet(5)
...@@ -17,99 +17,20 @@ from __future__ import print_function ...@@ -17,99 +17,20 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
class Bootstarp(torch.nn.Module):
"""Extended operator to process the images."""
def __init__(self):
super(Bootstarp, self).__init__()
self.dtype = cfg.MODEL.DATA_TYPE.lower()
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'ImageData',
'arguments': {
'dtype': self.dtype,
'data_format': 'NCHW',
'mean_values': cfg.PIXEL_MEANS,
}
}
def forward(self, x):
inputs, outputs = [x], [self.register_output()]
return self.run(inputs, outputs)
class RPNDecoder(torch.nn.Module):
"""Generate proposal regions from RPN."""
def __init__(self):
super(RPNDecoder, self).__init__()
self.register_op()
self.K = (cfg.FPN.ROI_MAX_LEVEL -
cfg.FPN.ROI_MIN_LEVEL + 1) \
if len(cfg.RPN.STRIDES) > 1 else 1
def register_op(self):
self.op_meta = {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RCNN',
'strides': cfg.RPN.STRIDES,
'ratios': [float(e) for e in cfg.RPN.ASPECT_RATIOS],
'scales': [float(e) for e in cfg.RPN.SCALES],
'pre_nms_top_n': cfg.TEST.RPN_PRE_NMS_TOP_N,
'post_nms_top_n': cfg.TEST.RPN_POST_NMS_TOP_N,
'nms_thresh': cfg.TEST.RPN_NMS_THRESH,
'min_size': cfg.TEST.RPN_MIN_SIZE,
'min_leve': cfg.FPN.ROI_MIN_LEVEL,
'max_level': cfg.FPN.ROI_MAX_LEVEL,
'canonical_scale': cfg.FPN.ROI_CANONICAL_SCALE,
'canonical_level': cfg.FPN.ROI_CANONICAL_LEVEL,
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, to_tensor(ims_info)]
outputs = [self.register_output() for _ in range(self.K)]
outputs = self.run(inputs, outputs)
return outputs if isinstance(outputs, list) else [outputs]
class RetinaNetDecoder(torch.nn.Module):
"""Generate proposal regions from retinanet."""
def __init__(self):
super(RetinaNetDecoder, self).__init__()
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
self.strides = [int(2. ** lvl) for lvl in range(k_min, k_max + 1)]
self.scales = [cfg.RETINANET.ANCHOR_SCALE *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RETINANET',
'strides': self.strides,
'scales': self.scales,
'ratios': [float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
'pre_nms_top_n': cfg.RETINANET.PRE_NMS_TOP_N,
'score_thresh': cfg.TEST.SCORE_THRESH,
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info): def bn(dim_in, eps=1e-5):
inputs = features + [cls_prob, bbox_pred, to_tensor(ims_info)] """The BatchNorm."""
outputs = [self.register_output()] return torch.nn.BatchNorm2d(dim_in, eps=eps)
return self.run(inputs, outputs)
def conv1x1(dim_in, dim_out, stride=1, bias=False): def conv1x1(dim_in, dim_out, stride=1, bias=False):
...@@ -133,18 +54,3 @@ def conv3x3(dim_in, dim_out, stride=1, bias=False): ...@@ -133,18 +54,3 @@ def conv3x3(dim_in, dim_out, stride=1, bias=False):
padding=1, padding=1,
bias=bias, bias=bias,
) )
def bn(dim_in, eps=1e-5):
"""The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps)
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
\ No newline at end of file
...@@ -13,22 +13,19 @@ from __future__ import absolute_import ...@@ -13,22 +13,19 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections
import importlib import importlib
import dragon.vm.torch as torch import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.logger import is_root from lib.modeling import FPN
from lib.modeling import RPN
from lib.modeling import FastRCNN
from lib.modeling import RetinaNet
from lib.modeling import SSD
from lib.modeling.factory import get_body_func from lib.modeling.factory import get_body_func
from lib.ops.modules import Bootstrap
from lib.modeling import ( from lib.utils.logger import is_root
Bootstarp,
FPN,
RPN,
FastRCNN,
RetinaNet,
SSD,
)
class Detector(torch.nn.Module): class Detector(torch.nn.Module):
...@@ -47,7 +44,7 @@ class Detector(torch.nn.Module): ...@@ -47,7 +44,7 @@ class Detector(torch.nn.Module):
# + Data Loader # + Data Loader
self.data_layer = importlib.import_module( self.data_layer = importlib.import_module(
'lib.{}'.format(model)).DataLayer 'lib.{}'.format(model)).DataLayer
self.bootstarp = Bootstarp() self.bootstrap = Bootstrap()
# + Feature Extractor # + Feature Extractor
self.body = get_body_func(body)() self.body = get_body_func(body)()
...@@ -58,7 +55,7 @@ class Detector(torch.nn.Module): ...@@ -58,7 +55,7 @@ class Detector(torch.nn.Module):
self.fpn = FPN(feature_dims) self.fpn = FPN(feature_dims)
feature_dims = self.fpn.feature_dims feature_dims = self.fpn.feature_dims
elif 'mbox' in modules: elif 'mbox' in modules:
pass # Placeholder pass # Placeholder
else: else:
feature_dims = [feature_dims[-1]] feature_dims = [feature_dims[-1]]
...@@ -84,8 +81,11 @@ class Detector(torch.nn.Module): ...@@ -84,8 +81,11 @@ class Detector(torch.nn.Module):
The path of the weights file. The path of the weights file.
""" """
self.load_state_dict(torch.load(weights), self.load_state_dict(
strict=False, verbose=is_root()) torch.load(weights),
strict=False,
verbose=is_root(),
)
def forward(self, inputs=None): def forward(self, inputs=None):
"""Compute the detection outputs. """Compute the detection outputs.
...@@ -107,9 +107,9 @@ class Detector(torch.nn.Module): ...@@ -107,9 +107,9 @@ class Detector(torch.nn.Module):
# 1. Extract features # 1. Extract features
# Process the data: # Process the data:
# 1) NHWC => NCHW # 1) NHWC => NCHW
# 2) Uint8 => Float32 or Float16 # 2) uint8 => float32 or float16
# 3) Mean subtraction # 3) Mean subtraction
image_data = self.bootstarp(inputs['data']) image_data = self.bootstrap(inputs['data'])
features = self.body(image_data) features = self.body(image_data)
# 2. Apply the FPN to enhance features if necessary # 2. Apply the FPN to enhance features if necessary
...@@ -117,7 +117,7 @@ class Detector(torch.nn.Module): ...@@ -117,7 +117,7 @@ class Detector(torch.nn.Module):
features = self.fpn(features) features = self.fpn(features)
# 3. Collect detection outputs # 3. Collect detection outputs
outputs = OrderedDict() outputs = collections.OrderedDict()
# 3.1 Feature -> RPN -> Fast R-CNN # 3.1 Feature -> RPN -> Fast R-CNN
if hasattr(self, 'rpn'): if hasattr(self, 'rpn'):
...@@ -197,4 +197,4 @@ class Detector(torch.nn.Module): ...@@ -197,4 +197,4 @@ class Detector(torch.nn.Module):
last_module.weight.copy_(weight) last_module.weight.copy_(weight)
else: else:
last_module.weight.data.mul_(term) last_module.weight.data.mul_(term)
last_module = e last_module = e
\ No newline at end of file
...@@ -13,27 +13,11 @@ from __future__ import absolute_import ...@@ -13,27 +13,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections
import importlib import importlib
from collections import defaultdict
_STORE = defaultdict(dict) _STORE = collections.defaultdict(dict)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format(
name, desc, ', '.join(sets.keys())))
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
module = importlib.import_module(module_name)
return getattr(module, func_name)
except ImportError as e:
raise ValueError('Can not import module from: ' + module_name)
########################################### ###########################################
...@@ -59,6 +43,23 @@ for D in ['', '3b', '4b', '5b']: ...@@ -59,6 +43,23 @@ for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \ _STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D) 'lib.modeling.airnet.make_airnet_{}'.format(D)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format(
name, desc, ', '.join(sets.keys())))
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
module = importlib.import_module(module_name)
return getattr(module, func_name)
except ImportError as e:
raise ValueError('Can not import module from: ' + module_name)
def get_body_func(name): def get_body_func(name):
return get_template_func( return get_template_func(
name, _STORE['BODY'], 'Body') name, _STORE['BODY'], 'Body')
\ No newline at end of file
...@@ -13,11 +13,11 @@ from __future__ import absolute_import ...@@ -13,11 +13,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import RPNDecoder from lib.ops.modules import RPNDecoder
class FastRCNN(torch.nn.Module): class FastRCNN(torch.nn.Module):
...@@ -50,11 +50,11 @@ class FastRCNN(torch.nn.Module): ...@@ -50,11 +50,11 @@ class FastRCNN(torch.nn.Module):
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False) self.sigmoid = torch.nn.Sigmoid(inplace=False)
self.roi_func = { self.roi_func = {
'RoIPool': torch.roi_pool, 'RoIPool': torch.vision.ops.roi_pool,
'RoIAlign': torch.roi_align, 'RoIAlign': torch.vision.ops.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD] }[cfg.FRCNN.ROI_XFORM_METHOD]
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1) self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1.) self.bbox_loss = torch.nn.SmoothL1Loss(beta=1., reduction='batch_size')
# Compute spatial scales for multiple strides # Compute spatial scales for multiple strides
roi_levels = [level for level in range( roi_levels = [level for level in range(
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)] cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)]
...@@ -66,13 +66,16 @@ class FastRCNN(torch.nn.Module): ...@@ -66,13 +66,16 @@ class FastRCNN(torch.nn.Module):
torch.nn.init.normal_(self.cls_score.weight, std=0.01) torch.nn.init.normal_(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001) torch.nn.init.normal_(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters(): for name, p in self.named_parameters():
if 'bias' in name: torch.nn.init.constant_(p, 0) if 'bias' in name:
torch.nn.init.constant_(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale): def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func( return self.roi_func(
feature, rois, feature, rois,
pooled_h=cfg.FRCNN.ROI_XFORM_RESOLUTION, output_size=(
pooled_w=cfg.FRCNN.ROI_XFORM_RESOLUTION, cfg.FRCNN.ROI_XFORM_RESOLUTION,
cfg.FRCNN.ROI_XFORM_RESOLUTION,
),
spatial_scale=spatial_scale, spatial_scale=spatial_scale,
) )
...@@ -127,14 +130,14 @@ class FastRCNN(torch.nn.Module): ...@@ -127,14 +130,14 @@ class FastRCNN(torch.nn.Module):
# Compute rcnn logits # Compute rcnn logits
cls_score = self.cls_score(rcnn_output).float() cls_score = self.cls_score(rcnn_output).float()
outputs = OrderedDict({ outputs = collections.OrderedDict({
'bbox_pred': 'bbox_pred':
self.bbox_pred(rcnn_output).float(), self.bbox_pred(rcnn_output).float(),
}) })
if self.training: if self.training:
# Compute rcnn losses # Compute rcnn losses
outputs.update(OrderedDict({ outputs.update(collections.OrderedDict({
'cls_loss': self.cls_loss( 'cls_loss': self.cls_loss(
cls_score, cls_score,
self.rcnn_data['labels'], self.rcnn_data['labels'],
......
...@@ -16,7 +16,8 @@ from __future__ import print_function ...@@ -16,7 +16,8 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3 from lib.modeling import conv1x1
from lib.modeling import conv3x3
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
...@@ -48,50 +49,44 @@ class FPN(torch.nn.Module): ...@@ -48,50 +49,44 @@ class FPN(torch.nn.Module):
if isinstance(m, torch.nn.Conv2d): if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_( torch.nn.init.kaiming_uniform_(
m.weight, m.weight,
# Fix the gain for [-127, 127] a=1, # Fix the gain for [-127, 127]
a=1, ) # Xavier Initialization
) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0) torch.nn.init.constant_(m.bias, 0)
def apply_on_rcnn(self, features): def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL- min_lvl](fpn_input)] outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Apply MaxPool for higher features # Apply MaxPool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1])) outputs.append(self.maxpool(outputs[-1]))
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.nn_resize( upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:]) fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output) fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input)) outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs return outputs
def apply_on_retinanet(self, features): def apply_on_retinanet(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL- min_lvl](fpn_input)] outputs = [self.P[HIGHEST_BACKBONE_LVL- min_lvl](fpn_input)]
# Add extra convolutions for higher features # Add extra convolutions for higher features
extra_input = features[-1] extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input)) outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl: extra_input = self.relu(outputs[-1]) if i != max_lvl:
extra_input = self.relu(outputs[-1])
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.nn_resize( upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:]) fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output) fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input)) outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs return outputs
def forward(self, features): def forward(self, features):
return self.apply_func(features) return self.apply_func(features)
\ No newline at end of file
...@@ -20,12 +20,20 @@ from __future__ import print_function ...@@ -20,12 +20,20 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3, affine from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class BasicBlock(torch.nn.Module): class BasicBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out, stride=1, def __init__(
downsample=None, dropblock=None): self,
dim_in,
dim_out,
stride=1,
downsample=None,
dropblock=None,
):
super(BasicBlock, self).__init__() super(BasicBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride) self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out) self.bn1 = affine(dim_out)
...@@ -65,8 +73,14 @@ class Bottleneck(torch.nn.Module): ...@@ -65,8 +73,14 @@ class Bottleneck(torch.nn.Module):
contraction = cfg.RESNET.NUM_GROUPS \ contraction = cfg.RESNET.NUM_GROUPS \
* cfg.RESNET.GROUP_WIDTH / 256.0 * cfg.RESNET.GROUP_WIDTH / 256.0
def __init__(self, dim_in, dim_out, stride=1, def __init__(
downsample=None, dropblock=None): self,
dim_in,
dim_out,
stride=1,
downsample=None,
dropblock=None,
):
super(Bottleneck, self).__init__() super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction) dim = int(dim_out * self.contraction)
self.conv1 = conv1x1(dim_in, dim) self.conv1 = conv1x1(dim_in, dim)
...@@ -128,11 +142,17 @@ class ResNet(torch.nn.Module): ...@@ -128,11 +142,17 @@ class ResNet(torch.nn.Module):
ceil_mode=True, ceil_mode=True,
) )
self.drop3 = torch.nn.DropBlock2d( self.drop3 = torch.nn.DropBlock2d(
7, 0.9, alpha=0.25, decrement=cfg.DROPBLOCK.DECREMENT) \ kp=0.9,
if cfg.DROPBLOCK.DROP_ON else None block_size=7,
alpha=0.25,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.drop4 = torch.nn.DropBlock2d( self.drop4 = torch.nn.DropBlock2d(
7, 0.9, alpha=1., decrement=cfg.DROPBLOCK.DECREMENT) \ kp=0.9,
if cfg.DROPBLOCK.DROP_ON else None block_size=7,
alpha=1.00,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0]) self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2) self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3) self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3)
...@@ -145,7 +165,8 @@ class ResNet(torch.nn.Module): ...@@ -145,7 +165,8 @@ class ResNet(torch.nn.Module):
if isinstance(m, torch.nn.Conv2d): if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_normal_( torch.nn.init.kaiming_normal_(
m.weight, m.weight,
nonlinearity='relu') nonlinearity='relu',
)
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
...@@ -178,25 +199,31 @@ class ResNet(torch.nn.Module): ...@@ -178,25 +199,31 @@ class ResNet(torch.nn.Module):
x = self.bn1(x) x = self.bn1(x)
x = self.relu(x) x = self.relu(x)
x = self.maxpool(x) x = self.maxpool(x)
outputs = [x] outputs = [x]
outputs += [self.layer1(outputs[-1])] outputs += [self.layer1(outputs[-1])]
outputs += [self.layer2(outputs[-1])] outputs += [self.layer2(outputs[-1])]
outputs += [self.layer3(outputs[-1])] outputs += [self.layer3(outputs[-1])]
outputs += [self.layer4(outputs[-1])] outputs += [self.layer4(outputs[-1])]
return outputs return outputs
def resnet(depth): def resnet(depth):
if depth == 18: units = [2, 2, 2, 2] if depth == 18:
elif depth == 34: units = [3, 4, 6, 3] units = [2, 2, 2, 2]
elif depth == 50: units = [3, 4, 6, 3] elif depth == 34:
elif depth == 101: units = [3, 4, 23, 3] units = [3, 4, 6, 3]
elif depth == 152: units = [3, 8, 36, 3] elif depth == 50:
elif depth == 200: units = [3, 24, 36, 3] units = [3, 4, 6, 3]
elif depth == 269: units = [3, 30, 48, 8] elif depth == 101:
else: raise ValueError('Unsupported depth: %d' % depth) units = [3, 4, 23, 3]
elif depth == 152:
units = [3, 8, 36, 3]
elif depth == 200:
units = [3, 24, 36, 3]
elif depth == 269:
units = [3, 30, 48, 8]
else:
raise ValueError('Unsupported depth: %d' % depth)
block = Bottleneck if depth >= 50 else BasicBlock block = Bottleneck if depth >= 50 else BasicBlock
filters = [64, 256, 512, 1024, 2048] \ filters = [64, 256, 512, 1024, 2048] \
if depth >= 50 else [64, 64, 128, 256, 512] if depth >= 50 else [64, 64, 128, 256, 512]
...@@ -204,7 +231,15 @@ def resnet(depth): ...@@ -204,7 +231,15 @@ def resnet(depth):
def make_resnet_18(): return resnet(18) def make_resnet_18(): return resnet(18)
def make_resnet_34(): return resnet(34) def make_resnet_34(): return resnet(34)
def make_resnet_50(): return resnet(50) def make_resnet_50(): return resnet(50)
def make_resnet_101(): return resnet(101) def make_resnet_101(): return resnet(101)
def make_resnet_152(): return resnet(152)
\ No newline at end of file
def make_resnet_152(): return resnet(152)
...@@ -13,12 +13,13 @@ from __future__ import absolute_import ...@@ -13,12 +13,13 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections
import math import math
import dragon.vm.torch as torch import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv3x3, RetinaNetDecoder from lib.modeling import conv3x3
from lib.ops.modules import RetinaNetDecoder
from lib.retinanet import AnchorTargetLayer from lib.retinanet import AnchorTargetLayer
...@@ -32,14 +33,16 @@ class RetinaNet(torch.nn.Module): ...@@ -32,14 +33,16 @@ class RetinaNet(torch.nn.Module):
self.cls_conv = torch.nn.ModuleList( self.cls_conv = torch.nn.ModuleList(
conv3x3(dim_in, dim_in, bias=True) conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS)) for _ in range(cfg.RETINANET.NUM_CONVS)
)
self.bbox_conv = torch.nn.ModuleList( self.bbox_conv = torch.nn.ModuleList(
conv3x3(dim_in, dim_in, bias=True) conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS)) for _ in range(cfg.RETINANET.NUM_CONVS)
)
# Packed as [C, A] not [A, C] # Packed as [C, A] not [A, C]
self.C = cfg.MODEL.NUM_CLASSES - 1 self.C = cfg.MODEL.NUM_CLASSES - 1
A = len(cfg.RETINANET.ASPECT_RATIOS) * \ A = len(cfg.RETINANET.ASPECT_RATIOS) * \
cfg.RETINANET.SCALES_PER_OCTAVE cfg.RETINANET.SCALES_PER_OCTAVE
self.cls_score = conv3x3(dim_in, self.C * A, bias=True) self.cls_score = conv3x3(dim_in, self.C * A, bias=True)
self.bbox_pred = conv3x3(dim_in, 4 * A, bias=True) self.bbox_pred = conv3x3(dim_in, 4 * A, bias=True)
self.cls_prob = torch.nn.Sigmoid(inplace=True) self.cls_prob = torch.nn.Sigmoid(inplace=True)
...@@ -53,8 +56,11 @@ class RetinaNet(torch.nn.Module): ...@@ -53,8 +56,11 @@ class RetinaNet(torch.nn.Module):
self.anchor_target_layer = AnchorTargetLayer() self.anchor_target_layer = AnchorTargetLayer()
self.cls_loss = torch.nn.SigmoidFocalLoss( self.cls_loss = torch.nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA, alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA) gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1. / 9.) )
self.bbox_loss = torch.nn.SmoothL1Loss(
beta=1. / 9., reduction='batch_size',
)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -127,7 +133,7 @@ class RetinaNet(torch.nn.Module): ...@@ -127,7 +133,7 @@ class RetinaNet(torch.nn.Module):
gt_boxes=gt_boxes, gt_boxes=gt_boxes,
ims_info=ims_info, ims_info=ims_info,
) )
return OrderedDict({ return collections.OrderedDict({
'cls_loss': 'cls_loss':
self.cls_loss( self.cls_loss(
cls_score, cls_score,
...@@ -146,7 +152,7 @@ class RetinaNet(torch.nn.Module): ...@@ -146,7 +152,7 @@ class RetinaNet(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features']) cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float() cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({'bbox_pred': bbox_pred}) outputs = collections.OrderedDict({'bbox_pred': bbox_pred})
if self.training: if self.training:
outputs.update( outputs.update(
...@@ -168,4 +174,4 @@ class RetinaNet(torch.nn.Module): ...@@ -168,4 +174,4 @@ class RetinaNet(torch.nn.Module):
kwargs['ims_info'], kwargs['ims_info'],
) )
return outputs return outputs
\ No newline at end of file
...@@ -13,11 +13,12 @@ from __future__ import absolute_import ...@@ -13,11 +13,12 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3 from lib.modeling import conv1x1
from lib.modeling import conv3x3
class RPN(torch.nn.Module): class RPN(torch.nn.Module):
...@@ -119,7 +120,7 @@ class RPN(torch.nn.Module): ...@@ -119,7 +120,7 @@ class RPN(torch.nn.Module):
gt_boxes=gt_boxes, gt_boxes=gt_boxes,
ims_info=ims_info, ims_info=ims_info,
) )
return OrderedDict({ return collections.OrderedDict({
'rpn_cls_loss': 'rpn_cls_loss':
self.cls_loss(cls_score, self.rpn_data['labels']), self.cls_loss(cls_score, self.rpn_data['labels']),
'rpn_bbox_loss': 'rpn_bbox_loss':
...@@ -135,7 +136,7 @@ class RPN(torch.nn.Module): ...@@ -135,7 +136,7 @@ class RPN(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features']) cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float() cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({ outputs = collections.OrderedDict({
'rpn_cls_score': cls_score, 'rpn_cls_score': cls_score,
'rpn_bbox_pred': bbox_pred, 'rpn_bbox_pred': bbox_pred,
}) })
...@@ -151,4 +152,4 @@ class RPN(torch.nn.Module): ...@@ -151,4 +152,4 @@ class RPN(torch.nn.Module):
) )
) )
return outputs return outputs
\ No newline at end of file
...@@ -13,18 +13,15 @@ from __future__ import absolute_import ...@@ -13,18 +13,15 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv3x3 from lib.modeling import conv3x3
from lib.ssd import HardMiningLayer
from lib.ssd import ( from lib.ssd import MultiBoxMatchLayer
PriorBoxLayer, from lib.ssd import MultiBoxTargetLayer
MultiBoxMatchLayer, from lib.ssd import PriorBoxLayer
HardMiningLayer,
MultiBoxTargetLayer,
)
class SSD(torch.nn.Module): class SSD(torch.nn.Module):
...@@ -57,7 +54,7 @@ class SSD(torch.nn.Module): ...@@ -57,7 +54,7 @@ class SSD(torch.nn.Module):
self.hard_mining_layer = HardMiningLayer() self.hard_mining_layer = HardMiningLayer()
self.box_target_layer = MultiBoxTargetLayer() self.box_target_layer = MultiBoxTargetLayer()
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1) self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss() self.bbox_loss = torch.nn.SmoothL1Loss(reduction='batch_size')
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -88,9 +85,10 @@ class SSD(torch.nn.Module): ...@@ -88,9 +85,10 @@ class SSD(torch.nn.Module):
.permute(0, 2, 3, 1).view(0, -1)) .permute(0, 2, 3, 1).view(0, -1))
# Concat them if necessary # Concat them if necessary
return torch.cat(cls_score_wide, dim=1).view( return \
0, -1, cfg.MODEL.NUM_CLASSES), \ torch.cat(cls_score_wide, dim=1) \
torch.cat(bbox_pred_wide, dim=1).view(0, -1, 4) .view(0, -1, cfg.MODEL.NUM_CLASSES), \
torch.cat(bbox_pred_wide, dim=1).view(0, -1, 4)
def compute_losses( def compute_losses(
self, self,
...@@ -138,7 +136,7 @@ class SSD(torch.nn.Module): ...@@ -138,7 +136,7 @@ class SSD(torch.nn.Module):
gt_boxes=gt_boxes, gt_boxes=gt_boxes,
) )
) )
return OrderedDict({ return collections.OrderedDict({
# A compensating factor of 4.0 is used # A compensating factor of 4.0 is used
# As we normalize both the pos and neg samples # As we normalize both the pos and neg samples
'cls_loss': 'cls_loss':
...@@ -160,7 +158,7 @@ class SSD(torch.nn.Module): ...@@ -160,7 +158,7 @@ class SSD(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features']) cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float() cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({ outputs = collections.OrderedDict({
'prior_boxes': prior_boxes, 'prior_boxes': prior_boxes,
'bbox_pred': bbox_pred, 'bbox_pred': bbox_pred,
}) })
...@@ -179,4 +177,4 @@ class SSD(torch.nn.Module): ...@@ -179,4 +177,4 @@ class SSD(torch.nn.Module):
outputs['cls_prob'] = \ outputs['cls_prob'] = \
self.softmax(cls_score) self.softmax(cls_score)
return outputs return outputs
\ No newline at end of file
...@@ -16,7 +16,8 @@ from __future__ import print_function ...@@ -16,7 +16,8 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3 from lib.modeling import conv1x1
from lib.modeling import conv3x3
class VGG(torch.nn.Module): class VGG(torch.nn.Module):
...@@ -35,16 +36,22 @@ class VGG(torch.nn.Module): ...@@ -35,16 +36,22 @@ class VGG(torch.nn.Module):
dim_in = 3 if i == 0 else filter_list[i - 1] dim_in = 3 if i == 0 else filter_list[i - 1]
for j in range(self.units[i]): for j in range(self.units[i]):
self.__setattr__( self.__setattr__(
'{}_{}'.format(conv_name, j + 1), '{}_{}'
conv3x3(dim_in, filter_list[i], bias=True)) .format(conv_name, j + 1),
if j == 0: dim_in = filter_list[i] conv3x3(dim_in, filter_list[i], bias=True),
)
if j == 0:
dim_in = filter_list[i]
if reduced: if reduced:
# L2Norm is redundant from the observation of # L2Norm is redundant from the observation of
# empirical experiments. We just keep a trainable scale # empirical experiments. We just keep a trainable scale
self.conv4_3_norm = torch.nn.Affine(filter_list[3], bias=False) self.conv4_3_norm = torch.nn.Affine(filter_list[3], bias=False)
self.conv4_3_norm.weight.zero_() # Zero-Init self.conv4_3_norm.weight.zero_() # Zero-Init
self.fc6 = torch.nn.Conv2d(filter_list[-1], 1024, self.fc6 = torch.nn.Conv2d(
kernel_size=3, stride=1, padding=6, dilation=6) filter_list[-1], 1024,
kernel_size=3, padding=6,
stride=1, dilation=6,
)
self.fc7 = conv1x1(1024, 1024, bias=True) self.fc7 = conv1x1(1024, 1024, bias=True)
self.feature_dims = [filter_list[-2], 1024] self.feature_dims = [filter_list[-2], 1024]
if extra_arch is not None: if extra_arch is not None:
...@@ -54,15 +61,23 @@ class VGG(torch.nn.Module): ...@@ -54,15 +61,23 @@ class VGG(torch.nn.Module):
for i in range(len(strides)): for i in range(len(strides)):
conv_name = 'conv{}'.format(i + 6) conv_name = 'conv{}'.format(i + 6)
dim_in = 1024 if i == 0 else filter_list[i - 1] * 2 dim_in = 1024 if i == 0 else filter_list[i - 1] * 2
self.__setattr__('{}_1'.format(conv_name), self.__setattr__(
conv1x1(dim_in, filter_list[i], bias=True)) '{}_1'.format(conv_name),
conv1x1(dim_in, filter_list[i], bias=True),
)
if strides[i] == 2: if strides[i] == 2:
self.__setattr__('{}_2'.format(conv_name), self.__setattr__(
conv3x3(filter_list[i], filter_list[i] * 2, 2, bias=True)) '{}_2'.format(conv_name),
conv3x3(filter_list[i], filter_list[i] * 2, 2, bias=True),
)
else: else:
self.__setattr__('{}_2'.format(conv_name), self.__setattr__(
torch.nn.Conv2d(filter_list[i], filter_list[i] * 2, '{}_2'.format(conv_name),
kernel_size=kps[0], padding=kps[1], stride=kps[2])) torch.nn.Conv2d(
filter_list[i], filter_list[i] * 2,
kernel_size=kps[0], padding=kps[1], stride=kps[2]
),
)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -72,7 +87,7 @@ class VGG(torch.nn.Module): ...@@ -72,7 +87,7 @@ class VGG(torch.nn.Module):
m.weight, m.weight,
# Fix the gain for [-127, 127] # Fix the gain for [-127, 127]
a=1, a=1,
) # Xavier Initialization ) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0) torch.nn.init.constant_(m.bias, 0)
# Stop the gradients if necessary # Stop the gradients if necessary
...@@ -88,8 +103,9 @@ class VGG(torch.nn.Module): ...@@ -88,8 +103,9 @@ class VGG(torch.nn.Module):
for i in range(cfg.MODEL.FREEZE_AT, 0, -1): for i in range(cfg.MODEL.FREEZE_AT, 0, -1):
conv_name = 'conv{}'.format(i) conv_name = 'conv{}'.format(i)
for j in range(self.units[i - 1]): for j in range(self.units[i - 1]):
self.__getattr__('{}_{}'.format( self.__getattr__(
conv_name, j + 1)).apply(freeze_func) '{}_{}'.format(conv_name, j + 1)
).apply(freeze_func)
def forward(self, x): def forward(self, x):
outputs = [] outputs = []
...@@ -101,8 +117,10 @@ class VGG(torch.nn.Module): ...@@ -101,8 +117,10 @@ class VGG(torch.nn.Module):
'{}_{}'.format(conv_name, j + 1))(x)) '{}_{}'.format(conv_name, j + 1))(x))
if self.reduced and i == 3: if self.reduced and i == 3:
outputs.append(self.conv4_3_norm(x)) outputs.append(self.conv4_3_norm(x))
if i < 4: x = self.maxpool(x) if i < 4:
else: x = self.s1pool(x) if self.reduced else x x = self.maxpool(x)
else:
x = self.s1pool(x) if self.reduced else x
# Internal FC layers and Extra Conv Layers # Internal FC layers and Extra Conv Layers
if self.reduced: if self.reduced:
...@@ -145,4 +163,6 @@ def make_vgg_16_reduced(scale=300): ...@@ -145,4 +163,6 @@ def make_vgg_16_reduced(scale=300):
def make_vgg_16_reduced_300(): return make_vgg_16_reduced(300) def make_vgg_16_reduced_300(): return make_vgg_16_reduced(300)
def make_vgg_16_reduced_512(): return make_vgg_16_reduced(512)
\ No newline at end of file
def make_vgg_16_reduced_512(): return make_vgg_16_reduced(512)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
...@@ -18,7 +18,7 @@ from __future__ import division ...@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.core.config import cfg from lib.core.config import cfg
import lib.utils.logger as logger from lib.utils import logger
try: try:
from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms
...@@ -33,10 +33,12 @@ except ImportError as e: ...@@ -33,10 +33,12 @@ except ImportError as e:
def nms(detections, thresh, force_cpu=False): def nms(detections, thresh, force_cpu=False):
"""Perform either CPU or GPU Hard-NMS.""" """Perform either CPU or GPU Hard-NMS."""
if detections.shape[0] == 0: return [] if detections.shape[0] == 0:
return []
if cfg.USE_GPU_NMS and not force_cpu: if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID) return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
else: return cpu_nms(detections, thresh) else:
return cpu_nms(detections, thresh)
def soft_nms( def soft_nms(
...@@ -47,7 +49,8 @@ def soft_nms( ...@@ -47,7 +49,8 @@ def soft_nms(
score_thresh=0.001, score_thresh=0.001,
): ):
"""Perform CPU Soft-NMS.""" """Perform CPU Soft-NMS."""
if detections.shape[0] == 0: return [] if detections.shape[0] == 0:
return []
methods = {'hard': 0, 'linear': 1, 'gaussian': 2} methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
if method not in methods: if method not in methods:
logger.fatal('Unknown soft nms method: {}'.format(method)) logger.fatal('Unknown soft nms method: {}'.format(method))
...@@ -57,4 +60,4 @@ def soft_nms( ...@@ -57,4 +60,4 @@ def soft_nms(
methods[method], methods[method],
sigma, sigma,
score_thresh, score_thresh,
) )
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.autograd import function
from lib.ops import functions
def decode_retinanet(
    features,
    cls_prob,
    bbox_pred,
    ims_info,
    strides,
    ratios,
    scales,
    pre_nms_top_n,
    score_thresh,
):
    """Apply the ``RetinaNetDecoder`` function to the network outputs.

    A decoder instance is obtained through ``function.get`` for the
    device of ``cls_prob`` and then applied to
    ``(features, cls_prob, bbox_pred, ims_info)``.

    Parameters
    ----------
    features, cls_prob, bbox_pred, ims_info
        Inputs handed to the decoder's ``apply`` in exactly this order.
    strides, ratios, scales, pre_nms_top_n, score_thresh
        Options forwarded unchanged as keyword arguments of the decoder.

    Returns
    -------
    Whatever the decoder's ``apply`` returns.

    """
    # Keyword options that configure the decoder function.
    options = {
        'strides': strides,
        'ratios': ratios,
        'scales': scales,
        'pre_nms_top_n': pre_nms_top_n,
        'score_thresh': score_thresh,
    }
    decoder = function.get(
        functions.RetinaNetDecoder,
        cls_prob.device,
        **options
    )
    return decoder.apply(features, cls_prob, bbox_pred, ims_info)
def decode_rpn(
    features,
    cls_prob,
    bbox_pred,
    ims_info,
    num_outputs,
    strides,
    ratios,
    scales,
    pre_nms_top_n,
    post_nms_top_n,
    nms_thresh,
    min_size,
    min_level,
    max_level,
    canonical_scale,
    canonical_level,
):
    """Apply the ``RPNDecoder`` function to the network outputs.

    A decoder instance is obtained through ``function.get`` for the
    device of ``cls_prob`` and then applied to
    ``(features, cls_prob, bbox_pred, ims_info)``.

    Parameters
    ----------
    features, cls_prob, bbox_pred, ims_info
        Inputs handed to the decoder's ``apply`` in exactly this order.
    num_outputs
        Forwarded to the decoder under the keyword ``K``.
    strides, ratios, scales, pre_nms_top_n, post_nms_top_n, nms_thresh,
    min_size, min_level, max_level, canonical_scale, canonical_level
        Options forwarded unchanged as keyword arguments of the decoder.

    Returns
    -------
    Whatever the decoder's ``apply`` returns.

    """
    # Keyword options that configure the decoder function.
    # Note that ``num_outputs`` is renamed to ``K`` on the way through.
    options = {
        'K': num_outputs,
        'strides': strides,
        'ratios': ratios,
        'scales': scales,
        'pre_nms_top_n': pre_nms_top_n,
        'post_nms_top_n': post_nms_top_n,
        'nms_thresh': nms_thresh,
        'min_size': min_size,
        'min_level': min_level,
        'max_level': max_level,
        'canonical_scale': canonical_scale,
        'canonical_level': canonical_level,
    }
    decoder = function.get(
        functions.RPNDecoder,
        cls_prob.device,
        **options
    )
    return decoder.apply(features, cls_prob, bbox_pred, ims_info)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.autograd import function
class RetinaNetDecoder(function.Function):
    """Function wrapper around the ``Proposal`` operator in RETINANET mode."""

    # Option names copied verbatim from the keyword arguments
    # into the operator arguments.
    _FORWARDED_ARGS = (
        'strides',
        'ratios',
        'scales',
        'pre_nms_top_n',
        'score_thresh',
    )

    def __init__(self, key, dev, **kwargs):
        """Store the keyword options for later use by ``register_operator``."""
        super(RetinaNetDecoder, self).__init__(key, dev, **kwargs)
        self.args = kwargs

    def register_operator(self):
        """Return the operator spec: type ``Proposal`` plus its arguments."""
        arguments = {'det_type': 'RETINANET'}
        for name in self._FORWARDED_ARGS:
            arguments[name] = self.args[name]
        return {'op_type': 'Proposal', 'arguments': arguments}

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Run the operator on ``features + [cls_prob, bbox_pred, ims_info]``.

        ``features`` is concatenated with a list, so it must itself
        support ``+`` with a list. A single output is allocated.

        """
        op_inputs = features + [cls_prob, bbox_pred, ims_info]
        # The trailing <ims_info> is left out of the device unification.
        self._unify_devices(op_inputs[:-1])
        op_outputs = [self.alloc()]
        return self.run(op_inputs, op_outputs, unify_devices=False)
class RPNDecoder(function.Function):
    """Function wrapper around the ``Proposal`` operator in RCNN mode."""

    # Option names copied verbatim from the keyword arguments
    # into the operator arguments.
    _FORWARDED_ARGS = (
        'strides',
        'ratios',
        'scales',
        'pre_nms_top_n',
        'post_nms_top_n',
        'nms_thresh',
        'min_size',
        'min_level',
        'max_level',
        'canonical_scale',
        'canonical_level',
    )

    def __init__(self, key, dev, **kwargs):
        """Store the keyword options for later use by the other methods."""
        super(RPNDecoder, self).__init__(key, dev, **kwargs)
        self.args = kwargs

    def register_operator(self):
        """Return the operator spec: type ``Proposal`` plus its arguments."""
        arguments = {'det_type': 'RCNN'}
        for name in self._FORWARDED_ARGS:
            arguments[name] = self.args[name]
        return {'op_type': 'Proposal', 'arguments': arguments}

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Run the operator on ``features + [cls_prob, bbox_pred, ims_info]``.

        ``features`` is concatenated with a list, so it must itself
        support ``+`` with a list. ``K`` outputs are allocated, where
        ``K`` is read from the stored keyword options.

        """
        op_inputs = features + [cls_prob, bbox_pred, ims_info]
        # The trailing <ims_info> is left out of the device unification.
        self._unify_devices(op_inputs[:-1])
        op_outputs = []
        for _ in range(self.args['K']):
            op_outputs.append(self.alloc())
        return self.run(op_inputs, op_outputs, unify_devices=False)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.ops import functional as F
from lib.utils.blob import blob_to_tensor
class Bootstrap(torch.nn.Module):
    """Extended operator to process the images."""

    def __init__(self):
        """Read the data type and pixel means from ``cfg``."""
        super(Bootstrap, self).__init__()
        self.dtype = cfg.MODEL.DATA_TYPE.lower()
        self.mean_values = cfg.PIXEL_MEANS
        # One-element tensor whose device is reported by ``device()``.
        self.dummy_buffer = torch.ones(1)

    def _apply(self, fn):
        """Call ``fn`` on the dummy buffer; its return value is discarded."""
        fn(self.dummy_buffer)

    def cpu(self):
        """Record a CPU device in ``self._device``."""
        # NOTE(review): ``device()`` reads ``dummy_buffer.device``,
        # not ``_device`` -- confirm that this is intended.
        self._device = torch.device('cpu')

    def cuda(self, device=None):
        """Record a CUDA device (with the given index) in ``self._device``."""
        # NOTE(review): ``device()`` reads ``dummy_buffer.device``,
        # not ``_device`` -- confirm that this is intended.
        self._device = torch.device('cuda', device)

    def device(self):
        """Return the device of this module."""
        return self.dummy_buffer.device

    def forward(self, input):
        """Move ``input`` to the module's device if needed, then process it.

        The result comes from ``torch.vision.ops.image_data``, which is
        given the input, the configured data type and the pixel means.

        """
        target = self.device()
        if input._device != target:
            if target.type == 'cpu':
                input = input.cpu()
            else:
                input = input.cuda(target.index)
        return torch.vision.ops.image_data(
            input,
            self.dtype,
            self.mean_values,
        )
class RetinaNetDecoder(torch.nn.Module):
    """Generate proposal regions from retinanet."""

    def __init__(self):
        """Pre-compute the per-level strides and per-octave scales from ``cfg``."""
        super(RetinaNetDecoder, self).__init__()
        max_level = cfg.FPN.RPN_MAX_LEVEL
        min_level = cfg.FPN.RPN_MIN_LEVEL
        num_octaves = cfg.RETINANET.SCALES_PER_OCTAVE
        # One stride of 2^level for every level in [min_level, max_level].
        strides = []
        for level in range(min_level, max_level + 1):
            strides.append(int(2. ** level))
        # One scale per octave: ANCHOR_SCALE * 2^(octave / num_octaves).
        scales = []
        for octave in range(num_octaves):
            factor = 2 ** (octave / float(num_octaves))
            scales.append(cfg.RETINANET.ANCHOR_SCALE * factor)
        self.strides = strides
        self.scales = scales

    def register_operator(self):
        """Return the operator spec: type ``Proposal`` plus its arguments."""
        aspect_ratios = [float(e) for e in cfg.RETINANET.ASPECT_RATIOS]
        arguments = {
            'det_type': 'RETINANET',
            'strides': self.strides,
            'scales': self.scales,
            'ratios': aspect_ratios,
            'pre_nms_top_n': cfg.RETINANET.PRE_NMS_TOP_N,
            'score_thresh': cfg.TEST.SCORE_THRESH,
        }
        return {'op_type': 'Proposal', 'arguments': arguments}

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Decode the outputs through ``F.decode_retinanet``.

        ``ims_info`` is first converted with ``blob_to_tensor`` using
        ``enforce_cpu=True``; every other option is taken from ``cfg``
        or from the values pre-computed in ``__init__``.

        """
        ims_info_tensor = blob_to_tensor(ims_info, enforce_cpu=True)
        aspect_ratios = [float(e) for e in cfg.RETINANET.ASPECT_RATIOS]
        return F.decode_retinanet(
            features=features,
            cls_prob=cls_prob,
            bbox_pred=bbox_pred,
            ims_info=ims_info_tensor,
            strides=self.strides,
            ratios=aspect_ratios,
            scales=self.scales,
            pre_nms_top_n=cfg.RETINANET.PRE_NMS_TOP_N,
            score_thresh=cfg.TEST.SCORE_THRESH,
        )
class RPNDecoder(torch.nn.Module):
    """Generate proposal regions from RPN."""

    def __init__(self):
        """Determine the number of outputs ``K`` from ``cfg``."""
        super(RPNDecoder, self).__init__()
        if len(cfg.RPN.STRIDES) > 1:
            # Several strides: one output for every level
            # in [ROI_MIN_LEVEL, ROI_MAX_LEVEL].
            self.K = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
        else:
            self.K = 1

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Decode the outputs through ``F.decode_rpn``.

        ``ims_info`` is first converted with ``blob_to_tensor`` using
        ``enforce_cpu=True``; every other option is taken from ``cfg``.

        Returns
        -------
        list
            The decoded outputs. When ``K`` is 1 the result of
            ``F.decode_rpn`` is wrapped in a one-element list;
            otherwise it is returned as is.

        """
        ims_info_tensor = blob_to_tensor(ims_info, enforce_cpu=True)
        aspect_ratios = [float(e) for e in cfg.RPN.ASPECT_RATIOS]
        anchor_scales = [float(e) for e in cfg.RPN.SCALES]
        decoded = F.decode_rpn(
            features=features,
            cls_prob=cls_prob,
            bbox_pred=bbox_pred,
            ims_info=ims_info_tensor,
            num_outputs=self.K,
            strides=cfg.RPN.STRIDES,
            ratios=aspect_ratios,
            scales=anchor_scales,
            pre_nms_top_n=cfg.TEST.RPN_PRE_NMS_TOP_N,
            post_nms_top_n=cfg.TEST.RPN_POST_NMS_TOP_N,
            nms_thresh=cfg.TEST.RPN_NMS_THRESH,
            min_size=cfg.TEST.RPN_MIN_SIZE,
            min_level=cfg.FPN.ROI_MIN_LEVEL,
            max_level=cfg.FPN.ROI_MAX_LEVEL,
            canonical_scale=cfg.FPN.ROI_CANONICAL_SCALE,
            canonical_level=cfg.FPN.ROI_CANONICAL_LEVEL,
        )
        if self.K == 1:
            return [decoded]
        return decoded
...@@ -15,4 +15,3 @@ from __future__ import print_function ...@@ -15,4 +15,3 @@ from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.retinanet.layers.anchor_target_layer import AnchorTargetLayer from lib.retinanet.layers.anchor_target_layer import AnchorTargetLayer
from lib.retinanet.layers.proposal_layer import ProposalLayer
\ No newline at end of file
...@@ -7,4 +7,4 @@ ...@@ -7,4 +7,4 @@
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
\ No newline at end of file
...@@ -13,15 +13,16 @@ from __future__ import absolute_import ...@@ -13,15 +13,16 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.blob import to_tensor
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
class AnchorTargetLayer(torch.nn.Module): class AnchorTargetLayer(torch.nn.Module):
...@@ -35,28 +36,32 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -35,28 +36,32 @@ class AnchorTargetLayer(torch.nn.Module):
anchor_scale = cfg.RETINANET.ANCHOR_SCALE anchor_scale = cfg.RETINANET.ANCHOR_SCALE
self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)] self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)]
self.ratios = cfg.RETINANET.ASPECT_RATIOS self.ratios = cfg.RETINANET.ASPECT_RATIOS
# Generate base anchors # Generate base anchors
self.base_anchors = [] self.base_anchors = []
for stride in self.strides: for stride in self.strides:
sizes = [stride * anchor_scale * sizes = [stride * anchor_scale *
(2 ** (octave / float(scales_per_octave))) (2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)] for octave in range(scales_per_octave)]
self.base_anchors.append(generate_anchors_v2( self.base_anchors.append(
stride=stride, ratios=self.ratios, sizes=sizes)) generate_anchors_v2(
stride=stride,
ratios=self.ratios,
sizes=sizes,
))
def forward(self, features, gt_boxes, ims_info): def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.""" """Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images: if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \ logger.fatal(
.format(num_images, len(gt_boxes_wide))) 'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
all_anchors = []; total_anchors = 0 all_anchors, total_anchors = [], 0
for i in range(len(self.strides)): for i in range(len(self.strides)):
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i] shift_x = np.arange(0, width) * self.strides[i]
...@@ -101,7 +106,8 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -101,7 +106,8 @@ class AnchorTargetLayer(torch.nn.Module):
# Overlaps between the anchors and the gt boxes # Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float)) np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1) argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps] max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
...@@ -125,10 +131,10 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -125,10 +131,10 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_targets[fg_inds, :] = bbox_transform( bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4]) anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4])
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[fg_inds, :] = np.array((1.0, 1.0, 1.0, 1.0)) bbox_inside_weights[fg_inds, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1.0) bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1)
labels_wide[ix, inds_inside] = labels labels_wide[ix, inds_inside] = labels
bbox_targets_wide[ix, inds_inside] = bbox_targets bbox_targets_wide[ix, inds_inside] = bbox_targets
...@@ -141,16 +147,8 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -141,16 +147,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1)) bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return { return {
'labels': to_tensor(labels), 'labels': blob_to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets), 'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights), 'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights), 'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
} }
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.bbox_transform import bbox_transform_inv
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
class ProposalLayer(torch.nn.Module):
    """Turn per-level class scores and box deltas into detections.

    One group of base anchors is generated per pyramid level when the
    layer is built. ``forward`` thresholds the scores of every level,
    keeps the highest-scoring candidates, applies the predicted deltas
    to the matching anchors and stacks the detections of all images.

    """

    def __init__(self):
        super(ProposalLayer, self).__init__()
        # Pyramid range and anchor settings come from the global config
        max_level, min_level = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
        num_octave_scales = cfg.RETINANET.SCALES_PER_OCTAVE
        base_scale = cfg.RETINANET.ANCHOR_SCALE
        self.strides = [2. ** level for level in range(min_level, max_level + 1)]
        self.ratios = cfg.RETINANET.ASPECT_RATIOS
        # One group of base anchors per pyramid level
        self.base_anchors = []
        for stride in self.strides:
            sizes = []
            for octave in range(num_octave_scales):
                octave_scale = 2 ** (octave / float(num_octave_scales))
                sizes.append(stride * base_scale * octave_scale)
            level_anchors = generate_anchors_v2(
                stride=stride, ratios=self.ratios, sizes=sizes)
            self.base_anchors.append(level_anchors)

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Decode the detections of a batch.

        Parameters
        ----------
        features : sequence
            Per-level feature maps; only ``shape[-2:]`` of each is read.
        cls_prob, bbox_pred
            Objects converted through ``.numpy(True)``; ``bbox_pred`` is
            transposed from [?, 4, n] to [?, n, 4].
        ims_info
            Indexed as ``ims_info[ix, 2]`` to get the scale of image ``ix``.

        Returns
        -------
        numpy.ndarray
            float32 rows of {im_idx, x1, y1, x2, y2, score, cls}, or a
            single all-zero row when no score passes the threshold.

        """
        # Get resources
        num_images = ims_info.shape[0]
        cls_prob = cls_prob.numpy(True)
        bbox_pred = bbox_pred.numpy(True)
        lvl_info = [features[i].shape[-2:] for i in range(len(self.strides))]

        batch_mismatch = (cls_prob.shape[0] != num_images or
                          bbox_pred.shape[0] != num_images)
        if batch_mismatch:
            logger.fatal('Incorrect num of images: {}'.format(num_images))

        # Prepare for the outputs
        batch_probs = cls_prob
        batch_deltas = bbox_pred.transpose((0, 2, 1))  # [?, 4, n] -> [?, n, 4]
        batch_detections = []

        # Extract detections image by image
        for ix in range(num_images):
            im_scale = ims_info[ix, 2]
            if cfg.RETINANET.SOFTMAX:
                image_probs = batch_probs[ix, 1:, :]
            else:
                image_probs = batch_probs[ix]  # [num_classes - 1, n]
            image_deltas = batch_deltas[ix]  # [n, 4]

            offset = 0
            for lvl, (H, W) in enumerate(lvl_info):
                num_base = self.base_anchors[lvl].shape[0]
                stop = offset + num_base * (H * W)
                prob = image_probs[:, offset:stop]
                deltas = image_deltas[offset:stop]
                # Advance before the early exit so the next level stays aligned
                offset = stop

                flat_prob = prob.ravel()
                candidates = np.where(flat_prob > cfg.TEST.SCORE_THRESH)[0]
                if len(candidates) == 0:
                    continue

                # Keep the top-scoring candidates of this level
                top_n = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidates))
                order = np.argpartition(flat_prob[candidates], -top_n)
                keep = candidates[order[-top_n:]]

                # Recover (class, anchor) and (base anchor, y, x) coordinates
                prob_4d = prob.reshape((prob.shape[0], num_base, H, W))
                classes, anchor_ids = np.unravel_index(keep, prob.shape)
                _, a, y, x = np.unravel_index(keep, prob_4d.shape)

                scores = prob[classes, anchor_ids]
                shifts = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
                anchors = (shifts * self.strides[lvl]) + self.base_anchors[lvl][a, :]
                pred_boxes = bbox_transform_inv(anchors, deltas[anchor_ids])
                pred_boxes /= im_scale

                # {im_idx, x1, y1, x2, y2, score, cls}
                detections = np.zeros((pred_boxes.shape[0], 7), dtype=np.float32)
                detections[:, 0] = ix
                detections[:, 1:5] = pred_boxes
                detections[:, 5] = scores
                detections[:, 6] = classes + 1
                batch_detections.append(detections)

        # Merge detections into a blob
        if len(batch_detections) > 0:
            return np.vstack(batch_detections)
        return np.zeros((1, 7), dtype=np.float32)
\ No newline at end of file
...@@ -13,20 +13,16 @@ from __future__ import absolute_import ...@@ -13,20 +13,16 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
try:
import cPickle
except:
import pickle as cPickle
import numpy as np
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.image import scale_image from lib.utils.image import scale_image
from lib.utils.bbox_transform import clip_boxes
from lib.nms.nms_wrapper import nms, soft_nms
from lib.utils.timer import Timer from lib.utils.timer import Timer
from lib.utils.blob import im_list_to_blob
from lib.utils.vis import vis_one_image from lib.utils.vis import vis_one_image
...@@ -39,72 +35,65 @@ def im_detect(detector, raw_image): ...@@ -39,72 +35,65 @@ def im_detect(detector, raw_image):
blobs = {'data': im_list_to_blob(ims)} blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([ blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale] list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32) for im_scale in ims_scale], dtype=np.float32,
blobs['data'] = torch.from_numpy(blobs['data']).cuda(cfg.GPU_ID) )
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward # Do Forward
with torch.no_grad(): with torch.no_grad():
outputs = detector.forward(inputs=blobs) outputs = detector.forward(inputs=blobs)
# Decode results # Unpack results
results = outputs['detections'] return tensor_to_blob(outputs['detections'])[:, 1:]
detections_wide = []
for im_idx in range(len(ims)):
indices = np.where(results[:, 0].astype(np.int32) == im_idx)[0]
detections = results[indices, 1:]
detections[:, :4] = clip_boxes(detections[:, :4], raw_image.shape)
detections_wide.append(detections)
return np.vstack(detections_wide) \
if len(detections_wide) > 1 else detections_wide[0]
def ims_detect(detector, raw_images):
def ims_detect(net, raw_images): """Detect images, with single or multiple scales."""
"""Detect images, with single or multiple scales.
"""
# Prepare images # Prepare images
ims, ims_scale = scale_image(raw_images[0]) ims, ims_scale = scale_image(raw_images[0])
num_scales = len(ims_scale)
ims_shape = [im.shape for im in raw_images] ims_shape = [im.shape for im in raw_images]
for item_idx in range(1, len(raw_images)): for item_idx in range(1, len(raw_images)):
ims_ext, ims_scale_ext = scale_image(raw_images[item_idx]) ims_ext, ims_scale_ext = scale_image(raw_images[item_idx])
ims += ims_ext; ims_scale += ims_scale_ext ims += ims_ext
ims_scale += ims_scale_ext
# Prepare blobs # Prepare blobs
blobs = {'data': im_list_to_blob(ims)} blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([ blobs['ims_info'] = np.array([
list(blobs['data'].shape[2:4]) + [im_scale] list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32) for im_scale in ims_scale], dtype=np.float32,
)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward # Do Forward
net.forward(**blobs)() with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results # Unpack results
results = net.blobs['detections'].data.get_value() results = tensor_to_blob(outputs['detections'])
detections_wide = [[] for _ in range(len(ims_shape))] detections_wide = [[] for _ in range(len(ims_shape))]
for i in range(len(ims)): for i in range(len(ims)):
j = i % len(ims_shape)
indices = np.where(results[:, 0].astype(np.int32) == i)[0] indices = np.where(results[:, 0].astype(np.int32) == i)[0]
detections = results[indices, 1:] detections = results[indices, 1:]
detections[:, :4] = clip_boxes(detections[:, :4], ims_shape[j]) detections_wide[i // num_scales].append(detections)
detections_wide[j].append(detections)
for j in range(len(ims_shape)): for i in range(len(ims_shape)):
detections_wide[j] = np.vstack(detections_wide[j]) \ detections_wide[i] = np.vstack(detections_wide[i]) \
if len(detections_wide[j]) > 1 else detections_wide[j][0] if len(detections_wide[i]) > 1 else detections_wide[i][0]
return detections_wide return detections_wide
def test_net(net, server): def test_net(net, server):
classes, num_images, num_classes = \ # Load settings
server.classes, server.num_images, server.num_classes classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect' : Timer(), 'misc' : Timer()} _t = {'im_detect': Timer(), 'misc': Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH): for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths # Collect raw images and ground-truths
...@@ -134,30 +123,46 @@ def test_net(net, server): ...@@ -134,30 +123,46 @@ def test_net(net, server):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0] cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4] cls_boxes = detections[cls_indices, 0:4]
cls_scores = detections[cls_indices, 4] cls_scores = detections[cls_indices, 4]
cls_dets = np.hstack(( cls_detections = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])).\ cls_boxes, cls_scores[:, np.newaxis])) \
astype(np.float32, copy=False) .astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS: if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(cls_dets, cfg.TEST.NMS, keep = soft_nms(
cls_detections,
cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD, method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA) sigma=cfg.TEST.SOFT_NMS_SIGMA,
else: keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True) )
cls_dets = cls_dets[keep, :] else:
all_boxes[j][i] = cls_dets keep = nms(
boxes_this_image.append(cls_dets) cls_detections,
cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE: if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_images[item_idx], classes, boxes_this_image, vis_one_image(
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True, raw_images[item_idx],
filename=server.get_save_filename(image_ids[item_idx])) classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_ids[item_idx]),
)
# Limit to max_per_image detections *over all classes* # Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0: if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = [] image_scores = []
for j in range(1, num_classes): for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1]) image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores) if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM: if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM] image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes): for j in range(1, num_classes):
...@@ -165,12 +170,12 @@ def test_net(net, server): ...@@ -165,12 +170,12 @@ def test_net(net, server):
all_boxes[j][i] = all_boxes[j][i][keep, :] all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc() _t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(batch_idx + cfg.TEST.IMS_PER_BATCH, .format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images, _t['im_detect'].average_time, num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='') _t['misc'].average_time), end='')
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<') print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
print('Evaluating detections') print('Evaluating detections')
server.evaluate_detections(all_boxes) server.evaluate_detections(all_boxes)
\ No newline at end of file
...@@ -14,7 +14,7 @@ from __future__ import division ...@@ -14,7 +14,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.ssd.layers.data_layer import DataLayer from lib.ssd.layers.data_layer import DataLayer
from lib.ssd.layers.prior_box_layer import PriorBoxLayer
from lib.ssd.layers.multibox_match_layer import MultiBoxMatchLayer
from lib.ssd.layers.hard_mining_layer import HardMiningLayer from lib.ssd.layers.hard_mining_layer import HardMiningLayer
from lib.ssd.layers.multibox_target_layer import MultiBoxTargetLayer from lib.ssd.layers.multibox_layer import MultiBoxMatchLayer
\ No newline at end of file from lib.ssd.layers.multibox_layer import MultiBoxTargetLayer
from lib.ssd.layers.priorbox_layer import PriorBoxLayer
...@@ -13,32 +13,39 @@ from __future__ import absolute_import ...@@ -13,32 +13,39 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import multiprocessing
import numpy as np import numpy as np
from multiprocessing import Process
from lib.core.config import cfg from lib.core.config import cfg
class BlobFetcher(Process): class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(BlobFetcher, self).__init__() super(BlobFetcher, self).__init__()
self.Q_in = self.Q_out = None self._img_blob_size = (
cfg.TRAIN.IMS_PER_BATCH,
cfg.SSD.RESIZE.HEIGHT,
cfg.SSD.RESIZE.WIDTH, 3,
)
self.q_in = self.q_out = None
self.daemon = True self.daemon = True
def get(self): def get(self):
num_images = cfg.TRAIN.IMS_PER_BATCH img_blob, boxes_blob = np.zeros(self._img_blob_size, 'uint8'), []
target_h = cfg.SSD.RESIZE.HEIGHT; target_w = cfg.SSD.RESIZE.WIDTH
ims_blob = np.zeros(shape=(num_images, target_h, target_w, 3), dtype=np.uint8) for i in range(cfg.TRAIN.IMS_PER_BATCH):
gt_boxes_wide = [] img_blob[i], gt_boxes = self.q_in.get()
for ix in range(cfg.TRAIN.IMS_PER_BATCH): # Pack the boxes by adding the index of images
im, gt_boxes = self.Q_in.get() boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), np.float32)
ims_blob[ix, :, :, :] = im boxes[:, :gt_boxes.shape[1]] = gt_boxes
# Encode boxes by adding the idx of images boxes[:, -1] = i
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), dtype=np.float32) boxes_blob.append(boxes)
im_boxes[:, 0:gt_boxes.shape[1]] = gt_boxes
im_boxes[:, -1] = ix return {
gt_boxes_wide.append(im_boxes) 'data': img_blob,
'gt_boxes': np.concatenate(boxes_blob, 0),
return {'data': ims_blob, 'gt_boxes': np.concatenate(gt_boxes_wide, axis=0)} }
def run(self): def run(self):
while True: self.Q_out.put(self.get()) while True:
\ No newline at end of file self.q_out.put(self.get())
...@@ -13,15 +13,16 @@ from __future__ import absolute_import ...@@ -13,15 +13,16 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from multiprocessing import Queue
import time import time
import dragon
import pprint import pprint
from multiprocessing import Queue
import dragon.core.mpi as mpi
import lib.utils.logger as logger
from lib.faster_rcnn.data.data_reader import DataReader from lib.faster_rcnn.data.data_reader import DataReader
from lib.ssd.data.data_transformer import DataTransformer from lib.ssd.data.data_transformer import DataTransformer
from lib.ssd.data.blob_fetcher import BlobFetcher from lib.ssd.data.blob_fetcher import BlobFetcher
from lib.utils import logger
class DataBatch(object): class DataBatch(object):
...@@ -52,19 +53,20 @@ class DataBatch(object): ...@@ -52,19 +53,20 @@ class DataBatch(object):
super(DataBatch, self).__init__() super(DataBatch, self).__init__()
# Init mpi # Init mpi
global_rank, local_rank, group_size = 0, 0, 1 global_rank, local_rank, group_size = 0, 0, 1
if mpi.Is_Init(): if dragon.mpi.is_init():
idx, group = mpi.AllowParallel() group = dragon.mpi.is_parallel()
if idx != -1: # DataParallel if group is not None: # DataParallel
global_rank = mpi.Rank() global_rank = dragon.mpi.rank()
group_size = len(group) group_size = len(group)
for i, node in enumerate(group): for i, node in enumerate(group):
if global_rank == node: local_rank = i if global_rank == node:
local_rank = i
kwargs['group_size'] = group_size kwargs['group_size'] = group_size
# Configuration # Configuration
self._prefetch = kwargs.get('prefetch', 5) self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 32) self._batch_size = kwargs.get('batch_size', 32)
self._num_readers = kwargs.get( 'num_readers', 1) self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', -1) self._num_transformers = kwargs.get('num_transformers', -1)
self._max_transformers = kwargs.get('max_transformers', 3) self._max_transformers = kwargs.get('max_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1) self._num_fetchers = kwargs.get('num_fetchers', 1)
...@@ -84,7 +86,7 @@ class DataBatch(object): ...@@ -84,7 +86,7 @@ class DataBatch(object):
self._readers = [] self._readers = []
for i in range(self._num_readers): for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs)) self._readers.append(DataReader(**kwargs))
self._readers[-1].Q_out = self.Q1 self._readers[-1].q_out = self.Q1
for i in range(self._num_readers): for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers part_idx, num_parts = i, self._num_readers
...@@ -101,8 +103,8 @@ class DataBatch(object): ...@@ -101,8 +103,8 @@ class DataBatch(object):
for i in range(self._num_transformers): for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs) transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers) transformer._rng_seed += (i + local_rank * self._num_transformers)
transformer.Q_in = self.Q1 transformer.q_in = self.Q1
transformer.Q_out = self.Q2 transformer.q_out = self.Q2
transformer.start() transformer.start()
self._transformers.append(transformer) self._transformers.append(transformer)
time.sleep(0.1) time.sleep(0.1)
...@@ -111,14 +113,16 @@ class DataBatch(object): ...@@ -111,14 +113,16 @@ class DataBatch(object):
self._fetchers = [] self._fetchers = []
for i in range(self._num_fetchers): for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs) fetcher = BlobFetcher(**kwargs)
fetcher.Q_in = self.Q2 fetcher.q_in = self.Q2
fetcher.Q_out = self.Q3 fetcher.q_out = self.Q3
fetcher.start() fetcher.start()
self._fetchers.append(fetcher) self._fetchers.append(fetcher)
time.sleep(0.1) time.sleep(0.1)
# Prevent to echo multiple nodes # Prevent to echo multiple nodes
if local_rank == 0: self.echo() if local_rank == 0:
self.echo()
def cleanup(): def cleanup():
def terminate(processes): def terminate(processes):
for process in processes: for process in processes:
...@@ -130,6 +134,7 @@ class DataBatch(object): ...@@ -130,6 +134,7 @@ class DataBatch(object):
logger.info('Terminating DataTransformer ......') logger.info('Terminating DataTransformer ......')
terminate(self._readers) terminate(self._readers)
logger.info('Terminating DataReader......') logger.info('Terminating DataReader......')
import atexit import atexit
atexit.register(cleanup) atexit.register(cleanup)
...@@ -145,13 +150,7 @@ class DataBatch(object): ...@@ -145,13 +150,7 @@ class DataBatch(object):
return self.Q3.get() return self.Q3.get()
def echo(self): def echo(self):
"""Print I/O Information. """Print I/O Information."""
Returns
-------
None
"""
print('---------------------------------------------------------') print('---------------------------------------------------------')
print('BatchFetcher({} Threads), Using config:'.format( print('BatchFetcher({} Threads), Using config:'.format(
self._num_readers + self._num_transformers + self._num_fetchers)) self._num_readers + self._num_transformers + self._num_fetchers))
...@@ -160,4 +159,4 @@ class DataBatch(object): ...@@ -160,4 +159,4 @@ class DataBatch(object):
'n_transformers': self._num_transformers, 'n_transformers': self._num_transformers,
'n_fetchers': self._num_fetchers} 'n_fetchers': self._num_fetchers}
pprint.pprint(params) pprint.pprint(params)
print('---------------------------------------------------------') print('---------------------------------------------------------')
\ No newline at end of file
...@@ -14,34 +14,34 @@ from __future__ import division ...@@ -14,34 +14,34 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import cv2 import cv2
import multiprocessing
import numpy as np import numpy as np
import numpy.random as npr
from multiprocessing import Process
from lib.core.config import cfg from lib.core.config import cfg
from lib.proto import anno_pb2 as pb from lib.proto import anno_pb2 as pb
from lib.ssd.data.preprocessing import * from lib.ssd.data import transforms
import lib.utils.logger as logger from lib.utils import logger
class DataTransformer(Process): class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(DataTransformer, self).__init__() super(DataTransformer, self).__init__()
self._distorter = Distortor()
self._expander = Expander()
self._sampler = Sampler(cfg.SSD.SAMPLERS)
self._resizer = Resizer()
self._rng_seed = cfg.RNG_SEED self._rng_seed = cfg.RNG_SEED
self._mirror = cfg.TRAIN.USE_FLIPPED self._mirror = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF self._use_diff = cfg.TRAIN.USE_DIFF
self._classes = kwargs.get('classes', ('__background__',)) self._classes = kwargs.get('classes', ('__background__',))
self._num_classes = len(self._classes) self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes))) self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self._queues = [] self._image_aug = transforms.Compose(
self.Q_in = self.Q_out = None transforms.Distort(), # Color augmentation
transforms.Expand(), # Expand and padding
transforms.Sample(), # Sample a patch randomly
transforms.Resize(), # Resize to a fixed scale
)
self.q_in = self.q_out = None
self.daemon = True self.daemon = True
def make_roidb(self, ann_datum, flip=False): def make_roi_dict(self, ann_datum, flip=False):
annotations = ann_datum.annotation annotations = ann_datum.annotation
n_objects = 0 n_objects = 0
if not self._use_diff: if not self._use_diff:
...@@ -49,7 +49,7 @@ class DataTransformer(Process): ...@@ -49,7 +49,7 @@ class DataTransformer(Process):
if not ann.difficult: n_objects += 1 if not ann.difficult: n_objects += 1
else: n_objects = len(annotations) else: n_objects = len(annotations)
roidb = { roi_dict = {
'width': ann_datum.datum.width, 'width': ann_datum.datum.width,
'height': ann_datum.datum.height, 'height': ann_datum.datum.height,
'gt_classes': np.zeros((n_objects,), dtype=np.int32), 'gt_classes': np.zeros((n_objects,), dtype=np.int32),
...@@ -57,75 +57,82 @@ class DataTransformer(Process): ...@@ -57,75 +57,82 @@ class DataTransformer(Process):
'normalized_boxes': np.zeros((n_objects, 4), dtype=np.float32), 'normalized_boxes': np.zeros((n_objects, 4), dtype=np.float32),
} }
ix = 0 rec_idx = 0
for ann in annotations: for ann in annotations:
if not self._use_diff and ann.difficult: continue if not self._use_diff and ann.difficult:
roidb['boxes'][ix, :] = [ continue
max(0, ann.x1), max(0, ann.y1), roi_dict['boxes'][rec_idx, :] = [
max(0, ann.x1),
max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1), min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1)] min(ann.y2, ann_datum.datum.height - 1),
roidb['gt_classes'][ix] = self._class_to_ind[ann.name] ]
ix += 1 roi_dict['gt_classes'][rec_idx] = \
self._class_to_ind[ann.name]
rec_idx += 1
if flip: roidb['boxes'] = _flip_boxes(roidb['boxes'], roidb['width']) if flip:
roidb['normalized_boxes'][:, 0::2] = roidb['boxes'][:, 0::2] / float(roidb['width']) roi_dict['boxes'] = _flip_boxes(
roidb['normalized_boxes'][:, 1::2] = roidb['boxes'][:, 1::2] / float(roidb['height']) roi_dict['boxes'], roi_dict['width'])
return roidb roi_dict['boxes'][:, 0::2] /= roi_dict['width']
roi_dict['boxes'][:, 1::2] /= roi_dict['height']
return roi_dict
def get(self, serialized): def get(self, serialized):
ann_datum = pb.AnnotatedDatum() ann_datum = pb.AnnotatedDatum()
ann_datum.ParseFromString(serialized) ann_datum.ParseFromString(serialized)
im_datum = ann_datum.datum img_datum = ann_datum.datum
im = np.fromstring(im_datum.data, np.uint8) img = np.fromstring(img_datum.data, np.uint8)
if im_datum.encoded is True: im = cv2.imdecode(im, -1) if img_datum.encoded is True:
else: im = im.reshape((im_datum.height, im_datum.width, im_datum.channels)) img = cv2.imdecode(img, -1)
else:
h, w = img_datum.height, img_datum.width
img = img.reshape((h, w, img_datum.channels))
# Flip # Flip
flip = False flip = False
if self._mirror: if self._mirror:
if npr.randint(0, 2) > 0: if np.random.randint(0, 2) > 0:
im = im[:, ::-1, :] img = img[:, ::-1, :]
flip = True flip = True
# Datum -> RoIDB # Datum -> RoIDB
roidb = self.make_roidb(ann_datum, flip) roi_dict = self.make_roi_dict(ann_datum, flip)
# Post-Process for gt boxes # Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}] # Shape like: [num_objects, {x1, y1, x2, y2, cls}]
gt_boxes = np.empty((len(roidb['gt_classes']), 5), dtype=np.float32) gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), 'float32')
gt_boxes[:, 0:4] = roidb['normalized_boxes'] gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
gt_boxes[:, 4] = roidb['gt_classes']
# Distort => Expand => Sample => Resize # Distort => Expand => Sample => Resize
im = self._distorter.distort_image(im) img, gt_boxes = self._image_aug(img, gt_boxes)
im, gt_boxes = self._expander.expand_image(im, gt_boxes)
im, gt_boxes = self._sampler.sample_image(im, gt_boxes)
im = self._resizer.resize_image(im)
# Modify gt boxes to the blob scale # Restore to the blob scale
gt_boxes[:, 0] *= cfg.SSD.RESIZE.WIDTH gt_boxes[:, 0] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 1] *= cfg.SSD.RESIZE.HEIGHT gt_boxes[:, 1] *= cfg.SSD.RESIZE.HEIGHT
gt_boxes[:, 2] *= cfg.SSD.RESIZE.WIDTH gt_boxes[:, 2] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 3] *= cfg.SSD.RESIZE.HEIGHT gt_boxes[:, 3] *= cfg.SSD.RESIZE.HEIGHT
return im, gt_boxes return img, gt_boxes
def run(self): def run(self):
npr.seed(self._rng_seed) np.random.seed(self._rng_seed)
while True: while True:
serialized = self.Q_in.get() serialized = self.q_in.get()
im, gt_boxes = self.get(serialized) im, gt_boxes = self.get(serialized)
if len(gt_boxes) < 1: continue if len(gt_boxes) < 1:
self.Q_out.put((im, gt_boxes)) continue
self.q_out.put((im, gt_boxes))
def _flip_boxes(boxes, width): def _flip_boxes(boxes, width):
flip_boxes = boxes.copy() flip_boxes = boxes.copy()
oldx1 = boxes[:, 0].copy() old_x1 = boxes[:, 0].copy()
oldx2 = boxes[:, 2].copy() old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - oldx2 - 1 flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - oldx1 - 1 flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all(): if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.') logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes return flip_boxes
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import PIL.Image
import PIL.ImageEnhance
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
class Distortor(object):
    """Randomly jitter the brightness, contrast and saturation of an image.

    Each enhancement is applied independently with the probability read
    from ``cfg.SSD.DISTORT``; its factor is drawn uniformly from
    ``[1 - delta, 1 + delta]`` with the delta of that enhancement.

    """

    def __init__(self):
        self._brightness_prob = cfg.SSD.DISTORT.BRIGHTNESS_PROB
        self._brightness_delta = 0.3
        self._contrast_prob = cfg.SSD.DISTORT.CONTRAST_PROB
        self._contrast_delta = 0.3
        self._saturation_prob = cfg.SSD.DISTORT.SATURATION_PROB
        self._saturation_delta = 0.3

    def distort_image(self, im):
        """Return a randomly distorted copy of ``im``.

        Parameters
        ----------
        im : numpy.ndarray
            The image, converted with ``PIL.Image.fromarray``.

        Returns
        -------
        numpy.ndarray
            The distorted image converted back with ``np.array``.

        """
        im = PIL.Image.fromarray(im)
        if npr.uniform() < self._brightness_prob:
            factor = npr.uniform(
                -self._brightness_delta, self._brightness_delta) + 1.0
            im = PIL.ImageEnhance.Brightness(im).enhance(factor)
        if npr.uniform() < self._contrast_prob:
            factor = npr.uniform(
                -self._contrast_delta, self._contrast_delta) + 1.0
            im = PIL.ImageEnhance.Contrast(im).enhance(factor)
        if npr.uniform() < self._saturation_prob:
            # Use the saturation delta here; the contrast delta was
            # previously reused by mistake.
            factor = npr.uniform(
                -self._saturation_delta, self._saturation_delta) + 1.0
            im = PIL.ImageEnhance.Color(im).enhance(factor)
        return np.array(im)
if __name__ == '__main__':
    # Manual visual check: distort 'cat.jpg' again after every key press.
    distortor = Distortor()
    while True:
        im = distortor.distort_image(cv2.imread('cat.jpg'))
        cv2.imshow('Distort', im)
        cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy.random as npr
import numpy as np
import math
from lib.core.config import cfg
import lib.utils.logger as logger
class Expander(object):
    """Randomly place an image on a larger canvas filled with the pixel means."""

    def __init__(self, **params):
        self._expand_prob = cfg.SSD.EXPAND.PROB
        self._max_expand_ratio = cfg.SSD.EXPAND.MAX_RATIO
        if self._max_expand_ratio < 1.0:
            logger.fatal('The max expand ratio must >= 1.0, got {}'.format(self._max_expand_ratio))

    def expand_image(self, im, gt_boxes=None):
        """Expand ``im`` and remap ``gt_boxes`` onto the new canvas.

        Parameters
        ----------
        im : numpy.ndarray
            The image; ``shape[0]`` and ``shape[1]`` are height and width.
        gt_boxes : numpy.ndarray, optional
            Boxes whose first four columns are scaled by the image size,
            shifted by the paste offset and divided by the canvas size.

        Returns
        -------
        tuple
            The (image, boxes) pair; the inputs are returned untouched
            when the expansion is skipped.

        """
        # The draws below happen in a fixed order: skip test, ratio, offsets.
        if npr.uniform() > self._expand_prob:
            return im, gt_boxes
        ratio = npr.uniform(1.0, self._max_expand_ratio)
        if ratio == 1:
            return im, gt_boxes

        src_h, src_w = im.shape[0], im.shape[1]
        dst_h, dst_w = int(src_h * ratio), int(src_w * ratio)
        top = int(math.floor(npr.uniform(0.0, dst_h - src_h)))
        left = int(math.floor(npr.uniform(0.0, dst_w - src_w)))

        # Fill the canvas with the pixel means, then paste the source image.
        canvas = np.empty((dst_h, dst_w, 3), dtype=np.uint8)
        canvas[:] = cfg.PIXEL_MEANS
        canvas[top:top + src_h, left:left + src_w, :] = im

        if gt_boxes is None:
            return canvas, gt_boxes

        moved = gt_boxes.astype(gt_boxes.dtype, copy=True)
        for col in (0, 2):  # x1, x2
            moved[:, col] = (gt_boxes[:, col] * src_w + left) / dst_w
        for col in (1, 3):  # y1, y2
            moved[:, col] = (gt_boxes[:, col] * src_h + top) / dst_h
        return canvas, moved
if __name__ == '__main__':
    # Manual visual check: expand 'cat.jpg' and draw the remapped box,
    # again after every key press.
    expander = Expander()
    while True:
        im, gt_boxes = expander.expand_image(
            cv2.imread('cat.jpg'),
            np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32),
        )
        canvas_h, canvas_w = im.shape[0], im.shape[1]
        box = gt_boxes[0]
        top_left = (int(box[0] * canvas_w), int(box[1] * canvas_h))
        bottom_right = (int(box[2] * canvas_w), int(box[3] * canvas_h))
        cv2.rectangle(im, top_left, bottom_right, (188,119,64), 2)
        cv2.imshow('Expand', im)
        cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy.random as npr
from lib.core.config import cfg
class Resizer(object):
    """Resize images to the configured SSD input size.

    The interpolation flag is picked at random, per call, from the modes
    listed in ``cfg.SSD.RESIZE.INTERP_MODE``.
    """

    def __init__(self):
        """Read the target size and the allowed interpolation modes from cfg."""
        self._re_height = cfg.SSD.RESIZE.HEIGHT
        self._re_width = cfg.SSD.RESIZE.WIDTH
        # Map the config names onto the OpenCV interpolation flags.
        name_to_flag = {
            'LINEAR': cv2.INTER_LINEAR,
            'AREA': cv2.INTER_AREA,
            'NEAREST': cv2.INTER_NEAREST,
            'CUBIC': cv2.INTER_CUBIC,
            'LANCZOS4': cv2.INTER_LANCZOS4,
        }
        self._interp_mode = []
        for mode_name in cfg.SSD.RESIZE.INTERP_MODE:
            self._interp_mode.append(name_to_flag[mode_name])

    def resize_image(self, im):
        """Return ``im`` resized to (width, height) with a random interpolation."""
        choice = npr.randint(0, len(self._interp_mode))
        target_size = (self._re_width, self._re_height)
        return cv2.resize(
            im, target_size, interpolation=self._interp_mode[choice])
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
from lib.utils.bbox_transform import clip_boxes
from lib.utils.boxes import iou
import lib.utils.logger as logger
class Sampler(object):
    """Generate random crop windows under overlap constraints and crop to one.

    Each sampler is an 8-tuple
    ``(min_scale, max_scale, min_aspect_ratio, max_aspect_ratio,
    min_jaccard_overlap, max_jaccard_overlap, max_trials, max_sample)``.
    """

    def __init__(self, samplers):
        """Store the sampler tuples as dictionaries.

        Args:
            samplers: A list of 8-tuples, or a single 8-tuple (anything that
                is not a ``list`` is wrapped into a one-element list).
        """
        if not isinstance(samplers, list):
            samplers = [samplers]
        self._samplers = []
        for sampler in samplers:
            if len(sampler) != 8:
                logger.fatal('The sample params should be a tuple of length 8.')
            sample_param = {
                'min_scale': sampler[0],
                'max_scale': sampler[1],
                'min_aspect_ratio': sampler[2],
                'max_aspect_ratio': sampler[3],
                'min_jaccard_overlap': sampler[4],
                'max_jaccard_overlap': sampler[5],
                'max_trials': sampler[6],
                'max_sample': sampler[7]}
            self._samplers.append(sample_param)

    def _compute_overlaps(self, rand_box, gt_boxes):
        """Return ``iou`` of the single candidate box against columns 0-3 of ``gt_boxes``."""
        return iou(np.expand_dims(rand_box, 0), gt_boxes[:, 0:4])

    def _generate_sample(self, sample_param):
        """Draw one random box ``[x1, y1, x2, y2]`` inside the unit square."""
        min_scale = sample_param.get('min_scale', 1.0)
        max_scale = sample_param.get('max_scale', 1.0)
        scale = npr.uniform(min_scale, max_scale)
        min_aspect_ratio = sample_param.get('min_aspect_ratio', 1.0)
        max_aspect_ratio = sample_param.get('max_aspect_ratio', 1.0)
        # Clamp the aspect ratio so that neither side of the box exceeds 1.
        min_aspect_ratio = max(min_aspect_ratio, scale ** 2)
        max_aspect_ratio = min(max_aspect_ratio, 1.0 / (scale ** 2))
        aspect_ratio = npr.uniform(min_aspect_ratio, max_aspect_ratio)
        bbox_w = scale * (aspect_ratio ** 0.5)
        bbox_h = scale / (aspect_ratio ** 0.5)
        w_off = npr.uniform(0.0, float(1 - bbox_w))
        h_off = npr.uniform(0.0, float(1 - bbox_h))
        return np.array([w_off, h_off, w_off + bbox_w, h_off + bbox_h])

    def _check_satisfy(self, sample_box, gt_boxes, constraint):
        """Tell whether the best overlap of ``sample_box`` lies within the bounds.

        A bound that is ``None`` (or missing) is not enforced; with both
        bounds absent the box is always accepted.
        """
        min_jaccard_overlap = constraint.get('min_jaccard_overlap', None)
        max_jaccard_overlap = constraint.get('max_jaccard_overlap', None)
        # Identity comparison: ``== None`` is fragile for array-like values.
        if min_jaccard_overlap is None and max_jaccard_overlap is None:
            return True
        max_overlap = self._compute_overlaps(sample_box, gt_boxes).max()
        if min_jaccard_overlap is not None:
            if max_overlap < min_jaccard_overlap:
                return False
        if max_jaccard_overlap is not None:
            if max_overlap > max_jaccard_overlap:
                return False
        return True

    def _generate_batch_samples(self, gt_boxes):
        """Collect accepted candidate boxes from every configured sampler."""
        sample_boxes = []
        for sampler in self._samplers:
            found = 0
            for _ in range(sampler['max_trials']):
                if found >= sampler['max_sample']:
                    break
                sample_box = self._generate_sample(sampler)
                # The overlap check is skipped for the trivial [0, 1] bounds.
                if sampler['min_jaccard_overlap'] != 0.0 or \
                        sampler['max_jaccard_overlap'] != 1.0:
                    ok = self._check_satisfy(sample_box, gt_boxes, sampler)
                    if not ok:
                        continue
                found += 1
                sample_boxes.append(sample_box)
        return sample_boxes

    def _rand_crop(self, im, rand_box, gt_boxes=None):
        """Crop ``im`` to ``rand_box`` and remap the boxes into the crop.

        ``rand_box`` is given as fractions of the image size. When
        ``gt_boxes`` is provided, only boxes whose center falls inside
        ``rand_box`` are kept; they are clipped to the crop and re-expressed
        as fractions of the crop size.
        """
        im_h = im.shape[0]
        im_w = im.shape[1]
        w_off = int(rand_box[0] * im_w)
        h_off = int(rand_box[1] * im_h)
        crop_w = int((rand_box[2] - rand_box[0]) * im_w)
        crop_h = int((rand_box[3] - rand_box[1]) * im_h)
        new_im = im[h_off: h_off + crop_h, w_off: w_off + crop_w, :]
        if gt_boxes is not None:
            ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
            ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
            # Keep the ground-truth box whose center is in the sample box
            # Implement ``EmitConstraint.CENTER`` in the original SSD
            keep_inds = np.where((ctr_x >= rand_box[0]) & (ctr_x <= rand_box[2])
                                 & (ctr_y >= rand_box[1]) & (ctr_y <= rand_box[3]))[0]
            gt_boxes = gt_boxes[keep_inds]
            new_gt_boxes = gt_boxes.astype(gt_boxes.dtype, copy=True)
            # Fractions of the image -> pixels relative to the crop origin.
            new_gt_boxes[:, 0] = (gt_boxes[:, 0] * im_w - w_off)
            new_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h - h_off)
            new_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w - w_off)
            new_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h - h_off)
            new_gt_boxes = clip_boxes(new_gt_boxes, (crop_h, crop_w))
            # Pixels -> fractions of the crop size.
            new_gt_boxes[:, 0] = new_gt_boxes[:, 0] / crop_w
            new_gt_boxes[:, 1] = new_gt_boxes[:, 1] / crop_h
            new_gt_boxes[:, 2] = new_gt_boxes[:, 2] / crop_w
            new_gt_boxes[:, 3] = new_gt_boxes[:, 3] / crop_h
            return new_im, new_gt_boxes
        return new_im, gt_boxes

    def sample_image(self, im, gt_boxes):
        """Crop to one randomly chosen accepted box, if any was found.

        Returns:
            Tuple ``(image, boxes)``; the inputs are returned unchanged when
            no candidate box satisfied the constraints.
        """
        sample_boxes = self._generate_batch_samples(gt_boxes)
        if len(sample_boxes) > 0:
            # Apply sampling if found at least one valid sample box
            # Then randomly pick one
            sample_idx = npr.randint(0, len(sample_boxes))
            rand_box = sample_boxes[sample_idx]
            im, gt_boxes = self._rand_crop(im, rand_box, gt_boxes)
        return im, gt_boxes
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
import numpy.random as npr
npr.seed(3)
import sys
sys.path.append('../../')
from resize import Resizer
from expand import Expander
from distort import Distortor
from sample import Sampler
from lib.core.config import cfg
if __name__ == '__main__':
    # Interactive visual check of the whole augmentation chain on 'cat.jpg':
    # distort -> expand -> sample (crop) -> resize, then draw the boxes.
    distorter = Distortor()
    expander = Expander()
    sampler = Sampler(cfg.SSD.SAMPLERS)
    resizer = Resizer()
    while True:
        im = cv2.imread('cat.jpg')
        gt_boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
        im = distorter.distort_image(im)
        im, gt_boxes = expander.expand_image(im, gt_boxes)
        im, gt_boxes = sampler.sample_image(im, gt_boxes)
        # Retry when the crop dropped every box.
        if len(gt_boxes) < 1:
            continue
        im = resizer.resize_image(im)
        for gt_box in gt_boxes:
            # Fractions of the image size -> pixel corners.
            x1, y1 = int(gt_box[0] * im.shape[1]), int(gt_box[1] * im.shape[0])
            x2, y2 = int(gt_box[2] * im.shape[1]), int(gt_box[3] * im.shape[0])
            cv2.rectangle(im, (x1, y1), (x2, y2), (188, 119, 64), 2)
            # NOTE(review): assumed to run once per box; confirm against the
            # original file's indentation.
            print(x1, y1, x2, y2)
        cv2.imshow('Sample', im)
        cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import math
import cv2
import PIL.Image
import PIL.ImageEnhance
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import iou
class Compose(object):
    """Chain several transforms into a single callable.

    Every transform must expose ``apply(img, boxes)`` returning a
    two-item ``(img, boxes)`` result, which is fed to the next transform.
    """

    def __init__(self, *transforms):
        # Kept as the tuple of positional arguments, in application order.
        self.transforms = transforms

    def __call__(self, img, boxes):
        """Run ``img`` and ``boxes`` through every transform in order."""
        state = (img, boxes)
        for step in self.transforms:
            image, targets = step.apply(*state)
            state = (image, targets)
        return state
class Distort(object):
    """Randomly perturb brightness, contrast and saturation of an image.

    Each enhancement is applied independently with its own probability
    taken from ``cfg.SSD.DISTORT``.
    """

    def __init__(self):
        """Read the three per-enhancement probabilities from cfg."""
        self._brightness_prob = cfg.SSD.DISTORT.BRIGHTNESS_PROB
        self._contrast_prob = cfg.SSD.DISTORT.CONTRAST_PROB
        self._saturation_prob = cfg.SSD.DISTORT.SATURATION_PROB

    def apply(self, img, boxes=None):
        """Return the distorted image as an array; ``boxes`` pass through unchanged."""
        pil_img = PIL.Image.fromarray(img)
        # Applied in this fixed order: brightness, contrast, saturation.
        stages = (
            (self._brightness_prob, PIL.ImageEnhance.Brightness),
            (self._contrast_prob, PIL.ImageEnhance.Contrast),
            (self._saturation_prob, PIL.ImageEnhance.Color),
        )
        for prob, enhancer_cls in stages:
            if npr.uniform() < prob:
                # Enhancement factor drawn from [0.7, 1.3).
                factor = npr.uniform(-0.3, 0.3) + 1.
                pil_img = enhancer_cls(pil_img).enhance(factor)
        return np.array(pil_img), boxes
class Expand(object):
    """Randomly place the image on a larger canvas filled with the pixel means."""

    def __init__(self):
        """Read the expansion probability and the maximum ratio from cfg."""
        self._expand_prob = cfg.SSD.EXPAND.PROB
        self._max_ratio = cfg.SSD.EXPAND.MAX_RATIO
        if self._max_ratio < 1.0:
            message = 'The max expand ratio must >= 1, got {}'
            logger.fatal(message.format(self._max_ratio))

    def apply(self, img, boxes=None):
        """Expand ``img`` and remap ``boxes`` onto the enlarged canvas.

        Args:
            img: Image array; the first two dimensions are height and width
                and the canvas is allocated with 3 channels.
            boxes: Optional box array whose columns 0-3 are rescaled as
                fractions of the image width / height.

        Returns:
            Tuple ``(image, boxes)``; the inputs are returned untouched when
            the expansion is skipped.
        """
        # Skip when the random draw exceeds the configured probability.
        if npr.uniform() > self._expand_prob:
            return img, boxes

        scale = npr.uniform(1., self._max_ratio)
        if scale == 1:
            return img, boxes

        src_h, src_w = img.shape[:2]
        dst_h = int(src_h * scale)
        dst_w = int(src_w * scale)

        # Random top-left corner of the original image inside the canvas.
        top = int(math.floor(npr.uniform(0., dst_h - src_h)))
        left = int(math.floor(npr.uniform(0., dst_w - src_w)))

        canvas = np.empty((dst_h, dst_w, 3), dtype=np.uint8)
        canvas[:] = cfg.PIXEL_MEANS
        canvas[top:top + src_h, left:left + src_w, :] = img

        if boxes is None:
            return canvas, boxes

        # Re-express the coordinates relative to the enlarged canvas.
        shifted = boxes.astype(boxes.dtype, copy=True)
        for x_col in (0, 2):
            shifted[:, x_col] = (boxes[:, x_col] * src_w + left) / dst_w
        for y_col in (1, 3):
            shifted[:, y_col] = (boxes[:, y_col] * src_h + top) / dst_h
        return canvas, shifted
class Resize(object):
    """Resize the image to the configured SSD input size.

    The interpolation flag is drawn at random, per call, from the modes
    listed in ``cfg.SSD.RESIZE.INTERP_MODE``.
    """

    def __init__(self):
        """Read the target (width, height) and interpolation modes from cfg."""
        target_w = cfg.SSD.RESIZE.WIDTH
        target_h = cfg.SSD.RESIZE.HEIGHT
        self._target_size = (target_w, target_h)
        # Map the config names onto the OpenCV interpolation flags.
        name_to_flag = {
            'LINEAR': cv2.INTER_LINEAR,
            'AREA': cv2.INTER_AREA,
            'NEAREST': cv2.INTER_NEAREST,
            'CUBIC': cv2.INTER_CUBIC,
            'LANCZOS4': cv2.INTER_LANCZOS4,
        }
        self._interp_mode = []
        for mode_name in cfg.SSD.RESIZE.INTERP_MODE:
            self._interp_mode.append(name_to_flag[mode_name])

    def apply(self, img, boxes):
        """Return the resized image; ``boxes`` pass through unchanged."""
        choice = npr.randint(len(self._interp_mode))
        resized = cv2.resize(
            img, self._target_size,
            interpolation=self._interp_mode[choice],
        )
        return resized, boxes
class Sample(object):
    """Randomly crop the image to a window satisfying overlap constraints.

    The samplers come from ``cfg.SSD.SAMPLERS``; each one is an 8-tuple
    ``(min_scale, max_scale, min_aspect_ratio, max_aspect_ratio,
    min_overlap, max_overlap, max_trials, max_sample)``.
    """

    def __init__(self):
        """Convert the configured sampler tuples into dictionaries."""
        # ``collections.Iterable`` was removed in Python 3.10; the ABC lives
        # in ``collections.abc`` (fall back for Python 2).
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable
        samplers = cfg.SSD.SAMPLERS
        if not isinstance(samplers, Iterable):
            samplers = [samplers]
        self._samplers = []
        for sampler in samplers:
            if len(sampler) != 8:
                logger.fatal('The sample params should be a tuple of length 8.')
            sample_param = {
                'min_scale': sampler[0],
                'max_scale': sampler[1],
                'min_aspect_ratio': sampler[2],
                'max_aspect_ratio': sampler[3],
                'min_overlap': sampler[4],
                'max_overlap': sampler[5],
                'max_trials': sampler[6],
                'max_sample': sampler[7],
            }
            self._samplers.append(sample_param)

    @classmethod
    def _compute_overlaps(cls, rand_box, gt_boxes):
        """Return ``iou`` of the single candidate box against columns 0-3 of ``gt_boxes``."""
        return iou(np.expand_dims(rand_box, 0), gt_boxes[:, 0:4])

    @classmethod
    def _generate_sample(cls, sample_param):
        """Draw one random box ``[x1, y1, x2, y2]`` inside the unit square."""
        min_scale = sample_param.get('min_scale', 1.)
        max_scale = sample_param.get('max_scale', 1.)
        scale = npr.uniform(min_scale, max_scale)
        min_aspect_ratio = sample_param.get('min_aspect_ratio', 1.)
        max_aspect_ratio = sample_param.get('max_aspect_ratio', 1.)
        # Clamp the aspect ratio so that neither side of the box exceeds 1.
        min_aspect_ratio = max(min_aspect_ratio, scale ** 2)
        max_aspect_ratio = min(max_aspect_ratio, 1. / (scale ** 2))
        aspect_ratio = npr.uniform(min_aspect_ratio, max_aspect_ratio)
        bbox_w = scale * (aspect_ratio ** 0.5)
        bbox_h = scale / (aspect_ratio ** 0.5)
        w_off = npr.uniform(0., 1. - bbox_w)
        h_off = npr.uniform(0., 1. - bbox_h)
        return np.array([w_off, h_off, w_off + bbox_w, h_off + bbox_h])

    def _check_satisfy(self, sample_box, gt_boxes, constraint):
        """Tell whether the best overlap of ``sample_box`` lies within the bounds.

        A bound that is ``None`` (or missing) is not enforced; with both
        bounds absent the box is always accepted.
        """
        min_overlap = constraint.get('min_overlap', None)
        max_overlap = constraint.get('max_overlap', None)
        if min_overlap is None and max_overlap is None:
            return True
        # Keep the measured value in its own name: reusing ``max_overlap``
        # would overwrite the configured upper bound and disable the check.
        best_overlap = self._compute_overlaps(sample_box, gt_boxes).max()
        if min_overlap is not None and best_overlap < min_overlap:
            return False
        if max_overlap is not None and best_overlap > max_overlap:
            return False
        return True

    def _generate_batch_samples(self, gt_boxes):
        """Collect accepted candidate boxes from every configured sampler."""
        sample_boxes = []
        for sampler in self._samplers:
            found = 0
            for _ in range(sampler['max_trials']):
                if found >= sampler['max_sample']:
                    break
                sample_box = self._generate_sample(sampler)
                # The overlap check is skipped for the trivial [0, 1] bounds.
                if sampler['min_overlap'] != 0. or \
                        sampler['max_overlap'] != 1.:
                    ok = self._check_satisfy(sample_box, gt_boxes, sampler)
                    if not ok:
                        continue
                found += 1
                sample_boxes.append(sample_box)
        return sample_boxes

    @classmethod
    def _rand_crop(cls, im, rand_box, gt_boxes=None):
        """Crop ``im`` to ``rand_box`` and remap the boxes into the crop.

        ``rand_box`` is given as fractions of the image size. When
        ``gt_boxes`` is provided, only boxes whose center falls inside
        ``rand_box`` are kept; they are clipped to the crop and re-expressed
        as fractions of the crop size.
        """
        im_h, im_w = im.shape[:2]
        w_off = int(rand_box[0] * im_w)
        h_off = int(rand_box[1] * im_h)
        crop_w = int((rand_box[2] - rand_box[0]) * im_w)
        crop_h = int((rand_box[3] - rand_box[1]) * im_h)
        new_im = im[h_off:h_off + crop_h, w_off:w_off + crop_w, :]
        if gt_boxes is not None:
            ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
            ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
            # Keep the ground-truth box whose center is in the sample box
            # Implement ``EmitConstraint.CENTER`` in the original SSD
            keep_inds = np.where((ctr_x >= rand_box[0]) & (ctr_x <= rand_box[2]) &
                                 (ctr_y >= rand_box[1]) & (ctr_y <= rand_box[3]))[0]
            gt_boxes = gt_boxes[keep_inds]
            new_gt_boxes = gt_boxes.astype(gt_boxes.dtype, copy=True)
            # Fractions of the image -> pixels relative to the crop origin.
            new_gt_boxes[:, 0] = (gt_boxes[:, 0] * im_w - w_off)
            new_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h - h_off)
            new_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w - w_off)
            new_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h - h_off)
            new_gt_boxes = clip_tiled_boxes(new_gt_boxes, (crop_h, crop_w))
            # Pixels -> fractions of the crop size.
            new_gt_boxes[:, 0] = new_gt_boxes[:, 0] / crop_w
            new_gt_boxes[:, 1] = new_gt_boxes[:, 1] / crop_h
            new_gt_boxes[:, 2] = new_gt_boxes[:, 2] / crop_w
            new_gt_boxes[:, 3] = new_gt_boxes[:, 3] / crop_h
            return new_im, new_gt_boxes
        return new_im, gt_boxes

    def apply(self, img, boxes):
        """Crop to one randomly chosen accepted box, if any was found.

        Returns:
            Tuple ``(image, boxes)``; the inputs are returned unchanged when
            no candidate box satisfied the constraints.
        """
        sample_boxes = self._generate_batch_samples(boxes)
        if len(sample_boxes) > 0:
            # Apply sampling if found at least one valid sample box
            # Then randomly pick one
            sample_idx = npr.randint(len(sample_boxes))
            rand_box = sample_boxes[sample_idx]
            img, boxes = self._rand_crop(img, rand_box, boxes)
        return img, boxes
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
sys.path.append('../../')
import cv2
import numpy as np
from lib.ssd.data import transforms
if __name__ == '__main__':
    # Interactive visual check of the composed augmentation on 'cat.jpg'.
    # The seed is fixed so successive runs show the same sequence.
    np.random.seed(3)
    steps = [
        transforms.Distort(),
        transforms.Expand(),
        transforms.Sample(),
        transforms.Resize(),
    ]
    augmentor = transforms.Compose(*steps)
    while True:
        img = cv2.imread('cat.jpg')
        boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
        img, boxes = augmentor(img, boxes)
        # Retry when the crop dropped every box.
        if len(boxes) < 1:
            continue
        for box in boxes:
            # Fractions of the image size -> pixel corners.
            top_left = (int(box[0] * img.shape[1]), int(box[1] * img.shape[0]))
            bottom_right = (int(box[2] * img.shape[1]), int(box[3] * img.shape[0]))
            cv2.rectangle(img, top_left, bottom_right, (188, 119, 64), 2)
        cv2.imshow('Sample', img)
        cv2.waitKey(0)
...@@ -17,7 +17,8 @@ import numpy as np ...@@ -17,7 +17,8 @@ import numpy as np
def generate_anchors(min_sizes, max_sizes, ratios): def generate_anchors(min_sizes, max_sizes, ratios):
"""Generate anchor (reference) windows by enumerating """
Generate anchor (reference) windows by enumerating
aspect ratios, min_sizes, max_sizes wrt a reference ctr (x, y, w, h). aspect ratios, min_sizes, max_sizes wrt a reference ctr (x, y, w, h).
""" """
...@@ -34,39 +35,32 @@ def generate_anchors(min_sizes, max_sizes, ratios): ...@@ -34,39 +35,32 @@ def generate_anchors(min_sizes, max_sizes, ratios):
base_anchor, min_size, max_size)]) base_anchor, min_size, max_size)])
anchors = np.vstack([_anchors, anchors[1:]]) anchors = np.vstack([_anchors, anchors[1:]])
total_anchors.append(anchors) total_anchors.append(anchors)
return np.vstack(total_anchors)
return np.vstack([total_anchors[i] for i in range(len(total_anchors))])
def _whctrs(anchor): def _whctrs(anchor):
"""Return width, height, x center, and y center for an anchor (window). """Return width, height, x center, and y center for an anchor (window)."""
w, h = anchor[2], anchor[3]
Note that it is a little different from Faster-RCNN. x_ctr, y_ctr = anchor[0], anchor[1]
"""
w = anchor[2]; h = anchor[3]
x_ctr = anchor[0]; y_ctr = anchor[1]
return w, h, x_ctr, y_ctr return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr): def _mkanchors(ws, hs, x_ctr, y_ctr):
"""Given a vector of widths (ws) and heights (hs) around a center """
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows). (x_ctr, y_ctr), output a set of anchors (windows).
""" """
ws = ws[:, np.newaxis] ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis] hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws), anchors = np.hstack((x_ctr - 0.5 * ws,
y_ctr - 0.5 * (hs), y_ctr - 0.5 * hs,
x_ctr + 0.5 * (ws), x_ctr + 0.5 * ws,
y_ctr + 0.5 * (hs))) y_ctr + 0.5 * hs))
return anchors return anchors
def _ratio_enum(anchor, ratios): def _ratio_enum(anchor, ratios):
"""Enumerate a set of anchors for each aspect ratio wrt an anchor. """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
"""
w, h, x_ctr, y_ctr = _whctrs(anchor) w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h size = w * h
size_ratios = size / ratios size_ratios = size / ratios
...@@ -77,9 +71,7 @@ def _ratio_enum(anchor, ratios): ...@@ -77,9 +71,7 @@ def _ratio_enum(anchor, ratios):
def _max_size_enum(base_anchor, min_size, max_size): def _max_size_enum(base_anchor, min_size, max_size):
"""Enumerate a anchor for max_size wrt base_anchor. """Enumerate a anchor for max_size wrt base_anchor."""
"""
w, h, x_ctr, y_ctr = _whctrs(base_anchor) w, h, x_ctr, y_ctr = _whctrs(base_anchor)
ws = hs = np.sqrt([min_size * max_size]) ws = hs = np.sqrt([min_size * max_size])
anchors = _mkanchors(ws, hs, x_ctr, y_ctr) anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
......
...@@ -15,8 +15,8 @@ from __future__ import print_function ...@@ -15,8 +15,8 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.datasets.factory import get_imdb
from lib.core.config import cfg from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.ssd.data.data_batch import DataBatch from lib.ssd.data.data_batch import DataBatch
...@@ -28,15 +28,13 @@ class DataLayer(torch.nn.Module): ...@@ -28,15 +28,13 @@ class DataLayer(torch.nn.Module):
'source': database.source, 'source': database.source,
'classes': database.classes, 'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE, 'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': 2048, # Chunk-Wise Shuffle 'num_chunks': 2048, # Chunk-Wise Shuffle
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2, 'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
}) })
def forward(self): def forward(self):
# Get a mini-batch from the Queue # Get an array blob from the Queue
blobs = self.data_batch.get() outputs = self.data_batch.get()
# Zero-Copy from numpy # Zero-Copy the array to tensor
blobs['data'] = torch.from_numpy(blobs['data']) outputs['data'] = torch.from_numpy(outputs['data'])
# Switch the data to Device return outputs
blobs['data'].cuda(cfg.GPU_ID)
return blobs
\ No newline at end of file
...@@ -13,11 +13,11 @@ from __future__ import absolute_import ...@@ -13,11 +13,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import to_tensor from lib.utils.blob import blob_to_tensor
class HardMiningLayer(torch.nn.Module): class HardMiningLayer(torch.nn.Module):
...@@ -42,8 +42,8 @@ class HardMiningLayer(torch.nn.Module): ...@@ -42,8 +42,8 @@ class HardMiningLayer(torch.nn.Module):
conf_loss = np.zeros(match_labels.shape, dtype=np.float32) conf_loss = np.zeros(match_labels.shape, dtype=np.float32)
inds = np.where(match_labels >= 0)[0] inds = np.where(match_labels >= 0)[0]
flt_min = np.finfo(float).eps flt_min = np.finfo(float).eps
# Naive softmax cross-entropy # Softmax cross-entropy
conf_loss[inds] = -1.0 * np.log(np.maximum( conf_loss[inds] = -np.log(np.maximum(
conf_prob[inds, match_labels[inds]], flt_min)) conf_prob[inds, match_labels[inds]], flt_min))
# Filter negatives # Filter negatives
...@@ -59,8 +59,8 @@ class HardMiningLayer(torch.nn.Module): ...@@ -59,8 +59,8 @@ class HardMiningLayer(torch.nn.Module):
num_sel = min(int(num_pos * cfg.SSD.OHEM.NEG_POS_RATIO), len(sel_inds)) num_sel = min(int(num_pos * cfg.SSD.OHEM.NEG_POS_RATIO), len(sel_inds))
sorted_sel_inds = sel_inds[np.argsort(-sel_loss)] sorted_sel_inds = sel_inds[np.argsort(-sel_loss)]
bg_inds = sorted_sel_inds[:num_sel] bg_inds = sorted_sel_inds[:num_sel]
labels_wide[ix][fg_inds] = match_labels[fg_inds] # Keep fg indices labels_wide[ix][fg_inds] = match_labels[fg_inds] # Keep fg indices
labels_wide[ix][bg_inds] = 0 # Use hard negatives as bg indices labels_wide[ix][bg_inds] = 0 # Use hard negatives as bg indices
# Feed labels to compute cls loss # Feed labels to compute cls loss
return {'labels': to_tensor(labels_wide)} return {'labels': blob_to_tensor(labels_wide)}
\ No newline at end of file
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class MultiBoxMatchLayer(torch.nn.Module): from lib.utils.cython_bbox import bbox_overlaps
def __init__(self):
super(MultiBoxMatchLayer, self).__init__()
class MultiBoxMatchLayer(torch.nn.Module):
def forward(self, prior_boxes, gt_boxes): def __init__(self):
num_images = cfg.TRAIN.IMS_PER_BATCH super(MultiBoxMatchLayer, self).__init__()
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
num_priors = len(prior_boxes) def forward(self, prior_boxes, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Do matching between prior boxes and gt boxes gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
match_inds_wide = -np.ones((num_images, num_priors), dtype=np.int32) num_priors = len(prior_boxes)
match_labels_wide = np.zeros(match_inds_wide.shape, dtype=np.int64)
max_overlaps_wide = np.zeros(match_inds_wide.shape, dtype=np.float32) # Do matching between prior boxes and gt boxes
match_inds_wide = -np.ones((num_images, num_priors), dtype=np.int32)
for ix in range(num_images): match_labels_wide = np.zeros(match_inds_wide.shape, dtype=np.int64)
# GT boxes (x1, y1, x2, y2, label) max_overlaps_wide = np.zeros(match_inds_wide.shape, dtype=np.float32)
gt_boxes = gt_boxes_wide[ix]
if gt_boxes.shape[0] == 0: continue for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
# Compute the overlaps between prior boxes and gt boxes gt_boxes = gt_boxes_wide[ix]
overlaps = bbox_overlaps( if gt_boxes.shape[0] == 0:
np.ascontiguousarray(prior_boxes, dtype=np.float), continue
np.ascontiguousarray(gt_boxes, dtype=np.float))
argmax_overlaps = overlaps.argmax(axis=1) # Compute the overlaps between prior boxes and gt boxes
max_overlaps = overlaps[np.arange(num_priors), argmax_overlaps] overlaps = bbox_overlaps(
max_overlaps_wide[ix] = max_overlaps np.ascontiguousarray(prior_boxes, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float))
# Bipartite matching & assignments argmax_overlaps = overlaps.argmax(axis=1)
bipartite_inds = overlaps.argmax(axis=0) max_overlaps = overlaps[np.arange(num_priors), argmax_overlaps]
class_assignment = gt_boxes[:, 4] max_overlaps_wide[ix] = max_overlaps
match_inds_wide[ix][bipartite_inds] = np.arange(
gt_boxes.shape[0], dtype=np.int32) # Bipartite matching & assignments
match_labels_wide[ix][bipartite_inds] = class_assignment bipartite_inds = overlaps.argmax(axis=0)
class_assignment = gt_boxes[:, 4]
# Per prediction matching & assignments match_inds_wide[ix][bipartite_inds] = np.arange(
# Note that SSD match each prior box for only once gt_boxes.shape[0], dtype=np.int32)
# We simply implement it by clobbering the assignments matched in bipartite match_labels_wide[ix][bipartite_inds] = class_assignment
per_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
gt_assignment = argmax_overlaps[per_inds] # Per prediction matching & assignments
class_assignment = gt_boxes[gt_assignment, 4] # Note that SSD match each prior box for only once
match_inds_wide[ix][per_inds] = gt_assignment # We simply implement it by clobbering the assignments matched in bipartite
match_labels_wide[ix][per_inds] = class_assignment per_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
gt_assignment = argmax_overlaps[per_inds]
return { class_assignment = gt_boxes[gt_assignment, 4]
'match_inds': match_inds_wide, match_inds_wide[ix][per_inds] = gt_assignment
'match_labels': match_labels_wide, match_labels_wide[ix][per_inds] = class_assignment
'max_overlaps': max_overlaps_wide,
} return {
'match_inds': match_inds_wide,
'match_labels': match_labels_wide,
def _dismantle_gt_boxes(gt_boxes, num_images): 'max_overlaps': max_overlaps_wide,
return [ }
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images) class MultiBoxTargetLayer(torch.nn.Module):
] def __init__(self):
\ No newline at end of file super(MultiBoxTargetLayer, self).__init__()
def forward(self, match_inds, match_labels, prior_boxes, gt_boxes):
    """Build box-regression targets and weights for every prior box.

    Args:
        match_inds: Per-image array giving, for each prior, the index of
            its assigned ground-truth box.
        match_labels: Per-image array of matched labels; entries ``> 0``
            are treated as positives.
        prior_boxes: Prior (default) boxes, indexable by prior index.
        gt_boxes: Batched ground-truth boxes, split per image by
            ``dismantle_gt_boxes``.

    Returns:
        Dict with ``bbox_targets``, ``bbox_inside_weights`` and
        ``bbox_outside_weights``, each converted by ``blob_to_tensor``
        from a ``(num_images, num_priors, 4)`` float32 array.
    """
    num_images = cfg.TRAIN.IMS_PER_BATCH
    num_priors = len(prior_boxes)
    boxes_per_image = dismantle_gt_boxes(gt_boxes, num_images)

    targets = np.zeros((num_images, num_priors, 4), dtype=np.float32)
    inside_weights = np.zeros(targets.shape, dtype=np.float32)
    outside_weights = np.zeros(targets.shape, dtype=np.float32)

    # Number of positives over the whole batch (at least 1), divided by the
    # number of images. The original note says the downstream box loss
    # divides by the image count as well (loss name presumably SmoothL1Loss
    # -- confirm).
    num_positive = max(len(np.where(match_labels > 0)[0]), 1)
    normalizer = num_positive / num_images

    for ix in range(num_images):
        image_boxes = boxes_per_image[ix]
        if image_boxes.shape[0] == 0:
            continue
        # Positive priors of this image and their assigned ground truths.
        positive_inds = np.where(match_labels[ix] > 0)[0]
        matched_gt = image_boxes[match_inds[ix][positive_inds]]
        targets[ix][positive_inds] = bbox_transform(
            prior_boxes[positive_inds], matched_gt, cfg.BBOX_REG_WEIGHTS)
        # Inside weights are set for every prior of the image, outside
        # weights only for the positives.
        inside_weights[ix, :] = (1.0, 1.0, 1.0, 1.0)
        outside_weights[ix][positive_inds] = 1.0 / normalizer

    return {
        'bbox_targets': blob_to_tensor(targets),
        'bbox_inside_weights': blob_to_tensor(inside_weights),
        'bbox_outside_weights': blob_to_tensor(outside_weights),
    }
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.utils.bbox_transform import bbox_transform
class MultiBoxTargetLayer(torch.nn.Module):
    """Compute box-regression targets and loss weights for the prior boxes."""

    def __init__(self):
        super(MultiBoxTargetLayer, self).__init__()

    def forward(self, match_inds, match_labels, prior_boxes, gt_boxes):
        """Build box-regression targets and weights for every prior box.

        Args:
            match_inds: Per-image array giving, for each prior, the index
                of its assigned ground-truth box.
            match_labels: Per-image array of matched labels; entries
                ``> 0`` are treated as positives.
            prior_boxes: Prior (default) boxes, indexable by prior index.
            gt_boxes: Batched ground-truth boxes, split per image by
                ``_dismantle_gt_boxes``.

        Returns:
            Dict with ``bbox_targets``, ``bbox_inside_weights`` and
            ``bbox_outside_weights``, each converted by ``to_tensor`` from
            a ``(num_images, num_priors, 4)`` float32 array.
        """
        num_images = cfg.TRAIN.IMS_PER_BATCH
        num_priors = len(prior_boxes)
        boxes_per_image = _dismantle_gt_boxes(gt_boxes, num_images)

        targets = np.zeros((num_images, num_priors, 4), dtype=np.float32)
        inside_weights = np.zeros(targets.shape, dtype=np.float32)
        outside_weights = np.zeros(targets.shape, dtype=np.float32)

        # Number of positives over the whole batch (at least 1), divided by
        # the number of images. The original note says the downstream box
        # loss divides by the image count as well (loss name presumably
        # SmoothL1Loss -- confirm).
        num_positive = max(len(np.where(match_labels > 0)[0]), 1)
        normalizer = num_positive / num_images

        for ix in range(num_images):
            image_boxes = boxes_per_image[ix]
            if image_boxes.shape[0] == 0:
                continue
            # Positive priors of this image and their assigned ground truths.
            positive_inds = np.where(match_labels[ix] > 0)[0]
            matched_gt = image_boxes[match_inds[ix][positive_inds]]
            targets[ix][positive_inds] = bbox_transform(
                prior_boxes[positive_inds], matched_gt, cfg.BBOX_REG_WEIGHTS)
            # Inside weights are set for every prior of the image, outside
            # weights only for the positives.
            inside_weights[ix, :] = (1.0, 1.0, 1.0, 1.0)
            outside_weights[ix][positive_inds] = 1.0 / normalizer

        return {
            'bbox_targets': to_tensor(targets),
            'bbox_inside_weights': to_tensor(inside_weights),
            'bbox_outside_weights': to_tensor(outside_weights),
        }
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
\ No newline at end of file
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils import logger from lib.ssd.generate_anchors import generate_anchors
from lib.ssd.generate_anchors import generate_anchors from lib.utils import logger
class PriorBoxLayer(torch.nn.Module): class PriorBoxLayer(torch.nn.Module):
"""Generate default boxes(anchors).""" """Generate default boxes(anchors)."""
def __init__(self): def __init__(self):
super(PriorBoxLayer, self).__init__() super(PriorBoxLayer, self).__init__()
min_sizes = cfg.SSD.MULTIBOX.MIN_SIZES min_sizes = cfg.SSD.MULTIBOX.MIN_SIZES
max_sizes = cfg.SSD.MULTIBOX.MAX_SIZES max_sizes = cfg.SSD.MULTIBOX.MAX_SIZES
if len(max_sizes) > 0: if len(max_sizes) > 0:
if len(min_sizes) != len(max_sizes): if len(min_sizes) != len(max_sizes):
logger.fatal('Got {} min sizes and {} max sizes.'.format( logger.fatal('Got {} min sizes and {} max sizes.'.format(
len(min_sizes), len(max_sizes))) len(min_sizes), len(max_sizes)))
self.strides = cfg.SSD.MULTIBOX.STRIDES self.strides = cfg.SSD.MULTIBOX.STRIDES
aspect_ratios = cfg.SSD.MULTIBOX.ASPECT_RATIOS aspect_ratios = cfg.SSD.MULTIBOX.ASPECT_RATIOS
self.num_anchors = len(min_sizes) * len(aspect_ratios) + len(max_sizes) self.num_anchors = len(min_sizes) * len(aspect_ratios) + len(max_sizes)
self.base_anchors = [] self.base_anchors = []
for i in range(len(min_sizes)): for i in range(len(min_sizes)):
self.base_anchors.append( self.base_anchors.append(
generate_anchors( generate_anchors(
min_sizes[i] if isinstance( min_sizes[i] if isinstance(
min_sizes[i], (list, tuple)) else [min_sizes[i]], min_sizes[i], (list, tuple)) else [min_sizes[i]],
max_sizes[i] if isinstance( max_sizes[i] if isinstance(
max_sizes[i], (list, tuple)) else [max_sizes[i]], max_sizes[i], (list, tuple)) else [max_sizes[i]],
aspect_ratios[i], aspect_ratios[i],
) )
) )
def forward(self, features): def forward(self, features):
all_anchors = [] all_anchors = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
# 1. Generate base grids # 1. Generate base grids
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
shift_x = (np.arange(0, width) + 0.5) * self.strides[i] shift_x = (np.arange(0, width) + 0.5) * self.strides[i]
shift_y = (np.arange(0, height) + 0.5) * self.strides[i] shift_y = (np.arange(0, height) + 0.5) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# 2. Apply anchors on base grids # 2. Apply anchors on base grids
# Add A anchors (1, A, 4) to # Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get # cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0] A = self.base_anchors[i].shape[0]
K = shifts.shape[0] # K = map_h * map_w K = shifts.shape[0] # K = map_h * map_w
anchors = (self.base_anchors[i].reshape((1, A, 4)) + anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2))) shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
anchors = anchors.reshape((K * A, 4)).astype(np.float32) anchors = anchors.reshape((K * A, 4)).astype(np.float32)
all_anchors.append(anchors) all_anchors.append(anchors)
return np.concatenate(all_anchors, axis=0)
return np.concatenate(all_anchors, axis=0)
\ No newline at end of file
...@@ -13,26 +13,24 @@ from __future__ import absolute_import ...@@ -13,26 +13,24 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
try:
import cPickle
except:
import pickle as cPickle
import cv2 import cv2
import numpy as np
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.bbox_transform import clip_boxes, bbox_transform_inv from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import nms, soft_nms from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.timer import Timer from lib.utils.timer import Timer
from lib.utils.blob import to_array
from lib.utils.vis import vis_one_image from lib.utils.vis import vis_one_image
def get_images(ims): def get_images(ims):
target_h = cfg.SSD.RESIZE.HEIGHT target_h = cfg.SSD.RESIZE.HEIGHT
target_w = cfg.SSD.RESIZE.WIDTH target_w = cfg.SSD.RESIZE.WIDTH
processed_ims = []; im_scales = [] processed_ims, im_scales = [], []
for im in ims: for im in ims:
im_scales.append((float(target_h) / im.shape[0], im_scales.append((float(target_h) / im.shape[0],
float(target_w) / im.shape[1])) float(target_w) / im.shape[1]))
...@@ -43,36 +41,37 @@ def get_images(ims): ...@@ -43,36 +41,37 @@ def get_images(ims):
def ims_detect(detector, ims): def ims_detect(detector, ims):
"""Detect images, with the single scale.""" """Detect images, with the single scale."""
# Prepare blobs # Prepare blobs
data, im_scales = get_images(ims) data, im_scales = get_images(ims)
data = torch.from_numpy(data).cuda(cfg.GPU_ID) data = torch.from_numpy(data).cuda(cfg.GPU_ID)
# Do Forward # Do Forward
# Do Forward
with torch.no_grad(): with torch.no_grad():
outputs = detector.forward(inputs={'data': data}) outputs = detector.forward(inputs={'data': data})
# Decode results # Decode results
scores = to_array(outputs['cls_prob'])
prior_boxes = to_array(outputs['prior_boxes'])
box_deltas = to_array(outputs['bbox_pred'])
batch_boxes = [] batch_boxes = []
scores = tensor_to_blob(outputs['cls_prob'])
for ix in range(box_deltas.shape[0]): prior_boxes = tensor_to_blob(outputs['prior_boxes'])
boxes = bbox_transform_inv(prior_boxes, box_deltas[ix], cfg.BBOX_REG_WEIGHTS) box_deltas = tensor_to_blob(outputs['bbox_pred'])
boxes[:, 0::2] /= im_scales[ix][1] for i in range(box_deltas.shape[0]):
boxes[:, 1::2] /= im_scales[ix][0] boxes = bbox_transform_inv(
batch_boxes.append(clip_boxes(boxes, ims[ix].shape)) boxes=prior_boxes,
deltas=box_deltas[i],
weights=cfg.BBOX_REG_WEIGHTS,
)
boxes[:, 0::2] /= im_scales[i][1]
boxes[:, 1::2] /= im_scales[i][0]
batch_boxes.append(clip_tiled_boxes(boxes, ims[i].shape))
return scores, batch_boxes return scores, batch_boxes
def test_net(net, server): def test_net(net, server):
classes, num_images, num_classes = \ # Load settings
server.classes, server.num_images, server.num_classes classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()} _t = {'im_detect': Timer(), 'misc': Timer()}
...@@ -101,34 +100,49 @@ def test_net(net, server): ...@@ -101,34 +100,49 @@ def test_net(net, server):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0] inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j] cls_scores = scores[inds, j]
cls_boxes = boxes[inds] cls_boxes = boxes[inds]
pre_nms_inds = np.argsort(-cls_scores)[0 : cfg.TEST.NMS_TOP_K] pre_nms_inds = np.argsort(-cls_scores)[:cfg.TEST.NMS_TOP_K]
cls_scores = cls_scores[pre_nms_inds] cls_scores = cls_scores[pre_nms_inds]
cls_boxes = cls_boxes[pre_nms_inds] cls_boxes = cls_boxes[pre_nms_inds]
cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False) .astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS: if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms( keep = soft_nms(
cls_dets, cfg.TEST.NMS, cls_detections,
cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD, method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA) sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else: else:
keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True) keep = nms(
cls_dets = cls_dets[keep, :] cls_detections,
all_boxes[j][i] = cls_dets cfg.TEST.NMS,
boxes_this_image.append(cls_dets) force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE: if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_images[item_idx], classes, boxes_this_image, vis_one_image(
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True, raw_images[item_idx],
filename=server.get_save_filename(image_ids[item_idx])) classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.0,
show_class=True,
filename=server.get_save_filename(image_ids[item_idx]),
)
# Limit to max_per_image detections *over all classes* # Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0: if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = [] image_scores = []
for j in range(1, num_classes): for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1]) image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores) if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM: if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM] image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes): for j in range(1, num_classes):
...@@ -136,12 +150,12 @@ def test_net(net, server): ...@@ -136,12 +150,12 @@ def test_net(net, server):
all_boxes[j][i] = all_boxes[j][i][keep, :] all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc() _t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(batch_idx + cfg.TEST.IMS_PER_BATCH, .format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images, _t['im_detect'].average_time, num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='') _t['misc'].average_time), end='')
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<') print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
print('Evaluating detections') print('Evaluating detections')
server.evaluate_detections(all_boxes) server.evaluate_detections(all_boxes)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
...@@ -62,4 +62,4 @@ class AttrDict(dict): ...@@ -62,4 +62,4 @@ class AttrDict(dict):
v.immutable(is_immutable) v.immutable(is_immutable)
def is_immutable(self): def is_immutable(self):
return self.__dict__[AttrDict.IMMUTABLE] return self.__dict__[AttrDict.IMMUTABLE]
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py>
#
# ------------------------------------------------------------
import numpy as np
from lib.core.config import cfg
def bbox_transform(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
    """Encode ground-truth boxes as regression targets w.r.t. example boxes.

    Both inputs are indexed as ``[:, 0..3]`` and treated as
    (x1, y1, x2, y2) corners; widths and heights use the ``+ 1`` pixel
    convention.

    Args:
        ex_rois: array of example boxes, one row per box.
        gt_rois: array of matched ground-truth boxes, same row count.
        weights: multipliers applied to (dx, dy, dw, dh) respectively.

    Returns:
        Array with one row per box and columns (dx, dy, dw, dh).
    """
    def _size_and_center(rois):
        # Width/height include both border pixels, hence the +1.
        w = rois[:, 2] - rois[:, 0] + 1.0
        h = rois[:, 3] - rois[:, 1] + 1.0
        return w, h, rois[:, 0] + 0.5 * w, rois[:, 1] + 0.5 * h

    src_w, src_h, src_cx, src_cy = _size_and_center(ex_rois)
    dst_w, dst_h, dst_cx, dst_cy = _size_and_center(gt_rois)

    wx, wy, ww, wh = weights
    deltas = (
        wx * (dst_cx - src_cx) / src_w,
        wy * (dst_cy - src_cy) / src_h,
        ww * np.log(dst_w / src_w),
        wh * np.log(dst_h / src_h),
    )
    # Stack as rows, then flip so that each box owns one row.
    return np.vstack(deltas).transpose()
def bbox_transform_inv(boxes, deltas, weights=(1.0, 1.0, 1.0, 1.0)):
    """Decode regression deltas back into boxes.

    ``deltas`` is tiled in groups of four columns (dx, dy, dw, dh); the
    result has the same shape and dtype as ``deltas`` with columns tiled
    as (x1, y1, x2, y2).

    Args:
        boxes: reference boxes, one row per box, columns (x1, y1, x2, y2).
        deltas: regression outputs, one row per reference box.
        weights: divisors applied to (dx, dy, dw, dh) before decoding.

    Returns:
        The decoded boxes; an empty ``(0, deltas.shape[1])`` array when
        ``boxes`` has no rows.
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)
    box_w = boxes[:, 2] - boxes[:, 0] + 1.0
    box_h = boxes[:, 3] - boxes[:, 1] + 1.0
    box_cx = boxes[:, 0] + 0.5 * box_w
    box_cy = boxes[:, 1] + 0.5 * box_h

    wx, wy, ww, wh = weights
    dx, dy = deltas[:, 0::4] / wx, deltas[:, 1::4] / wy
    dw, dh = deltas[:, 2::4] / ww, deltas[:, 3::4] / wh

    # Optionally cap the log-space size deltas before exponentiation.
    if cfg.USE_XFORM_CLIP:
        dw = np.minimum(dw, cfg.BBOX_XFORM_CLIP)
        dh = np.minimum(dh, cfg.BBOX_XFORM_CLIP)

    # Add a trailing axis so per-box values broadcast over the tiles.
    col_w, col_h = box_w[:, np.newaxis], box_h[:, np.newaxis]
    out_cx = dx * col_w + box_cx[:, np.newaxis]
    out_cy = dy * col_h + box_cy[:, np.newaxis]
    half_w = 0.5 * (np.exp(dw) * col_w)
    half_h = 0.5 * (np.exp(dh) * col_h)

    decoded = np.zeros(deltas.shape, dtype=deltas.dtype)
    decoded[:, 0::4] = out_cx - half_w  # x1
    decoded[:, 1::4] = out_cy - half_h  # y1
    decoded[:, 2::4] = out_cx + half_w  # x2
    decoded[:, 3::4] = out_cy + half_h  # y2
    return decoded
def clip_boxes(boxes, im_shape):
    """Clip tiled boxes to the image boundaries, in place.

    Columns are tiled as (x1, y1, x2, y2). Every x is limited to
    ``[0, im_shape[1] - 1]`` and every y to ``[0, im_shape[0] - 1]``.

    Args:
        boxes: 2-d array of tiled boxes; modified in place.
        im_shape: image shape whose first two entries are (height, width).

    Returns:
        The same ``boxes`` array after clipping.
    """
    max_x, max_y = im_shape[1] - 1, im_shape[0] - 1
    # Offsets 0/2 are x coordinates, offsets 1/3 are y coordinates.
    for offset, upper in enumerate((max_x, max_y, max_x, max_y)):
        boxes[:, offset::4] = np.maximum(
            np.minimum(boxes[:, offset::4], upper), 0)
    return boxes
\ No newline at end of file
...@@ -89,7 +89,7 @@ def prep_im_for_blob(im, target_size, max_size): ...@@ -89,7 +89,7 @@ def prep_im_for_blob(im, target_size, max_size):
return resize_image(im, im_scale, im_scale), im_scale, jitter return resize_image(im, im_scale, im_scale), im_scale, jitter
def to_tensor(blob, enforce_cpu=False): def blob_to_tensor(blob, enforce_cpu=False):
if isinstance(blob, np.ndarray): if isinstance(blob, np.ndarray):
# Zero-Copy from numpy # Zero-Copy from numpy
cpu_tensor = torch.from_numpy(blob) cpu_tensor = torch.from_numpy(blob)
...@@ -99,10 +99,10 @@ def to_tensor(blob, enforce_cpu=False): ...@@ -99,10 +99,10 @@ def to_tensor(blob, enforce_cpu=False):
cpu_tensor.cuda(cfg.GPU_ID) cpu_tensor.cuda(cfg.GPU_ID)
def to_array(blob, copy=False): def tensor_to_blob(blob, copy=False):
if isinstance(blob, torch.Tensor): if isinstance(blob, torch.Tensor):
# Zero-Copy from numpy # Zero-Copy from numpy
array = blob.numpy(True) array = blob.numpy(True)
else: else:
array = blob array = blob
return array.copy() if copy else array return array.copy() if copy else array
\ No newline at end of file
...@@ -13,19 +13,11 @@ ...@@ -13,19 +13,11 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
import numpy as np from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
def area(boxes):
"""Computes area of boxes.
Args:
boxes: Numpy array with shape [N, 4] holding N boxes
Returns:
a numpy array with shape [N*1] representing box areas
""" import numpy as np
return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
def intersection(boxes1, boxes2): def intersection(boxes1, boxes2):
...@@ -66,10 +58,11 @@ def iou(boxes1, boxes2): ...@@ -66,10 +58,11 @@ def iou(boxes1, boxes2):
""" """
intersect = intersection(boxes1, boxes2) intersect = intersection(boxes1, boxes2)
area1 = area(boxes1) area1 = boxes_area(boxes1)
area2 = area(boxes2) area2 = boxes_area(boxes2)
union = np.expand_dims(area1, axis=1) + np.expand_dims( union = \
area2, axis=0) - intersect np.expand_dims(area1, axis=1) + \
np.expand_dims(area2, axis=0) - intersect
return intersect / union return intersect / union
...@@ -88,7 +81,7 @@ def ioa1(boxes1, boxes2): ...@@ -88,7 +81,7 @@ def ioa1(boxes1, boxes2):
""" """
intersect = intersection(boxes1, boxes2) intersect = intersection(boxes1, boxes2)
areas = np.expand_dims(area(boxes1), axis=1) areas = np.expand_dims(boxes_area(boxes1), axis=1)
return intersect / areas return intersect / areas
...@@ -107,14 +100,90 @@ def ioa2(boxes1, boxes2): ...@@ -107,14 +100,90 @@ def ioa2(boxes1, boxes2):
""" """
intersect = intersection(boxes1, boxes2) intersect = intersection(boxes1, boxes2)
areas = np.expand_dims(area(boxes2), axis=0) areas = np.expand_dims(boxes_area(boxes2), axis=0)
return intersect / areas return intersect / areas
def expand_boxes(boxes, scale): def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1.)):
"""Expand an array of boxes by a given scale. """Transform the boxes to the regression targets."""
ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
""" gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
wx, wy, ww, wh = weights
targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
targets_dw = ww * np.log(gt_widths / ex_widths)
targets_dh = wh * np.log(gt_heights / ex_heights)
targets = np.vstack(
(targets_dx, targets_dy,
targets_dw, targets_dh)).transpose()
return targets
def bbox_transform_inv(boxes, deltas, weights=(1., 1., 1., 1.)):
    """Decode the final boxes according to the deltas.

    ``deltas`` is tiled in groups of four columns (dx, dy, dw, dh); the
    output has the same shape and dtype, tiled as (x1, y1, x2, y2).

    Args:
        boxes: reference boxes, one row per box, columns (x1, y1, x2, y2).
        deltas: regression outputs, one row per reference box.
        weights: divisors applied to (dx, dy, dw, dh) before decoding.

    Returns:
        The decoded boxes; an empty ``(0, deltas.shape[1])`` array when
        ``boxes`` has no rows.
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)
    ref_w = boxes[:, 2] - boxes[:, 0] + 1.0
    ref_h = boxes[:, 3] - boxes[:, 1] + 1.0
    ref_cx = boxes[:, 0] + 0.5 * ref_w
    ref_cy = boxes[:, 1] + 0.5 * ref_h

    wx, wy, ww, wh = weights
    # Column views so that per-box values broadcast across the tiles.
    ref_w_col = ref_w[:, np.newaxis]
    ref_h_col = ref_h[:, np.newaxis]

    new_cx = (deltas[:, 0::4] / wx) * ref_w_col + ref_cx[:, np.newaxis]
    new_cy = (deltas[:, 1::4] / wy) * ref_h_col + ref_cy[:, np.newaxis]
    new_w = np.exp(deltas[:, 2::4] / ww) * ref_w_col
    new_h = np.exp(deltas[:, 3::4] / wh) * ref_h_col

    decoded = np.zeros(deltas.shape, dtype=deltas.dtype)
    decoded[:, 0::4] = new_cx - 0.5 * new_w  # x1
    decoded[:, 1::4] = new_cy - 0.5 * new_h  # y1
    decoded[:, 2::4] = new_cx + 0.5 * new_w  # x2
    decoded[:, 3::4] = new_cy + 0.5 * new_h  # y2
    return decoded
def boxes_area(boxes):
    """Return the area of every box in an array of boxes.

    Columns 0..3 are read as (x1, y1, x2, y2); each side length includes
    both border pixels (``+ 1``). An ``AssertionError`` is raised if any
    resulting area is negative.
    """
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    assert np.all(areas >= 0), 'Negative areas founds'
    return areas
def clip_tiled_boxes(boxes, im_shape):
    """Clip tiled boxes to the image boundaries, in place.

    Columns are tiled as (x1, y1, x2, y2). Every x is limited to
    ``[0, im_shape[1] - 1]`` and every y to ``[0, im_shape[0] - 1]``.

    Args:
        boxes: 2-d array of tiled boxes; modified in place.
        im_shape: image shape whose first two entries are (height, width).

    Returns:
        The same ``boxes`` array after clipping.
    """
    x_limit = im_shape[1] - 1
    y_limit = im_shape[0] - 1
    # x1 / x2 columns
    for start in (0, 2):
        boxes[:, start::4] = np.maximum(
            np.minimum(boxes[:, start::4], x_limit), 0)
    # y1 / y2 columns
    for start in (1, 3):
        boxes[:, start::4] = np.maximum(
            np.minimum(boxes[:, start::4], y_limit), 0)
    return boxes
def expand_boxes(boxes, scale):
"""Expand an array of boxes by a given scale."""
w_half = (boxes[:, 2] - boxes[:, 0]) * .5 w_half = (boxes[:, 2] - boxes[:, 0]) * .5
h_half = (boxes[:, 3] - boxes[:, 1]) * .5 h_half = (boxes[:, 3] - boxes[:, 1]) * .5
x_c = (boxes[:, 2] + boxes[:, 0]) * .5 x_c = (boxes[:, 2] + boxes[:, 0]) * .5
...@@ -129,4 +198,21 @@ def expand_boxes(boxes, scale): ...@@ -129,4 +198,21 @@ def expand_boxes(boxes, scale):
boxes_exp[:, 1] = y_c - h_half boxes_exp[:, 1] = y_c - h_half
boxes_exp[:, 3] = y_c + h_half boxes_exp[:, 3] = y_c + h_half
return boxes_exp return boxes_exp
\ No newline at end of file
def filter_boxes(boxes, min_size):
    """Return the row indices of boxes whose sides are all >= ``min_size``.

    Side lengths are computed from columns (x1, y1, x2, y2) with the
    ``+ 1`` pixel convention.
    """
    widths = boxes[:, 2] - boxes[:, 0] + 1
    heights = boxes[:, 3] - boxes[:, 1] + 1
    large_enough = np.logical_and(widths >= min_size, heights >= min_size)
    return np.nonzero(large_enough)[0]
def dismantle_gt_boxes(gt_boxes, num_images):
    """Dismantle the packed ground-truth boxes.

    The last column of ``gt_boxes`` is read as the image index of each
    row (cast to ``int32``). Rows keep their original relative order.

    Args:
        gt_boxes: 2-d numpy array whose last column holds the image index.
        num_images: number of per-image arrays to produce.

    Returns:
        A list of ``num_images`` arrays; entry ``ix`` contains the rows
        whose image index equals ``ix`` (empty if there are none).
    """
    # Cast the index column once instead of once per image.
    image_inds = gt_boxes[:, -1].astype(np.int32)
    return [
        gt_boxes[np.where(image_inds == ix)[0]]
        for ix in range(num_images)
    ]
...@@ -115,4 +115,4 @@ def colormap(rgb=False): ...@@ -115,4 +115,4 @@ def colormap(rgb=False):
color_list = color_list.reshape((-1, 3)) * 255 color_list = color_list.reshape((-1, 3)) * 255
if not rgb: if not rgb:
color_list = color_list[:, ::-1] color_list = color_list[:, ::-1]
return color_list return color_list
\ No newline at end of file
...@@ -15,7 +15,6 @@ from __future__ import print_function ...@@ -15,7 +15,6 @@ from __future__ import print_function
import cv2 import cv2
import numpy as np import numpy as np
import PIL.Image import PIL.Image
import PIL.ImageEnhance import PIL.ImageEnhance
...@@ -24,8 +23,11 @@ from lib.core.config import cfg ...@@ -24,8 +23,11 @@ from lib.core.config import cfg
def resize_image(im, fx, fy): def resize_image(im, fx, fy):
return cv2.resize( return cv2.resize(
im, None, fx=fx, fy=fy, im,
interpolation=cv2.INTER_LINEAR) dsize=None,
fx=fx, fy=fy,
interpolation=cv2.INTER_LINEAR,
)
# Faster and robust resizing than OpenCV methods # Faster and robust resizing than OpenCV methods
...@@ -37,15 +39,15 @@ def resize_mask(mask, size): ...@@ -37,15 +39,15 @@ def resize_mask(mask, size):
def distort_image(im): def distort_image(im):
im = PIL.Image.fromarray(im) im = PIL.Image.fromarray(im)
if np.random.uniform() < 0.5: if np.random.uniform() < 0.5:
delta_brightness = np.random.uniform(-0.3, 0.3) + 1.0 delta_brightness = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Brightness(im) im = PIL.ImageEnhance.Brightness(im)
im = im.enhance(delta_brightness) im = im.enhance(delta_brightness)
if np.random.uniform() < 0.5: if np.random.uniform() < 0.5:
delta_contrast = np.random.uniform(-0.3, 0.3) + 1.0 delta_contrast = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Contrast(im) im = PIL.ImageEnhance.Contrast(im)
im = im.enhance(delta_contrast) im = im.enhance(delta_contrast)
if np.random.uniform() < 0.3: if np.random.uniform() < 0.5:
delta_saturation = np.random.uniform(-0.3, 0.3) + 1.0 delta_saturation = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Color(im) im = PIL.ImageEnhance.Color(im)
im = im.enhance(delta_saturation) im = im.enhance(delta_saturation)
return np.array(im) return np.array(im)
...@@ -55,16 +57,20 @@ def scale_image(im): ...@@ -55,16 +57,20 @@ def scale_image(im):
processed_ims, ims_scales = [], [] processed_ims, ims_scales = [], []
if cfg.TEST.MAX_SIZE > 0: if cfg.TEST.MAX_SIZE > 0:
im_size_min = np.min(im.shape[0:2]) im_size_min = np.min(im.shape[:2])
im_size_max = np.max(im.shape[0:2]) im_size_max = np.max(im.shape[:2])
for target_size in cfg.TEST.SCALES: for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_min) im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE # Prevent the biggest axis from being more than MAX_SIZE
if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
processed_ims.append( processed_ims.append(
cv2.resize(im, None, None, fx=im_scale, fy=im_scale, cv2.resize(
interpolation=cv2.INTER_LINEAR)) im,
dsize=None,
fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR,
))
ims_scales.append(im_scale) ims_scales.append(im_scale)
else: else:
# Scale image along the longest side # Scale image along the longest side
...@@ -72,8 +78,12 @@ def scale_image(im): ...@@ -72,8 +78,12 @@ def scale_image(im):
for target_size in cfg.TEST.SCALES: for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_max) im_scale = float(target_size) / float(im_size_max)
processed_ims.append( processed_ims.append(
cv2.resize(im, None, None, fx=im_scale, fy=im_scale, cv2.resize(
interpolation=cv2.INTER_LINEAR)) im,
dsize=None,
fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR,
))
ims_scales.append(im_scale) ims_scales.append(im_scale)
return processed_ims, ims_scales return processed_ims, ims_scales
\ No newline at end of file
...@@ -17,12 +17,12 @@ from __future__ import absolute_import ...@@ -17,12 +17,12 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os
import inspect import inspect
import sys as _sys
import logging as _logging import logging as _logging
import os
import sys as _sys
import threading import threading
from logging import DEBUG, ERROR, FATAL, INFO, WARN
_logger = None _logger = None
_is_root = True _is_root = True
...@@ -43,7 +43,7 @@ def get_logger(): ...@@ -43,7 +43,7 @@ def get_logger():
return _logger return _logger
logger = _logging.getLogger('detectron') logger = _logging.getLogger('detectron')
logger.setLevel(INFO) logger.setLevel('INFO')
logger.propagate = False logger.propagate = False
if True: if True:
...@@ -51,7 +51,8 @@ def get_logger(): ...@@ -51,7 +51,8 @@ def get_logger():
_interactive = False _interactive = False
try: try:
# This is only defined in interactive shells. # This is only defined in interactive shells.
if _sys.ps1: _interactive = True if _sys.ps1:
_interactive = True
except AttributeError: except AttributeError:
# Even now, we may be in an interactive shell with `python -i`. # Even now, we may be in an interactive shell with `python -i`.
_interactive = _sys.flags.interactive _interactive = _sys.flags.interactive
...@@ -59,7 +60,7 @@ def get_logger(): ...@@ -59,7 +60,7 @@ def get_logger():
# If we are in an interactive environment (like Jupyter), set loglevel # If we are in an interactive environment (like Jupyter), set loglevel
# to INFO and pipe the output to stdout. # to INFO and pipe the output to stdout.
if _interactive: if _interactive:
logger.setLevel(INFO) logger.setLevel('INFO')
_logging_target = _sys.stdout _logging_target = _sys.stdout
else: else:
_logging_target = _sys.stderr _logging_target = _sys.stderr
...@@ -86,7 +87,8 @@ def log(level, msg, *args, **kwargs): ...@@ -86,7 +87,8 @@ def log(level, msg, *args, **kwargs):
def debug(msg, *args, **kwargs): def debug(msg, *args, **kwargs):
if is_root(): get_logger().debug(_detailed_msg(msg), *args, **kwargs) if is_root():
get_logger().debug(_detailed_msg(msg), *args, **kwargs)
def error(msg, *args, **kwargs): def error(msg, *args, **kwargs):
...@@ -100,15 +102,18 @@ def fatal(msg, *args, **kwargs): ...@@ -100,15 +102,18 @@ def fatal(msg, *args, **kwargs):
def info(msg, *args, **kwargs): def info(msg, *args, **kwargs):
if is_root(): get_logger().info(_detailed_msg(msg), *args, **kwargs) if is_root():
get_logger().info(_detailed_msg(msg), *args, **kwargs)
def warn(msg, *args, **kwargs): def warn(msg, *args, **kwargs):
if is_root(): get_logger().warn(_detailed_msg(msg), *args, **kwargs) if is_root():
get_logger().warn(_detailed_msg(msg), *args, **kwargs)
def warning(msg, *args, **kwargs): def warning(msg, *args, **kwargs):
if is_root(): get_logger().warning(_detailed_msg(msg), *args, **kwargs) if is_root():
get_logger().warning(_detailed_msg(msg), *args, **kwargs)
def get_verbosity(): def get_verbosity():
...@@ -128,12 +133,3 @@ def set_root_logger(is_root=True): ...@@ -128,12 +133,3 @@ def set_root_logger(is_root=True):
def is_root(): def is_root():
return _is_root return _is_root
_level_names = {
FATAL: 'FATAL',
ERROR: 'ERROR',
WARN: 'WARN',
INFO: 'INFO',
DEBUG: 'DEBUG',
}
\ No newline at end of file
...@@ -13,6 +13,10 @@ ...@@ -13,6 +13,10 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np import numpy as np
...@@ -21,15 +25,19 @@ def intersect_box_mask(ex_box, gt_box, gt_mask): ...@@ -21,15 +25,19 @@ def intersect_box_mask(ex_box, gt_box, gt_mask):
y1 = max(ex_box[1], gt_box[1]) y1 = max(ex_box[1], gt_box[1])
x2 = min(ex_box[2], gt_box[2]) x2 = min(ex_box[2], gt_box[2])
y2 = min(ex_box[3], gt_box[3]) y2 = min(ex_box[3], gt_box[3])
if x1 > x2 or y1 > y2: return None if x1 > x2 or y1 > y2:
return None
w = x2 - x1 + 1 w = x2 - x1 + 1
h = y2 - y1 + 1 h = y2 - y1 + 1
ex_starty = y1 - ex_box[1] ex_start_y = y1 - ex_box[1]
ex_startx = x1 - ex_box[0] ex_start_x = x1 - ex_box[0]
inter_maskb = gt_mask[y1 : y2 + 1 , x1 : x2 + 1] inter_mask = gt_mask[y1:y2 + 1, x1:x2 + 1]
regression_target = np.zeros((ex_box[3] - ex_box[1] + 1, ex_box[2] - ex_box[0] + 1), dtype=np.uint8) target_h = ex_box[3] - ex_box[1] + 1
regression_target[ex_starty: ex_starty + h, ex_startx: ex_startx + w] = inter_maskb target_w = ex_box[2] - ex_box[0] + 1
return regression_target reg_target = np.zeros((target_h, target_w), dtype=np.uint8)
reg_target[ex_start_y:ex_start_y + h,
ex_start_x:ex_start_x + w] = inter_mask
return reg_target
def mask_overlap(box1, box2, mask1, mask2): def mask_overlap(box1, box2, mask1, mask2):
...@@ -37,21 +45,24 @@ def mask_overlap(box1, box2, mask1, mask2): ...@@ -37,21 +45,24 @@ def mask_overlap(box1, box2, mask1, mask2):
y1 = max(box1[1], box2[1]) y1 = max(box1[1], box2[1])
x2 = min(box1[2], box2[2]) x2 = min(box1[2], box2[2])
y2 = min(box1[3], box2[3]) y2 = min(box1[3], box2[3])
if x1 > x2 or y1 > y2: return 0 if x1 > x2 or y1 > y2:
return 0
w = x2 - x1 + 1 w = x2 - x1 + 1
h = y2 - y1 + 1 h = y2 - y1 + 1
# Get masks in the intersection part # Get masks in the intersection part
start_ya = y1 - box1[1] start_ya = y1 - box1[1]
start_xa = x1 - box1[0] start_xa = x1 - box1[0]
inter_maska = mask1[start_ya: start_ya + h, start_xa:start_xa + w] inter_mask_a = mask1[start_ya: start_ya + h, start_xa:start_xa + w]
start_yb = y1 - box2[1] start_yb = y1 - box2[1]
start_xb = x1 - box2[0] start_xb = x1 - box2[0]
inter_maskb = mask2[start_yb: start_yb + h, start_xb:start_xb + w] inter_mask_b = mask2[start_yb: start_yb + h, start_xb:start_xb + w]
assert inter_maska.shape == inter_maskb.shape, (inter_maska.shape, inter_maskb.shape) assert inter_mask_a.shape == inter_mask_b.shape
inter = np.logical_and(inter_maskb, inter_maska).sum() inter = np.logical_and(inter_mask_b, inter_mask_a).sum()
union = mask1.sum() + mask2.sum() - inter union = mask1.sum() + mask2.sum() - inter
if union < 1.0: return 0 if union < 1.:
return float(inter) / float(union) return 0.
\ No newline at end of file return float(inter) / float(union)
...@@ -17,17 +17,19 @@ from __future__ import absolute_import ...@@ -17,17 +17,19 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from collections import deque import collections
import numpy as np import numpy as np
class SmoothedValue(object): class SmoothedValue(object):
"""Track a series of values and provide access to smoothed values over a """
window or the global series average. Track a series of values and provide access to smoothed values
over a window or the global series average.
""" """
def __init__(self, window_size): def __init__(self, window_size):
self.deque = deque(maxlen=window_size) self.deque = collections.deque(maxlen=window_size)
self.series = [] self.series = []
self.total = 0.0 self.total = 0.0
self.count = 0 self.count = 0
...@@ -45,4 +47,4 @@ class SmoothedValue(object): ...@@ -45,4 +47,4 @@ class SmoothedValue(object):
return np.mean(self.deque) return np.mean(self.deque)
def GetGlobalAverageValue(self): def GetGlobalAverageValue(self):
return self.total / self.count return self.total / self.count
\ No newline at end of file
...@@ -13,6 +13,11 @@ ...@@ -13,6 +13,11 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import time import time
...@@ -27,7 +32,7 @@ class Timer(object): ...@@ -27,7 +32,7 @@ class Timer(object):
def tic(self): def tic(self):
# Using time.time instead of time.clock because time time.clock # Using time.time instead of time.clock because time time.clock
# does not normalize for multithreading # does not normalize for multi-threading
self.start_time = time.time() self.start_time = time.time()
def toc(self, average=True): def toc(self, average=True):
...@@ -39,3 +44,10 @@ class Timer(object): ...@@ -39,3 +44,10 @@ class Timer(object):
return self.average_time return self.average_time
else: else:
return self.diff return self.diff
@contextlib.contextmanager
def tic_and_toc(self):
    """Context manager that brackets a ``with`` body with tic/toc.

    ``self.tic()`` is called on entry and its return value is what the
    ``with ... as`` target receives; ``self.toc()`` is always called on
    exit, whether the body finished normally or raised.
    """
    try:
        tic_result = self.tic()
        yield tic_result
    finally:
        # Executed on both normal exit and exceptions.
        self.toc()
...@@ -26,12 +26,13 @@ from __future__ import print_function ...@@ -26,12 +26,13 @@ from __future__ import print_function
from __future__ import unicode_literals from __future__ import unicode_literals
import cv2 import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
import numpy as np import numpy as np
from lib.utils.colormap import colormap from lib.utils.colormap import colormap
from lib.utils.boxes import expand_boxes from lib.utils.boxes import expand_boxes
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
plt.rcParams['pdf.fonttype'] = 42 # For editing in Adobe Illustrator plt.rcParams['pdf.fonttype'] = 42 # For editing in Adobe Illustrator
...@@ -63,17 +64,20 @@ def kp_connections(keypoints): ...@@ -63,17 +64,20 @@ def kp_connections(keypoints):
def convert_from_cls_format(cls_boxes, cls_segms, cls_keyps): def convert_from_cls_format(cls_boxes, cls_segms, cls_keyps):
""" """Convert from the class boxes/segms/keyps format generated by the testing code."""
Convert from the class boxes/segms/keyps format generated by the testing code.
"""
box_list = [b for b in cls_boxes if len(b) > 0] box_list = [b for b in cls_boxes if len(b) > 0]
if len(box_list) > 0: boxes = np.concatenate(box_list) if len(box_list) > 0:
else: boxes = None boxes = np.concatenate(box_list)
if cls_segms is not None: segms = [s for slist in cls_segms for s in slist] else:
else: segms = None boxes = None
if cls_keyps is not None: keyps = [k for klist in cls_keyps for k in klist] if cls_segms is not None:
else: keyps = None segms = [s for slist in cls_segms for s in slist]
else:
segms = None
if cls_keyps is not None:
keyps = [k for klist in cls_keyps for k in klist]
else:
keyps = None
classes = [] classes = []
for j in range(len(cls_boxes)): for j in range(len(cls_boxes)):
classes += [j] * len(cls_boxes[j]) classes += [j] * len(cls_boxes[j])
...@@ -81,23 +85,28 @@ def convert_from_cls_format(cls_boxes, cls_segms, cls_keyps): ...@@ -81,23 +85,28 @@ def convert_from_cls_format(cls_boxes, cls_segms, cls_keyps):
def convert_from_cls_format_v2(cls_boxes, cls_segms, cls_keyps, class_names): def convert_from_cls_format_v2(cls_boxes, cls_segms, cls_keyps, class_names):
""" """Convert from the class boxes/segms/keyps format generated by the testing code."""
Convert from the class boxes/segms/keyps format generated by the testing code. box_list, segm_list = [], []
"""
box_list, segm_list = [], []
for j, name in enumerate(class_names): for j, name in enumerate(class_names):
if name == '__background__': continue if name == '__background__':
continue
if len(cls_boxes[j]) > 0: if len(cls_boxes[j]) > 0:
box_list.append(cls_boxes[j]) box_list.append(cls_boxes[j])
if cls_segms is not None: segm_list.append(cls_segms[j]) if cls_segms is not None:
segm_list.append(cls_segms[j])
if len(box_list) > 0: boxes = np.concatenate(box_list) if len(box_list) > 0:
else: boxes = None boxes = np.concatenate(box_list)
if len(segm_list) > 0: segms = np.concatenate(segm_list) else:
else: segms = None boxes = None
if cls_keyps is not None: keyps = [k for klist in cls_keyps for k in klist] if len(segm_list) > 0:
else: keyps = None segms = np.concatenate(segm_list)
else:
segms = None
if cls_keyps is not None:
keyps = [k for klist in cls_keyps for k in klist]
else:
keyps = None
classes = [] classes = []
for j in range(len(cls_boxes)): for j in range(len(cls_boxes)):
...@@ -137,7 +146,6 @@ def get_mask(boxes, segms, im_shape, mask_thresh=0.4): ...@@ -137,7 +146,6 @@ def get_mask(boxes, segms, im_shape, mask_thresh=0.4):
def vis_mask(img, mask, col, alpha=0.4, show_border=True, border_thick=1): def vis_mask(img, mask, col, alpha=0.4, show_border=True, border_thick=1):
"""Visualizes a single binary mask.""" """Visualizes a single binary mask."""
img = img.astype(np.float32) img = img.astype(np.float32)
idx = np.nonzero(mask) idx = np.nonzero(mask)
...@@ -178,80 +186,25 @@ def vis_bbox(img, bbox, thick=1): ...@@ -178,80 +186,25 @@ def vis_bbox(img, bbox, thick=1):
return img return img
def vis_keypoints(img, kps, kp_thresh=2, alpha=0.7):
    """Visualizes keypoints (adapted from vis_one_image).

    kps has shape (4, #keypoints) where 4 rows are (x, y, logit, prob).

    Lines and circles are drawn on a copy of ``img`` and the copy is then
    blended back onto ``img`` with weight ``alpha``.

    Parameters
    ----------
    img : image array to draw on (it is copied, not drawn on directly).
    kps : keypoint array; rows 0/1 are read as x/y and row 2 is compared
        against ``kp_thresh``.
    kp_thresh : a keypoint is drawn only if its row-2 value exceeds this.
    alpha : blending weight of the drawn overlay.

    Returns
    -------
    The result of ``cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0)``.

    NOTE(review): ``keypoint_utils`` is not among the imports visible for
    this module -- confirm it is available before calling this function.
    """
    dataset_keypoints, _ = keypoint_utils.get_keypoints()
    kp_lines = kp_connections(dataset_keypoints)
    # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv.
    # Two extra colors are reserved for the mid-shoulder/nose and
    # mid-shoulder/mid-hip lines drawn below.
    cmap = plt.get_cmap('rainbow')
    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]
    colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors]
    # Perform the drawing on a copy of the image, to allow for blending.
    kp_mask = np.copy(img)
    # Draw mid shoulder / mid hip first for better visualization.
    # The midpoint is the mean of the left/right (x, y); its score is the
    # smaller of the two row-2 values.
    mid_shoulder = (
        kps[:2, dataset_keypoints.index('right_shoulder')] +
        kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
    sc_mid_shoulder = np.minimum(
        kps[2, dataset_keypoints.index('right_shoulder')],
        kps[2, dataset_keypoints.index('left_shoulder')])
    mid_hip = (
        kps[:2, dataset_keypoints.index('right_hip')] +
        kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
    sc_mid_hip = np.minimum(
        kps[2, dataset_keypoints.index('right_hip')],
        kps[2, dataset_keypoints.index('left_hip')])
    nose_idx = dataset_keypoints.index('nose')
    # Mid-shoulder to nose, using the first reserved color.
    if sc_mid_shoulder > kp_thresh and kps[2, nose_idx] > kp_thresh:
        cv2.line(
            kp_mask, tuple(mid_shoulder), tuple(kps[:2, nose_idx]),
            color=colors[len(kp_lines)], thickness=2, lineType=cv2.LINE_AA)
    # Mid-shoulder to mid-hip, using the second reserved color.
    if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
        cv2.line(
            kp_mask, tuple(mid_shoulder), tuple(mid_hip),
            color=colors[len(kp_lines) + 1], thickness=2, lineType=cv2.LINE_AA)
    # Draw the keypoints.
    for l in range(len(kp_lines)):
        i1 = kp_lines[l][0]
        i2 = kp_lines[l][1]
        p1 = kps[0, i1], kps[1, i1]
        p2 = kps[0, i2], kps[1, i2]
        # The connecting line needs both endpoints above the threshold;
        # each endpoint circle only needs its own keypoint above it.
        if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
            cv2.line(
                kp_mask, p1, p2,
                color=colors[l], thickness=2, lineType=cv2.LINE_AA)
        if kps[2, i1] > kp_thresh:
            cv2.circle(
                kp_mask, p1,
                radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)
        if kps[2, i2] > kp_thresh:
            cv2.circle(
                kp_mask, p2,
                radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)
    # Blend the keypoints.
    return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0)
def vis_one_image_opencv( def vis_one_image_opencv(
im, class_names, im,
boxes, segms=None, keypoints=None, class_names,
thresh=0.9, kp_thresh=2, boxes,
show_box=False, show_class=False): segms=None,
keypoints=None,
thresh=0.9,
kp_thresh=2,
show_box=False,
show_class=False,
):
"""Constructs a numpy array with the detections visualized.""" """Constructs a numpy array with the detections visualized."""
boxes, segms, keypoints, classes = \ boxes, segms, keypoints, classes = \
convert_from_cls_format_v2(boxes, segms, keypoints, class_names) convert_from_cls_format_v2(boxes, segms, keypoints, class_names)
if boxes is None \ if boxes is None \
or boxes.shape[0] == 0 or \ or boxes.shape[0] == 0 or \
max(boxes[:, 4]) < thresh: return im max(boxes[:, 4]) < thresh:
return im
mask_color_id, masks, color_list = 0, None, colormap() mask_color_id, masks, color_list = 0, None, colormap()
...@@ -284,41 +237,43 @@ def vis_one_image_opencv( ...@@ -284,41 +237,43 @@ def vis_one_image_opencv(
mask_color_id += 1 mask_color_id += 1
im = vis_mask(im, masks[..., i], color_mask) im = vis_mask(im, masks[..., i], color_mask)
# show keypoints # # show keypoints
if keypoints is not None and len(keypoints) > i: # if keypoints is not None and len(keypoints) > i:
im = vis_keypoints(im, keypoints[i], kp_thresh) # im = vis_keypoints(im, keypoints[i], kp_thresh)
cv2.imshow('Detectron', im) cv2.imshow('Detectron', im)
cv2.waitKey(0) cv2.waitKey(0)
def vis_one_image( def vis_one_image(
im, class_names, im,
boxes, segms=None, keypoints=None, class_names,
thresh=0.9, kp_thresh=2, dpi=100, boxes,
box_alpha=0.0, show_class=True, segms=None,
filename=None): keypoints=None,
thresh=0.9,
kp_thresh=2,
dpi=100,
box_alpha=0.,
show_class=True,
filename=None,
):
"""Visual debugging of detections.""" """Visual debugging of detections."""
boxes, segms, keypoints, classes = \ boxes, segms, keypoints, classes = \
convert_from_cls_format_v2(boxes, segms, keypoints, class_names) convert_from_cls_format_v2(boxes, segms, keypoints, class_names)
if boxes is None \ if boxes is None \
or boxes.shape[0] == 0 or \ or boxes.shape[0] == 0 or \
max(boxes[:, 4]) < thresh: return max(boxes[:, 4]) < thresh:
return
im, mask = im[:, :, ::-1], None
#dataset_keypoints, _ = keypoint_utils.get_keypoints() im, mask, masks = im[:, :, ::-1], None, None
if segms is not None and len(segms) > 0: if segms is not None and len(segms) > 0:
masks = get_mask(boxes, segms, im.shape[0:2]) masks = get_mask(boxes, segms, im.shape[:2])
color_list = colormap(rgb=True) / 255 color_list = colormap(rgb=True) / 255
# kp_lines = kp_connections(dataset_keypoints)
# cmap = plt.get_cmap('rainbow')
# colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]
fig = plt.figure(frameon=False) fig = plt.figure(frameon=False)
fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi) fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)
ax = plt.Axes(fig, [0., 0., 1., 1.]) ax = plt.Axes(fig, [0., 0., 1., 1.])
...@@ -379,59 +334,9 @@ def vis_one_image( ...@@ -379,59 +334,9 @@ def vis_one_image(
alpha=0.5) alpha=0.5)
ax.add_patch(polygon) ax.add_patch(polygon)
# show keypoints
if keypoints is not None and len(keypoints) > i:
kps = keypoints[i]
# plt.autoscale(False)
# for l in range(len(kp_lines)):
# i1 = kp_lines[l][0]
# i2 = kp_lines[l][1]
# if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
# x = [kps[0, i1], kps[0, i2]]
# y = [kps[1, i1], kps[1, i2]]
# line = plt.plot(x, y)
# plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7)
# if kps[2, i1] > kp_thresh:
# plt.plot(
# kps[0, i1], kps[1, i1], '.', color=colors[l],
# markersize=3.0, alpha=0.7)
#
# if kps[2, i2] > kp_thresh:
# plt.plot(
# kps[0, i2], kps[1, i2], '.', color=colors[l],
# markersize=3.0, alpha=0.7)
#
# # add mid shoulder / mid hip for better visualization
# mid_shoulder = (
# kps[:2, dataset_keypoints.index('right_shoulder')] +
# kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
# sc_mid_shoulder = np.minimum(
# kps[2, dataset_keypoints.index('right_shoulder')],
# kps[2, dataset_keypoints.index('left_shoulder')])
# mid_hip = (
# kps[:2, dataset_keypoints.index('right_hip')] +
# kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
# sc_mid_hip = np.minimum(
# kps[2, dataset_keypoints.index('right_hip')],
# kps[2, dataset_keypoints.index('left_hip')])
# if (sc_mid_shoulder > kp_thresh and
# kps[2, dataset_keypoints.index('nose')] > kp_thresh):
# x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]]
# y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]]
# line = plt.plot(x, y)
# plt.setp(
# line, color=colors[len(kp_lines)], linewidth=1.0, alpha=0.7)
# if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
# x = [mid_shoulder[0], mid_hip[0]]
# y = [mid_shoulder[1], mid_hip[1]]
# line = plt.plot(x, y)
# plt.setp(
# line, color=colors[len(kp_lines) + 1], linewidth=1.0,
# alpha=0.7)
if filename is not None: if filename is not None:
fig.savefig(filename, dpi=dpi) fig.savefig(filename, dpi=dpi)
plt.close('all') plt.close('all')
else: else:
plt.imshow(im) plt.imshow(im)
plt.show() plt.show()
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import base64
import importlib
import sys
import argparse
import cv2
import numpy as np
import pprint
from seetaas_helper import visualization_test
from lib.core.config import cfg
from lib.core.coordinator import Coordinator
from lib.modeling.detector import Detector
import lib.ssd.test
import lib.faster_rcnn.test
from lib.faster_rcnn.test import nms, soft_nms
##############################################
# #
# ARGS #
# #
##############################################
def parse_args():
    """Parse the command-line options of the detection service.

    Returns
    -------
    argparse.Namespace
        Carries ``cfg_file``, ``exp_dir``, ``iter`` and ``num_workers``.
    """
    parser = argparse.ArgumentParser(description='Test a Detection Network')
    # One row per option: (flag, dest, help, default, type).
    option_table = (
        ('--cfg', 'cfg_file', 'optional config file', None, str),
        ('--exp_dir', 'exp_dir', 'experiment dir', None, str),
        ('--iter', 'iter', 'global step', 0, int),
        ('--workers', 'num_workers', 'number of workers', 1, int),
    )
    for flag, dest, help_text, default, value_type in option_table:
        parser.add_argument(
            flag,
            dest=dest,
            help=help_text,
            default=default,
            type=value_type,
        )
    # Help-and-exit only triggers for an argv of exactly 233 entries,
    # so in practice parsing always proceeds.
    if len(sys.argv) == 233:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()
##############################################
# #
# UTILS #
# #
##############################################
def get_image(base64_str):
    """Decode a base64-encoded image into a ``numpy.ndarray``.

    Parameters
    ----------
    base64_str : str or bytes
        The base64 text of an encoded image file.

    Returns
    -------
    numpy.ndarray or dict
        The image decoded with ``cv2.IMREAD_COLOR`` on success. Otherwise
        an error response dict: ``res == 2`` when the module-level
        ``detector`` is ``None``, ``res == 1`` when the payload cannot be
        decoded into an image.
    """
    if detector is None:
        return {
            "state": "False",
            "message": "detect model is not init",
            "objects": [],
            "res": 2,
        }
    invalid_image = {
        "state": "False",
        "message": "detect image is not valid",
        "objects": [],
        "res": 1,
    }
    try:
        raw_bytes = base64.b64decode(base64_str)
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        encoded = np.frombuffer(raw_bytes, np.uint8)
        im = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    except Exception:
        # Deliberately best-effort, but no longer swallows
        # KeyboardInterrupt / SystemExit as a bare ``except`` did.
        return invalid_image
    if im is None:
        # imdecode signals undecodable data by returning None rather than
        # raising; report it as an invalid image instead of leaking None.
        return invalid_image
    return im
def send_detections(boxes_this_image):
    """Pack per-class detections into the response dict.

    For every class name in ``cfg.MODEL.CLASSES`` except
    ``'__background__'``, rows of ``boxes_this_image[class_index]`` whose
    column 4 exceeds ``cfg.VIS_TH`` are kept, the class index is appended
    as an extra column, and the rows are collected as plain lists.

    Returns
    -------
    dict
        ``{"state": "True", "message": "", "objects": [...], "res": 0}``.
    """
    collected = []
    for cls_id, cls_name in enumerate(cfg.MODEL.CLASSES):
        if cls_name != '__background__':
            cls_dets = boxes_this_image[cls_id]  # [num, {x1, y1, x2, y2, score}]
            # Keep only the rows scoring above the visualization threshold.
            visible = cls_dets[cls_dets[:, 4] > cfg.VIS_TH]
            # Extra column holding the class index for every kept row.
            label_column = np.full(
                (visible.shape[0], 1), cls_id, dtype=np.float32)
            labelled = np.hstack(
                (visible.astype(np.float32, copy=False), label_column))
            collected.extend(labelled.tolist())
    return {"state": "True", "message": "", "objects": collected, "res": 0}
##############################################
# #
# PROCEDURE #
# #
##############################################
def ssd_infer(base64_str):
    """Run the SSD test pipeline on one base64-encoded image.

    Returns whatever ``get_image`` produced if that is not an ndarray;
    otherwise the response dict built by ``send_detections``.
    """
    image = get_image(base64_str)
    if not isinstance(image, np.ndarray):
        # Not an image: hand the value from get_image straight back.
        return image
    all_scores, all_boxes = lib.ssd.test.ims_detect(detector, [image])
    scores = all_scores[0]
    boxes = all_boxes[0]
    # Index 0 is an empty placeholder; real classes start at 1.
    per_class_dets = [[]]
    for cls_id in range(1, cfg.MODEL.NUM_CLASSES):
        selected = np.where(scores[:, cls_id] > cfg.TEST.SCORE_THRESH)[0]
        picked_scores = scores[selected, cls_id]
        picked_boxes = boxes[selected]
        # Highest scores first, truncated to NMS_TOP_K candidates.
        top_k = np.argsort(-picked_scores)[:cfg.TEST.NMS_TOP_K]
        picked_scores = picked_scores[top_k]
        picked_boxes = picked_boxes[top_k]
        dets = np.hstack((picked_boxes, picked_scores[:, np.newaxis]))
        dets = dets.astype(np.float32, copy=False)
        if not cfg.TEST.USE_SOFT_NMS:
            keep = nms(
                dets, cfg.TEST.NMS,
                force_cpu=True,
            )
        else:
            keep = soft_nms(
                dets, cfg.TEST.NMS,
                method=cfg.TEST.SOFT_NMS_METHOD,
                sigma=cfg.TEST.SOFT_NMS_SIGMA,
            )
        per_class_dets.append(dets[keep, :])
    return send_detections(per_class_dets)
def faster_rcnn_infer(base64_str):
    """Run the Faster R-CNN test pipeline on one base64-encoded image.

    Returns whatever ``get_image`` produced if that is not an ndarray;
    otherwise the response dict built by ``send_detections``.
    """
    image = get_image(base64_str)
    if not isinstance(image, np.ndarray):
        # Not an image: hand the value from get_image straight back.
        return image
    scores, boxes = lib.faster_rcnn.test.im_detect(detector, image)
    # Index 0 is an empty placeholder; real classes start at 1.
    per_class_dets = [[]]
    for cls_id in range(1, cfg.MODEL.NUM_CLASSES):
        selected = np.where(scores[:, cls_id] > cfg.TEST.SCORE_THRESH)[0]
        picked_scores = scores[selected, cls_id]
        # Each class owns four consecutive box columns.
        picked_boxes = boxes[selected, cls_id * 4:(cls_id + 1) * 4]
        dets = np.hstack((picked_boxes, picked_scores[:, np.newaxis]))
        dets = dets.astype(np.float32, copy=False)
        if not cfg.TEST.USE_SOFT_NMS:
            keep = nms(dets, cfg.TEST.NMS, force_cpu=True)
        else:
            keep = soft_nms(
                dets, cfg.TEST.NMS,
                method=cfg.TEST.SOFT_NMS_METHOD,
                sigma=cfg.TEST.SOFT_NMS_SIGMA,
            )
        per_class_dets.append(dets[keep, :])
    return send_detections(per_class_dets)
##############################################
# #
# MAIN #
# #
##############################################
@visualization_test.Deploy.register
def infer(base64_str):
    """Dispatch to the module-level ``<cfg.MODEL.TYPE>_infer`` function."""
    procedure_name = '{}_infer'.format(cfg.MODEL.TYPE)
    # Looked up by name in this module's globals at call time.
    return globals()[procedure_name](base64_str)
# Module-level start-up: everything below runs at import/execution time.
# Parse the command line and echo the parsed arguments.
args = parse_args()
print('Called with args:')
print(args)
# Build the coordinator from the config file and experiment directory,
# then dump the global ``cfg`` object.
coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir)
print('Using config:')
pprint.pprint(cfg)
# NOTE(review): other entry points unpack this call as
# ``checkpoint, start_iter = coordinator.checkpoint(...)``; confirm that
# the value bound here is what ``load_weights`` expects.
checkpoint = coordinator.checkpoint(global_step=args.iter, wait=False)
# NOTE(review): ``test_engine`` is not referenced again in the visible
# part of this script; the import itself may still be wanted.
test_engine = importlib.import_module('lib.{}.test'.format(cfg.MODEL.TYPE))
# ``detector`` is the module-level global read by get_image and the
# *_infer functions.
detector = Detector().eval().cuda(cfg.GPU_ID)
detector.load_weights(checkpoint)
detector.optimize_for_inference()
# setup database
# Hand control to the deployment helper with the requested worker count
# (presumably starts serving requests -- TODO confirm).
visualization_test.Deploy.run(args.num_workers)
\ No newline at end of file
...@@ -16,14 +16,15 @@ from __future__ import print_function ...@@ -16,14 +16,15 @@ from __future__ import print_function
import os import os
import sys import sys
sys.path.insert(0, '..') sys.path.insert(0, '..')
import argparse import argparse
import pprint
import dragon.vm.torch as torch import dragon.vm.torch as torch
import pprint
from lib.core.config import cfg from lib.core.config import cfg
from lib.core.coordinator import Coordinator from lib.core.coordinator import Coordinator
from lib.modeling.detector import Detector from lib.modeling.detector import Detector
import lib.utils.logger as logger from lib.utils import logger
def parse_args(): def parse_args():
...@@ -50,9 +51,11 @@ if __name__ == '__main__': ...@@ -50,9 +51,11 @@ if __name__ == '__main__':
args = parse_args() args = parse_args()
if args.exp_dir is None or \ if args.exp_dir is None or \
not os.path.exists(args.exp_dir): not os.path.exists(args.exp_dir):
raise ValueError('Excepted a existing experiment dir. \nGot {}' raise ValueError(
.format(os.path.abspath(args.exp_dir)) if args.exp_dir else 'None') 'Excepted a existing experiment dir. \nGot {}.'
.format(os.path.abspath(args.exp_dir))
)
logger.info('Called with args:') logger.info('Called with args:')
logger.info(args) logger.info(args)
...@@ -72,7 +75,7 @@ if __name__ == '__main__': ...@@ -72,7 +75,7 @@ if __name__ == '__main__':
# Mixed precision training? # Mixed precision training?
if cfg.MODEL.DATA_TYPE.lower() == 'float16': if cfg.MODEL.DATA_TYPE.lower() == 'float16':
detector.half() # Powerful FP16 Support detector.half() # Powerful FP16 Support
data = torch.zeros(*args.input_shape).byte() data = torch.zeros(*args.input_shape).byte()
ims_info = torch.zeros(args.input_shape[0], 3).float() ims_info = torch.zeros(args.input_shape[0], 3).float()
...@@ -80,8 +83,7 @@ if __name__ == '__main__': ...@@ -80,8 +83,7 @@ if __name__ == '__main__':
torch.onnx.export( torch.onnx.export(
model=detector, model=detector,
args={'data': data, 'ims_info': ims_info}, args={'data': data, 'ims_info': ims_info},
f=checkpoint.replace( f=checkpoint.replace('checkpoints', 'exports')
'checkpoints', 'exports') .replace('pth', 'onnx'),
.replace('pth', 'onnx'),
verbose=True, verbose=True,
) )
\ No newline at end of file
...@@ -9,20 +9,23 @@ ...@@ -9,20 +9,23 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os import os
import sys import sys
sys.path.insert(0, '..') sys.path.insert(0, '..')
import argparse
import numpy as np
import argparse
import dragon import dragon
import dragon.core.mpi as mpi import numpy
from lib.core.config import cfg from lib.core.config import cfg
from lib.core.coordinator import Coordinator from lib.core.coordinator import Coordinator
from lib.core.train import train_net from lib.core.train import train_net
from lib.datasets.factory import get_imdb from lib.datasets.factory import get_imdb
import lib.utils.logger as logger from lib.utils import logger
def parse_args(): def parse_args():
...@@ -47,30 +50,36 @@ if __name__ == '__main__': ...@@ -47,30 +50,36 @@ if __name__ == '__main__':
args = parse_args() args = parse_args()
if args.exp_dir is None or \ if args.exp_dir is None or \
not os.path.exists(args.exp_dir): not os.path.exists(args.exp_dir):
raise ValueError('Excepted a existing experiment dir. \nGot {}' raise ValueError(
.format(os.path.abspath(args.exp_dir)) if args.exp_dir else 'None') 'Excepted a existing experiment dir. \nGot {}.'
.format(os.path.abspath(args.exp_dir))
)
coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir) coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir)
checkpoint, start_iter = coordinator.checkpoint(wait=False) checkpoint, start_iter = coordinator.checkpoint(wait=False)
if checkpoint is not None: cfg.TRAIN.WEIGHTS = checkpoint
if checkpoint is not None:
cfg.TRAIN.WEIGHTS = checkpoint
# Setup MPI # Setup MPI
if cfg.NUM_GPUS != mpi.Size(): if cfg.NUM_GPUS != dragon.mpi.size():
raise ValueError('Excepted {} mpi nodes, but got {}.' raise ValueError(
.format(len(args.gpus), mpi.Size())) 'Excepted {} mpi nodes, but got {}.'
.format(len(args.gpus), dragon.mpi.size())
)
GPUs = [i for i in range(cfg.NUM_GPUS)] GPUs = [i for i in range(cfg.NUM_GPUS)]
cfg.GPU_ID = GPUs[mpi.Rank()] cfg.GPU_ID = GPUs[dragon.mpi.rank()]
mpi.Parallel([i for i in range(cfg.NUM_GPUS)]) dragon.mpi.add_parallel_group([i for i in range(cfg.NUM_GPUS)])
mpi.SetParallelMode('NCCL' if cfg.USE_NCCL else 'MPI') dragon.mpi.set_parallel_mode('NCCL' if cfg.USE_NCCL else 'MPI')
# Setup logger # Setup logger
if mpi.Rank() != 0: if dragon.mpi.rank() != 0:
logger.set_root_logger(False) logger.set_root_logger(False)
# Fix the random seeds (numpy and dragon) for reproducibility # Fix the random seeds (numpy and dragon) for reproducibility
np.random.seed(cfg.RNG_SEED) numpy.random.seed(cfg.RNG_SEED)
dragon.SetRandomSeed(cfg.RNG_SEED) dragon.config.set_random_seed(cfg.RNG_SEED)
# Inspect the database # Inspect the database
database = get_imdb(cfg.TRAIN.DATABASE) database = get_imdb(cfg.TRAIN.DATABASE)
...@@ -83,4 +92,4 @@ if __name__ == '__main__': ...@@ -83,4 +92,4 @@ if __name__ == '__main__':
train_net(coordinator, start_iter) train_net(coordinator, start_iter)
# Finalize mpi # Finalize mpi
mpi.Finalize() dragon.mpi.finalize()
\ No newline at end of file
...@@ -13,17 +13,19 @@ from __future__ import absolute_import ...@@ -13,17 +13,19 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import importlib
import os
import sys import sys
sys.path.insert(0, '..') sys.path.insert(0, '..')
import argparse import argparse
import pprint import pprint
import importlib
from lib.core.config import cfg from lib.core.config import cfg
from lib.core.coordinator import Coordinator from lib.core.coordinator import Coordinator
from lib.core.test import TestServer from lib.core.test import TestServer
from lib.modeling.detector import Detector
from lib.datasets.factory import get_imdb from lib.datasets.factory import get_imdb
from lib.modeling.detector import Detector
from lib.utils import logger from lib.utils import logger
...@@ -51,7 +53,16 @@ def parse_args(): ...@@ -51,7 +53,16 @@ def parse_args():
if __name__ == '__main__': if __name__ == '__main__':
args = parse_args() args = parse_args()
logger.info('Called with args:\n' + str(args))
if args.exp_dir is None or \
not os.path.exists(args.exp_dir):
raise ValueError(
'Excepted a existing experiment dir. \nGot {}.'
.format(os.path.abspath(args.exp_dir))
)
logger.info('Called with args:')
logger.info(args)
coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir) coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir)
logger.info('Using config:\n' + pprint.pformat(cfg)) logger.info('Using config:\n' + pprint.pformat(cfg))
...@@ -76,7 +87,7 @@ if __name__ == '__main__': ...@@ -76,7 +87,7 @@ if __name__ == '__main__':
# Mixed precision training? # Mixed precision training?
if cfg.MODEL.DATA_TYPE.lower() == 'float16': if cfg.MODEL.DATA_TYPE.lower() == 'float16':
detector.half() # Powerful FP16 Support detector.half() # Powerful FP16 Support
server = TestServer(coordinator.results_dir(checkpoint)) server = TestServer(coordinator.results_dir(checkpoint))
test_engine.test_net(detector, server) test_engine.test_net(detector, server)
\ No newline at end of file
...@@ -16,8 +16,9 @@ from __future__ import print_function ...@@ -16,8 +16,9 @@ from __future__ import print_function
import os import os
import sys import sys
sys.path.insert(0, '..') sys.path.insert(0, '..')
import argparse import argparse
import numpy as np import numpy
from lib.core.coordinator import Coordinator from lib.core.coordinator import Coordinator
from lib.utils import logger from lib.utils import logger
...@@ -41,36 +42,37 @@ def parse_args(): ...@@ -41,36 +42,37 @@ def parse_args():
def test(cfg_file, exp_dir, global_step): def test(cfg_file, exp_dir, global_step):
"""Call test.py to test models on specific global step. """Call test.py to test models on specific global step."""
Parameters
----------
cfg_file : str
The path of the cfg file.
global_step : int
The iteration to test.
"""
import subprocess import subprocess
args = '--cfg {} --exp_dir {} --iter {}'.format( args = '{} {} '.format(sys.executable, 'test.py')
args += '--cfg {} --exp_dir {} --iter {} '.format(
os.path.abspath(cfg_file), exp_dir, global_step) os.path.abspath(cfg_file), exp_dir, global_step)
return subprocess.call('{} {} {}'.format( return subprocess.call(args, shell=True)
sys.executable, 'test.py', args), shell=True)
if __name__ == '__main__': if __name__ == '__main__':
args = parse_args() args = parse_args()
if args.exp_dir is None or \
not os.path.exists(args.exp_dir):
raise ValueError(
'Excepted a existing experiment dir. \nGot {}.'
.format(os.path.abspath(args.exp_dir))
)
coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir) coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir)
global_steps = [] global_steps = []
files = os.listdir(coordinator.checkpoints_dir()) files = os.listdir(coordinator.checkpoints_dir())
for ix, file in enumerate(files):
step = int(file.split('_iter_')[-1].split('.')[0]) for file in files:
step = int(file.split('_iter_')[-1].split(b'.')[0])
global_steps.append(step) global_steps.append(step)
order = np.argsort(-np.array(global_steps)) order = numpy.argsort(-numpy.array(global_steps))
for test_idx in order: for test_idx in order:
logger.info('Testing net at global step: {}......'.format(global_steps[test_idx])) logger.info('Testing net at global step: {}......'
.format(global_steps[test_idx]))
logger.info(' - Using model file: {}'.format(files[test_idx])) logger.info(' - Using model file: {}'.format(files[test_idx]))
test(args.cfg_file, args.exp_dir, global_steps[test_idx]) test(args.cfg_file, args.exp_dir, global_steps[test_idx])
\ No newline at end of file
...@@ -16,16 +16,17 @@ from __future__ import print_function ...@@ -16,16 +16,17 @@ from __future__ import print_function
import sys import sys
sys.path.insert(0, '..') sys.path.insert(0, '..')
import os.path as osp import os.path as osp
import argparse import argparse
import pprint
import dragon import dragon
import numpy as np import numpy
import pprint
from lib.core.config import cfg from lib.core.config import cfg
from lib.core.coordinator import Coordinator from lib.core.coordinator import Coordinator
from lib.core.train import train_net from lib.core.train import train_net
from lib.datasets.factory import get_imdb from lib.datasets.factory import get_imdb
import lib.utils.logger as logger from lib.utils import logger
def parse_args(): def parse_args():
...@@ -58,19 +59,10 @@ def mpi_train(cfg_file, exp_dir): ...@@ -58,19 +59,10 @@ def mpi_train(cfg_file, exp_dir):
""" """
import subprocess import subprocess
args = '--cfg {} --exp_dir {}'.format(osp.abspath(cfg_file), exp_dir) args = 'mpirun --allow-run-as-root -n {} '.format(cfg.NUM_GPUS)
mpi_args = 'mpirun --allow-run-as-root -n {}'.format(cfg.NUM_GPUS) args += '{} {} '.format(sys.executable, 'mpi_train.py')
if len(cfg.HOSTS) > 0: args += '--cfg {} --exp_dir {} '.format(osp.abspath(cfg_file), exp_dir)
mpi_args += ' -x NCCL_DEBUG=INFO' \ return subprocess.call(args, shell=True)
' -x NCCL_IB_CUDA_SUPPORT=1' \
' -mca btl_openib_allow_ib 1' \
' -mca mpi_warn_on_fork 0 -H '
for i, host in enumerate(cfg.HOSTS):
mpi_args += (host + ':{},'.format(cfg.NUM_GPUS // len(cfg.HOSTS)))
if i > 0: subprocess.call('scp -r {} {}:{}'.format(
osp.abspath(exp_dir), host, osp.dirname(exp_dir)), shell=True)
return subprocess.call('{} {} {} {}'.format(
mpi_args, sys.executable, 'mpi_train.py', args), shell=True)
if __name__ == '__main__': if __name__ == '__main__':
...@@ -85,9 +77,14 @@ if __name__ == '__main__': ...@@ -85,9 +77,14 @@ if __name__ == '__main__':
coordinator.checkpoints_dir() coordinator.checkpoints_dir()
mpi_train(args.cfg_file, coordinator.experiment_dir) mpi_train(args.cfg_file, coordinator.experiment_dir)
else: else:
# Resume training?
checkpoint, start_iter = coordinator.checkpoint(wait=False)
if checkpoint is not None:
cfg.TRAIN.WEIGHTS = checkpoint
# Fix the random seeds (numpy and dragon) for reproducibility # Fix the random seeds (numpy and dragon) for reproducibility
np.random.seed(cfg.RNG_SEED) numpy.random.seed(cfg.RNG_SEED)
dragon.SetRandomSeed(cfg.RNG_SEED) dragon.config.set_random_seed(cfg.RNG_SEED)
# Inspect the database # Inspect the database
database = get_imdb(cfg.TRAIN.DATABASE) database = get_imdb(cfg.TRAIN.DATABASE)
...@@ -97,4 +94,4 @@ if __name__ == '__main__': ...@@ -97,4 +94,4 @@ if __name__ == '__main__':
# Ready to train the network # Ready to train the network
logger.info('Output will be saved to `{:s}`' logger.info('Output will be saved to `{:s}`'
.format(coordinator.checkpoints_dir())) .format(coordinator.checkpoints_dir()))
train_net(coordinator) train_net(coordinator, start_iter)
\ No newline at end of file
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!