Commit ca255ea0 by Ting PAN

Change to the PEP8 code style

1 parent 71593766
Showing with 1572 additions and 1414 deletions
## General
# Compiled Object files # Compiled Object files
*.slo *.slo
*.lo *.lo
...@@ -7,13 +5,15 @@ ...@@ -7,13 +5,15 @@
*.cuo *.cuo
# Compiled Dynamic libraries # Compiled Dynamic libraries
# *.so *.so
*.dll
*.dylib *.dylib
# Compiled Static libraries # Compiled Static libraries
*.lai *.lai
*.la *.la
#*.a *.a
*.lib
# Compiled python # Compiled python
*.pyc *.pyc
...@@ -40,6 +40,9 @@ __pycache__ ...@@ -40,6 +40,9 @@ __pycache__
# QtCreator files # QtCreator files
*.user *.user
# VSCode files
.vscode
# PyCharm files # PyCharm files
.idea .idea
......
------------------------------------------------------------------------ ------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet. The list of most significant changes made over time in SeetaDet.
SeetaDet 0.1.2 (20190723)
Dragon Minimum Required (Version 0.3.0.0)
Changes:
Preview Features:
- Change to the PEP8 code style.
- Adapt to the new Dragon API.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.1.1 (20190409) SeetaDet 0.1.1 (20190409)
Dragon Minimum Required (Version 0.3.0.0) Dragon Minimum Required (Version 0.3.0.0)
......
...@@ -21,8 +21,8 @@ set(CUDA_ARCH -gencode arch=compute_30,code=sm_30 ...@@ -21,8 +21,8 @@ set(CUDA_ARCH -gencode arch=compute_30,code=sm_30
# ---------------- User Config ---------------- # ---------------- User Config ----------------
# ---[ Dependencies # ---[ Dependencies
include(${PROJECT_SOURCE_DIR}/CMake/FindPythonLibs.cmake) include(${PROJECT_SOURCE_DIR}/cmake/FindPythonLibs.cmake)
include(${PROJECT_SOURCE_DIR}/CMake/FindNumPy.cmake) include(${PROJECT_SOURCE_DIR}/cmake/FindNumPy.cmake)
FIND_PACKAGE(CUDA REQUIRED) FIND_PACKAGE(CUDA REQUIRED)
set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD 11)
......
# -------------------------------------------------------- # ------------------------------------------------------------
# Detectron @ Dragon # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# Copyright(c) 2017 SeetaTech #
# Written by Ting Pan # Licensed under the BSD 2-Clause License.
# -------------------------------------------------------- # You should have received a copy of the BSD 2-Clause License
\ No newline at end of file # along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
...@@ -8,8 +8,3 @@ ...@@ -8,8 +8,3 @@
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from .distort import Distortor
from .expand import Expander
from .sample import Sampler
from .resize import Resizer
\ No newline at end of file
...@@ -16,6 +16,7 @@ from __future__ import print_function ...@@ -16,6 +16,7 @@ from __future__ import print_function
import os import os
import sys import sys
import time import time
import cv2 import cv2
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from dragon.tools.db import LMDB from dragon.tools.db import LMDB
...@@ -23,6 +24,7 @@ from dragon.tools.db import LMDB ...@@ -23,6 +24,7 @@ from dragon.tools.db import LMDB
sys.path.insert(0, '../../..') sys.path.insert(0, '../../..')
from lib.proto import anno_pb2 as pb from lib.proto import anno_pb2 as pb
ZFILL = 8 ZFILL = 8
ENCODE_QUALITY = 95 ENCODE_QUALITY = 95
...@@ -46,14 +48,23 @@ def make_datum(image_file, xml_file): ...@@ -46,14 +48,23 @@ def make_datum(image_file, xml_file):
datum = pb.Datum() datum = pb.Datum()
im = cv2.imread(image_file) im = cv2.imread(image_file)
if im is None or im.shape[0] == 0 or im.shape[1] == 0:
print("XML have not objects ignored: ", xml_file)
return None
datum.height, datum.width, datum.channels = im.shape datum.height, datum.width, datum.channels = im.shape
datum.encoded = ENCODE_QUALITY != 100 datum.encoded = ENCODE_QUALITY != 100
if datum.encoded: if datum.encoded:
result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY]) result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
if im is None or im.shape[0] == 0 or im.shape[1] == 0:
print("XML have not objects ignored: ", xml_file)
return None
datum.data = im.tostring() datum.data = im.tostring()
anno_datum.datum.CopyFrom(datum) anno_datum.datum.CopyFrom(datum)
anno_datum.filename = filename.split('.')[0] anno_datum.filename = filename.split('.')[0]
if len(objs) == 0:
return None
for ix, obj in enumerate(objs): for ix, obj in enumerate(objs):
anno = pb.Annotation() anno = pb.Annotation()
bbox = obj.find('bndbox') bbox = obj.find('bndbox')
...@@ -64,6 +75,7 @@ def make_datum(image_file, xml_file): ...@@ -64,6 +75,7 @@ def make_datum(image_file, xml_file):
cls = obj.find('name').text.strip() cls = obj.find('name').text.strip()
anno.x1, anno.y1, anno.x2, anno.y2 = (x1, y1, x2, y2) anno.x1, anno.y1, anno.x2, anno.y2 = (x1, y1, x2, y2)
anno.name = cls anno.name = cls
class_name_set.add(cls)
anno.difficult = False anno.difficult = False
if obj.find('difficult') is not None: if obj.find('difficult') is not None:
anno.difficult = int(obj.find('difficult').text) == 1 anno.difficult = int(obj.find('difficult').text) == 1
...@@ -72,13 +84,15 @@ def make_datum(image_file, xml_file): ...@@ -72,13 +84,15 @@ def make_datum(image_file, xml_file):
return anno_datum return anno_datum
def make_db(database_file, def make_db(
database_file,
images_path, images_path,
annotations_path, annotations_path,
imagesets_path, imagesets_path,
splits): splits,
):
if os.path.isdir(database_file) is True: if os.path.isdir(database_file) is True:
raise ValueError('The database path is already exist.') print('Warning: The database path is already exist.')
else: else:
root_dir = database_file[:database_file.rfind('/')] root_dir = database_file[:database_file.rfind('/')]
if not os.path.exists(root_dir): if not os.path.exists(root_dir):
...@@ -95,12 +109,12 @@ def make_db(database_file, ...@@ -95,12 +109,12 @@ def make_db(database_file,
print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime())) print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
db = LMDB(max_commit=10000) db = LMDB(max_commit=1000)
db.open(database_file, mode='w') db.open(database_file, mode='w')
count = 0 count = 0
total_line = 0 total_line = 0
start_time = time.time() start_time = time.time()
zfill_flag = '{0:0%d}' % (ZFILL) zfill_flag = '{0:0%d}' % ZFILL
for db_idx, split in enumerate(splits): for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt') split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
...@@ -109,18 +123,18 @@ def make_db(database_file, ...@@ -109,18 +123,18 @@ def make_db(database_file,
lines = f.readlines() lines = f.readlines()
total_line += len(lines) total_line += len(lines)
for line in lines: for line in lines:
count += 1
if count % 10000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
filename = line.strip() filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg') image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml') xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
datum = make_datum(image_file, xml_file) datum = make_datum(image_file, xml_file)
if datum is not None:
count += 1
db.put(zfill_flag.format(count - 1), datum.SerializeToString()) db.put(zfill_flag.format(count - 1), datum.SerializeToString())
if count % 1000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
now_time = time.time() now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time)) print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
......
# -------------------------------------------------------- # ------------------------------------------------------------
# Detectron # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# Copyright(c) 2017 SeetaTech #
# Written by Ting Pan # Licensed under the BSD 2-Clause License.
# -------------------------------------------------------- # You should have received a copy of the BSD 2-Clause License
\ No newline at end of file # along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
...@@ -155,11 +155,11 @@ __C.TEST.SCORE_THRESH = 0.05 ...@@ -155,11 +155,11 @@ __C.TEST.SCORE_THRESH = 0.05
# The threshold for predicting masks # The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5 __C.TEST.BINARY_THRESH = 0.5
## NMS threshold used on RPN proposals # NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7 __C.TEST.RPN_NMS_THRESH = 0.7
## Number of top scoring boxes to keep before apply NMS to RPN proposals # Number of top scoring boxes to keep before apply NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000 __C.TEST.RPN_PRE_NMS_TOP_N = 6000
## Number of top scoring boxes to keep after applying NMS to RPN proposals # Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300 __C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 0 __C.TEST.RPN_MIN_SIZE = 0
...@@ -199,7 +199,7 @@ __C.MODEL.TYPE = '' ...@@ -199,7 +199,7 @@ __C.MODEL.TYPE = ''
# The float precision for training and inference # The float precision for training and inference
# (FLOAT32, FLOAT16,) # (FLOAT32, FLOAT16,)
__C.MODEL.DATA_TYPE= 'FLOAT32' __C.MODEL.DATA_TYPE = 'FLOAT32'
# The backbone # The backbone
__C.MODEL.BACKBONE = '' __C.MODEL.BACKBONE = ''
...@@ -560,10 +560,11 @@ def _merge_a_into_b(a, b): ...@@ -560,10 +560,11 @@ def _merge_a_into_b(a, b):
"""Merge config dictionary a into config dictionary b, clobbering the """Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a. options in b whenever they are also specified in a.
""" """
if not isinstance(a, dict): return if not isinstance(a, dict):
return
for k, v in a.items(): for k, v in a.items():
# a must specify keys that are in b # a must specify keys that are in b
if not k in b: if k not in b:
raise KeyError('{} is not a valid config key'.format(k)) raise KeyError('{} is not a valid config key'.format(k))
# the types must match, too # the types must match, too
v = _check_and_coerce_cfg_value_type(v, b[k], k) v = _check_and_coerce_cfg_value_type(v, b[k], k)
...@@ -598,15 +599,15 @@ def cfg_from_list(cfg_list): ...@@ -598,15 +599,15 @@ def cfg_from_list(cfg_list):
assert d.has_key(subkey) assert d.has_key(subkey)
d = d[subkey] d = d[subkey]
subkey = key_list[-1] subkey = key_list[-1]
assert d.has_key(subkey) assert subkey in d
try: try:
value = literal_eval(v) value = literal_eval(v)
except: except:
# handle the case when v is a string literal # Handle the case when v is a string literal
value = v value = v
assert type(value) == type(d[subkey]), \ assert type(value) == type(d[subkey]), \
'type {} does not match original type {}'.format( 'type {} does not match original type {}'\
type(value), type(d[subkey])) .format(type(value), type(d[subkey]))
d[subkey] = value d[subkey] = value
...@@ -618,8 +619,10 @@ def _check_and_coerce_cfg_value_type(value_a, value_b, key): ...@@ -618,8 +619,10 @@ def _check_and_coerce_cfg_value_type(value_a, value_b, key):
# The types must match (with some exceptions) # The types must match (with some exceptions)
type_b = type(value_b) type_b = type(value_b)
type_a = type(value_a) type_a = type(value_a)
if type_a is type_b: return value_a if type_a is type_b:
if type_b is float and type_a is int: return float(value_a) return value_a
if type_b is float and type_a is int:
return float(value_a)
# Exceptions: numpy arrays, strings, tuple<->list # Exceptions: numpy arrays, strings, tuple<->list
if isinstance(value_b, np.ndarray): if isinstance(value_b, np.ndarray):
......
...@@ -18,7 +18,8 @@ import shutil ...@@ -18,7 +18,8 @@ import shutil
import time import time
import numpy as np import numpy as np
from lib.core.config import cfg, cfg_from_file from lib.core.config import cfg
from lib.core.config import cfg_from_file
class Coordinator(object): class Coordinator(object):
...@@ -44,7 +45,8 @@ class Coordinator(object): ...@@ -44,7 +45,8 @@ class Coordinator(object):
def _path_at(self, file, auto_create=True): def _path_at(self, file, auto_create=True):
path = os.path.abspath(os.path.join(self.experiment_dir, file)) path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path): os.makedirs(path) if auto_create and not os.path.exists(path):
os.makedirs(path)
return path return path
def checkpoints_dir(self): def checkpoints_dir(self):
...@@ -67,8 +69,10 @@ class Coordinator(object): ...@@ -67,8 +69,10 @@ class Coordinator(object):
return os.path.join(self.checkpoints_dir(), files[ix]), step return os.path.join(self.checkpoints_dir(), files[ix]), step
steps.append(step) steps.append(step)
if global_step is None: if global_step is None:
if len(files) == 0: return None, 0 if len(files) == 0:
last_idx = int(np.argmax(steps)); last_step = steps[last_idx] return None, 0
last_idx = int(np.argmax(steps))
last_step = steps[last_idx]
return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step
return None, 0 return None, 0
result = locate() result = locate()
......
...@@ -30,7 +30,8 @@ class Solver(object): ...@@ -30,7 +30,8 @@ class Solver(object):
self.opt_arguments = { self.opt_arguments = {
'scale_gradient': 1. / ( 'scale_gradient': 1. / (
cfg.SOLVER.LOSS_SCALING * cfg.SOLVER.LOSS_SCALING *
cfg.SOLVER.ITER_SIZE), cfg.SOLVER.ITER_SIZE
),
'clip_gradient': float(cfg.SOLVER.CLIP_NORM), 'clip_gradient': float(cfg.SOLVER.CLIP_NORM),
'weight_decay': cfg.SOLVER.WEIGHT_DECAY, 'weight_decay': cfg.SOLVER.WEIGHT_DECAY,
} }
...@@ -57,8 +58,10 @@ class Solver(object): ...@@ -57,8 +58,10 @@ class Solver(object):
} }
] ]
for name, param in self.detector.named_parameters(): for name, param in self.detector.named_parameters():
if 'bias' in name: param_groups[1]['params'].append(param) if 'bias' in name:
else: param_groups[0]['params'].append(param) param_groups[1]['params'].append(param)
else:
param_groups[0]['params'].append(param)
return param_groups return param_groups
def set_learning_rate(self): def set_learning_rate(self):
...@@ -67,8 +70,10 @@ class Solver(object): ...@@ -67,8 +70,10 @@ class Solver(object):
if self._current_step < len(cfg.SOLVER.STEPS) \ if self._current_step < len(cfg.SOLVER.STEPS) \
and self.iter >= cfg.SOLVER.STEPS[self._current_step]: and self.iter >= cfg.SOLVER.STEPS[self._current_step]:
self._current_step = self._current_step + 1 self._current_step = self._current_step + 1
logger.info('MultiStep Status: Iteration {}, step = {}' \ logger.info(
.format(self.iter, self._current_step)) 'MultiStep Status: Iteration {}, step = {}'
.format(self.iter, self._current_step)
)
new_lr = cfg.SOLVER.BASE_LR * ( new_lr = cfg.SOLVER.BASE_LR * (
cfg.SOLVER.GAMMA ** self._current_step) cfg.SOLVER.GAMMA ** self._current_step)
self.optimizer.param_groups[0]['lr'] = \ self.optimizer.param_groups[0]['lr'] = \
...@@ -77,13 +82,14 @@ class Solver(object): ...@@ -77,13 +82,14 @@ class Solver(object):
raise ValueError('Unknown lr policy: ' + policy) raise ValueError('Unknown lr policy: ' + policy)
def one_step(self): def one_step(self):
def add_loss(x, y):
return y if x is None else x + y
# Forward & Backward & Compute_loss # Forward & Backward & Compute_loss
iter_size = cfg.SOLVER.ITER_SIZE iter_size = cfg.SOLVER.ITER_SIZE
loss_scaling = cfg.SOLVER.LOSS_SCALING loss_scaling = cfg.SOLVER.LOSS_SCALING
run_time = 0.; stats = {'loss': {'total': 0.}, 'iter': self.iter} stats = {'loss': {'total': 0.}, 'iter': self.iter}
add_loss = lambda x, y: y if x is None else x + y
tic = time.time() run_time, tic = 0., time.time()
if iter_size > 1: if iter_size > 1:
# Dragon is designed for manual gradients accumulating # Dragon is designed for manual gradients accumulating
...@@ -99,10 +105,13 @@ class Solver(object): ...@@ -99,10 +105,13 @@ class Solver(object):
stats['loss'][k] = 0. stats['loss'][k] = 0.
total_loss = add_loss(total_loss, v) total_loss = add_loss(total_loss, v)
stats['loss'][k] += float(v) * loss_scaling stats['loss'][k] += float(v) * loss_scaling
if loss_scaling != 1.: total_loss *= loss_scaling if loss_scaling != 1.:
total_loss *= loss_scaling
stats['loss']['total'] += float(total_loss) stats['loss']['total'] += float(total_loss)
total_loss.backward() total_loss.backward()
if iter_size > 1: self.optimizer.accumulate_grad() if iter_size > 1:
self.optimizer.accumulate_grad()
run_time += (time.time() - tic) run_time += (time.time() - tic)
...@@ -190,5 +199,8 @@ def get_solver_func(type): ...@@ -190,5 +199,8 @@ def get_solver_func(type):
elif type == 'Adam': elif type == 'Adam':
return AdamSolver return AdamSolver
else: else:
raise ValueError('Unsupported solver type: {}.\n' raise ValueError(
'Excepted in (MomentumSGD, Nesterov, RMSProp, Adam)'.format(type)) 'Unsupported solver type: {}.\n'
\ No newline at end of file 'Excepted in (MomentumSGD, Nesterov, RMSProp, Adam).'
.format(type)
)
...@@ -33,25 +33,27 @@ class TestServer(object): ...@@ -33,25 +33,27 @@ class TestServer(object):
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = DataReader(**{'source': self.imdb.source}) self.data_reader = DataReader(**{'source': self.imdb.source})
self.data_transformer = DataTransformer() self.data_transformer = DataTransformer()
self.data_reader.Q_out = Queue(cfg.TEST.IMS_PER_BATCH) self.data_reader.q_out = Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.start() self.data_reader.start()
self.gt_recs = OrderedDict() self.gt_recs = OrderedDict()
self.output_dir = output_dir self.output_dir = output_dir
if cfg.VIS_ON_FILE: if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis') self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir): os.makedirs(self.vis_dir) if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls): def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls() self.data_transformer = transformer_cls()
def get_image(self): def get_image(self):
serialized = self.data_reader.Q_out.get() serialized = self.data_reader.q_out.get()
image = self.data_transformer.get_image(serialized) image = self.data_transformer.get_image(serialized)
image_id, objects = self.data_transformer.get_annotations(serialized) image_id, objects = self.data_transformer.get_annotations(serialized)
self.gt_recs[image_id] = { self.gt_recs[image_id] = {
'objects': objects, 'objects': objects,
'width': image.shape[1], 'width': image.shape[1],
'height': image.shape[0]} 'height': image.shape[0],
}
return image_id, image return image_id, image
def get_save_filename(self, image_id, ext='.jpg'): def get_save_filename(self, image_id, ext='.jpg'):
...@@ -60,9 +62,10 @@ class TestServer(object): ...@@ -60,9 +62,10 @@ class TestServer(object):
def get_records(self): def get_records(self):
if len(self.gt_recs) != self.num_images: if len(self.gt_recs) != self.num_images:
raise RuntimeError('Loading {} records, ' raise RuntimeError(
'while the specific database required {}'.format( 'Loading {} records, while {} required.'
len(self.gt_recs), self.num_images)) .format(len(self.gt_recs), self.num_images),
)
return self.gt_recs return self.gt_recs
def evaluate_detections(self, all_boxes): def evaluate_detections(self, all_boxes):
...@@ -87,7 +90,8 @@ class InferServer(object): ...@@ -87,7 +90,8 @@ class InferServer(object):
self.image_idx = 0 self.image_idx = 0
if cfg.VIS_ON_FILE: if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis') self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir): os.makedirs(self.vis_dir) if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls): def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls() self.data_transformer = transformer_cls()
...@@ -99,7 +103,8 @@ class InferServer(object): ...@@ -99,7 +103,8 @@ class InferServer(object):
self.image_idx = (self.image_idx + 1) % self.num_images self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = { self.gt_recs[image_id] = {
'width': image.shape[1], 'width': image.shape[1],
'height': image.shape[0]} 'height': image.shape[0],
}
return image_id, image return image_id, image
def get_save_filename(self, image_id, ext='.jpg'): def get_save_filename(self, image_id, ext='.jpg'):
...@@ -108,15 +113,23 @@ class InferServer(object): ...@@ -108,15 +113,23 @@ class InferServer(object):
def get_records(self): def get_records(self):
if len(self.gt_recs) != self.num_images: if len(self.gt_recs) != self.num_images:
raise RuntimeError('Loading {} records, ' raise RuntimeError(
'while the specific database required {}'.format( 'Loading {} records, while {} required.'
len(self.gt_recs), self.num_images)) .format(len(self.gt_recs), self.num_images),
)
return self.gt_recs return self.gt_recs
def evaluate_detections(self, all_boxes): def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections( self.imdb.evaluate_detections(
all_boxes, self.get_records(), self.output_dir) all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks): def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations( self.imdb.evaluate_segmentations(
all_boxes, all_masks, self.get_records(), self.output_dir) all_boxes,
\ No newline at end of file all_masks,
self.get_records(),
self.output_dir,
)
...@@ -17,17 +17,17 @@ from __future__ import absolute_import ...@@ -17,17 +17,17 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import collections
import datetime import datetime
from collections import OrderedDict import os
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.core.solver import get_solver_func from lib.core.solver import get_solver_func
from lib.utils.timer import Timer
from lib.utils.stats import SmoothedValue
from lib.utils import logger from lib.utils import logger
from lib.utils.stats import SmoothedValue
from lib.utils.timer import Timer
class SolverWrapper(object): class SolverWrapper(object):
...@@ -51,13 +51,14 @@ class SolverWrapper(object): ...@@ -51,13 +51,14 @@ class SolverWrapper(object):
self.solver.detector.cuda(cfg.GPU_ID) self.solver.detector.cuda(cfg.GPU_ID)
# Plan the metrics # Plan the metrics
self.metrics = OrderedDict() self.metrics = collections.OrderedDict()
if cfg.ENABLE_TENSOR_BOARD: if cfg.ENABLE_TENSOR_BOARD:
from dragon.tools.tensorboard import TensorBoard from dragon.tools.tensorboard import TensorBoard
self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs') self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs')
def snapshot(self): def snapshot(self):
if not logger.is_root(): return None if not logger.is_root():
return None
filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}' filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}'
.format(self.solver.iter) + '.pth') .format(self.solver.iter) + '.pth')
filename = os.path.join(self.output_dir, filename) filename = os.path.join(self.output_dir, filename)
...@@ -77,19 +78,35 @@ class SolverWrapper(object): ...@@ -77,19 +78,35 @@ class SolverWrapper(object):
self.board.scalar_summary('time', stats['time'], stats['iter']) self.board.scalar_summary('time', stats['time'], stats['iter'])
for k, v in self.metrics.items(): for k, v in self.metrics.items():
if k == 'total': if k == 'total':
self.board.scalar_summary('total_loss', v.GetMedianValue(), stats['iter']) self.board.scalar_summary(
else: self.board.scalar_summary(k, v.GetMedianValue(), stats['iter']) 'total_loss',
v.GetMedianValue(),
stats['iter'],
)
else:
self.board.scalar_summary(
k,
v.GetMedianValue(),
stats['iter'],
)
def step(self, display=False): def step(self, display=False):
stats = self.solver.one_step() stats = self.solver.one_step()
self.add_metrics(stats) self.add_metrics(stats)
self.send_metrics(stats) self.send_metrics(stats)
if display: if display:
logger.info('Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (stats['iter'], logger.info(
stats['lr'], self.metrics['total'].GetMedianValue(), stats['time'])) 'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (
stats['iter'], stats['lr'],
self.metrics['total'].GetMedianValue(),
stats['time'],
)
)
for k, v in self.metrics.items(): for k, v in self.metrics.items():
if k == 'total': continue if k == 'total':
logger.info(' Train net output({}): {}'.format(k, v.GetMedianValue())) continue
logger.info(' ' * 10 + 'Train net output({}): {}'
.format(k, v.GetMedianValue()))
def train_model(self): def train_model(self):
"""Network training loop.""" """Network training loop."""
...@@ -104,9 +121,8 @@ class SolverWrapper(object): ...@@ -104,9 +121,8 @@ class SolverWrapper(object):
start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha) start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha)
# Apply 1-step SGD update # Apply 1-step SGD update
timer.tic() with timer.tic_and_toc():
self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0) self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0)
timer.toc()
if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0: if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0:
average_time = timer.average_time average_time = timer.average_time
...@@ -114,8 +130,10 @@ class SolverWrapper(object): ...@@ -114,8 +130,10 @@ class SolverWrapper(object):
cfg.SOLVER.MAX_ITERS - self.solver.iter) cfg.SOLVER.MAX_ITERS - self.solver.iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds))) eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS
logger.info('< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >' logger.info(
.format(progress, timer.average_time, eta)) '< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta)
)
if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0: if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0:
last_snapshot_iter = self.solver.iter last_snapshot_iter = self.solver.iter
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
...@@ -13,6 +13,10 @@ ...@@ -13,6 +13,10 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os import os
from lib.datasets.taas import TaaS from lib.datasets.taas import TaaS
......
...@@ -61,7 +61,7 @@ class imdb(object): ...@@ -61,7 +61,7 @@ class imdb(object):
return num_entries return num_entries
def evaluate_detections(self, all_boxes, gt_recs, output_dir): def evaluate_detections(self, all_boxes, gt_recs, output_dir):
raise NotImplementedError pass
def evaluate_masks(self, all_boxes, all_masks, output_dir): def evaluate_masks(self, all_boxes, all_masks, output_dir):
raise NotImplementedError pass
\ No newline at end of file
...@@ -19,16 +19,16 @@ from __future__ import print_function ...@@ -19,16 +19,16 @@ from __future__ import print_function
import cv2 import cv2
import numpy as np import numpy as np
try: try:
import cPickle import cPickle
except: except:
import pickle as cPickle import pickle as cPickle
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.mask_transform import mask_overlap
from lib.utils.boxes import expand_boxes
from lib.pycocotools.mask_utils import mask_rle2im from lib.pycocotools.mask_utils import mask_rle2im
from lib.utils.boxes import expand_boxes
from lib.utils.mask_transform import mask_overlap
def voc_ap(rec, prec, use_07_metric=False): def voc_ap(rec, prec, use_07_metric=False):
...@@ -65,8 +65,13 @@ def voc_ap(rec, prec, use_07_metric=False): ...@@ -65,8 +65,13 @@ def voc_ap(rec, prec, use_07_metric=False):
return ap return ap
def voc_bbox_eval(det_file, gt_recs, cls_name, def voc_bbox_eval(
IoU=0.5, use_07_metric=False): det_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
class_recs = {} class_recs = {}
n_pos = 0 n_pos = 0
for image_name, rec in gt_recs.items(): for image_name, rec in gt_recs.items():
...@@ -81,35 +86,35 @@ def voc_bbox_eval(det_file, gt_recs, cls_name, ...@@ -81,35 +86,35 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
'det': det 'det': det
} }
# read detections # Read detections
with open(det_file, 'r') as f: lines = f.readlines() with open(det_file, 'r') as f:
lines = f.readlines()
splitlines = [x.strip().split(' ') for x in lines] splitlines = [x.strip().split(' ') for x in lines]
image_ids = [x[0] for x in splitlines] image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines]) confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
# avoid IndexError if detecting nothing # Avoid IndexError if detecting nothing
if len(BB) == 0: return 0, 0, -1 if len(BB) == 0:
return 0, 0, -1
# sort by confidence # Sort by confidence
sorted_ind = np.argsort(-confidence) sorted_ind = np.argsort(-confidence)
BB = BB[sorted_ind, :] BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind] image_ids = [image_ids[x] for x in sorted_ind]
# go down dets and mark TPs and FPs # Go down detections and mark TPs and FPs
nd = len(image_ids) nd = len(image_ids)
tp = np.zeros(nd) tp, fp = np.zeros(nd), np.zeros(nd)
fp = np.zeros(nd)
for d in range(nd): for d in range(nd):
R = class_recs[image_ids[d]] R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float) bb = BB[d, :].astype(float)
ovmax = -np.inf ovmax, jmax = -np.inf, 0
BBGT = R['bbox'].astype(float) BBGT = R['bbox'].astype(float)
if BBGT.size > 0: if BBGT.size > 0:
# compute overlaps # Compute overlaps intersection
# intersection
ixmin = np.maximum(BBGT[:, 0], bb[0]) ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1]) iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2]) ixmax = np.minimum(BBGT[:, 2], bb[2])
...@@ -118,7 +123,7 @@ def voc_bbox_eval(det_file, gt_recs, cls_name, ...@@ -118,7 +123,7 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
ih = np.maximum(iymax - iymin + 1., 0.) ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih inters = iw * ih
# union # Union
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
(BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 2] - BBGT[:, 0] + 1.) *
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
...@@ -149,8 +154,14 @@ def voc_bbox_eval(det_file, gt_recs, cls_name, ...@@ -149,8 +154,14 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
return rec, prec, ap return rec, prec, ap
def voc_segm_eval(det_file, seg_file, gt_recs, cls_name, def voc_segm_eval(
IoU=0.5, use_07_metric=False): det_file,
seg_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
# 0. Constants # 0. Constants
M = cfg.MRCNN.RESOLUTION M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH binary_thresh = cfg.TEST.BINARY_THRESH
...@@ -175,8 +186,10 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name, ...@@ -175,8 +186,10 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
image_names.append(image_name) image_names.append(image_name)
# 2. Get predict pickle file for this class # 2. Get predict pickle file for this class
with open(det_file, 'rb') as f: boxes_pkl = cPickle.load(f) with open(det_file, 'rb') as f:
with open(seg_file, 'rb') as f: masks_pkl = cPickle.load(f) boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f)
# 3. Pre-compute number of total instances to allocate memory # 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs) num_images = len(gt_recs)
...@@ -185,7 +198,8 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name, ...@@ -185,7 +198,8 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
box_num += len(boxes_pkl[im_i]) box_num += len(boxes_pkl[im_i])
# avoid IndexError if detecting nothing # avoid IndexError if detecting nothing
if box_num == 0: return 0, 0, -1 if box_num == 0:
return 0, 0, -1
# 4. Re-organize all the predicted boxes # 4. Re-organize all the predicted boxes
new_boxes = np.zeros((box_num, 5)) new_boxes = np.zeros((box_num, 5))
...@@ -223,11 +237,10 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name, ...@@ -223,11 +237,10 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
fp[i] = 1 fp[i] = 1
continue continue
R = class_recs[image_name] R = class_recs[image_name]
im_h, im_w = \ im_h = gt_recs[image_name]['height']
gt_recs[image_name]['height'], \ im_w = gt_recs[image_name]['width']
gt_recs[image_name]['width']
# decode mask # Decode mask
ref_box = ref_boxes[i, :4] ref_box = ref_boxes[i, :4]
mask = new_masks[i] mask = new_masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :] padded_mask[1:-1, 1:-1] = mask[:, :]
...@@ -244,14 +257,14 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name, ...@@ -244,14 +257,14 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]), pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]),
(x1 - ref_box[0]): (x2 - ref_box[0])] (x1 - ref_box[0]): (x2 - ref_box[0])]
# calculate max region overlap # Calculate max region overlap
ovmax = -1; jmax = -1 ovmax, jmax = -1, -1
for j in range(len(R['det'])): for j in range(len(R['det'])):
gt_mask_bound = R['bbox'][j].astype(int) gt_mask_bound = R['bbox'][j].astype(int)
pred_mask_bound = new_boxes[i, :4].astype(int) pred_mask_bound = new_boxes[i, :4].astype(int)
crop_mask = R['mask'][j][gt_mask_bound[1] : gt_mask_bound[3] + 1, crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0] : gt_mask_bound[2] + 1] gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask) ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask)
......
...@@ -13,7 +13,7 @@ from __future__ import absolute_import ...@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.faster_rcnn.layers.proposal_layer import ProposalLayer from lib.faster_rcnn.layers.proposal_layer import ProposalLayer
from lib.faster_rcnn.layers.proposal_target_layer import ProposalTargetLayer from lib.faster_rcnn.layers.proposal_target_layer import ProposalTargetLayer
...@@ -13,21 +13,21 @@ from __future__ import absolute_import ...@@ -13,21 +13,21 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import multiprocessing
import numpy as np import numpy as np
from multiprocessing import Process
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import im_list_to_blob from lib.utils.blob import im_list_to_blob
class BlobFetcher(Process): class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(BlobFetcher, self).__init__() super(BlobFetcher, self).__init__()
self.Q1_in = self.Q2_in = self.Q_out = None self.q1_in = self.q2_in = self.q_out = None
self.daemon = True self.daemon = True
def get(self, Q_in): def get(self, Q_in):
processed_ims = []; ims_info = []; all_boxes = [] processed_ims, ims_info, all_boxes = [], [], []
for ix in range(cfg.TRAIN.IMS_PER_BATCH): for ix in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = Q_in.get() im, im_scale, gt_boxes = Q_in.get()
processed_ims.append(im) processed_ims.append(im)
...@@ -46,7 +46,7 @@ class BlobFetcher(Process): ...@@ -46,7 +46,7 @@ class BlobFetcher(Process):
def run(self):
    """Forward batches from the input queues to the output queue forever.

    ``q1_in`` is polled first and ``q2_in`` second. As soon as one of
    them holds at least ``cfg.TRAIN.IMS_PER_BATCH`` items, a batch is
    built from it by ``self.get`` and put into ``q_out``.
    """
    while True:
        # q1_in has priority: q2_in is only inspected when q1_in is short
        for queue in (self.q1_in, self.q2_in):
            if queue.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
                self.q_out.put(self.get(queue))
                break
\ No newline at end of file
...@@ -13,16 +13,17 @@ from __future__ import absolute_import ...@@ -13,16 +13,17 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from multiprocessing import Queue
import time import time
import dragon
import pprint import pprint
from multiprocessing import Queue
import dragon.core.mpi as mpi
from lib.core.config import cfg from lib.core.config import cfg
import lib.utils.logger as logger
from lib.faster_rcnn.data.data_reader import DataReader from lib.faster_rcnn.data.data_reader import DataReader
from lib.faster_rcnn.data.data_transformer import DataTransformer from lib.faster_rcnn.data.data_transformer import DataTransformer
from lib.faster_rcnn.data.blob_fetcher import BlobFetcher from lib.faster_rcnn.data.blob_fetcher import BlobFetcher
from lib.utils import logger
class DataBatch(object): class DataBatch(object):
...@@ -53,13 +54,14 @@ class DataBatch(object): ...@@ -53,13 +54,14 @@ class DataBatch(object):
super(DataBatch, self).__init__() super(DataBatch, self).__init__()
# Init mpi # Init mpi
global_rank, local_rank, group_size = 0, 0, 1 global_rank, local_rank, group_size = 0, 0, 1
if mpi.Is_Init(): if dragon.mpi.is_init():
idx, group = mpi.AllowParallel() group = dragon.mpi.is_parallel()
if idx != -1: # DataParallel if group is not None: # DataParallel
global_rank = mpi.Rank() global_rank = dragon.mpi.rank()
group_size = len(group) group_size = len(group)
for i, node in enumerate(group): for i, node in enumerate(group):
if global_rank == node: local_rank = i if global_rank == node:
local_rank = i
kwargs['group_size'] = group_size kwargs['group_size'] = group_size
# Configuration # Configuration
...@@ -89,7 +91,7 @@ class DataBatch(object): ...@@ -89,7 +91,7 @@ class DataBatch(object):
self._readers = [] self._readers = []
for i in range(self._num_readers): for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs)) self._readers.append(DataReader(**kwargs))
self._readers[-1].Q_out = self.Q1 self._readers[-1].q_out = self.Q1
for i in range(self._num_readers): for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers part_idx, num_parts = i, self._num_readers
...@@ -106,9 +108,9 @@ class DataBatch(object): ...@@ -106,9 +108,9 @@ class DataBatch(object):
for i in range(self._num_transformers): for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs) transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers) transformer._rng_seed += (i + local_rank * self._num_transformers)
transformer.Q_in = self.Q1 transformer.q_in = self.Q1
transformer.Q1_out = self.Q21 transformer.q1_out = self.Q21
transformer.Q2_out = self.Q22 transformer.q2_out = self.Q22
transformer.start() transformer.start()
self._transformers.append(transformer) self._transformers.append(transformer)
time.sleep(0.1) time.sleep(0.1)
...@@ -117,15 +119,17 @@ class DataBatch(object): ...@@ -117,15 +119,17 @@ class DataBatch(object):
self._fetchers = [] self._fetchers = []
for i in range(self._num_fetchers): for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs) fetcher = BlobFetcher(**kwargs)
fetcher.Q1_in = self.Q21 fetcher.q1_in = self.Q21
fetcher.Q2_in = self.Q22 fetcher.q2_in = self.Q22
fetcher.Q_out = self.Q3 fetcher.q_out = self.Q3
fetcher.start() fetcher.start()
self._fetchers.append(fetcher) self._fetchers.append(fetcher)
time.sleep(0.1) time.sleep(0.1)
# Echo only from local rank 0 to avoid duplicated output
if local_rank == 0: self.echo() if local_rank == 0:
self.echo()
def cleanup(): def cleanup():
def terminate(processes): def terminate(processes):
for process in processes: for process in processes:
...@@ -137,6 +141,7 @@ class DataBatch(object): ...@@ -137,6 +141,7 @@ class DataBatch(object):
logger.info('Terminating DataTransformer ......') logger.info('Terminating DataTransformer ......')
terminate(self._readers) terminate(self._readers)
logger.info('Terminating DataReader......') logger.info('Terminating DataReader......')
import atexit import atexit
atexit.register(cleanup) atexit.register(cleanup)
......
...@@ -14,21 +14,17 @@ from __future__ import division ...@@ -14,21 +14,17 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import math import math
import numpy
import multiprocessing import multiprocessing
import numpy
from dragon import config as _cfg from dragon.tools import db
from dragon.tools import db as _db from lib.core.config import cfg
class DataReader(multiprocessing.Process): class DataReader(multiprocessing.Process):
"""DataReader is deployed to queue encoded str from `LMDB`_. """Collect encoded str from `LMDB`_.
It is supported to adaptively partition and shuffle records over all distributed nodes.
""" Partition and shuffle records over distributed nodes.
def __init__(self, **kwargs):
"""Construct a ``DataReader``.
Parameters Parameters
---------- ----------
...@@ -40,14 +36,20 @@ class DataReader(multiprocessing.Process): ...@@ -40,14 +36,20 @@ class DataReader(multiprocessing.Process):
The number of chunks to split. The number of chunks to split.
""" """
def __init__(self, **kwargs):
    """Create a DataReader."""
    super(DataReader, self).__init__()

    # Options taken from the keyword arguments
    self._source = kwargs.get('source', '')
    self._use_shuffle = kwargs.get('shuffle', False)
    self._num_chunks = kwargs.get('num_chunks', 2048)

    # Partition of this reader
    self._part_idx = 0
    self._num_parts = 1

    # Cursors over records and chunks
    self._cursor = 0
    self._chunk_cursor = 0
    self._chunk_size = 0
    self._perm_size = 0
    self._head = 0
    self._tail = 0
    self._num_entries = 0

    # Placeholders that are filled after the process starts
    self._db = None
    self._zfill = None
    self._perm = None

    self._rng_seed = cfg.RNG_SEED
    # Output queue, attached from outside
    self.q_out = None
    self.daemon = True
def element(self): def element(self):
...@@ -69,10 +71,6 @@ class DataReader(multiprocessing.Process): ...@@ -69,10 +71,6 @@ class DataReader(multiprocessing.Process):
target : int target : int
The key of the record. The key of the record.
Returns
-------
None
Notes Notes
----- -----
The redirection reopens the database. The redirection reopens the database.
...@@ -88,17 +86,12 @@ class DataReader(multiprocessing.Process): ...@@ -88,17 +86,12 @@ class DataReader(multiprocessing.Process):
self._db.set(str(target).zfill(self._zfill)) self._db.set(str(target).zfill(self._zfill))
def reset(self): def reset(self):
"""Reset the cursor and environment. """Reset the cursor and environment."""
Returns
-------
None
"""
if self._num_parts > 1 or self._use_shuffle: if self._num_parts > 1 or self._use_shuffle:
self._chunk_cursor = 0 self._chunk_cursor = 0
self._part_idx = (self._part_idx + 1) % self._num_parts self._part_idx = (self._part_idx + 1) % self._num_parts
if self._use_shuffle: self._perm = numpy.random.permutation(self._perm_size) if self._use_shuffle:
self._perm = numpy.random.permutation(self._perm_size)
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor] self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._tail = self._head * self._chunk_size self._tail = self._head * self._chunk_size
if self._head >= self._num_entries: self.next_chunk() if self._head >= self._num_entries: self.next_chunk()
...@@ -109,26 +102,15 @@ class DataReader(multiprocessing.Process): ...@@ -109,26 +102,15 @@ class DataReader(multiprocessing.Process):
self.redirect(self._head) self.redirect(self._head)
def next_record(self):
    """Advance the database cursor and the record counter by one."""
    self._db.next()
    self._cursor = self._cursor + 1
def next_chunk(self): def next_chunk(self):
"""Step the cursor of shuffling chunks. """Step the cursor of chunks."""
Returns
-------
None
"""
self._chunk_cursor += 1 self._chunk_cursor += 1
if self._chunk_cursor >= self._perm_size: self.reset() if self._chunk_cursor >= self._perm_size:
self.reset()
else: else:
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor] self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._head = self._head * self._chunk_size self._head = self._head * self._chunk_size
...@@ -140,18 +122,12 @@ class DataReader(multiprocessing.Process): ...@@ -140,18 +122,12 @@ class DataReader(multiprocessing.Process):
self.redirect(self._head) self.redirect(self._head)
def run(self): def run(self):
"""Start the process. """Start the process."""
Returns
-------
None
"""
# Fix seed # Fix seed
numpy.random.seed(self._rng_seed) numpy.random.seed(self._rng_seed)
# Init db # Init db
self._db = _db.LMDB() self._db = db.LMDB()
self._db.open(self._source) self._db.open(self._source)
self._zfill = self._db.zfill() self._zfill = self._db.zfill()
self._num_entries = self._db.num_entries() self._num_entries = self._db.num_entries()
...@@ -189,9 +165,10 @@ class DataReader(multiprocessing.Process): ...@@ -189,9 +165,10 @@ class DataReader(multiprocessing.Process):
# Run! # Run!
while True: while True:
self.Q_out.put(self.element()) self.q_out.put(self.element())
self.next_record() self.next_record()
if self._cursor >= self._tail: if self._cursor >= self._tail:
if self._num_parts > 1 or self._use_shuffle: if self._num_parts > 1 or self._use_shuffle:
self.next_chunk() self.next_chunk()
else: self.reset() else:
\ No newline at end of file self.reset()
...@@ -13,7 +13,7 @@ from __future__ import absolute_import ...@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from multiprocessing import Process import multiprocessing
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
...@@ -28,11 +28,11 @@ except ImportError as e: ...@@ -28,11 +28,11 @@ except ImportError as e:
from lib.core.config import cfg from lib.core.config import cfg
from lib.proto import anno_pb2 as pb from lib.proto import anno_pb2 as pb
from lib.utils import logger
from lib.utils.blob import prep_im_for_blob from lib.utils.blob import prep_im_for_blob
import lib.utils.logger as logger
class DataTransformer(Process): class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(DataTransformer, self).__init__() super(DataTransformer, self).__init__()
self._rng_seed = cfg.RNG_SEED self._rng_seed = cfg.RNG_SEED
...@@ -42,60 +42,64 @@ class DataTransformer(Process): ...@@ -42,60 +42,64 @@ class DataTransformer(Process):
self._num_classes = len(self._classes) self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes))) self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self._queues = [] self._queues = []
self.Q_in = self.Q1_out = self.Q2_out = None self.q_in = self.q1_out = self.q2_out = None
self.daemon = True self.daemon = True
def make_roi_dict(
    self,
    ann_datum,
    im_scale,
    apply_flip=False,
    offsets=None,
):
    """Build the RoI dictionary for an annotated datum.

    Parameters
    ----------
    ann_datum : object
        Provides ``annotation`` (iterable of items with ``x1``, ``y1``,
        ``x2``, ``y2``, ``name`` and ``difficult``) and ``datum``
        (with ``width`` and ``height``).
    im_scale : number
        The factor multiplied into the boxes.
    apply_flip : bool, optional
        Whether to mirror the boxes horizontally.
    offsets : sequence, optional
        ``(x_offset, y_offset, size)`` where ``size[1] - 1`` bounds the
        x coordinates and ``size[0] - 1`` bounds the y coordinates.

    Returns
    -------
    dict
        The keys ``width``, ``height``, ``gt_classes`` (int32 array)
        and ``boxes`` (float32 array of ``x1, y1, x2, y2`` rows).

    """
    width = ann_datum.datum.width
    height = ann_datum.datum.height

    # Difficult instances are dropped unless they are enabled
    kept = [
        ann for ann in ann_datum.annotation
        if self._use_diff or not ann.difficult
    ]

    gt_classes = np.zeros((len(kept),), 'int32')
    boxes = np.zeros((len(kept), 4), 'float32')
    for idx, ann in enumerate(kept):
        # Clip each box into the image
        boxes[idx, :] = [
            max(0, ann.x1),
            max(0, ann.y1),
            min(ann.x2, width - 1),
            min(ann.y2, height - 1),
        ]
        gt_classes[idx] = self._class_to_ind[ann.name]

    # Flip the boxes if necessary
    if apply_flip:
        boxes = _flip_boxes(boxes, width)

    # Scale the boxes to the detecting scale
    boxes *= im_scale

    # Apply the offsets from scale jitter, then clip into the new canvas
    if offsets is not None:
        boxes[:, 0::2] += offsets[0]
        boxes[:, 1::2] += offsets[1]
        upper = [offsets[2][1] - 1, offsets[2][0] - 1] * 2
        boxes[:, :] = np.minimum(np.maximum(boxes[:, :], 0), upper)

    return {
        'width': width,
        'height': height,
        'gt_classes': gt_classes,
        'boxes': boxes,
    }
@classmethod @classmethod
def get_image(cls, serialized): def get_image(cls, serialized):
...@@ -127,20 +131,23 @@ class DataTransformer(Process): ...@@ -127,20 +131,23 @@ class DataTransformer(Process):
datum.ParseFromString(serialized) datum.ParseFromString(serialized)
im_datum = datum.datum im_datum = datum.datum
im = np.fromstring(im_datum.data, np.uint8) im = np.fromstring(im_datum.data, np.uint8)
if im_datum.encoded is True: im = cv2.imdecode(im, -1) if im_datum.encoded is True:
else: im = im.reshape((im_datum.height, im_datum.width, im_datum.channels)) im = cv2.imdecode(im, -1)
else:
h, w = im_datum.height, im_datum.width
im = im.reshape((h, w, im_datum.channels))
# Scale # Scale
scale_indices = npr.randint(0, high=len(cfg.TRAIN.SCALES)) scale_indices = npr.randint(len(cfg.TRAIN.SCALES))
target_size = cfg.TRAIN.SCALES[scale_indices] target_size = cfg.TRAIN.SCALES[scale_indices]
im, im_scale, jitter = prep_im_for_blob(im, target_size, cfg.TRAIN.MAX_SIZE) im, im_scale, jitter = prep_im_for_blob(im, target_size, cfg.TRAIN.MAX_SIZE)
# Flip # Flip
flip = False apply_flip = False
if self._use_flipped: if self._use_flipped:
if npr.randint(0, 2) > 0: if npr.randint(0, 2) > 0:
im = im[:, ::-1, :] im = im[:, ::-1, :]
flip = True apply_flip = True
# Random Crop or RandomPad # Random Crop or RandomPad
offsets = None offsets = None
...@@ -153,57 +160,63 @@ class DataTransformer(Process): ...@@ -153,57 +160,63 @@ class DataTransformer(Process):
# To a square (target_size, target_size) # To a square (target_size, target_size)
im, offsets = _get_image_with_target_size([target_size] * 2, im) im, offsets = _get_image_with_target_size([target_size] * 2, im)
# Datum -> Record # Datum -> RoIDict
rec = self.make_record(datum, im_scale, flip, offsets) roi_dict = self.make_roi_dict(datum, im_scale, apply_flip, offsets)
# Post-Process for gt boxes # Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}] # Shape like: [num_objects, {x1, y1, x2, y2, cls}]
gt_boxes = np.empty((len(rec['gt_classes']), 5), dtype=np.float32) gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, 0:4], gt_boxes[:, 4] = rec['boxes'], rec['gt_classes'] gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
return im, im_scale, gt_boxes return im, im_scale, gt_boxes
def run(self):
    """Transform serialized records taken from ``q_in`` forever.

    Results without any ground-truth box are dropped. The others are
    routed by the ratio of the first to the second image dimension:
    ``q1_out`` when it exceeds 1.0, ``q2_out`` otherwise.
    """
    npr.seed(self._rng_seed)
    while True:
        data = self.get(self.q_in.get())
        im, gt_boxes = data[0], data[2]
        # Ensure that there should be at least 1 ground-truth
        if len(gt_boxes) < 1:
            continue
        if float(im.shape[0]) / im.shape[1] > 1.0:
            self.q1_out.put(data)
        else:
            self.q2_out.put(data)
def _flip_boxes(boxes, width): def _flip_boxes(boxes, width):
flip_boxes = boxes.copy() flip_boxes = boxes.copy()
oldx1 = boxes[:, 0].copy() old_x1 = boxes[:, 0].copy()
oldx2 = boxes[:, 2].copy() old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - oldx2 - 1 flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - oldx1 - 1 flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all(): if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.') logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes return flip_boxes
def _get_image_with_target_size(target_size, img):
    """Randomly crop and/or pad an image to the target size.

    Along each of the first two dimensions, the image is cropped at a
    random offset when it is larger than the target, or placed at a
    random offset on a canvas filled with ``cfg.PIXEL_MEANS`` when it
    is smaller.

    Returns
    -------
    tuple
        The new image and ``(x_offset, y_offset, target_size)``, where
        each offset is the pad offset minus the crop offset.

    """
    src_h, src_w = img.shape[0], img.shape[1]
    dst_h, dst_w = target_size[0], target_size[1]
    height_diff = dst_h - src_h
    width_diff = dst_w - src_w

    # The order of these four draws defines the random stream; keep it
    crop_x = np.random.randint(max(-width_diff, 0) + 1)
    pad_x = np.random.randint(max(width_diff, 0) + 1)
    crop_y = np.random.randint(max(-height_diff, 0) + 1)
    pad_y = np.random.randint(max(height_diff, 0) + 1)

    # Allocate the canvas and fill it with the mean pixel
    out_shape = list(img.shape)
    out_shape[:2] = target_size
    canvas = np.empty(out_shape, dtype=img.dtype)
    canvas[:] = cfg.PIXEL_MEANS

    # Slices that run past an array edge are truncated by numpy, so the
    # source patch and the destination window end up with equal shapes
    patch = img[crop_y:crop_y + dst_h, crop_x:crop_x + dst_w]
    canvas[pad_y:pad_y + src_h, pad_x:pad_x + src_w] = patch

    return canvas, (
        pad_x - crop_x,
        pad_y - crop_y,
        target_size,
    )
...@@ -32,7 +32,7 @@ import numpy as np ...@@ -32,7 +32,7 @@ import numpy as np
# -79 -167 96 184 # -79 -167 96 184
# -167 -343 184 360 # -167 -343 184 360
#array([[ -83., -39., 100., 56.], # array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.], # [-175., -87., 192., 104.],
# [-359., -183., 376., 200.], # [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.], # [ -55., -55., 72., 72.],
...@@ -42,8 +42,12 @@ import numpy as np ...@@ -42,8 +42,12 @@ import numpy as np
# [ -79., -167., 96., 184.], # [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]]) # [-167., -343., 184., 360.]])
def generate_anchors(base_size=16, ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6)): def generate_anchors(
base_size=16,
ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6),
):
""" """
Generate anchor (reference) windows by enumerating aspect ratios X Generate anchor (reference) windows by enumerating aspect ratios X
scales wrt a reference (0, 0, 15, 15) window. scales wrt a reference (0, 0, 15, 15) window.
...@@ -55,22 +59,25 @@ def generate_anchors(base_size=16, ratios=(0.5, 1, 2), ...@@ -55,22 +59,25 @@ def generate_anchors(base_size=16, ratios=(0.5, 1, 2),
return anchors return anchors
def generate_anchors_v2(
    stride=16,
    ratios=(0.5, 1, 2),
    sizes=(32, 64, 128, 256, 512),
):
    """Generate a matrix of anchor boxes in (x1, y1, x2, y2) format.

    Anchors are centered on stride / 2, have (approximate) sqrt areas
    of the specified sizes, and aspect ratios as given.

    Parameters
    ----------
    stride : number, optional
        Passed to ``generate_anchors`` as ``base_size`` and used to
        turn the sizes into scales.
    ratios : sequence of number, optional
        The aspect ratios, forwarded unchanged.
    sizes : sequence of number, optional
        The anchor sizes; each one is divided by the stride.

    Returns
    -------
    The value returned by ``generate_anchors``.

    """
    # ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the same
    # dtype and is available in every NumPy release
    scales = np.array(sizes, dtype=np.float64) / stride
    return generate_anchors(
        base_size=stride,
        ratios=ratios,
        scales=scales,
    )
def _whctrs(anchor): def _whctrs(anchor):
""" """Return width, height, x center, and y center for an anchor (window)."""
Return width, height, x center, and y center for an anchor (window).
"""
w = anchor[2] - anchor[0] + 1 w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1 h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1) x_ctr = anchor[0] + 0.5 * (w - 1)
...@@ -83,7 +90,6 @@ def _mkanchors(ws, hs, x_ctr, y_ctr): ...@@ -83,7 +90,6 @@ def _mkanchors(ws, hs, x_ctr, y_ctr):
Given a vector of widths (ws) and heights (hs) around a center Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows). (x_ctr, y_ctr), output a set of anchors (windows).
""" """
ws = ws[:, np.newaxis] ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis] hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws - 1), anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
...@@ -94,10 +100,7 @@ def _mkanchors(ws, hs, x_ctr, y_ctr): ...@@ -94,10 +100,7 @@ def _mkanchors(ws, hs, x_ctr, y_ctr):
def _ratio_enum(anchor, ratios): def _ratio_enum(anchor, ratios):
""" """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
Enumerate a set of anchors for each aspect ratio wrt an anchor.
"""
w, h, x_ctr, y_ctr = _whctrs(anchor) w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h size = w * h
size_ratios = size / ratios size_ratios = size / ratios
...@@ -108,10 +111,7 @@ def _ratio_enum(anchor, ratios): ...@@ -108,10 +111,7 @@ def _ratio_enum(anchor, ratios):
def _scale_enum(anchor, scales): def _scale_enum(anchor, scales):
""" """Enumerate a set of anchors for each scale wrt an anchor."""
Enumerate a set of anchors for each scale wrt an anchor.
"""
w, h, x_ctr, y_ctr = _whctrs(anchor) w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales ws = w * scales
hs = h * scales hs = h * scales
......
...@@ -19,9 +19,10 @@ import dragon.vm.torch as torch ...@@ -19,9 +19,10 @@ import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
...@@ -32,10 +33,9 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -32,10 +33,9 @@ class AnchorTargetLayer(torch.nn.Module):
super(AnchorTargetLayer, self).__init__() super(AnchorTargetLayer, self).__init__()
# Load the basic configs # Load the basic configs
# C4 backbone takes the first stride # C4 backbone takes the first stride
self.scales, self.stride, self.ratios = \ self.scales = cfg.RPN.SCALES
cfg.RPN.SCALES, \ self.stride = cfg.RPN.STRIDES[0]
cfg.RPN.STRIDES[0], \ self.ratios = cfg.RPN.ASPECT_RATIOS
cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount # Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
...@@ -61,11 +61,13 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -61,11 +61,13 @@ class AnchorTargetLayer(torch.nn.Module):
""" """
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images: if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \ logger.fatal(
.format(num_images, len(gt_boxes_wide))) 'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
height, width = features[0].shape[-2:] height, width = features[0].shape[-2:]
...@@ -85,7 +87,7 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -85,7 +87,7 @@ class AnchorTargetLayer(torch.nn.Module):
all_anchors = all_anchors.reshape((K * A, 4)) all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A) total_anchors = int(K * A)
# label: 1 is positive, 0 is negative, -1 is don't care # label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32) all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32) all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32) all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
...@@ -101,8 +103,8 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -101,8 +103,8 @@ class AnchorTargetLayer(torch.nn.Module):
inds_inside = np.where( inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) & (all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) & (all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width (all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] # height (all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :] anchors = all_anchors[inds_inside, :]
else: else:
inds_inside = np.arange(all_anchors.shape[0]) inds_inside = np.arange(all_anchors.shape[0])
...@@ -143,7 +145,10 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -143,7 +145,10 @@ class AnchorTargetLayer(torch.nn.Module):
fg_inds = np.where(labels == 1)[0] fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg: if len(fg_inds) > num_fg:
disable_inds = npr.choice( disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False) fg_inds,
size=len(fg_inds) - num_fg,
replace=False,
)
labels[disable_inds] = -1 labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0] fg_inds = np.where(labels == 1)[0]
...@@ -152,12 +157,17 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -152,12 +157,17 @@ class AnchorTargetLayer(torch.nn.Module):
bg_inds = np.where(labels == 0)[0] bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg: if len(bg_inds) > num_bg:
disable_inds = npr.choice( disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False) bg_inds,
size=len(bg_inds) - num_bg,
replace=False,
)
labels[disable_inds] = -1 labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32) bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform( bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], 0:4]) ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], 0:4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0)) bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
...@@ -169,34 +179,26 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -169,34 +179,26 @@ class AnchorTargetLayer(torch.nn.Module):
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
# labels labels = all_labels \
labels = all_labels.reshape( .reshape((num_images, height, width, A)) \
(num_images, height, width, A)).transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2) \
labels = labels.reshape((num_images, total_anchors)) .reshape((num_images, total_anchors))
# bbox_targets bbox_targets = all_bbox_targets \
bbox_targets = all_bbox_targets.reshape( .reshape((num_images, height, width, A * 4)) \
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2)
# bbox_inside_weights bbox_inside_weights = all_bbox_inside_weights \
bbox_inside_weights = all_bbox_inside_weights.reshape( .reshape((num_images, height, width, A * 4)) \
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2)
# bbox_outside_weights bbox_outside_weights = all_bbox_outside_weights \
bbox_outside_weights = all_bbox_outside_weights.reshape( .reshape((num_images, height, width, A * 4)) \
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2)
return { return {
'labels': to_tensor(labels), 'labels': blob_to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets), 'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights), 'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights), 'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
} }
def _dismantle_gt_boxes(gt_boxes, num_images):
    """Split a batched ground-truth array into one array per image.

    Args:
        gt_boxes: Array indexed as ``gt_boxes[:, -1]``; the last column is
            cast to ``int32`` and compared against each image index.
        num_images: Number of images to split the rows into.

    Returns:
        A list of ``num_images`` arrays. Entry ``ix`` holds the rows of
        ``gt_boxes`` whose last column equals ``ix``, in their original
        order; an index with no matching row yields an empty array.
    """
    # Cast the image-index column once instead of once per image.
    image_inds = gt_boxes[:, -1].astype(np.int32)
    return [
        gt_boxes[np.where(image_inds == ix)[0]]
        for ix in range(num_images)
    ]
\ No newline at end of file
...@@ -33,10 +33,8 @@ class DataLayer(torch.nn.Module): ...@@ -33,10 +33,8 @@ class DataLayer(torch.nn.Module):
}) })
def forward(self): def forward(self):
# Get a mini-batch from the Queue # Get an array blob from the Queue
blobs = self.data_batch.get() outputs = self.data_batch.get()
# Zero-Copy from numpy # Zero-Copy the array to tensor
blobs['data'] = torch.from_numpy(blobs['data']) outputs['data'] = torch.from_numpy(outputs['data'])
# Switch the data to Device return outputs
blobs['data'].cuda(cfg.GPU_ID)
return blobs
\ No newline at end of file
...@@ -9,27 +9,35 @@ ...@@ -9,27 +9,35 @@
# #
# -------------------------------------------------------- # --------------------------------------------------------
import numpy as np from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.nms.nms_wrapper import nms
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils.bbox_transform import bbox_transform_inv, clip_boxes from lib.nms.nms_wrapper import nms
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module): class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box """
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors"). transformations to a set of regular boxes (called "anchors").
""" """
def __init__(self): def __init__(self):
super(ProposalLayer, self).__init__() super(ProposalLayer, self).__init__()
# Load the basic configs # Load the basic configs
self.scales, self.stride, self.ratios = \ self.scales = cfg.RPN.SCALES
cfg.RPN.SCALES, cfg.RPN.STRIDES[0], cfg.RPN.ASPECT_RATIOS self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Generate base anchors # Generate base anchors
self.base_anchors = generate_anchors( self.base_anchors = generate_anchors(
...@@ -61,7 +69,8 @@ class ProposalLayer(torch.nn.Module): ...@@ -61,7 +69,8 @@ class ProposalLayer(torch.nn.Module):
# Reshape to (K * A, 4) shifted anchors # Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0] A = self.base_anchors.shape[0]
K = shifts.shape[0] K = shifts.shape[0]
anchors = self.base_anchors.reshape((1, A, 4)) + \ anchors = \
self.base_anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2)) shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = anchors.reshape((K * A, 4)) all_anchors = anchors.reshape((K * A, 4))
...@@ -69,8 +78,6 @@ class ProposalLayer(torch.nn.Module): ...@@ -69,8 +78,6 @@ class ProposalLayer(torch.nn.Module):
batch_rois = [] batch_rois = []
# scores & deltas are (1, A, H, W) format # scores & deltas are (1, A, H, W) format
# Transpose to (1, H, W, A) # Transpose to (1, H, W, A)
batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1)) batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1))
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1)) batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1))
...@@ -95,11 +102,11 @@ class ProposalLayer(torch.nn.Module): ...@@ -95,11 +102,11 @@ class ProposalLayer(torch.nn.Module):
proposals = bbox_transform_inv(anchors, deltas) proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image # 2. Clip predicted boxes to image
proposals = clip_boxes(proposals, ims_info[ix, :2]) proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold # 3. remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2]) # (NOTE: convert min_size to input image scale stored in im_info[2])
keep = _filter_boxes(proposals, min_size * ims_info[ix, 2]) keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :] proposals = proposals[keep, :]
scores = scores[keep] scores = scores[keep]
...@@ -107,7 +114,8 @@ class ProposalLayer(torch.nn.Module): ...@@ -107,7 +114,8 @@ class ProposalLayer(torch.nn.Module):
# 7. Take after_nms_topN (e.g. 300) # 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top) # 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh) keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: keep = keep[:post_nms_topN] if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :] proposals = proposals[keep, :]
# Output rois blob # Output rois blob
...@@ -118,13 +126,7 @@ class ProposalLayer(torch.nn.Module): ...@@ -118,13 +126,7 @@ class ProposalLayer(torch.nn.Module):
# Merge RoIs into a blob # Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0) rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN': return rpn_rois if cfg_key == 'TRAIN':
else: return [to_tensor(rpn_rois)] return rpn_rois
else:
return [blob_to_tensor(rpn_rois)]
def _filter_boxes(boxes, min_size):
    """Return the indices of boxes whose sides are all at least ``min_size``.

    Args:
        boxes: Array-like indexed as ``boxes[:, 0..3]``; width is computed
            from columns 0 and 2, height from columns 1 and 3.
        min_size: Minimum allowed width and height (inclusive).

    Returns:
        1-D integer array of the row indices that pass both checks.
    """
    # Accept nested lists as well as arrays.
    boxes = np.asarray(boxes)
    # Sides are inclusive, hence the "+ 1".
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep
...@@ -9,22 +9,24 @@ ...@@ -9,22 +9,24 @@
# #
# -------------------------------------------------------- # --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
class ProposalTargetLayer(torch.nn.Module): class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets. """Assign object detection proposals to ground-truth targets."""
Produces proposal classification labels and bounding-box regression targets.
"""
def __init__(self): def __init__(self):
super(ProposalTargetLayer, self).__init__() super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES self.num_classes = cfg.MODEL.NUM_CLASSES
...@@ -34,8 +36,8 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -34,8 +36,8 @@ class ProposalTargetLayer(torch.nn.Module):
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label, has_mask) # GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs # Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets', keys = ['labels', 'rois', 'bbox_targets',
...@@ -50,14 +52,12 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -50,14 +52,12 @@ class ProposalTargetLayer(torch.nn.Module):
# Include ground-truth boxes in the set of candidate rois # Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4])))) rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois( labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes) rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32) bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([ _fmap_batch([
labels, labels,
rois, rois,
...@@ -73,11 +73,11 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -73,11 +73,11 @@ class ProposalTargetLayer(torch.nn.Module):
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0) batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return { return {
'rois': [to_tensor(batch_outputs['rois'])], 'rois': [blob_to_tensor(batch_outputs['rois'])],
'labels': to_tensor(batch_outputs['labels']), 'labels': blob_to_tensor(batch_outputs['labels']),
'bbox_targets': to_tensor(batch_outputs['bbox_targets']), 'bbox_targets': blob_to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': to_tensor(batch_outputs['bbox_inside_weights']), 'bbox_inside_weights': blob_to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': to_tensor(batch_outputs['bbox_outside_weights']), 'bbox_outside_weights': blob_to_tensor(batch_outputs['bbox_outside_weights']),
} }
...@@ -109,7 +109,6 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes): ...@@ -109,7 +109,6 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
def _compute_targets(ex_rois, gt_rois, labels): def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image.""" """Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4 assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4 assert gt_rois.shape[1] == 4
...@@ -117,12 +116,18 @@ def _compute_targets(ex_rois, gt_rois, labels): ...@@ -117,12 +116,18 @@ def _compute_targets(ex_rois, gt_rois, labels):
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False) return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): def _sample_rois(
"""Generate a random sample of RoIs comprising foreground and background examples.""" all_rois,
gt_boxes,
fg_rois_per_image,
rois_per_image,
num_classes,
):
"""Generate a random sample of RoIs."""
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float),
)
gt_assignment = overlaps.argmax(axis=1) gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1) max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4] labels = gt_boxes[gt_assignment, 4]
...@@ -164,11 +169,6 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas ...@@ -164,11 +169,6 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas
return labels, rois, bbox_targets, bbox_inside_weights return labels, rois, bbox_targets, bbox_inside_weights
def _dismantle_gt_boxes(gt_boxes, num_images):
    """Split a batched ground-truth array into one array per image.

    Args:
        gt_boxes: Array indexed as ``gt_boxes[:, -1]``; the last column is
            cast to ``int32`` and compared against each image index.
        num_images: Number of images to split the rows into.

    Returns:
        A list of ``num_images`` arrays. Entry ``ix`` holds the rows of
        ``gt_boxes`` whose last column equals ``ix``, in their original
        order; an index with no matching row yields an empty array.
    """
    # Cast the image-index column once instead of once per image.
    image_inds = gt_boxes[:, -1].astype(np.int32)
    return [
        gt_boxes[np.where(image_inds == ix)[0]]
        for ix in range(num_images)
    ]
def _fmap_batch(inputs, outputs, keys): def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys): for i, key in enumerate(keys):
outputs[key].append(inputs[i]) outputs[key].append(inputs[i])
...@@ -13,27 +13,23 @@ from __future__ import absolute_import ...@@ -13,27 +13,23 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
try:
import cPickle
except:
import pickle as cPickle
import numpy as np
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image from lib.utils.image import scale_image
from lib.utils.bbox_transform import clip_boxes, bbox_transform_inv
from lib.nms.nms_wrapper import nms, soft_nms
from lib.utils.timer import Timer from lib.utils.timer import Timer
from lib.utils.blob import im_list_to_blob, to_array
from lib.utils.vis import vis_one_image from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image): def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales. """Detect a image, with single or multiple scales."""
"""
# Prepare images # Prepare images
ims, ims_scale = scale_image(raw_image) ims, ims_scale = scale_image(raw_image)
...@@ -42,25 +38,30 @@ def im_detect(detector, raw_image): ...@@ -42,25 +38,30 @@ def im_detect(detector, raw_image):
blobs['ims_info'] = np.array([ blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale] list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32) for im_scale in ims_scale], dtype=np.float32)
blobs['data'] = torch.from_numpy(blobs['data']).cuda(cfg.GPU_ID) blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward # Do Forward
with torch.no_grad(): with torch.no_grad():
outputs = detector.forward(inputs=blobs) outputs = detector.forward(inputs=blobs)
# Decode results # Decode results
batch_rois = to_array(outputs['rois']) batch_rois = tensor_to_blob(outputs['rois'])
batch_scores = to_array(outputs['cls_prob']) batch_scores = tensor_to_blob(outputs['cls_prob'])
batch_deltas = to_array(outputs['bbox_pred']) batch_deltas = tensor_to_blob(outputs['bbox_pred'])
batch_boxes = bbox_transform_inv( batch_boxes = bbox_transform_inv(
batch_rois[:, 1:5], batch_deltas, cfg.BBOX_REG_WEIGHTS) boxes=batch_rois[:, 1:5],
scores_wide = []; boxes_wide = [] deltas=batch_deltas,
weights=cfg.BBOX_REG_WEIGHTS,
)
scores_wide, boxes_wide = [], []
for im_idx in range(len(ims)): for im_idx in range(len(ims)):
indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0] indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0]
boxes = batch_boxes[indices] boxes = batch_boxes[indices]
boxes /= ims_scale[im_idx] boxes /= ims_scale[im_idx]
clip_boxes(boxes, raw_image.shape) clip_tiled_boxes(boxes, raw_image.shape)
scores_wide.append(batch_scores[indices]) scores_wide.append(batch_scores[indices])
boxes_wide.append(boxes) boxes_wide.append(boxes)
...@@ -69,12 +70,13 @@ def im_detect(detector, raw_image): ...@@ -69,12 +70,13 @@ def im_detect(detector, raw_image):
def test_net(detector, server): def test_net(detector, server):
classes, num_images, num_classes = \ # Load settings
server.classes, server.num_images, server.num_classes classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect' : Timer(), 'misc' : Timer()} _t = {'im_detect': Timer(), 'misc': Timer()}
for i in range(num_images): for i in range(num_images):
image_id, raw_image = server.get_image() image_id, raw_image = server.get_image()
...@@ -89,22 +91,27 @@ def test_net(detector, server): ...@@ -89,22 +91,27 @@ def test_net(detector, server):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0] inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j] cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4] cls_boxes = boxes[inds, j*4:(j+1)*4]
cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).\ cls_detections = np.hstack(
astype(np.float32, copy=False) (cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS: if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(cls_dets, cfg.TEST.NMS, keep = soft_nms(
cls_detections, cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD, method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA) sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else: else:
keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True) keep = nms(cls_detections, cfg.TEST.NMS, force_cpu=True)
cls_dets = cls_dets[keep, :] cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_dets all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_dets) boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE: if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_image, classes, boxes_this_image, vis_one_image(
raw_image, classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True, thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
filename=server.get_save_filename(image_id)) filename=server.get_save_filename(image_id),
)
# Limit to max_per_image detections *over all classes* # Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0: if cfg.TEST.DETECTIONS_PER_IM > 0:
...@@ -112,7 +119,8 @@ def test_net(detector, server): ...@@ -112,7 +119,8 @@ def test_net(detector, server):
for j in range(1, num_classes): for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue if len(all_boxes[j][i]) < 1: continue
image_scores.append(all_boxes[j][i][:, -1]) image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores) if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM: if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM] image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes): for j in range(1, num_classes):
...@@ -120,7 +128,7 @@ def test_net(detector, server): ...@@ -120,7 +128,7 @@ def test_net(detector, server):
all_boxes[j][i] = all_boxes[j][i][keep, :] all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc() _t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(i + 1, num_images, _t['im_detect'].average_time, .format(i + 1, num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='') _t['misc'].average_time), end='')
......
...@@ -13,16 +13,18 @@ from __future__ import absolute_import ...@@ -13,16 +13,18 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
import lib.utils.logger as logger
from lib.utils.blob import to_tensor
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class AnchorTargetLayer(torch.nn.Module): class AnchorTargetLayer(torch.nn.Module):
...@@ -31,14 +33,14 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -31,14 +33,14 @@ class AnchorTargetLayer(torch.nn.Module):
def __init__(self): def __init__(self):
super(AnchorTargetLayer, self).__init__() super(AnchorTargetLayer, self).__init__()
# Load the basic configs # Load the basic configs
self.scales, self.strides, self.ratios = \ self.scales = cfg.RPN.SCALES
cfg.RPN.SCALES, \ self.strides = cfg.RPN.STRIDES
cfg.RPN.STRIDES, \ self.ratios = cfg.RPN.ASPECT_RATIOS
cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides): if len(self.scales) != len(self.strides):
logger.fatal( logger.fatal(
'Given {} scales and {} strides.' 'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))) .format(len(self.scales), len(self.strides))
)
# Allow boxes to sit over the edge by a small amount # Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
...@@ -46,9 +48,9 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -46,9 +48,9 @@ class AnchorTargetLayer(torch.nn.Module):
# Generate base anchors # Generate base anchors
self.base_anchors = [] self.base_anchors = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
base_size = self.strides[i] base_size, scale = self.strides[i], self.scales[i]
scale = self.scales[i] if not isinstance(scale, collections.Iterable):
if not isinstance(scale, list): scale = [scale] scale = [scale]
self.base_anchors.append( self.base_anchors.append(
generate_anchors( generate_anchors(
base_size=base_size, base_size=base_size,
...@@ -59,16 +61,17 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -59,16 +61,17 @@ class AnchorTargetLayer(torch.nn.Module):
def forward(self, features, gt_boxes, ims_info): def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.""" """Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images: if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \ logger.fatal(
.format(num_images, len(gt_boxes_wide))) 'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
all_anchors = []; total_anchors = 0 all_anchors, total_anchors = [], 0
for i in range(len(self.strides)): for i in range(len(self.strides)):
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i] shift_x = np.arange(0, width) * self.strides[i]
...@@ -107,8 +110,8 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -107,8 +110,8 @@ class AnchorTargetLayer(torch.nn.Module):
inds_inside = np.where( inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) & (all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) & (all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width (all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] # height (all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :] anchors = all_anchors[inds_inside, :]
else: else:
inds_inside = np.arange(all_anchors.shape[0]) inds_inside = np.arange(all_anchors.shape[0])
...@@ -180,16 +183,8 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -180,16 +183,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1)) bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return { return {
'labels': to_tensor(labels), 'labels': blob_to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets), 'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights), 'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights), 'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
} }
def _dismantle_gt_boxes(gt_boxes, num_images):
    """Split a batched ground-truth array into one array per image.

    Args:
        gt_boxes: Array indexed as ``gt_boxes[:, -1]``; the last column is
            cast to ``int32`` and compared against each image index.
        num_images: Number of images to split the rows into.

    Returns:
        A list of ``num_images`` arrays. Entry ``ix`` holds the rows of
        ``gt_boxes`` whose last column equals ``ix``, in their original
        order; an index with no matching row yields an empty array.
    """
    # Cast the image-index column once instead of once per image.
    image_inds = gt_boxes[:, -1].astype(np.int32)
    return [
        gt_boxes[np.where(image_inds == ix)[0]]
        for ix in range(num_images)
    ]
\ No newline at end of file
...@@ -9,39 +9,49 @@ ...@@ -9,39 +9,49 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
import numpy as np from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms from lib.nms.nms_wrapper import nms
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.bbox_transform import bbox_transform_inv, clip_boxes from lib.utils.boxes import bbox_transform_inv
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module): class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box. """
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors"). transformations to a set of regular boxes (called "anchors").
""" """
def __init__(self): def __init__(self):
super(ProposalLayer, self).__init__() super(ProposalLayer, self).__init__()
# Load the basic configs # Load the basic configs
self.scales, self.strides, self.ratios = \ self.scales = cfg.RPN.SCALES
cfg.RPN.SCALES, cfg.RPN.STRIDES, cfg.RPN.ASPECT_RATIOS self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides): if len(self.scales) != len(self.strides):
logger.fatal( logger.fatal(
'Given {} scales and {} strides.' 'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))) .format(len(self.scales), len(self.strides))
)
# Generate base anchors # Generate base anchors
self.base_anchors = [] self.base_anchors = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
base_size = self.strides[i] base_size, scale = self.strides[i], self.scales[i]
scale = self.scales[i] if not isinstance(scale, collections.Iterable):
if not isinstance(scale, list): scale = [scale] scale = [scale]
self.base_anchors.append( self.base_anchors.append(
generate_anchors( generate_anchors(
base_size=base_size, base_size=base_size,
...@@ -92,7 +102,8 @@ class ProposalLayer(torch.nn.Module): ...@@ -92,7 +102,8 @@ class ProposalLayer(torch.nn.Module):
# Prepare for the outputs # Prepare for the outputs
batch_rois = [] batch_rois = []
batch_scores = cls_prob.numpy(True) batch_scores = cls_prob.numpy(True)
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4] batch_deltas = bbox_pred.numpy(True) \
.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
# Extract RoIs separately # Extract RoIs separately
for ix in range(num_images): for ix in range(num_images):
...@@ -115,10 +126,10 @@ class ProposalLayer(torch.nn.Module): ...@@ -115,10 +126,10 @@ class ProposalLayer(torch.nn.Module):
proposals = bbox_transform_inv(anchors, deltas) proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image # 2. Clip predicted boxes to image
proposals = clip_boxes(proposals, ims_info[ix, :2]) proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold # 3. remove predicted boxes with either height or width < threshold
keep = _filter_boxes(proposals, min_size * ims_info[ix, 2]) keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :] proposals = proposals[keep, :]
scores = scores[keep] scores = scores[keep]
...@@ -126,7 +137,8 @@ class ProposalLayer(torch.nn.Module): ...@@ -126,7 +137,8 @@ class ProposalLayer(torch.nn.Module):
# 7. Take after_nms_topN (e.g. 300) # 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top) # 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh) keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: keep = keep[:post_nms_topN] if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :] proposals = proposals[keep, :]
# Output rois blob # Output rois blob
...@@ -151,28 +163,19 @@ class ProposalLayer(torch.nn.Module): ...@@ -151,28 +163,19 @@ class ProposalLayer(torch.nn.Module):
lv_indices = np.where(fpn_levels == (i + min_level))[0] lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0: if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling # Fake a tiny roi to avoid empty roi pooling
all_rois.append(to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32))) all_rois.append(blob_to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else: else:
all_rois.append(to_tensor(rpn_rois[lv_indices])) all_rois.append(blob_to_tensor(rpn_rois[lv_indices]))
return all_rois return all_rois
def _filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min_size.
"""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
def _map_rois_to_fpn_levels(rois, k_min, k_max): def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""Determine which FPN level each RoI in a set of RoIs should map to based
on the heuristic in the FPN paper.
""" """
if len(rois) == 0: return [] Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1 ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1 hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs) s = np.sqrt(ws * hs)
......
...@@ -9,14 +9,19 @@ ...@@ -9,14 +9,19 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
class ProposalTargetLayer(torch.nn.Module): class ProposalTargetLayer(torch.nn.Module):
...@@ -36,26 +41,19 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -36,26 +41,19 @@ class ProposalTargetLayer(torch.nn.Module):
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32), 'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
} }
def _map_rois(self, inputs, fake_outputs, outputs, keys, levels):
f = lambda a, b, indices: a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(f(inputs[i], fake_outputs[key], inds))
def forward(self, rpn_rois, gt_boxes): def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label, has_mask) # GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs # Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets', keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights'] 'bbox_inside_weights', 'bbox_outside_weights']
outputs = dict(map(lambda a, b: (a, b), keys, [[] for key in keys])) outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for key in keys])) batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately # Generate targets separately
for ix in range(num_images): for ix in range(num_images):
...@@ -65,11 +63,9 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -65,11 +63,9 @@ class ProposalTargetLayer(torch.nn.Module):
# Include ground-truth boxes in the set of candidate rois # Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4])))) rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
# Sample rois with labels & bbox targets
labels, rois, bbox_targets, bbox_inside_weights = \ labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes) _sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32) bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
...@@ -94,14 +90,20 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -94,14 +90,20 @@ class ProposalTargetLayer(torch.nn.Module):
K = max_level - min_level + 1 K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level) fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)] lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)]
_fmap_rois([batch_outputs[key] for key in keys], self.fake_outputs, outputs, keys, lvs_indices) _fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
outputs=outputs,
keys=keys,
levels=lvs_indices,
)
return { return {
'rois': [to_tensor(outputs['rois'][i]) for i in range(K)], 'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': to_tensor(np.concatenate(outputs['labels'], axis=0)), 'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': to_tensor(np.vstack(outputs['bbox_targets'])), 'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': to_tensor(np.vstack(outputs['bbox_inside_weights'])), 'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': to_tensor(np.vstack(outputs['bbox_outside_weights'])), 'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])),
} }
...@@ -115,6 +117,7 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes): ...@@ -115,6 +117,7 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
Returns: Returns:
bbox_target (ndarray): N x 4K blob of regression targets bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights bbox_inside_weights (ndarray): N x 4K blob of loss weights
""" """
clss = bbox_target_data[:, 0] clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
...@@ -131,7 +134,6 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes): ...@@ -131,7 +134,6 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
def _compute_targets(ex_rois, gt_rois, labels): def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image.""" """Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4 assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4 assert gt_rois.shape[1] == 4
...@@ -140,10 +142,12 @@ def _compute_targets(ex_rois, gt_rois, labels): ...@@ -140,10 +142,12 @@ def _compute_targets(ex_rois, gt_rois, labels):
def _map_rois_to_fpn_levels(rois, k_min, k_max): def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""Determine which FPN level each RoI in a set of RoIs should map to based
on the heuristic in the FPN paper.
""" """
if len(rois) == 0: return [] Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1 ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1 hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs) s = np.sqrt(ws * hs)
...@@ -154,9 +158,7 @@ def _map_rois_to_fpn_levels(rois, k_min, k_max): ...@@ -154,9 +158,7 @@ def _map_rois_to_fpn_levels(rois, k_min, k_max):
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Generate a random sample of RoIs comprising foreground and background """Sample a batch of RoIs comprising foreground and background examples."""
examples.
"""
# overlaps: (rois x gt_boxes) # overlaps: (rois x gt_boxes)
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
...@@ -203,19 +205,15 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas ...@@ -203,19 +205,15 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas
return labels, rois, bbox_targets, bbox_inside_weights return labels, rois, bbox_targets, bbox_inside_weights
def _dismantle_gt_boxes(gt_boxes, num_images):
return [gt_boxes[np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]] \
for ix in range(num_images)]
def _fmap_batch(inputs, outputs, keys): def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys): for i, key in enumerate(keys):
outputs[key].append(inputs[i]) outputs[key].append(inputs[i])
def _fmap_rois(inputs, fake_outputs, outputs, keys, levels): def _fmap_rois(inputs, fake_outputs, outputs, keys, levels):
f = lambda a, b, indices: a[indices] if len(indices) > 0 else b def impl(a, b, indices):
return a[indices] if len(indices) > 0 else b
for k in range(len(levels)): for k in range(len(levels)):
inds = levels[k] inds = levels[k]
for i, key in enumerate(keys): for i, key in enumerate(keys):
outputs[key].append(f(inputs[i], fake_outputs[key], inds)) outputs[key].append(impl(inputs[i], fake_outputs[key], inds))
\ No newline at end of file
...@@ -9,13 +9,17 @@ ...@@ -9,13 +9,17 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Import custom modules # Import custom modules
from lib.modeling.base import Bootstarp from lib.modeling.base import affine
from lib.modeling.base import RPNDecoder from lib.modeling.base import bn
from lib.modeling.base import RetinaNetDecoder from lib.modeling.base import conv1x1
from lib.modeling.base import conv1x1, conv3x3, bn, affine from lib.modeling.base import conv3x3
from lib.modeling.fpn import FPN
from lib.modeling.rpn import RPN
from lib.modeling.fast_rcnn import FastRCNN from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.fpn import FPN
from lib.modeling.retinanet import RetinaNet from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD from lib.modeling.ssd import SSD
...@@ -15,7 +15,9 @@ from __future__ import print_function ...@@ -15,7 +15,9 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.modeling import conv1x1, conv3x3, bn, affine from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class WideResBlock(torch.nn.Module): class WideResBlock(torch.nn.Module):
...@@ -112,8 +114,10 @@ class AirNet(torch.nn.Module): ...@@ -112,8 +114,10 @@ class AirNet(torch.nn.Module):
) )
self.layer1 = self.make_blocks(filters[0], blocks[0]) self.layer1 = self.make_blocks(filters[0], blocks[0])
self.layer2 = self.make_blocks(filters[1], blocks[1], 2) self.layer2 = self.make_blocks(filters[1], blocks[1], 2)
if num_stages >= 4: self.layer3 = self.make_blocks(filters[2], blocks[2], 2) if num_stages >= 4:
if num_stages >= 5: self.layer4 = self.make_blocks(filters[3], blocks[3], 2) self.layer3 = self.make_blocks(filters[2], blocks[2], 2)
if num_stages >= 5:
self.layer4 = self.make_blocks(filters[3], blocks[3], 2)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -165,7 +169,14 @@ def airnet(num_stages): ...@@ -165,7 +169,14 @@ def airnet(num_stages):
) )
return AirNet(blocks, num_stages) return AirNet(blocks, num_stages)
def make_airnet_(): return airnet(5) def make_airnet_(): return airnet(5)
def make_airnet_3b(): return airnet(3) def make_airnet_3b(): return airnet(3)
def make_airnet_4b(): return airnet(4) def make_airnet_4b(): return airnet(4)
def make_airnet_5b(): return airnet(5) def make_airnet_5b(): return airnet(5)
...@@ -17,99 +17,20 @@ from __future__ import print_function ...@@ -17,99 +17,20 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
class Bootstarp(torch.nn.Module):
"""Extended operator to process the images."""
def __init__(self):
super(Bootstarp, self).__init__()
self.dtype = cfg.MODEL.DATA_TYPE.lower()
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'ImageData',
'arguments': {
'dtype': self.dtype,
'data_format': 'NCHW',
'mean_values': cfg.PIXEL_MEANS,
}
}
def forward(self, x):
inputs, outputs = [x], [self.register_output()]
return self.run(inputs, outputs)
class RPNDecoder(torch.nn.Module):
"""Generate proposal regions from RPN."""
def __init__(self):
super(RPNDecoder, self).__init__()
self.register_op()
self.K = (cfg.FPN.ROI_MAX_LEVEL -
cfg.FPN.ROI_MIN_LEVEL + 1) \
if len(cfg.RPN.STRIDES) > 1 else 1
def register_op(self):
self.op_meta = {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RCNN',
'strides': cfg.RPN.STRIDES,
'ratios': [float(e) for e in cfg.RPN.ASPECT_RATIOS],
'scales': [float(e) for e in cfg.RPN.SCALES],
'pre_nms_top_n': cfg.TEST.RPN_PRE_NMS_TOP_N,
'post_nms_top_n': cfg.TEST.RPN_POST_NMS_TOP_N,
'nms_thresh': cfg.TEST.RPN_NMS_THRESH,
'min_size': cfg.TEST.RPN_MIN_SIZE,
'min_leve': cfg.FPN.ROI_MIN_LEVEL,
'max_level': cfg.FPN.ROI_MAX_LEVEL,
'canonical_scale': cfg.FPN.ROI_CANONICAL_SCALE,
'canonical_level': cfg.FPN.ROI_CANONICAL_LEVEL,
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, to_tensor(ims_info)]
outputs = [self.register_output() for _ in range(self.K)]
outputs = self.run(inputs, outputs)
return outputs if isinstance(outputs, list) else [outputs]
class RetinaNetDecoder(torch.nn.Module):
"""Generate proposal regions from retinanet."""
def __init__(self):
super(RetinaNetDecoder, self).__init__()
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
self.strides = [int(2. ** lvl) for lvl in range(k_min, k_max + 1)]
self.scales = [cfg.RETINANET.ANCHOR_SCALE *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RETINANET',
'strides': self.strides,
'scales': self.scales,
'ratios': [float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
'pre_nms_top_n': cfg.RETINANET.PRE_NMS_TOP_N,
'score_thresh': cfg.TEST.SCORE_THRESH,
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info): def bn(dim_in, eps=1e-5):
inputs = features + [cls_prob, bbox_pred, to_tensor(ims_info)] """The BatchNorm."""
outputs = [self.register_output()] return torch.nn.BatchNorm2d(dim_in, eps=eps)
return self.run(inputs, outputs)
def conv1x1(dim_in, dim_out, stride=1, bias=False): def conv1x1(dim_in, dim_out, stride=1, bias=False):
...@@ -133,18 +54,3 @@ def conv3x3(dim_in, dim_out, stride=1, bias=False): ...@@ -133,18 +54,3 @@ def conv3x3(dim_in, dim_out, stride=1, bias=False):
padding=1, padding=1,
bias=bias, bias=bias,
) )
def bn(dim_in, eps=1e-5):
"""The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps)
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
\ No newline at end of file
...@@ -13,22 +13,19 @@ from __future__ import absolute_import ...@@ -13,22 +13,19 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections
import importlib import importlib
import dragon.vm.torch as torch import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.logger import is_root from lib.modeling import FPN
from lib.modeling import RPN
from lib.modeling import FastRCNN
from lib.modeling import RetinaNet
from lib.modeling import SSD
from lib.modeling.factory import get_body_func from lib.modeling.factory import get_body_func
from lib.ops.modules import Bootstrap
from lib.modeling import ( from lib.utils.logger import is_root
Bootstarp,
FPN,
RPN,
FastRCNN,
RetinaNet,
SSD,
)
class Detector(torch.nn.Module): class Detector(torch.nn.Module):
...@@ -47,7 +44,7 @@ class Detector(torch.nn.Module): ...@@ -47,7 +44,7 @@ class Detector(torch.nn.Module):
# + Data Loader # + Data Loader
self.data_layer = importlib.import_module( self.data_layer = importlib.import_module(
'lib.{}'.format(model)).DataLayer 'lib.{}'.format(model)).DataLayer
self.bootstarp = Bootstarp() self.bootstrap = Bootstrap()
# + Feature Extractor # + Feature Extractor
self.body = get_body_func(body)() self.body = get_body_func(body)()
...@@ -84,8 +81,11 @@ class Detector(torch.nn.Module): ...@@ -84,8 +81,11 @@ class Detector(torch.nn.Module):
The path of the weights file. The path of the weights file.
""" """
self.load_state_dict(torch.load(weights), self.load_state_dict(
strict=False, verbose=is_root()) torch.load(weights),
strict=False,
verbose=is_root(),
)
def forward(self, inputs=None): def forward(self, inputs=None):
"""Compute the detection outputs. """Compute the detection outputs.
...@@ -107,9 +107,9 @@ class Detector(torch.nn.Module): ...@@ -107,9 +107,9 @@ class Detector(torch.nn.Module):
# 1. Extract features # 1. Extract features
# Process the data: # Process the data:
# 1) NHWC => NCHW # 1) NHWC => NCHW
# 2) Uint8 => Float32 or Float16 # 2) uint8 => float32 or float16
# 3) Mean subtraction # 3) Mean subtraction
image_data = self.bootstarp(inputs['data']) image_data = self.bootstrap(inputs['data'])
features = self.body(image_data) features = self.body(image_data)
# 2. Apply the FPN to enhance features if necessary # 2. Apply the FPN to enhance features if necessary
...@@ -117,7 +117,7 @@ class Detector(torch.nn.Module): ...@@ -117,7 +117,7 @@ class Detector(torch.nn.Module):
features = self.fpn(features) features = self.fpn(features)
# 3. Collect detection outputs # 3. Collect detection outputs
outputs = OrderedDict() outputs = collections.OrderedDict()
# 3.1 Feature -> RPN -> Fast R-CNN # 3.1 Feature -> RPN -> Fast R-CNN
if hasattr(self, 'rpn'): if hasattr(self, 'rpn'):
......
...@@ -13,27 +13,11 @@ from __future__ import absolute_import ...@@ -13,27 +13,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections
import importlib import importlib
from collections import defaultdict
_STORE = defaultdict(dict) _STORE = collections.defaultdict(dict)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format(
name, desc, ', '.join(sets.keys())))
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
module = importlib.import_module(module_name)
return getattr(module, func_name)
except ImportError as e:
raise ValueError('Can not import module from: ' + module_name)
########################################### ###########################################
...@@ -59,6 +43,23 @@ for D in ['', '3b', '4b', '5b']: ...@@ -59,6 +43,23 @@ for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \ _STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D) 'lib.modeling.airnet.make_airnet_{}'.format(D)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format(
name, desc, ', '.join(sets.keys())))
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
module = importlib.import_module(module_name)
return getattr(module, func_name)
except ImportError as e:
raise ValueError('Can not import module from: ' + module_name)
def get_body_func(name): def get_body_func(name):
return get_template_func( return get_template_func(
name, _STORE['BODY'], 'Body') name, _STORE['BODY'], 'Body')
...@@ -13,11 +13,11 @@ from __future__ import absolute_import ...@@ -13,11 +13,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import RPNDecoder from lib.ops.modules import RPNDecoder
class FastRCNN(torch.nn.Module): class FastRCNN(torch.nn.Module):
...@@ -50,11 +50,11 @@ class FastRCNN(torch.nn.Module): ...@@ -50,11 +50,11 @@ class FastRCNN(torch.nn.Module):
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False) self.sigmoid = torch.nn.Sigmoid(inplace=False)
self.roi_func = { self.roi_func = {
'RoIPool': torch.roi_pool, 'RoIPool': torch.vision.ops.roi_pool,
'RoIAlign': torch.roi_align, 'RoIAlign': torch.vision.ops.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD] }[cfg.FRCNN.ROI_XFORM_METHOD]
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1) self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1.) self.bbox_loss = torch.nn.SmoothL1Loss(beta=1., reduction='batch_size')
# Compute spatial scales for multiple strides # Compute spatial scales for multiple strides
roi_levels = [level for level in range( roi_levels = [level for level in range(
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)] cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)]
...@@ -66,13 +66,16 @@ class FastRCNN(torch.nn.Module): ...@@ -66,13 +66,16 @@ class FastRCNN(torch.nn.Module):
torch.nn.init.normal_(self.cls_score.weight, std=0.01) torch.nn.init.normal_(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001) torch.nn.init.normal_(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters(): for name, p in self.named_parameters():
if 'bias' in name: torch.nn.init.constant_(p, 0) if 'bias' in name:
torch.nn.init.constant_(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale): def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func( return self.roi_func(
feature, rois, feature, rois,
pooled_h=cfg.FRCNN.ROI_XFORM_RESOLUTION, output_size=(
pooled_w=cfg.FRCNN.ROI_XFORM_RESOLUTION, cfg.FRCNN.ROI_XFORM_RESOLUTION,
cfg.FRCNN.ROI_XFORM_RESOLUTION,
),
spatial_scale=spatial_scale, spatial_scale=spatial_scale,
) )
...@@ -127,14 +130,14 @@ class FastRCNN(torch.nn.Module): ...@@ -127,14 +130,14 @@ class FastRCNN(torch.nn.Module):
# Compute rcnn logits # Compute rcnn logits
cls_score = self.cls_score(rcnn_output).float() cls_score = self.cls_score(rcnn_output).float()
outputs = OrderedDict({ outputs = collections.OrderedDict({
'bbox_pred': 'bbox_pred':
self.bbox_pred(rcnn_output).float(), self.bbox_pred(rcnn_output).float(),
}) })
if self.training: if self.training:
# Compute rcnn losses # Compute rcnn losses
outputs.update(OrderedDict({ outputs.update(collections.OrderedDict({
'cls_loss': self.cls_loss( 'cls_loss': self.cls_loss(
cls_score, cls_score,
self.rcnn_data['labels'], self.rcnn_data['labels'],
......
...@@ -16,7 +16,8 @@ from __future__ import print_function ...@@ -16,7 +16,8 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3 from lib.modeling import conv1x1
from lib.modeling import conv3x3
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
...@@ -48,49 +49,43 @@ class FPN(torch.nn.Module): ...@@ -48,49 +49,43 @@ class FPN(torch.nn.Module):
if isinstance(m, torch.nn.Conv2d): if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_( torch.nn.init.kaiming_uniform_(
m.weight, m.weight,
# Fix the gain for [-127, 127] a=1, # Fix the gain for [-127, 127]
a=1,
) # Xavier Initialization ) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0) torch.nn.init.constant_(m.bias, 0)
def apply_on_rcnn(self, features): def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL- min_lvl](fpn_input)] outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Apply MaxPool for higher features # Apply MaxPool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1])) outputs.append(self.maxpool(outputs[-1]))
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.nn_resize( upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:]) fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output) fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input)) outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs return outputs
def apply_on_retinanet(self, features): def apply_on_retinanet(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL- min_lvl](fpn_input)] outputs = [self.P[HIGHEST_BACKBONE_LVL- min_lvl](fpn_input)]
# Add extra convolutions for higher features # Add extra convolutions for higher features
extra_input = features[-1] extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input)) outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl: extra_input = self.relu(outputs[-1]) if i != max_lvl:
extra_input = self.relu(outputs[-1])
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.nn_resize( upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:]) fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output) fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input)) outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs return outputs
def forward(self, features): def forward(self, features):
......
...@@ -20,12 +20,20 @@ from __future__ import print_function ...@@ -20,12 +20,20 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3, affine from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class BasicBlock(torch.nn.Module): class BasicBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out, stride=1, def __init__(
downsample=None, dropblock=None): self,
dim_in,
dim_out,
stride=1,
downsample=None,
dropblock=None,
):
super(BasicBlock, self).__init__() super(BasicBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride) self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out) self.bn1 = affine(dim_out)
...@@ -65,8 +73,14 @@ class Bottleneck(torch.nn.Module): ...@@ -65,8 +73,14 @@ class Bottleneck(torch.nn.Module):
contraction = cfg.RESNET.NUM_GROUPS \ contraction = cfg.RESNET.NUM_GROUPS \
* cfg.RESNET.GROUP_WIDTH / 256.0 * cfg.RESNET.GROUP_WIDTH / 256.0
def __init__(self, dim_in, dim_out, stride=1, def __init__(
downsample=None, dropblock=None): self,
dim_in,
dim_out,
stride=1,
downsample=None,
dropblock=None,
):
super(Bottleneck, self).__init__() super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction) dim = int(dim_out * self.contraction)
self.conv1 = conv1x1(dim_in, dim) self.conv1 = conv1x1(dim_in, dim)
...@@ -128,11 +142,17 @@ class ResNet(torch.nn.Module): ...@@ -128,11 +142,17 @@ class ResNet(torch.nn.Module):
ceil_mode=True, ceil_mode=True,
) )
self.drop3 = torch.nn.DropBlock2d( self.drop3 = torch.nn.DropBlock2d(
7, 0.9, alpha=0.25, decrement=cfg.DROPBLOCK.DECREMENT) \ kp=0.9,
if cfg.DROPBLOCK.DROP_ON else None block_size=7,
alpha=0.25,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.drop4 = torch.nn.DropBlock2d( self.drop4 = torch.nn.DropBlock2d(
7, 0.9, alpha=1., decrement=cfg.DROPBLOCK.DECREMENT) \ kp=0.9,
if cfg.DROPBLOCK.DROP_ON else None block_size=7,
alpha=1.00,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0]) self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2) self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3) self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3)
...@@ -145,7 +165,8 @@ class ResNet(torch.nn.Module): ...@@ -145,7 +165,8 @@ class ResNet(torch.nn.Module):
if isinstance(m, torch.nn.Conv2d): if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_normal_( torch.nn.init.kaiming_normal_(
m.weight, m.weight,
nonlinearity='relu') nonlinearity='relu',
)
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
...@@ -178,25 +199,31 @@ class ResNet(torch.nn.Module): ...@@ -178,25 +199,31 @@ class ResNet(torch.nn.Module):
x = self.bn1(x) x = self.bn1(x)
x = self.relu(x) x = self.relu(x)
x = self.maxpool(x) x = self.maxpool(x)
outputs = [x] outputs = [x]
outputs += [self.layer1(outputs[-1])] outputs += [self.layer1(outputs[-1])]
outputs += [self.layer2(outputs[-1])] outputs += [self.layer2(outputs[-1])]
outputs += [self.layer3(outputs[-1])] outputs += [self.layer3(outputs[-1])]
outputs += [self.layer4(outputs[-1])] outputs += [self.layer4(outputs[-1])]
return outputs return outputs
def resnet(depth): def resnet(depth):
if depth == 18: units = [2, 2, 2, 2] if depth == 18:
elif depth == 34: units = [3, 4, 6, 3] units = [2, 2, 2, 2]
elif depth == 50: units = [3, 4, 6, 3] elif depth == 34:
elif depth == 101: units = [3, 4, 23, 3] units = [3, 4, 6, 3]
elif depth == 152: units = [3, 8, 36, 3] elif depth == 50:
elif depth == 200: units = [3, 24, 36, 3] units = [3, 4, 6, 3]
elif depth == 269: units = [3, 30, 48, 8] elif depth == 101:
else: raise ValueError('Unsupported depth: %d' % depth) units = [3, 4, 23, 3]
elif depth == 152:
units = [3, 8, 36, 3]
elif depth == 200:
units = [3, 24, 36, 3]
elif depth == 269:
units = [3, 30, 48, 8]
else:
raise ValueError('Unsupported depth: %d' % depth)
block = Bottleneck if depth >= 50 else BasicBlock block = Bottleneck if depth >= 50 else BasicBlock
filters = [64, 256, 512, 1024, 2048] \ filters = [64, 256, 512, 1024, 2048] \
if depth >= 50 else [64, 64, 128, 256, 512] if depth >= 50 else [64, 64, 128, 256, 512]
...@@ -204,7 +231,15 @@ def resnet(depth): ...@@ -204,7 +231,15 @@ def resnet(depth):
def make_resnet_18(): return resnet(18) def make_resnet_18(): return resnet(18)
def make_resnet_34(): return resnet(34) def make_resnet_34(): return resnet(34)
def make_resnet_50(): return resnet(50) def make_resnet_50(): return resnet(50)
def make_resnet_101(): return resnet(101) def make_resnet_101(): return resnet(101)
def make_resnet_152(): return resnet(152) def make_resnet_152(): return resnet(152)
...@@ -13,12 +13,13 @@ from __future__ import absolute_import ...@@ -13,12 +13,13 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections
import math import math
import dragon.vm.torch as torch import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv3x3, RetinaNetDecoder from lib.modeling import conv3x3
from lib.ops.modules import RetinaNetDecoder
from lib.retinanet import AnchorTargetLayer from lib.retinanet import AnchorTargetLayer
...@@ -32,10 +33,12 @@ class RetinaNet(torch.nn.Module): ...@@ -32,10 +33,12 @@ class RetinaNet(torch.nn.Module):
self.cls_conv = torch.nn.ModuleList( self.cls_conv = torch.nn.ModuleList(
conv3x3(dim_in, dim_in, bias=True) conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS)) for _ in range(cfg.RETINANET.NUM_CONVS)
)
self.bbox_conv = torch.nn.ModuleList( self.bbox_conv = torch.nn.ModuleList(
conv3x3(dim_in, dim_in, bias=True) conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS)) for _ in range(cfg.RETINANET.NUM_CONVS)
)
# Packed as [C, A] not [A, C] # Packed as [C, A] not [A, C]
self.C = cfg.MODEL.NUM_CLASSES - 1 self.C = cfg.MODEL.NUM_CLASSES - 1
A = len(cfg.RETINANET.ASPECT_RATIOS) * \ A = len(cfg.RETINANET.ASPECT_RATIOS) * \
...@@ -53,8 +56,11 @@ class RetinaNet(torch.nn.Module): ...@@ -53,8 +56,11 @@ class RetinaNet(torch.nn.Module):
self.anchor_target_layer = AnchorTargetLayer() self.anchor_target_layer = AnchorTargetLayer()
self.cls_loss = torch.nn.SigmoidFocalLoss( self.cls_loss = torch.nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA, alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA) gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1. / 9.) )
self.bbox_loss = torch.nn.SmoothL1Loss(
beta=1. / 9., reduction='batch_size',
)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -127,7 +133,7 @@ class RetinaNet(torch.nn.Module): ...@@ -127,7 +133,7 @@ class RetinaNet(torch.nn.Module):
gt_boxes=gt_boxes, gt_boxes=gt_boxes,
ims_info=ims_info, ims_info=ims_info,
) )
return OrderedDict({ return collections.OrderedDict({
'cls_loss': 'cls_loss':
self.cls_loss( self.cls_loss(
cls_score, cls_score,
...@@ -146,7 +152,7 @@ class RetinaNet(torch.nn.Module): ...@@ -146,7 +152,7 @@ class RetinaNet(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features']) cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float() cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({'bbox_pred': bbox_pred}) outputs = collections.OrderedDict({'bbox_pred': bbox_pred})
if self.training: if self.training:
outputs.update( outputs.update(
......
...@@ -13,11 +13,12 @@ from __future__ import absolute_import ...@@ -13,11 +13,12 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3 from lib.modeling import conv1x1
from lib.modeling import conv3x3
class RPN(torch.nn.Module): class RPN(torch.nn.Module):
...@@ -119,7 +120,7 @@ class RPN(torch.nn.Module): ...@@ -119,7 +120,7 @@ class RPN(torch.nn.Module):
gt_boxes=gt_boxes, gt_boxes=gt_boxes,
ims_info=ims_info, ims_info=ims_info,
) )
return OrderedDict({ return collections.OrderedDict({
'rpn_cls_loss': 'rpn_cls_loss':
self.cls_loss(cls_score, self.rpn_data['labels']), self.cls_loss(cls_score, self.rpn_data['labels']),
'rpn_bbox_loss': 'rpn_bbox_loss':
...@@ -135,7 +136,7 @@ class RPN(torch.nn.Module): ...@@ -135,7 +136,7 @@ class RPN(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features']) cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float() cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({ outputs = collections.OrderedDict({
'rpn_cls_score': cls_score, 'rpn_cls_score': cls_score,
'rpn_bbox_pred': bbox_pred, 'rpn_bbox_pred': bbox_pred,
}) })
......
...@@ -13,18 +13,15 @@ from __future__ import absolute_import ...@@ -13,18 +13,15 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv3x3 from lib.modeling import conv3x3
from lib.ssd import HardMiningLayer
from lib.ssd import ( from lib.ssd import MultiBoxMatchLayer
PriorBoxLayer, from lib.ssd import MultiBoxTargetLayer
MultiBoxMatchLayer, from lib.ssd import PriorBoxLayer
HardMiningLayer,
MultiBoxTargetLayer,
)
class SSD(torch.nn.Module): class SSD(torch.nn.Module):
...@@ -57,7 +54,7 @@ class SSD(torch.nn.Module): ...@@ -57,7 +54,7 @@ class SSD(torch.nn.Module):
self.hard_mining_layer = HardMiningLayer() self.hard_mining_layer = HardMiningLayer()
self.box_target_layer = MultiBoxTargetLayer() self.box_target_layer = MultiBoxTargetLayer()
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1) self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss() self.bbox_loss = torch.nn.SmoothL1Loss(reduction='batch_size')
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -88,8 +85,9 @@ class SSD(torch.nn.Module): ...@@ -88,8 +85,9 @@ class SSD(torch.nn.Module):
.permute(0, 2, 3, 1).view(0, -1)) .permute(0, 2, 3, 1).view(0, -1))
# Concat them if necessary # Concat them if necessary
return torch.cat(cls_score_wide, dim=1).view( return \
0, -1, cfg.MODEL.NUM_CLASSES), \ torch.cat(cls_score_wide, dim=1) \
.view(0, -1, cfg.MODEL.NUM_CLASSES), \
torch.cat(bbox_pred_wide, dim=1).view(0, -1, 4) torch.cat(bbox_pred_wide, dim=1).view(0, -1, 4)
def compute_losses( def compute_losses(
...@@ -138,7 +136,7 @@ class SSD(torch.nn.Module): ...@@ -138,7 +136,7 @@ class SSD(torch.nn.Module):
gt_boxes=gt_boxes, gt_boxes=gt_boxes,
) )
) )
return OrderedDict({ return collections.OrderedDict({
# A compensating factor of 4.0 is used # A compensating factor of 4.0 is used
# As we normalize both the pos and neg samples # As we normalize both the pos and neg samples
'cls_loss': 'cls_loss':
...@@ -160,7 +158,7 @@ class SSD(torch.nn.Module): ...@@ -160,7 +158,7 @@ class SSD(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features']) cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float() cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({ outputs = collections.OrderedDict({
'prior_boxes': prior_boxes, 'prior_boxes': prior_boxes,
'bbox_pred': bbox_pred, 'bbox_pred': bbox_pred,
}) })
......
...@@ -16,7 +16,8 @@ from __future__ import print_function ...@@ -16,7 +16,8 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3 from lib.modeling import conv1x1
from lib.modeling import conv3x3
class VGG(torch.nn.Module): class VGG(torch.nn.Module):
...@@ -35,16 +36,22 @@ class VGG(torch.nn.Module): ...@@ -35,16 +36,22 @@ class VGG(torch.nn.Module):
dim_in = 3 if i == 0 else filter_list[i - 1] dim_in = 3 if i == 0 else filter_list[i - 1]
for j in range(self.units[i]): for j in range(self.units[i]):
self.__setattr__( self.__setattr__(
'{}_{}'.format(conv_name, j + 1), '{}_{}'
conv3x3(dim_in, filter_list[i], bias=True)) .format(conv_name, j + 1),
if j == 0: dim_in = filter_list[i] conv3x3(dim_in, filter_list[i], bias=True),
)
if j == 0:
dim_in = filter_list[i]
if reduced: if reduced:
# L2Norm is redundant from the observation of # L2Norm is redundant from the observation of
# empirical experiments. We just keep a trainable scale # empirical experiments. We just keep a trainable scale
self.conv4_3_norm = torch.nn.Affine(filter_list[3], bias=False) self.conv4_3_norm = torch.nn.Affine(filter_list[3], bias=False)
self.conv4_3_norm.weight.zero_() # Zero-Init self.conv4_3_norm.weight.zero_() # Zero-Init
self.fc6 = torch.nn.Conv2d(filter_list[-1], 1024, self.fc6 = torch.nn.Conv2d(
kernel_size=3, stride=1, padding=6, dilation=6) filter_list[-1], 1024,
kernel_size=3, padding=6,
stride=1, dilation=6,
)
self.fc7 = conv1x1(1024, 1024, bias=True) self.fc7 = conv1x1(1024, 1024, bias=True)
self.feature_dims = [filter_list[-2], 1024] self.feature_dims = [filter_list[-2], 1024]
if extra_arch is not None: if extra_arch is not None:
...@@ -54,15 +61,23 @@ class VGG(torch.nn.Module): ...@@ -54,15 +61,23 @@ class VGG(torch.nn.Module):
for i in range(len(strides)): for i in range(len(strides)):
conv_name = 'conv{}'.format(i + 6) conv_name = 'conv{}'.format(i + 6)
dim_in = 1024 if i == 0 else filter_list[i - 1] * 2 dim_in = 1024 if i == 0 else filter_list[i - 1] * 2
self.__setattr__('{}_1'.format(conv_name), self.__setattr__(
conv1x1(dim_in, filter_list[i], bias=True)) '{}_1'.format(conv_name),
conv1x1(dim_in, filter_list[i], bias=True),
)
if strides[i] == 2: if strides[i] == 2:
self.__setattr__('{}_2'.format(conv_name), self.__setattr__(
conv3x3(filter_list[i], filter_list[i] * 2, 2, bias=True)) '{}_2'.format(conv_name),
conv3x3(filter_list[i], filter_list[i] * 2, 2, bias=True),
)
else: else:
self.__setattr__('{}_2'.format(conv_name), self.__setattr__(
torch.nn.Conv2d(filter_list[i], filter_list[i] * 2, '{}_2'.format(conv_name),
kernel_size=kps[0], padding=kps[1], stride=kps[2])) torch.nn.Conv2d(
filter_list[i], filter_list[i] * 2,
kernel_size=kps[0], padding=kps[1], stride=kps[2]
),
)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -88,8 +103,9 @@ class VGG(torch.nn.Module): ...@@ -88,8 +103,9 @@ class VGG(torch.nn.Module):
for i in range(cfg.MODEL.FREEZE_AT, 0, -1): for i in range(cfg.MODEL.FREEZE_AT, 0, -1):
conv_name = 'conv{}'.format(i) conv_name = 'conv{}'.format(i)
for j in range(self.units[i - 1]): for j in range(self.units[i - 1]):
self.__getattr__('{}_{}'.format( self.__getattr__(
conv_name, j + 1)).apply(freeze_func) '{}_{}'.format(conv_name, j + 1)
).apply(freeze_func)
def forward(self, x): def forward(self, x):
outputs = [] outputs = []
...@@ -101,8 +117,10 @@ class VGG(torch.nn.Module): ...@@ -101,8 +117,10 @@ class VGG(torch.nn.Module):
'{}_{}'.format(conv_name, j + 1))(x)) '{}_{}'.format(conv_name, j + 1))(x))
if self.reduced and i == 3: if self.reduced and i == 3:
outputs.append(self.conv4_3_norm(x)) outputs.append(self.conv4_3_norm(x))
if i < 4: x = self.maxpool(x) if i < 4:
else: x = self.s1pool(x) if self.reduced else x x = self.maxpool(x)
else:
x = self.s1pool(x) if self.reduced else x
# Internal FC layers and Extra Conv Layers # Internal FC layers and Extra Conv Layers
if self.reduced: if self.reduced:
...@@ -145,4 +163,6 @@ def make_vgg_16_reduced(scale=300): ...@@ -145,4 +163,6 @@ def make_vgg_16_reduced(scale=300):
def make_vgg_16_reduced_300(): return make_vgg_16_reduced(300) def make_vgg_16_reduced_300(): return make_vgg_16_reduced(300)
def make_vgg_16_reduced_512(): return make_vgg_16_reduced(512) def make_vgg_16_reduced_512(): return make_vgg_16_reduced(512)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
...@@ -18,7 +18,7 @@ from __future__ import division ...@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.core.config import cfg from lib.core.config import cfg
import lib.utils.logger as logger from lib.utils import logger
try: try:
from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms
...@@ -33,10 +33,12 @@ except ImportError as e: ...@@ -33,10 +33,12 @@ except ImportError as e:
def nms(detections, thresh, force_cpu=False): def nms(detections, thresh, force_cpu=False):
"""Perform either CPU or GPU Hard-NMS.""" """Perform either CPU or GPU Hard-NMS."""
if detections.shape[0] == 0: return [] if detections.shape[0] == 0:
return []
if cfg.USE_GPU_NMS and not force_cpu: if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID) return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
else: return cpu_nms(detections, thresh) else:
return cpu_nms(detections, thresh)
def soft_nms( def soft_nms(
...@@ -47,7 +49,8 @@ def soft_nms( ...@@ -47,7 +49,8 @@ def soft_nms(
score_thresh=0.001, score_thresh=0.001,
): ):
"""Perform CPU Soft-NMS.""" """Perform CPU Soft-NMS."""
if detections.shape[0] == 0: return [] if detections.shape[0] == 0:
return []
methods = {'hard': 0, 'linear': 1, 'gaussian': 2} methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
if method not in methods: if method not in methods:
logger.fatal('Unknown soft nms method: {}'.format(method)) logger.fatal('Unknown soft nms method: {}'.format(method))
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.autograd import function
from lib.ops import functions
def decode_retinanet(
    features,
    cls_prob,
    bbox_pred,
    ims_info,
    strides,
    ratios,
    scales,
    pre_nms_top_n,
    score_thresh,
):
    """Apply the ``RetinaNetDecoder`` function to the given inputs.

    The function object is requested from ``function.get`` for the
    device of ``cls_prob`` and configured with the keyword arguments
    listed below. It is then applied to
    ``(features, cls_prob, bbox_pred, ims_info)``.

    Parameters
    ----------
    features, cls_prob, bbox_pred, ims_info
        Forwarded positionally, in this order, to ``apply``.
    strides, ratios, scales, pre_nms_top_n, score_thresh
        Forwarded to ``function.get`` as keyword arguments
        under the same names.

    Returns
    -------
    Whatever ``apply`` of the decoder function returns.

    """
    decoder_args = {
        'strides': strides,
        'ratios': ratios,
        'scales': scales,
        'pre_nms_top_n': pre_nms_top_n,
        'score_thresh': score_thresh,
    }
    decoder = function.get(
        functions.RetinaNetDecoder,
        cls_prob.device,
        **decoder_args
    )
    return decoder.apply(features, cls_prob, bbox_pred, ims_info)
def decode_rpn(
    features,
    cls_prob,
    bbox_pred,
    ims_info,
    num_outputs,
    strides,
    ratios,
    scales,
    pre_nms_top_n,
    post_nms_top_n,
    nms_thresh,
    min_size,
    min_level,
    max_level,
    canonical_scale,
    canonical_level,
):
    """Apply the ``RPNDecoder`` function to the given inputs.

    The function object is requested from ``function.get`` for the
    device of ``cls_prob`` and configured with the keyword arguments
    listed below. It is then applied to
    ``(features, cls_prob, bbox_pred, ims_info)``.

    Parameters
    ----------
    features, cls_prob, bbox_pred, ims_info
        Forwarded positionally, in this order, to ``apply``.
    num_outputs
        Forwarded to ``function.get`` under the keyword ``K``.
    strides, ratios, scales, pre_nms_top_n, post_nms_top_n, \
    nms_thresh, min_size, min_level, max_level, canonical_scale, \
    canonical_level
        Forwarded to ``function.get`` as keyword arguments
        under the same names.

    Returns
    -------
    Whatever ``apply`` of the decoder function returns.

    """
    decoder_args = {
        'K': num_outputs,
        'strides': strides,
        'ratios': ratios,
        'scales': scales,
        'pre_nms_top_n': pre_nms_top_n,
        'post_nms_top_n': post_nms_top_n,
        'nms_thresh': nms_thresh,
        'min_size': min_size,
        'min_level': min_level,
        'max_level': max_level,
        'canonical_scale': canonical_scale,
        'canonical_level': canonical_level,
    }
    decoder = function.get(
        functions.RPNDecoder,
        cls_prob.device,
        **decoder_args
    )
    return decoder.apply(features, cls_prob, bbox_pred, ims_info)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.autograd import function
class RetinaNetDecoder(function.Function):
    """Function describing a ``Proposal`` operator in ``RETINANET`` mode."""

    def __init__(self, key, dev, **kwargs):
        super(RetinaNetDecoder, self).__init__(key, dev, **kwargs)
        # Keep the keyword arguments; they are read back
        # when the operator is registered.
        self.args = kwargs

    def register_operator(self):
        """Return the operator type and its arguments as a dict."""
        arguments = {'det_type': 'RETINANET'}
        # Copied verbatim from the constructor keywords;
        # a missing keyword raises ``KeyError``.
        for name in (
            'strides',
            'ratios',
            'scales',
            'pre_nms_top_n',
            'score_thresh',
        ):
            arguments[name] = self.args[name]
        return {'op_type': 'Proposal', 'arguments': arguments}

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Run the operator on the inputs with a single allocated output.

        ``features`` is concatenated with a list, so it is expected
        to be a list itself.

        """
        inputs = features + [cls_prob, bbox_pred, ims_info]
        # Every input except the trailing <ims_info> takes
        # part in the device unification.
        self._unify_devices(inputs[:-1])
        outputs = [self.alloc()]
        return self.run(inputs, outputs, unify_devices=False)
class RPNDecoder(function.Function):
    """Function describing a ``Proposal`` operator in ``RCNN`` mode."""

    def __init__(self, key, dev, **kwargs):
        super(RPNDecoder, self).__init__(key, dev, **kwargs)
        # Keep the keyword arguments; they are read back when the
        # operator is registered and when the outputs are allocated.
        self.args = kwargs

    def register_operator(self):
        """Return the operator type and its arguments as a dict."""
        arguments = {'det_type': 'RCNN'}
        # Copied verbatim from the constructor keywords;
        # a missing keyword raises ``KeyError``.
        for name in (
            'strides',
            'ratios',
            'scales',
            'pre_nms_top_n',
            'post_nms_top_n',
            'nms_thresh',
            'min_size',
            'min_level',
            'max_level',
            'canonical_scale',
            'canonical_level',
        ):
            arguments[name] = self.args[name]
        return {'op_type': 'Proposal', 'arguments': arguments}

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Run the operator on the inputs with ``K`` allocated outputs.

        ``features`` is concatenated with a list, so it is expected
        to be a list itself.

        """
        inputs = features + [cls_prob, bbox_pred, ims_info]
        # Every input except the trailing <ims_info> takes
        # part in the device unification.
        self._unify_devices(inputs[:-1])
        outputs = []
        for _ in range(self.args['K']):
            outputs.append(self.alloc())
        return self.run(inputs, outputs, unify_devices=False)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.ops import functional as F
from lib.utils.blob import blob_to_tensor
class Bootstrap(torch.nn.Module):
    """Extended operator to process the images.

    The module holds no learnable state. A one-element buffer is kept
    only so that the module has a tensor whose device can be queried.

    """

    def __init__(self):
        super(Bootstrap, self).__init__()
        self.dtype = cfg.MODEL.DATA_TYPE.lower()
        self.mean_values = cfg.PIXEL_MEANS
        # ``device()`` reports the device of this buffer.
        self.dummy_buffer = torch.ones(1)

    def _apply(self, fn):
        """Apply ``fn`` to the dummy buffer."""
        fn(self.dummy_buffer)

    def cpu(self):
        """Record the cpu device on this module.

        Returns
        -------
        Bootstrap
            The module itself, so that calls can be chained.

        """
        # NOTE(review): ``device()`` reads the buffer's device rather
        # than ``_device`` -- confirm the two are kept in sync elsewhere.
        self._device = torch.device('cpu')
        return self

    def cuda(self, device=None):
        """Record the cuda device with the given index on this module.

        Returns
        -------
        Bootstrap
            The module itself, so that calls can be chained.

        """
        self._device = torch.device('cuda', device)
        return self

    def device(self):
        """Return the device of this module."""
        return self.dummy_buffer.device

    def forward(self, input):
        """Move ``input`` to the module device and process it.

        The input is copied to the cpu or to the cuda device of the
        dummy buffer when its device differs. It is then passed to
        ``torch.vision.ops.image_data`` together with the configured
        data type and mean values.

        """
        cur_device = self.device()
        if input._device != cur_device:
            if cur_device.type == 'cpu':
                input = input.cpu()
            else:
                input = input.cuda(cur_device.index)
        return torch.vision.ops.image_data(
            input, self.dtype, self.mean_values,
        )
class RetinaNetDecoder(torch.nn.Module):
    """Generate proposal regions from retinanet."""

    def __init__(self):
        super(RetinaNetDecoder, self).__init__()
        max_level = cfg.FPN.RPN_MAX_LEVEL
        min_level = cfg.FPN.RPN_MIN_LEVEL
        num_octaves = cfg.RETINANET.SCALES_PER_OCTAVE
        # One integer stride (2 ** level) per level
        # in [min_level, max_level].
        strides = []
        for level in range(min_level, max_level + 1):
            strides.append(int(2. ** level))
        # One scale per octave step: the anchor scale
        # multiplied by 2 ** (step / num_octaves).
        scales = []
        for step in range(num_octaves):
            factor = 2 ** (step / float(num_octaves))
            scales.append(cfg.RETINANET.ANCHOR_SCALE * factor)
        self.strides = strides
        self.scales = scales

    def register_operator(self):
        """Return the operator type and its arguments as a dict."""
        arguments = {
            'det_type': 'RETINANET',
            'strides': self.strides,
            'scales': self.scales,
            'ratios': [float(r) for r in cfg.RETINANET.ASPECT_RATIOS],
            'pre_nms_top_n': cfg.RETINANET.PRE_NMS_TOP_N,
            'score_thresh': cfg.TEST.SCORE_THRESH,
        }
        return {'op_type': 'Proposal', 'arguments': arguments}

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Decode the inputs through ``F.decode_retinanet``.

        ``ims_info`` is first converted by ``blob_to_tensor`` with
        ``enforce_cpu=True``. The remaining decoder arguments are taken
        from the precomputed strides/scales and the global config.

        """
        info_tensor = blob_to_tensor(ims_info, enforce_cpu=True)
        aspect_ratios = [float(r) for r in cfg.RETINANET.ASPECT_RATIOS]
        return F.decode_retinanet(
            features=features,
            cls_prob=cls_prob,
            bbox_pred=bbox_pred,
            ims_info=info_tensor,
            strides=self.strides,
            ratios=aspect_ratios,
            scales=self.scales,
            pre_nms_top_n=cfg.RETINANET.PRE_NMS_TOP_N,
            score_thresh=cfg.TEST.SCORE_THRESH,
        )
class RPNDecoder(torch.nn.Module):
    """Generate proposal regions from RPN."""

    def __init__(self):
        super(RPNDecoder, self).__init__()
        # Number of decoder outputs: one per RoI level when several
        # RPN strides are configured, otherwise a single output.
        if len(cfg.RPN.STRIDES) > 1:
            self.K = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
        else:
            self.K = 1

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Decode the inputs through ``F.decode_rpn``.

        ``ims_info`` is first converted by ``blob_to_tensor`` with
        ``enforce_cpu=True``. The remaining decoder arguments are read
        from the global config.

        Returns
        -------
        list or object
            The decoder result wrapped into a one-element list when
            ``K`` is 1, otherwise the decoder result unchanged.

        """
        info_tensor = blob_to_tensor(ims_info, enforce_cpu=True)
        aspect_ratios = [float(r) for r in cfg.RPN.ASPECT_RATIOS]
        anchor_scales = [float(s) for s in cfg.RPN.SCALES]
        decoded = F.decode_rpn(
            features=features,
            cls_prob=cls_prob,
            bbox_pred=bbox_pred,
            ims_info=info_tensor,
            num_outputs=self.K,
            strides=cfg.RPN.STRIDES,
            ratios=aspect_ratios,
            scales=anchor_scales,
            pre_nms_top_n=cfg.TEST.RPN_PRE_NMS_TOP_N,
            post_nms_top_n=cfg.TEST.RPN_POST_NMS_TOP_N,
            nms_thresh=cfg.TEST.RPN_NMS_THRESH,
            min_size=cfg.TEST.RPN_MIN_SIZE,
            min_level=cfg.FPN.ROI_MIN_LEVEL,
            max_level=cfg.FPN.ROI_MAX_LEVEL,
            canonical_scale=cfg.FPN.ROI_CANONICAL_SCALE,
            canonical_level=cfg.FPN.ROI_CANONICAL_LEVEL,
        )
        if self.K == 1:
            return [decoded]
        return decoded
...@@ -15,4 +15,3 @@ from __future__ import print_function ...@@ -15,4 +15,3 @@ from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.retinanet.layers.anchor_target_layer import AnchorTargetLayer from lib.retinanet.layers.anchor_target_layer import AnchorTargetLayer
from lib.retinanet.layers.proposal_layer import ProposalLayer
\ No newline at end of file
...@@ -13,15 +13,16 @@ from __future__ import absolute_import ...@@ -13,15 +13,16 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.blob import to_tensor
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
class AnchorTargetLayer(torch.nn.Module): class AnchorTargetLayer(torch.nn.Module):
...@@ -35,28 +36,32 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -35,28 +36,32 @@ class AnchorTargetLayer(torch.nn.Module):
anchor_scale = cfg.RETINANET.ANCHOR_SCALE anchor_scale = cfg.RETINANET.ANCHOR_SCALE
self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)] self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)]
self.ratios = cfg.RETINANET.ASPECT_RATIOS self.ratios = cfg.RETINANET.ASPECT_RATIOS
# Generate base anchors # Generate base anchors
self.base_anchors = [] self.base_anchors = []
for stride in self.strides: for stride in self.strides:
sizes = [stride * anchor_scale * sizes = [stride * anchor_scale *
(2 ** (octave / float(scales_per_octave))) (2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)] for octave in range(scales_per_octave)]
self.base_anchors.append(generate_anchors_v2( self.base_anchors.append(
stride=stride, ratios=self.ratios, sizes=sizes)) generate_anchors_v2(
stride=stride,
ratios=self.ratios,
sizes=sizes,
))
def forward(self, features, gt_boxes, ims_info): def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.""" """Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images: if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \ logger.fatal(
.format(num_images, len(gt_boxes_wide))) 'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
all_anchors = []; total_anchors = 0 all_anchors, total_anchors = [], 0
for i in range(len(self.strides)): for i in range(len(self.strides)):
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i] shift_x = np.arange(0, width) * self.strides[i]
...@@ -101,7 +106,8 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -101,7 +106,8 @@ class AnchorTargetLayer(torch.nn.Module):
# Overlaps between the anchors and the gt boxes # Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float)) np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1) argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps] max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
...@@ -125,10 +131,10 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -125,10 +131,10 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_targets[fg_inds, :] = bbox_transform( bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4]) anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4])
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[fg_inds, :] = np.array((1.0, 1.0, 1.0, 1.0)) bbox_inside_weights[fg_inds, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1.0) bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1)
labels_wide[ix, inds_inside] = labels labels_wide[ix, inds_inside] = labels
bbox_targets_wide[ix, inds_inside] = bbox_targets bbox_targets_wide[ix, inds_inside] = bbox_targets
...@@ -141,16 +147,8 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -141,16 +147,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1)) bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return { return {
'labels': to_tensor(labels), 'labels': blob_to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets), 'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights), 'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights), 'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
} }
def _dismantle_gt_boxes(gt_boxes, num_images):
    """Split a packed array of ground-truth boxes into per-image arrays.

    Parameters
    ----------
    gt_boxes : numpy.ndarray
        A 2d array whose last column holds the index of the image
        that each row belongs to.
    num_images : int
        The number of images to produce slices for.

    Returns
    -------
    list of numpy.ndarray
        ``num_images`` arrays; the i-th one holds the rows of ``gt_boxes``
        whose last column equals ``i`` (possibly zero rows).

    """
    # The image index column does not depend on the image being
    # selected, so cast it once instead of once per image.
    image_ids = gt_boxes[:, -1].astype(np.int32)
    return [
        gt_boxes[np.where(image_ids == ix)[0]]
        for ix in range(num_images)
    ]
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.bbox_transform import bbox_transform_inv
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
class ProposalLayer(torch.nn.Module):
    """Turn per-anchor scores and box deltas into detections.

    The estimated bounding-box transformations are applied to a set of
    regular boxes (called "anchors") tiled over every feature level.

    """

    def __init__(self):
        super(ProposalLayer, self).__init__()
        # Read the pyramid range and the anchor layout from the config
        max_level, min_level = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
        num_octave_scales = cfg.RETINANET.SCALES_PER_OCTAVE
        base_scale = cfg.RETINANET.ANCHOR_SCALE
        self.strides = [
            2. ** level for level in range(min_level, max_level + 1)
        ]
        self.ratios = cfg.RETINANET.ASPECT_RATIOS
        # Build one set of base anchors for each stride
        self.base_anchors = []
        for stride in self.strides:
            anchor_sizes = []
            for octave in range(num_octave_scales):
                octave_scale = 2 ** (octave / float(num_octave_scales))
                anchor_sizes.append(stride * base_scale * octave_scale)
            self.base_anchors.append(
                generate_anchors_v2(
                    stride=stride,
                    ratios=self.ratios,
                    sizes=anchor_sizes,
                )
            )

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Decode the predictions of a batch into a detection array.

        Returns
        -------
        numpy.ndarray
            A float32 array with 7 columns,
            {im_idx, x1, y1, x2, y2, score, cls}. A single all-zero
            row is returned if no score exceeds ``cfg.TEST.SCORE_THRESH``.

        """
        # Fetch the inputs
        num_images = ims_info.shape[0]
        cls_prob, bbox_pred = cls_prob.numpy(True), bbox_pred.numpy(True)
        level_shapes = [
            features[i].shape[-2:] for i in range(len(self.strides))
        ]

        batch_matches = (
            cls_prob.shape[0] == num_images and
            bbox_pred.shape[0] == num_images
        )
        if not batch_matches:
            logger.fatal('Incorrect num of images: {}'.format(num_images))

        # [?, 4, n] -> [?, n, 4]
        batch_deltas = bbox_pred.transpose((0, 2, 1))
        collected = []

        # Decode each image on its own
        for im_idx in range(num_images):
            im_scale = ims_info[im_idx, 2]
            if cfg.RETINANET.SOFTMAX:
                # Skip the first row of the class axis
                im_probs = cls_prob[im_idx, 1:, :]
            else:
                im_probs = cls_prob[im_idx]  # [num_classes - 1, n]
            im_deltas = batch_deltas[im_idx]  # [n, 4]

            # Anchors of all levels are laid out back to back
            offset = 0
            for lvl, (height, width) in enumerate(level_shapes):
                num_base = self.base_anchors[lvl].shape[0]
                count = num_base * (height * width)
                lvl_probs = im_probs[:, offset:offset + count]
                lvl_deltas = im_deltas[offset:offset + count]
                offset += count

                # Keep at most PRE_NMS_TOP_N scores above the threshold
                flat_probs = lvl_probs.ravel()
                candidates = np.where(
                    flat_probs > cfg.TEST.SCORE_THRESH)[0]
                if len(candidates) == 0:
                    continue
                top_n = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidates))
                top = np.argpartition(
                    flat_probs[candidates], -top_n)[-top_n:]
                kept = candidates[top]

                # Recover (class, anchor) and (class, a, y, x) coordinates
                probs_4d = lvl_probs.reshape(
                    (lvl_probs.shape[0], num_base, height, width))
                cls_ids, anchor_ids = np.unravel_index(kept, lvl_probs.shape)
                _, a, y, x = np.unravel_index(kept, probs_4d.shape)
                scores = lvl_probs[cls_ids, anchor_ids]
                kept_deltas = lvl_deltas[anchor_ids]

                # Shift the base anchors to the selected grid cells
                shifts = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
                anchors = (shifts * self.strides[lvl]) + \
                    self.base_anchors[lvl][a, :]
                boxes = bbox_transform_inv(anchors, kept_deltas)
                boxes /= im_scale

                # {im_idx, x1, y1, x2, y2, score, cls}
                packed = np.zeros((boxes.shape[0], 7), dtype=np.float32)
                packed[:, 0] = im_idx
                packed[:, 1:5] = boxes
                packed[:, 5] = scores
                packed[:, 6] = cls_ids + 1
                collected.append(packed)

        # Merge the detections into a blob
        if len(collected) > 0:
            return np.vstack(collected)
        return np.zeros((1, 7), dtype=np.float32)
\ No newline at end of file
...@@ -13,20 +13,16 @@ from __future__ import absolute_import ...@@ -13,20 +13,16 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
try:
import cPickle
except:
import pickle as cPickle
import numpy as np
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.image import scale_image from lib.utils.image import scale_image
from lib.utils.bbox_transform import clip_boxes
from lib.nms.nms_wrapper import nms, soft_nms
from lib.utils.timer import Timer from lib.utils.timer import Timer
from lib.utils.blob import im_list_to_blob
from lib.utils.vis import vis_one_image from lib.utils.vis import vis_one_image
...@@ -39,72 +35,65 @@ def im_detect(detector, raw_image): ...@@ -39,72 +35,65 @@ def im_detect(detector, raw_image):
blobs = {'data': im_list_to_blob(ims)} blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([ blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale] list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32) for im_scale in ims_scale], dtype=np.float32,
blobs['data'] = torch.from_numpy(blobs['data']).cuda(cfg.GPU_ID) )
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward # Do Forward
with torch.no_grad(): with torch.no_grad():
outputs = detector.forward(inputs=blobs) outputs = detector.forward(inputs=blobs)
# Decode results # Unpack results
results = outputs['detections'] return tensor_to_blob(outputs['detections'])[:, 1:]
detections_wide = []
for im_idx in range(len(ims)):
indices = np.where(results[:, 0].astype(np.int32) == im_idx)[0]
detections = results[indices, 1:]
detections[:, :4] = clip_boxes(detections[:, :4], raw_image.shape)
detections_wide.append(detections)
return np.vstack(detections_wide) \
if len(detections_wide) > 1 else detections_wide[0]
def ims_detect(net, raw_images):
"""Detect images, with single or multiple scales.
""" def ims_detect(detector, raw_images):
"""Detect images, with single or multiple scales."""
# Prepare images # Prepare images
ims, ims_scale = scale_image(raw_images[0]) ims, ims_scale = scale_image(raw_images[0])
num_scales = len(ims_scale)
ims_shape = [im.shape for im in raw_images] ims_shape = [im.shape for im in raw_images]
for item_idx in range(1, len(raw_images)): for item_idx in range(1, len(raw_images)):
ims_ext, ims_scale_ext = scale_image(raw_images[item_idx]) ims_ext, ims_scale_ext = scale_image(raw_images[item_idx])
ims += ims_ext; ims_scale += ims_scale_ext ims += ims_ext
ims_scale += ims_scale_ext
# Prepare blobs # Prepare blobs
blobs = {'data': im_list_to_blob(ims)} blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([ blobs['ims_info'] = np.array([
list(blobs['data'].shape[2:4]) + [im_scale] list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32) for im_scale in ims_scale], dtype=np.float32,
)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward # Do Forward
net.forward(**blobs)() with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results # Unpack results
results = net.blobs['detections'].data.get_value() results = tensor_to_blob(outputs['detections'])
detections_wide = [[] for _ in range(len(ims_shape))] detections_wide = [[] for _ in range(len(ims_shape))]
for i in range(len(ims)): for i in range(len(ims)):
j = i % len(ims_shape)
indices = np.where(results[:, 0].astype(np.int32) == i)[0] indices = np.where(results[:, 0].astype(np.int32) == i)[0]
detections = results[indices, 1:] detections = results[indices, 1:]
detections[:, :4] = clip_boxes(detections[:, :4], ims_shape[j]) detections_wide[i // num_scales].append(detections)
detections_wide[j].append(detections)
for j in range(len(ims_shape)): for i in range(len(ims_shape)):
detections_wide[j] = np.vstack(detections_wide[j]) \ detections_wide[i] = np.vstack(detections_wide[i]) \
if len(detections_wide[j]) > 1 else detections_wide[j][0] if len(detections_wide[i]) > 1 else detections_wide[i][0]
return detections_wide return detections_wide
def test_net(net, server): def test_net(net, server):
classes, num_images, num_classes = \ # Load settings
server.classes, server.num_images, server.num_classes classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect' : Timer(), 'misc' : Timer()} _t = {'im_detect': Timer(), 'misc': Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH): for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths # Collect raw images and ground-truths
...@@ -134,30 +123,46 @@ def test_net(net, server): ...@@ -134,30 +123,46 @@ def test_net(net, server):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0] cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4] cls_boxes = detections[cls_indices, 0:4]
cls_scores = detections[cls_indices, 4] cls_scores = detections[cls_indices, 4]
cls_dets = np.hstack(( cls_detections = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])).\ cls_boxes, cls_scores[:, np.newaxis])) \
astype(np.float32, copy=False) .astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS: if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(cls_dets, cfg.TEST.NMS, keep = soft_nms(
cls_detections,
cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD, method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA) sigma=cfg.TEST.SOFT_NMS_SIGMA,
else: keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True) )
cls_dets = cls_dets[keep, :] else:
all_boxes[j][i] = cls_dets keep = nms(
boxes_this_image.append(cls_dets) cls_detections,
cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE: if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_images[item_idx], classes, boxes_this_image, vis_one_image(
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True, raw_images[item_idx],
filename=server.get_save_filename(image_ids[item_idx])) classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_ids[item_idx]),
)
# Limit to max_per_image detections *over all classes* # Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0: if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = [] image_scores = []
for j in range(1, num_classes): for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1]) image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores) if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM: if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM] image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes): for j in range(1, num_classes):
...@@ -165,7 +170,7 @@ def test_net(net, server): ...@@ -165,7 +170,7 @@ def test_net(net, server):
all_boxes[j][i] = all_boxes[j][i][keep, :] all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc() _t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(batch_idx + cfg.TEST.IMS_PER_BATCH, .format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images, _t['im_detect'].average_time, num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='') _t['misc'].average_time), end='')
......
...@@ -14,7 +14,7 @@ from __future__ import division ...@@ -14,7 +14,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.ssd.layers.data_layer import DataLayer from lib.ssd.layers.data_layer import DataLayer
from lib.ssd.layers.prior_box_layer import PriorBoxLayer
from lib.ssd.layers.multibox_match_layer import MultiBoxMatchLayer
from lib.ssd.layers.hard_mining_layer import HardMiningLayer from lib.ssd.layers.hard_mining_layer import HardMiningLayer
from lib.ssd.layers.multibox_target_layer import MultiBoxTargetLayer from lib.ssd.layers.multibox_layer import MultiBoxMatchLayer
\ No newline at end of file from lib.ssd.layers.multibox_layer import MultiBoxTargetLayer
from lib.ssd.layers.priorbox_layer import PriorBoxLayer
...@@ -13,32 +13,39 @@ from __future__ import absolute_import ...@@ -13,32 +13,39 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import multiprocessing
import numpy as np import numpy as np
from multiprocessing import Process
from lib.core.config import cfg from lib.core.config import cfg
class BlobFetcher(Process): class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(BlobFetcher, self).__init__() super(BlobFetcher, self).__init__()
self.Q_in = self.Q_out = None self._img_blob_size = (
cfg.TRAIN.IMS_PER_BATCH,
cfg.SSD.RESIZE.HEIGHT,
cfg.SSD.RESIZE.WIDTH, 3,
)
self.q_in = self.q_out = None
self.daemon = True self.daemon = True
def get(self): def get(self):
num_images = cfg.TRAIN.IMS_PER_BATCH img_blob, boxes_blob = np.zeros(self._img_blob_size, 'uint8'), []
target_h = cfg.SSD.RESIZE.HEIGHT; target_w = cfg.SSD.RESIZE.WIDTH
ims_blob = np.zeros(shape=(num_images, target_h, target_w, 3), dtype=np.uint8) for i in range(cfg.TRAIN.IMS_PER_BATCH):
gt_boxes_wide = [] img_blob[i], gt_boxes = self.q_in.get()
for ix in range(cfg.TRAIN.IMS_PER_BATCH): # Pack the boxes by adding the index of images
im, gt_boxes = self.Q_in.get() boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), np.float32)
ims_blob[ix, :, :, :] = im boxes[:, :gt_boxes.shape[1]] = gt_boxes
# Encode boxes by adding the idx of images boxes[:, -1] = i
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), dtype=np.float32) boxes_blob.append(boxes)
im_boxes[:, 0:gt_boxes.shape[1]] = gt_boxes
im_boxes[:, -1] = ix return {
gt_boxes_wide.append(im_boxes) 'data': img_blob,
'gt_boxes': np.concatenate(boxes_blob, 0),
return {'data': ims_blob, 'gt_boxes': np.concatenate(gt_boxes_wide, axis=0)} }
def run(self): def run(self):
while True: self.Q_out.put(self.get()) while True:
\ No newline at end of file self.q_out.put(self.get())
...@@ -13,15 +13,16 @@ from __future__ import absolute_import ...@@ -13,15 +13,16 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from multiprocessing import Queue
import time import time
import dragon
import pprint import pprint
from multiprocessing import Queue
import dragon.core.mpi as mpi
import lib.utils.logger as logger
from lib.faster_rcnn.data.data_reader import DataReader from lib.faster_rcnn.data.data_reader import DataReader
from lib.ssd.data.data_transformer import DataTransformer from lib.ssd.data.data_transformer import DataTransformer
from lib.ssd.data.blob_fetcher import BlobFetcher from lib.ssd.data.blob_fetcher import BlobFetcher
from lib.utils import logger
class DataBatch(object): class DataBatch(object):
...@@ -52,19 +53,20 @@ class DataBatch(object): ...@@ -52,19 +53,20 @@ class DataBatch(object):
super(DataBatch, self).__init__() super(DataBatch, self).__init__()
# Init mpi # Init mpi
global_rank, local_rank, group_size = 0, 0, 1 global_rank, local_rank, group_size = 0, 0, 1
if mpi.Is_Init(): if dragon.mpi.is_init():
idx, group = mpi.AllowParallel() group = dragon.mpi.is_parallel()
if idx != -1: # DataParallel if group is not None: # DataParallel
global_rank = mpi.Rank() global_rank = dragon.mpi.rank()
group_size = len(group) group_size = len(group)
for i, node in enumerate(group): for i, node in enumerate(group):
if global_rank == node: local_rank = i if global_rank == node:
local_rank = i
kwargs['group_size'] = group_size kwargs['group_size'] = group_size
# Configuration # Configuration
self._prefetch = kwargs.get('prefetch', 5) self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 32) self._batch_size = kwargs.get('batch_size', 32)
self._num_readers = kwargs.get( 'num_readers', 1) self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', -1) self._num_transformers = kwargs.get('num_transformers', -1)
self._max_transformers = kwargs.get('max_transformers', 3) self._max_transformers = kwargs.get('max_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1) self._num_fetchers = kwargs.get('num_fetchers', 1)
...@@ -84,7 +86,7 @@ class DataBatch(object): ...@@ -84,7 +86,7 @@ class DataBatch(object):
self._readers = [] self._readers = []
for i in range(self._num_readers): for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs)) self._readers.append(DataReader(**kwargs))
self._readers[-1].Q_out = self.Q1 self._readers[-1].q_out = self.Q1
for i in range(self._num_readers): for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers part_idx, num_parts = i, self._num_readers
...@@ -101,8 +103,8 @@ class DataBatch(object): ...@@ -101,8 +103,8 @@ class DataBatch(object):
for i in range(self._num_transformers): for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs) transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers) transformer._rng_seed += (i + local_rank * self._num_transformers)
transformer.Q_in = self.Q1 transformer.q_in = self.Q1
transformer.Q_out = self.Q2 transformer.q_out = self.Q2
transformer.start() transformer.start()
self._transformers.append(transformer) self._transformers.append(transformer)
time.sleep(0.1) time.sleep(0.1)
...@@ -111,14 +113,16 @@ class DataBatch(object): ...@@ -111,14 +113,16 @@ class DataBatch(object):
self._fetchers = [] self._fetchers = []
for i in range(self._num_fetchers): for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs) fetcher = BlobFetcher(**kwargs)
fetcher.Q_in = self.Q2 fetcher.q_in = self.Q2
fetcher.Q_out = self.Q3 fetcher.q_out = self.Q3
fetcher.start() fetcher.start()
self._fetchers.append(fetcher) self._fetchers.append(fetcher)
time.sleep(0.1) time.sleep(0.1)
# Prevent to echo multiple nodes # Prevent to echo multiple nodes
if local_rank == 0: self.echo() if local_rank == 0:
self.echo()
def cleanup(): def cleanup():
def terminate(processes): def terminate(processes):
for process in processes: for process in processes:
...@@ -130,6 +134,7 @@ class DataBatch(object): ...@@ -130,6 +134,7 @@ class DataBatch(object):
logger.info('Terminating DataTransformer ......') logger.info('Terminating DataTransformer ......')
terminate(self._readers) terminate(self._readers)
logger.info('Terminating DataReader......') logger.info('Terminating DataReader......')
import atexit import atexit
atexit.register(cleanup) atexit.register(cleanup)
...@@ -145,13 +150,7 @@ class DataBatch(object): ...@@ -145,13 +150,7 @@ class DataBatch(object):
return self.Q3.get() return self.Q3.get()
def echo(self): def echo(self):
"""Print I/O Information. """Print I/O Information."""
Returns
-------
None
"""
print('---------------------------------------------------------') print('---------------------------------------------------------')
print('BatchFetcher({} Threads), Using config:'.format( print('BatchFetcher({} Threads), Using config:'.format(
self._num_readers + self._num_transformers + self._num_fetchers)) self._num_readers + self._num_transformers + self._num_fetchers))
......
...@@ -14,34 +14,34 @@ from __future__ import division ...@@ -14,34 +14,34 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import cv2 import cv2
import multiprocessing
import numpy as np import numpy as np
import numpy.random as npr
from multiprocessing import Process
from lib.core.config import cfg from lib.core.config import cfg
from lib.proto import anno_pb2 as pb from lib.proto import anno_pb2 as pb
from lib.ssd.data.preprocessing import * from lib.ssd.data import transforms
import lib.utils.logger as logger from lib.utils import logger
class DataTransformer(Process): class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(DataTransformer, self).__init__() super(DataTransformer, self).__init__()
self._distorter = Distortor()
self._expander = Expander()
self._sampler = Sampler(cfg.SSD.SAMPLERS)
self._resizer = Resizer()
self._rng_seed = cfg.RNG_SEED self._rng_seed = cfg.RNG_SEED
self._mirror = cfg.TRAIN.USE_FLIPPED self._mirror = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF self._use_diff = cfg.TRAIN.USE_DIFF
self._classes = kwargs.get('classes', ('__background__',)) self._classes = kwargs.get('classes', ('__background__',))
self._num_classes = len(self._classes) self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes))) self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self._queues = [] self._image_aug = transforms.Compose(
self.Q_in = self.Q_out = None transforms.Distort(), # Color augmentation
transforms.Expand(), # Expand and padding
transforms.Sample(), # Sample a patch randomly
transforms.Resize(), # Resize to a fixed scale
)
self.q_in = self.q_out = None
self.daemon = True self.daemon = True
def make_roidb(self, ann_datum, flip=False): def make_roi_dict(self, ann_datum, flip=False):
annotations = ann_datum.annotation annotations = ann_datum.annotation
n_objects = 0 n_objects = 0
if not self._use_diff: if not self._use_diff:
...@@ -49,7 +49,7 @@ class DataTransformer(Process): ...@@ -49,7 +49,7 @@ class DataTransformer(Process):
if not ann.difficult: n_objects += 1 if not ann.difficult: n_objects += 1
else: n_objects = len(annotations) else: n_objects = len(annotations)
roidb = { roi_dict = {
'width': ann_datum.datum.width, 'width': ann_datum.datum.width,
'height': ann_datum.datum.height, 'height': ann_datum.datum.height,
'gt_classes': np.zeros((n_objects,), dtype=np.int32), 'gt_classes': np.zeros((n_objects,), dtype=np.int32),
...@@ -57,75 +57,82 @@ class DataTransformer(Process): ...@@ -57,75 +57,82 @@ class DataTransformer(Process):
'normalized_boxes': np.zeros((n_objects, 4), dtype=np.float32), 'normalized_boxes': np.zeros((n_objects, 4), dtype=np.float32),
} }
ix = 0 rec_idx = 0
for ann in annotations: for ann in annotations:
if not self._use_diff and ann.difficult: continue if not self._use_diff and ann.difficult:
roidb['boxes'][ix, :] = [ continue
max(0, ann.x1), max(0, ann.y1), roi_dict['boxes'][rec_idx, :] = [
max(0, ann.x1),
max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1), min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1)] min(ann.y2, ann_datum.datum.height - 1),
roidb['gt_classes'][ix] = self._class_to_ind[ann.name] ]
ix += 1 roi_dict['gt_classes'][rec_idx] = \
self._class_to_ind[ann.name]
rec_idx += 1
if flip: roidb['boxes'] = _flip_boxes(roidb['boxes'], roidb['width']) if flip:
roidb['normalized_boxes'][:, 0::2] = roidb['boxes'][:, 0::2] / float(roidb['width']) roi_dict['boxes'] = _flip_boxes(
roidb['normalized_boxes'][:, 1::2] = roidb['boxes'][:, 1::2] / float(roidb['height']) roi_dict['boxes'], roi_dict['width'])
return roidb roi_dict['boxes'][:, 0::2] /= roi_dict['width']
roi_dict['boxes'][:, 1::2] /= roi_dict['height']
return roi_dict
def get(self, serialized): def get(self, serialized):
ann_datum = pb.AnnotatedDatum() ann_datum = pb.AnnotatedDatum()
ann_datum.ParseFromString(serialized) ann_datum.ParseFromString(serialized)
im_datum = ann_datum.datum img_datum = ann_datum.datum
im = np.fromstring(im_datum.data, np.uint8) img = np.fromstring(img_datum.data, np.uint8)
if im_datum.encoded is True: im = cv2.imdecode(im, -1) if img_datum.encoded is True:
else: im = im.reshape((im_datum.height, im_datum.width, im_datum.channels)) img = cv2.imdecode(img, -1)
else:
h, w = img_datum.height, img_datum.width
img = img.reshape((h, w, img_datum.channels))
# Flip # Flip
flip = False flip = False
if self._mirror: if self._mirror:
if npr.randint(0, 2) > 0: if np.random.randint(0, 2) > 0:
im = im[:, ::-1, :] img = img[:, ::-1, :]
flip = True flip = True
# Datum -> RoIDB # Datum -> RoIDB
roidb = self.make_roidb(ann_datum, flip) roi_dict = self.make_roi_dict(ann_datum, flip)
# Post-Process for gt boxes # Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}] # Shape like: [num_objects, {x1, y1, x2, y2, cls}]
gt_boxes = np.empty((len(roidb['gt_classes']), 5), dtype=np.float32) gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), 'float32')
gt_boxes[:, 0:4] = roidb['normalized_boxes'] gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
gt_boxes[:, 4] = roidb['gt_classes']
# Distort => Expand => Sample => Resize # Distort => Expand => Sample => Resize
im = self._distorter.distort_image(im) img, gt_boxes = self._image_aug(img, gt_boxes)
im, gt_boxes = self._expander.expand_image(im, gt_boxes)
im, gt_boxes = self._sampler.sample_image(im, gt_boxes)
im = self._resizer.resize_image(im)
# Modify gt boxes to the blob scale # Restore to the blob scale
gt_boxes[:, 0] *= cfg.SSD.RESIZE.WIDTH gt_boxes[:, 0] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 1] *= cfg.SSD.RESIZE.HEIGHT gt_boxes[:, 1] *= cfg.SSD.RESIZE.HEIGHT
gt_boxes[:, 2] *= cfg.SSD.RESIZE.WIDTH gt_boxes[:, 2] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 3] *= cfg.SSD.RESIZE.HEIGHT gt_boxes[:, 3] *= cfg.SSD.RESIZE.HEIGHT
return im, gt_boxes return img, gt_boxes
def run(self): def run(self):
npr.seed(self._rng_seed) np.random.seed(self._rng_seed)
while True: while True:
serialized = self.Q_in.get() serialized = self.q_in.get()
im, gt_boxes = self.get(serialized) im, gt_boxes = self.get(serialized)
if len(gt_boxes) < 1: continue if len(gt_boxes) < 1:
self.Q_out.put((im, gt_boxes)) continue
self.q_out.put((im, gt_boxes))
def _flip_boxes(boxes, width): def _flip_boxes(boxes, width):
flip_boxes = boxes.copy() flip_boxes = boxes.copy()
oldx1 = boxes[:, 0].copy() old_x1 = boxes[:, 0].copy()
oldx2 = boxes[:, 2].copy() old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - oldx2 - 1 flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - oldx1 - 1 flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all(): if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.') logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes return flip_boxes
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import PIL.Image
import PIL.ImageEnhance
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
class Distortor(object):
    """Randomly jitter the brightness, contrast and saturation of an image.

    Each adjustment is applied independently with the probability read
    from ``cfg.SSD.DISTORT``. When applied, the enhancement factor is
    ``1.0`` plus a value drawn uniformly from ``[-delta, delta)``.

    """

    def __init__(self):
        self._brightness_prob = cfg.SSD.DISTORT.BRIGHTNESS_PROB
        self._brightness_delta = 0.3
        self._contrast_prob = cfg.SSD.DISTORT.CONTRAST_PROB
        self._contrast_delta = 0.3
        self._saturation_prob = cfg.SSD.DISTORT.SATURATION_PROB
        self._saturation_delta = 0.3

    def distort_image(self, im):
        """Return a color-distorted copy of the image.

        Parameters
        ----------
        im : numpy.ndarray
            The image to distort; it is converted with
            ``PIL.Image.fromarray``.

        Returns
        -------
        numpy.ndarray
            The distorted image.

        """
        im = PIL.Image.fromarray(im)

        if npr.uniform() < self._brightness_prob:
            delta_brightness = npr.uniform(
                -self._brightness_delta, self._brightness_delta) + 1.0
            im = PIL.ImageEnhance.Brightness(im).enhance(delta_brightness)

        if npr.uniform() < self._contrast_prob:
            delta_contrast = npr.uniform(
                -self._contrast_delta, self._contrast_delta) + 1.0
            im = PIL.ImageEnhance.Contrast(im).enhance(delta_contrast)

        if npr.uniform() < self._saturation_prob:
            # Sample from the saturation range; this branch used to read
            # the contrast delta, which only worked because both are 0.3.
            delta_saturation = npr.uniform(
                -self._saturation_delta, self._saturation_delta) + 1.0
            im = PIL.ImageEnhance.Color(im).enhance(delta_saturation)

        return np.array(im)
if __name__ == '__main__':
    # Manual visual check: show a freshly distorted 'cat.jpg' on every
    # key press. The image is re-read each round so distortions do not
    # accumulate.
    distortor = Distortor()
    while True:
        source = cv2.imread('cat.jpg')
        distorted = distortor.distort_image(source)
        cv2.imshow('Distort', distorted)
        cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy.random as npr
import numpy as np
import math
from lib.core.config import cfg
import lib.utils.logger as logger
class Expander(object):
    """Randomly place an image on a larger canvas filled with the pixel means.

    The probability of expanding and the largest enlargement ratio are
    read from ``cfg.SSD.EXPAND``.
    """

    def __init__(self, **params):
        # ``params`` is accepted for call compatibility but not used here.
        self._expand_prob = cfg.SSD.EXPAND.PROB
        self._max_expand_ratio = cfg.SSD.EXPAND.MAX_RATIO
        if self._max_expand_ratio < 1.0:
            logger.fatal('The max expand ratio must >= 1.0, got {}'.format(self._max_expand_ratio))

    def expand_image(self, im, gt_boxes=None):
        """Return ``(image, gt_boxes)``, possibly expanded.

        Box coordinates are treated as fractions of the image size and
        are re-expressed as fractions of the enlarged canvas. The inputs
        are returned untouched when the random draw skips the expansion
        or the sampled ratio is exactly 1.
        """
        if npr.uniform() > self._expand_prob:
            return im, gt_boxes
        ratio = npr.uniform(1.0, self._max_expand_ratio)
        if ratio == 1:
            return im, gt_boxes

        im_h, im_w = im.shape[0], im.shape[1]
        canvas_h, canvas_w = int(im_h * ratio), int(im_w * ratio)
        # Random top-left corner of the original image inside the canvas.
        top = int(math.floor(npr.uniform(0.0, canvas_h - im_h)))
        left = int(math.floor(npr.uniform(0.0, canvas_w - im_w)))

        canvas = np.empty((canvas_h, canvas_w, 3), dtype=np.uint8)
        canvas[:] = cfg.PIXEL_MEANS
        canvas[top:top + im_h, left:left + im_w, :] = im

        if gt_boxes is None:
            return canvas, gt_boxes

        shifted = gt_boxes.astype(gt_boxes.dtype, copy=True)
        # (column, source extent, pixel offset, canvas extent)
        layout = (
            (0, im_w, left, canvas_w),
            (1, im_h, top, canvas_h),
            (2, im_w, left, canvas_w),
            (3, im_h, top, canvas_h),
        )
        for col, extent, offset, canvas_extent in layout:
            shifted[:, col] = (gt_boxes[:, col] * extent + offset) / canvas_extent
        return canvas, shifted
if __name__ == '__main__':
    # Manual visual check: expand 'cat.jpg' with one hand-written box and
    # draw the transformed box on the result after every key press.
    expander = Expander()
    while True:
        im = cv2.imread('cat.jpg')
        gt_boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
        im, gt_boxes = expander.expand_image(im, gt_boxes)
        # Boxes are fractional, so scale them back to pixels for drawing.
        height, width = im.shape[0], im.shape[1]
        box = gt_boxes[0]
        top_left = (int(box[0] * width), int(box[1] * height))
        bottom_right = (int(box[2] * width), int(box[3] * height))
        cv2.rectangle(im, top_left, bottom_right, (188, 119, 64), 2)
        cv2.imshow('Expand', im)
        cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy.random as npr
from lib.core.config import cfg
class Resizer(object):
    """Resize images to the configured SSD input size.

    The target size and the list of allowed interpolation names come
    from ``cfg.SSD.RESIZE``; one interpolation is picked at random for
    every call.
    """

    def __init__(self):
        self._re_height = cfg.SSD.RESIZE.HEIGHT
        self._re_width = cfg.SSD.RESIZE.WIDTH
        # Map the configuration names to the OpenCV interpolation flags.
        name_to_flag = {
            'LINEAR': cv2.INTER_LINEAR,
            'AREA': cv2.INTER_AREA,
            'NEAREST': cv2.INTER_NEAREST,
            'CUBIC': cv2.INTER_CUBIC,
            'LANCZOS4': cv2.INTER_LANCZOS4,
        }
        self._interp_mode = [
            name_to_flag[name] for name in cfg.SSD.RESIZE.INTERP_MODE
        ]

    def resize_image(self, im):
        """Return ``im`` resized with a randomly chosen interpolation."""
        pick = npr.randint(0, len(self._interp_mode))
        target_size = (self._re_width, self._re_height)
        return cv2.resize(im, target_size, interpolation=self._interp_mode[pick])
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
from lib.utils.bbox_transform import clip_boxes
from lib.utils.boxes import iou
import lib.utils.logger as logger
class Sampler(object):
    """Generate random crops of an image under overlap constraints.

    Every sampler is described by a sequence of eight values:
    ``(min_scale, max_scale, min_aspect_ratio, max_aspect_ratio,
    min_jaccard_overlap, max_jaccard_overlap, max_trials, max_sample)``.
    Sample boxes and ground-truth boxes are expressed as fractions of
    the image size.
    """

    def __init__(self, samplers):
        """Store the sampler descriptions.

        Args:
            samplers: A list of 8-value sequences, or a single such
                sequence (anything that is not a ``list`` is wrapped).
        """
        if not isinstance(samplers, list):
            samplers = [samplers]
        self._samplers = []
        for sampler in samplers:
            if len(sampler) != 8:
                logger.fatal('The sample params should be a tuple of length 8.')
            sample_param = {
                'min_scale': sampler[0],
                'max_scale': sampler[1],
                'min_aspect_ratio': sampler[2],
                'max_aspect_ratio': sampler[3],
                'min_jaccard_overlap': sampler[4],
                'max_jaccard_overlap': sampler[5],
                'max_trials': sampler[6],
                'max_sample': sampler[7],
            }
            self._samplers.append(sample_param)

    def _compute_overlaps(self, rand_box, gt_boxes):
        """Return the IoU between ``rand_box`` and the first 4 columns of ``gt_boxes``."""
        return iou(np.expand_dims(rand_box, 0), gt_boxes[:, 0:4])

    def _generate_sample(self, sample_param):
        """Draw one random box ``[x1, y1, x2, y2]`` inside the unit square."""
        min_scale = sample_param.get('min_scale', 1.0)
        max_scale = sample_param.get('max_scale', 1.0)
        scale = npr.uniform(min_scale, max_scale)
        min_aspect_ratio = sample_param.get('min_aspect_ratio', 1.0)
        max_aspect_ratio = sample_param.get('max_aspect_ratio', 1.0)
        # Clamp the aspect ratio so that neither side exceeds 1.
        min_aspect_ratio = max(min_aspect_ratio, scale ** 2)
        max_aspect_ratio = min(max_aspect_ratio, 1.0 / (scale ** 2))
        aspect_ratio = npr.uniform(min_aspect_ratio, max_aspect_ratio)
        bbox_w = scale * (aspect_ratio ** 0.5)
        bbox_h = scale / (aspect_ratio ** 0.5)
        w_off = npr.uniform(0.0, float(1 - bbox_w))
        h_off = npr.uniform(0.0, float(1 - bbox_h))
        return np.array([w_off, h_off, w_off + bbox_w, h_off + bbox_h])

    def _check_satisfy(self, sample_box, gt_boxes, constraint):
        """Tell whether ``sample_box`` meets the overlap bounds of ``constraint``.

        The best IoU against any ground-truth box must lie within
        ``[min_jaccard_overlap, max_jaccard_overlap]``; a bound that is
        ``None`` (or missing) is not enforced.
        """
        min_jaccard_overlap = constraint.get('min_jaccard_overlap', None)
        max_jaccard_overlap = constraint.get('max_jaccard_overlap', None)
        if min_jaccard_overlap is None and max_jaccard_overlap is None:
            return True
        if len(gt_boxes) == 0:
            # Without ground truth there is no overlap to measure, and
            # ``max()`` of an empty array would raise.
            return False
        best_overlap = self._compute_overlaps(sample_box, gt_boxes).max()
        if min_jaccard_overlap is not None:
            if best_overlap < min_jaccard_overlap:
                return False
        if max_jaccard_overlap is not None:
            if best_overlap > max_jaccard_overlap:
                return False
        return True

    def _generate_batch_samples(self, gt_boxes):
        """Collect the accepted sample boxes of every configured sampler."""
        sample_boxes = []
        for sampler in self._samplers:
            found = 0
            for _ in range(sampler['max_trials']):
                if found >= sampler['max_sample']:
                    break
                sample_box = self._generate_sample(sampler)
                # Bounds of exactly (0.0, 1.0) accept everything, so the
                # overlap computation is skipped for them.
                if sampler['min_jaccard_overlap'] != 0.0 or \
                        sampler['max_jaccard_overlap'] != 1.0:
                    ok = self._check_satisfy(sample_box, gt_boxes, sampler)
                    if not ok:
                        continue
                found += 1
                sample_boxes.append(sample_box)
        return sample_boxes

    def _rand_crop(self, im, rand_box, gt_boxes=None):
        """Crop ``im`` to ``rand_box`` and re-express ``gt_boxes`` in the crop.

        Returns:
            ``(cropped_image, boxes)``. ``boxes`` is ``gt_boxes`` itself
            when it is ``None``; otherwise a new array with only the
            boxes whose center lies inside ``rand_box``.
        """
        im_h = im.shape[0]
        im_w = im.shape[1]
        w_off = int(rand_box[0] * im_w)
        h_off = int(rand_box[1] * im_h)
        crop_w = int((rand_box[2] - rand_box[0]) * im_w)
        crop_h = int((rand_box[3] - rand_box[1]) * im_h)
        new_im = im[h_off: h_off + crop_h, w_off: w_off + crop_w, :]
        if gt_boxes is not None:
            ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
            ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
            # Keep the ground-truth box whose center is in the sample box
            # Implement ``EmitConstraint.CENTER`` in the original SSD
            keep_inds = np.where(
                (ctr_x >= rand_box[0]) & (ctr_x <= rand_box[2]) &
                (ctr_y >= rand_box[1]) & (ctr_y <= rand_box[3]))[0]
            gt_boxes = gt_boxes[keep_inds]
            new_gt_boxes = gt_boxes.astype(gt_boxes.dtype, copy=True)
            # Fractions -> pixels relative to the crop origin.
            new_gt_boxes[:, 0] = (gt_boxes[:, 0] * im_w - w_off)
            new_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h - h_off)
            new_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w - w_off)
            new_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h - h_off)
            new_gt_boxes = clip_boxes(new_gt_boxes, (crop_h, crop_w))
            # Pixels -> fractions of the crop size.
            new_gt_boxes[:, 0] = new_gt_boxes[:, 0] / crop_w
            new_gt_boxes[:, 1] = new_gt_boxes[:, 1] / crop_h
            new_gt_boxes[:, 2] = new_gt_boxes[:, 2] / crop_w
            new_gt_boxes[:, 3] = new_gt_boxes[:, 3] / crop_h
            return new_im, new_gt_boxes
        return new_im, gt_boxes

    def sample_image(self, im, gt_boxes):
        """Crop ``im`` to one randomly chosen accepted sample box.

        The inputs are returned unchanged when no sample box was accepted.
        """
        sample_boxes = self._generate_batch_samples(gt_boxes)
        if len(sample_boxes) > 0:
            # Apply sampling if found at least one valid sample box
            # Then randomly pick one
            sample_idx = npr.randint(0, len(sample_boxes))
            rand_box = sample_boxes[sample_idx]
            im, gt_boxes = self._rand_crop(im, rand_box, gt_boxes)
        return im, gt_boxes
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
import numpy.random as npr
npr.seed(3)
import sys
sys.path.append('../../')
from resize import Resizer
from expand import Expander
from distort import Distortor
from sample import Sampler
from lib.core.config import cfg
if __name__ == '__main__':
    # Manual visual check of the whole augmentation chain on 'cat.jpg':
    # distort -> expand -> sample -> resize, then draw the boxes.
    distorter = Distortor()
    expander = Expander()
    sampler = Sampler(cfg.SSD.SAMPLERS)
    resizer = Resizer()
    while True:
        im = cv2.imread('cat.jpg')
        gt_boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
        im = distorter.distort_image(im)
        im, gt_boxes = expander.expand_image(im, gt_boxes)
        im, gt_boxes = sampler.sample_image(im, gt_boxes)
        if len(gt_boxes) < 1:
            # The crop lost every box; try again with a fresh image.
            continue
        im = resizer.resize_image(im)
        # Boxes are fractional, so scale them to the resized image.
        height, width = im.shape[0], im.shape[1]
        for gt_box in gt_boxes:
            x1, y1 = int(gt_box[0] * width), int(gt_box[1] * height)
            x2, y2 = int(gt_box[2] * width), int(gt_box[3] * height)
            cv2.rectangle(im, (x1, y1), (x2, y2), (188, 119, 64), 2)
            print(x1, y1, x2, y2)
        cv2.imshow('Sample', im)
        cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import math
import cv2
import PIL.Image
import PIL.ImageEnhance
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import iou
class Compose(object):
    """Chain several transforms into one callable.

    Every transform must provide ``apply(img, boxes)`` returning the
    transformed ``(img, boxes)`` pair.
    """

    def __init__(self, *transforms):
        self.transforms = transforms

    def __call__(self, img, boxes):
        """Run the transforms in order, feeding each the previous output."""
        for step in self.transforms:
            img, boxes = step.apply(img, boxes)
        return img, boxes
class Distort(object):
    """Randomly change brightness, contrast and saturation of an image.

    Each enhancement is applied independently with the probability
    configured under ``cfg.SSD.DISTORT``, using a factor drawn uniformly
    from ``[0.7, 1.3]``.
    """

    def __init__(self):
        distort_cfg = cfg.SSD.DISTORT
        self._brightness_prob = distort_cfg.BRIGHTNESS_PROB
        self._contrast_prob = distort_cfg.CONTRAST_PROB
        self._saturation_prob = distort_cfg.SATURATION_PROB

    def apply(self, img, boxes=None):
        """Return ``(distorted_img, boxes)``; ``boxes`` passes through as-is."""
        pil_img = PIL.Image.fromarray(img)
        # Order matters: it fixes the sequence of random draws.
        stages = (
            (self._brightness_prob, PIL.ImageEnhance.Brightness),
            (self._contrast_prob, PIL.ImageEnhance.Contrast),
            (self._saturation_prob, PIL.ImageEnhance.Color),
        )
        for prob, enhancer in stages:
            if npr.uniform() < prob:
                factor = npr.uniform(-0.3, 0.3) + 1.
                pil_img = enhancer(pil_img).enhance(factor)
        return np.array(pil_img), boxes
class Expand(object):
    """Randomly place an image on a larger canvas filled with the pixel means.

    The probability of expanding and the largest enlargement ratio are
    read from ``cfg.SSD.EXPAND``.
    """

    def __init__(self):
        expand_cfg = cfg.SSD.EXPAND
        self._expand_prob = expand_cfg.PROB
        self._max_ratio = expand_cfg.MAX_RATIO
        if self._max_ratio < 1.0:
            message = 'The max expand ratio must >= 1, got {}'
            logger.fatal(message.format(self._max_ratio))

    def apply(self, img, boxes=None):
        """Return ``(img, boxes)``, possibly expanded.

        Box coordinates are treated as fractions of the image size and
        are re-expressed as fractions of the enlarged canvas. The inputs
        are returned untouched when the random draw skips the expansion
        or the sampled ratio is exactly 1.
        """
        if npr.uniform() > self._expand_prob:
            return img, boxes
        ratio = npr.uniform(1., self._max_ratio)
        if ratio == 1:
            return img, boxes

        im_h, im_w = img.shape[:2]
        canvas_h = int(im_h * ratio)
        canvas_w = int(im_w * ratio)
        # Random top-left corner of the original image inside the canvas.
        top = int(math.floor(npr.uniform(0., canvas_h - im_h)))
        left = int(math.floor(npr.uniform(0., canvas_w - im_w)))

        canvas = np.empty((canvas_h, canvas_w, 3), dtype=np.uint8)
        canvas[:] = cfg.PIXEL_MEANS
        canvas[top:top + im_h, left:left + im_w, :] = img

        if boxes is None:
            return canvas, boxes

        moved = boxes.astype(boxes.dtype, copy=True)
        # (column, source extent, pixel offset, canvas extent)
        layout = (
            (0, im_w, left, canvas_w),
            (1, im_h, top, canvas_h),
            (2, im_w, left, canvas_w),
            (3, im_h, top, canvas_h),
        )
        for col, extent, offset, canvas_extent in layout:
            moved[:, col] = (boxes[:, col] * extent + offset) / canvas_extent
        return canvas, moved
class Resize(object):
    """Resize images to the configured SSD input size.

    The target size and the list of allowed interpolation names come
    from ``cfg.SSD.RESIZE``; one interpolation is picked at random for
    every call.
    """

    def __init__(self):
        # OpenCV expects the size as (width, height).
        self._target_size = (cfg.SSD.RESIZE.WIDTH, cfg.SSD.RESIZE.HEIGHT)
        name_to_flag = {
            'LINEAR': cv2.INTER_LINEAR,
            'AREA': cv2.INTER_AREA,
            'NEAREST': cv2.INTER_NEAREST,
            'CUBIC': cv2.INTER_CUBIC,
            'LANCZOS4': cv2.INTER_LANCZOS4,
        }
        self._interp_mode = [
            name_to_flag[name] for name in cfg.SSD.RESIZE.INTERP_MODE
        ]

    def apply(self, img, boxes):
        """Return ``(resized_img, boxes)``; ``boxes`` passes through as-is."""
        pick = npr.randint(len(self._interp_mode))
        resized = cv2.resize(
            img, self._target_size, interpolation=self._interp_mode[pick])
        return resized, boxes
class Sample(object):
    """Randomly crop an image under overlap constraints.

    The samplers are read from ``cfg.SSD.SAMPLERS``. Every sampler is a
    sequence of eight values: ``(min_scale, max_scale, min_aspect_ratio,
    max_aspect_ratio, min_overlap, max_overlap, max_trials, max_sample)``.
    Sample boxes and ground-truth boxes are expressed as fractions of
    the image size.
    """

    def __init__(self):
        # ``collections.Iterable`` was removed in Python 3.10; fall back
        # to the old location only where ``collections.abc`` is missing.
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable
        samplers = cfg.SSD.SAMPLERS
        if not isinstance(samplers, Iterable):
            samplers = [samplers]
        self._samplers = []
        for sampler in samplers:
            if len(sampler) != 8:
                logger.fatal('The sample params should be a tuple of length 8.')
            sample_param = {
                'min_scale': sampler[0],
                'max_scale': sampler[1],
                'min_aspect_ratio': sampler[2],
                'max_aspect_ratio': sampler[3],
                'min_overlap': sampler[4],
                'max_overlap': sampler[5],
                'max_trials': sampler[6],
                'max_sample': sampler[7],
            }
            self._samplers.append(sample_param)

    @classmethod
    def _compute_overlaps(cls, rand_box, gt_boxes):
        """Return the IoU between ``rand_box`` and the first 4 columns of ``gt_boxes``."""
        return iou(np.expand_dims(rand_box, 0), gt_boxes[:, 0:4])

    @classmethod
    def _generate_sample(cls, sample_param):
        """Draw one random box ``[x1, y1, x2, y2]`` inside the unit square."""
        min_scale = sample_param.get('min_scale', 1.)
        max_scale = sample_param.get('max_scale', 1.)
        scale = npr.uniform(min_scale, max_scale)
        min_aspect_ratio = sample_param.get('min_aspect_ratio', 1.)
        max_aspect_ratio = sample_param.get('max_aspect_ratio', 1.)
        # Clamp the aspect ratio so that neither side exceeds 1.
        min_aspect_ratio = max(min_aspect_ratio, scale ** 2)
        max_aspect_ratio = min(max_aspect_ratio, 1. / (scale ** 2))
        aspect_ratio = npr.uniform(min_aspect_ratio, max_aspect_ratio)
        bbox_w = scale * (aspect_ratio ** 0.5)
        bbox_h = scale / (aspect_ratio ** 0.5)
        w_off = npr.uniform(0., 1. - bbox_w)
        h_off = npr.uniform(0., 1. - bbox_h)
        return np.array([w_off, h_off, w_off + bbox_w, h_off + bbox_h])

    def _check_satisfy(self, sample_box, gt_boxes, constraint):
        """Tell whether ``sample_box`` meets the overlap bounds of ``constraint``.

        The best IoU against any ground-truth box must lie within
        ``[min_overlap, max_overlap]``; a bound that is ``None`` (or
        missing) is not enforced.
        """
        min_overlap = constraint.get('min_overlap', None)
        max_overlap = constraint.get('max_overlap', None)
        if min_overlap is None and max_overlap is None:
            return True
        if len(gt_boxes) == 0:
            # Without ground truth there is no overlap to measure, and
            # ``max()`` of an empty array would raise.
            return False
        # Keep the measured value in its own name: reusing ``max_overlap``
        # would compare the measurement with itself and silently disable
        # the upper bound.
        best_overlap = self._compute_overlaps(sample_box, gt_boxes).max()
        if min_overlap is not None:
            if best_overlap < min_overlap:
                return False
        if max_overlap is not None:
            if best_overlap > max_overlap:
                return False
        return True

    def _generate_batch_samples(self, gt_boxes):
        """Collect the accepted sample boxes of every configured sampler."""
        sample_boxes = []
        for sampler in self._samplers:
            found = 0
            for _ in range(sampler['max_trials']):
                if found >= sampler['max_sample']:
                    break
                sample_box = self._generate_sample(sampler)
                # Bounds of exactly (0, 1) accept everything, so the
                # overlap computation is skipped for them.
                if sampler['min_overlap'] != 0. or \
                        sampler['max_overlap'] != 1.:
                    ok = self._check_satisfy(sample_box, gt_boxes, sampler)
                    if not ok:
                        continue
                found += 1
                sample_boxes.append(sample_box)
        return sample_boxes

    @classmethod
    def _rand_crop(cls, im, rand_box, gt_boxes=None):
        """Crop ``im`` to ``rand_box`` and re-express ``gt_boxes`` in the crop.

        Returns:
            ``(cropped_image, boxes)``. ``boxes`` is ``gt_boxes`` itself
            when it is ``None``; otherwise a new array with only the
            boxes whose center lies inside ``rand_box``.
        """
        im_h, im_w = im.shape[:2]
        w_off = int(rand_box[0] * im_w)
        h_off = int(rand_box[1] * im_h)
        crop_w = int((rand_box[2] - rand_box[0]) * im_w)
        crop_h = int((rand_box[3] - rand_box[1]) * im_h)
        new_im = im[h_off:h_off + crop_h, w_off:w_off + crop_w, :]
        if gt_boxes is not None:
            ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
            ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
            # Keep the ground-truth box whose center is in the sample box
            # Implement ``EmitConstraint.CENTER`` in the original SSD
            keep_inds = np.where(
                (ctr_x >= rand_box[0]) & (ctr_x <= rand_box[2]) &
                (ctr_y >= rand_box[1]) & (ctr_y <= rand_box[3]))[0]
            gt_boxes = gt_boxes[keep_inds]
            new_gt_boxes = gt_boxes.astype(gt_boxes.dtype, copy=True)
            # Fractions -> pixels relative to the crop origin.
            new_gt_boxes[:, 0] = (gt_boxes[:, 0] * im_w - w_off)
            new_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h - h_off)
            new_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w - w_off)
            new_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h - h_off)
            new_gt_boxes = clip_tiled_boxes(new_gt_boxes, (crop_h, crop_w))
            # Pixels -> fractions of the crop size.
            new_gt_boxes[:, 0] = new_gt_boxes[:, 0] / crop_w
            new_gt_boxes[:, 1] = new_gt_boxes[:, 1] / crop_h
            new_gt_boxes[:, 2] = new_gt_boxes[:, 2] / crop_w
            new_gt_boxes[:, 3] = new_gt_boxes[:, 3] / crop_h
            return new_im, new_gt_boxes
        return new_im, gt_boxes

    def apply(self, img, boxes):
        """Crop ``img`` to one randomly chosen accepted sample box.

        The inputs are returned unchanged when no sample box was accepted.
        """
        sample_boxes = self._generate_batch_samples(boxes)
        if len(sample_boxes) > 0:
            # Apply sampling if found at least one valid sample box
            # Then randomly pick one
            sample_idx = npr.randint(len(sample_boxes))
            rand_box = sample_boxes[sample_idx]
            img, boxes = self._rand_crop(img, rand_box, boxes)
        return img, boxes
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!