Commit ca255ea0 by Ting PAN

Change to the PEP8 code style

1 parent 71593766
Showing with 2202 additions and 2225 deletions
## General
# Compiled Object files
*.slo
*.lo
......@@ -7,13 +5,15 @@
*.cuo
# Compiled Dynamic libraries
# *.so
*.so
*.dll
*.dylib
# Compiled Static libraries
*.lai
*.la
#*.a
*.a
*.lib
# Compiled python
*.pyc
......@@ -40,6 +40,9 @@ __pycache__
# QtCreator files
*.user
# VSCode files
.vscode
# PyCharm files
.idea
......
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.1.2 (20190723)
Dragon Minimum Required (Version 0.3.0.0)
Changes:
Preview Features:
- Change to the PEP8 code style.
- Adapt the new Dragon API.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.1.1 (20190409)
Dragon Minimum Required (Version 0.3.0.0)
......
......@@ -21,8 +21,8 @@ set(CUDA_ARCH -gencode arch=compute_30,code=sm_30
# ---------------- User Config ----------------
# ---[ Dependencies
include(${PROJECT_SOURCE_DIR}/CMake/FindPythonLibs.cmake)
include(${PROJECT_SOURCE_DIR}/CMake/FindNumPy.cmake)
include(${PROJECT_SOURCE_DIR}/cmake/FindPythonLibs.cmake)
include(${PROJECT_SOURCE_DIR}/cmake/FindNumPy.cmake)
FIND_PACKAGE(CUDA REQUIRED)
set(CMAKE_CXX_STANDARD 11)
......
# --------------------------------------------------------
# Detectron @ Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
......@@ -8,8 +8,3 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from .distort import Distortor
from .expand import Expander
from .sample import Sampler
from .resize import Resizer
\ No newline at end of file
......@@ -16,6 +16,7 @@ from __future__ import print_function
import os
import sys
import time
import cv2
import xml.etree.ElementTree as ET
from dragon.tools.db import LMDB
......@@ -23,6 +24,7 @@ from dragon.tools.db import LMDB
sys.path.insert(0, '../../..')
from lib.proto import anno_pb2 as pb
ZFILL = 8
ENCODE_QUALITY = 95
......@@ -46,14 +48,23 @@ def make_datum(image_file, xml_file):
datum = pb.Datum()
im = cv2.imread(image_file)
if im is None or im.shape[0] == 0 or im.shape[1] == 0:
print("XML have not objects ignored: ", xml_file)
return None
datum.height, datum.width, datum.channels = im.shape
datum.encoded = ENCODE_QUALITY != 100
if datum.encoded:
result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
if im is None or im.shape[0] == 0 or im.shape[1] == 0:
print("XML have not objects ignored: ", xml_file)
return None
datum.data = im.tostring()
anno_datum.datum.CopyFrom(datum)
anno_datum.filename = filename.split('.')[0]
if len(objs) == 0:
return None
for ix, obj in enumerate(objs):
anno = pb.Annotation()
bbox = obj.find('bndbox')
......@@ -64,6 +75,7 @@ def make_datum(image_file, xml_file):
cls = obj.find('name').text.strip()
anno.x1, anno.y1, anno.x2, anno.y2 = (x1, y1, x2, y2)
anno.name = cls
class_name_set.add(cls)
anno.difficult = False
if obj.find('difficult') is not None:
anno.difficult = int(obj.find('difficult').text) == 1
......@@ -72,13 +84,15 @@ def make_datum(image_file, xml_file):
return anno_datum
def make_db(database_file,
def make_db(
database_file,
images_path,
annotations_path,
imagesets_path,
splits):
splits,
):
if os.path.isdir(database_file) is True:
raise ValueError('The database path is already exist.')
print('Warning: The database path is already exist.')
else:
root_dir = database_file[:database_file.rfind('/')]
if not os.path.exists(root_dir):
......@@ -95,12 +109,12 @@ def make_db(database_file,
print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
db = LMDB(max_commit=10000)
db = LMDB(max_commit=1000)
db.open(database_file, mode='w')
count = 0
total_line = 0
start_time = time.time()
zfill_flag = '{0:0%d}' % (ZFILL)
zfill_flag = '{0:0%d}' % ZFILL
for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
......@@ -109,18 +123,18 @@ def make_db(database_file,
lines = f.readlines()
total_line += len(lines)
for line in lines:
count += 1
if count % 10000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
datum = make_datum(image_file, xml_file)
if datum is not None:
count += 1
db.put(zfill_flag.format(count - 1), datum.SerializeToString())
if count % 1000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
......
# --------------------------------------------------------
# Detectron
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
......@@ -155,11 +155,11 @@ __C.TEST.SCORE_THRESH = 0.05
# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5
## NMS threshold used on RPN proposals
# NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
## Number of top scoring boxes to keep before apply NMS to RPN proposals
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
## Number of top scoring boxes to keep after applying NMS to RPN proposals
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 0
......@@ -199,7 +199,7 @@ __C.MODEL.TYPE = ''
# The float precision for training and inference
# (FLOAT32, FLOAT16,)
__C.MODEL.DATA_TYPE= 'FLOAT32'
__C.MODEL.DATA_TYPE = 'FLOAT32'
# The backbone
__C.MODEL.BACKBONE = ''
......@@ -560,10 +560,11 @@ def _merge_a_into_b(a, b):
"""Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a.
"""
if not isinstance(a, dict): return
if not isinstance(a, dict):
return
for k, v in a.items():
# a must specify keys that are in b
if not k in b:
if k not in b:
raise KeyError('{} is not a valid config key'.format(k))
# the types must match, too
v = _check_and_coerce_cfg_value_type(v, b[k], k)
......@@ -598,15 +599,15 @@ def cfg_from_list(cfg_list):
assert d.has_key(subkey)
d = d[subkey]
subkey = key_list[-1]
assert d.has_key(subkey)
assert subkey in d
try:
value = literal_eval(v)
except:
# handle the case when v is a string literal
# Handle the case when v is a string literal
value = v
assert type(value) == type(d[subkey]), \
'type {} does not match original type {}'.format(
type(value), type(d[subkey]))
'type {} does not match original type {}'\
.format(type(value), type(d[subkey]))
d[subkey] = value
......@@ -618,8 +619,10 @@ def _check_and_coerce_cfg_value_type(value_a, value_b, key):
# The types must match (with some exceptions)
type_b = type(value_b)
type_a = type(value_a)
if type_a is type_b: return value_a
if type_b is float and type_a is int: return float(value_a)
if type_a is type_b:
return value_a
if type_b is float and type_a is int:
return float(value_a)
# Exceptions: numpy arrays, strings, tuple<->list
if isinstance(value_b, np.ndarray):
......
......@@ -18,7 +18,8 @@ import shutil
import time
import numpy as np
from lib.core.config import cfg, cfg_from_file
from lib.core.config import cfg
from lib.core.config import cfg_from_file
class Coordinator(object):
......@@ -44,7 +45,8 @@ class Coordinator(object):
def _path_at(self, file, auto_create=True):
path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path): os.makedirs(path)
if auto_create and not os.path.exists(path):
os.makedirs(path)
return path
def checkpoints_dir(self):
......@@ -67,8 +69,10 @@ class Coordinator(object):
return os.path.join(self.checkpoints_dir(), files[ix]), step
steps.append(step)
if global_step is None:
if len(files) == 0: return None, 0
last_idx = int(np.argmax(steps)); last_step = steps[last_idx]
if len(files) == 0:
return None, 0
last_idx = int(np.argmax(steps))
last_step = steps[last_idx]
return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step
return None, 0
result = locate()
......
......@@ -30,7 +30,8 @@ class Solver(object):
self.opt_arguments = {
'scale_gradient': 1. / (
cfg.SOLVER.LOSS_SCALING *
cfg.SOLVER.ITER_SIZE),
cfg.SOLVER.ITER_SIZE
),
'clip_gradient': float(cfg.SOLVER.CLIP_NORM),
'weight_decay': cfg.SOLVER.WEIGHT_DECAY,
}
......@@ -57,8 +58,10 @@ class Solver(object):
}
]
for name, param in self.detector.named_parameters():
if 'bias' in name: param_groups[1]['params'].append(param)
else: param_groups[0]['params'].append(param)
if 'bias' in name:
param_groups[1]['params'].append(param)
else:
param_groups[0]['params'].append(param)
return param_groups
def set_learning_rate(self):
......@@ -67,8 +70,10 @@ class Solver(object):
if self._current_step < len(cfg.SOLVER.STEPS) \
and self.iter >= cfg.SOLVER.STEPS[self._current_step]:
self._current_step = self._current_step + 1
logger.info('MultiStep Status: Iteration {}, step = {}' \
.format(self.iter, self._current_step))
logger.info(
'MultiStep Status: Iteration {}, step = {}'
.format(self.iter, self._current_step)
)
new_lr = cfg.SOLVER.BASE_LR * (
cfg.SOLVER.GAMMA ** self._current_step)
self.optimizer.param_groups[0]['lr'] = \
......@@ -77,13 +82,14 @@ class Solver(object):
raise ValueError('Unknown lr policy: ' + policy)
def one_step(self):
def add_loss(x, y):
return y if x is None else x + y
# Forward & Backward & Compute_loss
iter_size = cfg.SOLVER.ITER_SIZE
loss_scaling = cfg.SOLVER.LOSS_SCALING
run_time = 0.; stats = {'loss': {'total': 0.}, 'iter': self.iter}
add_loss = lambda x, y: y if x is None else x + y
stats = {'loss': {'total': 0.}, 'iter': self.iter}
tic = time.time()
run_time, tic = 0., time.time()
if iter_size > 1:
# Dragon is designed for manual gradient accumulation
......@@ -99,10 +105,13 @@ class Solver(object):
stats['loss'][k] = 0.
total_loss = add_loss(total_loss, v)
stats['loss'][k] += float(v) * loss_scaling
if loss_scaling != 1.: total_loss *= loss_scaling
if loss_scaling != 1.:
total_loss *= loss_scaling
stats['loss']['total'] += float(total_loss)
total_loss.backward()
if iter_size > 1: self.optimizer.accumulate_grad()
if iter_size > 1:
self.optimizer.accumulate_grad()
run_time += (time.time() - tic)
......@@ -190,5 +199,8 @@ def get_solver_func(type):
elif type == 'Adam':
return AdamSolver
else:
raise ValueError('Unsupported solver type: {}.\n'
'Excepted in (MomentumSGD, Nesterov, RMSProp, Adam)'.format(type))
\ No newline at end of file
raise ValueError(
'Unsupported solver type: {}.\n'
'Excepted in (MomentumSGD, Nesterov, RMSProp, Adam).'
.format(type)
)
......@@ -33,25 +33,27 @@ class TestServer(object):
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = DataReader(**{'source': self.imdb.source})
self.data_transformer = DataTransformer()
self.data_reader.Q_out = Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.q_out = Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.start()
self.gt_recs = OrderedDict()
self.output_dir = output_dir
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir): os.makedirs(self.vis_dir)
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
# Replace the current data transformer with a new instance created by
# calling ``transformer_cls`` with no arguments.
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
serialized = self.data_reader.Q_out.get()
serialized = self.data_reader.q_out.get()
image = self.data_transformer.get_image(serialized)
image_id, objects = self.data_transformer.get_annotations(serialized)
self.gt_recs[image_id] = {
'objects': objects,
'width': image.shape[1],
'height': image.shape[0]}
'height': image.shape[0],
}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
......@@ -60,9 +62,10 @@ class TestServer(object):
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError('Loading {} records, '
'while the specific database required {}'.format(
len(self.gt_recs), self.num_images))
raise RuntimeError(
'Loading {} records, while {} required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
......@@ -87,7 +90,8 @@ class InferServer(object):
self.image_idx = 0
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir): os.makedirs(self.vis_dir)
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
# Replace the current data transformer with a new instance created by
# calling ``transformer_cls`` with no arguments.
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
......@@ -99,7 +103,8 @@ class InferServer(object):
self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = {
'width': image.shape[1],
'height': image.shape[0]}
'height': image.shape[0],
}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
......@@ -108,15 +113,23 @@ class InferServer(object):
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError('Loading {} records, '
'while the specific database required {}'.format(
len(self.gt_recs), self.num_images))
raise RuntimeError(
'Loading {} records, while {} required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes, self.get_records(), self.output_dir)
all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes, all_masks, self.get_records(), self.output_dir)
\ No newline at end of file
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
......@@ -17,17 +17,17 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import collections
import datetime
from collections import OrderedDict
import os
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.core.solver import get_solver_func
from lib.utils.timer import Timer
from lib.utils.stats import SmoothedValue
from lib.utils import logger
from lib.utils.stats import SmoothedValue
from lib.utils.timer import Timer
class SolverWrapper(object):
......@@ -51,13 +51,14 @@ class SolverWrapper(object):
self.solver.detector.cuda(cfg.GPU_ID)
# Plan the metrics
self.metrics = OrderedDict()
self.metrics = collections.OrderedDict()
if cfg.ENABLE_TENSOR_BOARD:
from dragon.tools.tensorboard import TensorBoard
self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs')
def snapshot(self):
if not logger.is_root(): return None
if not logger.is_root():
return None
filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}'
.format(self.solver.iter) + '.pth')
filename = os.path.join(self.output_dir, filename)
......@@ -77,19 +78,35 @@ class SolverWrapper(object):
self.board.scalar_summary('time', stats['time'], stats['iter'])
for k, v in self.metrics.items():
if k == 'total':
self.board.scalar_summary('total_loss', v.GetMedianValue(), stats['iter'])
else: self.board.scalar_summary(k, v.GetMedianValue(), stats['iter'])
self.board.scalar_summary(
'total_loss',
v.GetMedianValue(),
stats['iter'],
)
else:
self.board.scalar_summary(
k,
v.GetMedianValue(),
stats['iter'],
)
def step(self, display=False):
stats = self.solver.one_step()
self.add_metrics(stats)
self.send_metrics(stats)
if display:
logger.info('Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (stats['iter'],
stats['lr'], self.metrics['total'].GetMedianValue(), stats['time']))
logger.info(
'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (
stats['iter'], stats['lr'],
self.metrics['total'].GetMedianValue(),
stats['time'],
)
)
for k, v in self.metrics.items():
if k == 'total': continue
logger.info(' Train net output({}): {}'.format(k, v.GetMedianValue()))
if k == 'total':
continue
logger.info(' ' * 10 + 'Train net output({}): {}'
.format(k, v.GetMedianValue()))
def train_model(self):
"""Network training loop."""
......@@ -104,9 +121,8 @@ class SolverWrapper(object):
start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha)
# Apply 1-step SGD update
timer.tic()
with timer.tic_and_toc():
self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0)
timer.toc()
if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0:
average_time = timer.average_time
......@@ -114,8 +130,10 @@ class SolverWrapper(object):
cfg.SOLVER.MAX_ITERS - self.solver.iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS
logger.info('< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta))
logger.info(
'< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta)
)
if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0:
last_snapshot_iter = self.solver.iter
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
......@@ -13,6 +13,10 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from lib.datasets.taas import TaaS
......
......@@ -61,7 +61,7 @@ class imdb(object):
return num_entries
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
raise NotImplementedError
pass
def evaluate_masks(self, all_boxes, all_masks, output_dir):
raise NotImplementedError
\ No newline at end of file
pass
......@@ -62,10 +62,14 @@ class TaaS(imdb):
# Return the suffix appended to result file names: '_' followed by
# ``self._salt`` when ``self.config['use_salt']`` is truthy, otherwise ''.
def _get_comp_id(self):
return '_' + self._salt if self.config['use_salt'] else ''
def _get_prefix(self, type='bbox'):
if type == 'bbox': return 'detections_'
elif type == 'segm': return 'segmentations_'
elif type == 'kpt': return 'keypoints_'
@classmethod
def _get_prefix(cls, type='bbox'):
if type == 'bbox':
return 'detections_'
elif type == 'segm':
return 'segmentations_'
elif type == 'kpt':
return 'keypoints_'
return ''
def _get_voc_results_T(self, results_folder, type='bbox'):
......@@ -76,19 +80,22 @@ class TaaS(imdb):
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.pkl'
else:
raise ValueError('Type of results can be either bbox or segm.')
if not os.path.exists(results_folder): os.makedirs(results_folder)
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _get_coco_annotations_T(self, results_folder, type='bbox'):
# experiments/model_id/annotations/[GT]_detections_taas.json
filename = '[GT]_' + self._get_prefix(type) + self._name + '.json'
if not os.path.exists(results_folder): os.makedirs(results_folder)
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _get_coco_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>.json
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '.json'
if not os.path.exists(results_folder): os.makedirs(results_folder)
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
##############################################
......@@ -136,7 +143,8 @@ class TaaS(imdb):
with open(filename, 'wt') as f:
ix = 0
for image_id, rec in gt_recs.items():
dets = all_boxes[cls_ind][ix]; ix += 1
dets = all_boxes[cls_ind][ix]
ix += 1
if len(dets) == 0:
continue
for k in range(dets.shape[0]):
......@@ -148,7 +156,8 @@ class TaaS(imdb):
def _write_voc_segm_results(self, all_boxes, all_masks, output_dir):
for cls_inds, cls in enumerate(self.classes):
if cls == '__background__': continue
if cls == '__background__':
continue
print('Writing {} VOC format segm results'.format(cls))
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
......@@ -161,11 +170,15 @@ class TaaS(imdb):
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self._classes):
if cls == '__background__': continue
if cls == '__background__':
continue
det_file = self._get_voc_results_T(output_dir).format(cls)
rec, prec, ap = voc_bbox_eval(det_file, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric)
if ap > 0: aps += [ap]
rec, prec, ap = voc_bbox_eval(
det_file, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
......@@ -173,12 +186,16 @@ class TaaS(imdb):
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.classes):
if cls == '__background__': continue
if cls == '__background__':
continue
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
ap = voc_segm_eval(bbox_filename, segm_filename, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric)
if ap > 0: aps += [ap]
ap = voc_segm_eval(
bbox_filename, segm_filename, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
......@@ -188,12 +205,16 @@ class TaaS(imdb):
# #
##############################################
def _get_coco_image_id(self, image_name):
@classmethod
def _get_coco_image_id(cls, image_name):
image_id = image_name.split('_')[-1].split('.')[0]
try: return int(image_id)
except: return image_name
try:
return int(image_id)
except:
return image_name
def _encode_coco_masks(self, masks, boxes, im_h, im_w):
@classmethod
def _encode_coco_masks(cls, masks, boxes, im_h, im_w):
num_pred = len(boxes)
assert len(masks) == num_pred
mask_image = np.zeros((im_h, im_w, num_pred), dtype=np.uint8, order='F')
......@@ -216,26 +237,29 @@ class TaaS(imdb):
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
mask_image[y1 : y2, x1 : x2, i] = \
mask[(y1 - ref_box[1]) : (y2 - ref_box[1]),
(x1 - ref_box[0]) : (x2 - ref_box[0])]
mask_image[y1:y2, x1:x2, i] = \
mask[(y1 - ref_box[1]):(y2 - ref_box[1]),
(x1 - ref_box[0]):(x2 - ref_box[0])]
return encode_masks(mask_image)
def _write_coco_bbox_annotations(self, gt_recs, output_dir):
dataset = {}
# Build images
dataset['images'] = []
dataset = {'images': []}
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width']})
'height': rec['height'], 'width': rec['width'],
})
# Build categories
dataset['categories'] = []
for cls in self._classes:
if cls == '__background__': continue
if cls == '__background__':
continue
dataset['categories'].append({
'name': cls, 'id': self._class_to_ind[cls]})
'name': cls,
'id': self._class_to_ind[cls],
})
# Build annotations
dataset['annotations'] = []
ann_id = 0
......@@ -249,27 +273,32 @@ class TaaS(imdb):
'area': w * h,
'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']]})
'category_id': self._class_to_ind[obj['name']],
})
ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='bbox')
with open(ann_file, 'w') as f: json.dump(dataset, f)
with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file
def _write_coco_segm_annotations(self, gt_recs, output_dir):
dataset = {}
# Build images
dataset['images'] = []
dataset = {'images': []}
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width']})
'height': rec['height'], 'width': rec['width'],
})
# Build categories
dataset['categories'] = []
for cls in self._classes:
if cls == '__background__': continue
if cls == '__background__':
continue
dataset['categories'].append({
'name': cls, 'id': self._class_to_ind[cls]})
'name': cls,
'id': self._class_to_ind[cls],
})
# Build annotations
dataset['annotations'] = []
ann_id = 0
......@@ -283,20 +312,25 @@ class TaaS(imdb):
'area': w * h,
'segmentation': {
'size': [rec['height'], rec['width']],
'counts': obj['mask']},
'counts': obj['mask'],
},
'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']]})
'category_id': self._class_to_ind[obj['name']],
})
ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='segm')
with open(ann_file, 'w') as f: json.dump(dataset, f)
with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file
def _coco_bbox_results_one_category(self, boxes, cat_id, gt_recs):
ix, results = 0, []
for image_name, rec in gt_recs.items():
dets = boxes[ix]; ix += 1
if isinstance(dets, list) and len(dets) == 0: continue
dets = boxes[ix]
ix += 1
if isinstance(dets, list) and len(dets) == 0:
continue
dets = dets.astype(np.float)
scores = dets[:, -1]
xs = dets[:, 0]
......@@ -307,7 +341,9 @@ class TaaS(imdb):
[{'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
'bbox': [xs[k], ys[k], ws[k], hs[k]],
'score': scores[k]} for k in range(dets.shape[0])])
'score': scores[k],
} for k in range(dets.shape[0])]
)
return results
def _coco_segm_results_one_category(self, boxes, masks, cat_id, gt_recs):
......@@ -321,7 +357,8 @@ class TaaS(imdb):
ix = 0
for image_name, rec in gt_recs.items():
dets = boxes[ix].astype(np.float)
msks = masks[ix]; ix += 1
msks = masks[ix]
ix += 1
keep = filter_boxes(dets)
im_h, im_w = rec['height'], rec['width']
if len(keep) == 0:
......@@ -331,38 +368,46 @@ class TaaS(imdb):
msks[keep], dets[keep, :4], im_h, im_w)
for k in range(dets[keep].shape[0]):
rle = mask_encode[k]
if sys.version_info >= (3,0): rle['counts'] = rle['counts'].decode()
if sys.version_info >= (3, 0):
rle['counts'] = rle['counts'].decode()
results.append({
'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
'segmentation': rle,
'score': scores[k]})
'score': scores[k],
})
return results
def _write_coco_bbox_results(self, all_boxes, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir)
results = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__': continue
print('Collecting {} results ({:d}/{:d})'.format(cls, cls_ind, self.num_classes - 1))
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_bbox_results_one_category(
all_boxes[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid: json.dump(results, fid)
with open(filename, 'w') as fid:
json.dump(results, fid)
return filename
def _write_coco_segm_results(self, all_boxes, all_masks, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir, type='segm')
results = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__': continue
print('Collecting {} results ({:d}/{:d})'.format(cls, cls_ind, self.num_classes - 1))
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_segm_results_one_category(
all_boxes[cls_ind], all_masks[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid: json.dump(results, fid)
with open(filename, 'w') as fid:
json.dump(results, fid)
return filename
def _do_coco_bbox_eval(self, coco, res_file):
......@@ -401,7 +446,7 @@ class TaaS(imdb):
precision = \
coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
ap_default = np.mean(precision[precision > -1])
print ('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
'~~~~'.format(IoU_lo_thresh, IoU_hi_thresh))
print('{:.1f}'.format(100 * ap_default))
for cls_ind, cls in enumerate(self.classes):
......@@ -460,7 +505,7 @@ class TaaS(imdb):
protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
self._write_voc_segm_results(all_boxes, all_masks, output_dir)
if not 'wo' in protocol:
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
self._do_voc_segm_eval(
gt_recs, output_dir, IoU=0.5,
......@@ -477,10 +522,12 @@ class TaaS(imdb):
cats = coco.loadCats(coco.getCatIds())
self._class_to_cat_id = dict(
zip([c['name'] for c in cats], coco.getCatIds()))
else: coco = None
else:
coco = None
res_file = self._write_coco_segm_results(all_boxes, all_masks, gt_recs, output_dir)
if not 'wo' in protocol:
if coco is None: coco = COCO(self._write_coco_segm_annotations(gt_recs, output_dir))
if 'wo' not in protocol:
if coco is None:
coco = COCO(self._write_coco_segm_annotations(gt_recs, output_dir))
self._do_coco_segm_eval(coco, res_file)
def competition_mode(self, on):
......
......@@ -19,16 +19,16 @@ from __future__ import print_function
import cv2
import numpy as np
try:
import cPickle
except:
import pickle as cPickle
from lib.core.config import cfg
from lib.utils.mask_transform import mask_overlap
from lib.utils.boxes import expand_boxes
from lib.pycocotools.mask_utils import mask_rle2im
from lib.utils.boxes import expand_boxes
from lib.utils.mask_transform import mask_overlap
def voc_ap(rec, prec, use_07_metric=False):
......@@ -65,8 +65,13 @@ def voc_ap(rec, prec, use_07_metric=False):
return ap
def voc_bbox_eval(det_file, gt_recs, cls_name,
IoU=0.5, use_07_metric=False):
def voc_bbox_eval(
det_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
class_recs = {}
n_pos = 0
for image_name, rec in gt_recs.items():
......@@ -81,35 +86,35 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
'det': det
}
# read detections
with open(det_file, 'r') as f: lines = f.readlines()
# Read detections
with open(det_file, 'r') as f:
lines = f.readlines()
splitlines = [x.strip().split(' ') for x in lines]
image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
# avoid IndexError if detecting nothing
if len(BB) == 0: return 0, 0, -1
# Avoid IndexError if detecting nothing
if len(BB) == 0:
return 0, 0, -1
# sort by confidence
# Sort by confidence
sorted_ind = np.argsort(-confidence)
BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind]
# go down dets and mark TPs and FPs
# Go down detections and mark TPs and FPs
nd = len(image_ids)
tp = np.zeros(nd)
fp = np.zeros(nd)
tp, fp = np.zeros(nd), np.zeros(nd)
for d in range(nd):
R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float)
ovmax = -np.inf
ovmax, jmax = -np.inf, 0
BBGT = R['bbox'].astype(float)
if BBGT.size > 0:
# compute overlaps
# intersection
# Compute overlaps: intersection
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
......@@ -118,7 +123,7 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih
# union
# Union
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
(BBGT[:, 2] - BBGT[:, 0] + 1.) *
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
......@@ -149,8 +154,14 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
return rec, prec, ap
def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
IoU=0.5, use_07_metric=False):
def voc_segm_eval(
det_file,
seg_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
# 0. Constants
M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH
......@@ -175,8 +186,10 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
image_names.append(image_name)
# 2. Get predict pickle file for this class
with open(det_file, 'rb') as f: boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f: masks_pkl = cPickle.load(f)
with open(det_file, 'rb') as f:
boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f)
# 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs)
......@@ -185,7 +198,8 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
box_num += len(boxes_pkl[im_i])
# avoid IndexError if detecting nothing
if box_num == 0: return 0, 0, -1
if box_num == 0:
return 0, 0, -1
# 4. Re-organize all the predicted boxes
new_boxes = np.zeros((box_num, 5))
......@@ -223,11 +237,10 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
fp[i] = 1
continue
R = class_recs[image_name]
im_h, im_w = \
gt_recs[image_name]['height'], \
gt_recs[image_name]['width']
im_h = gt_recs[image_name]['height']
im_w = gt_recs[image_name]['width']
# decode mask
# Decode mask
ref_box = ref_boxes[i, :4]
mask = new_masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
......@@ -244,14 +257,14 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]),
(x1 - ref_box[0]): (x2 - ref_box[0])]
# calculate max region overlap
ovmax = -1; jmax = -1
# Calculate max region overlap
ovmax, jmax = -1, -1
for j in range(len(R['det'])):
gt_mask_bound = R['bbox'][j].astype(int)
pred_mask_bound = new_boxes[i, :4].astype(int)
crop_mask = R['mask'][j][gt_mask_bound[1] : gt_mask_bound[3] + 1,
gt_mask_bound[0] : gt_mask_bound[2] + 1]
crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask)
......
......@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.faster_rcnn.layers.proposal_layer import ProposalLayer
from lib.faster_rcnn.layers.proposal_target_layer import ProposalTargetLayer
......@@ -13,21 +13,21 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from multiprocessing import Process
from lib.core.config import cfg
from lib.utils.blob import im_list_to_blob
class BlobFetcher(Process):
class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs):
    """Create the fetcher process with its queues left unset.

    ``kwargs`` is accepted for call-site compatibility but is not
    read here.  ``q1_in``, ``q2_in`` and ``q_out`` start as ``None``
    and have to be assigned before ``run`` uses them.
    """
    super(BlobFetcher, self).__init__()
    self.q1_in = self.q2_in = self.q_out = None
    # Daemon process: it is terminated together with its parent.
    self.daemon = True
def get(self, Q_in):
processed_ims = []; ims_info = []; all_boxes = []
processed_ims, ims_info, all_boxes = [], [], []
for ix in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = Q_in.get()
processed_ims.append(im)
......@@ -46,7 +46,7 @@ class BlobFetcher(Process):
def run(self):
    """Forward packed batches from the input queues forever.

    Whenever an input queue holds at least
    ``cfg.TRAIN.IMS_PER_BATCH`` items, they are packed by
    ``self.get`` and the result is put on ``q_out``.
    ``q1_in`` is checked before ``q2_in``.
    """
    # NOTE: qsize() is polled without blocking, so this loop
    # spins while both queues hold fewer items than a batch.
    while True:
        if self.q1_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
            self.q_out.put(self.get(self.q1_in))
        elif self.q2_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
            self.q_out.put(self.get(self.q2_in))
......@@ -13,16 +13,17 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from multiprocessing import Queue
import time
import dragon
import pprint
from multiprocessing import Queue
import dragon.core.mpi as mpi
from lib.core.config import cfg
import lib.utils.logger as logger
from lib.faster_rcnn.data.data_reader import DataReader
from lib.faster_rcnn.data.data_transformer import DataTransformer
from lib.faster_rcnn.data.blob_fetcher import BlobFetcher
from lib.utils import logger
class DataBatch(object):
......@@ -53,13 +54,14 @@ class DataBatch(object):
super(DataBatch, self).__init__()
# Init mpi
global_rank, local_rank, group_size = 0, 0, 1
if mpi.Is_Init():
idx, group = mpi.AllowParallel()
if idx != -1: # DataParallel
global_rank = mpi.Rank()
if dragon.mpi.is_init():
group = dragon.mpi.is_parallel()
if group is not None: # DataParallel
global_rank = dragon.mpi.rank()
group_size = len(group)
for i, node in enumerate(group):
if global_rank == node: local_rank = i
if global_rank == node:
local_rank = i
kwargs['group_size'] = group_size
# Configuration
......@@ -89,7 +91,7 @@ class DataBatch(object):
self._readers = []
for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].Q_out = self.Q1
self._readers[-1].q_out = self.Q1
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
......@@ -106,9 +108,9 @@ class DataBatch(object):
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers)
transformer.Q_in = self.Q1
transformer.Q1_out = self.Q21
transformer.Q2_out = self.Q22
transformer.q_in = self.Q1
transformer.q1_out = self.Q21
transformer.q2_out = self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
......@@ -117,15 +119,17 @@ class DataBatch(object):
self._fetchers = []
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.Q1_in = self.Q21
fetcher.Q2_in = self.Q22
fetcher.Q_out = self.Q3
fetcher.q1_in = self.Q21
fetcher.q2_in = self.Q22
fetcher.q_out = self.Q3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
# Echo only from local rank 0 to avoid repeating the message
if local_rank == 0: self.echo()
if local_rank == 0:
self.echo()
def cleanup():
def terminate(processes):
for process in processes:
......@@ -137,6 +141,7 @@ class DataBatch(object):
logger.info('Terminating DataTransformer ......')
terminate(self._readers)
logger.info('Terminating DataReader......')
import atexit
atexit.register(cleanup)
......
......@@ -14,21 +14,17 @@ from __future__ import division
from __future__ import print_function
import math
import numpy
import multiprocessing
import numpy
from dragon import config as _cfg
from dragon.tools import db as _db
from dragon.tools import db
from lib.core.config import cfg
class DataReader(multiprocessing.Process):
"""DataReader is deployed to queue encoded str from `LMDB`_.
It is supported to adaptively partition and shuffle records over all distributed nodes.
"""Collect encoded str from `LMDB`_.
"""
def __init__(self, **kwargs):
"""Construct a ``DataReader``.
Partition and shuffle records over distributed nodes.
Parameters
----------
......@@ -40,14 +36,20 @@ class DataReader(multiprocessing.Process):
The number of chunks to split.
"""
def __init__(self, **kwargs):
"""Create a DataReader."""
super(DataReader, self).__init__()
self._source = kwargs.get('source', '')
self._use_shuffle = kwargs.get('shuffle', False)
self._num_chunks = kwargs.get('num_chunks', 2048)
self._part_idx, self._num_parts = 0, 1
self._cursor, self._chunk_cursor = 0, 0
self._rng_seed = _cfg.GetRandomSeed()
self.Q_out = None
self._chunk_size, self._perm_size = 0, 0
self._head, self._tail, self._num_entries = 0, 0, 0
self._db, self._zfill, self._perm = None, None, None
self._rng_seed = cfg.RNG_SEED
self.q_out = None
self.daemon = True
def element(self):
......@@ -69,10 +71,6 @@ class DataReader(multiprocessing.Process):
target : int
The key of the record.
Returns
-------
None
Notes
-----
The redirection reopens the database.
......@@ -88,17 +86,12 @@ class DataReader(multiprocessing.Process):
self._db.set(str(target).zfill(self._zfill))
def reset(self):
"""Reset the cursor and environment.
Returns
-------
None
"""
"""Reset the cursor and environment."""
if self._num_parts > 1 or self._use_shuffle:
self._chunk_cursor = 0
self._part_idx = (self._part_idx + 1) % self._num_parts
if self._use_shuffle: self._perm = numpy.random.permutation(self._perm_size)
if self._use_shuffle:
self._perm = numpy.random.permutation(self._perm_size)
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._tail = self._head * self._chunk_size
if self._head >= self._num_entries: self.next_chunk()
......@@ -109,26 +102,15 @@ class DataReader(multiprocessing.Process):
self.redirect(self._head)
def next_record(self):
    """Step the cursor of records.

    Advances the database handle by one entry and increments
    the record counter kept in ``self._cursor``.
    """
    self._db.next()
    self._cursor += 1
def next_chunk(self):
"""Step the cursor of shuffling chunks.
Returns
-------
None
"""
"""Step the cursor of chunks."""
self._chunk_cursor += 1
if self._chunk_cursor >= self._perm_size: self.reset()
if self._chunk_cursor >= self._perm_size:
self.reset()
else:
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._head = self._head * self._chunk_size
......@@ -140,18 +122,12 @@ class DataReader(multiprocessing.Process):
self.redirect(self._head)
def run(self):
"""Start the process.
Returns
-------
None
"""
"""Start the process."""
# Fix seed
numpy.random.seed(self._rng_seed)
# Init db
self._db = _db.LMDB()
self._db = db.LMDB()
self._db.open(self._source)
self._zfill = self._db.zfill()
self._num_entries = self._db.num_entries()
......@@ -189,9 +165,10 @@ class DataReader(multiprocessing.Process):
# Run!
while True:
self.Q_out.put(self.element())
self.q_out.put(self.element())
self.next_record()
if self._cursor >= self._tail:
if self._num_parts > 1 or self._use_shuffle:
self.next_chunk()
else: self.reset()
\ No newline at end of file
else:
self.reset()
......@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from multiprocessing import Process
import multiprocessing
import numpy as np
import numpy.random as npr
......@@ -28,11 +28,11 @@ except ImportError as e:
from lib.core.config import cfg
from lib.proto import anno_pb2 as pb
from lib.utils import logger
from lib.utils.blob import prep_im_for_blob
import lib.utils.logger as logger
class DataTransformer(Process):
class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._rng_seed = cfg.RNG_SEED
......@@ -42,60 +42,64 @@ class DataTransformer(Process):
self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self._queues = []
self.Q_in = self.Q1_out = self.Q2_out = None
self.q_in = self.q1_out = self.q2_out = None
self.daemon = True
def make_roi_dict(
    self,
    ann_datum,
    im_scale,
    apply_flip=False,
    offsets=None,
):
    """Build the dict of ground-truth boxes and classes for one image.

    Parameters
    ----------
    ann_datum : object
        Provides ``annotation`` (iterable of instances with ``x1``,
        ``y1``, ``x2``, ``y2``, ``name`` and ``difficult``) and
        ``datum.width`` / ``datum.height``.
    im_scale : float
        Factor the box coordinates are multiplied by.
    apply_flip : bool, optional
        Whether to mirror the boxes horizontally before scaling.
    offsets : sequence, optional
        ``(x_offset, y_offset, (height, width))``; the boxes are
        shifted by the offsets and clipped into the given size.

    Returns
    -------
    dict
        Keys ``width``, ``height``, ``gt_classes`` (int32, one per
        kept instance) and ``boxes`` (float32, four per instance).

    """
    annotations = ann_datum.annotation
    width = ann_datum.datum.width
    height = ann_datum.datum.height

    # Filter the difficult instances once, then size the arrays from it
    if self._use_diff:
        kept = list(annotations)
    else:
        kept = [ann for ann in annotations if not ann.difficult]
    n_objects = len(kept)

    roi_dict = {
        'width': width,
        'height': height,
        'gt_classes': np.zeros((n_objects,), 'int32'),
        'boxes': np.zeros((n_objects, 4), 'float32'),
    }

    for rec_idx, ann in enumerate(kept):
        # Clip each box into the image bounds
        roi_dict['boxes'][rec_idx, :] = [
            max(0, ann.x1),
            max(0, ann.y1),
            min(ann.x2, width - 1),
            min(ann.y2, height - 1),
        ]
        roi_dict['gt_classes'][rec_idx] = self._class_to_ind[ann.name]

    # Flip the boxes if necessary
    if apply_flip:
        roi_dict['boxes'] = _flip_boxes(
            roi_dict['boxes'], roi_dict['width'])

    # Scale the boxes to the detecting scale
    roi_dict['boxes'] *= im_scale

    # Apply the offsets from scale jitter
    if offsets is not None:
        roi_dict['boxes'][:, 0::2] += offsets[0]
        roi_dict['boxes'][:, 1::2] += offsets[1]
        # Upper bounds are (width - 1, height - 1) repeated for both corners
        roi_dict['boxes'][:, :] = np.minimum(
            np.maximum(roi_dict['boxes'][:, :], 0),
            [offsets[2][1] - 1, offsets[2][0] - 1] * 2,
        )

    return roi_dict
@classmethod
def get_image(cls, serialized):
......@@ -127,20 +131,23 @@ class DataTransformer(Process):
datum.ParseFromString(serialized)
im_datum = datum.datum
im = np.fromstring(im_datum.data, np.uint8)
if im_datum.encoded is True: im = cv2.imdecode(im, -1)
else: im = im.reshape((im_datum.height, im_datum.width, im_datum.channels))
if im_datum.encoded is True:
im = cv2.imdecode(im, -1)
else:
h, w = im_datum.height, im_datum.width
im = im.reshape((h, w, im_datum.channels))
# Scale
scale_indices = npr.randint(0, high=len(cfg.TRAIN.SCALES))
scale_indices = npr.randint(len(cfg.TRAIN.SCALES))
target_size = cfg.TRAIN.SCALES[scale_indices]
im, im_scale, jitter = prep_im_for_blob(im, target_size, cfg.TRAIN.MAX_SIZE)
# Flip
flip = False
apply_flip = False
if self._use_flipped:
if npr.randint(0, 2) > 0:
im = im[:, ::-1, :]
flip = True
apply_flip = True
# Random Crop or RandomPad
offsets = None
......@@ -153,57 +160,63 @@ class DataTransformer(Process):
# To a square (target_size, target_size)
im, offsets = _get_image_with_target_size([target_size] * 2, im)
# Datum -> Record
rec = self.make_record(datum, im_scale, flip, offsets)
# Datum -> RoIDict
roi_dict = self.make_roi_dict(datum, im_scale, apply_flip, offsets)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
gt_boxes = np.empty((len(rec['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, 0:4], gt_boxes[:, 4] = rec['boxes'], rec['gt_classes']
gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
return im, im_scale, gt_boxes
def run(self):
    """Transform serialized records forever and route them by shape.

    Each item taken from ``q_in`` is passed to ``self.get``.
    Results without any ground-truth box are dropped; the rest go
    to ``q1_out`` when height / width > 1 and to ``q2_out``
    otherwise.
    """
    # Seed the numpy RNG inside the child process
    npr.seed(self._rng_seed)
    while True:
        serialized = self.q_in.get()
        data = self.get(serialized)
        # Ensure that there should be at least 1 ground-truth
        if len(data[2]) < 1:
            continue
        # data[0] is indexed as (height, width, ...)
        aspect_ratio = float(data[0].shape[0]) / data[0].shape[1]
        if aspect_ratio > 1.0:
            self.q1_out.put(data)
        else:
            self.q2_out.put(data)
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
oldx1 = boxes[:, 0].copy()
oldx2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - oldx2 - 1
flip_boxes[:, 2] = width - oldx1 - 1
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes
def _get_image_with_target_size(target_size, img):
    """Randomly crop and/or pad an image to ``target_size``.

    Parameters
    ----------
    target_size : sequence of int
        The output ``(height, width)``.
    img : numpy.ndarray
        The input image, indexed as ``(height, width, ...)``.

    Returns
    -------
    numpy.ndarray
        The output image; pixels not covered by ``img`` are
        filled with ``cfg.PIXEL_MEANS``.
    tuple
        ``(x_offset, y_offset, target_size)`` where each offset is
        the pad offset minus the crop offset along that axis.

    """
    im_shape = list(img.shape)
    height_diff = target_size[0] - im_shape[0]
    width_diff = target_size[1] - im_shape[1]

    # A negative diff leaves room to crop, a positive one room to pad.
    # The draw order is kept fixed so seeded runs stay reproducible.
    ofs_crop_width = np.random.randint(max(-width_diff, 0) + 1)
    ofs_pad_width = np.random.randint(max(width_diff, 0) + 1)
    ofs_crop_height = np.random.randint(max(-height_diff, 0) + 1)
    ofs_pad_height = np.random.randint(max(height_diff, 0) + 1)

    im_shape[:2] = target_size
    new_img = np.empty(im_shape, dtype=img.dtype)
    new_img[:] = cfg.PIXEL_MEANS
    new_img[ofs_pad_height:ofs_pad_height + img.shape[0],
            ofs_pad_width:ofs_pad_width + img.shape[1]] = \
        img[ofs_crop_height:ofs_crop_height + target_size[0],
            ofs_crop_width:ofs_crop_width + target_size[1]]

    return new_img, (
        ofs_pad_width - ofs_crop_width,
        ofs_pad_height - ofs_crop_height,
        target_size,
    )
......@@ -32,7 +32,7 @@ import numpy as np
# -79 -167 96 184
# -167 -343 184 360
#array([[ -83., -39., 100., 56.],
# array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.],
# [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.],
......@@ -42,8 +42,12 @@ import numpy as np
# [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]])
def generate_anchors(base_size=16, ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6)):
def generate_anchors(
base_size=16,
ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6),
):
"""
Generate anchor (reference) windows by enumerating aspect ratios X
scales wrt a reference (0, 0, 15, 15) window.
......@@ -55,22 +59,25 @@ def generate_anchors(base_size=16, ratios=(0.5, 1, 2),
return anchors
def generate_anchors_v2(
    stride=16,
    ratios=(0.5, 1, 2),
    sizes=(32, 64, 128, 256, 512),
):
    """
    Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    are centered on stride / 2, have (approximate) sqrt areas of the specified
    sizes, and aspect ratios as given.
    """
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; the builtin yields the same float64 dtype.
    return generate_anchors(
        base_size=stride,
        ratios=ratios,
        scales=np.array(sizes, dtype=float) / stride,
    )
def _whctrs(anchor):
"""
Return width, height, x center, and y center for an anchor (window).
"""
"""Return width, height, x center, and y center for an anchor (window)."""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
......@@ -83,7 +90,6 @@ def _mkanchors(ws, hs, x_ctr, y_ctr):
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
......@@ -94,10 +100,7 @@ def _mkanchors(ws, hs, x_ctr, y_ctr):
def _ratio_enum(anchor, ratios):
"""
Enumerate a set of anchors for each aspect ratio wrt an anchor.
"""
"""Enumerate a set of anchors for each aspect ratio wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
......@@ -108,10 +111,7 @@ def _ratio_enum(anchor, ratios):
def _scale_enum(anchor, scales):
"""
Enumerate a set of anchors for each scale wrt an anchor.
"""
"""Enumerate a set of anchors for each scale wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales
......
......@@ -19,9 +19,10 @@ import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.blob import to_tensor
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors
......@@ -32,10 +33,9 @@ class AnchorTargetLayer(torch.nn.Module):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
# C4 backbone takes the first stride
self.scales, self.stride, self.ratios = \
cfg.RPN.SCALES, \
cfg.RPN.STRIDES[0], \
cfg.RPN.ASPECT_RATIOS
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
......@@ -61,11 +61,13 @@ class AnchorTargetLayer(torch.nn.Module):
"""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \
.format(num_images, len(gt_boxes_wide)))
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
height, width = features[0].shape[-2:]
......@@ -85,7 +87,7 @@ class AnchorTargetLayer(torch.nn.Module):
all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A)
# label: 1 is positive, 0 is negative, -1 is dont care
# label: 1 is positive, 0 is negative, -1 is do not care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
......@@ -101,8 +103,8 @@ class AnchorTargetLayer(torch.nn.Module):
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] # height
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
......@@ -143,7 +145,10 @@ class AnchorTargetLayer(torch.nn.Module):
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False)
fg_inds,
size=len(fg_inds) - num_fg,
replace=False,
)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
......@@ -152,12 +157,17 @@ class AnchorTargetLayer(torch.nn.Module):
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False)
bg_inds,
size=len(bg_inds) - num_bg,
replace=False,
)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], 0:4])
ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], 0:4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
......@@ -169,34 +179,26 @@ class AnchorTargetLayer(torch.nn.Module):
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
# labels
labels = all_labels.reshape(
(num_images, height, width, A)).transpose(0, 3, 1, 2)
labels = labels.reshape((num_images, total_anchors))
labels = all_labels \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, total_anchors))
# bbox_targets
bbox_targets = all_bbox_targets.reshape(
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2)
bbox_targets = all_bbox_targets \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
# bbox_inside_weights
bbox_inside_weights = all_bbox_inside_weights.reshape(
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2)
bbox_inside_weights = all_bbox_inside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
# bbox_outside_weights
bbox_outside_weights = all_bbox_outside_weights.reshape(
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2)
bbox_outside_weights = all_bbox_outside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights),
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
\ No newline at end of file
......@@ -33,10 +33,8 @@ class DataLayer(torch.nn.Module):
})
def forward(self):
    """Return the next batch with its ``'data'`` array wrapped as a tensor.

    All other entries of the batch dict are passed through unchanged.
    """
    # Get an array blob from the Queue
    outputs = self.data_batch.get()
    # Zero-Copy the array to tensor
    outputs['data'] = torch.from_numpy(outputs['data'])
    return outputs
......@@ -9,27 +9,35 @@
#
# --------------------------------------------------------
import numpy as np
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.nms.nms_wrapper import nms
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils.bbox_transform import bbox_transform_inv, clip_boxes
from lib.nms.nms_wrapper import nms
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box
"""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales, self.stride, self.ratios = \
cfg.RPN.SCALES, cfg.RPN.STRIDES[0], cfg.RPN.ASPECT_RATIOS
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = generate_anchors(
......@@ -61,7 +69,8 @@ class ProposalLayer(torch.nn.Module):
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
anchors = self.base_anchors.reshape((1, A, 4)) + \
anchors = \
self.base_anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = anchors.reshape((K * A, 4))
......@@ -69,8 +78,6 @@ class ProposalLayer(torch.nn.Module):
batch_rois = []
# scores & deltas are (1, A, H, W) format
# Transpose to (1, H, W, A)
batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1))
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1))
......@@ -95,11 +102,11 @@ class ProposalLayer(torch.nn.Module):
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_boxes(proposals, ims_info[ix, :2])
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2])
keep = _filter_boxes(proposals, min_size * ims_info[ix, 2])
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
......@@ -107,7 +114,8 @@ class ProposalLayer(torch.nn.Module):
# 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: keep = keep[:post_nms_topN]
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :]
# Output rois blob
......@@ -118,13 +126,7 @@ class ProposalLayer(torch.nn.Module):
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN': return rpn_rois
else: return [to_tensor(rpn_rois)]
def _filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min_size."""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
if cfg_key == 'TRAIN':
return rpn_rois
else:
return [blob_to_tensor(rpn_rois)]
......@@ -9,22 +9,24 @@
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets.
"""Assign object detection proposals to ground-truth targets."""
"""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
......@@ -34,8 +36,8 @@ class ProposalTargetLayer(torch.nn.Module):
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
......@@ -50,14 +52,12 @@ class ProposalTargetLayer(torch.nn.Module):
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
......@@ -73,11 +73,11 @@ class ProposalTargetLayer(torch.nn.Module):
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return {
'rois': [to_tensor(batch_outputs['rois'])],
'labels': to_tensor(batch_outputs['labels']),
'bbox_targets': to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': to_tensor(batch_outputs['bbox_outside_weights']),
'rois': [blob_to_tensor(batch_outputs['rois'])],
'labels': blob_to_tensor(batch_outputs['labels']),
'bbox_targets': blob_to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': blob_to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': blob_to_tensor(batch_outputs['bbox_outside_weights']),
}
......@@ -109,7 +109,6 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
......@@ -117,12 +116,18 @@ def _compute_targets(ex_rois, gt_rois, labels):
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Generate a random sample of RoIs comprising foreground and background examples."""
def _sample_rois(
all_rois,
gt_boxes,
fg_rois_per_image,
rois_per_image,
num_classes,
):
"""Generate a random sample of RoIs."""
overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float),
)
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
......@@ -164,11 +169,6 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas
return labels, rois, bbox_targets, bbox_inside_weights
def _dismantle_gt_boxes(gt_boxes, num_images):
return [gt_boxes[np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]] \
for ix in range(num_images)]
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
......@@ -13,27 +13,23 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
import cPickle
except:
import pickle as cPickle
import numpy as np
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image
from lib.utils.bbox_transform import clip_boxes, bbox_transform_inv
from lib.nms.nms_wrapper import nms, soft_nms
from lib.utils.timer import Timer
from lib.utils.blob import im_list_to_blob, to_array
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales.
"""
"""Detect a image, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_image)
......@@ -42,25 +38,30 @@ def im_detect(detector, raw_image):
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
blobs['data'] = torch.from_numpy(blobs['data']).cuda(cfg.GPU_ID)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results
batch_rois = to_array(outputs['rois'])
batch_scores = to_array(outputs['cls_prob'])
batch_deltas = to_array(outputs['bbox_pred'])
batch_rois = tensor_to_blob(outputs['rois'])
batch_scores = tensor_to_blob(outputs['cls_prob'])
batch_deltas = tensor_to_blob(outputs['bbox_pred'])
batch_boxes = bbox_transform_inv(
batch_rois[:, 1:5], batch_deltas, cfg.BBOX_REG_WEIGHTS)
scores_wide = []; boxes_wide = []
boxes=batch_rois[:, 1:5],
deltas=batch_deltas,
weights=cfg.BBOX_REG_WEIGHTS,
)
scores_wide, boxes_wide = [], []
for im_idx in range(len(ims)):
indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0]
boxes = batch_boxes[indices]
boxes /= ims_scale[im_idx]
clip_boxes(boxes, raw_image.shape)
clip_tiled_boxes(boxes, raw_image.shape)
scores_wide.append(batch_scores[indices])
boxes_wide.append(boxes)
......@@ -69,12 +70,13 @@ def im_detect(detector, raw_image):
def test_net(detector, server):
classes, num_images, num_classes = \
server.classes, server.num_images, server.num_classes
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect' : Timer(), 'misc' : Timer()}
_t = {'im_detect': Timer(), 'misc': Timer()}
for i in range(num_images):
image_id, raw_image = server.get_image()
......@@ -89,22 +91,27 @@ def test_net(detector, server):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4]
cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).\
astype(np.float32, copy=False)
cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(cls_dets, cfg.TEST.NMS,
keep = soft_nms(
cls_detections, cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA)
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
cls_dets = cls_dets[keep, :]
all_boxes[j][i] = cls_dets
boxes_this_image.append(cls_dets)
keep = nms(cls_detections, cfg.TEST.NMS, force_cpu=True)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_image, classes, boxes_this_image,
vis_one_image(
raw_image, classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
filename=server.get_save_filename(image_id))
filename=server.get_save_filename(image_id),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
......@@ -112,7 +119,8 @@ def test_net(detector, server):
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores)
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
......@@ -120,7 +128,7 @@ def test_net(detector, server):
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(i + 1, num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='')
......
......@@ -13,16 +13,18 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
import lib.utils.logger as logger
from lib.utils.blob import to_tensor
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class AnchorTargetLayer(torch.nn.Module):
......@@ -31,14 +33,14 @@ class AnchorTargetLayer(torch.nn.Module):
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
self.scales, self.strides, self.ratios = \
cfg.RPN.SCALES, \
cfg.RPN.STRIDES, \
cfg.RPN.ASPECT_RATIOS
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides)))
.format(len(self.scales), len(self.strides))
)
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
......@@ -46,9 +48,9 @@ class AnchorTargetLayer(torch.nn.Module):
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size = self.strides[i]
scale = self.scales[i]
if not isinstance(scale, list): scale = [scale]
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
......@@ -59,16 +61,17 @@ class AnchorTargetLayer(torch.nn.Module):
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \
.format(num_images, len(gt_boxes_wide)))
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
all_anchors = []; total_anchors = 0
all_anchors, total_anchors = [], 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
......@@ -107,8 +110,8 @@ class AnchorTargetLayer(torch.nn.Module):
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] # height
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
......@@ -180,16 +183,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights),
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
\ No newline at end of file
......@@ -9,39 +9,49 @@
#
# ------------------------------------------------------------
import numpy as np
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms
from lib.utils import logger
from lib.utils.blob import to_tensor
from lib.utils.bbox_transform import bbox_transform_inv, clip_boxes
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box.
"""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales, self.strides, self.ratios = \
cfg.RPN.SCALES, cfg.RPN.STRIDES, cfg.RPN.ASPECT_RATIOS
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides)))
.format(len(self.scales), len(self.strides))
)
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size = self.strides[i]
scale = self.scales[i]
if not isinstance(scale, list): scale = [scale]
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
......@@ -92,7 +102,8 @@ class ProposalLayer(torch.nn.Module):
# Prepare for the outputs
batch_rois = []
batch_scores = cls_prob.numpy(True)
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
batch_deltas = bbox_pred.numpy(True) \
.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
# Extract RoIs separately
for ix in range(num_images):
......@@ -115,10 +126,10 @@ class ProposalLayer(torch.nn.Module):
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_boxes(proposals, ims_info[ix, :2])
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold
keep = _filter_boxes(proposals, min_size * ims_info[ix, 2])
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
......@@ -126,7 +137,8 @@ class ProposalLayer(torch.nn.Module):
# 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: keep = keep[:post_nms_topN]
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :]
# Output rois blob
......@@ -151,28 +163,19 @@ class ProposalLayer(torch.nn.Module):
lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling
all_rois.append(to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
all_rois.append(blob_to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else:
all_rois.append(to_tensor(rpn_rois[lv_indices]))
all_rois.append(blob_to_tensor(rpn_rois[lv_indices]))
return all_rois
def _filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min_size.
"""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""Determine which FPN level each RoI in a set of RoIs should map to based
on the heuristic in the FPN paper.
"""
if len(rois) == 0: return []
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
......
......@@ -9,14 +9,19 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
class ProposalTargetLayer(torch.nn.Module):
......@@ -36,26 +41,19 @@ class ProposalTargetLayer(torch.nn.Module):
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
}
def _map_rois(self, inputs, fake_outputs, outputs, keys, levels):
f = lambda a, b, indices: a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(f(inputs[i], fake_outputs[key], inds))
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
outputs = dict(map(lambda a, b: (a, b), keys, [[] for key in keys]))
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for key in keys]))
outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
......@@ -65,11 +63,9 @@ class ProposalTargetLayer(torch.nn.Module):
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
# Sample rois with labels & bbox targets
labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
......@@ -94,14 +90,20 @@ class ProposalTargetLayer(torch.nn.Module):
K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)]
_fmap_rois([batch_outputs[key] for key in keys], self.fake_outputs, outputs, keys, lvs_indices)
_fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
outputs=outputs,
keys=keys,
levels=lvs_indices,
)
return {
'rois': [to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': to_tensor(np.vstack(outputs['bbox_outside_weights'])),
'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
......@@ -115,6 +117,7 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
......@@ -131,7 +134,6 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
......@@ -140,10 +142,12 @@ def _compute_targets(ex_rois, gt_rois, labels):
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""Determine which FPN level each RoI in a set of RoIs should map to based
on the heuristic in the FPN paper.
"""
if len(rois) == 0: return []
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
......@@ -154,9 +158,7 @@ def _map_rois_to_fpn_levels(rois, k_min, k_max):
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Generate a random sample of RoIs comprising foreground and background
examples.
"""
"""Sample a batch of RoIs comprising foreground and background examples."""
# overlaps: (rois x gt_boxes)
overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
......@@ -203,19 +205,15 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas
return labels, rois, bbox_targets, bbox_inside_weights
def _dismantle_gt_boxes(gt_boxes, num_images):
return [gt_boxes[np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]] \
for ix in range(num_images)]
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
def _fmap_rois(inputs, fake_outputs, outputs, keys, levels):
f = lambda a, b, indices: a[indices] if len(indices) > 0 else b
def impl(a, b, indices):
return a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(f(inputs[i], fake_outputs[key], inds))
\ No newline at end of file
outputs[key].append(impl(inputs[i], fake_outputs[key], inds))
......@@ -9,13 +9,17 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Import custom modules
from lib.modeling.base import Bootstarp
from lib.modeling.base import RPNDecoder
from lib.modeling.base import RetinaNetDecoder
from lib.modeling.base import conv1x1, conv3x3, bn, affine
from lib.modeling.fpn import FPN
from lib.modeling.rpn import RPN
from lib.modeling.base import affine
from lib.modeling.base import bn
from lib.modeling.base import conv1x1
from lib.modeling.base import conv3x3
from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.fpn import FPN
from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD
......@@ -15,7 +15,9 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.modeling import conv1x1, conv3x3, bn, affine
from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class WideResBlock(torch.nn.Module):
......@@ -112,8 +114,10 @@ class AirNet(torch.nn.Module):
)
self.layer1 = self.make_blocks(filters[0], blocks[0])
self.layer2 = self.make_blocks(filters[1], blocks[1], 2)
if num_stages >= 4: self.layer3 = self.make_blocks(filters[2], blocks[2], 2)
if num_stages >= 5: self.layer4 = self.make_blocks(filters[3], blocks[3], 2)
if num_stages >= 4:
self.layer3 = self.make_blocks(filters[2], blocks[2], 2)
if num_stages >= 5:
self.layer4 = self.make_blocks(filters[3], blocks[3], 2)
self.reset_parameters()
def reset_parameters(self):
......@@ -165,7 +169,14 @@ def airnet(num_stages):
)
return AirNet(blocks, num_stages)
def make_airnet_():
    """Build the default AirNet body (``airnet`` called with 5)."""
    return airnet(5)


def make_airnet_3b():
    """Build the AirNet body via ``airnet(3)``."""
    return airnet(3)


def make_airnet_4b():
    """Build the AirNet body via ``airnet(4)``."""
    return airnet(4)


def make_airnet_5b():
    """Build the AirNet body via ``airnet(5)``."""
    return airnet(5)
......@@ -17,99 +17,20 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
def affine(dim_in, inplace=True):
    """Create a ``torch.nn.Affine`` module with weight and bias fixed.

    ``dim_in`` and ``inplace`` are forwarded unchanged; ``fix_weight``
    and ``fix_bias`` are always passed as True.
    """
    frozen = {'fix_weight': True, 'fix_bias': True}
    return torch.nn.Affine(dim_in, inplace=inplace, **frozen)
class Bootstarp(torch.nn.Module):
    """Operator module that runs the ``ImageData`` op on input images."""

    def __init__(self):
        super(Bootstarp, self).__init__()
        # Data type name taken from the model config, lower-cased.
        self.dtype = cfg.MODEL.DATA_TYPE.lower()
        self.register_op()

    def register_op(self):
        """Store the op type and its arguments in ``self.op_meta``."""
        arguments = {
            'dtype': self.dtype,
            'data_format': 'NCHW',
            'mean_values': cfg.PIXEL_MEANS,
        }
        self.op_meta = {'op_type': 'ImageData', 'arguments': arguments}

    def forward(self, x):
        """Run the op on ``x`` with one registered output."""
        return self.run([x], [self.register_output()])
class RPNDecoder(torch.nn.Module):
    """Generate proposal regions from RPN.

    Wraps the ``Proposal`` op in ``RCNN`` mode, configured from the
    test-time RPN settings and the FPN RoI level settings in ``cfg``.
    """

    def __init__(self):
        super(RPNDecoder, self).__init__()
        self.register_op()
        # Number of outputs requested from the op: one per RoI level when
        # more than one RPN stride is configured, otherwise exactly one.
        if len(cfg.RPN.STRIDES) > 1:
            self.K = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
        else:
            self.K = 1

    def register_op(self):
        """Store the op type and its arguments in ``self.op_meta``."""
        self.op_meta = {
            'op_type': 'Proposal',
            'arguments': {
                'det_type': 'RCNN',
                'strides': cfg.RPN.STRIDES,
                'ratios': [float(e) for e in cfg.RPN.ASPECT_RATIOS],
                'scales': [float(e) for e in cfg.RPN.SCALES],
                'pre_nms_top_n': cfg.TEST.RPN_PRE_NMS_TOP_N,
                'post_nms_top_n': cfg.TEST.RPN_POST_NMS_TOP_N,
                'nms_thresh': cfg.TEST.RPN_NMS_THRESH,
                'min_size': cfg.TEST.RPN_MIN_SIZE,
                # Previously misspelled 'min_leve', so the configured
                # minimum level was not passed under the name that
                # pairs with 'max_level'.
                'min_level': cfg.FPN.ROI_MIN_LEVEL,
                'max_level': cfg.FPN.ROI_MAX_LEVEL,
                'canonical_scale': cfg.FPN.ROI_CANONICAL_SCALE,
                'canonical_level': cfg.FPN.ROI_CANONICAL_LEVEL,
            }
        }

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Run the op and always return a list of outputs.

        ``features`` must be a list: it is concatenated with
        ``[cls_prob, bbox_pred, to_tensor(ims_info)]`` to form the inputs.
        """
        inputs = features + [cls_prob, bbox_pred, to_tensor(ims_info)]
        outputs = [self.register_output() for _ in range(self.K)]
        outputs = self.run(inputs, outputs)
        # A single result may come back bare; normalize it to a list.
        return outputs if isinstance(outputs, list) else [outputs]
class RetinaNetDecoder(torch.nn.Module):
    """Generate proposal regions from retinanet."""

    def __init__(self):
        super(RetinaNetDecoder, self).__init__()
        max_level = cfg.FPN.RPN_MAX_LEVEL
        min_level = cfg.FPN.RPN_MIN_LEVEL
        per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
        # One stride per level: 2 raised to the level number.
        self.strides = [
            int(2. ** level) for level in range(min_level, max_level + 1)
        ]
        # Base anchor scale multiplied by fractional powers of two.
        self.scales = [
            cfg.RETINANET.ANCHOR_SCALE * (2 ** (step / float(per_octave)))
            for step in range(per_octave)
        ]
        self.register_op()

    def register_op(self):
        """Store the op type and its arguments in ``self.op_meta``."""
        arguments = {
            'det_type': 'RETINANET',
            'strides': self.strides,
            'scales': self.scales,
            'ratios': [float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
            'pre_nms_top_n': cfg.RETINANET.PRE_NMS_TOP_N,
            'score_thresh': cfg.TEST.SCORE_THRESH,
        }
        self.op_meta = {'op_type': 'Proposal', 'arguments': arguments}

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Run the op on the features and predictions with one output."""
        op_inputs = features + [cls_prob, bbox_pred, to_tensor(ims_info)]
        return self.run(op_inputs, [self.register_output()])
def bn(dim_in, eps=1e-5):
    """Create a ``torch.nn.BatchNorm2d`` module for ``dim_in`` using ``eps``."""
    batch_norm = torch.nn.BatchNorm2d(dim_in, eps=eps)
    return batch_norm
def conv1x1(dim_in, dim_out, stride=1, bias=False):
......@@ -133,18 +54,3 @@ def conv3x3(dim_in, dim_out, stride=1, bias=False):
padding=1,
bias=bias,
)
def bn(dim_in, eps=1e-5):
    """Return a new ``torch.nn.BatchNorm2d`` built from ``dim_in`` and ``eps``."""
    layer = torch.nn.BatchNorm2d(dim_in, eps=eps)
    return layer
def affine(dim_in, inplace=True):
    """Return a ``torch.nn.Affine`` module whose weight and bias are fixed.

    ``fix_weight`` and ``fix_bias`` are always True; ``dim_in`` and
    ``inplace`` are passed through as given.
    """
    fixed = {'fix_weight': True, 'fix_bias': True}
    return torch.nn.Affine(dim_in, inplace=inplace, **fixed)
\ No newline at end of file
......@@ -13,22 +13,19 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import importlib
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.utils.logger import is_root
from lib.modeling import FPN
from lib.modeling import RPN
from lib.modeling import FastRCNN
from lib.modeling import RetinaNet
from lib.modeling import SSD
from lib.modeling.factory import get_body_func
from lib.modeling import (
Bootstarp,
FPN,
RPN,
FastRCNN,
RetinaNet,
SSD,
)
from lib.ops.modules import Bootstrap
from lib.utils.logger import is_root
class Detector(torch.nn.Module):
......@@ -47,7 +44,7 @@ class Detector(torch.nn.Module):
# + Data Loader
self.data_layer = importlib.import_module(
'lib.{}'.format(model)).DataLayer
self.bootstarp = Bootstarp()
self.bootstrap = Bootstrap()
# + Feature Extractor
self.body = get_body_func(body)()
......@@ -84,8 +81,11 @@ class Detector(torch.nn.Module):
The path of the weights file.
"""
self.load_state_dict(torch.load(weights),
strict=False, verbose=is_root())
self.load_state_dict(
torch.load(weights),
strict=False,
verbose=is_root(),
)
def forward(self, inputs=None):
"""Compute the detection outputs.
......@@ -107,9 +107,9 @@ class Detector(torch.nn.Module):
# 1. Extract features
# Process the data:
# 1) NHWC => NCHW
# 2) Uint8 => Float32 or Float16
# 2) uint8 => float32 or float16
# 3) Mean subtraction
image_data = self.bootstarp(inputs['data'])
image_data = self.bootstrap(inputs['data'])
features = self.body(image_data)
# 2. Apply the FPN to enhance features if necessary
......@@ -117,7 +117,7 @@ class Detector(torch.nn.Module):
features = self.fpn(features)
# 3. Collect detection outputs
outputs = OrderedDict()
outputs = collections.OrderedDict()
# 3.1 Feature -> RPN -> Fast R-CNN
if hasattr(self, 'rpn'):
......
......@@ -13,27 +13,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import importlib
from collections import defaultdict
_STORE = defaultdict(dict)
def get_template_func(name, sets, desc):
    """Resolve a registered name to the object its dotted path points at.

    Args:
        name: Registry key; it is lower-cased before the lookup.
        sets: Mapping from keys to dotted paths whose last component is
            the attribute name and whose prefix is the module path.
        desc: Registry description used in the "not registered" message.

    Returns:
        The attribute fetched from the imported module.

    Raises:
        ValueError: If ``name`` is not in ``sets`` or the module cannot
            be imported.
    """
    name = name.lower()
    if name not in sets:
        raise ValueError(
            'The {} for {} was not registered.\n'
            'Registered modules: [{}]'.format(
                name, desc, ', '.join(sets)))
    # Split once at the last dot: the prefix is the module path.
    module_name, _, func_name = sets[name].rpartition('.')
    try:
        module = importlib.import_module(module_name)
    except ImportError as e:
        # Keep the underlying reason in the message; otherwise a failure
        # raised from inside the target module is indistinguishable from
        # a wrong path.
        raise ValueError(
            'Can not import module from: {} ({})'.format(module_name, e))
    return getattr(module, func_name)
_STORE = collections.defaultdict(dict)
###########################################
......@@ -59,6 +43,23 @@ for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D)
def get_template_func(name, sets, desc):
    """Look up ``name`` in a registry and import the object it refers to.

    Args:
        name: Registry key; matched after lower-casing.
        sets: Mapping from keys to dotted paths (module path followed by
            the attribute name as the last component).
        desc: Registry description used in the "not registered" message.

    Returns:
        The attribute named by the last path component.

    Raises:
        ValueError: If ``name`` is not registered or the module import
            fails.
    """
    name = name.lower()
    if name not in sets:
        raise ValueError(
            'The {} for {} was not registered.\n'
            'Registered modules: [{}]'.format(
                name, desc, ', '.join(sets)))
    # Everything before the last dot is the module; the rest is the attribute.
    module_name, _, func_name = sets[name].rpartition('.')
    try:
        module = importlib.import_module(module_name)
    except ImportError as e:
        # Report why the import failed instead of discarding the cause.
        raise ValueError(
            'Can not import module from: {} ({})'.format(module_name, e))
    return getattr(module, func_name)
def get_body_func(name):
    """Resolve ``name`` against the 'BODY' entries of the module registry."""
    body_registry = _STORE['BODY']
    return get_template_func(name, body_registry, 'Body')
......@@ -13,11 +13,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling import RPNDecoder
from lib.ops.modules import RPNDecoder
class FastRCNN(torch.nn.Module):
......@@ -50,11 +50,11 @@ class FastRCNN(torch.nn.Module):
self.relu = torch.nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False)
self.roi_func = {
'RoIPool': torch.roi_pool,
'RoIAlign': torch.roi_align,
'RoIPool': torch.vision.ops.roi_pool,
'RoIAlign': torch.vision.ops.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD]
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1.)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1., reduction='batch_size')
# Compute spatial scales for multiple strides
roi_levels = [level for level in range(
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)]
......@@ -66,13 +66,16 @@ class FastRCNN(torch.nn.Module):
torch.nn.init.normal_(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters():
if 'bias' in name: torch.nn.init.constant_(p, 0)
if 'bias' in name:
torch.nn.init.constant_(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale):
    """Apply the configured RoI operator to ``feature`` over ``rois``.

    The output size is square: ``cfg.FRCNN.ROI_XFORM_RESOLUTION`` is used
    for both dimensions. Only ``output_size`` is passed; the older
    ``pooled_h``/``pooled_w`` keywords it replaced are not sent alongside it.
    """
    resolution = cfg.FRCNN.ROI_XFORM_RESOLUTION
    return self.roi_func(
        feature, rois,
        output_size=(resolution, resolution),
        spatial_scale=spatial_scale,
    )
......@@ -127,14 +130,14 @@ class FastRCNN(torch.nn.Module):
# Compute rcnn logits
cls_score = self.cls_score(rcnn_output).float()
outputs = OrderedDict({
outputs = collections.OrderedDict({
'bbox_pred':
self.bbox_pred(rcnn_output).float(),
})
if self.training:
# Compute rcnn losses
outputs.update(OrderedDict({
outputs.update(collections.OrderedDict({
'cls_loss': self.cls_loss(
cls_score,
self.rcnn_data['labels'],
......
......@@ -16,7 +16,8 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3
from lib.modeling import conv1x1
from lib.modeling import conv3x3
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
......@@ -48,49 +49,43 @@ class FPN(torch.nn.Module):
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_(
m.weight,
# Fix the gain for [-127, 127]
a=1,
a=1, # Fix the gain for [-127, 127]
) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0)
def apply_on_rcnn(self, features):
    """Build the feature pyramid, using max-pooling for the extra levels.

    Starts from ``self.C[-1](features[-1])``, appends one max-pooled map
    per level above ``HIGHEST_BACKBONE_LVL`` up to the configured maximum,
    then walks down to the configured minimum level, adding each lateral
    output to the resized running input. Returns the list of outputs with
    lower levels first.
    """
    fpn_input = self.C[-1](features[-1])
    min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
    outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
    # Apply MaxPool for higher features
    for _ in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
        outputs.append(self.maxpool(outputs[-1]))
    # Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
    for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
        lateral_output = self.C[i - min_lvl](features[i - 1])
        # Resize the running input to the lateral map's last two dims.
        upscale_output = torch.vision.ops.nn_resize(
            fpn_input, dsize=lateral_output.shape[-2:])
        fpn_input = lateral_output.__iadd__(upscale_output)
        outputs.insert(0, self.P[i - min_lvl](fpn_input))
    return outputs
def apply_on_retinanet(self, features):
    """Build the feature pyramid used by RetinaNet.

    Differs from ``apply_on_rcnn`` only in how the levels above
    ``HIGHEST_BACKBONE_LVL`` are produced: each one is computed by
    ``self.P[...]`` instead of max-pooling. The first extra level
    takes ``features[-1]`` directly, the following ones take the
    ReLU of the previous extra output.

    Parameters
    ----------
    features : sequence
        The backbone features. ``features[-1]`` feeds the highest
        backbone level and ``features[i - 1]`` feeds pyramid level ``i``.

    Returns
    -------
    list
        The pyramid outputs, ordered from ``cfg.FPN.RPN_MIN_LEVEL``
        up to ``cfg.FPN.RPN_MAX_LEVEL``.

    """
    fpn_input = self.C[-1](features[-1])
    min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
    outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
    # Add extra convolutions for higher features
    extra_input = features[-1]
    for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
        outputs.append(self.P[i - min_lvl](extra_input))
        if i != max_lvl:
            # No activation is needed after the last extra level
            extra_input = self.relu(outputs[-1])
    # Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
    for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
        lateral_output = self.C[i - min_lvl](features[i - 1])
        # Resize the upper feature to the spatial size of the lateral one
        upscale_output = torch.vision.ops.nn_resize(
            fpn_input, dsize=lateral_output.shape[-2:])
        # The explicit ``__iadd__`` call is kept from the original code
        fpn_input = lateral_output.__iadd__(upscale_output)
        # Lower levels go to the front to keep the low-to-high order
        outputs.insert(0, self.P[i - min_lvl](fpn_input))
    return outputs
def forward(self, features):
......
......@@ -20,12 +20,20 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3, affine
from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class BasicBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out, stride=1,
downsample=None, dropblock=None):
def __init__(
self,
dim_in,
dim_out,
stride=1,
downsample=None,
dropblock=None,
):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out)
......@@ -65,8 +73,14 @@ class Bottleneck(torch.nn.Module):
contraction = cfg.RESNET.NUM_GROUPS \
* cfg.RESNET.GROUP_WIDTH / 256.0
def __init__(self, dim_in, dim_out, stride=1,
downsample=None, dropblock=None):
def __init__(
self,
dim_in,
dim_out,
stride=1,
downsample=None,
dropblock=None,
):
super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction)
self.conv1 = conv1x1(dim_in, dim)
......@@ -128,11 +142,17 @@ class ResNet(torch.nn.Module):
ceil_mode=True,
)
self.drop3 = torch.nn.DropBlock2d(
7, 0.9, alpha=0.25, decrement=cfg.DROPBLOCK.DECREMENT) \
if cfg.DROPBLOCK.DROP_ON else None
kp=0.9,
block_size=7,
alpha=0.25,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.drop4 = torch.nn.DropBlock2d(
7, 0.9, alpha=1., decrement=cfg.DROPBLOCK.DECREMENT) \
if cfg.DROPBLOCK.DROP_ON else None
kp=0.9,
block_size=7,
alpha=1.00,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3)
......@@ -145,7 +165,8 @@ class ResNet(torch.nn.Module):
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_normal_(
m.weight,
nonlinearity='relu')
nonlinearity='relu',
)
# Stop the gradients if necessary
def freeze_func(m):
......@@ -178,25 +199,31 @@ class ResNet(torch.nn.Module):
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
outputs = [x]
outputs += [self.layer1(outputs[-1])]
outputs += [self.layer2(outputs[-1])]
outputs += [self.layer3(outputs[-1])]
outputs += [self.layer4(outputs[-1])]
return outputs
def resnet(depth):
if depth == 18: units = [2, 2, 2, 2]
elif depth == 34: units = [3, 4, 6, 3]
elif depth == 50: units = [3, 4, 6, 3]
elif depth == 101: units = [3, 4, 23, 3]
elif depth == 152: units = [3, 8, 36, 3]
elif depth == 200: units = [3, 24, 36, 3]
elif depth == 269: units = [3, 30, 48, 8]
else: raise ValueError('Unsupported depth: %d' % depth)
if depth == 18:
units = [2, 2, 2, 2]
elif depth == 34:
units = [3, 4, 6, 3]
elif depth == 50:
units = [3, 4, 6, 3]
elif depth == 101:
units = [3, 4, 23, 3]
elif depth == 152:
units = [3, 8, 36, 3]
elif depth == 200:
units = [3, 24, 36, 3]
elif depth == 269:
units = [3, 30, 48, 8]
else:
raise ValueError('Unsupported depth: %d' % depth)
block = Bottleneck if depth >= 50 else BasicBlock
filters = [64, 256, 512, 1024, 2048] \
if depth >= 50 else [64, 64, 128, 256, 512]
......@@ -204,7 +231,15 @@ def resnet(depth):
def make_resnet_18():
    """Return the network built by ``resnet(18)``."""
    return resnet(18)
def make_resnet_34():
    """Return the network built by ``resnet(34)``."""
    return resnet(34)
def make_resnet_50():
    """Return the network built by ``resnet(50)``."""
    return resnet(50)
def make_resnet_101():
    """Return the network built by ``resnet(101)``."""
    return resnet(101)
def make_resnet_152():
    """Return the network built by ``resnet(152)``."""
    return resnet(152)
......@@ -13,12 +13,13 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import math
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling import conv3x3, RetinaNetDecoder
from lib.modeling import conv3x3
from lib.ops.modules import RetinaNetDecoder
from lib.retinanet import AnchorTargetLayer
......@@ -32,10 +33,12 @@ class RetinaNet(torch.nn.Module):
self.cls_conv = torch.nn.ModuleList(
conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS))
for _ in range(cfg.RETINANET.NUM_CONVS)
)
self.bbox_conv = torch.nn.ModuleList(
conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS))
for _ in range(cfg.RETINANET.NUM_CONVS)
)
# Packed as [C, A] not [A, C]
self.C = cfg.MODEL.NUM_CLASSES - 1
A = len(cfg.RETINANET.ASPECT_RATIOS) * \
......@@ -53,8 +56,11 @@ class RetinaNet(torch.nn.Module):
self.anchor_target_layer = AnchorTargetLayer()
self.cls_loss = torch.nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1. / 9.)
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
)
self.bbox_loss = torch.nn.SmoothL1Loss(
beta=1. / 9., reduction='batch_size',
)
self.reset_parameters()
def reset_parameters(self):
......@@ -127,7 +133,7 @@ class RetinaNet(torch.nn.Module):
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return OrderedDict({
return collections.OrderedDict({
'cls_loss':
self.cls_loss(
cls_score,
......@@ -146,7 +152,7 @@ class RetinaNet(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({'bbox_pred': bbox_pred})
outputs = collections.OrderedDict({'bbox_pred': bbox_pred})
if self.training:
outputs.update(
......
......@@ -13,11 +13,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class RPN(torch.nn.Module):
......@@ -119,7 +120,7 @@ class RPN(torch.nn.Module):
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return OrderedDict({
return collections.OrderedDict({
'rpn_cls_loss':
self.cls_loss(cls_score, self.rpn_data['labels']),
'rpn_bbox_loss':
......@@ -135,7 +136,7 @@ class RPN(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({
outputs = collections.OrderedDict({
'rpn_cls_score': cls_score,
'rpn_bbox_pred': bbox_pred,
})
......
......@@ -13,18 +13,15 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling import conv3x3
from lib.ssd import (
PriorBoxLayer,
MultiBoxMatchLayer,
HardMiningLayer,
MultiBoxTargetLayer,
)
from lib.ssd import HardMiningLayer
from lib.ssd import MultiBoxMatchLayer
from lib.ssd import MultiBoxTargetLayer
from lib.ssd import PriorBoxLayer
class SSD(torch.nn.Module):
......@@ -57,7 +54,7 @@ class SSD(torch.nn.Module):
self.hard_mining_layer = HardMiningLayer()
self.box_target_layer = MultiBoxTargetLayer()
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss()
self.bbox_loss = torch.nn.SmoothL1Loss(reduction='batch_size')
self.reset_parameters()
def reset_parameters(self):
......@@ -88,8 +85,9 @@ class SSD(torch.nn.Module):
.permute(0, 2, 3, 1).view(0, -1))
# Concat them if necessary
return torch.cat(cls_score_wide, dim=1).view(
0, -1, cfg.MODEL.NUM_CLASSES), \
return \
torch.cat(cls_score_wide, dim=1) \
.view(0, -1, cfg.MODEL.NUM_CLASSES), \
torch.cat(bbox_pred_wide, dim=1).view(0, -1, 4)
def compute_losses(
......@@ -138,7 +136,7 @@ class SSD(torch.nn.Module):
gt_boxes=gt_boxes,
)
)
return OrderedDict({
return collections.OrderedDict({
# A compensating factor of 4.0 is used
# As we normalize both the pos and neg samples
'cls_loss':
......@@ -160,7 +158,7 @@ class SSD(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({
outputs = collections.OrderedDict({
'prior_boxes': prior_boxes,
'bbox_pred': bbox_pred,
})
......
......@@ -16,7 +16,8 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class VGG(torch.nn.Module):
......@@ -35,16 +36,22 @@ class VGG(torch.nn.Module):
dim_in = 3 if i == 0 else filter_list[i - 1]
for j in range(self.units[i]):
self.__setattr__(
'{}_{}'.format(conv_name, j + 1),
conv3x3(dim_in, filter_list[i], bias=True))
if j == 0: dim_in = filter_list[i]
'{}_{}'
.format(conv_name, j + 1),
conv3x3(dim_in, filter_list[i], bias=True),
)
if j == 0:
dim_in = filter_list[i]
if reduced:
# L2Norm is redundant from the observation of
# empirical experiments. We just keep a trainable scale
self.conv4_3_norm = torch.nn.Affine(filter_list[3], bias=False)
self.conv4_3_norm.weight.zero_() # Zero-Init
self.fc6 = torch.nn.Conv2d(filter_list[-1], 1024,
kernel_size=3, stride=1, padding=6, dilation=6)
self.fc6 = torch.nn.Conv2d(
filter_list[-1], 1024,
kernel_size=3, padding=6,
stride=1, dilation=6,
)
self.fc7 = conv1x1(1024, 1024, bias=True)
self.feature_dims = [filter_list[-2], 1024]
if extra_arch is not None:
......@@ -54,15 +61,23 @@ class VGG(torch.nn.Module):
for i in range(len(strides)):
conv_name = 'conv{}'.format(i + 6)
dim_in = 1024 if i == 0 else filter_list[i - 1] * 2
self.__setattr__('{}_1'.format(conv_name),
conv1x1(dim_in, filter_list[i], bias=True))
self.__setattr__(
'{}_1'.format(conv_name),
conv1x1(dim_in, filter_list[i], bias=True),
)
if strides[i] == 2:
self.__setattr__('{}_2'.format(conv_name),
conv3x3(filter_list[i], filter_list[i] * 2, 2, bias=True))
self.__setattr__(
'{}_2'.format(conv_name),
conv3x3(filter_list[i], filter_list[i] * 2, 2, bias=True),
)
else:
self.__setattr__('{}_2'.format(conv_name),
torch.nn.Conv2d(filter_list[i], filter_list[i] * 2,
kernel_size=kps[0], padding=kps[1], stride=kps[2]))
self.__setattr__(
'{}_2'.format(conv_name),
torch.nn.Conv2d(
filter_list[i], filter_list[i] * 2,
kernel_size=kps[0], padding=kps[1], stride=kps[2]
),
)
self.reset_parameters()
def reset_parameters(self):
......@@ -88,8 +103,9 @@ class VGG(torch.nn.Module):
for i in range(cfg.MODEL.FREEZE_AT, 0, -1):
conv_name = 'conv{}'.format(i)
for j in range(self.units[i - 1]):
self.__getattr__('{}_{}'.format(
conv_name, j + 1)).apply(freeze_func)
self.__getattr__(
'{}_{}'.format(conv_name, j + 1)
).apply(freeze_func)
def forward(self, x):
outputs = []
......@@ -101,8 +117,10 @@ class VGG(torch.nn.Module):
'{}_{}'.format(conv_name, j + 1))(x))
if self.reduced and i == 3:
outputs.append(self.conv4_3_norm(x))
if i < 4: x = self.maxpool(x)
else: x = self.s1pool(x) if self.reduced else x
if i < 4:
x = self.maxpool(x)
else:
x = self.s1pool(x) if self.reduced else x
# Internal FC layers and Extra Conv Layers
if self.reduced:
......@@ -145,4 +163,6 @@ def make_vgg_16_reduced(scale=300):
def make_vgg_16_reduced_300():
    """Return the network built by ``make_vgg_16_reduced(300)``."""
    return make_vgg_16_reduced(300)
def make_vgg_16_reduced_512():
    """Return the network built by ``make_vgg_16_reduced(512)``."""
    return make_vgg_16_reduced(512)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
......@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function
from lib.core.config import cfg
import lib.utils.logger as logger
from lib.utils import logger
try:
from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms
......@@ -33,10 +33,12 @@ except ImportError as e:
def nms(detections, thresh, force_cpu=False):
    """Perform either CPU or GPU Hard-NMS.

    Parameters
    ----------
    detections : array-like with a ``shape`` attribute
        The detections to suppress, one per row.
    thresh : number
        The threshold forwarded to the selected implementation.
    force_cpu : bool, optional, default=False
        Use the CPU implementation even if ``cfg.USE_GPU_NMS`` is set.

    Returns
    -------
    sequence
        ``[]`` when there are no detections, otherwise the result of
        ``gpu_nms`` or ``cpu_nms`` (callers use it to index the rows
        of ``detections``).

    """
    # Nothing to suppress; also avoids calling a backend on empty input
    if detections.shape[0] == 0:
        return []
    if cfg.USE_GPU_NMS and not force_cpu:
        return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
    return cpu_nms(detections, thresh)
def soft_nms(
......@@ -47,7 +49,8 @@ def soft_nms(
score_thresh=0.001,
):
"""Perform CPU Soft-NMS."""
if detections.shape[0] == 0: return []
if detections.shape[0] == 0:
return []
methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
if method not in methods:
logger.fatal('Unknown soft nms method: {}'.format(method))
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.autograd import function
from lib.ops import functions
def decode_retinanet(
    features,
    cls_prob,
    bbox_pred,
    ims_info,
    strides,
    ratios,
    scales,
    pre_nms_top_n,
    score_thresh,
):
    """Apply the ``RetinaNetDecoder`` function to the given inputs.

    The function object is obtained through ``function.get`` for the
    device of ``cls_prob``; the remaining keyword arguments are passed
    to it unchanged.

    Returns the result of applying the function to
    ``(features, cls_prob, bbox_pred, ims_info)``.

    """
    decoder_args = {
        'strides': strides,
        'ratios': ratios,
        'scales': scales,
        'pre_nms_top_n': pre_nms_top_n,
        'score_thresh': score_thresh,
    }
    decoder = function.get(
        functions.RetinaNetDecoder,
        cls_prob.device,
        **decoder_args
    )
    return decoder.apply(features, cls_prob, bbox_pred, ims_info)
def decode_rpn(
    features,
    cls_prob,
    bbox_pred,
    ims_info,
    num_outputs,
    strides,
    ratios,
    scales,
    pre_nms_top_n,
    post_nms_top_n,
    nms_thresh,
    min_size,
    min_level,
    max_level,
    canonical_scale,
    canonical_level,
):
    """Apply the ``RPNDecoder`` function to the given inputs.

    The function object is obtained through ``function.get`` for the
    device of ``cls_prob``. ``num_outputs`` is forwarded under the
    name ``K``; all other keyword arguments keep their names.

    Returns the result of applying the function to
    ``(features, cls_prob, bbox_pred, ims_info)``.

    """
    decoder_args = {
        'K': num_outputs,
        'strides': strides,
        'ratios': ratios,
        'scales': scales,
        'pre_nms_top_n': pre_nms_top_n,
        'post_nms_top_n': post_nms_top_n,
        'nms_thresh': nms_thresh,
        'min_size': min_size,
        'min_level': min_level,
        'max_level': max_level,
        'canonical_scale': canonical_scale,
        'canonical_level': canonical_level,
    }
    decoder = function.get(
        functions.RPNDecoder,
        cls_prob.device,
        **decoder_args
    )
    return decoder.apply(features, cls_prob, bbox_pred, ims_info)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.autograd import function
class RetinaNetDecoder(function.Function):
    """Function running the ``Proposal`` operator in ``RETINANET`` mode."""

    def __init__(self, key, dev, **kwargs):
        super(RetinaNetDecoder, self).__init__(key, dev, **kwargs)
        # Keep the keyword arguments to describe the operator later
        self.args = kwargs

    def register_operator(self):
        """Return the operator type and its arguments as a dict."""
        forwarded = (
            'strides',
            'ratios',
            'scales',
            'pre_nms_top_n',
            'score_thresh',
        )
        arguments = {'det_type': 'RETINANET'}
        arguments.update((name, self.args[name]) for name in forwarded)
        return {'op_type': 'Proposal', 'arguments': arguments}

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Run the operator on the inputs and return its single output."""
        inputs = features + [cls_prob, bbox_pred, ims_info]
        # Skip <ims_info>, the last input, when unifying the devices
        self._unify_devices(inputs[:-1])
        outputs = [self.alloc()]
        return self.run(inputs, outputs, unify_devices=False)
class RPNDecoder(function.Function):
    """Function running the ``Proposal`` operator in ``RCNN`` mode."""

    def __init__(self, key, dev, **kwargs):
        super(RPNDecoder, self).__init__(key, dev, **kwargs)
        # Keep the keyword arguments to describe the operator later
        self.args = kwargs

    def register_operator(self):
        """Return the operator type and its arguments as a dict."""
        forwarded = (
            'strides',
            'ratios',
            'scales',
            'pre_nms_top_n',
            'post_nms_top_n',
            'nms_thresh',
            'min_size',
            'min_level',
            'max_level',
            'canonical_scale',
            'canonical_level',
        )
        arguments = {'det_type': 'RCNN'}
        arguments.update((name, self.args[name]) for name in forwarded)
        return {'op_type': 'Proposal', 'arguments': arguments}

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Run the operator on the inputs, allocating ``K`` outputs."""
        inputs = features + [cls_prob, bbox_pred, ims_info]
        # Skip <ims_info>, the last input, when unifying the devices
        self._unify_devices(inputs[:-1])
        num_outputs = self.args['K']
        outputs = [self.alloc() for _ in range(num_outputs)]
        return self.run(inputs, outputs, unify_devices=False)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.ops import functional as F
from lib.utils.blob import blob_to_tensor
class Bootstrap(torch.nn.Module):
    """Extended operator to process the images."""

    def __init__(self):
        super(Bootstrap, self).__init__()
        self.dtype = cfg.MODEL.DATA_TYPE.lower()
        self.mean_values = cfg.PIXEL_MEANS
        # A one-element tensor whose device is reported by ``device()``
        self.dummy_buffer = torch.ones(1)

    def _apply(self, fn):
        """Apply ``fn`` to the dummy buffer only."""
        fn(self.dummy_buffer)

    def cpu(self):
        """Record the cpu device on this module."""
        # NOTE(review): this sets ``_device`` while ``device()`` reads
        # the dummy buffer -- confirm the two are kept in sync.
        self._device = torch.device('cpu')

    def cuda(self, device=None):
        """Record the cuda device with the given index on this module."""
        self._device = torch.device('cuda', device)

    def device(self):
        """Return the device of this module."""
        return self.dummy_buffer.device

    def forward(self, input):
        """Move the input to the module device and run ``image_data``."""
        target = self.device()
        misplaced = input._device != target
        if misplaced and target.type == 'cpu':
            input = input.cpu()
        elif misplaced:
            input = input.cuda(target.index)
        return torch.vision.ops.image_data(
            input,
            self.dtype,
            self.mean_values,
        )
class RetinaNetDecoder(torch.nn.Module):
    """Generate proposal regions from retinanet."""

    def __init__(self):
        super(RetinaNetDecoder, self).__init__()
        k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
        scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
        # One integer stride, 2 ** level, per pyramid level
        strides = []
        for lvl in range(k_min, k_max + 1):
            strides.append(int(2. ** lvl))
        self.strides = strides
        # Anchor scale multiplied by 2 ** (octave / scales_per_octave)
        scales = []
        for octave in range(scales_per_octave):
            octave_factor = 2 ** (octave / float(scales_per_octave))
            scales.append(cfg.RETINANET.ANCHOR_SCALE * octave_factor)
        self.scales = scales

    def register_operator(self):
        """Return the operator type and its arguments as a dict."""
        arguments = {
            'det_type': 'RETINANET',
            'strides': self.strides,
            'scales': self.scales,
            'ratios': [float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
            'pre_nms_top_n': cfg.RETINANET.PRE_NMS_TOP_N,
            'score_thresh': cfg.TEST.SCORE_THRESH,
        }
        return {'op_type': 'Proposal', 'arguments': arguments}

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Decode the predictions with the configured anchor settings."""
        # The image info is converted with ``enforce_cpu=True``
        ims_info_tensor = blob_to_tensor(ims_info, enforce_cpu=True)
        aspect_ratios = [float(e) for e in cfg.RETINANET.ASPECT_RATIOS]
        return F.decode_retinanet(
            features=features,
            cls_prob=cls_prob,
            bbox_pred=bbox_pred,
            ims_info=ims_info_tensor,
            strides=self.strides,
            ratios=aspect_ratios,
            scales=self.scales,
            pre_nms_top_n=cfg.RETINANET.PRE_NMS_TOP_N,
            score_thresh=cfg.TEST.SCORE_THRESH,
        )
class RPNDecoder(torch.nn.Module):
    """Generate proposal regions from RPN."""

    def __init__(self):
        super(RPNDecoder, self).__init__()
        # One output per RoI level if several strides are configured,
        # otherwise a single output
        if len(cfg.RPN.STRIDES) > 1:
            self.K = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
        else:
            self.K = 1

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Decode the predictions and always return a list of outputs."""
        # The image info is converted with ``enforce_cpu=True``
        ims_info_tensor = blob_to_tensor(ims_info, enforce_cpu=True)
        outputs = F.decode_rpn(
            features=features,
            cls_prob=cls_prob,
            bbox_pred=bbox_pred,
            ims_info=ims_info_tensor,
            num_outputs=self.K,
            strides=cfg.RPN.STRIDES,
            ratios=[float(e) for e in cfg.RPN.ASPECT_RATIOS],
            scales=[float(e) for e in cfg.RPN.SCALES],
            pre_nms_top_n=cfg.TEST.RPN_PRE_NMS_TOP_N,
            post_nms_top_n=cfg.TEST.RPN_POST_NMS_TOP_N,
            nms_thresh=cfg.TEST.RPN_NMS_THRESH,
            min_size=cfg.TEST.RPN_MIN_SIZE,
            min_level=cfg.FPN.ROI_MIN_LEVEL,
            max_level=cfg.FPN.ROI_MAX_LEVEL,
            canonical_scale=cfg.FPN.ROI_CANONICAL_SCALE,
            canonical_level=cfg.FPN.ROI_CANONICAL_LEVEL,
        )
        # Wrap the result of the single-output case into a list
        if self.K == 1:
            return [outputs]
        return outputs
......@@ -15,4 +15,3 @@ from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.retinanet.layers.anchor_target_layer import AnchorTargetLayer
from lib.retinanet.layers.proposal_layer import ProposalLayer
\ No newline at end of file
......@@ -13,15 +13,16 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.blob import to_tensor
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
class AnchorTargetLayer(torch.nn.Module):
......@@ -35,28 +36,32 @@ class AnchorTargetLayer(torch.nn.Module):
anchor_scale = cfg.RETINANET.ANCHOR_SCALE
self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)]
self.ratios = cfg.RETINANET.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = []
for stride in self.strides:
sizes = [stride * anchor_scale *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
self.base_anchors.append(generate_anchors_v2(
stride=stride, ratios=self.ratios, sizes=sizes))
self.base_anchors.append(
generate_anchors_v2(
stride=stride,
ratios=self.ratios,
sizes=sizes,
))
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \
.format(num_images, len(gt_boxes_wide)))
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
all_anchors = []; total_anchors = 0
all_anchors, total_anchors = [], 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
......@@ -101,7 +106,8 @@ class AnchorTargetLayer(torch.nn.Module):
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float))
np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
......@@ -125,10 +131,10 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4])
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[fg_inds, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_inside_weights[fg_inds, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1.0)
bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1)
labels_wide[ix, inds_inside] = labels
bbox_targets_wide[ix, inds_inside] = bbox_targets
......@@ -141,16 +147,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights),
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.bbox_transform import bbox_transform_inv
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
class ProposalLayer(torch.nn.Module):
    """Output object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    """

    def __init__(self):
        super(ProposalLayer, self).__init__()
        # Load the basic configs
        k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
        scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
        anchor_scale = cfg.RETINANET.ANCHOR_SCALE
        self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)]
        self.ratios = cfg.RETINANET.ASPECT_RATIOS
        # Generate base anchors, one set per stride
        self.base_anchors = []
        for stride in self.strides:
            sizes = [
                stride * anchor_scale *
                (2 ** (octave / float(scales_per_octave)))
                for octave in range(scales_per_octave)
            ]
            level_anchors = generate_anchors_v2(
                stride=stride,
                ratios=self.ratios,
                sizes=sizes,
            )
            self.base_anchors.append(level_anchors)

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Decode the predictions of all images into one detection array.

        Each row of the returned ``float32`` array is laid out as
        ``{im_idx, x1, y1, x2, y2, score, cls}``. If nothing passes
        ``cfg.TEST.SCORE_THRESH``, a single all-zero row is returned.

        """
        # Get resources
        num_images = ims_info.shape[0]
        cls_prob, bbox_pred = cls_prob.numpy(True), bbox_pred.numpy(True)
        lvl_info = [features[i].shape[-2:] for i in range(len(self.strides))]

        batch_mismatch = (
            cls_prob.shape[0] != num_images or
            bbox_pred.shape[0] != num_images
        )
        if batch_mismatch:
            logger.fatal('Incorrect num of images: {}'.format(num_images))

        # Prepare for the outputs
        batch_probs = cls_prob
        batch_deltas = bbox_pred.transpose((0, 2, 1))  # [?, 4, n] -> [?, n, 4]
        batch_detections = []

        # Extract Detections separately
        for ix in range(num_images):
            im_scale = ims_info[ix, 2]
            if cfg.RETINANET.SOFTMAX:
                # Drop the first class channel
                image_probs = batch_probs[ix, 1:, :]
            else:
                image_probs = batch_probs[ix]  # [num_classes - 1, n]
            image_deltas = batch_deltas[ix]  # [n, 4]
            anchor_pos = 0
            for lvl, (H, W) in enumerate(lvl_info):
                A, K = self.base_anchors[lvl].shape[0], H * W
                num_anchors = A * K
                # Take the slice of anchors belonging to this level
                stop = anchor_pos + num_anchors
                prob = image_probs[:, anchor_pos:stop]
                deltas = image_deltas[anchor_pos:stop]
                anchor_pos += num_anchors

                # Keep at most PRE_NMS_TOP_N candidates above the threshold
                prob_ravel = prob.ravel()
                candidate_inds = np.flatnonzero(
                    prob_ravel > cfg.TEST.SCORE_THRESH)
                if candidate_inds.size == 0:
                    continue
                pre_nms_topn = min(
                    cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
                top_inds = np.argpartition(
                    prob_ravel[candidate_inds],
                    -pre_nms_topn,
                )[-pre_nms_topn:]
                inds = candidate_inds[top_inds]

                # Recover (class, anchor) and (anchor type, y, x) indices
                prob_4d = prob.reshape((prob.shape[0], A, H, W))
                classes, anchor_ids = np.unravel_index(inds, prob.shape)
                _, a, y, x = np.unravel_index(inds, prob_4d.shape)
                scores = prob[classes, anchor_ids]
                deltas = deltas[anchor_ids]

                # Shift the base anchors to the selected grid cells
                anchors = np.column_stack((x, y, x, y)) \
                    .astype(dtype=np.float32)
                anchors = (anchors * self.strides[lvl]) + \
                    self.base_anchors[lvl][a, :]
                pred_boxes = bbox_transform_inv(anchors, deltas)
                pred_boxes /= im_scale

                # {im_idx, x1, y1, x2, y2, score, cls}
                detections = np.zeros(
                    (pred_boxes.shape[0], 7), dtype=np.float32)
                detections[:, 0] = ix
                detections[:, 1:5] = pred_boxes
                detections[:, 5] = scores
                detections[:, 6] = classes + 1
                batch_detections.append(detections)

        # Merge Detections into a blob
        if len(batch_detections) > 0:
            return np.vstack(batch_detections)
        return np.zeros((1, 7), dtype=np.float32)
\ No newline at end of file
......@@ -13,20 +13,16 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
import cPickle
except:
import pickle as cPickle
import numpy as np
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.image import scale_image
from lib.utils.bbox_transform import clip_boxes
from lib.nms.nms_wrapper import nms, soft_nms
from lib.utils.timer import Timer
from lib.utils.blob import im_list_to_blob
from lib.utils.vis import vis_one_image
......@@ -39,72 +35,65 @@ def im_detect(detector, raw_image):
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
blobs['data'] = torch.from_numpy(blobs['data']).cuda(cfg.GPU_ID)
for im_scale in ims_scale], dtype=np.float32,
)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results
results = outputs['detections']
detections_wide = []
for im_idx in range(len(ims)):
indices = np.where(results[:, 0].astype(np.int32) == im_idx)[0]
detections = results[indices, 1:]
detections[:, :4] = clip_boxes(detections[:, :4], raw_image.shape)
detections_wide.append(detections)
return np.vstack(detections_wide) \
if len(detections_wide) > 1 else detections_wide[0]
# Unpack results
return tensor_to_blob(outputs['detections'])[:, 1:]
def ims_detect(net, raw_images):
"""Detect images, with single or multiple scales.
"""
def ims_detect(detector, raw_images):
"""Detect images, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_images[0])
num_scales = len(ims_scale)
ims_shape = [im.shape for im in raw_images]
for item_idx in range(1, len(raw_images)):
ims_ext, ims_scale_ext = scale_image(raw_images[item_idx])
ims += ims_ext; ims_scale += ims_scale_ext
ims += ims_ext
ims_scale += ims_scale_ext
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[2:4]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32,
)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
net.forward(**blobs)()
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results
results = net.blobs['detections'].data.get_value()
# Unpack results
results = tensor_to_blob(outputs['detections'])
detections_wide = [[] for _ in range(len(ims_shape))]
for i in range(len(ims)):
j = i % len(ims_shape)
indices = np.where(results[:, 0].astype(np.int32) == i)[0]
detections = results[indices, 1:]
detections[:, :4] = clip_boxes(detections[:, :4], ims_shape[j])
detections_wide[j].append(detections)
detections_wide[i // num_scales].append(detections)
for j in range(len(ims_shape)):
detections_wide[j] = np.vstack(detections_wide[j]) \
if len(detections_wide[j]) > 1 else detections_wide[j][0]
for i in range(len(ims_shape)):
detections_wide[i] = np.vstack(detections_wide[i]) \
if len(detections_wide[i]) > 1 else detections_wide[i][0]
return detections_wide
def test_net(net, server):
classes, num_images, num_classes = \
server.classes, server.num_images, server.num_classes
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect' : Timer(), 'misc' : Timer()}
_t = {'im_detect': Timer(), 'misc': Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths
......@@ -134,30 +123,46 @@ def test_net(net, server):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4]
cls_scores = detections[cls_indices, 4]
cls_dets = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])).\
astype(np.float32, copy=False)
cls_detections = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(cls_dets, cfg.TEST.NMS,
keep = soft_nms(
cls_detections,
cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA)
else: keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
cls_dets = cls_dets[keep, :]
all_boxes[j][i] = cls_dets
boxes_this_image.append(cls_dets)
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(
cls_detections,
cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_images[item_idx], classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
filename=server.get_save_filename(image_ids[item_idx]))
vis_one_image(
raw_images[item_idx],
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_ids[item_idx]),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores)
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
......@@ -165,7 +170,7 @@ def test_net(net, server):
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='')
......
......@@ -14,7 +14,7 @@ from __future__ import division
from __future__ import print_function
from lib.ssd.layers.data_layer import DataLayer
from lib.ssd.layers.prior_box_layer import PriorBoxLayer
from lib.ssd.layers.multibox_match_layer import MultiBoxMatchLayer
from lib.ssd.layers.hard_mining_layer import HardMiningLayer
from lib.ssd.layers.multibox_target_layer import MultiBoxTargetLayer
\ No newline at end of file
from lib.ssd.layers.multibox_layer import MultiBoxMatchLayer
from lib.ssd.layers.multibox_layer import MultiBoxTargetLayer
from lib.ssd.layers.priorbox_layer import PriorBoxLayer
......@@ -13,32 +13,39 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from multiprocessing import Process
from lib.core.config import cfg
class BlobFetcher(Process):
class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self.Q_in = self.Q_out = None
self._img_blob_size = (
cfg.TRAIN.IMS_PER_BATCH,
cfg.SSD.RESIZE.HEIGHT,
cfg.SSD.RESIZE.WIDTH, 3,
)
self.q_in = self.q_out = None
self.daemon = True
def get(self):
num_images = cfg.TRAIN.IMS_PER_BATCH
target_h = cfg.SSD.RESIZE.HEIGHT; target_w = cfg.SSD.RESIZE.WIDTH
ims_blob = np.zeros(shape=(num_images, target_h, target_w, 3), dtype=np.uint8)
gt_boxes_wide = []
for ix in range(cfg.TRAIN.IMS_PER_BATCH):
im, gt_boxes = self.Q_in.get()
ims_blob[ix, :, :, :] = im
# Encode boxes by adding the idx of images
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), dtype=np.float32)
im_boxes[:, 0:gt_boxes.shape[1]] = gt_boxes
im_boxes[:, -1] = ix
gt_boxes_wide.append(im_boxes)
return {'data': ims_blob, 'gt_boxes': np.concatenate(gt_boxes_wide, axis=0)}
img_blob, boxes_blob = np.zeros(self._img_blob_size, 'uint8'), []
for i in range(cfg.TRAIN.IMS_PER_BATCH):
img_blob[i], gt_boxes = self.q_in.get()
# Pack the boxes by adding the index of images
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), np.float32)
boxes[:, :gt_boxes.shape[1]] = gt_boxes
boxes[:, -1] = i
boxes_blob.append(boxes)
return {
'data': img_blob,
'gt_boxes': np.concatenate(boxes_blob, 0),
}
def run(self):
while True: self.Q_out.put(self.get())
\ No newline at end of file
while True:
self.q_out.put(self.get())
......@@ -13,15 +13,16 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from multiprocessing import Queue
import time
import dragon
import pprint
from multiprocessing import Queue
import dragon.core.mpi as mpi
import lib.utils.logger as logger
from lib.faster_rcnn.data.data_reader import DataReader
from lib.ssd.data.data_transformer import DataTransformer
from lib.ssd.data.blob_fetcher import BlobFetcher
from lib.utils import logger
class DataBatch(object):
......@@ -52,19 +53,20 @@ class DataBatch(object):
super(DataBatch, self).__init__()
# Init mpi
global_rank, local_rank, group_size = 0, 0, 1
if mpi.Is_Init():
idx, group = mpi.AllowParallel()
if idx != -1: # DataParallel
global_rank = mpi.Rank()
if dragon.mpi.is_init():
group = dragon.mpi.is_parallel()
if group is not None: # DataParallel
global_rank = dragon.mpi.rank()
group_size = len(group)
for i, node in enumerate(group):
if global_rank == node: local_rank = i
if global_rank == node:
local_rank = i
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 32)
self._num_readers = kwargs.get( 'num_readers', 1)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', -1)
self._max_transformers = kwargs.get('max_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
......@@ -84,7 +86,7 @@ class DataBatch(object):
self._readers = []
for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].Q_out = self.Q1
self._readers[-1].q_out = self.Q1
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
......@@ -101,8 +103,8 @@ class DataBatch(object):
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers)
transformer.Q_in = self.Q1
transformer.Q_out = self.Q2
transformer.q_in = self.Q1
transformer.q_out = self.Q2
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
......@@ -111,14 +113,16 @@ class DataBatch(object):
self._fetchers = []
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.Q_in = self.Q2
fetcher.Q_out = self.Q3
fetcher.q_in = self.Q2
fetcher.q_out = self.Q3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
# Prevent to echo multiple nodes
if local_rank == 0: self.echo()
if local_rank == 0:
self.echo()
def cleanup():
def terminate(processes):
for process in processes:
......@@ -130,6 +134,7 @@ class DataBatch(object):
logger.info('Terminating DataTransformer ......')
terminate(self._readers)
logger.info('Terminating DataReader......')
import atexit
atexit.register(cleanup)
......@@ -145,13 +150,7 @@ class DataBatch(object):
return self.Q3.get()
def echo(self):
"""Print I/O Information.
Returns
-------
None
"""
"""Print I/O Information."""
print('---------------------------------------------------------')
print('BatchFetcher({} Threads), Using config:'.format(
self._num_readers + self._num_transformers + self._num_fetchers))
......
......@@ -14,34 +14,34 @@ from __future__ import division
from __future__ import print_function
import cv2
import multiprocessing
import numpy as np
import numpy.random as npr
from multiprocessing import Process
from lib.core.config import cfg
from lib.proto import anno_pb2 as pb
from lib.ssd.data.preprocessing import *
import lib.utils.logger as logger
from lib.ssd.data import transforms
from lib.utils import logger
class DataTransformer(Process):
class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._distorter = Distortor()
self._expander = Expander()
self._sampler = Sampler(cfg.SSD.SAMPLERS)
self._resizer = Resizer()
self._rng_seed = cfg.RNG_SEED
self._mirror = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF
self._classes = kwargs.get('classes', ('__background__',))
self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self._queues = []
self.Q_in = self.Q_out = None
self._image_aug = transforms.Compose(
transforms.Distort(), # Color augmentation
transforms.Expand(), # Expand and padding
transforms.Sample(), # Sample a patch randomly
transforms.Resize(), # Resize to a fixed scale
)
self.q_in = self.q_out = None
self.daemon = True
def make_roidb(self, ann_datum, flip=False):
def make_roi_dict(self, ann_datum, flip=False):
annotations = ann_datum.annotation
n_objects = 0
if not self._use_diff:
......@@ -49,7 +49,7 @@ class DataTransformer(Process):
if not ann.difficult: n_objects += 1
else: n_objects = len(annotations)
roidb = {
roi_dict = {
'width': ann_datum.datum.width,
'height': ann_datum.datum.height,
'gt_classes': np.zeros((n_objects,), dtype=np.int32),
......@@ -57,75 +57,82 @@ class DataTransformer(Process):
'normalized_boxes': np.zeros((n_objects, 4), dtype=np.float32),
}
ix = 0
rec_idx = 0
for ann in annotations:
if not self._use_diff and ann.difficult: continue
roidb['boxes'][ix, :] = [
max(0, ann.x1), max(0, ann.y1),
if not self._use_diff and ann.difficult:
continue
roi_dict['boxes'][rec_idx, :] = [
max(0, ann.x1),
max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1)]
roidb['gt_classes'][ix] = self._class_to_ind[ann.name]
ix += 1
min(ann.y2, ann_datum.datum.height - 1),
]
roi_dict['gt_classes'][rec_idx] = \
self._class_to_ind[ann.name]
rec_idx += 1
if flip: roidb['boxes'] = _flip_boxes(roidb['boxes'], roidb['width'])
roidb['normalized_boxes'][:, 0::2] = roidb['boxes'][:, 0::2] / float(roidb['width'])
roidb['normalized_boxes'][:, 1::2] = roidb['boxes'][:, 1::2] / float(roidb['height'])
if flip:
roi_dict['boxes'] = _flip_boxes(
roi_dict['boxes'], roi_dict['width'])
return roidb
roi_dict['boxes'][:, 0::2] /= roi_dict['width']
roi_dict['boxes'][:, 1::2] /= roi_dict['height']
return roi_dict
def get(self, serialized):
ann_datum = pb.AnnotatedDatum()
ann_datum.ParseFromString(serialized)
im_datum = ann_datum.datum
im = np.fromstring(im_datum.data, np.uint8)
if im_datum.encoded is True: im = cv2.imdecode(im, -1)
else: im = im.reshape((im_datum.height, im_datum.width, im_datum.channels))
img_datum = ann_datum.datum
img = np.fromstring(img_datum.data, np.uint8)
if img_datum.encoded is True:
img = cv2.imdecode(img, -1)
else:
h, w = img_datum.height, img_datum.width
img = img.reshape((h, w, img_datum.channels))
# Flip
flip = False
if self._mirror:
if npr.randint(0, 2) > 0:
im = im[:, ::-1, :]
if np.random.randint(0, 2) > 0:
img = img[:, ::-1, :]
flip = True
# Datum -> RoIDB
roidb = self.make_roidb(ann_datum, flip)
roi_dict = self.make_roi_dict(ann_datum, flip)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
gt_boxes = np.empty((len(roidb['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, 0:4] = roidb['normalized_boxes']
gt_boxes[:, 4] = roidb['gt_classes']
gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), 'float32')
gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
# Distort => Expand => Sample => Resize
im = self._distorter.distort_image(im)
im, gt_boxes = self._expander.expand_image(im, gt_boxes)
im, gt_boxes = self._sampler.sample_image(im, gt_boxes)
im = self._resizer.resize_image(im)
img, gt_boxes = self._image_aug(img, gt_boxes)
# Modify gt boxes to the blob scale
# Restore to the blob scale
gt_boxes[:, 0] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 1] *= cfg.SSD.RESIZE.HEIGHT
gt_boxes[:, 2] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 3] *= cfg.SSD.RESIZE.HEIGHT
return im, gt_boxes
return img, gt_boxes
def run(self):
npr.seed(self._rng_seed)
np.random.seed(self._rng_seed)
while True:
serialized = self.Q_in.get()
serialized = self.q_in.get()
im, gt_boxes = self.get(serialized)
if len(gt_boxes) < 1: continue
self.Q_out.put((im, gt_boxes))
if len(gt_boxes) < 1:
continue
self.q_out.put((im, gt_boxes))
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
oldx1 = boxes[:, 0].copy()
oldx2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - oldx2 - 1
flip_boxes[:, 2] = width - oldx1 - 1
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import PIL.Image
import PIL.ImageEnhance
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
class Distortor(object):
    """Randomly jitter the brightness, contrast and saturation of an image.

    Each enhancement is applied independently with the probability read
    from ``cfg.SSD.DISTORT``. The enhancement factor is drawn uniformly
    from ``[1 - delta, 1 + delta]`` with the matching per-channel delta.
    """

    def __init__(self):
        self._brightness_prob = cfg.SSD.DISTORT.BRIGHTNESS_PROB
        self._brightness_delta = 0.3
        self._contrast_prob = cfg.SSD.DISTORT.CONTRAST_PROB
        self._contrast_delta = 0.3
        self._saturation_prob = cfg.SSD.DISTORT.SATURATION_PROB
        self._saturation_delta = 0.3

    def distort_image(self, im):
        """Return a randomly distorted copy of ``im`` as a numpy array.

        Parameters
        ----------
        im : numpy.ndarray
            An image array accepted by ``PIL.Image.fromarray``.

        Returns
        -------
        numpy.ndarray
            The image after the randomly selected enhancements.

        """
        im = PIL.Image.fromarray(im)
        if npr.uniform() < self._brightness_prob:
            factor = npr.uniform(
                -self._brightness_delta, self._brightness_delta) + 1.0
            im = PIL.ImageEnhance.Brightness(im).enhance(factor)
        if npr.uniform() < self._contrast_prob:
            factor = npr.uniform(
                -self._contrast_delta, self._contrast_delta) + 1.0
            im = PIL.ImageEnhance.Contrast(im).enhance(factor)
        if npr.uniform() < self._saturation_prob:
            # Saturation has its own delta; do not reuse the contrast one.
            factor = npr.uniform(
                -self._saturation_delta, self._saturation_delta) + 1.0
            im = PIL.ImageEnhance.Color(im).enhance(factor)
        return np.array(im)
if __name__ == '__main__':
    # Manual visual check: show a freshly distorted 'cat.jpg' after
    # every key press, forever.
    distortor = Distortor()
    while True:
        distorted = distortor.distort_image(cv2.imread('cat.jpg'))
        cv2.imshow('Distort', distorted)
        cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy.random as npr
import numpy as np
import math
from lib.core.config import cfg
import lib.utils.logger as logger
class Expander(object):
    """Randomly place the image on a larger canvas filled with pixel means."""

    def __init__(self, **params):
        self._expand_prob = cfg.SSD.EXPAND.PROB
        self._max_expand_ratio = cfg.SSD.EXPAND.MAX_RATIO
        if self._max_expand_ratio < 1.0:
            logger.fatal(
                'The max expand ratio must >= 1.0, got {}'
                .format(self._max_expand_ratio))

    def expand_image(self, im, gt_boxes=None):
        """Return the (possibly) expanded image and the re-normalized boxes.

        ``gt_boxes`` holds normalized (x1, y1, x2, y2, ...) rows; only the
        first four columns are rewritten for the expanded canvas.
        """
        # Skip the expansion with probability (1 - expand_prob).
        if npr.uniform() > self._expand_prob:
            return im, gt_boxes
        ratio = npr.uniform(1.0, self._max_expand_ratio)
        if ratio == 1:
            return im, gt_boxes

        src_h, src_w = im.shape[0], im.shape[1]
        dst_h = int(src_h * ratio)
        dst_w = int(src_w * ratio)
        # Random top-left corner of the source image inside the canvas.
        top = int(math.floor(npr.uniform(0.0, dst_h - src_h)))
        left = int(math.floor(npr.uniform(0.0, dst_w - src_w)))

        canvas = np.empty((dst_h, dst_w, 3), dtype=np.uint8)
        canvas[:] = cfg.PIXEL_MEANS
        canvas[top:top + src_h, left:left + src_w, :] = im

        if gt_boxes is None:
            return canvas, gt_boxes

        # Map the normalized coordinates onto the canvas and normalize again.
        moved = gt_boxes.astype(gt_boxes.dtype, copy=True)
        moved[:, 0:4:2] = (gt_boxes[:, 0:4:2] * src_w + left) / dst_w
        moved[:, 1:4:2] = (gt_boxes[:, 1:4:2] * src_h + top) / dst_h
        return canvas, moved
if __name__ == '__main__':
    # Manual visual check: expand 'cat.jpg', draw the transformed box and
    # show the result after every key press, forever.
    expander = Expander()
    while True:
        im, gt_boxes = expander.expand_image(
            cv2.imread('cat.jpg'),
            np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32),
        )
        height, width = im.shape[0], im.shape[1]
        first_box = gt_boxes[0]
        x1 = int(first_box[0] * width)
        y1 = int(first_box[1] * height)
        x2 = int(first_box[2] * width)
        y2 = int(first_box[3] * height)
        cv2.rectangle(im, (x1, y1), (x2, y2), (188, 119, 64), 2)
        cv2.imshow('Expand', im)
        cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy.random as npr
from lib.core.config import cfg
class Resizer(object):
    """Resize images to the configured SSD size with a random interpolation."""

    def __init__(self):
        self._re_height = cfg.SSD.RESIZE.HEIGHT
        self._re_width = cfg.SSD.RESIZE.WIDTH
        # Map the names accepted in the config to the OpenCV flags.
        name_to_flag = {
            'LINEAR': cv2.INTER_LINEAR,
            'AREA': cv2.INTER_AREA,
            'NEAREST': cv2.INTER_NEAREST,
            'CUBIC': cv2.INTER_CUBIC,
            'LANCZOS4': cv2.INTER_LANCZOS4,
        }
        self._interp_mode = [
            name_to_flag[name] for name in cfg.SSD.RESIZE.INTERP_MODE
        ]

    def resize_image(self, im):
        """Return ``im`` resized to (width, height) from the config."""
        # Pick one of the configured interpolation flags uniformly.
        flag = self._interp_mode[npr.randint(0, len(self._interp_mode))]
        target_size = (self._re_width, self._re_height)
        return cv2.resize(im, target_size, interpolation=flag)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
from lib.utils.bbox_transform import clip_boxes
from lib.utils.boxes import iou
import lib.utils.logger as logger
class Sampler(object):
    """Randomly crop a patch that satisfies the given overlap constraints.

    Each sampler is an 8-tuple:
    (min_scale, max_scale, min_aspect_ratio, max_aspect_ratio,
    min_jaccard_overlap, max_jaccard_overlap, max_trials, max_sample).
    """

    def __init__(self, samplers):
        if not isinstance(samplers, list):
            samplers = [samplers]
        self._samplers = []
        for sampler in samplers:
            if len(sampler) != 8:
                logger.fatal('The sample params should be a tuple of length 8.')
            sample_param = {
                'min_scale': sampler[0],
                'max_scale': sampler[1],
                'min_aspect_ratio': sampler[2],
                'max_aspect_ratio': sampler[3],
                'min_jaccard_overlap': sampler[4],
                'max_jaccard_overlap': sampler[5],
                'max_trials': sampler[6],
                'max_sample': sampler[7],
            }
            self._samplers.append(sample_param)

    def _compute_overlaps(self, rand_box, gt_boxes):
        """Return the IoU between ``rand_box`` and every ground-truth box."""
        return iou(np.expand_dims(rand_box, 0), gt_boxes[:, 0:4])

    def _generate_sample(self, sample_param):
        """Draw a random normalized box (x1, y1, x2, y2) inside the unit square."""
        min_scale = sample_param.get('min_scale', 1.0)
        max_scale = sample_param.get('max_scale', 1.0)
        scale = npr.uniform(min_scale, max_scale)
        min_aspect_ratio = sample_param.get('min_aspect_ratio', 1.0)
        max_aspect_ratio = sample_param.get('max_aspect_ratio', 1.0)
        # Restrict the aspect ratio so that both sides stay within [0, 1].
        min_aspect_ratio = max(min_aspect_ratio, scale ** 2)
        max_aspect_ratio = min(max_aspect_ratio, 1.0 / (scale ** 2))
        aspect_ratio = npr.uniform(min_aspect_ratio, max_aspect_ratio)
        bbox_w = scale * (aspect_ratio ** 0.5)
        bbox_h = scale / (aspect_ratio ** 0.5)
        w_off = npr.uniform(0.0, float(1 - bbox_w))
        h_off = npr.uniform(0.0, float(1 - bbox_h))
        return np.array([w_off, h_off, w_off + bbox_w, h_off + bbox_h])

    def _check_satisfy(self, sample_box, gt_boxes, constraint):
        """Return True if the best IoU of ``sample_box`` is within the bounds.

        A missing (``None``) bound is not checked. If both bounds are
        missing, every sample box is accepted.
        """
        min_jaccard_overlap = constraint.get('min_jaccard_overlap', None)
        max_jaccard_overlap = constraint.get('max_jaccard_overlap', None)
        if min_jaccard_overlap is None and max_jaccard_overlap is None:
            return True
        if len(gt_boxes) == 0:
            # No box to overlap with; reject instead of reducing
            # an empty overlap array.
            return False
        max_overlap = self._compute_overlaps(sample_box, gt_boxes).max()
        if min_jaccard_overlap is not None and \
                max_overlap < min_jaccard_overlap:
            return False
        if max_jaccard_overlap is not None and \
                max_overlap > max_jaccard_overlap:
            return False
        return True

    def _generate_batch_samples(self, gt_boxes):
        """Collect the candidate boxes accepted by each configured sampler."""
        sample_boxes = []
        for sampler in self._samplers:
            found = 0
            for _ in range(sampler['max_trials']):
                if found >= sampler['max_sample']:
                    break
                sample_box = self._generate_sample(sampler)
                # The check is skipped only for the trivial [0, 1] range.
                if sampler['min_jaccard_overlap'] != 0.0 or \
                        sampler['max_jaccard_overlap'] != 1.0:
                    if not self._check_satisfy(sample_box, gt_boxes, sampler):
                        continue
                found += 1
                sample_boxes.append(sample_box)
        return sample_boxes

    def _rand_crop(self, im, rand_box, gt_boxes=None):
        """Crop ``im`` to ``rand_box`` and re-normalize the kept boxes."""
        im_h = im.shape[0]
        im_w = im.shape[1]
        w_off = int(rand_box[0] * im_w)
        h_off = int(rand_box[1] * im_h)
        crop_w = int((rand_box[2] - rand_box[0]) * im_w)
        crop_h = int((rand_box[3] - rand_box[1]) * im_h)
        new_im = im[h_off: h_off + crop_h, w_off: w_off + crop_w, :]
        if gt_boxes is None:
            return new_im, gt_boxes

        ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
        ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
        # Keep the ground-truth box whose center is in the sample box
        # Implement ``EmitConstraint.CENTER`` in the original SSD
        keep_inds = np.where(
            (ctr_x >= rand_box[0]) & (ctr_x <= rand_box[2]) &
            (ctr_y >= rand_box[1]) & (ctr_y <= rand_box[3]))[0]
        gt_boxes = gt_boxes[keep_inds]
        # Normalized -> pixels relative to the crop -> clipped -> normalized.
        new_gt_boxes = gt_boxes.astype(gt_boxes.dtype, copy=True)
        new_gt_boxes[:, 0] = (gt_boxes[:, 0] * im_w - w_off)
        new_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h - h_off)
        new_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w - w_off)
        new_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h - h_off)
        new_gt_boxes = clip_boxes(new_gt_boxes, (crop_h, crop_w))
        new_gt_boxes[:, 0] = new_gt_boxes[:, 0] / crop_w
        new_gt_boxes[:, 1] = new_gt_boxes[:, 1] / crop_h
        new_gt_boxes[:, 2] = new_gt_boxes[:, 2] / crop_w
        new_gt_boxes[:, 3] = new_gt_boxes[:, 3] / crop_h
        return new_im, new_gt_boxes

    def sample_image(self, im, gt_boxes):
        """Crop ``im`` to one randomly chosen valid sample box, if any."""
        sample_boxes = self._generate_batch_samples(gt_boxes)
        if len(sample_boxes) > 0:
            # Apply sampling if found at least one valid sample box
            # Then randomly pick one
            sample_idx = npr.randint(0, len(sample_boxes))
            rand_box = sample_boxes[sample_idx]
            im, gt_boxes = self._rand_crop(im, rand_box, gt_boxes)
        return im, gt_boxes
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
import numpy.random as npr
npr.seed(3)
import sys
sys.path.append('../../')
from resize import Resizer
from expand import Expander
from distort import Distortor
from sample import Sampler
from lib.core.config import cfg
if __name__ == '__main__':
    # Manual visual check of the whole augmentation chain:
    # Distort => Expand => Sample => Resize, one image per key press.
    distorter = Distortor()
    expander = Expander()
    sampler = Sampler(cfg.SSD.SAMPLERS)
    resizer = Resizer()
    box_color = (188, 119, 64)
    while True:
        im = cv2.imread('cat.jpg')
        gt_boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
        im = distorter.distort_image(im)
        im, gt_boxes = expander.expand_image(im, gt_boxes)
        im, gt_boxes = sampler.sample_image(im, gt_boxes)
        # Retry when the crop dropped every ground-truth box.
        if len(gt_boxes) < 1:
            continue
        im = resizer.resize_image(im)
        height, width = im.shape[0], im.shape[1]
        for gt_box in gt_boxes:
            x1 = int(gt_box[0] * width)
            y1 = int(gt_box[1] * height)
            x2 = int(gt_box[2] * width)
            y2 = int(gt_box[3] * height)
            cv2.rectangle(im, (x1, y1), (x2, y2), box_color, 2)
            print(x1, y1, x2, y2)
        cv2.imshow('Sample', im)
        cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import math
import cv2
import PIL.Image
import PIL.ImageEnhance
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import iou
class Compose(object):
    """Chain several transforms into a single callable.

    Every transform must provide ``apply(img, boxes)`` returning the
    updated ``(img, boxes)`` pair; the transforms run in the given order.
    """

    def __init__(self, *transforms):
        self.transforms = transforms

    def __call__(self, img, boxes):
        state = (img, boxes)
        for step in self.transforms:
            new_img, new_boxes = step.apply(*state)
            state = (new_img, new_boxes)
        return state
class Distort(object):
    """Randomly jitter the brightness, contrast and saturation of an image."""

    def __init__(self):
        self._brightness_prob = cfg.SSD.DISTORT.BRIGHTNESS_PROB
        self._contrast_prob = cfg.SSD.DISTORT.CONTRAST_PROB
        self._saturation_prob = cfg.SSD.DISTORT.SATURATION_PROB

    def apply(self, img, boxes=None):
        """Return the distorted image as an array; ``boxes`` pass through."""
        img = PIL.Image.fromarray(img)
        # (probability, enhancer) pairs, applied in this fixed order.
        stages = (
            (self._brightness_prob, PIL.ImageEnhance.Brightness),
            (self._contrast_prob, PIL.ImageEnhance.Contrast),
            (self._saturation_prob, PIL.ImageEnhance.Color),
        )
        for prob, enhancer in stages:
            if npr.uniform() < prob:
                # Enhancement factor drawn from [0.7, 1.3).
                factor = npr.uniform(-0.3, 0.3) + 1.
                img = enhancer(img).enhance(factor)
        return np.array(img), boxes
class Expand(object):
    """Randomly place the image on a larger canvas filled with pixel means."""

    def __init__(self):
        self._expand_prob = cfg.SSD.EXPAND.PROB
        self._max_ratio = cfg.SSD.EXPAND.MAX_RATIO
        if self._max_ratio < 1.0:
            logger.fatal(
                'The max expand ratio must >= 1, got {}'
                .format(self._max_ratio)
            )

    def apply(self, img, boxes=None):
        """Return the (possibly) expanded image and the re-normalized boxes.

        ``boxes`` holds normalized (x1, y1, x2, y2, ...) rows; only the
        first four columns are rewritten for the expanded canvas.
        """
        # Skip the expansion with probability (1 - expand_prob).
        if npr.uniform() > self._expand_prob:
            return img, boxes
        ratio = npr.uniform(1., self._max_ratio)
        if ratio == 1:
            return img, boxes

        src_h, src_w = img.shape[0], img.shape[1]
        dst_h = int(src_h * ratio)
        dst_w = int(src_w * ratio)
        # Random top-left corner of the source image inside the canvas.
        top = int(math.floor(npr.uniform(0., dst_h - src_h)))
        left = int(math.floor(npr.uniform(0., dst_w - src_w)))

        canvas = np.empty((dst_h, dst_w, 3), dtype=np.uint8)
        canvas[:] = cfg.PIXEL_MEANS
        canvas[top:top + src_h, left:left + src_w, :] = img

        if boxes is None:
            return canvas, boxes

        # Map the normalized coordinates onto the canvas and normalize again.
        moved = boxes.astype(boxes.dtype, copy=True)
        moved[:, 0:4:2] = (boxes[:, 0:4:2] * src_w + left) / dst_w
        moved[:, 1:4:2] = (boxes[:, 1:4:2] * src_h + top) / dst_h
        return canvas, moved
class Resize(object):
    """Resize images to the configured SSD size with a random interpolation."""

    def __init__(self):
        # OpenCV expects the target size as (width, height).
        self._target_size = (
            cfg.SSD.RESIZE.WIDTH,
            cfg.SSD.RESIZE.HEIGHT,
        )
        # Map the names accepted in the config to the OpenCV flags.
        name_to_flag = {
            'LINEAR': cv2.INTER_LINEAR,
            'AREA': cv2.INTER_AREA,
            'NEAREST': cv2.INTER_NEAREST,
            'CUBIC': cv2.INTER_CUBIC,
            'LANCZOS4': cv2.INTER_LANCZOS4,
        }
        self._interp_mode = [
            name_to_flag[name] for name in cfg.SSD.RESIZE.INTERP_MODE
        ]

    def apply(self, img, boxes):
        """Return the resized image; ``boxes`` pass through untouched."""
        # Pick one of the configured interpolation flags uniformly.
        flag = self._interp_mode[npr.randint(len(self._interp_mode))]
        resized = cv2.resize(img, self._target_size, interpolation=flag)
        return resized, boxes
class Sample(object):
    """Randomly crop a patch that satisfies the configured overlap constraints.

    Each entry of ``cfg.SSD.SAMPLERS`` is an 8-tuple:
    (min_scale, max_scale, min_aspect_ratio, max_aspect_ratio,
    min_overlap, max_overlap, max_trials, max_sample).
    """

    def __init__(self):
        # ``collections.Iterable`` was removed in Python 3.10,
        # while Python 2 has no ``collections.abc``.
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable
        samplers = cfg.SSD.SAMPLERS
        if not isinstance(samplers, Iterable):
            samplers = [samplers]
        self._samplers = []
        for sampler in samplers:
            if len(sampler) != 8:
                logger.fatal('The sample params should be a tuple of length 8.')
            sample_param = {
                'min_scale': sampler[0],
                'max_scale': sampler[1],
                'min_aspect_ratio': sampler[2],
                'max_aspect_ratio': sampler[3],
                'min_overlap': sampler[4],
                'max_overlap': sampler[5],
                'max_trials': sampler[6],
                'max_sample': sampler[7],
            }
            self._samplers.append(sample_param)

    @classmethod
    def _compute_overlaps(cls, rand_box, gt_boxes):
        """Return the IoU between ``rand_box`` and every ground-truth box."""
        return iou(np.expand_dims(rand_box, 0), gt_boxes[:, 0:4])

    @classmethod
    def _generate_sample(cls, sample_param):
        """Draw a random normalized box (x1, y1, x2, y2) inside the unit square."""
        min_scale = sample_param.get('min_scale', 1.)
        max_scale = sample_param.get('max_scale', 1.)
        scale = npr.uniform(min_scale, max_scale)
        min_aspect_ratio = sample_param.get('min_aspect_ratio', 1.)
        max_aspect_ratio = sample_param.get('max_aspect_ratio', 1.)
        # Restrict the aspect ratio so that both sides stay within [0, 1].
        min_aspect_ratio = max(min_aspect_ratio, scale ** 2)
        max_aspect_ratio = min(max_aspect_ratio, 1. / (scale ** 2))
        aspect_ratio = npr.uniform(min_aspect_ratio, max_aspect_ratio)
        bbox_w = scale * (aspect_ratio ** 0.5)
        bbox_h = scale / (aspect_ratio ** 0.5)
        w_off = npr.uniform(0., 1. - bbox_w)
        h_off = npr.uniform(0., 1. - bbox_h)
        return np.array([w_off, h_off, w_off + bbox_w, h_off + bbox_h])

    def _check_satisfy(self, sample_box, gt_boxes, constraint):
        """Return True if the best IoU of ``sample_box`` is within the bounds.

        A missing (``None``) bound is not checked. If both bounds are
        missing, every sample box is accepted.
        """
        min_overlap = constraint.get('min_overlap', None)
        max_overlap = constraint.get('max_overlap', None)
        if min_overlap is None and max_overlap is None:
            return True
        if len(gt_boxes) == 0:
            # No box to overlap with; reject instead of reducing
            # an empty overlap array.
            return False
        # Keep the measured IoU under its own name, otherwise it shadows
        # the configured upper bound and that check can never fail.
        best_overlap = self._compute_overlaps(sample_box, gt_boxes).max()
        if min_overlap is not None and best_overlap < min_overlap:
            return False
        if max_overlap is not None and best_overlap > max_overlap:
            return False
        return True

    def _generate_batch_samples(self, gt_boxes):
        """Collect the candidate boxes accepted by each configured sampler."""
        sample_boxes = []
        for sampler in self._samplers:
            found = 0
            for _ in range(sampler['max_trials']):
                if found >= sampler['max_sample']:
                    break
                sample_box = self._generate_sample(sampler)
                # The check is skipped only for the trivial [0, 1] range.
                if sampler['min_overlap'] != 0. or \
                        sampler['max_overlap'] != 1.:
                    if not self._check_satisfy(sample_box, gt_boxes, sampler):
                        continue
                found += 1
                sample_boxes.append(sample_box)
        return sample_boxes

    @classmethod
    def _rand_crop(cls, im, rand_box, gt_boxes=None):
        """Crop ``im`` to ``rand_box`` and re-normalize the kept boxes."""
        im_h, im_w = im.shape[:2]
        w_off = int(rand_box[0] * im_w)
        h_off = int(rand_box[1] * im_h)
        crop_w = int((rand_box[2] - rand_box[0]) * im_w)
        crop_h = int((rand_box[3] - rand_box[1]) * im_h)
        new_im = im[h_off:h_off + crop_h, w_off:w_off + crop_w, :]
        if gt_boxes is None:
            return new_im, gt_boxes

        ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
        ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
        # Keep the ground-truth box whose center is in the sample box
        # Implement ``EmitConstraint.CENTER`` in the original SSD
        keep_inds = np.where(
            (ctr_x >= rand_box[0]) & (ctr_x <= rand_box[2]) &
            (ctr_y >= rand_box[1]) & (ctr_y <= rand_box[3]))[0]
        gt_boxes = gt_boxes[keep_inds]
        # Normalized -> pixels relative to the crop -> clipped -> normalized.
        new_gt_boxes = gt_boxes.astype(gt_boxes.dtype, copy=True)
        new_gt_boxes[:, 0] = (gt_boxes[:, 0] * im_w - w_off)
        new_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h - h_off)
        new_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w - w_off)
        new_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h - h_off)
        new_gt_boxes = clip_tiled_boxes(new_gt_boxes, (crop_h, crop_w))
        new_gt_boxes[:, 0] = new_gt_boxes[:, 0] / crop_w
        new_gt_boxes[:, 1] = new_gt_boxes[:, 1] / crop_h
        new_gt_boxes[:, 2] = new_gt_boxes[:, 2] / crop_w
        new_gt_boxes[:, 3] = new_gt_boxes[:, 3] / crop_h
        return new_im, new_gt_boxes

    def apply(self, img, boxes):
        """Crop ``img`` to one randomly chosen valid sample box, if any."""
        sample_boxes = self._generate_batch_samples(boxes)
        if len(sample_boxes) > 0:
            # Apply sampling if found at least one valid sample box
            # Then randomly pick one
            sample_idx = npr.randint(len(sample_boxes))
            rand_box = sample_boxes[sample_idx]
            img, boxes = self._rand_crop(img, rand_box, boxes)
        return img, boxes
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
sys.path.append('../../')
import cv2
import numpy as np
from lib.ssd.data import transforms
if __name__ == '__main__':
    # Fix the seed so that the sequence of augmentations is repeatable
    np.random.seed(3)
    augmentor = transforms.Compose(
        transforms.Distort(),
        transforms.Expand(),
        transforms.Sample(),
        transforms.Resize(),
    )
    while True:
        # Reload the image each round, as the rectangles below
        # are drawn directly on the displayed array
        img = cv2.imread('cat.jpg')
        if img is None:
            # cv2.imread reports a missing or unreadable file by
            # returning None instead of raising
            raise IOError("Failed to read the image: 'cat.jpg'")
        # One box in normalized (x1, y1, x2, y2) coordinates
        boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
        img, boxes = augmentor(img, boxes)
        if len(boxes) < 1:
            # Nothing left to draw, try another augmentation
            continue
        for box in boxes:
            # Normalized coordinates -> pixels of the augmented image
            x1 = int(box[0] * img.shape[1])
            y1 = int(box[1] * img.shape[0])
            x2 = int(box[2] * img.shape[1])
            y2 = int(box[3] * img.shape[0])
            cv2.rectangle(img, (x1, y1), (x2, y2), (188, 119, 64), 2)
        cv2.imshow('Sample', img)
        # Any key shows the next sample, ESC leaves the loop
        if (cv2.waitKey(0) & 0xFF) == 27:
            break
......@@ -17,7 +17,8 @@ import numpy as np
def generate_anchors(min_sizes, max_sizes, ratios):
"""Generate anchor (reference) windows by enumerating
"""
Generate anchor (reference) windows by enumerating
aspect ratios, min_sizes, max_sizes wrt a reference ctr (x, y, w, h).
"""
......@@ -34,39 +35,32 @@ def generate_anchors(min_sizes, max_sizes, ratios):
base_anchor, min_size, max_size)])
anchors = np.vstack([_anchors, anchors[1:]])
total_anchors.append(anchors)
return np.vstack([total_anchors[i] for i in range(len(total_anchors))])
return np.vstack(total_anchors)
def _whctrs(anchor):
    """Return width, height, x center, and y center for an anchor (window).

    The anchor is read as ``(x_ctr, y_ctr, w, h)``.
    Note that it is a little different from Faster-RCNN.

    """
    w, h = anchor[2], anchor[3]
    x_ctr, y_ctr = anchor[0], anchor[1]
    return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
    """
    Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).

    Each output row is ``(x1, y1, x2, y2)`` for one (width, height) pair.

    """
    # Turn the vectors into columns so that hstack yields one row per anchor
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack((x_ctr - 0.5 * ws,
                         y_ctr - 0.5 * hs,
                         x_ctr + 0.5 * ws,
                         y_ctr + 0.5 * hs))
    return anchors
def _ratio_enum(anchor, ratios):
"""Enumerate a set of anchors for each aspect ratio wrt an anchor.
"""
"""Enumerate a set of anchors for each aspect ratio wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
......@@ -77,9 +71,7 @@ def _ratio_enum(anchor, ratios):
def _max_size_enum(base_anchor, min_size, max_size):
"""Enumerate a anchor for max_size wrt base_anchor.
"""
"""Enumerate a anchor for max_size wrt base_anchor."""
w, h, x_ctr, y_ctr = _whctrs(base_anchor)
ws = hs = np.sqrt([min_size * max_size])
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
......
......@@ -15,8 +15,8 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.datasets.factory import get_imdb
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.ssd.data.data_batch import DataBatch
......@@ -33,10 +33,8 @@ class DataLayer(torch.nn.Module):
})
def forward(self):
# Get a mini-batch from the Queue
blobs = self.data_batch.get()
# Zero-Copy from numpy
blobs['data'] = torch.from_numpy(blobs['data'])
# Switch the data to Device
blobs['data'].cuda(cfg.GPU_ID)
return blobs
\ No newline at end of file
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
......@@ -13,11 +13,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.utils.blob import blob_to_tensor
class HardMiningLayer(torch.nn.Module):
......@@ -42,8 +42,8 @@ class HardMiningLayer(torch.nn.Module):
conf_loss = np.zeros(match_labels.shape, dtype=np.float32)
inds = np.where(match_labels >= 0)[0]
flt_min = np.finfo(float).eps
# Naive softmax cross-entropy
conf_loss[inds] = -1.0 * np.log(np.maximum(
# Softmax cross-entropy
conf_loss[inds] = -np.log(np.maximum(
conf_prob[inds, match_labels[inds]], flt_min))
# Filter negatives
......@@ -63,4 +63,4 @@ class HardMiningLayer(torch.nn.Module):
labels_wide[ix][bg_inds] = 0 # Use hard negatives as bg indices
# Feed labels to compute cls loss
return {'labels': to_tensor(labels_wide)}
\ No newline at end of file
return {'labels': blob_to_tensor(labels_wide)}
......@@ -17,6 +17,9 @@ import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
......@@ -26,7 +29,7 @@ class MultiBoxMatchLayer(torch.nn.Module):
def forward(self, prior_boxes, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
num_priors = len(prior_boxes)
# Do matching between prior boxes and gt boxes
......@@ -37,7 +40,8 @@ class MultiBoxMatchLayer(torch.nn.Module):
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix]
if gt_boxes.shape[0] == 0: continue
if gt_boxes.shape[0] == 0:
continue
# Compute the overlaps between prior boxes and gt boxes
overlaps = bbox_overlaps(
......@@ -70,9 +74,50 @@ class MultiBoxMatchLayer(torch.nn.Module):
}
def _dismantle_gt_boxes(gt_boxes, num_images):
    """Split the packed ground-truth boxes into one array per image.

    The last column of ``gt_boxes`` is cast to int32 and read as the
    index of the image that the row belongs to.

    """
    per_image = []
    for ix in range(num_images):
        owner = gt_boxes[:, -1].astype(np.int32)
        per_image.append(gt_boxes[np.flatnonzero(owner == ix)])
    return per_image
\ No newline at end of file
class MultiBoxTargetLayer(torch.nn.Module):
    """Build the box regression targets and weights for matched priors."""

    def __init__(self):
        super(MultiBoxTargetLayer, self).__init__()

    def forward(self, match_inds, match_labels, prior_boxes, gt_boxes):
        """Compute the regression targets for each image of the batch.

        Parameters
        ----------
        match_inds
            Per-image assignments from prior boxes to gt boxes.
        match_labels
            Per-image matched labels (after hard mining possibly);
            a prior is treated as positive when its label is > 0.
        prior_boxes
            The prior (default) boxes shared by all images.
        gt_boxes
            The packed ground-truth boxes of the whole batch.

        Returns
        -------
        dict
            ``bbox_targets``, ``bbox_inside_weights`` and
            ``bbox_outside_weights``, each built from a float32 array
            of shape ``(num_images, num_priors, 4)``.

        """
        num_images = cfg.TRAIN.IMS_PER_BATCH
        num_priors = len(prior_boxes)
        gts_per_image = dismantle_gt_boxes(gt_boxes, num_images)

        targets = np.zeros((num_images, num_priors, 4), dtype=np.float32)
        inside_weights = np.zeros(targets.shape, dtype=np.float32)
        outside_weights = np.zeros(targets.shape, dtype=np.float32)

        # Number of matched boxes (#positive) over the whole batch.
        # It is divided by the number of images, as the loss
        # (presumably SmoothL1Loss) will divide by it also
        num_positive = max(len(np.where(match_labels > 0)[0]), 1)
        normalization = num_positive / num_images

        for ix in range(num_images):
            image_gts = gts_per_image[ix]
            if image_gts.shape[0] == 0:
                # Images without gt boxes keep all-zero targets and weights
                continue
            # Positive priors and the gt boxes they are assigned to
            fg_inds = np.where(match_labels[ix] > 0)[0]
            fg_priors = prior_boxes[fg_inds]
            fg_gts = image_gts[match_inds[ix][fg_inds]]
            # Assign targets & inside weights & outside weights
            targets[ix][fg_inds] = bbox_transform(
                fg_priors, fg_gts, cfg.BBOX_REG_WEIGHTS)
            inside_weights[ix, :] = (1.0, 1.0, 1.0, 1.0)
            outside_weights[ix][fg_inds] = 1.0 / normalization

        return {
            'bbox_targets': blob_to_tensor(targets),
            'bbox_inside_weights': blob_to_tensor(inside_weights),
            'bbox_outside_weights': blob_to_tensor(outside_weights),
        }
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.utils.bbox_transform import bbox_transform
class MultiBoxTargetLayer(torch.nn.Module):
    """Produce the box regression targets for the matched prior boxes."""

    def __init__(self):
        super(MultiBoxTargetLayer, self).__init__()

    def forward(self, match_inds, match_labels, prior_boxes, gt_boxes):
        """Return the targets and weights consumed by the box loss.

        ``match_inds`` and ``match_labels`` are indexed by image first;
        priors whose label is > 0 are the positives. The three outputs
        are built from float32 arrays of shape
        ``(num_images, num_priors, 4)``.

        """
        batch_size = cfg.TRAIN.IMS_PER_BATCH
        prior_count = len(prior_boxes)
        gt_groups = _dismantle_gt_boxes(gt_boxes, batch_size)

        out_shape = (batch_size, prior_count, 4)
        bbox_targets = np.zeros(out_shape, dtype=np.float32)
        inside = np.zeros(bbox_targets.shape, dtype=np.float32)
        outside = np.zeros(bbox_targets.shape, dtype=np.float32)

        # Count the positives of the whole batch (at least one), then
        # divide by the number of images, as the loss
        # (presumably SmoothL1Loss) will divide by it also
        positives = max(len(np.where(match_labels > 0)[0]), 1)
        bbox_normalization = positives / batch_size

        for ix in range(batch_size):
            boxes_here = gt_groups[ix]
            if boxes_here.shape[0] == 0:
                continue
            assignment_here = match_inds[ix]
            labels_here = match_labels[ix]
            # Sample fg-rois (default boxes) & gt-rois (gt boxes)
            pos_inds = np.where(labels_here > 0)[0]
            ex_rois = prior_boxes[pos_inds]
            gt_rois = boxes_here[assignment_here[pos_inds]]
            bbox_targets[ix][pos_inds] = bbox_transform(
                ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
            inside[ix, :] = (1.0, 1.0, 1.0, 1.0)
            outside[ix][pos_inds] = 1.0 / bbox_normalization

        return {
            'bbox_targets': to_tensor(bbox_targets),
            'bbox_inside_weights': to_tensor(inside),
            'bbox_outside_weights': to_tensor(outside),
        }
def _dismantle_gt_boxes(gt_boxes, num_images):
    """Group the rows of the packed gt boxes by image index.

    The image index of a row is its last column, cast to int32.
    Returns a list holding one array of rows for each image.

    """
    def _rows_of(image_index):
        image_ids = gt_boxes[:, -1].astype(np.int32)
        return gt_boxes[np.nonzero(image_ids == image_index)[0]]

    return [_rows_of(ix) for ix in range(num_images)]
\ No newline at end of file
......@@ -17,8 +17,8 @@ import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.ssd.generate_anchors import generate_anchors
from lib.utils import logger
class PriorBoxLayer(torch.nn.Module):
......@@ -68,5 +68,4 @@ class PriorBoxLayer(torch.nn.Module):
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
anchors = anchors.reshape((K * A, 4)).astype(np.float32)
all_anchors.append(anchors)
return np.concatenate(all_anchors, axis=0)
......@@ -13,26 +13,24 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
import cPickle
except:
import pickle as cPickle
import cv2
import numpy as np
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.utils.bbox_transform import clip_boxes, bbox_transform_inv
from lib.nms.nms_wrapper import nms, soft_nms
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.timer import Timer
from lib.utils.blob import to_array
from lib.utils.vis import vis_one_image
def get_images(ims):
target_h = cfg.SSD.RESIZE.HEIGHT
target_w = cfg.SSD.RESIZE.WIDTH
processed_ims = []; im_scales = []
processed_ims, im_scales = [], []
for im in ims:
im_scales.append((float(target_h) / im.shape[0],
float(target_w) / im.shape[1]))
......@@ -43,36 +41,37 @@ def get_images(ims):
def ims_detect(detector, ims):
"""Detect images, with the single scale."""
# Prepare blobs
data, im_scales = get_images(ims)
data = torch.from_numpy(data).cuda(cfg.GPU_ID)
# Do Forward
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs={'data': data})
# Decode results
scores = to_array(outputs['cls_prob'])
prior_boxes = to_array(outputs['prior_boxes'])
box_deltas = to_array(outputs['bbox_pred'])
batch_boxes = []
for ix in range(box_deltas.shape[0]):
boxes = bbox_transform_inv(prior_boxes, box_deltas[ix], cfg.BBOX_REG_WEIGHTS)
boxes[:, 0::2] /= im_scales[ix][1]
boxes[:, 1::2] /= im_scales[ix][0]
batch_boxes.append(clip_boxes(boxes, ims[ix].shape))
scores = tensor_to_blob(outputs['cls_prob'])
prior_boxes = tensor_to_blob(outputs['prior_boxes'])
box_deltas = tensor_to_blob(outputs['bbox_pred'])
for i in range(box_deltas.shape[0]):
boxes = bbox_transform_inv(
boxes=prior_boxes,
deltas=box_deltas[i],
weights=cfg.BBOX_REG_WEIGHTS,
)
boxes[:, 0::2] /= im_scales[i][1]
boxes[:, 1::2] /= im_scales[i][0]
batch_boxes.append(clip_tiled_boxes(boxes, ims[i].shape))
return scores, batch_boxes
def test_net(net, server):
classes, num_images, num_classes = \
server.classes, server.num_images, server.num_classes
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()}
......@@ -101,34 +100,49 @@ def test_net(net, server):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds]
pre_nms_inds = np.argsort(-cls_scores)[0 : cfg.TEST.NMS_TOP_K]
pre_nms_inds = np.argsort(-cls_scores)[:cfg.TEST.NMS_TOP_K]
cls_scores = cls_scores[pre_nms_inds]
cls_boxes = cls_boxes[pre_nms_inds]
cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(
cls_dets, cfg.TEST.NMS,
cls_detections,
cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA)
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
cls_dets = cls_dets[keep, :]
all_boxes[j][i] = cls_dets
boxes_this_image.append(cls_dets)
keep = nms(
cls_detections,
cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_images[item_idx], classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
filename=server.get_save_filename(image_ids[item_idx]))
vis_one_image(
raw_images[item_idx],
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.0,
show_class=True,
filename=server.get_save_filename(image_ids[item_idx]),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores)
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
......@@ -136,7 +150,7 @@ def test_net(net, server):
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='')
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py>
#
# ------------------------------------------------------------
import numpy as np
from lib.core.config import cfg
def bbox_transform(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
    """Encode the gt boxes as regression targets relative to ``ex_rois``.

    Rows of both inputs are read as ``(x1, y1, x2, y2, ...)``; widths
    and heights follow the inclusive ``+ 1`` convention.

    Returns an array with one ``(dx, dy, dw, dh)`` row per pair of
    boxes, each term scaled by the matching entry of ``weights``.

    """
    def _size_and_center(rois):
        w = rois[:, 2] - rois[:, 0] + 1.0
        h = rois[:, 3] - rois[:, 1] + 1.0
        return w, h, rois[:, 0] + 0.5 * w, rois[:, 1] + 0.5 * h

    src_w, src_h, src_cx, src_cy = _size_and_center(ex_rois)
    dst_w, dst_h, dst_cx, dst_cy = _size_and_center(gt_rois)

    wx, wy, ww, wh = weights
    deltas = (
        wx * (dst_cx - src_cx) / src_w,
        wy * (dst_cy - src_cy) / src_h,
        ww * np.log(dst_w / src_w),
        wh * np.log(dst_h / src_h),
    )
    # Stack as rows (4, N), then flip to one row per box (N, 4)
    return np.vstack(deltas).transpose()
def bbox_transform_inv(boxes, deltas, weights=(1.0, 1.0, 1.0, 1.0)):
    """Decode the regression deltas into boxes around the given ``boxes``.

    ``deltas`` may be tiled: every group of four columns is decoded
    against the same reference box. The size deltas are capped at
    ``cfg.BBOX_XFORM_CLIP`` when ``cfg.USE_XFORM_CLIP`` is set.

    Returns an array with the shape and dtype of ``deltas``; an empty
    ``boxes`` yields an empty result.

    """
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)

    # Reference size (inclusive pixels) and center
    ref_w = boxes[:, 2] - boxes[:, 0] + 1.0
    ref_h = boxes[:, 3] - boxes[:, 1] + 1.0
    ref_cx = boxes[:, 0] + 0.5 * ref_w
    ref_cy = boxes[:, 1] + 0.5 * ref_h

    wx, wy, ww, wh = weights
    dx, dy = deltas[:, 0::4] / wx, deltas[:, 1::4] / wy
    dw, dh = deltas[:, 2::4] / ww, deltas[:, 3::4] / wh

    if cfg.USE_XFORM_CLIP:
        dw = np.minimum(dw, cfg.BBOX_XFORM_CLIP)
        dh = np.minimum(dh, cfg.BBOX_XFORM_CLIP)

    out_cx = dx * ref_w[:, np.newaxis] + ref_cx[:, np.newaxis]
    out_cy = dy * ref_h[:, np.newaxis] + ref_cy[:, np.newaxis]
    out_w = np.exp(dw) * ref_w[:, np.newaxis]
    out_h = np.exp(dh) * ref_h[:, np.newaxis]

    decoded = np.zeros(deltas.shape, dtype=deltas.dtype)
    decoded[:, 0::4] = out_cx - 0.5 * out_w  # x1
    decoded[:, 1::4] = out_cy - 0.5 * out_h  # y1
    decoded[:, 2::4] = out_cx + 0.5 * out_w  # x2
    decoded[:, 3::4] = out_cy + 0.5 * out_h  # y2
    return decoded
def clip_boxes(boxes, im_shape):
    """Clip the (possibly tiled) boxes to the image boundaries, in place.

    ``im_shape`` is read as ``(height, width, ...)``. Every 4th column
    starting at 0 and 2 is limited to ``[0, width - 1]``, and every 4th
    column starting at 1 and 3 to ``[0, height - 1]``.

    Returns the same ``boxes`` array.

    """
    max_x, max_y = im_shape[1] - 1, im_shape[0] - 1
    # Column offsets 0..3 are x1, y1, x2, y2
    for offset, upper in enumerate((max_x, max_y, max_x, max_y)):
        boxes[:, offset::4] = np.maximum(
            np.minimum(boxes[:, offset::4], upper), 0)
    return boxes
\ No newline at end of file
......@@ -89,7 +89,7 @@ def prep_im_for_blob(im, target_size, max_size):
return resize_image(im, im_scale, im_scale), im_scale, jitter
def to_tensor(blob, enforce_cpu=False):
def blob_to_tensor(blob, enforce_cpu=False):
if isinstance(blob, np.ndarray):
# Zero-Copy from numpy
cpu_tensor = torch.from_numpy(blob)
......@@ -99,7 +99,7 @@ def to_tensor(blob, enforce_cpu=False):
cpu_tensor.cuda(cfg.GPU_ID)
def to_array(blob, copy=False):
def tensor_to_blob(blob, copy=False):
if isinstance(blob, torch.Tensor):
# Zero-Copy from numpy
array = blob.numpy(True)
......
......@@ -13,19 +13,11 @@
#
# ------------------------------------------------------------
import numpy as np
def area(boxes):
"""Computes area of boxes.
Args:
boxes: Numpy array with shape [N, 4] holding N boxes
Returns:
a numpy array with shape [N*1] representing box areas
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
"""
return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
import numpy as np
def intersection(boxes1, boxes2):
......@@ -66,10 +58,11 @@ def iou(boxes1, boxes2):
"""
intersect = intersection(boxes1, boxes2)
area1 = area(boxes1)
area2 = area(boxes2)
union = np.expand_dims(area1, axis=1) + np.expand_dims(
area2, axis=0) - intersect
area1 = boxes_area(boxes1)
area2 = boxes_area(boxes2)
union = \
np.expand_dims(area1, axis=1) + \
np.expand_dims(area2, axis=0) - intersect
return intersect / union
......@@ -88,7 +81,7 @@ def ioa1(boxes1, boxes2):
"""
intersect = intersection(boxes1, boxes2)
areas = np.expand_dims(area(boxes1), axis=1)
areas = np.expand_dims(boxes_area(boxes1), axis=1)
return intersect / areas
......@@ -107,14 +100,90 @@ def ioa2(boxes1, boxes2):
"""
intersect = intersection(boxes1, boxes2)
areas = np.expand_dims(area(boxes2), axis=0)
areas = np.expand_dims(boxes_area(boxes2), axis=0)
return intersect / areas
def expand_boxes(boxes, scale):
"""Expand an array of boxes by a given scale.
def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1.)):
    """Transform the boxes to the regression targets.

    Rows of ``ex_rois`` and ``gt_rois`` are read as
    ``(x1, y1, x2, y2, ...)``; widths and heights follow the
    inclusive ``+ 1`` convention.

    Returns an array with one ``(dx, dy, dw, dh)`` row per pair of
    boxes, each term scaled by the matching entry of ``weights``.

    """
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    wx, wy, ww, wh = weights
    # Center offsets are relative to the example size,
    # size changes are encoded in log-space
    targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = ww * np.log(gt_widths / ex_widths)
    targets_dh = wh * np.log(gt_heights / ex_heights)

    targets = np.vstack(
        (targets_dx, targets_dy,
         targets_dw, targets_dh)).transpose()
    return targets
def bbox_transform_inv(boxes, deltas, weights=(1., 1., 1., 1.)):
    """Decode the final boxes according to the deltas.

    ``deltas`` may be tiled: every group of four columns is decoded
    against the same reference box of ``boxes``.

    Returns an array with the shape and dtype of ``deltas``; an empty
    ``boxes`` yields an empty result.

    """
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)

    # Reference size (inclusive pixels) and center
    ref_w = boxes[:, 2] - boxes[:, 0] + 1.0
    ref_h = boxes[:, 3] - boxes[:, 1] + 1.0
    ref_cx = boxes[:, 0] + 0.5 * ref_w
    ref_cy = boxes[:, 1] + 0.5 * ref_h

    wx, wy, ww, wh = weights
    dx, dy = deltas[:, 0::4] / wx, deltas[:, 1::4] / wy
    dw, dh = deltas[:, 2::4] / ww, deltas[:, 3::4] / wh

    out_cx = dx * ref_w[:, np.newaxis] + ref_cx[:, np.newaxis]
    out_cy = dy * ref_h[:, np.newaxis] + ref_cy[:, np.newaxis]
    out_w = np.exp(dw) * ref_w[:, np.newaxis]
    out_h = np.exp(dh) * ref_h[:, np.newaxis]

    decoded = np.zeros(deltas.shape, dtype=deltas.dtype)
    decoded[:, 0::4] = out_cx - 0.5 * out_w  # x1
    decoded[:, 1::4] = out_cy - 0.5 * out_h  # y1
    decoded[:, 2::4] = out_cx + 0.5 * out_w  # x2
    decoded[:, 3::4] = out_cy + 0.5 * out_h  # y2
    return decoded
def boxes_area(boxes):
    """Compute the area of an array of boxes.

    Rows are read as ``(x1, y1, x2, y2, ...)``; widths and heights
    follow the inclusive ``+ 1`` convention.

    Returns a 1-D array holding one area per box. An ``AssertionError``
    is raised if any area is negative.

    """
    w = (boxes[:, 2] - boxes[:, 0] + 1)
    h = (boxes[:, 3] - boxes[:, 1] + 1)
    areas = w * h
    assert np.all(areas >= 0), 'Negative areas found'
    return areas
def clip_tiled_boxes(boxes, im_shape):
    """Clip the (possibly tiled) boxes to the image boundaries, in place.

    ``im_shape`` is read as ``(height, width, ...)``. Every 4th column
    starting at 0 and 2 is limited to ``[0, width - 1]``, and every 4th
    column starting at 1 and 3 to ``[0, height - 1]``.

    Returns the same ``boxes`` array.

    """
    x_limit = im_shape[1] - 1
    y_limit = im_shape[0] - 1
    # (first column, upper bound) for x1, y1, x2, y2 respectively
    for start, limit in ((0, x_limit), (1, y_limit),
                         (2, x_limit), (3, y_limit)):
        capped = np.minimum(boxes[:, start::4], limit)
        boxes[:, start::4] = np.maximum(capped, 0)
    return boxes
def expand_boxes(boxes, scale):
"""Expand an array of boxes by a given scale."""
w_half = (boxes[:, 2] - boxes[:, 0]) * .5
h_half = (boxes[:, 3] - boxes[:, 1]) * .5
x_c = (boxes[:, 2] + boxes[:, 0]) * .5
......@@ -130,3 +199,20 @@ def expand_boxes(boxes, scale):
boxes_exp[:, 3] = y_c + h_half
return boxes_exp
def filter_boxes(boxes, min_size):
    """Remove all boxes with any side smaller than min size.

    Rows are read as ``(x1, y1, x2, y2, ...)`` with inclusive sides.
    Returns the indices of the boxes to keep, not the boxes themselves.

    """
    widths = boxes[:, 2] - boxes[:, 0] + 1
    heights = boxes[:, 3] - boxes[:, 1] + 1
    large_enough = np.logical_and(widths >= min_size, heights >= min_size)
    return np.nonzero(large_enough)[0]
def dismantle_gt_boxes(gt_boxes, num_images):
    """Dismantle the packed ground-truth boxes.

    The last column of ``gt_boxes`` is cast to int32 and read as the
    index of the image that the row belongs to.

    Returns a list holding one array of rows for each image.

    """
    boxes_per_image = []
    for ix in range(num_images):
        image_ids = gt_boxes[:, -1].astype(np.int32)
        boxes_per_image.append(gt_boxes[image_ids == ix])
    return boxes_per_image
......@@ -15,7 +15,6 @@ from __future__ import print_function
import cv2
import numpy as np
import PIL.Image
import PIL.ImageEnhance
......@@ -24,8 +23,11 @@ from lib.core.config import cfg
def resize_image(im, fx, fy):
    """Resize the image by the given scale factors.

    ``fx`` and ``fy`` scale the horizontal and vertical axis. No explicit
    output size is passed, so it follows from the two factors.

    """
    return cv2.resize(
        im,
        dsize=None,
        fx=fx, fy=fy,
        interpolation=cv2.INTER_LINEAR,
    )
# Faster and robust resizing than OpenCV methods
......@@ -37,15 +39,15 @@ def resize_mask(mask, size):
def distort_image(im):
im = PIL.Image.fromarray(im)
if np.random.uniform() < 0.5:
delta_brightness = np.random.uniform(-0.3, 0.3) + 1.0
delta_brightness = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Brightness(im)
im = im.enhance(delta_brightness)
if np.random.uniform() < 0.5:
delta_contrast = np.random.uniform(-0.3, 0.3) + 1.0
delta_contrast = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Contrast(im)
im = im.enhance(delta_contrast)
if np.random.uniform() < 0.3:
delta_saturation = np.random.uniform(-0.3, 0.3) + 1.0
if np.random.uniform() < 0.5:
delta_saturation = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Color(im)
im = im.enhance(delta_saturation)
return np.array(im)
......@@ -55,16 +57,20 @@ def scale_image(im):
processed_ims, ims_scales = [], []
if cfg.TEST.MAX_SIZE > 0:
im_size_min = np.min(im.shape[0:2])
im_size_max = np.max(im.shape[0:2])
im_size_min = np.min(im.shape[:2])
im_size_max = np.max(im.shape[:2])
for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE
if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
processed_ims.append(
cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR))
cv2.resize(
im,
dsize=None,
fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR,
))
ims_scales.append(im_scale)
else:
# Scale image along the longest side
......@@ -72,8 +78,12 @@ def scale_image(im):
for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_max)
processed_ims.append(
cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR))
cv2.resize(
im,
dsize=None,
fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR,
))
ims_scales.append(im_scale)
return processed_ims, ims_scales
......@@ -17,12 +17,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import inspect
import sys as _sys
import logging as _logging
import os
import sys as _sys
import threading
from logging import DEBUG, ERROR, FATAL, INFO, WARN
_logger = None
_is_root = True
......@@ -43,7 +43,7 @@ def get_logger():
return _logger
logger = _logging.getLogger('detectron')
logger.setLevel(INFO)
logger.setLevel('INFO')
logger.propagate = False
if True:
......@@ -51,7 +51,8 @@ def get_logger():
_interactive = False
try:
# This is only defined in interactive shells.
if _sys.ps1: _interactive = True
if _sys.ps1:
_interactive = True
except AttributeError:
# Even now, we may be in an interactive shell with `python -i`.
_interactive = _sys.flags.interactive
......@@ -59,7 +60,7 @@ def get_logger():
# If we are in an interactive environment (like Jupyter), set loglevel
# to INFO and pipe the output to stdout.
if _interactive:
logger.setLevel(INFO)
logger.setLevel('INFO')
_logging_target = _sys.stdout
else:
_logging_target = _sys.stderr
......@@ -86,7 +87,8 @@ def log(level, msg, *args, **kwargs):
def debug(msg, *args, **kwargs):
    """Log a DEBUG message, only if this process is the root logger.

    The message is passed through ``_detailed_msg`` first.

    """
    if is_root():
        get_logger().debug(_detailed_msg(msg), *args, **kwargs)
def error(msg, *args, **kwargs):
......@@ -100,15 +102,18 @@ def fatal(msg, *args, **kwargs):
def info(msg, *args, **kwargs):
    """Log an INFO message, only if this process is the root logger.

    The message is passed through ``_detailed_msg`` first.

    """
    if is_root():
        get_logger().info(_detailed_msg(msg), *args, **kwargs)
def warn(msg, *args, **kwargs):
    """Log a WARNING message, only if this process is the root logger.

    Kept as an alias of ``warning`` for existing callers. The message
    is passed through ``_detailed_msg`` first.

    """
    if is_root():
        # ``Logger.warn`` is deprecated in favour of ``Logger.warning``
        get_logger().warning(_detailed_msg(msg), *args, **kwargs)
def warning(msg, *args, **kwargs):
    """Log a WARNING message, only if this process is the root logger.

    The message is passed through ``_detailed_msg`` first.

    """
    if is_root():
        get_logger().warning(_detailed_msg(msg), *args, **kwargs)
def get_verbosity():
......@@ -128,12 +133,3 @@ def set_root_logger(is_root=True):
def is_root():
return _is_root
_level_names = {
FATAL: 'FATAL',
ERROR: 'ERROR',
WARN: 'WARN',
INFO: 'INFO',
DEBUG: 'DEBUG',
}
\ No newline at end of file
......@@ -13,6 +13,10 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
......@@ -21,15 +25,19 @@ def intersect_box_mask(ex_box, gt_box, gt_mask):
y1 = max(ex_box[1], gt_box[1])
x2 = min(ex_box[2], gt_box[2])
y2 = min(ex_box[3], gt_box[3])
if x1 > x2 or y1 > y2: return None
if x1 > x2 or y1 > y2:
return None
w = x2 - x1 + 1
h = y2 - y1 + 1
ex_starty = y1 - ex_box[1]
ex_startx = x1 - ex_box[0]
inter_maskb = gt_mask[y1 : y2 + 1 , x1 : x2 + 1]
regression_target = np.zeros((ex_box[3] - ex_box[1] + 1, ex_box[2] - ex_box[0] + 1), dtype=np.uint8)
regression_target[ex_starty: ex_starty + h, ex_startx: ex_startx + w] = inter_maskb
return regression_target
ex_start_y = y1 - ex_box[1]
ex_start_x = x1 - ex_box[0]
inter_mask = gt_mask[y1:y2 + 1, x1:x2 + 1]
target_h = ex_box[3] - ex_box[1] + 1
target_w = ex_box[2] - ex_box[0] + 1
reg_target = np.zeros((target_h, target_w), dtype=np.uint8)
reg_target[ex_start_y:ex_start_y + h,
ex_start_x:ex_start_x + w] = inter_mask
return reg_target
def mask_overlap(box1, box2, mask1, mask2):
......@@ -37,21 +45,24 @@ def mask_overlap(box1, box2, mask1, mask2):
y1 = max(box1[1], box2[1])
x2 = min(box1[2], box2[2])
y2 = min(box1[3], box2[3])
if x1 > x2 or y1 > y2: return 0
if x1 > x2 or y1 > y2:
return 0
w = x2 - x1 + 1
h = y2 - y1 + 1
# Get masks in the intersection part
start_ya = y1 - box1[1]
start_xa = x1 - box1[0]
inter_maska = mask1[start_ya: start_ya + h, start_xa:start_xa + w]
inter_mask_a = mask1[start_ya: start_ya + h, start_xa:start_xa + w]
start_yb = y1 - box2[1]
start_xb = x1 - box2[0]
inter_maskb = mask2[start_yb: start_yb + h, start_xb:start_xb + w]
inter_mask_b = mask2[start_yb: start_yb + h, start_xb:start_xb + w]
assert inter_maska.shape == inter_maskb.shape, (inter_maska.shape, inter_maskb.shape)
assert inter_mask_a.shape == inter_mask_b.shape
inter = np.logical_and(inter_maskb, inter_maska).sum()
inter = np.logical_and(inter_mask_b, inter_mask_a).sum()
union = mask1.sum() + mask2.sum() - inter
if union < 1.0: return 0
if union < 1.:
return 0.
return float(inter) / float(union)
......@@ -17,17 +17,19 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import deque
import collections
import numpy as np
class SmoothedValue(object):
"""Track a series of values and provide access to smoothed values over a
window or the global series average.
"""
Track a series of values and provide access to smoothed values
over a window or the global series average.
"""
def __init__(self, window_size):
self.deque = deque(maxlen=window_size)
self.deque = collections.deque(maxlen=window_size)
self.series = []
self.total = 0.0
self.count = 0
......
......@@ -13,6 +13,11 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import time
......@@ -27,7 +32,7 @@ class Timer(object):
def tic(self):
# Using time.time instead of time.clock because time time.clock
# does not normalize for multithreading
# does not normalize for multi-threading
self.start_time = time.time()
def toc(self, average=True):
......@@ -39,3 +44,10 @@ class Timer(object):
return self.average_time
else:
return self.diff
@contextlib.contextmanager
def tic_and_toc(self):
    """Time the body of a ``with`` statement.

    ``tic()`` is called on entry and its return value is yielded;
    ``toc()`` is always called on exit, even if the body raises.

    """
    try:
        started = self.tic()
        yield started
    finally:
        self.toc()
......@@ -26,12 +26,13 @@ from __future__ import print_function
from __future__ import unicode_literals
import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
import numpy as np
from lib.utils.colormap import colormap
from lib.utils.boxes import expand_boxes
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
plt.rcParams['pdf.fonttype'] = 42 # For editing in Adobe Illustrator
......@@ -63,17 +64,20 @@ def kp_connections(keypoints):
def convert_from_cls_format(cls_boxes, cls_segms, cls_keyps):
"""
Convert from the class boxes/segms/keyps format generated by the testing code.
"""
"""Convert from the class boxes/segms/keyps format generated by the testing code."""
box_list = [b for b in cls_boxes if len(b) > 0]
if len(box_list) > 0: boxes = np.concatenate(box_list)
else: boxes = None
if cls_segms is not None: segms = [s for slist in cls_segms for s in slist]
else: segms = None
if cls_keyps is not None: keyps = [k for klist in cls_keyps for k in klist]
else: keyps = None
if len(box_list) > 0:
boxes = np.concatenate(box_list)
else:
boxes = None
if cls_segms is not None:
segms = [s for slist in cls_segms for s in slist]
else:
segms = None
if cls_keyps is not None:
keyps = [k for klist in cls_keyps for k in klist]
else:
keyps = None
classes = []
for j in range(len(cls_boxes)):
classes += [j] * len(cls_boxes[j])
......@@ -81,23 +85,28 @@ def convert_from_cls_format(cls_boxes, cls_segms, cls_keyps):
def convert_from_cls_format_v2(cls_boxes, cls_segms, cls_keyps, class_names):
"""
Convert from the class boxes/segms/keyps format generated by the testing code.
"""
"""Convert from the class boxes/segms/keyps format generated by the testing code."""
box_list, segm_list = [], []
for j, name in enumerate(class_names):
if name == '__background__': continue
if name == '__background__':
continue
if len(cls_boxes[j]) > 0:
box_list.append(cls_boxes[j])
if cls_segms is not None: segm_list.append(cls_segms[j])
if cls_segms is not None:
segm_list.append(cls_segms[j])
if len(box_list) > 0: boxes = np.concatenate(box_list)
else: boxes = None
if len(segm_list) > 0: segms = np.concatenate(segm_list)
else: segms = None
if cls_keyps is not None: keyps = [k for klist in cls_keyps for k in klist]
else: keyps = None
if len(box_list) > 0:
boxes = np.concatenate(box_list)
else:
boxes = None
if len(segm_list) > 0:
segms = np.concatenate(segm_list)
else:
segms = None
if cls_keyps is not None:
keyps = [k for klist in cls_keyps for k in klist]
else:
keyps = None
classes = []
for j in range(len(cls_boxes)):
......@@ -137,7 +146,6 @@ def get_mask(boxes, segms, im_shape, mask_thresh=0.4):
def vis_mask(img, mask, col, alpha=0.4, show_border=True, border_thick=1):
"""Visualizes a single binary mask."""
img = img.astype(np.float32)
idx = np.nonzero(mask)
......@@ -178,80 +186,25 @@ def vis_bbox(img, bbox, thick=1):
return img
def vis_keypoints(img, kps, kp_thresh=2, alpha=0.7):
    """Visualizes keypoints (adapted from vis_one_image).

    kps has shape (4, #keypoints) where 4 rows are (x, y, logit, prob).

    Parameters
    ----------
    img : numpy.ndarray
        The image to draw on; drawing happens on a copy of it.
    kps : numpy.ndarray
        The keypoints, laid out as described above.
    kp_thresh : number, optional, default=2
        Row 2 of ``kps`` must exceed this value for a keypoint
        (or both ends of a limb) to be drawn.
    alpha : float, optional, default=0.7
        The weight of the drawn overlay in the final blend.

    Returns
    -------
    numpy.ndarray
        The result of ``cv2.addWeighted`` over ``img`` and the overlay.

    """
    # NOTE(review): ``keypoint_utils`` is not among the imports visible
    # in this view -- confirm it is importable before calling this.
    dataset_keypoints, _ = keypoint_utils.get_keypoints()
    kp_lines = kp_connections(dataset_keypoints)
    # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv.
    # Two extra colors are reserved for the nose-shoulder and
    # shoulder-hip helper lines drawn below.
    cmap = plt.get_cmap('rainbow')
    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]
    colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors]
    # Perform the drawing on a copy of the image, to allow for blending.
    kp_mask = np.copy(img)
    # Draw mid shoulder / mid hip first for better visualization.
    # Each mid point is the mean (x, y) of the left/right pair, and its
    # score is the smaller of the two row-2 values.
    mid_shoulder = (
        kps[:2, dataset_keypoints.index('right_shoulder')] +
        kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
    sc_mid_shoulder = np.minimum(
        kps[2, dataset_keypoints.index('right_shoulder')],
        kps[2, dataset_keypoints.index('left_shoulder')])
    mid_hip = (
        kps[:2, dataset_keypoints.index('right_hip')] +
        kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
    sc_mid_hip = np.minimum(
        kps[2, dataset_keypoints.index('right_hip')],
        kps[2, dataset_keypoints.index('left_hip')])
    nose_idx = dataset_keypoints.index('nose')
    # Nose <-> mid shoulder line, drawn only if both ends pass the threshold.
    if sc_mid_shoulder > kp_thresh and kps[2, nose_idx] > kp_thresh:
        cv2.line(
            kp_mask, tuple(mid_shoulder), tuple(kps[:2, nose_idx]),
            color=colors[len(kp_lines)], thickness=2, lineType=cv2.LINE_AA)
    # Mid shoulder <-> mid hip line, same rule.
    if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
        cv2.line(
            kp_mask, tuple(mid_shoulder), tuple(mid_hip),
            color=colors[len(kp_lines) + 1], thickness=2, lineType=cv2.LINE_AA)
    # Draw the keypoints.
    for l in range(len(kp_lines)):
        # Each entry of kp_lines is indexed as a pair of keypoint indices.
        i1 = kp_lines[l][0]
        i2 = kp_lines[l][1]
        p1 = kps[0, i1], kps[1, i1]
        p2 = kps[0, i2], kps[1, i2]
        # The limb needs both ends above the threshold; each end point
        # is additionally drawn as a filled circle on its own.
        if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
            cv2.line(
                kp_mask, p1, p2,
                color=colors[l], thickness=2, lineType=cv2.LINE_AA)
        if kps[2, i1] > kp_thresh:
            cv2.circle(
                kp_mask, p1,
                radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)
        if kps[2, i2] > kp_thresh:
            cv2.circle(
                kp_mask, p2,
                radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)
    # Blend the keypoints.
    return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0)
def vis_one_image_opencv(
im, class_names,
boxes, segms=None, keypoints=None,
thresh=0.9, kp_thresh=2,
show_box=False, show_class=False):
im,
class_names,
boxes,
segms=None,
keypoints=None,
thresh=0.9,
kp_thresh=2,
show_box=False,
show_class=False,
):
"""Constructs a numpy array with the detections visualized."""
boxes, segms, keypoints, classes = \
convert_from_cls_format_v2(boxes, segms, keypoints, class_names)
if boxes is None \
or boxes.shape[0] == 0 or \
max(boxes[:, 4]) < thresh: return im
max(boxes[:, 4]) < thresh:
return im
mask_color_id, masks, color_list = 0, None, colormap()
......@@ -284,41 +237,43 @@ def vis_one_image_opencv(
mask_color_id += 1
im = vis_mask(im, masks[..., i], color_mask)
# show keypoints
if keypoints is not None and len(keypoints) > i:
im = vis_keypoints(im, keypoints[i], kp_thresh)
# # show keypoints
# if keypoints is not None and len(keypoints) > i:
# im = vis_keypoints(im, keypoints[i], kp_thresh)
cv2.imshow('Detectron', im)
cv2.waitKey(0)
def vis_one_image(
im, class_names,
boxes, segms=None, keypoints=None,
thresh=0.9, kp_thresh=2, dpi=100,
box_alpha=0.0, show_class=True,
filename=None):
im,
class_names,
boxes,
segms=None,
keypoints=None,
thresh=0.9,
kp_thresh=2,
dpi=100,
box_alpha=0.,
show_class=True,
filename=None,
):
"""Visual debugging of detections."""
boxes, segms, keypoints, classes = \
convert_from_cls_format_v2(boxes, segms, keypoints, class_names)
if boxes is None \
or boxes.shape[0] == 0 or \
max(boxes[:, 4]) < thresh: return
im, mask = im[:, :, ::-1], None
max(boxes[:, 4]) < thresh:
return
#dataset_keypoints, _ = keypoint_utils.get_keypoints()
im, mask, masks = im[:, :, ::-1], None, None
if segms is not None and len(segms) > 0:
masks = get_mask(boxes, segms, im.shape[0:2])
masks = get_mask(boxes, segms, im.shape[:2])
color_list = colormap(rgb=True) / 255
# kp_lines = kp_connections(dataset_keypoints)
# cmap = plt.get_cmap('rainbow')
# colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]
fig = plt.figure(frameon=False)
fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi)
ax = plt.Axes(fig, [0., 0., 1., 1.])
......@@ -379,56 +334,6 @@ def vis_one_image(
alpha=0.5)
ax.add_patch(polygon)
# show keypoints
if keypoints is not None and len(keypoints) > i:
kps = keypoints[i]
# plt.autoscale(False)
# for l in range(len(kp_lines)):
# i1 = kp_lines[l][0]
# i2 = kp_lines[l][1]
# if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
# x = [kps[0, i1], kps[0, i2]]
# y = [kps[1, i1], kps[1, i2]]
# line = plt.plot(x, y)
# plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7)
# if kps[2, i1] > kp_thresh:
# plt.plot(
# kps[0, i1], kps[1, i1], '.', color=colors[l],
# markersize=3.0, alpha=0.7)
#
# if kps[2, i2] > kp_thresh:
# plt.plot(
# kps[0, i2], kps[1, i2], '.', color=colors[l],
# markersize=3.0, alpha=0.7)
#
# # add mid shoulder / mid hip for better visualization
# mid_shoulder = (
# kps[:2, dataset_keypoints.index('right_shoulder')] +
# kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
# sc_mid_shoulder = np.minimum(
# kps[2, dataset_keypoints.index('right_shoulder')],
# kps[2, dataset_keypoints.index('left_shoulder')])
# mid_hip = (
# kps[:2, dataset_keypoints.index('right_hip')] +
# kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
# sc_mid_hip = np.minimum(
# kps[2, dataset_keypoints.index('right_hip')],
# kps[2, dataset_keypoints.index('left_hip')])
# if (sc_mid_shoulder > kp_thresh and
# kps[2, dataset_keypoints.index('nose')] > kp_thresh):
# x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]]
# y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]]
# line = plt.plot(x, y)
# plt.setp(
# line, color=colors[len(kp_lines)], linewidth=1.0, alpha=0.7)
# if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
# x = [mid_shoulder[0], mid_hip[0]]
# y = [mid_shoulder[1], mid_hip[1]]
# line = plt.plot(x, y)
# plt.setp(
# line, color=colors[len(kp_lines) + 1], linewidth=1.0,
# alpha=0.7)
if filename is not None:
fig.savefig(filename, dpi=dpi)
plt.close('all')
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import base64
import importlib
import sys
import argparse
import cv2
import numpy as np
import pprint
from seetaas_helper import visualization_test
from lib.core.config import cfg
from lib.core.coordinator import Coordinator
from lib.modeling.detector import Detector
import lib.ssd.test
import lib.faster_rcnn.test
from lib.faster_rcnn.test import nms, soft_nms
##############################################
# #
# ARGS #
# #
##############################################
def parse_args():
    """Parse the command line options of the deploy script.

    Returns
    -------
    argparse.Namespace
        The parsed options: ``cfg_file``, ``exp_dir``,
        ``iter`` and ``num_workers``.

    """
    parser = argparse.ArgumentParser(description='Test a Detection Network')
    # (flag, dest, help, default, type), registered in listing order.
    option_specs = (
        ('--cfg', 'cfg_file', 'optional config file', None, str),
        ('--exp_dir', 'exp_dir', 'experiment dir', None, str),
        ('--iter', 'iter', 'global step', 0, int),
        ('--workers', 'num_workers', 'number of workers', 1, int),
    )
    for flag, dest, help_text, default, value_type in option_specs:
        parser.add_argument(
            flag,
            dest=dest,
            help=help_text,
            default=default,
            type=value_type,
        )
    # NOTE(review): the help banner is only printed for an argv of
    # exactly 233 entries, so in practice this branch looks disabled.
    if len(sys.argv) == 233:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()
##############################################
# #
# UTILS #
# #
##############################################
def get_image(base64_str):
    """Decode a base64-encoded image file into a ``numpy.ndarray``.

    Parameters
    ----------
    base64_str : str or bytes
        The base64 payload of an encoded image.

    Returns
    -------
    numpy.ndarray or dict
        The image decoded with ``cv2.IMREAD_COLOR`` on success.
        Otherwise an error response dict: ``res`` is 2 when the
        module-level ``detector`` is ``None``, and 1 when the payload
        cannot be decoded into an image.

    """
    if detector is None:
        return {
            "state": "False",
            "message": "detect model is not init",
            "objects": [],
            "res": 2,
        }
    invalid_image = {
        "state": "False",
        "message": "detect image is not valid",
        "objects": [],
        "res": 1,
    }
    try:
        raw_bytes = base64.b64decode(base64_str)
        # np.frombuffer is the supported replacement for the deprecated
        # binary mode of np.fromstring.
        encoded = np.frombuffer(raw_bytes, dtype=np.uint8)
        im = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    except Exception:
        # Service boundary: any decoding failure becomes an error
        # response, but SystemExit / KeyboardInterrupt still propagate.
        return invalid_image
    if im is None:
        # cv2.imdecode reports undecodable data by returning None
        # rather than raising; callers expect an array or a response.
        return invalid_image
    return im
def send_detections(boxes_this_image):
    """Pack the per-class detections into a success response.

    Parameters
    ----------
    boxes_this_image : sequence
        Indexed by class id. Every non-background entry is an array
        of rows ``(x1, y1, x2, y2, score)``.

    Returns
    -------
    dict
        The response whose ``objects`` lists the rows scoring above
        ``cfg.VIS_TH``, each extended with its class id.

    """
    objects = []
    for cls_id, cls_name in enumerate(cfg.MODEL.CLASSES):
        if cls_name != '__background__':
            cls_dets = boxes_this_image[cls_id]
            # Column 4 holds the score; keep the confident rows only.
            confident_rows = np.where(cls_dets[:, 4] > cfg.VIS_TH)[0]
            cls_dets = cls_dets[confident_rows]
            # Append the class id as a trailing float32 column.
            id_column = np.ones((cls_dets.shape[0], 1), dtype=np.float32) * cls_id
            labeled = np.hstack(
                (cls_dets.astype(np.float32, copy=False), id_column))
            objects.extend(labeled.tolist())
    response = {"state": "True", "message": "", "objects": objects, "res": 0}
    return response
##############################################
# #
# PROCEDURE #
# #
##############################################
def ssd_infer(base64_str):
    """Detect objects in a base64-encoded image with the SSD pipeline.

    Parameters
    ----------
    base64_str : str
        The base64 payload of the image.

    Returns
    -------
    dict
        The response of ``send_detections``, or the error response
        produced by ``get_image`` when no image could be obtained.

    """
    image = get_image(base64_str)
    if not isinstance(image, np.ndarray):
        # Anything but an array is passed straight back to the caller.
        return image

    all_scores, all_boxes = lib.ssd.test.ims_detect(detector, [image])
    # A single image was submitted, so only batch entry 0 is used.
    scores = all_scores[0]
    boxes = all_boxes[0]

    # Slot 0 is an empty placeholder; the class loop starts at 1.
    per_class_dets = [[]]
    for cls_id in range(1, cfg.MODEL.NUM_CLASSES):
        above_thresh = np.where(scores[:, cls_id] > cfg.TEST.SCORE_THRESH)[0]
        kept_scores = scores[above_thresh, cls_id]
        kept_boxes = boxes[above_thresh]
        # Keep at most NMS_TOP_K candidates, highest score first.
        top_k = np.argsort(-kept_scores)[0: cfg.TEST.NMS_TOP_K]
        kept_scores = kept_scores[top_k]
        kept_boxes = kept_boxes[top_k]
        stacked = np.hstack((kept_boxes, kept_scores[:, np.newaxis]))
        cls_dets = stacked.astype(np.float32, copy=False)
        if not cfg.TEST.USE_SOFT_NMS:
            keep = nms(
                cls_dets, cfg.TEST.NMS,
                force_cpu=True,
            )
        else:
            keep = soft_nms(
                cls_dets, cfg.TEST.NMS,
                method=cfg.TEST.SOFT_NMS_METHOD,
                sigma=cfg.TEST.SOFT_NMS_SIGMA,
            )
        per_class_dets.append(cls_dets[keep, :])
    return send_detections(per_class_dets)
def faster_rcnn_infer(base64_str):
    """Detect objects in a base64-encoded image with the Faster R-CNN pipeline.

    Parameters
    ----------
    base64_str : str
        The base64 payload of the image.

    Returns
    -------
    dict
        The response of ``send_detections``, or the error response
        produced by ``get_image`` when no image could be obtained.

    """
    image = get_image(base64_str)
    if not isinstance(image, np.ndarray):
        # Anything but an array is passed straight back to the caller.
        return image

    scores, boxes = lib.faster_rcnn.test.im_detect(detector, image)

    # Slot 0 is an empty placeholder; the class loop starts at 1.
    per_class_dets = [[]]
    for cls_id in range(1, cfg.MODEL.NUM_CLASSES):
        above_thresh = np.where(scores[:, cls_id] > cfg.TEST.SCORE_THRESH)[0]
        kept_scores = scores[above_thresh, cls_id]
        # Each class owns its own group of four box columns.
        kept_boxes = boxes[above_thresh, cls_id * 4:(cls_id + 1) * 4]
        stacked = np.hstack((kept_boxes, kept_scores[:, np.newaxis]))
        cls_dets = stacked.astype(np.float32, copy=False)
        if not cfg.TEST.USE_SOFT_NMS:
            keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
        else:
            keep = soft_nms(
                cls_dets, cfg.TEST.NMS,
                method=cfg.TEST.SOFT_NMS_METHOD,
                sigma=cfg.TEST.SOFT_NMS_SIGMA,
            )
        per_class_dets.append(cls_dets[keep, :])
    return send_detections(per_class_dets)
##############################################
# #
# MAIN #
# #
##############################################
@visualization_test.Deploy.register
def infer(base64_str):
    """Dispatch the request to the ``<cfg.MODEL.TYPE>_infer`` procedure.

    The procedure is looked up by name among the module globals and
    its result is returned unchanged.

    """
    procedure_name = '{}_infer'.format(cfg.MODEL.TYPE)
    procedure = globals()[procedure_name]
    return procedure(base64_str)
# Module-level start-up: everything below runs on import/execution,
# and ``detector`` is the global read by ``get_image`` and the
# ``*_infer`` procedures.
args = parse_args()
print('Called with args:')
print(args)
# Constructing the coordinator with the cfg file precedes every read
# of ``cfg`` below (presumably it loads the file into ``cfg`` -- verify).
coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir)
print('Using config:')
pprint.pprint(cfg)
# NOTE(review): other entry scripts unpack ``coordinator.checkpoint(...)``
# into ``(checkpoint, start_iter)``; confirm that a weights path (and not
# a tuple) reaches ``load_weights`` here.
checkpoint = coordinator.checkpoint(global_step=args.iter, wait=False)
# NOTE(review): ``test_engine`` is not referenced again in this script;
# the import may be kept for its side effects -- confirm before removing.
test_engine = importlib.import_module('lib.{}.test'.format(cfg.MODEL.TYPE))
# Build the detector in eval mode on the configured GPU, then load the
# checkpoint weights and apply the inference optimization.
detector = Detector().eval().cuda(cfg.GPU_ID)
detector.load_weights(checkpoint)
detector.optimize_for_inference()
# Hand over to the deploy runner with the requested number of workers.
visualization_test.Deploy.run(args.num_workers)
\ No newline at end of file
......@@ -16,14 +16,15 @@ from __future__ import print_function
import os
import sys
sys.path.insert(0, '..')
import argparse
import pprint
import dragon.vm.torch as torch
import pprint
from lib.core.config import cfg
from lib.core.coordinator import Coordinator
from lib.modeling.detector import Detector
import lib.utils.logger as logger
from lib.utils import logger
def parse_args():
......@@ -51,8 +52,10 @@ if __name__ == '__main__':
if args.exp_dir is None or \
not os.path.exists(args.exp_dir):
raise ValueError('Excepted a existing experiment dir. \nGot {}'
.format(os.path.abspath(args.exp_dir)) if args.exp_dir else 'None')
raise ValueError(
'Excepted a existing experiment dir. \nGot {}.'
.format(os.path.abspath(args.exp_dir))
)
logger.info('Called with args:')
logger.info(args)
......@@ -80,8 +83,7 @@ if __name__ == '__main__':
torch.onnx.export(
model=detector,
args={'data': data, 'ims_info': ims_info},
f=checkpoint.replace(
'checkpoints', 'exports')
f=checkpoint.replace('checkpoints', 'exports')
.replace('pth', 'onnx'),
verbose=True,
)
......@@ -9,20 +9,23 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
sys.path.insert(0, '..')
import argparse
import numpy as np
import argparse
import dragon
import dragon.core.mpi as mpi
import numpy
from lib.core.config import cfg
from lib.core.coordinator import Coordinator
from lib.core.train import train_net
from lib.datasets.factory import get_imdb
import lib.utils.logger as logger
from lib.utils import logger
def parse_args():
......@@ -48,29 +51,35 @@ if __name__ == '__main__':
if args.exp_dir is None or \
not os.path.exists(args.exp_dir):
raise ValueError('Excepted a existing experiment dir. \nGot {}'
.format(os.path.abspath(args.exp_dir)) if args.exp_dir else 'None')
raise ValueError(
'Excepted a existing experiment dir. \nGot {}.'
.format(os.path.abspath(args.exp_dir))
)
coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir)
checkpoint, start_iter = coordinator.checkpoint(wait=False)
if checkpoint is not None: cfg.TRAIN.WEIGHTS = checkpoint
if checkpoint is not None:
cfg.TRAIN.WEIGHTS = checkpoint
# Setup MPI
if cfg.NUM_GPUS != mpi.Size():
raise ValueError('Excepted {} mpi nodes, but got {}.'
.format(len(args.gpus), mpi.Size()))
if cfg.NUM_GPUS != dragon.mpi.size():
raise ValueError(
'Excepted {} mpi nodes, but got {}.'
.format(len(args.gpus), dragon.mpi.size())
)
GPUs = [i for i in range(cfg.NUM_GPUS)]
cfg.GPU_ID = GPUs[mpi.Rank()]
mpi.Parallel([i for i in range(cfg.NUM_GPUS)])
mpi.SetParallelMode('NCCL' if cfg.USE_NCCL else 'MPI')
cfg.GPU_ID = GPUs[dragon.mpi.rank()]
dragon.mpi.add_parallel_group([i for i in range(cfg.NUM_GPUS)])
dragon.mpi.set_parallel_mode('NCCL' if cfg.USE_NCCL else 'MPI')
# Setup logger
if mpi.Rank() != 0:
if dragon.mpi.rank() != 0:
logger.set_root_logger(False)
# Fix the random seeds (numpy and dragon) for reproducibility
np.random.seed(cfg.RNG_SEED)
dragon.SetRandomSeed(cfg.RNG_SEED)
numpy.random.seed(cfg.RNG_SEED)
dragon.config.set_random_seed(cfg.RNG_SEED)
# Inspect the database
database = get_imdb(cfg.TRAIN.DATABASE)
......@@ -83,4 +92,4 @@ if __name__ == '__main__':
train_net(coordinator, start_iter)
# Finalize mpi
mpi.Finalize()
\ No newline at end of file
dragon.mpi.finalize()
......@@ -13,17 +13,19 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import importlib
import os
import sys
sys.path.insert(0, '..')
import argparse
import pprint
import importlib
from lib.core.config import cfg
from lib.core.coordinator import Coordinator
from lib.core.test import TestServer
from lib.modeling.detector import Detector
from lib.datasets.factory import get_imdb
from lib.modeling.detector import Detector
from lib.utils import logger
......@@ -51,7 +53,16 @@ def parse_args():
if __name__ == '__main__':
args = parse_args()
logger.info('Called with args:\n' + str(args))
if args.exp_dir is None or \
not os.path.exists(args.exp_dir):
raise ValueError(
'Excepted a existing experiment dir. \nGot {}.'
.format(os.path.abspath(args.exp_dir))
)
logger.info('Called with args:')
logger.info(args)
coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir)
logger.info('Using config:\n' + pprint.pformat(cfg))
......
......@@ -16,8 +16,9 @@ from __future__ import print_function
import os
import sys
sys.path.insert(0, '..')
import argparse
import numpy as np
import numpy
from lib.core.coordinator import Coordinator
from lib.utils import logger
......@@ -41,36 +42,37 @@ def parse_args():
def test(cfg_file, exp_dir, global_step):
"""Call test.py to test models on specific global step.
Parameters
----------
cfg_file : str
The path of the cfg file.
global_step : int
The iteration to test.
"""
"""Call test.py to test models on specific global step."""
import subprocess
args = '--cfg {} --exp_dir {} --iter {}'.format(
args = '{} {} '.format(sys.executable, 'test.py')
args += '--cfg {} --exp_dir {} --iter {} '.format(
os.path.abspath(cfg_file), exp_dir, global_step)
return subprocess.call('{} {} {}'.format(
sys.executable, 'test.py', args), shell=True)
return subprocess.call(args, shell=True)
if __name__ == '__main__':
args = parse_args()
if args.exp_dir is None or \
not os.path.exists(args.exp_dir):
raise ValueError(
'Excepted a existing experiment dir. \nGot {}.'
.format(os.path.abspath(args.exp_dir))
)
coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir)
global_steps = []
files = os.listdir(coordinator.checkpoints_dir())
for ix, file in enumerate(files):
step = int(file.split('_iter_')[-1].split('.')[0])
for file in files:
step = int(file.split('_iter_')[-1].split(b'.')[0])
global_steps.append(step)
order = np.argsort(-np.array(global_steps))
order = numpy.argsort(-numpy.array(global_steps))
for test_idx in order:
logger.info('Testing net at global step: {}......'.format(global_steps[test_idx]))
logger.info('Testing net at global step: {}......'
.format(global_steps[test_idx]))
logger.info(' - Using model file: {}'.format(files[test_idx]))
test(args.cfg_file, args.exp_dir, global_steps[test_idx])
......@@ -16,16 +16,17 @@ from __future__ import print_function
import sys
sys.path.insert(0, '..')
import os.path as osp
import argparse
import pprint
import dragon
import numpy as np
import numpy
import pprint
from lib.core.config import cfg
from lib.core.coordinator import Coordinator
from lib.core.train import train_net
from lib.datasets.factory import get_imdb
import lib.utils.logger as logger
from lib.utils import logger
def parse_args():
......@@ -58,19 +59,10 @@ def mpi_train(cfg_file, exp_dir):
"""
import subprocess
args = '--cfg {} --exp_dir {}'.format(osp.abspath(cfg_file), exp_dir)
mpi_args = 'mpirun --allow-run-as-root -n {}'.format(cfg.NUM_GPUS)
if len(cfg.HOSTS) > 0:
mpi_args += ' -x NCCL_DEBUG=INFO' \
' -x NCCL_IB_CUDA_SUPPORT=1' \
' -mca btl_openib_allow_ib 1' \
' -mca mpi_warn_on_fork 0 -H '
for i, host in enumerate(cfg.HOSTS):
mpi_args += (host + ':{},'.format(cfg.NUM_GPUS // len(cfg.HOSTS)))
if i > 0: subprocess.call('scp -r {} {}:{}'.format(
osp.abspath(exp_dir), host, osp.dirname(exp_dir)), shell=True)
return subprocess.call('{} {} {} {}'.format(
mpi_args, sys.executable, 'mpi_train.py', args), shell=True)
args = 'mpirun --allow-run-as-root -n {} '.format(cfg.NUM_GPUS)
args += '{} {} '.format(sys.executable, 'mpi_train.py')
args += '--cfg {} --exp_dir {} '.format(osp.abspath(cfg_file), exp_dir)
return subprocess.call(args, shell=True)
if __name__ == '__main__':
......@@ -85,9 +77,14 @@ if __name__ == '__main__':
coordinator.checkpoints_dir()
mpi_train(args.cfg_file, coordinator.experiment_dir)
else:
# Resume training?
checkpoint, start_iter = coordinator.checkpoint(wait=False)
if checkpoint is not None:
cfg.TRAIN.WEIGHTS = checkpoint
# Fix the random seeds (numpy and dragon) for reproducibility
np.random.seed(cfg.RNG_SEED)
dragon.SetRandomSeed(cfg.RNG_SEED)
numpy.random.seed(cfg.RNG_SEED)
dragon.config.set_random_seed(cfg.RNG_SEED)
# Inspect the database
database = get_imdb(cfg.TRAIN.DATABASE)
......@@ -97,4 +94,4 @@ if __name__ == '__main__':
# Ready to train the network
logger.info('Output will be saved to `{:s}`'
.format(coordinator.checkpoints_dir()))
train_net(coordinator)
\ No newline at end of file
train_net(coordinator, start_iter)
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!