Commit ca255ea0 by Ting PAN

Change to the PEP8 code style

1 parent 71593766
Showing with 1572 additions and 1414 deletions
## General
# Compiled Object files
*.slo
*.lo
......@@ -7,13 +5,15 @@
*.cuo
# Compiled Dynamic libraries
# *.so
*.so
*.dll
*.dylib
# Compiled Static libraries
*.lai
*.la
#*.a
*.a
*.lib
# Compiled python
*.pyc
......@@ -40,6 +40,9 @@ __pycache__
# QtCreator files
*.user
# VSCode files
.vscode
# PyCharm files
.idea
......
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.1.2 (20190723)
Dragon Minimum Required (Version 0.3.0.0)
Changes:
Preview Features:
- Change to the PEP8 code style.
- Adapt to the new Dragon API.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.1.1 (20190409)
Dragon Minimum Required (Version 0.3.0.0)
......
......@@ -21,8 +21,8 @@ set(CUDA_ARCH -gencode arch=compute_30,code=sm_30
# ---------------- User Config ----------------
# ---[ Dependencies
include(${PROJECT_SOURCE_DIR}/CMake/FindPythonLibs.cmake)
include(${PROJECT_SOURCE_DIR}/CMake/FindNumPy.cmake)
include(${PROJECT_SOURCE_DIR}/cmake/FindPythonLibs.cmake)
include(${PROJECT_SOURCE_DIR}/cmake/FindNumPy.cmake)
FIND_PACKAGE(CUDA REQUIRED)
set(CMAKE_CXX_STANDARD 11)
......
# --------------------------------------------------------
# Detectron @ Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
......@@ -8,8 +8,3 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from .distort import Distortor
from .expand import Expander
from .sample import Sampler
from .resize import Resizer
\ No newline at end of file
......@@ -16,6 +16,7 @@ from __future__ import print_function
import os
import sys
import time
import cv2
import xml.etree.ElementTree as ET
from dragon.tools.db import LMDB
......@@ -23,6 +24,7 @@ from dragon.tools.db import LMDB
sys.path.insert(0, '../../..')
from lib.proto import anno_pb2 as pb
ZFILL = 8
ENCODE_QUALITY = 95
......@@ -46,14 +48,23 @@ def make_datum(image_file, xml_file):
datum = pb.Datum()
im = cv2.imread(image_file)
if im is None or im.shape[0] == 0 or im.shape[1] == 0:
print("XML have not objects ignored: ", xml_file)
return None
datum.height, datum.width, datum.channels = im.shape
datum.encoded = ENCODE_QUALITY != 100
if datum.encoded:
result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
if im is None or im.shape[0] == 0 or im.shape[1] == 0:
print("XML have not objects ignored: ", xml_file)
return None
datum.data = im.tostring()
anno_datum.datum.CopyFrom(datum)
anno_datum.filename = filename.split('.')[0]
if len(objs) == 0:
return None
for ix, obj in enumerate(objs):
anno = pb.Annotation()
bbox = obj.find('bndbox')
......@@ -64,6 +75,7 @@ def make_datum(image_file, xml_file):
cls = obj.find('name').text.strip()
anno.x1, anno.y1, anno.x2, anno.y2 = (x1, y1, x2, y2)
anno.name = cls
class_name_set.add(cls)
anno.difficult = False
if obj.find('difficult') is not None:
anno.difficult = int(obj.find('difficult').text) == 1
......@@ -72,13 +84,15 @@ def make_datum(image_file, xml_file):
return anno_datum
def make_db(database_file,
def make_db(
database_file,
images_path,
annotations_path,
imagesets_path,
splits):
splits,
):
if os.path.isdir(database_file) is True:
raise ValueError('The database path already exists.')
print('Warning: The database path already exists.')
else:
root_dir = database_file[:database_file.rfind('/')]
if not os.path.exists(root_dir):
......@@ -95,12 +109,12 @@ def make_db(database_file,
print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
db = LMDB(max_commit=10000)
db = LMDB(max_commit=1000)
db.open(database_file, mode='w')
count = 0
total_line = 0
start_time = time.time()
zfill_flag = '{0:0%d}' % (ZFILL)
zfill_flag = '{0:0%d}' % ZFILL
for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
......@@ -109,18 +123,18 @@ def make_db(database_file,
lines = f.readlines()
total_line += len(lines)
for line in lines:
count += 1
if count % 10000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
datum = make_datum(image_file, xml_file)
if datum is not None:
count += 1
db.put(zfill_flag.format(count - 1), datum.SerializeToString())
if count % 1000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
......
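For reference, a record written by make_db above can be decoded back the same way the DataTransformer later in this commit does. A minimal sketch; the AnnoDatum message name and the LMDB value() accessor are assumptions, the rest mirrors code shown in this diff:

import cv2
import numpy as np
from dragon.tools.db import LMDB
from lib.proto import anno_pb2 as pb

db = LMDB()
db.open('/data/voc_trainval_lmdb')       # hypothetical database path
db.set('0'.zfill(db.zfill()))            # seek to the first zero-padded key
anno_datum = pb.AnnoDatum()              # assumed wrapper message name
anno_datum.ParseFromString(db.value())   # assumed raw-bytes accessor
im = np.fromstring(anno_datum.datum.data, np.uint8)
if anno_datum.datum.encoded:
    im = cv2.imdecode(im, -1)            # JPEG records (ENCODE_QUALITY < 100)
else:
    im = im.reshape((anno_datum.datum.height,
                     anno_datum.datum.width,
                     anno_datum.datum.channels))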
# --------------------------------------------------------
# Detectron
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
......@@ -155,11 +155,11 @@ __C.TEST.SCORE_THRESH = 0.05
# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5
## NMS threshold used on RPN proposals
# NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
## Number of top scoring boxes to keep before applying NMS to RPN proposals
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
## Number of top scoring boxes to keep after applying NMS to RPN proposals
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 0
......@@ -199,7 +199,7 @@ __C.MODEL.TYPE = ''
# The float precision for training and inference
# (FLOAT32, FLOAT16,)
__C.MODEL.DATA_TYPE= 'FLOAT32'
__C.MODEL.DATA_TYPE = 'FLOAT32'
# The backbone
__C.MODEL.BACKBONE = ''
......@@ -560,10 +560,11 @@ def _merge_a_into_b(a, b):
"""Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a.
"""
if not isinstance(a, dict): return
if not isinstance(a, dict):
return
for k, v in a.items():
# a must specify keys that are in b
if not k in b:
if k not in b:
raise KeyError('{} is not a valid config key'.format(k))
# the types must match, too
v = _check_and_coerce_cfg_value_type(v, b[k], k)
......@@ -598,15 +599,15 @@ def cfg_from_list(cfg_list):
assert d.has_key(subkey)
d = d[subkey]
subkey = key_list[-1]
assert d.has_key(subkey)
assert subkey in d
try:
value = literal_eval(v)
except:
# handle the case when v is a string literal
# Handle the case when v is a string literal
value = v
assert type(value) == type(d[subkey]), \
'type {} does not match original type {}'.format(
type(value), type(d[subkey]))
'type {} does not match original type {}'\
.format(type(value), type(d[subkey]))
d[subkey] = value
......@@ -618,8 +619,10 @@ def _check_and_coerce_cfg_value_type(value_a, value_b, key):
# The types must match (with some exceptions)
type_b = type(value_b)
type_a = type(value_a)
if type_a is type_b: return value_a
if type_b is float and type_a is int: return float(value_a)
if type_a is type_b:
return value_a
if type_b is float and type_a is int:
return float(value_a)
# Exceptions: numpy arrays, strings, tuple<->list
if isinstance(value_b, np.ndarray):
......
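Outside the cfg module, the clobbering merge above boils down to a few lines of plain Python; a standalone sketch with illustrative values (type coercion omitted):

def merge_a_into_b(a, b):
    # Clobber options in b whenever they are also specified in a;
    # a may only specify keys that already exist in b.
    if not isinstance(a, dict):
        return
    for k, v in a.items():
        if k not in b:
            raise KeyError('{} is not a valid config key'.format(k))
        if isinstance(v, dict):
            merge_a_into_b(v, b[k])
        else:
            b[k] = v

defaults = {'SOLVER': {'BASE_LR': 0.001, 'MAX_ITERS': 40000}}
overrides = {'SOLVER': {'BASE_LR': 0.01}}
merge_a_into_b(overrides, defaults)
assert defaults['SOLVER'] == {'BASE_LR': 0.01, 'MAX_ITERS': 40000}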
......@@ -18,7 +18,8 @@ import shutil
import time
import numpy as np
from lib.core.config import cfg, cfg_from_file
from lib.core.config import cfg
from lib.core.config import cfg_from_file
class Coordinator(object):
......@@ -44,7 +45,8 @@ class Coordinator(object):
def _path_at(self, file, auto_create=True):
path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path): os.makedirs(path)
if auto_create and not os.path.exists(path):
os.makedirs(path)
return path
def checkpoints_dir(self):
......@@ -67,8 +69,10 @@ class Coordinator(object):
return os.path.join(self.checkpoints_dir(), files[ix]), step
steps.append(step)
if global_step is None:
if len(files) == 0: return None, 0
last_idx = int(np.argmax(steps)); last_step = steps[last_idx]
if len(files) == 0:
return None, 0
last_idx = int(np.argmax(steps))
last_step = steps[last_idx]
return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step
return None, 0
result = locate()
......
......@@ -30,7 +30,8 @@ class Solver(object):
self.opt_arguments = {
'scale_gradient': 1. / (
cfg.SOLVER.LOSS_SCALING *
cfg.SOLVER.ITER_SIZE),
cfg.SOLVER.ITER_SIZE
),
'clip_gradient': float(cfg.SOLVER.CLIP_NORM),
'weight_decay': cfg.SOLVER.WEIGHT_DECAY,
}
......@@ -57,8 +58,10 @@ class Solver(object):
}
]
for name, param in self.detector.named_parameters():
if 'bias' in name: param_groups[1]['params'].append(param)
else: param_groups[0]['params'].append(param)
if 'bias' in name:
param_groups[1]['params'].append(param)
else:
param_groups[0]['params'].append(param)
return param_groups
def set_learning_rate(self):
......@@ -67,8 +70,10 @@ class Solver(object):
if self._current_step < len(cfg.SOLVER.STEPS) \
and self.iter >= cfg.SOLVER.STEPS[self._current_step]:
self._current_step = self._current_step + 1
logger.info('MultiStep Status: Iteration {}, step = {}' \
.format(self.iter, self._current_step))
logger.info(
'MultiStep Status: Iteration {}, step = {}'
.format(self.iter, self._current_step)
)
new_lr = cfg.SOLVER.BASE_LR * (
cfg.SOLVER.GAMMA ** self._current_step)
self.optimizer.param_groups[0]['lr'] = \
......@@ -77,13 +82,14 @@ class Solver(object):
raise ValueError('Unknown lr policy: ' + policy)
def one_step(self):
def add_loss(x, y):
return y if x is None else x + y
# Forward & Backward & Compute_loss
iter_size = cfg.SOLVER.ITER_SIZE
loss_scaling = cfg.SOLVER.LOSS_SCALING
run_time = 0.; stats = {'loss': {'total': 0.}, 'iter': self.iter}
add_loss = lambda x, y: y if x is None else x + y
stats = {'loss': {'total': 0.}, 'iter': self.iter}
tic = time.time()
run_time, tic = 0., time.time()
if iter_size > 1:
# Dragon is designed for manual gradient accumulation
......@@ -99,10 +105,13 @@ class Solver(object):
stats['loss'][k] = 0.
total_loss = add_loss(total_loss, v)
stats['loss'][k] += float(v) * loss_scaling
if loss_scaling != 1.: total_loss *= loss_scaling
if loss_scaling != 1.:
total_loss *= loss_scaling
stats['loss']['total'] += float(total_loss)
total_loss.backward()
if iter_size > 1: self.optimizer.accumulate_grad()
if iter_size > 1:
self.optimizer.accumulate_grad()
run_time += (time.time() - tic)
......@@ -190,5 +199,8 @@ def get_solver_func(type):
elif type == 'Adam':
return AdamSolver
else:
raise ValueError('Unsupported solver type: {}.\n'
'Expected one of (MomentumSGD, Nesterov, RMSProp, Adam)'.format(type))
\ No newline at end of file
raise ValueError(
'Unsupported solver type: {}.\n'
'Expected one of (MomentumSGD, Nesterov, RMSProp, Adam).'
.format(type)
)
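The scale_gradient argument built in the Solver constructor above undoes both loss scaling and gradient accumulation; a small worked example with illustrative values:

# With ITER_SIZE micro-batches and each loss multiplied by LOSS_SCALING,
# the accumulated gradient is ITER_SIZE * LOSS_SCALING times too large.
loss_scaling, iter_size = 128., 2
scale_gradient = 1. / (loss_scaling * iter_size)   # 1 / 256
accumulated_grad = 256.
assert accumulated_grad * scale_gradient == 1.     # back to the true scale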
......@@ -33,25 +33,27 @@ class TestServer(object):
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = DataReader(**{'source': self.imdb.source})
self.data_transformer = DataTransformer()
self.data_reader.Q_out = Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.q_out = Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.start()
self.gt_recs = OrderedDict()
self.output_dir = output_dir
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir): os.makedirs(self.vis_dir)
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
serialized = self.data_reader.Q_out.get()
serialized = self.data_reader.q_out.get()
image = self.data_transformer.get_image(serialized)
image_id, objects = self.data_transformer.get_annotations(serialized)
self.gt_recs[image_id] = {
'objects': objects,
'width': image.shape[1],
'height': image.shape[0]}
'height': image.shape[0],
}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
......@@ -60,9 +62,10 @@ class TestServer(object):
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError('Loading {} records, '
'while the specific database required {}'.format(
len(self.gt_recs), self.num_images))
raise RuntimeError(
'Loading {} records, while {} required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
......@@ -87,7 +90,8 @@ class InferServer(object):
self.image_idx = 0
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir): os.makedirs(self.vis_dir)
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
......@@ -99,7 +103,8 @@ class InferServer(object):
self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = {
'width': image.shape[1],
'height': image.shape[0]}
'height': image.shape[0],
}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
......@@ -108,15 +113,23 @@ class InferServer(object):
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError('Loading {} records, '
'while the specific database required {}'.format(
len(self.gt_recs), self.num_images))
raise RuntimeError(
'Loading {} records, while {} required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes, self.get_records(), self.output_dir)
all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes, all_masks, self.get_records(), self.output_dir)
\ No newline at end of file
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
......@@ -17,17 +17,17 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import collections
import datetime
from collections import OrderedDict
import os
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.core.solver import get_solver_func
from lib.utils.timer import Timer
from lib.utils.stats import SmoothedValue
from lib.utils import logger
from lib.utils.stats import SmoothedValue
from lib.utils.timer import Timer
class SolverWrapper(object):
......@@ -51,13 +51,14 @@ class SolverWrapper(object):
self.solver.detector.cuda(cfg.GPU_ID)
# Plan the metrics
self.metrics = OrderedDict()
self.metrics = collections.OrderedDict()
if cfg.ENABLE_TENSOR_BOARD:
from dragon.tools.tensorboard import TensorBoard
self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs')
def snapshot(self):
if not logger.is_root(): return None
if not logger.is_root():
return None
filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}'
.format(self.solver.iter) + '.pth')
filename = os.path.join(self.output_dir, filename)
......@@ -77,19 +78,35 @@ class SolverWrapper(object):
self.board.scalar_summary('time', stats['time'], stats['iter'])
for k, v in self.metrics.items():
if k == 'total':
self.board.scalar_summary('total_loss', v.GetMedianValue(), stats['iter'])
else: self.board.scalar_summary(k, v.GetMedianValue(), stats['iter'])
self.board.scalar_summary(
'total_loss',
v.GetMedianValue(),
stats['iter'],
)
else:
self.board.scalar_summary(
k,
v.GetMedianValue(),
stats['iter'],
)
def step(self, display=False):
stats = self.solver.one_step()
self.add_metrics(stats)
self.send_metrics(stats)
if display:
logger.info('Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (stats['iter'],
stats['lr'], self.metrics['total'].GetMedianValue(), stats['time']))
logger.info(
'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (
stats['iter'], stats['lr'],
self.metrics['total'].GetMedianValue(),
stats['time'],
)
)
for k, v in self.metrics.items():
if k == 'total': continue
logger.info(' Train net output({}): {}'.format(k, v.GetMedianValue()))
if k == 'total':
continue
logger.info(' ' * 10 + 'Train net output({}): {}'
.format(k, v.GetMedianValue()))
def train_model(self):
"""Network training loop."""
......@@ -104,9 +121,8 @@ class SolverWrapper(object):
start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha)
# Apply 1-step SGD update
timer.tic()
with timer.tic_and_toc():
self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0)
timer.toc()
if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0:
average_time = timer.average_time
......@@ -114,8 +130,10 @@ class SolverWrapper(object):
cfg.SOLVER.MAX_ITERS - self.solver.iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS
logger.info('< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta))
logger.info(
'< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta)
)
if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0:
last_snapshot_iter = self.solver.iter
......
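The progress line above is plain arithmetic over the smoothed timer; a self-contained sketch with illustrative numbers:

import datetime

average_time = 0.35                    # smoothed seconds per iteration
max_iters, cur_iter = 40000, 12000     # illustrative values
eta_seconds = average_time * (max_iters - cur_iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(cur_iter + 1) / max_iters
print('< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
      .format(progress, average_time, eta))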
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
......@@ -13,6 +13,10 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from lib.datasets.taas import TaaS
......
......@@ -61,7 +61,7 @@ class imdb(object):
return num_entries
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
raise NotImplementedError
pass
def evaluate_masks(self, all_boxes, all_masks, output_dir):
raise NotImplementedError
\ No newline at end of file
pass
......@@ -19,16 +19,16 @@ from __future__ import print_function
import cv2
import numpy as np
try:
import cPickle
except:
import pickle as cPickle
from lib.core.config import cfg
from lib.utils.mask_transform import mask_overlap
from lib.utils.boxes import expand_boxes
from lib.pycocotools.mask_utils import mask_rle2im
from lib.utils.boxes import expand_boxes
from lib.utils.mask_transform import mask_overlap
def voc_ap(rec, prec, use_07_metric=False):
......@@ -65,8 +65,13 @@ def voc_ap(rec, prec, use_07_metric=False):
return ap
def voc_bbox_eval(det_file, gt_recs, cls_name,
IoU=0.5, use_07_metric=False):
def voc_bbox_eval(
det_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
class_recs = {}
n_pos = 0
for image_name, rec in gt_recs.items():
......@@ -81,35 +86,35 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
'det': det
}
# read detections
with open(det_file, 'r') as f: lines = f.readlines()
# Read detections
with open(det_file, 'r') as f:
lines = f.readlines()
splitlines = [x.strip().split(' ') for x in lines]
image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
# avoid IndexError if detecting nothing
if len(BB) == 0: return 0, 0, -1
# Avoid IndexError if detecting nothing
if len(BB) == 0:
return 0, 0, -1
# sort by confidence
# Sort by confidence
sorted_ind = np.argsort(-confidence)
BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind]
# go down dets and mark TPs and FPs
# Go down detections and mark TPs and FPs
nd = len(image_ids)
tp = np.zeros(nd)
fp = np.zeros(nd)
tp, fp = np.zeros(nd), np.zeros(nd)
for d in range(nd):
R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float)
ovmax = -np.inf
ovmax, jmax = -np.inf, 0
BBGT = R['bbox'].astype(float)
if BBGT.size > 0:
# compute overlaps
# intersection
# Compute overlaps: intersection first
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
......@@ -118,7 +123,7 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih
# union
# Union
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
(BBGT[:, 2] - BBGT[:, 0] + 1.) *
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
......@@ -149,8 +154,14 @@ def voc_bbox_eval(det_file, gt_recs, cls_name,
return rec, prec, ap
def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
IoU=0.5, use_07_metric=False):
def voc_segm_eval(
det_file,
seg_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
# 0. Constants
M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH
......@@ -175,8 +186,10 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
image_names.append(image_name)
# 2. Get predict pickle file for this class
with open(det_file, 'rb') as f: boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f: masks_pkl = cPickle.load(f)
with open(det_file, 'rb') as f:
boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f)
# 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs)
......@@ -185,7 +198,8 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
box_num += len(boxes_pkl[im_i])
# avoid IndexError if detecting nothing
if box_num == 0: return 0, 0, -1
if box_num == 0:
return 0, 0, -1
# 4. Re-organize all the predicted boxes
new_boxes = np.zeros((box_num, 5))
......@@ -223,11 +237,10 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
fp[i] = 1
continue
R = class_recs[image_name]
im_h, im_w = \
gt_recs[image_name]['height'], \
gt_recs[image_name]['width']
im_h = gt_recs[image_name]['height']
im_w = gt_recs[image_name]['width']
# decode mask
# Decode mask
ref_box = ref_boxes[i, :4]
mask = new_masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
......@@ -244,14 +257,14 @@ def voc_segm_eval(det_file, seg_file, gt_recs, cls_name,
pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]),
(x1 - ref_box[0]): (x2 - ref_box[0])]
# calculate max region overlap
ovmax = -1; jmax = -1
# Calculate max region overlap
ovmax, jmax = -1, -1
for j in range(len(R['det'])):
gt_mask_bound = R['bbox'][j].astype(int)
pred_mask_bound = new_boxes[i, :4].astype(int)
crop_mask = R['mask'][j][gt_mask_bound[1] : gt_mask_bound[3] + 1,
gt_mask_bound[0] : gt_mask_bound[2] + 1]
crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask)
......
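The +1 terms in the intersection and union arithmetic above follow the inclusive-pixel convention of PASCAL VOC (a box from x1 to x2 covers x2 - x1 + 1 pixels); a standalone sketch of the same overlap computation:

import numpy as np

def voc_iou(bb, BBGT):
    # Inclusive-pixel IoU, matching the +1 convention used above.
    ixmin = np.maximum(BBGT[:, 0], bb[0])
    iymin = np.maximum(BBGT[:, 1], bb[1])
    ixmax = np.minimum(BBGT[:, 2], bb[2])
    iymax = np.minimum(BBGT[:, 3], bb[3])
    iw = np.maximum(ixmax - ixmin + 1., 0.)
    ih = np.maximum(iymax - iymin + 1., 0.)
    inters = iw * ih
    uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
           (BBGT[:, 2] - BBGT[:, 0] + 1.) *
           (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
    return inters / uni

# Two identical 10x10 boxes overlap perfectly:
assert voc_iou(np.array([0., 0., 9., 9.]),
               np.array([[0., 0., 9., 9.]]))[0] == 1.0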
......@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.faster_rcnn.layers.proposal_layer import ProposalLayer
from lib.faster_rcnn.layers.proposal_target_layer import ProposalTargetLayer
......@@ -13,21 +13,21 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from multiprocessing import Process
from lib.core.config import cfg
from lib.utils.blob import im_list_to_blob
class BlobFetcher(Process):
class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self.Q1_in = self.Q2_in = self.Q_out = None
self.q1_in = self.q2_in = self.q_out = None
self.daemon = True
def get(self, Q_in):
processed_ims = []; ims_info = []; all_boxes = []
processed_ims, ims_info, all_boxes = [], [], []
for ix in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = Q_in.get()
processed_ims.append(im)
......@@ -46,7 +46,7 @@ class BlobFetcher(Process):
def run(self):
while True:
if self.Q1_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q_out.put(self.get(self.Q1_in))
elif self.Q2_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q_out.put(self.get(self.Q2_in))
\ No newline at end of file
if self.q1_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.q_out.put(self.get(self.q1_in))
elif self.q2_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.q_out.put(self.get(self.q2_in))
......@@ -13,16 +13,17 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from multiprocessing import Queue
import time
import dragon
import pprint
from multiprocessing import Queue
import dragon.core.mpi as mpi
from lib.core.config import cfg
import lib.utils.logger as logger
from lib.faster_rcnn.data.data_reader import DataReader
from lib.faster_rcnn.data.data_transformer import DataTransformer
from lib.faster_rcnn.data.blob_fetcher import BlobFetcher
from lib.utils import logger
class DataBatch(object):
......@@ -53,13 +54,14 @@ class DataBatch(object):
super(DataBatch, self).__init__()
# Init mpi
global_rank, local_rank, group_size = 0, 0, 1
if mpi.Is_Init():
idx, group = mpi.AllowParallel()
if idx != -1: # DataParallel
global_rank = mpi.Rank()
if dragon.mpi.is_init():
group = dragon.mpi.is_parallel()
if group is not None: # DataParallel
global_rank = dragon.mpi.rank()
group_size = len(group)
for i, node in enumerate(group):
if global_rank == node: local_rank = i
if global_rank == node:
local_rank = i
kwargs['group_size'] = group_size
# Configuration
......@@ -89,7 +91,7 @@ class DataBatch(object):
self._readers = []
for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].Q_out = self.Q1
self._readers[-1].q_out = self.Q1
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
......@@ -106,9 +108,9 @@ class DataBatch(object):
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers)
transformer.Q_in = self.Q1
transformer.Q1_out = self.Q21
transformer.Q2_out = self.Q22
transformer.q_in = self.Q1
transformer.q1_out = self.Q21
transformer.q2_out = self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
......@@ -117,15 +119,17 @@ class DataBatch(object):
self._fetchers = []
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.Q1_in = self.Q21
fetcher.Q2_in = self.Q22
fetcher.Q_out = self.Q3
fetcher.q1_in = self.Q21
fetcher.q2_in = self.Q22
fetcher.q_out = self.Q3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
# Prevent echoing from multiple nodes
if local_rank == 0: self.echo()
if local_rank == 0:
self.echo()
def cleanup():
def terminate(processes):
for process in processes:
......@@ -137,6 +141,7 @@ class DataBatch(object):
logger.info('Terminating DataTransformer ......')
terminate(self._readers)
logger.info('Terminating DataReader......')
import atexit
atexit.register(cleanup)
......
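The queues renamed above wire a three-stage pipeline: DataReader feeds Q1, each DataTransformer routes samples into Q21 or Q22 by aspect ratio, and BlobFetcher batches from whichever queue holds a full batch, so every blob contains images of consistent orientation. A minimal sketch of the routing rule:

from multiprocessing import Queue

q1_out, q2_out = Queue(), Queue()   # portrait / landscape queues

def route(im, im_scale, gt_boxes):
    # Same rule as DataTransformer.run(): aspect ratio > 1 means
    # height > width, so the sample goes to the portrait queue.
    aspect_ratio = float(im.shape[0]) / im.shape[1]
    if aspect_ratio > 1.0:
        q1_out.put((im, im_scale, gt_boxes))
    else:
        q2_out.put((im, im_scale, gt_boxes))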
......@@ -14,21 +14,17 @@ from __future__ import division
from __future__ import print_function
import math
import numpy
import multiprocessing
import numpy
from dragon import config as _cfg
from dragon.tools import db as _db
from dragon.tools import db
from lib.core.config import cfg
class DataReader(multiprocessing.Process):
"""DataReader is deployed to queue encoded str from `LMDB`_.
It adaptively partitions and shuffles records over all distributed nodes.
"""Collect encoded str from `LMDB`_.
"""
def __init__(self, **kwargs):
"""Construct a ``DataReader``.
Partition and shuffle records over distributed nodes.
Parameters
----------
......@@ -40,14 +36,20 @@ class DataReader(multiprocessing.Process):
The number of chunks to split.
"""
def __init__(self, **kwargs):
"""Create a DataReader."""
super(DataReader, self).__init__()
self._source = kwargs.get('source', '')
self._use_shuffle = kwargs.get('shuffle', False)
self._num_chunks = kwargs.get('num_chunks', 2048)
self._part_idx, self._num_parts = 0, 1
self._cursor, self._chunk_cursor = 0, 0
self._rng_seed = _cfg.GetRandomSeed()
self.Q_out = None
self._chunk_size, self._perm_size = 0, 0
self._head, self._tail, self._num_entries = 0, 0, 0
self._db, self._zfill, self._perm = None, None, None
self._rng_seed = cfg.RNG_SEED
self.q_out = None
self.daemon = True
def element(self):
......@@ -69,10 +71,6 @@ class DataReader(multiprocessing.Process):
target : int
The key of the record.
Returns
-------
None
Notes
-----
The redirection reopens the database.
......@@ -88,17 +86,12 @@ class DataReader(multiprocessing.Process):
self._db.set(str(target).zfill(self._zfill))
def reset(self):
"""Reset the cursor and environment.
Returns
-------
None
"""
"""Reset the cursor and environment."""
if self._num_parts > 1 or self._use_shuffle:
self._chunk_cursor = 0
self._part_idx = (self._part_idx + 1) % self._num_parts
if self._use_shuffle: self._perm = numpy.random.permutation(self._perm_size)
if self._use_shuffle:
self._perm = numpy.random.permutation(self._perm_size)
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._tail = self._head * self._chunk_size
if self._head >= self._num_entries: self.next_chunk()
......@@ -109,26 +102,15 @@ class DataReader(multiprocessing.Process):
self.redirect(self._head)
def next_record(self):
"""Step the cursor of records.
Returns
-------
None
"""
"""Step the cursor of records."""
self._db.next()
self._cursor += 1
def next_chunk(self):
"""Step the cursor of shuffling chunks.
Returns
-------
None
"""
"""Step the cursor of chunks."""
self._chunk_cursor += 1
if self._chunk_cursor >= self._perm_size: self.reset()
if self._chunk_cursor >= self._perm_size:
self.reset()
else:
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._head = self._head * self._chunk_size
......@@ -140,18 +122,12 @@ class DataReader(multiprocessing.Process):
self.redirect(self._head)
def run(self):
"""Start the process.
Returns
-------
None
"""
"""Start the process."""
# Fix seed
numpy.random.seed(self._rng_seed)
# Init db
self._db = _db.LMDB()
self._db = db.LMDB()
self._db.open(self._source)
self._zfill = self._db.zfill()
self._num_entries = self._db.num_entries()
......@@ -189,9 +165,10 @@ class DataReader(multiprocessing.Process):
# Run!
while True:
self.Q_out.put(self.element())
self.q_out.put(self.element())
self.next_record()
if self._cursor >= self._tail:
if self._num_parts > 1 or self._use_shuffle:
self.next_chunk()
else: self.reset()
\ No newline at end of file
else:
self.reset()
......@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from multiprocessing import Process
import multiprocessing
import numpy as np
import numpy.random as npr
......@@ -28,11 +28,11 @@ except ImportError as e:
from lib.core.config import cfg
from lib.proto import anno_pb2 as pb
from lib.utils import logger
from lib.utils.blob import prep_im_for_blob
import lib.utils.logger as logger
class DataTransformer(Process):
class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._rng_seed = cfg.RNG_SEED
......@@ -42,60 +42,64 @@ class DataTransformer(Process):
self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self._queues = []
self.Q_in = self.Q1_out = self.Q2_out = None
self.q_in = self.q1_out = self.q2_out = None
self.daemon = True
def make_record(
def make_roi_dict(
self,
ann_datum,
im_scale,
flip=False,
apply_flip=False,
offsets=None,
):
annotations = ann_datum.annotation
n_objects = 0
if not self._use_diff:
for ann in annotations:
if not ann.difficult: n_objects += 1
else: n_objects = len(annotations)
if not ann.difficult:
n_objects += 1
else:
n_objects = len(annotations)
record = {
roi_dict = {
'width': ann_datum.datum.width,
'height': ann_datum.datum.height,
'gt_classes': np.zeros((n_objects,), dtype=np.int32),
'boxes': np.zeros((n_objects, 4), dtype=np.float32),
'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), 'float32'),
}
# Filter the difficult instances
instance_idx = 0
rec_idx = 0
for ann in annotations:
if not self._use_diff and ann.difficult: continue
record['boxes'][instance_idx, :] = [
if not self._use_diff and ann.difficult:
continue
roi_dict['boxes'][rec_idx, :] = [
max(0, ann.x1),
max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1),
]
record['gt_classes'][instance_idx] = self._class_to_ind[ann.name]
instance_idx += 1
roi_dict['gt_classes'][rec_idx] = self._class_to_ind[ann.name]
rec_idx += 1
# Flip the boxes if necessary
if flip:
record['boxes'] = _flip_boxes(
record['boxes'], record['width'])
if apply_flip:
roi_dict['boxes'] = _flip_boxes(
roi_dict['boxes'], roi_dict['width'])
# Scale the boxes to the detection scale
record['boxes'] *= im_scale
roi_dict['boxes'] *= im_scale
# Apply the offsets from scale jitter
if offsets is not None:
record['boxes'][:, 0::2] += offsets[0]
record['boxes'][:, 1::2] += offsets[1]
record['boxes'][:, :] = np.minimum(
np.maximum(record['boxes'][:, :], 0),
[offsets[2][1] - 1, offsets[2][0] - 1] * 2)
roi_dict['boxes'][:, 0::2] += offsets[0]
roi_dict['boxes'][:, 1::2] += offsets[1]
roi_dict['boxes'][:, :] = np.minimum(
np.maximum(roi_dict['boxes'][:, :], 0),
[offsets[2][1] - 1, offsets[2][0] - 1] * 2,
)
return record
return roi_dict
@classmethod
def get_image(cls, serialized):
......@@ -127,20 +131,23 @@ class DataTransformer(Process):
datum.ParseFromString(serialized)
im_datum = datum.datum
im = np.fromstring(im_datum.data, np.uint8)
if im_datum.encoded is True: im = cv2.imdecode(im, -1)
else: im = im.reshape((im_datum.height, im_datum.width, im_datum.channels))
if im_datum.encoded is True:
im = cv2.imdecode(im, -1)
else:
h, w = im_datum.height, im_datum.width
im = im.reshape((h, w, im_datum.channels))
# Scale
scale_indices = npr.randint(0, high=len(cfg.TRAIN.SCALES))
scale_indices = npr.randint(len(cfg.TRAIN.SCALES))
target_size = cfg.TRAIN.SCALES[scale_indices]
im, im_scale, jitter = prep_im_for_blob(im, target_size, cfg.TRAIN.MAX_SIZE)
# Flip
flip = False
apply_flip = False
if self._use_flipped:
if npr.randint(0, 2) > 0:
im = im[:, ::-1, :]
flip = True
apply_flip = True
# Random Crop or RandomPad
offsets = None
......@@ -153,57 +160,63 @@ class DataTransformer(Process):
# To a square (target_size, target_size)
im, offsets = _get_image_with_target_size([target_size] * 2, im)
# Datum -> Record
rec = self.make_record(datum, im_scale, flip, offsets)
# Datum -> RoIDict
roi_dict = self.make_roi_dict(datum, im_scale, apply_flip, offsets)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
gt_boxes = np.empty((len(rec['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, 0:4], gt_boxes[:, 4] = rec['boxes'], rec['gt_classes']
gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
return im, im_scale, gt_boxes
def run(self):
npr.seed(self._rng_seed)
while True:
serialized = self.Q_in.get()
serialized = self.q_in.get()
data = self.get(serialized)
# Ensure that there is at least one ground-truth box
if len(data[2]) < 1: continue
if len(data[2]) < 1:
continue
aspect_ratio = float(data[0].shape[0]) / data[0].shape[1]
if aspect_ratio > 1.0: self.Q1_out.put(data)
else: self.Q2_out.put(data)
if aspect_ratio > 1.0:
self.q1_out.put(data)
else:
self.q2_out.put(data)
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
oldx1 = boxes[:, 0].copy()
oldx2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - oldx2 - 1
flip_boxes[:, 2] = width - oldx1 - 1
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes
def _get_image_with_target_size(target_size, im):
im_shape = list(im.shape)
width_diff = target_size[1] - im_shape[1]
offset_crop_width = np.random.randint(0, max(-width_diff, 0) + 1)
offset_pad_width = np.random.randint(0, max(width_diff, 0) + 1)
def _get_image_with_target_size(target_size, img):
im_shape = list(img.shape)
height_diff = target_size[0] - im_shape[0]
offset_crop_height = np.random.randint(0, max(-height_diff, 0) + 1)
offset_pad_height = np.random.randint(0, max(height_diff, 0) + 1)
im_shape[0 : 2] = target_size
new_im = np.empty(im_shape, dtype=im.dtype)
new_im[:] = cfg.PIXEL_MEANS
new_im[offset_pad_height:offset_pad_height + im.shape[0],
offset_pad_width:offset_pad_width + im.shape[1]] = \
im[offset_crop_height:offset_crop_height + target_size[0],
offset_crop_width:offset_crop_width + target_size[1]]
width_diff = target_size[1] - im_shape[1]
return new_im, (offset_pad_width - offset_crop_width,
offset_pad_height - offset_crop_height, target_size)
\ No newline at end of file
ofs_crop_width = np.random.randint(max(-width_diff, 0) + 1)
ofs_pad_width = np.random.randint(max(width_diff, 0) + 1)
ofs_crop_height = np.random.randint(max(-height_diff, 0) + 1)
ofs_pad_height = np.random.randint(max(height_diff, 0) + 1)
im_shape[:2] = target_size
new_img = np.empty(im_shape, dtype=img.dtype)
new_img[:] = cfg.PIXEL_MEANS
new_img[ofs_pad_height:ofs_pad_height + img.shape[0],
ofs_pad_width:ofs_pad_width + img.shape[1]] = \
img[ofs_crop_height:ofs_crop_height + target_size[0],
ofs_crop_width:ofs_crop_width + target_size[1]]
return new_img, (
ofs_pad_width - ofs_crop_width,
ofs_pad_height - ofs_crop_height,
target_size,
)
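A quick worked check of _flip_boxes above, assuming an image of width 100:

import numpy as np

boxes = np.array([[10., 5., 29., 25.]])   # x1, y1, x2, y2
width = 100
flipped = boxes.copy()
flipped[:, 0] = width - boxes[:, 2] - 1   # 100 - 29 - 1 = 70
flipped[:, 2] = width - boxes[:, 0] - 1   # 100 - 10 - 1 = 89
# The inclusive 20-pixel width (29 - 10 + 1) is preserved: 89 - 70 + 1 == 20
assert flipped[0, 2] - flipped[0, 0] == boxes[0, 2] - boxes[0, 0]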
......@@ -32,7 +32,7 @@ import numpy as np
# -79 -167 96 184
# -167 -343 184 360
#array([[ -83., -39., 100., 56.],
# array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.],
# [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.],
......@@ -42,8 +42,12 @@ import numpy as np
# [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]])
def generate_anchors(base_size=16, ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6)):
def generate_anchors(
base_size=16,
ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6),
):
"""
Generate anchor (reference) windows by enumerating aspect ratios X
scales wrt a reference (0, 0, 15, 15) window.
......@@ -55,22 +59,25 @@ def generate_anchors(base_size=16, ratios=(0.5, 1, 2),
return anchors
def generate_anchors_v2(stride=16, ratios=(0.5, 1, 2),
sizes=(32, 64, 128, 256, 512)):
def generate_anchors_v2(
stride=16,
ratios=(0.5, 1, 2),
sizes=(32, 64, 128, 256, 512),
):
"""
Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
are centered on stride / 2, have (approximate) sqrt areas of the specified
sizes, and aspect ratios as given.
"""
return generate_anchors(stride, ratios,
np.array(sizes, dtype=np.float) / stride)
return generate_anchors(
base_size=stride,
ratios=ratios,
scales=np.array(sizes, dtype=np.float) / stride,
)
def _whctrs(anchor):
"""
Return width, height, x center, and y center for an anchor (window).
"""
"""Return width, height, x center, and y center for an anchor (window)."""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
......@@ -83,7 +90,6 @@ def _mkanchors(ws, hs, x_ctr, y_ctr):
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
......@@ -94,10 +100,7 @@ def _mkanchors(ws, hs, x_ctr, y_ctr):
def _ratio_enum(anchor, ratios):
"""
Enumerate a set of anchors for each aspect ratio wrt an anchor.
"""
"""Enumerate a set of anchors for each aspect ratio wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
......@@ -108,10 +111,7 @@ def _ratio_enum(anchor, ratios):
def _scale_enum(anchor, scales):
"""
Enumerate a set of anchors for each scale wrt an anchor.
"""
"""Enumerate a set of anchors for each scale wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales
......
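The commented anchor array at the top of this file can be reproduced by calling the function with its defaults; a minimal usage sketch:

import numpy as np
from lib.faster_rcnn.generate_anchors import generate_anchors

# 3 aspect ratios x 3 scales around the (0, 0, 15, 15) reference window
# yield the 9 anchors listed in the comment block above.
anchors = generate_anchors(base_size=16,
                           ratios=(0.5, 1, 2),
                           scales=2 ** np.arange(3, 6))
assert anchors.shape == (9, 4)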
......@@ -19,9 +19,10 @@ import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.blob import to_tensor
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors
......@@ -32,10 +33,9 @@ class AnchorTargetLayer(torch.nn.Module):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
# C4 backbone takes the first stride
self.scales, self.stride, self.ratios = \
cfg.RPN.SCALES, \
cfg.RPN.STRIDES[0], \
cfg.RPN.ASPECT_RATIOS
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
......@@ -61,11 +61,13 @@ class AnchorTargetLayer(torch.nn.Module):
"""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \
.format(num_images, len(gt_boxes_wide)))
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
height, width = features[0].shape[-2:]
......@@ -85,7 +87,7 @@ class AnchorTargetLayer(torch.nn.Module):
all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A)
# label: 1 is positive, 0 is negative, -1 is dont care
# label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
......@@ -101,8 +103,8 @@ class AnchorTargetLayer(torch.nn.Module):
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] # height
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
......@@ -143,7 +145,10 @@ class AnchorTargetLayer(torch.nn.Module):
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False)
fg_inds,
size=len(fg_inds) - num_fg,
replace=False,
)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
......@@ -152,12 +157,17 @@ class AnchorTargetLayer(torch.nn.Module):
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False)
bg_inds,
size=len(bg_inds) - num_bg,
replace=False,
)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], 0:4])
ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], 0:4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
......@@ -169,34 +179,26 @@ class AnchorTargetLayer(torch.nn.Module):
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
# labels
labels = all_labels.reshape(
(num_images, height, width, A)).transpose(0, 3, 1, 2)
labels = labels.reshape((num_images, total_anchors))
labels = all_labels \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, total_anchors))
# bbox_targets
bbox_targets = all_bbox_targets.reshape(
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2)
bbox_targets = all_bbox_targets \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
# bbox_inside_weights
bbox_inside_weights = all_bbox_inside_weights.reshape(
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2)
bbox_inside_weights = all_bbox_inside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
# bbox_outside_weights
bbox_outside_weights = all_bbox_outside_weights.reshape(
(num_images, height, width, A * 4)).transpose(0, 3, 1, 2)
bbox_outside_weights = all_bbox_outside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights),
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
\ No newline at end of file
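The removed _dismantle_gt_boxes helper (now imported as dismantle_gt_boxes from lib.utils.boxes) splits the packed gt-boxes blob by the image index carried in its last column; a standalone sketch of the same behavior:

import numpy as np

def dismantle_gt_boxes(gt_boxes, num_images):
    # Group rows by the image index stored in the last column.
    return [gt_boxes[np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]]
            for ix in range(num_images)]

packed = np.array([[0., 0., 9., 9., 1., 0.],      # box of image 0
                   [5., 5., 19., 19., 2., 1.]])   # box of image 1
per_image = dismantle_gt_boxes(packed, num_images=2)
assert len(per_image) == 2 and per_image[1].shape == (1, 6)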
......@@ -33,10 +33,8 @@ class DataLayer(torch.nn.Module):
})
def forward(self):
# Get a mini-batch from the Queue
blobs = self.data_batch.get()
# Zero-Copy from numpy
blobs['data'] = torch.from_numpy(blobs['data'])
# Switch the data to Device
blobs['data'].cuda(cfg.GPU_ID)
return blobs
\ No newline at end of file
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
......@@ -9,27 +9,35 @@
#
# --------------------------------------------------------
import numpy as np
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.nms.nms_wrapper import nms
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils.bbox_transform import bbox_transform_inv, clip_boxes
from lib.nms.nms_wrapper import nms
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box
"""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales, self.stride, self.ratios = \
cfg.RPN.SCALES, cfg.RPN.STRIDES[0], cfg.RPN.ASPECT_RATIOS
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = generate_anchors(
......@@ -61,7 +69,8 @@ class ProposalLayer(torch.nn.Module):
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
anchors = self.base_anchors.reshape((1, A, 4)) + \
anchors = \
self.base_anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = anchors.reshape((K * A, 4))
......@@ -69,8 +78,6 @@ class ProposalLayer(torch.nn.Module):
batch_rois = []
# scores & deltas are (1, A, H, W) format
# Transpose to (1, H, W, A)
batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1))
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1))
......@@ -95,11 +102,11 @@ class ProposalLayer(torch.nn.Module):
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_boxes(proposals, ims_info[ix, :2])
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2])
keep = _filter_boxes(proposals, min_size * ims_info[ix, 2])
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
......@@ -107,7 +114,8 @@ class ProposalLayer(torch.nn.Module):
# 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: keep = keep[:post_nms_topN]
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :]
# Output rois blob
......@@ -118,13 +126,7 @@ class ProposalLayer(torch.nn.Module):
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN': return rpn_rois
else: return [to_tensor(rpn_rois)]
def _filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min_size."""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
if cfg_key == 'TRAIN':
return rpn_rois
else:
return [blob_to_tensor(rpn_rois)]
......@@ -9,22 +9,24 @@
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets.
"""Assign object detection proposals to ground-truth targets."""
"""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
......@@ -34,8 +36,8 @@ class ProposalTargetLayer(torch.nn.Module):
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
......@@ -50,14 +52,12 @@ class ProposalTargetLayer(torch.nn.Module):
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
......@@ -73,11 +73,11 @@ class ProposalTargetLayer(torch.nn.Module):
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return {
'rois': [to_tensor(batch_outputs['rois'])],
'labels': to_tensor(batch_outputs['labels']),
'bbox_targets': to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': to_tensor(batch_outputs['bbox_outside_weights']),
'rois': [blob_to_tensor(batch_outputs['rois'])],
'labels': blob_to_tensor(batch_outputs['labels']),
'bbox_targets': blob_to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': blob_to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': blob_to_tensor(batch_outputs['bbox_outside_weights']),
}
......@@ -109,7 +109,6 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
......@@ -117,12 +116,18 @@ def _compute_targets(ex_rois, gt_rois, labels):
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Generate a random sample of RoIs comprising foreground and background examples."""
def _sample_rois(
all_rois,
gt_boxes,
fg_rois_per_image,
rois_per_image,
num_classes,
):
"""Generate a random sample of RoIs."""
overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float),
)
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
......@@ -164,11 +169,6 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas
return labels, rois, bbox_targets, bbox_inside_weights
def _dismantle_gt_boxes(gt_boxes, num_images):
return [gt_boxes[np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]] \
for ix in range(num_images)]
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
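For context, the sample sizes used by _sample_rois come from two config values; a worked example with illustrative TRAIN.BATCH_SIZE = 128 and TRAIN.FG_FRACTION = 0.25:

import numpy as np

rois_per_image = 128     # cfg.TRAIN.BATCH_SIZE (illustrative)
fg_fraction = 0.25       # cfg.TRAIN.FG_FRACTION (illustrative)
fg_rois_per_image = np.round(fg_fraction * rois_per_image)   # 32 foreground
bg_rois_per_image = rois_per_image - fg_rois_per_image       # 96 background
assert fg_rois_per_image + bg_rois_per_image == rois_per_image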
......@@ -13,27 +13,23 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
import cPickle
except:
import pickle as cPickle
import numpy as np
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image
from lib.utils.bbox_transform import clip_boxes, bbox_transform_inv
from lib.nms.nms_wrapper import nms, soft_nms
from lib.utils.timer import Timer
from lib.utils.blob import im_list_to_blob, to_array
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales.
"""
"""Detect a image, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_image)
......@@ -42,25 +38,30 @@ def im_detect(detector, raw_image):
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
blobs['data'] = torch.from_numpy(blobs['data']).cuda(cfg.GPU_ID)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results
batch_rois = to_array(outputs['rois'])
batch_scores = to_array(outputs['cls_prob'])
batch_deltas = to_array(outputs['bbox_pred'])
batch_rois = tensor_to_blob(outputs['rois'])
batch_scores = tensor_to_blob(outputs['cls_prob'])
batch_deltas = tensor_to_blob(outputs['bbox_pred'])
batch_boxes = bbox_transform_inv(
batch_rois[:, 1:5], batch_deltas, cfg.BBOX_REG_WEIGHTS)
scores_wide = []; boxes_wide = []
boxes=batch_rois[:, 1:5],
deltas=batch_deltas,
weights=cfg.BBOX_REG_WEIGHTS,
)
scores_wide, boxes_wide = [], []
for im_idx in range(len(ims)):
indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0]
boxes = batch_boxes[indices]
boxes /= ims_scale[im_idx]
clip_boxes(boxes, raw_image.shape)
clip_tiled_boxes(boxes, raw_image.shape)
scores_wide.append(batch_scores[indices])
boxes_wide.append(boxes)
......@@ -69,12 +70,13 @@ def im_detect(detector, raw_image):
def test_net(detector, server):
classes, num_images, num_classes = \
server.classes, server.num_images, server.num_classes
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect' : Timer(), 'misc' : Timer()}
_t = {'im_detect': Timer(), 'misc': Timer()}
for i in range(num_images):
image_id, raw_image = server.get_image()
......@@ -89,22 +91,27 @@ def test_net(detector, server):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4]
cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).\
astype(np.float32, copy=False)
cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(cls_dets, cfg.TEST.NMS,
keep = soft_nms(
cls_detections, cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA)
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
cls_dets = cls_dets[keep, :]
all_boxes[j][i] = cls_dets
boxes_this_image.append(cls_dets)
keep = nms(cls_detections, cfg.TEST.NMS, force_cpu=True)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_image, classes, boxes_this_image,
vis_one_image(
raw_image, classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
filename=server.get_save_filename(image_id))
filename=server.get_save_filename(image_id),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
......@@ -112,7 +119,8 @@ def test_net(detector, server):
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores)
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
......@@ -120,7 +128,7 @@ def test_net(detector, server):
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(i + 1, num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='')
......
......@@ -13,16 +13,18 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
import lib.utils.logger as logger
from lib.utils.blob import to_tensor
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class AnchorTargetLayer(torch.nn.Module):
......@@ -31,14 +33,14 @@ class AnchorTargetLayer(torch.nn.Module):
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
self.scales, self.strides, self.ratios = \
cfg.RPN.SCALES, \
cfg.RPN.STRIDES, \
cfg.RPN.ASPECT_RATIOS
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides)))
.format(len(self.scales), len(self.strides))
)
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
......@@ -46,9 +48,9 @@ class AnchorTargetLayer(torch.nn.Module):
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size = self.strides[i]
scale = self.scales[i]
if not isinstance(scale, list): scale = [scale]
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
......@@ -59,16 +61,17 @@ class AnchorTargetLayer(torch.nn.Module):
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \
.format(num_images, len(gt_boxes_wide)))
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
all_anchors = []; total_anchors = 0
all_anchors, total_anchors = [], 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
......@@ -107,8 +110,8 @@ class AnchorTargetLayer(torch.nn.Module):
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] # height
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
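# (Clarifying note) columns 0..3 are x1, y1, x2, y2, so the two upper
# bounds compare x2 against the image width and y2 against the image
# height; a negative straddle threshold keeps all anchors (else branch).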
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
......@@ -180,16 +183,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights),
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
\ No newline at end of file
......@@ -9,39 +9,49 @@
#
# ------------------------------------------------------------
import numpy as np
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms
from lib.utils import logger
from lib.utils.blob import to_tensor
from lib.utils.bbox_transform import bbox_transform_inv, clip_boxes
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box.
"""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales, self.strides, self.ratios = \
cfg.RPN.SCALES, cfg.RPN.STRIDES, cfg.RPN.ASPECT_RATIOS
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides)))
.format(len(self.scales), len(self.strides))
)
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size = self.strides[i]
scale = self.scales[i]
if not isinstance(scale, list): scale = [scale]
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
......@@ -92,7 +102,8 @@ class ProposalLayer(torch.nn.Module):
# Prepare for the outputs
batch_rois = []
batch_scores = cls_prob.numpy(True)
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
batch_deltas = bbox_pred.numpy(True) \
.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
# Extract RoIs separately
for ix in range(num_images):
......@@ -115,10 +126,10 @@ class ProposalLayer(torch.nn.Module):
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_boxes(proposals, ims_info[ix, :2])
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold
keep = _filter_boxes(proposals, min_size * ims_info[ix, 2])
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
......@@ -126,7 +137,8 @@ class ProposalLayer(torch.nn.Module):
# 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: keep = keep[:post_nms_topN]
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :]
# Output rois blob
......@@ -151,28 +163,19 @@ class ProposalLayer(torch.nn.Module):
lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling
all_rois.append(to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
all_rois.append(blob_to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else:
all_rois.append(to_tensor(rpn_rois[lv_indices]))
all_rois.append(blob_to_tensor(rpn_rois[lv_indices]))
return all_rois
def _filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min_size.
"""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""Determine which FPN level each RoI in a set of RoIs should map to based
on the heuristic in the FPN paper.
"""
if len(rois) == 0: return []
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
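# Sketch of the assignment heuristic from the FPN paper (Eq. 1),
# assuming the canonical settings (scale 224 at level k0 = 4):
#   k = floor(k0 + log2(sqrt(w * h) / 224))
# clipped to [k_min, k_max]; e.g. a 112x112 RoI maps one level below
# the canonical one, since log2(112 / 224) = -1.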
......
......@@ -9,14 +9,19 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.bbox_transform import bbox_transform
class ProposalTargetLayer(torch.nn.Module):
......@@ -36,26 +41,19 @@ class ProposalTargetLayer(torch.nn.Module):
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
}
def _map_rois(self, inputs, fake_outputs, outputs, keys, levels):
f = lambda a, b, indices: a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(f(inputs[i], fake_outputs[key], inds))
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
outputs = dict(map(lambda a, b: (a, b), keys, [[] for key in keys]))
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for key in keys]))
outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
......@@ -65,11 +63,9 @@ class ProposalTargetLayer(torch.nn.Module):
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
# Sample rois with labels & bbox targets
labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
......@@ -94,14 +90,20 @@ class ProposalTargetLayer(torch.nn.Module):
K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)]
_fmap_rois([batch_outputs[key] for key in keys], self.fake_outputs, outputs, keys, lvs_indices)
_fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
outputs=outputs,
keys=keys,
levels=lvs_indices,
)
return {
'rois': [to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': to_tensor(np.vstack(outputs['bbox_outside_weights'])),
'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
......@@ -115,6 +117,7 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
......@@ -131,7 +134,6 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes):
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
......@@ -140,10 +142,12 @@ def _compute_targets(ex_rois, gt_rois, labels):
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""Determine which FPN level each RoI in a set of RoIs should map to based
on the heuristic in the FPN paper.
"""
if len(rois) == 0: return []
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
......@@ -154,9 +158,7 @@ def _map_rois_to_fpn_levels(rois, k_min, k_max):
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Generate a random sample of RoIs comprising foreground and background
examples.
"""
"""Sample a batch of RoIs comprising foreground and background examples."""
# overlaps: (rois x gt_boxes)
overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
......@@ -203,19 +205,15 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas
return labels, rois, bbox_targets, bbox_inside_weights
def _dismantle_gt_boxes(gt_boxes, num_images):
return [gt_boxes[np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]] \
for ix in range(num_images)]
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
def _fmap_rois(inputs, fake_outputs, outputs, keys, levels):
f = lambda a, b, indices: a[indices] if len(indices) > 0 else b
def impl(a, b, indices):
return a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(f(inputs[i], fake_outputs[key], inds))
\ No newline at end of file
outputs[key].append(impl(inputs[i], fake_outputs[key], inds))
......@@ -9,13 +9,17 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Import custom modules
from lib.modeling.base import Bootstarp
from lib.modeling.base import RPNDecoder
from lib.modeling.base import RetinaNetDecoder
from lib.modeling.base import conv1x1, conv3x3, bn, affine
from lib.modeling.fpn import FPN
from lib.modeling.rpn import RPN
from lib.modeling.base import affine
from lib.modeling.base import bn
from lib.modeling.base import conv1x1
from lib.modeling.base import conv3x3
from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.fpn import FPN
from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD
......@@ -15,7 +15,9 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.modeling import conv1x1, conv3x3, bn, affine
from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class WideResBlock(torch.nn.Module):
......@@ -112,8 +114,10 @@ class AirNet(torch.nn.Module):
)
self.layer1 = self.make_blocks(filters[0], blocks[0])
self.layer2 = self.make_blocks(filters[1], blocks[1], 2)
if num_stages >= 4: self.layer3 = self.make_blocks(filters[2], blocks[2], 2)
if num_stages >= 5: self.layer4 = self.make_blocks(filters[3], blocks[3], 2)
if num_stages >= 4:
self.layer3 = self.make_blocks(filters[2], blocks[2], 2)
if num_stages >= 5:
self.layer4 = self.make_blocks(filters[3], blocks[3], 2)
self.reset_parameters()
def reset_parameters(self):
......@@ -165,7 +169,14 @@ def airnet(num_stages):
)
return AirNet(blocks, num_stages)
def make_airnet_(): return airnet(5)
def make_airnet_3b(): return airnet(3)
def make_airnet_4b(): return airnet(4)
def make_airnet_5b(): return airnet(5)
......@@ -17,99 +17,20 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import to_tensor
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
class Bootstarp(torch.nn.Module):
"""Extended operator to process the images."""
def __init__(self):
super(Bootstarp, self).__init__()
self.dtype = cfg.MODEL.DATA_TYPE.lower()
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'ImageData',
'arguments': {
'dtype': self.dtype,
'data_format': 'NCHW',
'mean_values': cfg.PIXEL_MEANS,
}
}
def forward(self, x):
inputs, outputs = [x], [self.register_output()]
return self.run(inputs, outputs)
class RPNDecoder(torch.nn.Module):
"""Generate proposal regions from RPN."""
def __init__(self):
super(RPNDecoder, self).__init__()
self.register_op()
self.K = (cfg.FPN.ROI_MAX_LEVEL -
cfg.FPN.ROI_MIN_LEVEL + 1) \
if len(cfg.RPN.STRIDES) > 1 else 1
def register_op(self):
self.op_meta = {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RCNN',
'strides': cfg.RPN.STRIDES,
'ratios': [float(e) for e in cfg.RPN.ASPECT_RATIOS],
'scales': [float(e) for e in cfg.RPN.SCALES],
'pre_nms_top_n': cfg.TEST.RPN_PRE_NMS_TOP_N,
'post_nms_top_n': cfg.TEST.RPN_POST_NMS_TOP_N,
'nms_thresh': cfg.TEST.RPN_NMS_THRESH,
'min_size': cfg.TEST.RPN_MIN_SIZE,
'min_level': cfg.FPN.ROI_MIN_LEVEL,
'max_level': cfg.FPN.ROI_MAX_LEVEL,
'canonical_scale': cfg.FPN.ROI_CANONICAL_SCALE,
'canonical_level': cfg.FPN.ROI_CANONICAL_LEVEL,
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, to_tensor(ims_info)]
outputs = [self.register_output() for _ in range(self.K)]
outputs = self.run(inputs, outputs)
return outputs if isinstance(outputs, list) else [outputs]
class RetinaNetDecoder(torch.nn.Module):
"""Generate proposal regions from retinanet."""
def __init__(self):
super(RetinaNetDecoder, self).__init__()
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
self.strides = [int(2. ** lvl) for lvl in range(k_min, k_max + 1)]
self.scales = [cfg.RETINANET.ANCHOR_SCALE *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RETINANET',
'strides': self.strides,
'scales': self.scales,
'ratios': [float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
'pre_nms_top_n': cfg.RETINANET.PRE_NMS_TOP_N,
'score_thresh': cfg.TEST.SCORE_THRESH,
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, to_tensor(ims_info)]
outputs = [self.register_output()]
return self.run(inputs, outputs)
def bn(dim_in, eps=1e-5):
"""The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps)
def conv1x1(dim_in, dim_out, stride=1, bias=False):
......@@ -133,18 +54,3 @@ def conv3x3(dim_in, dim_out, stride=1, bias=False):
padding=1,
bias=bias,
)
def bn(dim_in, eps=1e-5):
"""The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps)
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
\ No newline at end of file
......@@ -13,22 +13,19 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import importlib
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.utils.logger import is_root
from lib.modeling import FPN
from lib.modeling import RPN
from lib.modeling import FastRCNN
from lib.modeling import RetinaNet
from lib.modeling import SSD
from lib.modeling.factory import get_body_func
from lib.modeling import (
Bootstarp,
FPN,
RPN,
FastRCNN,
RetinaNet,
SSD,
)
from lib.ops.modules import Bootstrap
from lib.utils.logger import is_root
class Detector(torch.nn.Module):
......@@ -47,7 +44,7 @@ class Detector(torch.nn.Module):
# + Data Loader
self.data_layer = importlib.import_module(
'lib.{}'.format(model)).DataLayer
self.bootstarp = Bootstarp()
self.bootstrap = Bootstrap()
# + Feature Extractor
self.body = get_body_func(body)()
......@@ -84,8 +81,11 @@ class Detector(torch.nn.Module):
The path of the weights file.
"""
self.load_state_dict(torch.load(weights),
strict=False, verbose=is_root())
self.load_state_dict(
torch.load(weights),
strict=False,
verbose=is_root(),
)
def forward(self, inputs=None):
"""Compute the detection outputs.
......@@ -107,9 +107,9 @@ class Detector(torch.nn.Module):
# 1. Extract features
# Process the data:
# 1) NHWC => NCHW
# 2) Uint8 => Float32 or Float16
# 2) uint8 => float32 or float16
# 3) Mean subtraction
image_data = self.bootstarp(inputs['data'])
image_data = self.bootstrap(inputs['data'])
features = self.body(image_data)
# 2. Apply the FPN to enhance features if necessary
......@@ -117,7 +117,7 @@ class Detector(torch.nn.Module):
features = self.fpn(features)
# 3. Collect detection outputs
outputs = OrderedDict()
outputs = collections.OrderedDict()
# 3.1 Feature -> RPN -> Fast R-CNN
if hasattr(self, 'rpn'):
......
......@@ -13,27 +13,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import importlib
from collections import defaultdict
_STORE = defaultdict(dict)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format(
name, desc, ', '.join(sets.keys())))
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
module = importlib.import_module(module_name)
return getattr(module, func_name)
except ImportError as e:
raise ValueError('Can not import module from: ' + module_name)
_STORE = collections.defaultdict(dict)
###########################################
......@@ -59,6 +43,23 @@ for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format(
name, desc, ', '.join(sets.keys())))
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
module = importlib.import_module(module_name)
return getattr(module, func_name)
except ImportError as e:
raise ValueError('Cannot import module from: ' + module_name)
def get_body_func(name):
return get_template_func(
name, _STORE['BODY'], 'Body')
......@@ -13,11 +13,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling import RPNDecoder
from lib.ops.modules import RPNDecoder
class FastRCNN(torch.nn.Module):
......@@ -50,11 +50,11 @@ class FastRCNN(torch.nn.Module):
self.relu = torch.nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False)
self.roi_func = {
'RoIPool': torch.roi_pool,
'RoIAlign': torch.roi_align,
'RoIPool': torch.vision.ops.roi_pool,
'RoIAlign': torch.vision.ops.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD]
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1.)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1., reduction='batch_size')
# Compute spatial scales for multiple strides
roi_levels = [level for level in range(
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)]
......@@ -66,13 +66,16 @@ class FastRCNN(torch.nn.Module):
torch.nn.init.normal_(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters():
if 'bias' in name: torch.nn.init.constant_(p, 0)
if 'bias' in name:
torch.nn.init.constant_(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func(
feature, rois,
pooled_h=cfg.FRCNN.ROI_XFORM_RESOLUTION,
pooled_w=cfg.FRCNN.ROI_XFORM_RESOLUTION,
output_size=(
cfg.FRCNN.ROI_XFORM_RESOLUTION,
cfg.FRCNN.ROI_XFORM_RESOLUTION,
),
spatial_scale=spatial_scale,
)
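# A brief reminder, not part of the original code: RoIPool quantizes
# each RoI to the feature grid before max pooling, while RoIAlign
# samples the feature map bilinearly at exact sub-pixel locations
# (see the Mask R-CNN paper).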
......@@ -127,14 +130,14 @@ class FastRCNN(torch.nn.Module):
# Compute rcnn logits
cls_score = self.cls_score(rcnn_output).float()
outputs = OrderedDict({
outputs = collections.OrderedDict({
'bbox_pred':
self.bbox_pred(rcnn_output).float(),
})
if self.training:
# Compute rcnn losses
outputs.update(OrderedDict({
outputs.update(collections.OrderedDict({
'cls_loss': self.cls_loss(
cls_score,
self.rcnn_data['labels'],
......
......@@ -16,7 +16,8 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3
from lib.modeling import conv1x1
from lib.modeling import conv3x3
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
......@@ -48,49 +49,43 @@ class FPN(torch.nn.Module):
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_(
m.weight,
# Fix the gain for [-127, 127]
a=1,
a=1, # Fix the gain for [-127, 127]
) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0)
def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL- min_lvl](fpn_input)]
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Apply MaxPool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1]))
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.nn_resize(
upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
def apply_on_retinanet(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Add extra convolutions for higher features
extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl: extra_input = self.relu(outputs[-1])
if i != max_lvl:
extra_input = self.relu(outputs[-1])
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.nn_resize(
upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
def forward(self, features):
......
......@@ -20,12 +20,20 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3, affine
from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class BasicBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out, stride=1,
downsample=None, dropblock=None):
def __init__(
self,
dim_in,
dim_out,
stride=1,
downsample=None,
dropblock=None,
):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out)
......@@ -65,8 +73,14 @@ class Bottleneck(torch.nn.Module):
contraction = cfg.RESNET.NUM_GROUPS \
* cfg.RESNET.GROUP_WIDTH / 256.0
def __init__(self, dim_in, dim_out, stride=1,
downsample=None, dropblock=None):
def __init__(
self,
dim_in,
dim_out,
stride=1,
downsample=None,
dropblock=None,
):
super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction)
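# Illustrative numbers, not taken from the config: NUM_GROUPS = 1 and
# GROUP_WIDTH = 64 give contraction = 0.25, i.e. the classic ResNet
# bottleneck that narrows dim_out by 4x; ResNeXt-style groups widen it.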
self.conv1 = conv1x1(dim_in, dim)
......@@ -128,11 +142,17 @@ class ResNet(torch.nn.Module):
ceil_mode=True,
)
self.drop3 = torch.nn.DropBlock2d(
7, 0.9, alpha=0.25, decrement=cfg.DROPBLOCK.DECREMENT) \
if cfg.DROPBLOCK.DROP_ON else None
kp=0.9,
block_size=7,
alpha=0.25,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.drop4 = torch.nn.DropBlock2d(
7, 0.9, alpha=1., decrement=cfg.DROPBLOCK.DECREMENT) \
if cfg.DROPBLOCK.DROP_ON else None
kp=0.9,
block_size=7,
alpha=1.00,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3)
......@@ -145,7 +165,8 @@ class ResNet(torch.nn.Module):
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_normal_(
m.weight,
nonlinearity='relu')
nonlinearity='relu',
)
# Stop the gradients if necessary
def freeze_func(m):
......@@ -178,25 +199,31 @@ class ResNet(torch.nn.Module):
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
outputs = [x]
outputs += [self.layer1(outputs[-1])]
outputs += [self.layer2(outputs[-1])]
outputs += [self.layer3(outputs[-1])]
outputs += [self.layer4(outputs[-1])]
return outputs
def resnet(depth):
if depth == 18: units = [2, 2, 2, 2]
elif depth == 34: units = [3, 4, 6, 3]
elif depth == 50: units = [3, 4, 6, 3]
elif depth == 101: units = [3, 4, 23, 3]
elif depth == 152: units = [3, 8, 36, 3]
elif depth == 200: units = [3, 24, 36, 3]
elif depth == 269: units = [3, 30, 48, 8]
else: raise ValueError('Unsupported depth: %d' % depth)
if depth == 18:
units = [2, 2, 2, 2]
elif depth == 34:
units = [3, 4, 6, 3]
elif depth == 50:
units = [3, 4, 6, 3]
elif depth == 101:
units = [3, 4, 23, 3]
elif depth == 152:
units = [3, 8, 36, 3]
elif depth == 200:
units = [3, 24, 36, 3]
elif depth == 269:
units = [3, 30, 48, 8]
else:
raise ValueError('Unsupported depth: %d' % depth)
block = Bottleneck if depth >= 50 else BasicBlock
filters = [64, 256, 512, 1024, 2048] \
if depth >= 50 else [64, 64, 128, 256, 512]
......@@ -204,7 +231,15 @@ def resnet(depth):
def make_resnet_18(): return resnet(18)
def make_resnet_34(): return resnet(34)
def make_resnet_50(): return resnet(50)
def make_resnet_101(): return resnet(101)
def make_resnet_152(): return resnet(152)
......@@ -13,12 +13,13 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import math
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling import conv3x3, RetinaNetDecoder
from lib.modeling import conv3x3
from lib.ops.modules import RetinaNetDecoder
from lib.retinanet import AnchorTargetLayer
......@@ -32,10 +33,12 @@ class RetinaNet(torch.nn.Module):
self.cls_conv = torch.nn.ModuleList(
conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS))
for _ in range(cfg.RETINANET.NUM_CONVS)
)
self.bbox_conv = torch.nn.ModuleList(
conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS))
for _ in range(cfg.RETINANET.NUM_CONVS)
)
# Packed as [C, A] not [A, C]
self.C = cfg.MODEL.NUM_CLASSES - 1
A = len(cfg.RETINANET.ASPECT_RATIOS) * \
......@@ -53,8 +56,11 @@ class RetinaNet(torch.nn.Module):
self.anchor_target_layer = AnchorTargetLayer()
self.cls_loss = torch.nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1. / 9.)
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
)
self.bbox_loss = torch.nn.SmoothL1Loss(
beta=1. / 9., reduction='batch_size',
)
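# Sketch of the standard smooth L1 definition assumed here:
#   loss(x) = 0.5 * x ** 2 / beta  if |x| < beta, else |x| - 0.5 * beta
# beta = 1/9 follows the RetinaNet/Detectron setting and keeps the loss
# close to plain L1 except very near zero.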
self.reset_parameters()
def reset_parameters(self):
......@@ -127,7 +133,7 @@ class RetinaNet(torch.nn.Module):
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return OrderedDict({
return collections.OrderedDict({
'cls_loss':
self.cls_loss(
cls_score,
......@@ -146,7 +152,7 @@ class RetinaNet(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({'bbox_pred': bbox_pred})
outputs = collections.OrderedDict({'bbox_pred': bbox_pred})
if self.training:
outputs.update(
......
......@@ -13,11 +13,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class RPN(torch.nn.Module):
......@@ -119,7 +120,7 @@ class RPN(torch.nn.Module):
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return OrderedDict({
return collections.OrderedDict({
'rpn_cls_loss':
self.cls_loss(cls_score, self.rpn_data['labels']),
'rpn_bbox_loss':
......@@ -135,7 +136,7 @@ class RPN(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({
outputs = collections.OrderedDict({
'rpn_cls_score': cls_score,
'rpn_bbox_pred': bbox_pred,
})
......
......@@ -13,18 +13,15 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling import conv3x3
from lib.ssd import (
PriorBoxLayer,
MultiBoxMatchLayer,
HardMiningLayer,
MultiBoxTargetLayer,
)
from lib.ssd import HardMiningLayer
from lib.ssd import MultiBoxMatchLayer
from lib.ssd import MultiBoxTargetLayer
from lib.ssd import PriorBoxLayer
class SSD(torch.nn.Module):
......@@ -57,7 +54,7 @@ class SSD(torch.nn.Module):
self.hard_mining_layer = HardMiningLayer()
self.box_target_layer = MultiBoxTargetLayer()
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss()
self.bbox_loss = torch.nn.SmoothL1Loss(reduction='batch_size')
self.reset_parameters()
def reset_parameters(self):
......@@ -88,8 +85,9 @@ class SSD(torch.nn.Module):
.permute(0, 2, 3, 1).view(0, -1))
# Concat them if necessary
return torch.cat(cls_score_wide, dim=1).view(
0, -1, cfg.MODEL.NUM_CLASSES), \
return \
torch.cat(cls_score_wide, dim=1) \
.view(0, -1, cfg.MODEL.NUM_CLASSES), \
torch.cat(bbox_pred_wide, dim=1).view(0, -1, 4)
def compute_losses(
......@@ -138,7 +136,7 @@ class SSD(torch.nn.Module):
gt_boxes=gt_boxes,
)
)
return OrderedDict({
return collections.OrderedDict({
# A compensating factor of 4.0 is used,
# as we normalize both the pos and neg samples
'cls_loss':
......@@ -160,7 +158,7 @@ class SSD(torch.nn.Module):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = OrderedDict({
outputs = collections.OrderedDict({
'prior_boxes': prior_boxes,
'bbox_pred': bbox_pred,
})
......
......@@ -16,7 +16,8 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1, conv3x3
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class VGG(torch.nn.Module):
......@@ -35,16 +36,22 @@ class VGG(torch.nn.Module):
dim_in = 3 if i == 0 else filter_list[i - 1]
for j in range(self.units[i]):
self.__setattr__(
'{}_{}'.format(conv_name, j + 1),
conv3x3(dim_in, filter_list[i], bias=True))
if j == 0: dim_in = filter_list[i]
'{}_{}'
.format(conv_name, j + 1),
conv3x3(dim_in, filter_list[i], bias=True),
)
if j == 0:
dim_in = filter_list[i]
if reduced:
# Empirically, the L2Norm is redundant,
# so we just keep a trainable scale
self.conv4_3_norm = torch.nn.Affine(filter_list[3], bias=False)
self.conv4_3_norm.weight.zero_() # Zero-Init
self.fc6 = torch.nn.Conv2d(filter_list[-1], 1024,
kernel_size=3, stride=1, padding=6, dilation=6)
self.fc6 = torch.nn.Conv2d(
filter_list[-1], 1024,
kernel_size=3, padding=6,
stride=1, dilation=6,
)
self.fc7 = conv1x1(1024, 1024, bias=True)
self.feature_dims = [filter_list[-2], 1024]
if extra_arch is not None:
......@@ -54,15 +61,23 @@ class VGG(torch.nn.Module):
for i in range(len(strides)):
conv_name = 'conv{}'.format(i + 6)
dim_in = 1024 if i == 0 else filter_list[i - 1] * 2
self.__setattr__('{}_1'.format(conv_name),
conv1x1(dim_in, filter_list[i], bias=True))
self.__setattr__(
'{}_1'.format(conv_name),
conv1x1(dim_in, filter_list[i], bias=True),
)
if strides[i] == 2:
self.__setattr__('{}_2'.format(conv_name),
conv3x3(filter_list[i], filter_list[i] * 2, 2, bias=True))
self.__setattr__(
'{}_2'.format(conv_name),
conv3x3(filter_list[i], filter_list[i] * 2, 2, bias=True),
)
else:
self.__setattr__('{}_2'.format(conv_name),
torch.nn.Conv2d(filter_list[i], filter_list[i] * 2,
kernel_size=kps[0], padding=kps[1], stride=kps[2]))
self.__setattr__(
'{}_2'.format(conv_name),
torch.nn.Conv2d(
filter_list[i], filter_list[i] * 2,
kernel_size=kps[0], padding=kps[1], stride=kps[2]
),
)
self.reset_parameters()
def reset_parameters(self):
......@@ -88,8 +103,9 @@ class VGG(torch.nn.Module):
for i in range(cfg.MODEL.FREEZE_AT, 0, -1):
conv_name = 'conv{}'.format(i)
for j in range(self.units[i - 1]):
self.__getattr__('{}_{}'.format(
conv_name, j + 1)).apply(freeze_func)
self.__getattr__(
'{}_{}'.format(conv_name, j + 1)
).apply(freeze_func)
def forward(self, x):
outputs = []
......@@ -101,8 +117,10 @@ class VGG(torch.nn.Module):
'{}_{}'.format(conv_name, j + 1))(x))
if self.reduced and i == 3:
outputs.append(self.conv4_3_norm(x))
if i < 4: x = self.maxpool(x)
else: x = self.s1pool(x) if self.reduced else x
if i < 4:
x = self.maxpool(x)
else:
x = self.s1pool(x) if self.reduced else x
# Internal FC layers and Extra Conv Layers
if self.reduced:
......@@ -145,4 +163,6 @@ def make_vgg_16_reduced(scale=300):
def make_vgg_16_reduced_300(): return make_vgg_16_reduced(300)
def make_vgg_16_reduced_512(): return make_vgg_16_reduced(512)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
......@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function
from lib.core.config import cfg
import lib.utils.logger as logger
from lib.utils import logger
try:
from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms
......@@ -33,10 +33,12 @@ except ImportError as e:
def nms(detections, thresh, force_cpu=False):
"""Perform either CPU or GPU Hard-NMS."""
if detections.shape[0] == 0: return []
if detections.shape[0] == 0:
return []
if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
else: return cpu_nms(detections, thresh)
else:
return cpu_nms(detections, thresh)
def soft_nms(
......@@ -47,7 +49,8 @@ def soft_nms(
score_thresh=0.001,
):
"""Perform CPU Soft-NMS."""
if detections.shape[0] == 0: return []
if detections.shape[0] == 0:
return []
methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
if method not in methods:
logger.fatal('Unknown soft nms method: {}'.format(method))
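# For reference (following the Soft-NMS paper; illustrative only), the
# three methods decay a neighbor's score s by its IoU u with a kept box:
#   hard:     s = 0               if u > thresh
#   linear:   s = s * (1 - u)     if u > thresh
#   gaussian: s = s * exp(-u * u / sigma)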
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.autograd import function
from lib.ops import functions
def decode_retinanet(
features,
cls_prob,
bbox_pred,
ims_info,
strides,
ratios,
scales,
pre_nms_top_n,
score_thresh,
):
return function.get(
functions.RetinaNetDecoder,
cls_prob.device,
strides=strides,
ratios=ratios,
scales=scales,
pre_nms_top_n=pre_nms_top_n,
score_thresh=score_thresh,
).apply(features, cls_prob, bbox_pred, ims_info)
def decode_rpn(
features,
cls_prob,
bbox_pred,
ims_info,
num_outputs,
strides,
ratios,
scales,
pre_nms_top_n,
post_nms_top_n,
nms_thresh,
min_size,
min_level,
max_level,
canonical_scale,
canonical_level,
):
return function.get(
functions.RPNDecoder,
cls_prob.device,
K=num_outputs,
strides=strides,
ratios=ratios,
scales=scales,
pre_nms_top_n=pre_nms_top_n,
post_nms_top_n=post_nms_top_n,
nms_thresh=nms_thresh,
min_size=min_size,
min_level=min_level,
max_level=max_level,
canonical_scale=canonical_scale,
canonical_level=canonical_level,
).apply(features, cls_prob, bbox_pred, ims_info)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.autograd import function
class RetinaNetDecoder(function.Function):
def __init__(self, key, dev, **kwargs):
super(RetinaNetDecoder, self).__init__(key, dev, **kwargs)
self.args = kwargs
def register_operator(self):
return {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RETINANET',
'strides': self.args['strides'],
'ratios': self.args['ratios'],
'scales': self.args['scales'],
'pre_nms_top_n': self.args['pre_nms_top_n'],
'score_thresh': self.args['score_thresh'],
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, ims_info]
self._unify_devices(inputs[:-1]) # Skip <ims_info>
return self.run(inputs, [self.alloc()], unify_devices=False)
class RPNDecoder(function.Function):
def __init__(self, key, dev, **kwargs):
super(RPNDecoder, self).__init__(key, dev, **kwargs)
self.args = kwargs
def register_operator(self):
return {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RCNN',
'strides': self.args['strides'],
'ratios': self.args['ratios'],
'scales': self.args['scales'],
'pre_nms_top_n': self.args['pre_nms_top_n'],
'post_nms_top_n': self.args['post_nms_top_n'],
'nms_thresh': self.args['nms_thresh'],
'min_size': self.args['min_size'],
'min_level': self.args['min_level'],
'max_level': self.args['max_level'],
'canonical_scale': self.args['canonical_scale'],
'canonical_level': self.args['canonical_level'],
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, ims_info]
self._unify_devices(inputs[:-1]) # Skip <ims_info>
outputs = [self.alloc() for _ in range(self.args['K'])]
return self.run(inputs, outputs, unify_devices=False)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.ops import functional as F
from lib.utils.blob import blob_to_tensor
class Bootstrap(torch.nn.Module):
"""Extended operator to process the images."""
def __init__(self):
super(Bootstrap, self).__init__()
self.dtype = cfg.MODEL.DATA_TYPE.lower()
self.mean_values = cfg.PIXEL_MEANS
self.dummy_buffer = torch.ones(1)
def _apply(self, fn):
fn(self.dummy_buffer)
def cpu(self):
self._device = torch.device('cpu')
def cuda(self, device=None):
self._device = torch.device('cuda', device)
def device(self):
"""Return the device of this module."""
return self.dummy_buffer.device
def forward(self, input):
cur_device = self.device()
if input._device != cur_device:
if cur_device.type == 'cpu':
input = input.cpu()
else:
input = input.cuda(cur_device.index)
return torch.vision.ops.image_data(
input, self.dtype, self.mean_values,
)
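# Assumed semantics of image_data, matching the notes in
# Detector.forward: NHWC -> NCHW transpose, cast from uint8 to
# float32/float16, and mean subtraction (a sketch, not a spec).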
class RetinaNetDecoder(torch.nn.Module):
"""Generate proposal regions from retinanet."""
def __init__(self):
super(RetinaNetDecoder, self).__init__()
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
self.strides = [int(2. ** lvl) for lvl in range(k_min, k_max + 1)]
self.scales = [cfg.RETINANET.ANCHOR_SCALE *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
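# Illustrative numbers, not from the config: ANCHOR_SCALE = 4 and
# SCALES_PER_OCTAVE = 3 give per-level scales 4 * 2 ** (i / 3) for
# i in {0, 1, 2}, i.e. about [4.00, 5.04, 6.35] times each stride.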
def register_operator(self):
return {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RETINANET',
'strides': self.strides,
'scales': self.scales,
'ratios': [float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
'pre_nms_top_n': cfg.RETINANET.PRE_NMS_TOP_N,
'score_thresh': cfg.TEST.SCORE_THRESH,
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
return F.decode_retinanet(
features=features,
cls_prob=cls_prob,
bbox_pred=bbox_pred,
ims_info=blob_to_tensor(ims_info, enforce_cpu=True),
strides=self.strides,
ratios=[float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
scales=self.scales,
pre_nms_top_n=cfg.RETINANET.PRE_NMS_TOP_N,
score_thresh=cfg.TEST.SCORE_THRESH,
)
class RPNDecoder(torch.nn.Module):
"""Generate proposal regions from RPN."""
def __init__(self):
super(RPNDecoder, self).__init__()
self.K = (cfg.FPN.ROI_MAX_LEVEL -
cfg.FPN.ROI_MIN_LEVEL + 1) \
if len(cfg.RPN.STRIDES) > 1 else 1
def forward(self, features, cls_prob, bbox_pred, ims_info):
outputs = F.decode_rpn(
features=features,
cls_prob=cls_prob,
bbox_pred=bbox_pred,
ims_info=blob_to_tensor(ims_info, enforce_cpu=True),
num_outputs=self.K,
strides=cfg.RPN.STRIDES,
ratios=[float(e) for e in cfg.RPN.ASPECT_RATIOS],
scales=[float(e) for e in cfg.RPN.SCALES],
pre_nms_top_n=cfg.TEST.RPN_PRE_NMS_TOP_N,
post_nms_top_n=cfg.TEST.RPN_POST_NMS_TOP_N,
nms_thresh=cfg.TEST.RPN_NMS_THRESH,
min_size=cfg.TEST.RPN_MIN_SIZE,
min_level=cfg.FPN.ROI_MIN_LEVEL,
max_level=cfg.FPN.ROI_MAX_LEVEL,
canonical_scale=cfg.FPN.ROI_CANONICAL_SCALE,
canonical_level=cfg.FPN.ROI_CANONICAL_LEVEL,
)
return [outputs] if self.K == 1 else outputs
......@@ -15,4 +15,3 @@ from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.retinanet.layers.anchor_target_layer import AnchorTargetLayer
from lib.retinanet.layers.proposal_layer import ProposalLayer
\ No newline at end of file
......@@ -13,15 +13,16 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.utils.blob import to_tensor
from lib.utils.bbox_transform import bbox_transform
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
class AnchorTargetLayer(torch.nn.Module):
......@@ -35,28 +36,32 @@ class AnchorTargetLayer(torch.nn.Module):
anchor_scale = cfg.RETINANET.ANCHOR_SCALE
self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)]
self.ratios = cfg.RETINANET.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = []
for stride in self.strides:
sizes = [stride * anchor_scale *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
self.base_anchors.append(generate_anchors_v2(
stride=stride, ratios=self.ratios, sizes=sizes))
self.base_anchors.append(
generate_anchors_v2(
stride=stride,
ratios=self.ratios,
sizes=sizes,
))
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = _dismantle_gt_boxes(gt_boxes, num_images)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal('Input {} images, got {} slices of gt boxes.' \
.format(num_images, len(gt_boxes_wide)))
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
all_anchors = []; total_anchors = 0
all_anchors, total_anchors = [], 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
......@@ -101,7 +106,8 @@ class AnchorTargetLayer(torch.nn.Module):
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float))
np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
......@@ -125,10 +131,10 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4])
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[fg_inds, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_inside_weights[fg_inds, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1.0)
bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1)
labels_wide[ix, inds_inside] = labels
bbox_targets_wide[ix, inds_inside] = bbox_targets
......@@ -141,16 +147,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': to_tensor(labels),
'bbox_targets': to_tensor(bbox_targets),
'bbox_inside_weights': to_tensor(bbox_inside_weights),
'bbox_outside_weights': to_tensor(bbox_outside_weights),
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
def _dismantle_gt_boxes(gt_boxes, num_images):
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0]
] for ix in range(num_images)
]
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.bbox_transform import bbox_transform_inv
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
class ProposalLayer(torch.nn.Module):
"""Outputs object detection proposals by applying estimated bounding-box.
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
anchor_scale = cfg.RETINANET.ANCHOR_SCALE
self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)]
self.ratios = cfg.RETINANET.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = []
for stride in self.strides:
sizes = [stride * anchor_scale *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
self.base_anchors.append(generate_anchors_v2(
stride=stride, ratios=self.ratios, sizes=sizes))
def forward(self, features, cls_prob, bbox_pred, ims_info):
# Get resources
num_images = ims_info.shape[0]
cls_prob, bbox_pred = cls_prob.numpy(True), bbox_pred.numpy(True)
lvl_info = [features[i].shape[-2:] for i in range(len(self.strides))]
if cls_prob.shape[0] != num_images or \
bbox_pred.shape[0] != num_images:
logger.fatal('Incorrect num of images: {}'.format(num_images))
# Prepare for the outputs
batch_probs = cls_prob
batch_deltas = bbox_pred.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
batch_detections = []
# Extract Detections separately
for ix in range(num_images):
im_scale = ims_info[ix, 2]
if cfg.RETINANET.SOFTMAX:
P = batch_probs[ix, 1:, :]
else:
P = batch_probs[ix]  # [num_classes - 1, n]
D = batch_deltas[ix] # [n, 4]
anchor_pos = 0
for lvl, (H, W) in enumerate(lvl_info):
A, K = self.base_anchors[lvl].shape[0], H * W
num_anchors = A * K
prob = P[:, anchor_pos : anchor_pos + num_anchors]
deltas = D[anchor_pos : anchor_pos + num_anchors]
anchor_pos += num_anchors
prob_ravel = prob.ravel()
candidate_inds = np.where(prob_ravel > cfg.TEST.SCORE_THRESH)[0]
if len(candidate_inds) == 0:
continue
pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
inds = np.argpartition(
prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
inds = candidate_inds[inds]
prob_4d = prob.reshape((prob.shape[0], A, H, W))
inds_2d = np.array(np.unravel_index(inds, prob.shape)).transpose()
inds_4d = np.array(np.unravel_index(inds, prob_4d.shape)).transpose()
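# (Clarifying note) the flat top-k indices are decoded twice: against
# prob [C, A*K] to get (class, anchor_id), and against prob_4d
# [C, A, H, W] to get (class, a, y, x). E.g., assuming shape (2, 3, 4),
# np.unravel_index(7, (2, 3, 4)) -> (0, 1, 3).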
classes, anchor_ids = inds_2d[:, 0], inds_2d[:, 1]
a, y, x = inds_4d[:, 1], inds_4d[:, 2], inds_4d[:, 3]
scores = prob[classes, anchor_ids]
deltas = deltas[anchor_ids]
anchors = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
anchors = (anchors * self.strides[lvl]) + self.base_anchors[lvl][a, :]
pred_boxes = bbox_transform_inv(anchors, deltas)
pred_boxes /= im_scale
# {im_idx, x1, y1, x2, y2, score, cls}
detections = np.zeros((pred_boxes.shape[0], 7), dtype=np.float32)
detections[:, 0], detections[:, 1:5] = ix, pred_boxes
detections[:, 5], detections[:, 6] = scores, classes + 1
batch_detections.append(detections)
# Merge Detections into a blob
batch_detections = np.vstack(batch_detections) \
if len(batch_detections) > 0 else \
np.zeros((1, 7), dtype=np.float32)
return batch_detections
\ No newline at end of file
......@@ -13,20 +13,16 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
import cPickle
except:
import pickle as cPickle
import numpy as np
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.image import scale_image
from lib.utils.bbox_transform import clip_boxes
from lib.nms.nms_wrapper import nms, soft_nms
from lib.utils.timer import Timer
from lib.utils.blob import im_list_to_blob
from lib.utils.vis import vis_one_image
......@@ -39,72 +35,65 @@ def im_detect(detector, raw_image):
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
blobs['data'] = torch.from_numpy(blobs['data']).cuda(cfg.GPU_ID)
for im_scale in ims_scale], dtype=np.float32,
)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results
results = outputs['detections']
detections_wide = []
for im_idx in range(len(ims)):
indices = np.where(results[:, 0].astype(np.int32) == im_idx)[0]
detections = results[indices, 1:]
detections[:, :4] = clip_boxes(detections[:, :4], raw_image.shape)
detections_wide.append(detections)
return np.vstack(detections_wide) \
if len(detections_wide) > 1 else detections_wide[0]
# Unpack results
return tensor_to_blob(outputs['detections'])[:, 1:]
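# The rows of outputs['detections'] follow the layout produced by the
# proposal layer above: [im_idx, x1, y1, x2, y2, score, cls]; slicing
# off column 0 leaves [x1, y1, x2, y2, score, cls] per detection.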
def ims_detect(net, raw_images):
"""Detect images, with single or multiple scales.
"""
def ims_detect(detector, raw_images):
"""Detect images, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_images[0])
num_scales = len(ims_scale)
ims_shape = [im.shape for im in raw_images]
for item_idx in range(1, len(raw_images)):
ims_ext, ims_scale_ext = scale_image(raw_images[item_idx])
ims += ims_ext; ims_scale += ims_scale_ext
ims += ims_ext
ims_scale += ims_scale_ext
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[2:4]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32,
)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
net.forward(**blobs)()
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results
results = net.blobs['detections'].data.get_value()
# Unpack results
results = tensor_to_blob(outputs['detections'])
detections_wide = [[] for _ in range(len(ims_shape))]
for i in range(len(ims)):
j = i % len(ims_shape)
indices = np.where(results[:, 0].astype(np.int32) == i)[0]
detections = results[indices, 1:]
detections[:, :4] = clip_boxes(detections[:, :4], ims_shape[j])
detections_wide[j].append(detections)
detections_wide[i // num_scales].append(detections)
for j in range(len(ims_shape)):
detections_wide[j] = np.vstack(detections_wide[j]) \
if len(detections_wide[j]) > 1 else detections_wide[j][0]
for i in range(len(ims_shape)):
detections_wide[i] = np.vstack(detections_wide[i]) \
if len(detections_wide[i]) > 1 else detections_wide[i][0]
return detections_wide
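# A sketch of the index layout assumed above (hypothetical case of
# 2 raw images and 2 test scales): `ims` is ordered image-major,
#   [im0@s0, im0@s1, im1@s0, im1@s1]
# so a result row tagged with blob index i belongs to raw image
# i // num_scales, e.g. [i // 2 for i in range(4)] == [0, 0, 1, 1].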
def test_net(net, server):
classes, num_images, num_classes = \
server.classes, server.num_images, server.num_classes
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect' : Timer(), 'misc' : Timer()}
_t = {'im_detect': Timer(), 'misc': Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths
......@@ -134,30 +123,46 @@ def test_net(net, server):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4]
cls_scores = detections[cls_indices, 4]
cls_dets = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])).\
astype(np.float32, copy=False)
cls_detections = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(cls_dets, cfg.TEST.NMS,
keep = soft_nms(
cls_detections,
cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA)
else: keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
cls_dets = cls_dets[keep, :]
all_boxes[j][i] = cls_dets
boxes_this_image.append(cls_dets)
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(
cls_detections,
cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(raw_images[item_idx], classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
filename=server.get_save_filename(image_ids[item_idx]))
vis_one_image(
raw_images[item_idx],
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_ids[item_idx]),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: image_scores = np.hstack(image_scores)
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
......@@ -165,7 +170,7 @@ def test_net(net, server):
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='')
......
......@@ -14,7 +14,7 @@ from __future__ import division
from __future__ import print_function
from lib.ssd.layers.data_layer import DataLayer
from lib.ssd.layers.prior_box_layer import PriorBoxLayer
from lib.ssd.layers.multibox_match_layer import MultiBoxMatchLayer
from lib.ssd.layers.hard_mining_layer import HardMiningLayer
from lib.ssd.layers.multibox_target_layer import MultiBoxTargetLayer
\ No newline at end of file
from lib.ssd.layers.multibox_layer import MultiBoxMatchLayer
from lib.ssd.layers.multibox_layer import MultiBoxTargetLayer
from lib.ssd.layers.priorbox_layer import PriorBoxLayer
......@@ -13,32 +13,39 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from multiprocessing import Process
from lib.core.config import cfg
class BlobFetcher(Process):
class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self.Q_in = self.Q_out = None
self._img_blob_size = (
cfg.TRAIN.IMS_PER_BATCH,
cfg.SSD.RESIZE.HEIGHT,
cfg.SSD.RESIZE.WIDTH, 3,
)
self.q_in = self.q_out = None
self.daemon = True
def get(self):
num_images = cfg.TRAIN.IMS_PER_BATCH
target_h = cfg.SSD.RESIZE.HEIGHT; target_w = cfg.SSD.RESIZE.WIDTH
ims_blob = np.zeros(shape=(num_images, target_h, target_w, 3), dtype=np.uint8)
gt_boxes_wide = []
for ix in range(cfg.TRAIN.IMS_PER_BATCH):
im, gt_boxes = self.Q_in.get()
ims_blob[ix, :, :, :] = im
# Encode boxes by adding the idx of images
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), dtype=np.float32)
im_boxes[:, 0:gt_boxes.shape[1]] = gt_boxes
im_boxes[:, -1] = ix
gt_boxes_wide.append(im_boxes)
return {'data': ims_blob, 'gt_boxes': np.concatenate(gt_boxes_wide, axis=0)}
img_blob, boxes_blob = np.zeros(self._img_blob_size, 'uint8'), []
for i in range(cfg.TRAIN.IMS_PER_BATCH):
img_blob[i], gt_boxes = self.q_in.get()
# Pack the boxes by adding the index of images
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), np.float32)
boxes[:, :gt_boxes.shape[1]] = gt_boxes
boxes[:, -1] = i
boxes_blob.append(boxes)
return {
'data': img_blob,
'gt_boxes': np.concatenate(boxes_blob, 0),
}
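# A sketch of the packed blob with hypothetical shapes: with
# IMS_PER_BATCH=2 and 3+1 ground-truth boxes, 'gt_boxes' is a (4, 6)
# float32 array whose rows are [x1, y1, x2, y2, cls, image_index].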
def run(self):
while True: self.Q_out.put(self.get())
\ No newline at end of file
while True:
self.q_out.put(self.get())
......@@ -13,15 +13,16 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from multiprocessing import Queue
import time
import dragon
import pprint
from multiprocessing import Queue
import dragon.core.mpi as mpi
import lib.utils.logger as logger
from lib.faster_rcnn.data.data_reader import DataReader
from lib.ssd.data.data_transformer import DataTransformer
from lib.ssd.data.blob_fetcher import BlobFetcher
from lib.utils import logger
class DataBatch(object):
......@@ -52,19 +53,20 @@ class DataBatch(object):
super(DataBatch, self).__init__()
# Init mpi
global_rank, local_rank, group_size = 0, 0, 1
if mpi.Is_Init():
idx, group = mpi.AllowParallel()
if idx != -1: # DataParallel
global_rank = mpi.Rank()
if dragon.mpi.is_init():
group = dragon.mpi.is_parallel()
if group is not None: # DataParallel
global_rank = dragon.mpi.rank()
group_size = len(group)
for i, node in enumerate(group):
if global_rank == node: local_rank = i
if global_rank == node:
local_rank = i
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 32)
self._num_readers = kwargs.get( 'num_readers', 1)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', -1)
self._max_transformers = kwargs.get('max_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
......@@ -84,7 +86,7 @@ class DataBatch(object):
self._readers = []
for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].Q_out = self.Q1
self._readers[-1].q_out = self.Q1
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
......@@ -101,8 +103,8 @@ class DataBatch(object):
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers)
transformer.Q_in = self.Q1
transformer.Q_out = self.Q2
transformer.q_in = self.Q1
transformer.q_out = self.Q2
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
......@@ -111,14 +113,16 @@ class DataBatch(object):
self._fetchers = []
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.Q_in = self.Q2
fetcher.Q_out = self.Q3
fetcher.q_in = self.Q2
fetcher.q_out = self.Q3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
# Prevent to echo multiple nodes
if local_rank == 0: self.echo()
if local_rank == 0:
self.echo()
def cleanup():
def terminate(processes):
for process in processes:
......@@ -130,6 +134,7 @@ class DataBatch(object):
logger.info('Terminating DataTransformer ......')
terminate(self._readers)
logger.info('Terminating DataReader......')
import atexit
atexit.register(cleanup)
......@@ -145,13 +150,7 @@ class DataBatch(object):
return self.Q3.get()
def echo(self):
"""Print I/O Information.
Returns
-------
None
"""
"""Print I/O Information."""
print('---------------------------------------------------------')
print('BatchFetcher({} Threads), Using config:'.format(
self._num_readers + self._num_transformers + self._num_fetchers))
......
......@@ -14,34 +14,34 @@ from __future__ import division
from __future__ import print_function
import cv2
import multiprocessing
import numpy as np
import numpy.random as npr
from multiprocessing import Process
from lib.core.config import cfg
from lib.proto import anno_pb2 as pb
from lib.ssd.data.preprocessing import *
import lib.utils.logger as logger
from lib.ssd.data import transforms
from lib.utils import logger
class DataTransformer(Process):
class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._distorter = Distortor()
self._expander = Expander()
self._sampler = Sampler(cfg.SSD.SAMPLERS)
self._resizer = Resizer()
self._rng_seed = cfg.RNG_SEED
self._mirror = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF
self._classes = kwargs.get('classes', ('__background__',))
self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self._queues = []
self.Q_in = self.Q_out = None
self._image_aug = transforms.Compose(
transforms.Distort(), # Color augmentation
transforms.Expand(), # Expand and padding
transforms.Sample(), # Sample a patch randomly
transforms.Resize(), # Resize to a fixed scale
)
self.q_in = self.q_out = None
self.daemon = True
def make_roidb(self, ann_datum, flip=False):
def make_roi_dict(self, ann_datum, flip=False):
annotations = ann_datum.annotation
n_objects = 0
if not self._use_diff:
......@@ -49,7 +49,7 @@ class DataTransformer(Process):
if not ann.difficult: n_objects += 1
else: n_objects = len(annotations)
roidb = {
roi_dict = {
'width': ann_datum.datum.width,
'height': ann_datum.datum.height,
'gt_classes': np.zeros((n_objects,), dtype=np.int32),
......@@ -57,75 +57,82 @@ class DataTransformer(Process):
'normalized_boxes': np.zeros((n_objects, 4), dtype=np.float32),
}
ix = 0
rec_idx = 0
for ann in annotations:
if not self._use_diff and ann.difficult: continue
roidb['boxes'][ix, :] = [
max(0, ann.x1), max(0, ann.y1),
if not self._use_diff and ann.difficult:
continue
roi_dict['boxes'][rec_idx, :] = [
max(0, ann.x1),
max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1)]
roidb['gt_classes'][ix] = self._class_to_ind[ann.name]
ix += 1
min(ann.y2, ann_datum.datum.height - 1),
]
roi_dict['gt_classes'][rec_idx] = \
self._class_to_ind[ann.name]
rec_idx += 1
if flip: roidb['boxes'] = _flip_boxes(roidb['boxes'], roidb['width'])
roidb['normalized_boxes'][:, 0::2] = roidb['boxes'][:, 0::2] / float(roidb['width'])
roidb['normalized_boxes'][:, 1::2] = roidb['boxes'][:, 1::2] / float(roidb['height'])
if flip:
roi_dict['boxes'] = _flip_boxes(
roi_dict['boxes'], roi_dict['width'])
return roidb
roi_dict['boxes'][:, 0::2] /= roi_dict['width']
roi_dict['boxes'][:, 1::2] /= roi_dict['height']
return roi_dict
def get(self, serialized):
ann_datum = pb.AnnotatedDatum()
ann_datum.ParseFromString(serialized)
im_datum = ann_datum.datum
im = np.fromstring(im_datum.data, np.uint8)
if im_datum.encoded is True: im = cv2.imdecode(im, -1)
else: im = im.reshape((im_datum.height, im_datum.width, im_datum.channels))
img_datum = ann_datum.datum
img = np.fromstring(img_datum.data, np.uint8)
if img_datum.encoded is True:
img = cv2.imdecode(img, -1)
else:
h, w = img_datum.height, img_datum.width
img = img.reshape((h, w, img_datum.channels))
# Flip
flip = False
if self._mirror:
if npr.randint(0, 2) > 0:
im = im[:, ::-1, :]
if np.random.randint(0, 2) > 0:
img = img[:, ::-1, :]
flip = True
# Datum -> RoIDB
roidb = self.make_roidb(ann_datum, flip)
roi_dict = self.make_roi_dict(ann_datum, flip)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
gt_boxes = np.empty((len(roidb['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, 0:4] = roidb['normalized_boxes']
gt_boxes[:, 4] = roidb['gt_classes']
gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), 'float32')
gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
# Distort => Expand => Sample => Resize
im = self._distorter.distort_image(im)
im, gt_boxes = self._expander.expand_image(im, gt_boxes)
im, gt_boxes = self._sampler.sample_image(im, gt_boxes)
im = self._resizer.resize_image(im)
img, gt_boxes = self._image_aug(img, gt_boxes)
# Modify gt boxes to the blob scale
# Restore to the blob scale
gt_boxes[:, 0] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 1] *= cfg.SSD.RESIZE.HEIGHT
gt_boxes[:, 2] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 3] *= cfg.SSD.RESIZE.HEIGHT
return im, gt_boxes
return img, gt_boxes
def run(self):
npr.seed(self._rng_seed)
np.random.seed(self._rng_seed)
while True:
serialized = self.Q_in.get()
serialized = self.q_in.get()
im, gt_boxes = self.get(serialized)
if len(gt_boxes) < 1: continue
self.Q_out.put((im, gt_boxes))
if len(gt_boxes) < 1:
continue
self.q_out.put((im, gt_boxes))
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
oldx1 = boxes[:, 0].copy()
oldx2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - oldx2 - 1
flip_boxes[:, 2] = width - oldx1 - 1
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes
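# A worked example with hypothetical values: flipping a box with
# [x1, x2] = [10, 30] on a width-100 image gives
# [100 - 30 - 1, 100 - 10 - 1] = [69, 89], which preserves the box
# width (20) and the x2 >= x1 invariant checked above.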
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import PIL.Image
import PIL.ImageEnhance
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
class Distortor(object):
def __init__(self):
self._brightness_prob = cfg.SSD.DISTORT.BRIGHTNESS_PROB
self._brightness_delta = 0.3
self._contrast_prob = cfg.SSD.DISTORT.CONTRAST_PROB
self._contrast_delta = 0.3
self._saturation_prob = cfg.SSD.DISTORT.SATURATION_PROB
self._saturation_delta = 0.3
def distort_image(self, im):
im = PIL.Image.fromarray(im)
if npr.uniform() < self._brightness_prob:
delta_brightness = npr.uniform(-self._brightness_delta, self._brightness_delta) + 1.0
im = PIL.ImageEnhance.Brightness(im)
im = im.enhance(delta_brightness)
if npr.uniform() < self._contrast_prob:
delta_contrast = npr.uniform(-self._contrast_delta, self._contrast_delta) + 1.0
im = PIL.ImageEnhance.Contrast(im)
im = im.enhance(delta_contrast)
if npr.uniform() < self._saturation_prob:
delta_saturation = npr.uniform(-self._contrast_delta, self._contrast_delta) + 1.0
im = PIL.ImageEnhance.Color(im)
im = im.enhance(delta_saturation)
im = np.array(im)
return im
if __name__ == '__main__':
distortor = Distortor()
while True:
im = cv2.imread('cat.jpg')
im = distortor.distort_image(im)
cv2.imshow('Distort', im)
cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy.random as npr
import numpy as np
import math
from lib.core.config import cfg
import lib.utils.logger as logger
class Expander(object):
def __init__(self, **params):
self._expand_prob = cfg.SSD.EXPAND.PROB
self._max_expand_ratio = cfg.SSD.EXPAND.MAX_RATIO
if self._max_expand_ratio < 1.0:
logger.fatal('The max expand ratio must >= 1.0, got {}'.format(self._max_expand_ratio))
def expand_image(self, im, gt_boxes=None):
prob = npr.uniform()
if prob > self._expand_prob : return im, gt_boxes
ratio = npr.uniform(1.0, self._max_expand_ratio)
if ratio == 1: return im, gt_boxes
im_h = im.shape[0]
im_w = im.shape[1]
expand_h = int(im_h * ratio)
expand_w = int(im_w * ratio)
h_off = int(math.floor(npr.uniform(0.0, expand_h - im_h)))
w_off = int(math.floor(npr.uniform(0.0, expand_w - im_w)))
new_im = np.empty((expand_h, expand_w, 3), dtype=np.uint8)
new_im[:] = cfg.PIXEL_MEANS
new_im[h_off : h_off + im_h, w_off : w_off + im_w, :] = im
if gt_boxes is not None:
ex_gt_boxes = gt_boxes.astype(gt_boxes.dtype, copy=True)
ex_gt_boxes[:, 0] = (gt_boxes[:, 0] * im_w + w_off) / expand_w
ex_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h + h_off) / expand_h
ex_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w + w_off) / expand_w
ex_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h + h_off) / expand_h
return new_im, ex_gt_boxes
return new_im, gt_boxes
if __name__ == '__main__':
expander = Expander()
while True:
im = cv2.imread('cat.jpg')
gt_boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
im, gt_boxes = expander.expand_image(im, gt_boxes)
x1 = int(gt_boxes[0][0] * im.shape[1])
y1 = int(gt_boxes[0][1] * im.shape[0])
x2 = int(gt_boxes[0][2] * im.shape[1])
y2 = int(gt_boxes[0][3] * im.shape[0])
cv2.rectangle(im, (x1, y1), (x2, y2), (188,119,64), 2)
cv2.imshow('Expand', im)
cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy.random as npr
from lib.core.config import cfg
class Resizer(object):
def __init__(self):
self._re_height = cfg.SSD.RESIZE.HEIGHT
self._re_width = cfg.SSD.RESIZE.WIDTH
interp_list = {
'LINEAR': cv2.INTER_LINEAR,
'AREA': cv2.INTER_AREA,
'NEAREST': cv2.INTER_NEAREST,
'CUBIC': cv2.INTER_CUBIC,
'LANCZOS4': cv2.INTER_LANCZOS4,
}
interp_mode = cfg.SSD.RESIZE.INTERP_MODE
self._interp_mode = [interp_list[key] for key in interp_mode]
def resize_image(self, im):
rand = npr.randint(0, len(self._interp_mode))
return cv2.resize(
im, (self._re_width, self._re_height),
interpolation=self._interp_mode[rand])
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
from lib.utils.bbox_transform import clip_boxes
from lib.utils.boxes import iou
import lib.utils.logger as logger
class Sampler(object):
def __init__(self, samplers):
if not isinstance(samplers, list): samplers = [samplers]
self._samplers = []
for sampler in samplers:
if len(sampler) != 8:
logger.fatal('The sample params should be a tuple of length 8.')
sample_param = {
'min_scale': sampler[0],
'max_scale': sampler[1],
'min_aspect_ratio': sampler[2],
'max_aspect_ratio': sampler[3],
'min_jaccard_overlap': sampler[4],
'max_jaccard_overlap': sampler[5],
'max_trials': sampler[6],
'max_sample': sampler[7]}
self._samplers.append(sample_param)
def _compute_overlaps(self, rand_box, gt_boxes):
return iou(np.expand_dims(rand_box, 0), gt_boxes[:, 0:4])
def _generate_sample(self, sample_param):
min_scale = sample_param.get('min_scale', 1.0)
max_scale = sample_param.get('max_scale', 1.0)
scale = npr.uniform(min_scale, max_scale)
min_aspect_ratio = sample_param.get('min_aspect_ratio', 1.0)
max_aspect_ratio = sample_param.get('max_aspect_ratio', 1.0)
min_aspect_ratio = max(min_aspect_ratio, scale**2)
max_aspect_ratio = min(max_aspect_ratio, 1.0 / (scale**2))
aspect_ratio = npr.uniform(min_aspect_ratio, max_aspect_ratio)
bbox_w = scale * (aspect_ratio ** 0.5)
bbox_h = scale / (aspect_ratio ** 0.5)
w_off = npr.uniform(0.0, float(1 - bbox_w))
h_off = npr.uniform(0.0, float(1 - bbox_h))
return np.array([w_off, h_off, w_off + bbox_w, h_off + bbox_h])
def _check_satisfy(self, sample_box, gt_boxes, constraint):
min_jaccard_overlap = constraint.get('min_jaccard_overlap', None)
max_jaccard_overlap = constraint.get('max_jaccard_overlap', None)
if min_jaccard_overlap == None and \
max_jaccard_overlap == None:
return True
max_overlap = self._compute_overlaps(sample_box, gt_boxes).max()
if min_jaccard_overlap is not None:
if max_overlap < min_jaccard_overlap: return False
if max_jaccard_overlap is not None:
if max_overlap > max_jaccard_overlap: return False
return True
def _generate_batch_samples(self, gt_boxes):
sample_boxes = []
for sampler in self._samplers:
found = 0
for i in range(sampler['max_trials']):
if found >= sampler['max_sample']: break
sample_box = self._generate_sample(sampler)
if sampler['min_jaccard_overlap'] != 0.0 or \
sampler['max_jaccard_overlap'] != 1.0:
ok = self._check_satisfy(sample_box, gt_boxes, sampler)
if not ok: continue
found += 1
sample_boxes.append(sample_box)
return sample_boxes
def _rand_crop(self, im, rand_box, gt_boxes=None):
im_h = im.shape[0]
im_w = im.shape[1]
w_off = int(rand_box[0] * im_w)
h_off = int(rand_box[1] * im_h)
crop_w = int((rand_box[2] - rand_box[0]) * im_w)
crop_h = int((rand_box[3] - rand_box[1]) * im_h)
new_im = im[h_off: h_off + crop_h, w_off: w_off + crop_w, :]
if gt_boxes is not None:
ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
# Keep the ground-truth box whose center is in the sample box
# Implement ``EmitConstraint.CENTER`` in the original SSD
keep_inds = np.where((ctr_x >= rand_box[0]) & (ctr_x <= rand_box[2])
& (ctr_y >= rand_box[1]) & (ctr_y <= rand_box[3]))[0]
gt_boxes = gt_boxes[keep_inds]
new_gt_boxes = gt_boxes.astype(gt_boxes.dtype, copy=True)
new_gt_boxes[:, 0] = (gt_boxes[:, 0] * im_w - w_off)
new_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h - h_off)
new_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w - w_off)
new_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h - h_off)
new_gt_boxes = clip_boxes(new_gt_boxes, (crop_h, crop_w))
new_gt_boxes[:, 0] = new_gt_boxes[:, 0] / crop_w
new_gt_boxes[:, 1] = new_gt_boxes[:, 1] / crop_h
new_gt_boxes[:, 2] = new_gt_boxes[:, 2] / crop_w
new_gt_boxes[:, 3] = new_gt_boxes[:, 3] / crop_h
return new_im, new_gt_boxes
return new_im, gt_boxes
def sample_image(self, im, gt_boxes):
sample_boxes = self._generate_batch_samples(gt_boxes)
if len(sample_boxes) > 0:
# Apply sampling if found at least one valid sample box
# Then randomly pick one
sample_idx = npr.randint(0, len(sample_boxes))
rand_box = sample_boxes[sample_idx]
im, gt_boxes = self._rand_crop(im, rand_box, gt_boxes)
return im, gt_boxes
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
import numpy.random as npr
npr.seed(3)
import sys
sys.path.append('../../')
from resize import Resizer
from expand import Expander
from distort import Distortor
from sample import Sampler
from lib.core.config import cfg
if __name__ == '__main__':
distorter = Distortor()
expander = Expander()
sampler = Sampler(cfg.SSD.SAMPLERS)
resizer = Resizer()
while True:
im = cv2.imread('cat.jpg')
gt_boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
im = distorter.distort_image(im)
im, gt_boxes = expander.expand_image(im, gt_boxes)
im, gt_boxes = sampler.sample_image(im, gt_boxes)
if len(gt_boxes) < 1: continue
im = resizer.resize_image(im)
for gt_box in gt_boxes:
x1 = int(gt_box[0] * im.shape[1])
y1 = int(gt_box[1] * im.shape[0])
x2 = int(gt_box[2] * im.shape[1])
y2 = int(gt_box[3] * im.shape[0])
cv2.rectangle(im, (x1, y1), (x2, y2), (188, 119, 64), 2)
print(x1, y1, x2, y2)
cv2.imshow('Sample', im)
cv2.waitKey(0)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import math
import cv2
import PIL.Image
import PIL.ImageEnhance
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import iou
class Compose(object):
"""Compose the several transforms together."""
def __init__(self, *transforms):
self.transforms = transforms
def __call__(self, img, boxes):
for transform in self.transforms:
img, boxes = transform.apply(img, boxes)
return img, boxes
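# Assumed usage (mirroring DataTransformer._image_aug above): transforms
# are applied in order, each consuming and returning an (img, boxes) pair:
#   aug = Compose(Distort(), Expand(), Sample(), Resize())
#   img, boxes = aug(img, boxes)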
class Distort(object):
def __init__(self):
self._brightness_prob = cfg.SSD.DISTORT.BRIGHTNESS_PROB
self._contrast_prob = cfg.SSD.DISTORT.CONTRAST_PROB
self._saturation_prob = cfg.SSD.DISTORT.SATURATION_PROB
def apply(self, img, boxes=None):
img = PIL.Image.fromarray(img)
if npr.uniform() < self._brightness_prob:
delta = npr.uniform(-0.3, 0.3) + 1.
img = PIL.ImageEnhance.Brightness(img)
img = img.enhance(delta)
if npr.uniform() < self._contrast_prob:
delta = npr.uniform(-0.3, 0.3) + 1.
img = PIL.ImageEnhance.Contrast(img)
img = img.enhance(delta)
if npr.uniform() < self._saturation_prob:
delta = npr.uniform(-0.3, 0.3) + 1.
img = PIL.ImageEnhance.Color(img)
img = img.enhance(delta)
return np.array(img), boxes
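# Note on the PIL semantics relied on above: enhance(1.0) returns the
# image unchanged, so the factors drawn in [0.7, 1.3] perturb
# brightness/contrast/saturation by up to +/-30% around the identity.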
class Expand(object):
def __init__(self):
self._expand_prob = cfg.SSD.EXPAND.PROB
self._max_ratio = cfg.SSD.EXPAND.MAX_RATIO
if self._max_ratio < 1.0:
logger.fatal(
'The max expand ratio must be >= 1, got {}'
.format(self._max_ratio)
)
def apply(self, img, boxes=None):
prob = npr.uniform()
if prob > self._expand_prob:
return img, boxes
ratio = npr.uniform(1., self._max_ratio)
if ratio == 1:
return img, boxes
im_h, im_w = img.shape[:2]
expand_h, expand_w = int(im_h * ratio), int(im_w * ratio)
h_off = int(math.floor(npr.uniform(0., expand_h - im_h)))
w_off = int(math.floor(npr.uniform(0., expand_w - im_w)))
new_img = np.empty((expand_h, expand_w, 3), dtype=np.uint8)
new_img[:] = cfg.PIXEL_MEANS
new_img[h_off:h_off + im_h, w_off:w_off + im_w, :] = img
if boxes is not None:
new_boxes = boxes.astype(boxes.dtype, copy=True)
new_boxes[:, 0] = (boxes[:, 0] * im_w + w_off) / expand_w
new_boxes[:, 1] = (boxes[:, 1] * im_h + h_off) / expand_h
new_boxes[:, 2] = (boxes[:, 2] * im_w + w_off) / expand_w
new_boxes[:, 3] = (boxes[:, 3] * im_h + h_off) / expand_h
boxes = new_boxes
return new_img, boxes
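# A worked example with hypothetical numbers: expanding a 100x100 image
# by ratio 2 with (h_off, w_off) = (50, 50) maps a normalized x1 = 0.2
# to (0.2 * 100 + 50) / 200 = 0.35, i.e. the box keeps its absolute
# pixel position inside the padded canvas.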
class Resize(object):
def __init__(self):
self._target_size = (
cfg.SSD.RESIZE.WIDTH,
cfg.SSD.RESIZE.HEIGHT,
)
interp_list = {
'LINEAR': cv2.INTER_LINEAR,
'AREA': cv2.INTER_AREA,
'NEAREST': cv2.INTER_NEAREST,
'CUBIC': cv2.INTER_CUBIC,
'LANCZOS4': cv2.INTER_LANCZOS4,
}
interp_mode = cfg.SSD.RESIZE.INTERP_MODE
self._interp_mode = [interp_list[key] for key in interp_mode]
def apply(self, img, boxes):
rand = npr.randint(len(self._interp_mode))
return cv2.resize(
img, self._target_size,
interpolation=self._interp_mode[rand],
), boxes
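# cv2.resize expects its target size as (width, height), which is why
# _target_size above is built as (RESIZE.WIDTH, RESIZE.HEIGHT) even
# though numpy image shapes are (height, width, channels).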
class Sample(object):
def __init__(self):
samplers = cfg.SSD.SAMPLERS
if not isinstance(samplers, collections.Iterable):
samplers = [samplers]
self._samplers = []
for sampler in samplers:
if len(sampler) != 8:
logger.fatal('The sample params should be a tuple of length 8.')
sample_param = {
'min_scale': sampler[0],
'max_scale': sampler[1],
'min_aspect_ratio': sampler[2],
'max_aspect_ratio': sampler[3],
'min_overlap': sampler[4],
'max_overlap': sampler[5],
'max_trials': sampler[6],
'max_sample': sampler[7],
}
self._samplers.append(sample_param)
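# An example entry of cfg.SSD.SAMPLERS under this format (hypothetical
# values):
#   (0.3, 1.0,  # min / max scale of the sampled patch
#    0.5, 2.0,  # min / max aspect ratio
#    0.1, 1.0,  # min / max IoU against the gt boxes
#    10, 1)     # max trials / max samples to keep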
@classmethod
def _compute_overlaps(cls, rand_box, gt_boxes):
return iou(np.expand_dims(rand_box, 0), gt_boxes[:, 0:4])
@classmethod
def _generate_sample(cls, sample_param):
min_scale = sample_param.get('min_scale', 1.)
max_scale = sample_param.get('max_scale', 1.)
scale = npr.uniform(min_scale, max_scale)
min_aspect_ratio = sample_param.get('min_aspect_ratio', 1.)
max_aspect_ratio = sample_param.get('max_aspect_ratio', 1.)
min_aspect_ratio = max(min_aspect_ratio, scale**2)
max_aspect_ratio = min(max_aspect_ratio, 1. / (scale**2))
aspect_ratio = npr.uniform(min_aspect_ratio, max_aspect_ratio)
bbox_w = scale * (aspect_ratio ** 0.5)
bbox_h = scale / (aspect_ratio ** 0.5)
w_off = npr.uniform(0., 1. - bbox_w)
h_off = npr.uniform(0., 1. - bbox_h)
return np.array([w_off, h_off, w_off + bbox_w, h_off + bbox_h])
def _check_satisfy(self, sample_box, gt_boxes, constraint):
min_overlap = constraint.get('min_overlap', None)
max_overlap = constraint.get('max_overlap', None)
if min_overlap is None and \
max_overlap is None:
return True
# Use a distinct name to avoid shadowing the constraint value read above
best_overlap = self._compute_overlaps(sample_box, gt_boxes).max()
if min_overlap is not None:
    if best_overlap < min_overlap:
        return False
if max_overlap is not None:
    if best_overlap > max_overlap:
        return False
return True
def _generate_batch_samples(self, gt_boxes):
sample_boxes = []
for sampler in self._samplers:
found = 0
for i in range(sampler['max_trials']):
if found >= sampler['max_sample']:
break
sample_box = self._generate_sample(sampler)
if sampler['min_overlap'] != 0. or \
sampler['max_overlap'] != 1.:
ok = self._check_satisfy(sample_box, gt_boxes, sampler)
if not ok:
continue
found += 1
sample_boxes.append(sample_box)
return sample_boxes
@classmethod
def _rand_crop(cls, im, rand_box, gt_boxes=None):
im_h, im_w = im.shape[:2]
w_off = int(rand_box[0] * im_w)
h_off = int(rand_box[1] * im_h)
crop_w = int((rand_box[2] - rand_box[0]) * im_w)
crop_h = int((rand_box[3] - rand_box[1]) * im_h)
new_im = im[h_off:h_off + crop_h, w_off:w_off + crop_w, :]
if gt_boxes is not None:
ctr_x = (gt_boxes[:, 2] + gt_boxes[:, 0]) / 2.0
ctr_y = (gt_boxes[:, 3] + gt_boxes[:, 1]) / 2.0
# Keep the ground-truth box whose center is in the sample box
# Implement ``EmitConstraint.CENTER`` in the original SSD
keep_inds = np.where((ctr_x >= rand_box[0]) & (ctr_x <= rand_box[2]) &
(ctr_y >= rand_box[1]) & (ctr_y <= rand_box[3]))[0]
gt_boxes = gt_boxes[keep_inds]
new_gt_boxes = gt_boxes.astype(gt_boxes.dtype, copy=True)
new_gt_boxes[:, 0] = (gt_boxes[:, 0] * im_w - w_off)
new_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h - h_off)
new_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w - w_off)
new_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h - h_off)
new_gt_boxes = clip_tiled_boxes(new_gt_boxes, (crop_h, crop_w))
new_gt_boxes[:, 0] = new_gt_boxes[:, 0] / crop_w
new_gt_boxes[:, 1] = new_gt_boxes[:, 1] / crop_h
new_gt_boxes[:, 2] = new_gt_boxes[:, 2] / crop_w
new_gt_boxes[:, 3] = new_gt_boxes[:, 3] / crop_h
return new_im, new_gt_boxes
return new_im, gt_boxes
def apply(self, img, boxes):
sample_boxes = self._generate_batch_samples(boxes)
if len(sample_boxes) > 0:
# Apply sampling if found at least one valid sample box
# Then randomly pick one
sample_idx = npr.randint(len(sample_boxes))
rand_box = sample_boxes[sample_idx]
img, boxes = self._rand_crop(img, rand_box, boxes)
return img, boxes