Commit d3ed62db by Ting PAN

Support Mask R-CNN

1 parent 41b3932b
Showing with 2602 additions and 1652 deletions
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.3.0 (20191121)
Dragon Minimum Required (Version 0.3.0.dev20191121)
Changes:
Preview Features:
- New algorithm: Mask R-CNN.
- Add MobileNet (V2 and NAS) as backbones.
- Refactor the testing module; multi-GPU testing is now supported.
Bugs fixed:
- Remove rotated boxes, use Mask R-CNN instead.
------------------------------------------------------------------------
SeetaDet 0.2.3 (20191101)
Dragon Minimum Required (Version 0.3.0.dev20191021)
......
...@@ -12,6 +12,10 @@ while the style of codes is PyTorch.
The torch-style code helps us simplify the hierarchical pipeline of modern detection.
## Requirements
seeta-dragon >= 0.3.0.dev20191121
## Installation
#### 1. Install the required python packages
......
...@@ -5,7 +5,6 @@ rm -r build install *.c *.cpp
# Compile cpp modules
python setup.py build_ext --inplace
g++ -o ../lib/utils/ctypes_rbox.so -shared -fPIC -O2 rbox.cc -std=c++11 -fopenmp
# Compile cuda modules
cd build && cmake .. && make install && cd ..
......
...@@ -41,6 +41,9 @@ __C.TRAIN.WEIGHTS = '' ...@@ -41,6 +41,9 @@ __C.TRAIN.WEIGHTS = ''
# Database to train # Database to train
__C.TRAIN.DATABASE = '' __C.TRAIN.DATABASE = ''
# The number of workers to transform data
__C.TRAIN.NUM_WORKERS = 3
# Scales to use during training (can list multiple scales) # Scales to use during training (can list multiple scales)
# Each scale is the pixel size of an image's shortest side # Each scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,) __C.TRAIN.SCALES = (600,)
...@@ -151,10 +154,10 @@ __C.TEST.SOFT_NMS_SIGMA = 0.5 ...@@ -151,10 +154,10 @@ __C.TEST.SOFT_NMS_SIGMA = 0.5
# The top-k prior boxes before nms. # The top-k prior boxes before nms.
__C.TEST.NMS_TOP_K = 400 __C.TEST.NMS_TOP_K = 400
# The threshold for prAttrDicting boxes # The threshold for predicting boxes
__C.TEST.SCORE_THRESH = 0.05 __C.TEST.SCORE_THRESH = 0.05
# The threshold for prAttrDicting masks # The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5 __C.TEST.BINARY_THRESH = 0.5
# NMS threshold used on RPN proposals # NMS threshold used on RPN proposals
...@@ -192,8 +195,9 @@ __C.MODEL = AttrDict() ...@@ -192,8 +195,9 @@ __C.MODEL = AttrDict()
# The type of the model # The type of the model
# ('faster_rcnn', # ('faster_rcnn',
# 'ssd', # 'mask_rcnn',
# 'retinanet', # 'retinanet',
# 'ssd',
# ) # )
__C.MODEL.TYPE = '' __C.MODEL.TYPE = ''
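The MODEL.TYPE string is what the testing entry point later in this diff uses to locate the per-algorithm module. A minimal sketch of that dispatch, assuming only the module layout visible here (the error message is illustrative):

import importlib

def resolve_test_fn(model_type):
    # One of 'faster_rcnn', 'mask_rcnn', 'retinanet', 'ssd' (see the comment above).
    # Each algorithm provides a lib.<type>.test module that exposes test_net().
    if not model_type:
        raise ValueError('cfg.MODEL.TYPE must be set before testing.')
    return importlib.import_module('lib.%s.test' % model_type).test_net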
...@@ -361,14 +365,14 @@ __C.SSD.NUM_CONVS = 0 ...@@ -361,14 +365,14 @@ __C.SSD.NUM_CONVS = 0
# Weight for bbox regression loss # Weight for bbox regression loss
__C.SSD.BBOX_REG_WEIGHT = 1. __C.SSD.BBOX_REG_WEIGHT = 1.
__C.SSD.MULTIBOX = AttrDict()
# MultiBox configs # MultiBox configs
__C.SSD.MULTIBOX = AttrDict()
__C.SSD.MULTIBOX.STRIDES = [] __C.SSD.MULTIBOX.STRIDES = []
__C.SSD.MULTIBOX.MIN_SIZES = [] __C.SSD.MULTIBOX.MIN_SIZES = []
__C.SSD.MULTIBOX.MAX_SIZES = [] __C.SSD.MULTIBOX.MAX_SIZES = []
__C.SSD.MULTIBOX.ASPECT_RATIOS = [] __C.SSD.MULTIBOX.ASPECT_RATIOS = []
__C.SSD.MULTIBOX.ASPECT_ANGLES = []
# OHEM configs
__C.SSD.OHEM = AttrDict() __C.SSD.OHEM = AttrDict()
# The threshold for selecting negative bbox in hard example mining # The threshold for selecting negative bbox in hard example mining
__C.SSD.OHEM.NEG_OVERLAP = 0.5 __C.SSD.OHEM.NEG_OVERLAP = 0.5
......
...@@ -21,46 +21,56 @@ import cv2 ...@@ -21,46 +21,56 @@ import cv2
import dragon import dragon
from lib.core.config import cfg from lib.core.config import cfg
from lib.datasets.example import Example
from lib.datasets.factory import get_imdb from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data_transformer import DataTransformer
class TestServer(object): class _Server(object):
def __init__(self, output_dir): def __init__(self, output_dir):
self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.num_images, self.num_classes, self.classes = \
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = dragon.io.DataReader(
dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
self.data_transformer = DataTransformer()
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH * 5)
self.data_reader.start()
self.gt_recs = collections.OrderedDict()
self.output_dir = output_dir self.output_dir = output_dir
if cfg.VIS_ON_FILE: if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis') self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir): if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir) os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls): def evaluate_detections(self, all_boxes):
self.data_transformer = transformer_cls() pass
def evaluate_segmentations(self, all_boxes, all_masks):
pass
def get_image(self): def get_image(self):
example = self.data_reader.q_out.get() pass
image = self.data_transformer.get_image(example)
image_id, objects = self.data_transformer.get_annotations(example)
self.gt_recs[image_id] = {
'objects': objects,
'width': image.shape[1],
'height': image.shape[0],
}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'): def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \ return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None if cfg.VIS_ON_FILE else None
class TestServer(_Server):
def __init__(self, output_dir):
super(TestServer, self).__init__(output_dir)
self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.classes = self.imdb.classes
self.num_images = self.imdb.num_images
self.num_classes = self.imdb.num_classes
self.data_reader = dragon.io.DataReader(
dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH * 5)
self.data_reader.start()
self.gt_recs = collections.OrderedDict()
def get_image(self):
example = Example(self.data_reader.q_out.get())
image, image_id = example.image, example.id
self.gt_recs[image_id] = {
'height': example.height,
'width': example.width,
'objects': example.objects,
}
return image_id, image
def get_records(self): def get_records(self):
if len(self.gt_recs) != self.num_images: if len(self.gt_recs) != self.num_images:
raise RuntimeError( raise RuntimeError(
...@@ -70,7 +80,7 @@ class TestServer(object): ...@@ -70,7 +80,7 @@ class TestServer(object):
return self.gt_recs return self.gt_recs
def evaluate_detections(self, all_boxes): def evaluate_detections(self, all_boxes):
if cfg.TEST.PROTOCOL == 'null': if cfg.TEST.PROTOCOL == 'dump':
self.imdb.dump_detections(all_boxes, self.output_dir) self.imdb.dump_detections(all_boxes, self.output_dir)
else: else:
self.imdb.evaluate_detections( self.imdb.evaluate_detections(
...@@ -88,56 +98,20 @@ class TestServer(object): ...@@ -88,56 +98,20 @@ class TestServer(object):
) )
class InferServer(object): class InferServer(_Server):
def __init__(self, output_dir): def __init__(self, output_dir):
super(InferServer, self).__init__(output_dir)
self.images_dir = cfg.TEST.DATABASE self.images_dir = cfg.TEST.DATABASE
self.imdb = get_imdb('taas:/empty')
self.images = os.listdir(self.images_dir) self.images = os.listdir(self.images_dir)
self.num_images, self.num_classes, self.classes = \ self.classes = cfg.MODEL.CLASSES
len(self.images), cfg.MODEL.NUM_CLASSES, cfg.MODEL.CLASSES self.num_images = len(self.images)
self.data_transformer = DataTransformer() self.num_classes = cfg.MODEL.NUM_CLASSES
self.gt_recs = collections.OrderedDict()
self.output_dir = output_dir self.output_dir = output_dir
self.image_idx = 0 self.image_idx = 0
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self): def get_image(self):
image_name = self.images[self.image_idx] image_name = self.images[self.image_idx]
image_id = image_name.split('.')[0] image_id = image_name.split('.')[0]
image = cv2.imread(os.path.join(self.images_dir, image_name)) image = cv2.imread(os.path.join(self.images_dir, image_name))
self.image_idx = (self.image_idx + 1) % self.num_images self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = {'width': image.shape[1], 'height': image.shape[0]}
return image_id, image return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
'Loading {} records, while {} required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
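Taken together, the two servers split along data sources: TestServer reads an annotated record database and keeps ground-truth records for evaluation, while InferServer only walks a folder of images. A hedged selection sketch (TestServer and InferServer are the classes above; their module path is not shown in this diff, so the import is omitted):

import os

from lib.core.config import cfg

def build_server(output_dir):
    # A plain directory of images -> inference only; otherwise assume an
    # annotated record database that supports the evaluation protocols.
    if os.path.isdir(cfg.TEST.DATABASE):
        return InferServer(output_dir)   # folder of images, no ground truth
    return TestServer(output_dir)        # record database, supports evaluation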
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import importlib
import multiprocessing
import numpy as np
from lib.core.config import cfg
from lib.utils import time_util
from lib.utils.vis import vis_one_image
def run_test_net(checkpoint, server, devices):
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
devices = devices if devices else [cfg.GPU_ID]
num_workers = len(devices)
test_fn = importlib.import_module(
'lib.%s.test' % cfg.MODEL.TYPE).test_net
_t = time_util.new_timers('im_detect', 'mask_detect', 'misc')
vis_image_dict = {}
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
all_masks = [[[] for _ in range(num_images)] for _ in range(num_classes)]
queues = [
multiprocessing.Queue()
for _ in range(num_workers + 1)
]
workers = [
multiprocessing.Process(
target=test_fn,
kwargs={
'weights': checkpoint,
'num_classes': server.num_classes,
'q_in': queues[i],
'q_out': queues[-1],
'device': devices[i],
}
) for i in range(num_workers)
]
for process in workers:
process.start()
for i in range(num_images):
image_id, raw_image = server.get_image()
queues[i % num_workers].put((i, raw_image))
# Hold the image until the visualization
if cfg.VIS or cfg.VIS_ON_FILE:
vis_image_dict[i] = (image_id, raw_image)
for i in range(num_workers):
queues[i].put((-1, None))
for count in range(num_images):
i, time_diffs, results = queues[-1].get()
# Unpack the diverse results
boxes_this_image = results['boxes']
masks_this_image = results.get('masks', None)
# Disable some collections
if masks_this_image is None:
all_masks = None
# Update time difference
for name, diff in time_diffs.items():
_t[name].add_diff(diff)
# Visualize the results if necessary
if cfg.VIS or cfg.VIS_ON_FILE:
image_id, raw_image = vis_image_dict[i]
vis_one_image(
raw_image,
classes,
boxes_this_image,
masks_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_id),
)
del vis_image_dict[i]
_t['misc'].tic()
# Pack the results in the class-major order
for j in range(1, num_classes):
all_boxes[j][i] = boxes_this_image[j]
if all_masks is not None:
if j < len(masks_this_image):
all_masks[j][i] = masks_this_image[j]
# Limit to max_per_image detections *over all classes*
max_detections = cfg.TEST.DETECTIONS_PER_IM
if max_detections > 0:
scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
scores.append(all_boxes[j][i][:, -1])
if len(scores) > 0:
scores = np.hstack(scores)
if len(scores) > max_detections:
thr = np.sort(scores)[-max_detections]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= thr)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
if all_masks is not None:
all_masks[j][i] = all_masks[j][i][keep]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s|{:.3f}s {:.3f}s'
.format(count + 1, num_images,
_t['im_detect'].average_time,
_t['mask_detect'].average_time,
_t['misc'].average_time),
end='')
print('\n\n>>> Evaluating detections\n')
server.evaluate_detections(all_boxes)
if all_masks is not None:
print('>>> Evaluating segmentations\n')
server.evaluate_segmentations(all_boxes, all_masks)
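A usage sketch for the entry point above: it spawns one worker per device and gathers per-image results through the last queue. Paths and device ids below are illustrative, not from this diff:

from lib.core.config import cfg
# TestServer and run_test_net are defined earlier in this diff; their module
# paths are not visible here, so imports are omitted in this sketch.

if __name__ == '__main__':
    cfg.MODEL.TYPE = 'mask_rcnn'
    server = TestServer('/tmp/seetadet_results')    # illustrative output dir
    run_test_net(
        checkpoint='/path/to/model_final.pkl',      # illustrative checkpoint file
        server=server,
        devices=[0, 1],                             # two GPUs -> two test workers
    )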
...@@ -31,9 +31,9 @@ from lib.utils.stats import SmoothedValue ...@@ -31,9 +31,9 @@ from lib.utils.stats import SmoothedValue
class SolverWrapper(object): class SolverWrapper(object):
def __init__(self, coordinator): def __init__(self, coordinator):
self.output_dir = coordinator.checkpoints_dir()
self.solver = SGDSolver() self.solver = SGDSolver()
self.detector = self.solver.detector self.detector = self.solver.detector
self.output_dir = coordinator.checkpoints_dir()
# Setup the detector # Setup the detector
self.detector.load_weights(cfg.TRAIN.WEIGHTS) self.detector.load_weights(cfg.TRAIN.WEIGHTS)
...@@ -89,7 +89,6 @@ class SolverWrapper(object): ...@@ -89,7 +89,6 @@ class SolverWrapper(object):
display = self.solver.iter % cfg.SOLVER.DISPLAY == 0 display = self.solver.iter % cfg.SOLVER.DISPLAY == 0
stats = self.solver.one_step() stats = self.solver.one_step()
self.add_metrics(stats) self.add_metrics(stats)
self.send_metrics(stats)
if display: if display:
logger.info( logger.info(
...@@ -104,6 +103,7 @@ class SolverWrapper(object): ...@@ -104,6 +103,7 @@ class SolverWrapper(object):
continue continue
logger.info(' ' * 10 + 'Train net output({}): {}' logger.info(' ' * 10 + 'Train net output({}): {}'
.format(k, v.GetMedianValue())) .format(k, v.GetMedianValue()))
self.send_metrics(stats)
def train_model(self): def train_model(self):
"""Network training loop.""" """Network training loop."""
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
from lib.pycocotools import mask_utils
class Example(object):
"""Wrapper for annotated example."""
def __init__(self, datum):
"""Create a ``Example``.
Parameters
----------
datum : Dict
The data loaded from the dataset.
"""
self._datum = datum
@property
def id(self):
"""Return the example id.
Returns
-------
str
The unique id.
"""
return self._datum['id']
@property
def image(self):
"""Return the image data.
Returns
-------
numpy.ndarray
The image.
"""
img = np.frombuffer(self._datum['content'], 'uint8')
return cv2.imdecode(img, 3)
@property
def height(self):
"""Return the image height.
Returns
-------
int
The height of image.
"""
return self._datum['height']
@property
def objects(self):
"""Return the annotated objects.
Returns
-------
Sequence[Dict]
The objects.
"""
objects = []
for ix, obj in enumerate(self._datum['object']):
mask = obj.get('mask', None)
if 'x3' in obj:
poly = np.array([
obj['x1'], obj['y1'],
obj['x2'], obj['y2'],
obj['x3'], obj['y3'],
obj['x4'], obj['y4']
], 'float32')
x, y, w, h = cv2.boundingRect(
poly.reshape((-1, 2)))
bbox = [x, y, x + w, y + h]
mask = mask_utils.poly2bytes(
[poly],
self._datum['height'],
self._datum['width'],
)
elif 'x2' in obj:
bbox = [obj['x1'], obj['y1'], obj['x2'], obj['y2']]
elif 'xmin' in obj:
bbox = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']]
else:
bbox = obj['bbox']
objects.append({
'name': obj['name'],
'bbox': bbox,
'mask': mask,
'difficult': obj.get('difficult', 0),
})
return objects
@property
def width(self):
"""Return the image width.
Returns
-------
int
The width of image.
"""
return self._datum['width']
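A small usage sketch for the Example wrapper; the datum dict is illustrative but uses exactly the keys this class reads:

import cv2
import numpy as np

from lib.datasets.example import Example

image = (np.random.rand(4, 6, 3) * 255).astype('uint8')
_, buf = cv2.imencode('.jpg', image)
datum = {
    'id': '000001',
    'content': buf.tobytes(),
    'height': 4, 'width': 6,
    'object': [{'name': 'person', 'xmin': 0, 'ymin': 0, 'xmax': 5, 'ymax': 3}],
}

example = Example(datum)
print(example.id, example.height, example.width)   # 000001 4 6
print(example.objects[0]['bbox'])                  # [0, 0, 5, 3]
print(example.image.shape)                         # decoded BGR array, (4, 6, 3)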
...@@ -13,84 +13,118 @@ ...@@ -13,84 +13,118 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os import os
import shutil import uuid
import dragon
from lib.core.config import cfg from lib.core.config import cfg
from lib.datasets.coco_evaluator import COCOEvaluator
from lib.datasets.voc_evaluator import VOCEvaluator
class imdb(object): class imdb(object):
def __init__(self, name): def __init__(self, source):
self._name = name self._source = source
self._num_classes = 0 self._num_images = 0
self._classes = [] self._classes = cfg.MODEL.CLASSES
self._class_to_ind = self._class_to_cat_id = \
@property dict(zip(self.classes, range(self.num_classes)))
def name(self): self._salt = str(uuid.uuid4())
return self._name self.config = {'cleanup': True, 'use_salt': True}
@property @property
def num_classes(self): def cache_path(self):
return len(self._classes) cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache'))
if not os.path.exists(cache_path):
os.makedirs(cache_path)
return cache_path
@property @property
def classes(self): def classes(self):
return self._classes return self._classes
@property @property
def cache_path(self): def class_to_ind(self):
cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache')) return self._class_to_ind
if not os.path.exists(cache_path):
os.makedirs(cache_path)
return cache_path
@property @property
def source(self): def comp_id(self):
excepted_source = os.path.join(self.cache_path, self.name) return '_' + self._salt if self.config['use_salt'] else ''
if not os.path.exists(excepted_source):
raise RuntimeError( @property
'Excepted source from: {}, ' def num_classes(self):
'but it is not existed.' return len(self._classes)
.format(excepted_source)
)
return excepted_source
@property @property
def num_images(self): def num_images(self):
return dragon.io.SeetaRecordDataset(self.source).size return self._num_images
@property
def source(self):
return self._source
def competition_mode(self, on):
if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
def dump_detections(self, all_boxes, output_dir): def dump_detections(self, all_boxes, output_dir):
dataset = dragon.io.SeetaRecordDataset(self.source) pass
for file in ('data.data', 'data.index', 'data.meta'):
file = os.path.join(output_dir, file)
if os.path.exists(file):
os.remove(file)
writer = dragon.io.SeetaRecordWriter(output_dir, dataset.protocol)
for i in range(len(dataset)):
example = dataset.get()
example['object'] = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
detections = all_boxes[cls_ind][i]
if len(detections) == 0:
continue
for k in range(detections.shape[0]):
if detections[k, -1] < cfg.VIS_TH:
continue
example['object'].append({
'name': cls,
'xmin': float(detections[k][0]),
'ymin': float(detections[k][1]),
'xmax': float(detections[k][2]),
'ymax': float(detections[k][3]),
'difficult': 0,
})
writer.write(example)
def evaluate_detections(self, all_boxes, gt_recs, output_dir): def evaluate_detections(self, all_boxes, gt_recs, output_dir):
pass protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
evaluator = VOCEvaluator(self)
evaluator.write_bbox_results(all_boxes, gt_recs, output_dir)
if '!' not in protocol:
for ovr in (0.5, 0.7):
evaluator.do_bbox_eval(
gt_recs,
output_dir,
iou=ovr,
use_07_metric='2007' in protocol,
)
elif 'coco' in protocol:
ann_file = cfg.TEST.JSON_FILE
evaluator = COCOEvaluator(self, ann_file)
if evaluator.coco is None:
ann_file = evaluator \
.write_bbox_annotations(
gt_recs, output_dir)
evaluator = COCOEvaluator(self, ann_file)
res_file = evaluator.write_bbox_results(
all_boxes, gt_recs, output_dir)
if '!' not in protocol:
evaluator.do_bbox_eval(res_file)
def evaluate_masks(self, all_boxes, all_masks, output_dir): def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir):
pass protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
evaluator = VOCEvaluator(self)
evaluator.write_segm_results(all_boxes, all_masks, output_dir)
if '!' not in protocol:
for ovr in (0.5, 0.7):
evaluator.do_segm_eval(
gt_recs,
output_dir,
iou=ovr,
use_07_metric='2007' in protocol,
)
elif 'coco' in protocol:
ann_file = cfg.TEST.JSON_FILE
evaluator = COCOEvaluator(self, ann_file)
if evaluator.coco is None:
ann_file = evaluator \
.write_segm_annotations(
gt_recs, output_dir)
evaluator = COCOEvaluator(self, ann_file)
res_file = evaluator.write_segm_results(
all_boxes, all_masks, gt_recs, output_dir)
if '!' not in protocol:
evaluator.do_segm_eval(res_file)
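Both evaluate_* methods branch on the protocol string; a short summary of the conventions inferred from the code above (the concrete value below is only an example):

from lib.core.config import cfg

# 'voc2007' / 'voc2012' -> VOCEvaluator; '2007' switches on the 11-point metric.
# 'coco'                -> COCOEvaluator; writes a JSON annotation file if missing.
# 'dump'                -> TestServer writes detections back to a record database.
# a '!' in the string   -> write result files but skip the evaluation pass.
cfg.TEST.PROTOCOL = 'voc2007'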
...@@ -20,15 +20,10 @@ from __future__ import print_function ...@@ -20,15 +20,10 @@ from __future__ import print_function
import cv2 import cv2
import numpy as np import numpy as np
try:
import cPickle
except:
import pickle as cPickle
from lib.core.config import cfg from lib.core.config import cfg
from lib.pycocotools.mask_utils import mask_rle2im from lib.pycocotools import mask_utils
from lib.utils import rotated_boxes from lib.utils import boxes as box_util
from lib.utils.boxes import expand_boxes from lib.utils.framework import pickle
from lib.utils.mask import mask_overlap from lib.utils.mask import mask_overlap
...@@ -66,15 +61,15 @@ def voc_bbox_eval( ...@@ -66,15 +61,15 @@ def voc_bbox_eval(
det_file, det_file,
gt_recs, gt_recs,
cls_name, cls_name,
IoU=0.5, iou=0.5,
use_07_metric=False, use_07_metric=False,
): ):
class_recs, n_pos = {}, 0 class_recs, n_pos = {}, 0
for image_name, rec in gt_recs.items(): for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name] objects = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R]) bbox = np.array([x['bbox'] for x in objects])
diff = np.array([x['difficult'] for x in R]).astype(np.bool) diff = np.array([x['difficult'] for x in objects]).astype(np.bool)
det = [False] * len(R) det = [False] * len(objects)
n_pos = n_pos + sum(~diff) n_pos = n_pos + sum(~diff)
class_recs[image_name] = {'bbox': bbox, 'difficult': diff, 'det': det} class_recs[image_name] = {'bbox': bbox, 'difficult': diff, 'det': det}
...@@ -100,7 +95,7 @@ def voc_bbox_eval( ...@@ -100,7 +95,7 @@ def voc_bbox_eval(
nd = len(image_ids) nd = len(image_ids)
tp, fp = np.zeros(nd), np.zeros(nd) tp, fp = np.zeros(nd), np.zeros(nd)
def overlaps4(bb, BBGT): def compute_overlaps(bb, BBGT):
ixmin = np.maximum(BBGT[:, 0], bb[0]) ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1]) iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2]) ixmax = np.minimum(BBGT[:, 2], bb[2])
...@@ -114,9 +109,6 @@ def voc_bbox_eval( ...@@ -114,9 +109,6 @@ def voc_bbox_eval(
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
return inters / uni return inters / uni
def overlaps5(bb, BBGT):
return rotated_boxes.bbox_overlaps(bb.reshape((1, 5)), BBGT)[0]
for d in range(nd): for d in range(nd):
R = class_recs[image_ids[d]] R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float) bb = BB[d, :].astype(float)
...@@ -124,12 +116,11 @@ def voc_bbox_eval( ...@@ -124,12 +116,11 @@ def voc_bbox_eval(
BBGT = R['bbox'].astype(float) BBGT = R['bbox'].astype(float)
if BBGT.size > 0: if BBGT.size > 0:
overlaps = overlaps4(bb, BBGT) \ overlaps = compute_overlaps(bb, BBGT)
if len(bb) == 4 else overlaps5(bb, BBGT)
ov_max = np.max(overlaps) ov_max = np.max(overlaps)
j_max = np.argmax(overlaps) j_max = np.argmax(overlaps)
if ov_max > IoU: if ov_max > iou:
if not R['difficult'][j_max]: if not R['difficult'][j_max]:
if not R['det'][j_max]: if not R['det'][j_max]:
tp[d] = 1. tp[d] = 1.
...@@ -154,23 +145,29 @@ def voc_segm_eval( ...@@ -154,23 +145,29 @@ def voc_segm_eval(
seg_file, seg_file,
gt_recs, gt_recs,
cls_name, cls_name,
IoU=0.5, iou=0.5,
use_07_metric=False, use_07_metric=False,
): ):
# 0. Constants # 0. Constants
M = cfg.MRCNN.RESOLUTION M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH binary_thresh = cfg.TEST.BINARY_THRESH
scale = (M + 2.0) / M scale = (M + 2.) / M
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
# 1. Get bbox & mask ground truths # 1. Get bbox & mask ground truths
image_names, class_recs, n_pos = [], {}, 0 image_names, class_recs, n_pos = [], {}, 0
for image_name, rec in gt_recs.items(): for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name] objects = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R]) bbox = np.array([x['bbox'] for x in objects])
mask = np.array([mask_rle2im([x['mask']], rec['height'], rec['width'])[0] for x in R]) mask = np.array([
difficult = np.array([x['difficult'] for x in R]).astype(np.bool) mask_utils.bytes2img(
det = [False] * len(R) x['mask'],
rec['height'],
rec['width']
) for x in objects]
)
difficult = np.array([x['difficult'] for x in objects]).astype(np.bool)
det = [False] * len(objects)
n_pos = n_pos + sum(~difficult) n_pos = n_pos + sum(~difficult)
class_recs[image_name] = { class_recs[image_name] = {
'bbox': bbox, 'bbox': bbox,
...@@ -182,9 +179,9 @@ def voc_segm_eval( ...@@ -182,9 +179,9 @@ def voc_segm_eval(
# 2. Get predict pickle file for this class # 2. Get predict pickle file for this class
with open(det_file, 'rb') as f: with open(det_file, 'rb') as f:
boxes_pkl = cPickle.load(f) boxes_pkl = pickle.load(f)
with open(seg_file, 'rb') as f: with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f) masks_pkl = pickle.load(f)
# 3. Pre-compute number of total instances to allocate memory # 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs) num_images = len(gt_recs)
...@@ -222,7 +219,7 @@ def voc_segm_eval( ...@@ -222,7 +219,7 @@ def voc_segm_eval(
fp = np.zeros((num_pred, 1)) fp = np.zeros((num_pred, 1))
tp = np.zeros((num_pred, 1)) tp = np.zeros((num_pred, 1))
ref_boxes = expand_boxes(new_boxes, scale) ref_boxes = box_util.expand_boxes(new_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32) ref_boxes = ref_boxes.astype(np.int32)
for i in range(num_pred): for i in range(num_pred):
...@@ -261,13 +258,19 @@ def voc_segm_eval( ...@@ -261,13 +258,19 @@ def voc_segm_eval(
crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1, crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0]:gt_mask_bound[2] + 1] gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask) ov = \
mask_overlap(
gt_mask_bound,
pred_mask_bound,
crop_mask,
pred_mask,
)
if ov > ovmax: if ov > ovmax:
ovmax = ov ovmax = ov
jmax = j jmax = j
if ovmax > IoU: if ovmax > iou:
if not R['difficult'][jmax]: if not R['difficult'][jmax]:
if not R['det'][jmax]: if not R['det'][jmax]:
tp[i] = 1. tp[i] = 1.
...@@ -281,7 +284,7 @@ def voc_segm_eval( ...@@ -281,7 +284,7 @@ def voc_segm_eval(
fp = np.cumsum(fp) fp = np.cumsum(fp)
tp = np.cumsum(tp) tp = np.cumsum(tp)
rec = tp / float(n_pos) rec = tp / float(n_pos)
# avoid divide by zero in case the first matches a difficult gt # Avoid divide by zero in case the first matches a difficult gt
prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps) prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric=use_07_metric) ap = voc_ap(rec, prec, use_07_metric=use_07_metric)
return ap return ap
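voc_ap itself is untouched by this diff; for reference, a minimal sketch of the standard VOC computation it performs (the well-known definition, not copied from the repo):

import numpy as np

def voc_ap_sketch(rec, prec, use_07_metric=False):
    # Average precision from recall/precision arrays.
    if use_07_metric:
        # 11-point interpolation used by the VOC2007 devkit.
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            p = np.max(prec[rec >= t]) if np.any(rec >= t) else 0.
            ap += p / 11.
        return ap
    # Otherwise integrate the precision envelope over recall.
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])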
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
from lib.datasets import voc_eval
from lib.utils.framework import pickle
class VOCEvaluator(object):
def __init__(self, imdb):
self.imdb = imdb
def do_bbox_eval(
self,
gt_recs,
output_dir,
iou=0.5,
use_07_metric=True,
):
aps = []
print('~~~~~~ Evaluation IoU@%s ~~~~~~' % str(iou))
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
det_file = self.get_results_file(output_dir).format(cls)
rec, prec, ap = \
voc_eval.voc_bbox_eval(
det_file,
gt_recs, cls,
iou=iou,
use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
def do_segm_eval(
self,
gt_recs,
output_dir,
iou=0.5,
use_07_metric=True,
):
aps = []
print('~~~~~~ Evaluation IoU@%s ~~~~~~' % str(iou))
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
segm_filename = self.get_results_file(output_dir, 'segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
ap = voc_eval.voc_segm_eval(
bbox_filename,
segm_filename,
gt_recs, cls,
iou=iou,
use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
@staticmethod
def get_prefix(type='bbox'):
if type == 'bbox':
return 'detections'
elif type == 'segm':
return 'segmentations'
elif type == 'kpt':
return 'keypoints'
return ''
def get_results_file(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_<comp_id>_<class_name>.txt
if type == 'bbox':
filename = self.get_prefix(type) + self.imdb.comp_id + '_{:s}.txt'
elif type == 'segm':
filename = self.get_prefix(type) + self.imdb.comp_id + '_{:s}.pkl'
else:
raise ValueError('Type of results can be either bbox or segm.')
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def write_bbox_results(self, all_boxes, gt_recs, output_dir):
for cls_ind, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
print('Writing {} VOC format bbox results'.format(cls))
filename = self.get_results_file(output_dir).format(cls)
with open(filename, 'wt') as f:
ix = 0
for image_id, rec in gt_recs.items():
dets = all_boxes[cls_ind][ix]
ix += 1
if len(dets) == 0:
continue
for k in range(dets.shape[0]):
content = '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}' \
.format(image_id, dets[k, -1],
dets[k, 0] + 1, dets[k, 1] + 1,
dets[k, 2] + 1, dets[k, 3] + 1)
if dets.shape[1] == 6:
content += ' {:.2f}'.format(dets[k, 4])
f.write(content + '\n')
def write_segm_results(self, all_boxes, all_masks, output_dir):
for cls_inds, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
print('Writing {} VOC format segm results'.format(cls))
segm_filename = self.get_results_file(output_dir, 'segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
with open(bbox_filename, 'wb') as f:
pickle.dump(all_boxes[cls_inds], f, pickle.HIGHEST_PROTOCOL)
with open(segm_filename, 'wb') as f:
pickle.dump(all_masks[cls_inds], f, pickle.HIGHEST_PROTOCOL)
...@@ -13,7 +13,11 @@ from __future__ import absolute_import ...@@ -13,7 +13,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.faster_rcnn.anchor_target_layer import AnchorTargetLayer from lib.faster_rcnn.anchor_target import AnchorTarget
from lib.faster_rcnn.data_layer import DataLayer from lib.faster_rcnn.data_loader import DataLoader
from lib.faster_rcnn.proposal_layer import ProposalLayer from lib.faster_rcnn.proposal import Proposal
from lib.faster_rcnn.proposal_target_layer import ProposalTargetLayer from lib.faster_rcnn.proposal_target import ProposalTarget
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.faster_rcnn.utils import map_returns_to_blobs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.utils import boxes as box_util
from lib.utils.framework import new_tensor
class AnchorTarget(object):
"""Assign ground-truth targets to anchors."""
def __init__(self):
super(AnchorTarget, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
self.num_strides = len(self.strides)
self.allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = []
for i in range(self.num_strides):
self.base_anchors.append(
generate_anchors(
self.strides[i],
self.ratios,
np.array([self.scales[i]])
if self.num_strides > 1
else np.array(self.scales)
)
)
def __call__(self, features, gt_boxes, ims_info):
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
# Generate grid anchors from base
all_anchors = \
generate_grid_anchors(
features,
self.base_anchors,
self.strides,
)
num_anchors = all_anchors.shape[0]
# Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
labels_wide = -np.ones((num_images, num_anchors,), 'float32')
bbox_targets_wide = np.zeros((num_images, num_anchors, 4), 'float32')
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, ...)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self.allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self.allowed_border) &
(all_anchors[:, 1] >= -self.allowed_border) &
(all_anchors[:, 2] < im_info[1] + self.allowed_border) &
(all_anchors[:, 3] < im_info[0] + self.allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside, anchors = np.arange(num_anchors), all_anchors
num_inside = len(inds_inside)
labels = np.empty((num_inside,), 'float32')
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = box_util.bbox_overlaps(anchors, gt_boxes)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps,
np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(fg_inds, len(fg_inds) - num_fg, False)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(bg_inds, len(bg_inds) - num_bg, False)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), 'float32')
bbox_targets[fg_inds, :] = \
box_util.bbox_transform(
anchors[fg_inds, :],
gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), 'float32')
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), 'float32')
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
if self.num_strides > 1:
labels = labels_wide.reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
else:
A = self.base_anchors[0].shape[0]
height, width = features[0].shape[-2:]
labels = labels_wide \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = bbox_inside_weights_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = bbox_outside_weights_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': new_tensor(labels),
'bbox_targets': new_tensor(bbox_targets),
'bbox_inside_weights': new_tensor(bbox_inside_weights),
'bbox_outside_weights': new_tensor(bbox_outside_weights),
}
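The assignment above hinges on box_util.bbox_overlaps; a minimal NumPy sketch of that IoU matrix using the same +1 pixel convention as the VOC code elsewhere in this diff (illustrative, not the repo's optimized kernel):

import numpy as np

def bbox_overlaps_sketch(anchors, gt_boxes):
    # IoU between every anchor (N, 4) and every gt box (K, >=4), columns x1, y1, x2, y2.
    x1 = np.maximum(anchors[:, None, 0], gt_boxes[None, :, 0])
    y1 = np.maximum(anchors[:, None, 1], gt_boxes[None, :, 1])
    x2 = np.minimum(anchors[:, None, 2], gt_boxes[None, :, 2])
    y2 = np.minimum(anchors[:, None, 3], gt_boxes[None, :, 3])
    inter = np.maximum(x2 - x1 + 1., 0.) * np.maximum(y2 - y1 + 1., 0.)
    area_a = (anchors[:, 2] - anchors[:, 0] + 1.) * (anchors[:, 3] - anchors[:, 1] + 1.)
    area_g = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1.) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1.)
    return inter / (area_a[:, None] + area_g[None, :] - inter)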
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
# C4 backbone takes the first stride
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
height, width = features[0].shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
all_anchors = (self.base_anchors.reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A)
# label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
all_bbox_outside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(anchors, gt_boxes)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels first so that positive labels can clobber them
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels last so that negative labels can clobber positives
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds,
size=len(fg_inds) - num_fg,
replace=False,
)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds,
size=len(bg_inds) - num_bg,
replace=False,
)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
all_labels[ix, inds_inside] = labels # label
all_bbox_targets[ix, inds_inside] = bbox_targets
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
labels = all_labels \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, total_anchors))
bbox_targets = all_bbox_targets \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = all_bbox_inside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = all_bbox_outside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': array2tensor(labels),
'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights),
}
...@@ -27,11 +27,11 @@ from lib.utils import logger ...@@ -27,11 +27,11 @@ from lib.utils import logger
from lib.utils.blob import im_list_to_blob from lib.utils.blob import im_list_to_blob
class DataLayer(torch.nn.Module): class DataLoader(object):
"""Generate a mini-batch of data.""" """Provide mini-batches of data."""
def __init__(self): def __init__(self):
super(DataLayer, self).__init__() super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE) database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{ self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source), 'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
...@@ -39,12 +39,11 @@ class DataLayer(torch.nn.Module): ...@@ -39,12 +39,11 @@ class DataLayer(torch.nn.Module):
'shuffle': cfg.TRAIN.USE_SHUFFLE, 'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS, 'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2, 'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
}) })
def forward(self): def __call__(self):
# Get an array blob from the Queue
outputs = self.data_batch.get() outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data']) outputs['data'] = torch.from_numpy(outputs['data'])
return outputs return outputs
...@@ -59,14 +58,16 @@ class DataBatch(mp.Process): ...@@ -59,14 +58,16 @@ class DataBatch(mp.Process):
---------- ----------
dataset : lambda dataset : lambda
The creator of a dataset. The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False shuffle : bool, optional, default=False
Whether to shuffle the data. Whether to shuffle the data.
num_chunks : int, optional, default=0 num_chunks : int, optional, default=0
The number of chunks to split. The number of chunks to split.
batch_size : int, optional, default=2 batch_size : int, optional, default=2
The size of a mini-batch. The size of a mini-batch.
prefetch : int, optional, default=5 num_transformers : int, optional, default=3
The prefetch count. The number of workers to transform data.
""" """
super(DataBatch, self).__init__() super(DataBatch, self).__init__()
...@@ -83,20 +84,10 @@ class DataBatch(mp.Process): ...@@ -83,20 +84,10 @@ class DataBatch(mp.Process):
self._prefetch = kwargs.get('prefetch', 5) self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2) self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1) self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', -1) self._num_transformers = kwargs.get('num_transformers', 3)
self._max_transformers = kwargs.get('max_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1) self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True self.daemon = True
# Io-Aware Policy
if self._num_transformers == -1:
self._num_transformers = 2
# Add 1 transformer for color augmentation
if cfg.TRAIN.USE_COLOR_JITTER:
self._num_transformers += 1
self._num_transformers = min(
self._num_transformers, self._max_transformers)
# Initialize queues # Initialize queues
num_batches = self._prefetch * self._num_readers num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size) self.Q1 = mp.Queue(num_batches * self._batch_size)
......
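A hedged usage sketch for the refactored loader: DataLoader is now a plain callable rather than a Module. The database path is illustrative, and the exact blob keys depend on the transformer:

from lib.core.config import cfg
from lib.faster_rcnn import DataLoader

cfg.TRAIN.DATABASE = '/data/voc_0712_trainval'   # illustrative record database
cfg.TRAIN.NUM_WORKERS = 3                        # transformer processes (see config above)

loader = DataLoader()
blobs = loader()                  # __call__ replaces the old forward()
print(sorted(blobs.keys()))       # 'data' plus the target blobs
print(blobs['data'].shape)        # image tensor, zero-copied from numpy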
...@@ -19,9 +19,9 @@ import cv2 ...@@ -19,9 +19,9 @@ import cv2
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils import rotated_boxes from lib.datasets.example import Example
from lib.utils import boxes as box_util
from lib.utils.blob import prep_im_for_blob from lib.utils.blob import prep_im_for_blob
from lib.utils.boxes import flip_boxes
from lib.utils.image import get_image_with_target_size from lib.utils.image import get_image_with_target_size
...@@ -44,32 +44,32 @@ class DataTransformer(multiprocessing.Process): ...@@ -44,32 +44,32 @@ class DataTransformer(multiprocessing.Process):
apply_flip=False, apply_flip=False,
offsets=None, offsets=None,
): ):
n_objects = 0 objects, n_objects = example.objects, 0
height, width = example.height, example.width
if not self._use_diff: if not self._use_diff:
for obj in example['object']: for obj in objects:
if obj.get('difficult', 0) == 0: if obj.get('difficult', 0) == 0:
n_objects += 1 n_objects += 1
else: else:
n_objects = len(example['object']) n_objects = len(objects)
roi_dict = { roi_dict = {
'width': example['width'],
'height': example['height'],
'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), 'float32'), 'boxes': np.zeros((n_objects, 4), 'float32'),
'gt_classes': np.zeros((n_objects,), 'int32'),
} }
# Filter the difficult instances # Filter the difficult instances
object_idx = 0 object_idx = 0
for obj in example['object']: for obj in objects:
if not self._use_diff and \ if not self._use_diff and \
obj.get('difficult', 0) > 0: obj.get('difficult', 0) > 0:
continue continue
bbox = obj['bbox']
roi_dict['boxes'][object_idx, :] = [ roi_dict['boxes'][object_idx, :] = [
max(0, obj['xmin']), max(0, bbox[0]),
max(0, obj['ymin']), max(0, bbox[1]),
min(obj['xmax'], example['width'] - 1), min(bbox[2], width - 1),
min(obj['ymax'], example['height'] - 1), min(bbox[3], height - 1),
] ]
roi_dict['gt_classes'][object_idx] = \ roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[obj['name']] self._class_to_ind[obj['name']]
...@@ -77,8 +77,11 @@ class DataTransformer(multiprocessing.Process): ...@@ -77,8 +77,11 @@ class DataTransformer(multiprocessing.Process):
# Flip the boxes if necessary # Flip the boxes if necessary
if apply_flip: if apply_flip:
roi_dict['boxes'] = flip_boxes( roi_dict['boxes'] = \
roi_dict['boxes'], roi_dict['width']) box_util.flip_boxes(
roi_dict['boxes'],
width,
)
# Scale the boxes to the detecting scale # Scale the boxes to the detecting scale
roi_dict['boxes'] *= im_scale roi_dict['boxes'] *= im_scale
...@@ -94,61 +97,32 @@ class DataTransformer(multiprocessing.Process): ...@@ -94,61 +97,32 @@ class DataTransformer(multiprocessing.Process):
return roi_dict return roi_dict
@classmethod
def get_image(cls, example):
img = np.frombuffer(example['content'], np.uint8)
return cv2.imdecode(img, -1)
@classmethod
def get_annotations(cls, example):
objects = []
for ix, obj in enumerate(example['object']):
if 'x3' in obj:
bbox = rotated_boxes.vertices2box(
[obj['x1'], obj['y1'],
obj['x2'], obj['y2'],
obj['x3'], obj['y3'],
obj['x4'], obj['y4']]
)
elif 'x2' in obj:
bbox = [obj['x1'], obj['y1'], obj['x2'], obj['y2']]
elif 'xmin' in obj:
bbox = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']]
else:
bbox = obj['bbox']
objects.append({
'name': obj['name'],
'difficult': obj.get('difficult', 0),
'bbox': bbox,
})
return example['id'], objects
def get(self, example): def get(self, example):
img = np.frombuffer(example['content'], np.uint8) example = Example(example)
img = cv2.imdecode(img, 1) img = example.image
# Scale # Scale
scale_indices = np.random.randint(len(cfg.TRAIN.SCALES)) max_size = cfg.TRAIN.MAX_SIZE
target_size = cfg.TRAIN.SCALES[scale_indices] target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))]
im, im_scale, jitter = prep_im_for_blob(img, target_size, cfg.TRAIN.MAX_SIZE) img, im_scale, jitter = prep_im_for_blob(img, target_size, max_size)
# Flip # Flip
apply_flip = False apply_flip = False
if self._use_flipped: if self._use_flipped:
if np.random.randint(2) > 0: if np.random.randint(2) > 0:
im = im[:, ::-1, :] img = img[:, ::-1]
apply_flip = True apply_flip = True
# Random Crop or RandomPad # Random Crop or RandomPad
offsets = None offsets = None
if cfg.TRAIN.MAX_SIZE > 0: if cfg.TRAIN.MAX_SIZE > 0:
if jitter != 1.0: if jitter != 1:
# To a rectangle (scale, max_size) # To a rectangle (scale, max_size)
target_size = (np.array(im.shape[0:2]) / jitter).astype(np.int) target_size = (np.array(img.shape[:2]) / jitter).astype(np.int32)
im, offsets = get_image_with_target_size(target_size, im) img, offsets = get_image_with_target_size(target_size, img)
else: else:
# To a square (target_size, target_size) # To a square (target_size, target_size)
im, offsets = get_image_with_target_size([target_size] * 2, im) img, offsets = get_image_with_target_size([target_size] * 2, img)
# Example -> RoIDict # Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets) roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets)
...@@ -158,7 +132,7 @@ class DataTransformer(multiprocessing.Process): ...@@ -158,7 +132,7 @@ class DataTransformer(multiprocessing.Process):
gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), dtype=np.float32) gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes'] gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
return im, im_scale, gt_boxes return img, im_scale, gt_boxes
def run(self): def run(self):
# Fix the process-local random seed # Fix the process-local random seed
......
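For context, the resize applied by prep_im_for_blob in get() above follows the usual Faster R-CNN rule: scale the shortest side to the sampled TRAIN.SCALES value without letting the longest side exceed TRAIN.MAX_SIZE. A minimal standalone sketch of that rule (jitter handling omitted):

import numpy as np

def compute_im_scale(im_shape, target_size, max_size):
    # Shortest side -> target_size, capped so the longest side stays <= max_size.
    im_min, im_max = np.min(im_shape[:2]), np.max(im_shape[:2])
    im_scale = float(target_size) / im_min
    if max_size > 0 and round(im_scale * im_max) > max_size:
        im_scale = float(max_size) / im_max
    return im_scale

print(compute_im_scale((375, 500, 3), 600, 1000))   # 1.6
print(compute_im_scale((500, 1500, 3), 600, 1000))  # capped at ~0.667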
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.nms import nms_wrapper
from lib.utils import boxes as box_util
class Proposal(object):
"""Compute proposals by applying transformations anchors."""
def __init__(self):
super(Proposal, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
self.num_strides = len(self.strides)
self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
])
# Generate base anchors
self.base_anchors = []
for i in range(self.num_strides):
self.base_anchors.append(
generate_anchors(
self.strides[i],
self.ratios,
np.array([self.scales[i]])
if self.num_strides > 1
else np.array(self.scales)
)
)
def __call__(self, features, cls_prob, bbox_pred, ims_info):
pre_nms_top_n = cfg.TRAIN.RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg.TRAIN.RPN_POST_NMS_TOP_N
nms_thresh = cfg.TRAIN.RPN_NMS_THRESH
min_size = cfg.TRAIN.RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
all_anchors = \
generate_grid_anchors(
features,
self.base_anchors,
self.strides,
)
# Prepare for the outputs
batch_rois = []
cls_prob = cls_prob.numpy(True)
bbox_pred = bbox_pred.numpy(True)
if self.num_strides > 1:
# (?, 4, A * K) -> (?, A * K, 4)
bbox_pred = bbox_pred.transpose((0, 2, 1))
else:
# (?, A * 4, H, W) -> (?, H, W, A * 4)
cls_prob = cls_prob.transpose((0, 2, 3, 1))
bbox_pred = bbox_pred.transpose((0, 2, 3, 1))
# Extract RoIs separately
for ix in range(num_images):
# [?, N] -> [? * N, 1]
scores = cls_prob[ix].reshape((-1, 1))
if self.num_strides > 1:
deltas = bbox_pred[ix]
else:
deltas = bbox_pred[ix].reshape((-1, 4))
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; first partition to get the top K
# unsorted, then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# Convert anchors into proposals via bbox transformations
proposals = box_util.bbox_transform_inv(anchors, deltas)
# Clip predicted boxes to image
proposals = box_util.clip_tiled_boxes(proposals, ims_info[ix, :2])
# Remove predicted boxes with either height or width < threshold
keep = box_util.filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# Apply nms (e.g. threshold = 0.7)
# Take after_nms_topN (e.g. 300)
# Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Attach RoIs with batch indices
batch_inds = np.empty((proposals.shape[0], 1), 'float32')
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype('float32', copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
return np.concatenate(batch_rois, 0)
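# Illustrative sketch, not part of the original file: the argpartition trick
# used in __call__ above finds the unsorted top-K in linear time, so only
# those K scores need a full sort afterwards.
import numpy as np
scores = np.random.rand(200000)
top_k = 2000
inds = np.argpartition(-scores, top_k)[:top_k]      # top-k indices, unordered
order = inds[np.argsort(-scores[inds])]             # sort just the survivors
reference = np.argsort(-scores)[:top_k]             # full sort, for comparison
assert np.allclose(scores[order], scores[reference])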
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms import nms_wrapper
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""Compute proposals by applying transformations to anchors."""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_top_n = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
# Generate proposals from shifted anchors
height, width = cls_prob.shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
anchors = \
self.base_anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = anchors.reshape((K * A, 4))
# Prepare for the outputs
batch_rois = []
# scores & deltas are (1, A, H, W) format
# Transpose to (1, H, W, A)
batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1))
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1))
# Extract RoIs separately
for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix].reshape((-1, 4))
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; first partition to get the top K
# unsorted, then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. Remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2])
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# 4. Apply NMS (e.g. threshold = 0.7)
# 5. Take post_nms_top_n (e.g. 300)
# 6. Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN':
return rpn_rois
else:
return [array2tensor(rpn_rois)]
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_returns_to_blobs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.utils import boxes as box_util
from lib.utils.framework import new_tensor
class ProposalTarget(object):
"""Assign ground-truth targets to proposals."""
def __init__(self):
super(ProposalTarget, self).__init__()
self.num_strides = len(cfg.RPN.STRIDES)
self.num_classes = cfg.MODEL.NUM_CLASSES
self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
('labels', np.array([-1], 'float32')),
('bbox_targets', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_inside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_outside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
])
def __call__(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = self.defaults.keys()
blobs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype('int32') == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, :4]))))
# Sample a batch of RoIs for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
map_returns_to_blobs(
sample_rois(
rois,
gt_boxes,
rois_per_image,
fg_rois_per_image,
self.num_classes,
), blobs, keys,
)
# Stack into continuous blobs
for k, v in blobs.items():
blobs[k] = np.concatenate(blobs[k], 0)
if self.num_strides > 1:
# Distribute RoIs into pyramids
min_lvl = cfg.FPN.ROI_MIN_LEVEL
max_lvl = cfg.FPN.ROI_MAX_LEVEL
k = max_lvl - min_lvl + 1
levels = map_rois_to_levels(blobs['rois'], min_lvl, max_lvl)
outputs = map_blobs_to_outputs(
blobs,
self.defaults,
[np.where(levels == (i + min_lvl))[0] for i in range(k)],
)
return {
'rois': [new_tensor(outputs['rois'][i]) for i in range(k)],
'labels': new_tensor(np.concatenate(outputs['labels'], 0)),
'bbox_targets': new_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': new_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': new_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
else:
# Return RoIs directly for the single-stride case
return {
'rois': [new_tensor(blobs['rois'])],
'labels': new_tensor(blobs['labels']),
'bbox_targets': new_tensor(blobs['bbox_targets']),
'bbox_inside_weights': new_tensor(blobs['bbox_inside_weights']),
'bbox_outside_weights': new_tensor(blobs['bbox_outside_weights']),
}
def get_targets(ex_rois, gt_rois, gt_labels, num_classes):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
# Compute bbox regression targets
fg_inds = np.where(gt_labels > 0)[0]
targets = box_util.bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
bbox_targets = np.zeros((ex_rois.shape[0], 4 * num_classes), 'float32')
inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
for i in fg_inds:
start = int(4 * gt_labels[i])
bbox_targets[i, start:start + 4] = targets[i]
inside_weights[i, start:start + 4] = (1., 1., 1., 1.)
outside_weights = np.array(inside_weights > 0).astype('float32')
return bbox_targets, inside_weights, outside_weights
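# Worked illustration, not part of the original file: the 4 * num_classes
# target layout produced above is class-specific. With num_classes = 3 and a
# single foreground RoI labeled class 2, only columns 8:12 are filled, and the
# inside weights select exactly those four entries in the regression loss.
import numpy as np
num_classes = 3
label = 2
deltas = np.array([0.1, -0.2, 0.3, 0.05], 'float32')
row = np.zeros(4 * num_classes, 'float32')
row[4 * label:4 * label + 4] = deltas
# row -> [0, 0, 0, 0, 0, 0, 0, 0, 0.1, -0.2, 0.3, 0.05]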
def sample_rois(
all_rois,
gt_boxes,
num_rois,
num_fg_rois,
num_classes,
):
"""Sample a batch of RoIs comprising foreground and background examples."""
overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
fg_rois_per_this_image = int(min(num_fg_rois, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, fg_rois_per_this_image, False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image
bg_rois_per_this_image = num_rois - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, bg_rois_per_this_image, False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays
rois, labels = all_rois[keep_inds], labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
# Clamp the image indices for the background RoIs to -1
rois[fg_rois_per_this_image:, 0] = -1
# Compute the target from RoIs
outputs = [rois, labels]
outputs += get_targets(
rois[:, 1:5],
gt_boxes[gt_assignment[keep_inds], :4],
labels,
num_classes,
)
return outputs
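# Back-of-the-envelope sketch of the sampling budget above (the 128/0.25
# values are common defaults and an assumption here, not read from this
# repository's config): FG_FRACTION caps the foreground at 32 RoIs per image;
# if only 10 proposals pass FG_THRESH, the remaining slots are filled with
# backgrounds, subject to how many fall in [BG_THRESH_LO, BG_THRESH_HI).
rois_per_image, fg_fraction, fg_found = 128, 0.25, 10
fg_cap = int(round(fg_fraction * rois_per_image))   # 32
fg_taken = min(fg_cap, fg_found)                    # 10
bg_taken = rois_per_image - fg_taken                # 118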
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets."""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return {
'rois': [array2tensor(batch_outputs['rois'])],
'labels': array2tensor(batch_outputs['labels']),
'bbox_targets': array2tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': array2tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': array2tensor(batch_outputs['bbox_outside_weights']),
}
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th).
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois(
all_rois,
gt_boxes,
fg_rois_per_image,
rois_per_image,
num_classes,
):
"""Generate a random sample of RoIs."""
overlaps = bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
...@@ -17,14 +17,13 @@ import dragon.vm.torch as torch ...@@ -17,14 +17,13 @@ import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling.detector import new_detector
from lib.nms import nms_wrapper from lib.nms import nms_wrapper
from lib.utils import boxes as box_util
from lib.utils import framework from lib.utils import framework
from lib.utils import time_util from lib.utils import time_util
from lib.utils.blob import im_list_to_blob from lib.utils.blob import im_list_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image from lib.utils.image import scale_image
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image): def im_detect(detector, raw_image):
...@@ -39,69 +38,65 @@ def im_detect(detector, raw_image): ...@@ -39,69 +38,65 @@ def im_detect(detector, raw_image):
], dtype=np.float32) ], dtype=np.float32)
# Do Forward # Do Forward
if not hasattr(detector, 'frozen_graph'): if not hasattr(detector, 'graph'):
inputs = { with framework.new_workspace().as_default():
'data': torch.from_numpy(blobs['data']), data = torch.from_numpy(blobs['data'])
'ims_info': torch.from_numpy(blobs['ims_info']), ims_info = torch.from_numpy(blobs['ims_info'])
}
with torch.no_grad(): with torch.no_grad():
with torch.jit.Recorder(retain_ops=True): with torch.jit.Tracer(retain_ops=True):
inputs = {'data': data, 'ims_info': ims_info}
outputs = detector.forward(inputs) outputs = detector.forward(inputs)
detector.frozen_graph = \ detector.graph = \
framework.FrozenGraph( framework.Graph(inputs, {
{'data': inputs['data'], 'rois': outputs['rois'],
'ims_info': inputs['ims_info']},
{'rois': outputs['rois'],
'cls_prob': outputs['cls_prob'], 'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']}, 'bbox_pred': outputs['bbox_pred']
) })
outputs = detector.frozen_graph(**blobs) outputs = detector.graph(**blobs)
# Decode results # Decode results
batch_rois = outputs['rois'] rois = outputs['rois']
batch_scores = outputs['cls_prob'] scores, boxes, batch_inds = [], [], []
batch_deltas = outputs['bbox_pred'] pred_boxes = \
batch_boxes = bbox_transform_inv( box_util.bbox_transform_inv(
batch_rois[:, 1:5], rois[:, 1:5],
batch_deltas, outputs['bbox_pred'],
cfg.BBOX_REG_WEIGHTS, cfg.BBOX_REG_WEIGHTS,
) )
scores_wide, boxes_wide = [], [] for i in range(len(ims)):
inds = np.where(rois[:, 0].astype(np.int32) == i)[0]
im_boxes = pred_boxes[inds] / ims_scale[i]
scores.append(outputs['cls_prob'][inds])
boxes.append(box_util.clip_tiled_boxes(im_boxes, raw_image.shape))
for im_idx in range(len(ims)): return (
indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0] np.vstack(scores) if len(ims) > 0 else scores[0],
boxes = batch_boxes[indices] np.vstack(boxes) if len(ims) > 0 else boxes[0],
boxes /= ims_scale[im_idx] )
clip_tiled_boxes(boxes, raw_image.shape)
scores_wide.append(batch_scores[indices])
boxes_wide.append(boxes)
return (np.vstack(scores_wide), np.vstack(boxes_wide)) \
if len(scores_wide) > 1 else (scores_wide[0], boxes_wide[0])
def test_net(weights, num_classes, q_in, q_out, device):
num_classes, cfg.GPU_ID = num_classes, device
detector = new_detector(device, weights)
def test_net(detector, server): _t = time_util.new_timers('im_detect', 'misc')
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()} while True:
idx, raw_image = q_in.get()
if raw_image is None:
break
for i in range(num_images): boxes_this_image = [[]]
image_id, raw_image = server.get_image()
with _t['im_detect'].tic_and_toc(): with _t['im_detect'].tic_and_toc():
scores, boxes = im_detect(detector, raw_image) scores, boxes = im_detect(detector, raw_image)
_t['misc'].tic() _t['misc'].tic()
boxes_this_image = [[]]
for j in range(1, num_classes): for j in range(1, num_classes):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0] inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j] cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4] cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
cls_detections = np.hstack( cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis]) (cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False) ).astype(np.float32, copy=False)
...@@ -119,43 +114,16 @@ def test_net(detector, server): ...@@ -119,43 +114,16 @@ def test_net(detector, server):
force_cpu=True, force_cpu=True,
) )
cls_detections = cls_detections[keep, :] cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections) boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(
raw_image,
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_id),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc() _t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' q_out.put((
.format(i + 1, num_images, idx,
_t['im_detect'].average_time, {
_t['misc'].average_time), 'im_detect': _t['im_detect'].average_time,
end='') 'misc': _t['misc'].average_time,
},
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<') {
'boxes': boxes_this_image,
print('Evaluating detections') },
server.evaluate_detections(all_boxes) ))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
from lib.core.config import cfg
def generate_grid_anchors(features, base_anchors, strides):
num_strides = len(strides)
if len(features) != num_strides:
raise ValueError(
'Given %d features for %d strides.'
% (len(features), num_strides)
)
# Generate proposals from shifted anchors
anchors_to_pack = []
for i in range(len(features)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * strides[i]
shift_y = np.arange(0, height) * strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
if num_strides > 1:
# Transpose from (K, A, 4) to (A, K, 4)
# We will pack it with other strides to
# match the data format of (N, C, H, W)
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
anchors_to_pack.append(anchors)
else:
# Original order of Faster R-CNN
return anchors.reshape((K * A, 4))
return np.vstack(anchors_to_pack)
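# Tiny worked example, illustration only, of the broadcast used above:
# (1, A, 4) base anchors + (K, 1, 4) grid shifts -> (K, A, 4) shifted anchors.
import numpy as np
base = np.array([[-8, -8, 8, 8], [-16, -16, 16, 16]], 'float32')  # A = 2
stride, height, width = 16, 2, 2                                  # K = 4 cells
sx, sy = np.meshgrid(np.arange(width) * stride, np.arange(height) * stride)
shifts = np.vstack((sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel())).transpose()
shifted = base.reshape((1, 2, 4)) + shifts.reshape((1, 4, 4)).transpose((1, 0, 2))
assert shifted.shape == (4, 2, 4)  # (K, A, 4) before the final reshape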
def map_returns_to_blobs(returns, blobs, keys):
"""Map returns of image to blobs."""
for i, key in enumerate(keys):
blobs[key].append(returns[i])
def map_rois_to_levels(rois, k_min, k_max):
"""Map rois to fpn levels."""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
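# Worked example of the heuristic above, using the canonical defaults quoted
# in the comments (scale 224 maps to level 4); the k_min/k_max values below
# are assumptions for illustration only.
import numpy as np
s = np.array([56., 112., 224., 448.])            # sqrt(RoI area)
levels = np.floor(4 + np.log2(s / 224. + 1e-6))
levels = np.clip(levels, 2, 5)                   # assume k_min=2, k_max=5
# levels -> [2., 3., 4., 5.]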
def map_blobs_to_outputs(blobs, defaults, lvl_inds):
"""Map blobs to outputs according to fpn indices."""
outputs = collections.defaultdict(list)
for inds in lvl_inds:
for key, blob in blobs.items():
outputs[key].append(
blob[inds]
if len(inds) > 0
else defaults[key]
)
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))
)
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
ratios=self.ratios,
scales=np.array(scale),
)
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
all_anchors, total_anchors = [], 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors)
total_anchors += anchors.shape[0]
all_anchors = np.vstack(all_anchors)
# label: 1 is positive, 0 is negative, -1 is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32)
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(anchors, gt_boxes)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps,
np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :],
gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': array2tensor(labels),
'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights),
}
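# Minimal illustration of the labeling rules in forward() above. The 0.7/0.3
# thresholds are the usual RPN defaults and an assumption here, not read from
# this repository's config.
import numpy as np
overlaps = np.array([[0.80, 0.10],   # anchor 0: above the positive threshold
                     [0.50, 0.20],   # anchor 1: neither rule fires -> ignored
                     [0.10, 0.05],   # anchor 2: below the negative threshold
                     [0.05, 0.35]])  # anchor 3: best anchor for gt 1
labels = -np.ones(4, 'float32')
max_overlaps = overlaps.max(axis=1)
gt_argmax = np.where(overlaps == overlaps.max(axis=0))[0]
labels[gt_argmax] = 1                # per-gt best anchor is positive
labels[max_overlaps >= 0.7] = 1      # high-overlap anchors are positive
labels[max_overlaps < 0.3] = 0       # low-overlap anchors are negative
# labels -> [1., -1., 0., 1.]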
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms import nms_wrapper
from lib.utils import logger
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""Compute proposals by applying transformations anchors."""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))
)
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
ratios=self.ratios,
scales=np.array(scale),
)
)
def generate_grid_anchors(self, features):
# Generate proposals from shifted anchors
anchors_wide = []
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
anchors_wide.append(anchors)
return np.vstack(anchors_wide)
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_top_n = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
all_anchors = self.generate_grid_anchors(features) # [n, 4]
if cls_prob.shape[0] != num_images or \
bbox_pred.shape[0] != num_images:
logger.fatal('Incorrect num of images: {}'.format(num_images))
# Prepare for the outputs
batch_rois = []
batch_scores = cls_prob.numpy(True)
batch_deltas = bbox_pred.numpy(True) \
.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
# Extract RoIs separately
for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix] # [n, 4]
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; first partition to get the top K
# unsorted, then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. Remove predicted boxes with either height or width < threshold
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# 4. Apply NMS (e.g. threshold = 0.7)
# 5. Take post_nms_top_n (e.g. 300)
# 6. Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN':
return rpn_rois
else:
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
k = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level)
all_rois = []
for i in range(k):
lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling
all_rois.append(array2tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else:
all_rois.append(array2tensor(rpn_rois[lv_indices]))
return all_rois
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets.
"""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
self.fake_outputs = {
'rois': np.array([[0, 0, 0, 1, 1]], dtype=np.float32),
'labels': np.array([-1], dtype=np.float32),
'bbox_targets': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_inside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
}
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
k = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(k)]
_fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
outputs=outputs,
keys=keys,
levels=lvs_indices,
)
return {
'rois': [array2tensor(outputs['rois'][i]) for i in range(k)],
'labels': array2tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': array2tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': array2tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': array2tensor(np.vstack(outputs['bbox_outside_weights'])),
}
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th).
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Sample a batch of RoIs comprising foreground and background examples."""
# overlaps: (rois x gt_boxes)
overlaps = bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
def _fmap_rois(inputs, fake_outputs, outputs, keys, levels):
def impl(a, b, indices):
return a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(impl(inputs[i], fake_outputs[key], inds))
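# Small illustration of the placeholder logic in _fmap_rois above: an FPN
# level that receives no RoIs falls back to the fake row, so RoI pooling on
# that level still sees a non-empty input.
import numpy as np
fake_roi = np.array([[0, 0, 0, 1, 1]], dtype=np.float32)
rois = np.array([[0, 10, 10, 50, 60]], dtype=np.float32)
def pick(blob, fallback, inds):
    return blob[inds] if len(inds) > 0 else fallback
# pick(rois, fake_roi, np.array([0]))            -> the real RoI
# pick(rois, fake_roi, np.array([], 'int64'))    -> the placeholder row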
...@@ -13,6 +13,7 @@ from __future__ import absolute_import ...@@ -13,6 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.fpn.anchor_target_layer import AnchorTargetLayer from lib.faster_rcnn.anchor_target import AnchorTarget
from lib.fpn.proposal_layer import ProposalLayer from lib.faster_rcnn.proposal import Proposal
from lib.fpn.proposal_target_layer import ProposalTargetLayer from lib.mask_rcnn.data_loader import DataLoader
from lib.mask_rcnn.proposal_target import ProposalTarget
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.mask_rcnn.data_transformer import DataTransformer
from lib.datasets.factory import get_imdb
from lib.utils import logger
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import mask_list_to_blob
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def __call__(self):
outputs = self.data_batch.get()
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
dataset : lambda
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q21 = mp.Queue(num_batches * self._batch_size)
self.Q22 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1
transformer.q1_out, transformer.q2_out = self.Q21, self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q3.get()
def run(self):
"""Start the process to produce batches."""
def produce(q_in):
processed_ims, ims_info = [], []
packed_boxes, packed_masks = [], []
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes, gt_masks = q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[:2]) + [im_scale])
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, image_index
packed_boxes.append(im_boxes)
packed_masks.append(gt_masks)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, 'float32'),
'gt_boxes': np.concatenate(packed_boxes, 0),
'gt_masks': mask_list_to_blob(packed_masks),
}
# Two queues to implement aspect-grouping
# This is necessary to reduce the GPU memory
# consumed by fetching a huge square batch blob
q1, q2 = self.Q21, self.Q22
# Main prefetch loop
while True:
if q1.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q1))
elif q2.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q2))
q1, q2 = q2, q1 # Uniform sampling trick
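# Rough illustration of why the aspect-grouping above helps (shapes are made
# up): im_list_to_blob pads every image in a batch to the common maximum, so
# mixing a portrait with a landscape image forces a near-square blob, while
# two same-orientation images pad to a much smaller one.
def padded_shape(shapes):
    return max(h for h, _ in shapes), max(w for _, w in shapes)
mixed = [(800, 600), (600, 800)]     # one portrait + one landscape
grouped = [(800, 600), (800, 640)]   # two portraits
# padded_shape(mixed)   -> (800, 800): 640k pixels per image slot
# padded_shape(grouped) -> (800, 640): 512k pixels per image slot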
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from lib.core.config import cfg
from lib.datasets.example import Example
from lib.pycocotools import mask_utils
from lib.utils import boxes as box_util
from lib.utils.blob import prep_im_for_blob
from lib.utils.image import get_image_with_target_size
class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._seed = cfg.RNG_SEED
self._use_flipped = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF
self._classes = kwargs.get('classes', ('__background__',))
self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self.q_in = self.q1_out = self.q2_out = None
self.daemon = True
def make_roi_dict(self, example, im_scale, apply_flip=False):
objects, n_objects = example.objects, 0
height, width = example.height, example.width
if not self._use_diff:
for obj in objects:
if obj.get('difficult', 0) == 0:
n_objects += 1
else:
n_objects = len(objects)
roi_dict = {
'boxes': np.zeros((n_objects, 4), 'float32'),
'masks': np.empty((n_objects, height, width), 'uint8'),
'gt_classes': np.zeros((n_objects, 1), 'int32'),
'mask_flags': np.ones((n_objects, 1), 'float32'),
}
# Filter the difficult instances
object_idx = 0
for obj in objects:
if not self._use_diff and \
obj.get('difficult', 0) > 0:
continue
bbox, mask = obj['bbox'], obj['mask']
roi_dict['boxes'][object_idx, :] = [
max(0, bbox[0]),
max(0, bbox[1]),
min(bbox[2], width - 1),
min(bbox[3], height - 1),
]
if mask is not None:
roi_dict['masks'][object_idx] = (
mask_utils.bytes2img(
obj['mask'],
height,
width,
))
else:
roi_dict['mask_flags'][object_idx] = 0.
roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[obj['name']]
object_idx += 1
# Flip the boxes if necessary
if apply_flip:
roi_dict['boxes'] = \
box_util.flip_boxes(
roi_dict['boxes'],
width,
)
# Scale the boxes to the detecting scale
roi_dict['boxes'] *= im_scale
return roi_dict
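# Assumed behaviour of box_util.flip_boxes used above (the real helper lives
# in lib/utils/boxes.py and may differ in off-by-one details): a horizontal
# flip mirrors the x coordinates and keeps the y coordinates.
import numpy as np
def flip_boxes_sketch(boxes, width):
    flipped = boxes.copy()
    flipped[:, 0] = width - boxes[:, 2] - 1
    flipped[:, 2] = width - boxes[:, 0] - 1
    return flipped
# flip_boxes_sketch(np.array([[10., 20., 30., 40.]]), 100) -> [[69., 20., 89., 40.]]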
def get(self, example):
example = Example(example)
img = example.image
# Scale
max_size = cfg.TRAIN.MAX_SIZE
target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))]
img, im_scale, jitter = prep_im_for_blob(img, target_size, max_size)
# Flip
apply_flip = False
if self._use_flipped:
if np.random.randint(2) > 0:
img = img[:, ::-1]
apply_flip = True
# Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls, flag}]
gt_boxes = \
np.concatenate([
roi_dict['boxes'],
roi_dict['gt_classes'],
roi_dict['mask_flags']
], axis=1)
# Post-Process for gt masks
# Shape like: [num_objects, im_h, im_w]
if gt_boxes.shape[0] > 0:
gt_masks = roi_dict['masks']
if apply_flip:
gt_masks = gt_masks[:, :, ::-1]
else:
gt_masks = None
return img, im_scale, gt_boxes, gt_masks
def run(self):
# Fix the process-local random seed
np.random.seed(self._seed)
# Main prefetch loop
while True:
outputs = self.get(self.q_in.get())
if len(outputs[2]) < 1:
continue # Ignore the non-object image
aspect_ratio = float(outputs[0].shape[0]) / outputs[0].shape[1]
if aspect_ratio > 1.:
self.q1_out.put(outputs)
else:
self.q2_out.put(outputs)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_returns_to_blobs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.utils import boxes as box_util
from lib.utils import mask as mask_util
from lib.utils.framework import new_tensor
class ProposalTarget(object):
"""Assign proposals to ground-truth targets."""
def __init__(self):
super(ProposalTarget, self).__init__()
self.resolution = cfg.MRCNN.RESOLUTION
self.num_classes = cfg.MODEL.NUM_CLASSES
self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
('labels', np.array([-1], 'float32')),
('bbox_targets', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_inside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_outside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('mask_targets', -np.ones((1, self.resolution, self.resolution), 'float32')),
])
def __call__(self, rpn_rois, gt_boxes, gt_masks, ims_info):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
# GT masks (num_objects, im_h, im_w)
gt_boxes_wide, gt_masks_wide = \
mask_util.dismantle_masks(
gt_boxes,
gt_masks,
num_images,
)
# Prepare the output blobs
keys = list(self.defaults.keys())
blobs = {key: [] for key in keys}
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
gt_masks = gt_masks_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype('int32') == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, :4]))))
# Sample a batch of RoIs for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
map_returns_to_blobs(
sample_rois(
rois,
gt_boxes,
gt_masks,
rois_per_image,
fg_rois_per_image,
self.num_classes,
ims_info[ix][2],
), blobs, keys,
)
# Stack into contiguous blobs
for k, v in blobs.items():
blobs[k] = np.concatenate(blobs[k], 0)
# Distribute rois into pyramids
k_min = cfg.FPN.ROI_MIN_LEVEL
k_max = cfg.FPN.ROI_MAX_LEVEL
k = k_max - k_min + 1
levels = map_rois_to_levels(blobs['rois'], k_min, k_max)
outputs = \
map_blobs_to_outputs(
blobs,
self.defaults,
[np.where(levels == (i + k_min))[0] for i in range(k)],
)
# Select the foreground RoIs only for mask branch
for i in range(k):
inds = np.where(outputs['labels'][i] > 0)[0]
inds = inds if len(inds) > 0 else np.array([0], 'int64')
outputs['mask_rois'].append(outputs['rois'][i][inds])
outputs['mask_targets'][i] = outputs['mask_targets'][i][inds]
outputs['mask_labels'].append(outputs['labels'][i][inds].astype('int64') - 1)
# Use the sparse indices to select logits
# Reduce the overhead of feeding dense class-specific targets
mask_labels = np.concatenate(outputs['mask_labels'], 0)
mask_indices = np.arange(len(mask_labels)) * (self.num_classes - 1)
return {
'rois': [new_tensor(outputs['rois'][i]) for i in range(k)],
'labels': new_tensor(np.concatenate(outputs['labels'], 0)),
'bbox_targets': new_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': new_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': new_tensor(np.vstack(outputs['bbox_outside_weights'])),
'mask_rois': [new_tensor(outputs['mask_rois'][i]) for i in range(k)],
'mask_targets': new_tensor(np.vstack(outputs['mask_targets'])),
'mask_indices': new_tensor(mask_indices + mask_labels),
}
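# A minimal NumPy sketch of the sparse index trick used above; the toy values
# below are illustrative only. Each sampled mask RoI keeps just the logits of
# its own class, so flat indices into a [num_rois * (num_classes - 1), S, S]
# score tensor replace dense class-specific mask targets.
import numpy as np

_num_fg_classes = 3                                # i.e. num_classes - 1
_mask_labels = np.array([2, 0, 1])                 # 0-based class per mask RoI
_mask_indices = np.arange(len(_mask_labels)) * _num_fg_classes + _mask_labels
# _mask_indices -> [2, 3, 7]: row i of the flattened scores is the logit map
# of RoI i for its own ground-truth class.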
def get_targets(
ex_rois,
gt_rois,
gt_labels,
gt_masks,
mask_flags,
mask_size,
num_classes,
im_scale,
):
"""Compute the bounding-box regression targets."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
# Compute bbox regression targets
fg_inds = np.where(gt_labels > 0)[0]
targets = box_util.bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
bbox_targets = np.zeros((ex_rois.shape[0], 4 * num_classes), 'float32')
inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
for i in fg_inds:
start = int(4 * gt_labels[i])
bbox_targets[i, start:start + 4] = targets[i]
inside_weights[i, start:start + 4] = (1., 1., 1., 1.)
outside_weights = (inside_weights > 0).astype('float32')
# Compute mask classification targets
mask_shape = [mask_size] * 2
ex_rois_ori = np.round(ex_rois / im_scale).astype(int)
gt_rois_ori = np.round(gt_rois / im_scale).astype(int)
mask_targets = -np.ones([len(gt_labels)] + mask_shape, 'float32')
for i in fg_inds:
if mask_flags[i] > 0:
box_mask = \
mask_util.intersect_box_mask(
ex_rois_ori[i],
gt_rois_ori[i],
gt_masks[i],
)
if box_mask is not None:
mask_targets[i] = \
mask_util.resize_mask(
mask=box_mask,
size=mask_shape,
)
return bbox_targets, inside_weights, outside_weights, mask_targets
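# A tiny NumPy illustration of the layout above (toy values, not from this
# commit): with num_classes = 4, a foreground RoI of class 2 fills columns
# 8..11 of its (4 * num_classes)-wide target row and leaves the rest at zero.
import numpy as np

_row = np.zeros(4 * 4, 'float32')
_row[4 * 2:4 * 2 + 4] = [0.1, -0.2, 0.3, 0.05]     # dx, dy, dw, dh for class 2
# np.flatnonzero(_row) -> [8, 9, 10, 11]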
def sample_rois(
all_rois,
gt_boxes,
gt_masks,
num_rois,
num_fg_rois,
num_classes,
im_scale,
):
"""Sample a batch of RoIs comprising foreground and background examples."""
overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
fg_rois_per_this_image = int(min(num_fg_rois, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, fg_rois_per_this_image, False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image
bg_rois_per_this_image = num_rois - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, bg_rois_per_this_image, False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays
rois, labels = all_rois[keep_inds], labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
# Clamp the image indices for the background RoIs to -1
rois[fg_rois_per_this_image:, 0] = -1
# Compute the target from RoIs
outputs = [rois, labels]
outputs += get_targets(
rois[:, 1:5],
gt_boxes[gt_assignment[keep_inds], :4],
labels,
gt_masks[gt_assignment[fg_inds]],
gt_boxes[gt_assignment[fg_inds], 5],
cfg.MRCNN.RESOLUTION,
num_classes,
im_scale,
)
return outputs
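# A small NumPy sketch of the sampling rule above, assuming toy thresholds
# FG_THRESH = 0.5, BG_THRESH_HI = 0.5 and BG_THRESH_LO = 0.0 (illustrative
# values, not necessarily this project's configuration):
import numpy as np
import numpy.random as npr

_max_overlaps = np.array([0.9, 0.7, 0.3, 0.1, 0.05])
_fg_inds = np.where(_max_overlaps >= 0.5)[0]                            # [0, 1]
_bg_inds = np.where((_max_overlaps < 0.5) & (_max_overlaps >= 0.0))[0]  # [2, 3, 4]
_keep = np.append(npr.choice(_fg_inds, 2, False), npr.choice(_bg_inds, 2, False))
# Foreground indices always come first in _keep, which is why the labels and
# batch indices of everything after them can simply be clamped above.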
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn import map_rois_to_levels
from lib.faster_rcnn import map_blobs_to_outputs
from lib.modeling.detector import new_detector
from lib.nms import nms_wrapper
from lib.utils import framework
from lib.utils import time_util
from lib.utils import boxes as box_util
from lib.utils.blob import im_list_to_blob
from lib.utils.image import scale_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales."""
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale
], dtype=np.float32)
# Do Forward
if not hasattr(detector, 'graph'):
with framework.new_workspace().as_default():
data = torch.from_numpy(blobs['data'])
ims_info = torch.from_numpy(blobs['ims_info'])
with torch.no_grad():
with torch.jit.Tracer(retain_ops=True):
inputs = {'data': data, 'ims_info': ims_info}
outputs = detector.forward(inputs)
detector.graph = \
framework.Graph(inputs, {
'rois': outputs['rois'],
'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']
})
outputs = detector.graph(**blobs)
# Decode results
rois = outputs['rois']
scores, boxes, batch_inds = [], [], []
pred_boxes = \
box_util.bbox_transform_inv(
rois[:, 1:5],
outputs['bbox_pred'],
cfg.BBOX_REG_WEIGHTS,
)
for i in range(len(ims)):
inds = np.where(rois[:, 0].astype(np.int32) == i)[0]
im_boxes = pred_boxes[inds] / ims_scale[i]
scores.append(outputs['cls_prob'][inds])
boxes.append(box_util.clip_tiled_boxes(im_boxes, raw_image.shape))
batch_inds.append(np.ones((len(inds), 1), 'int32') * i)
return (
np.vstack(scores) if len(ims) > 1 else scores[0],
np.vstack(boxes) if len(ims) > 1 else boxes[0],
np.vstack(batch_inds) if len(ims) > 1 else batch_inds[0],
np.array(ims_scale, 'float64'),
)
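# A simplified NumPy sketch of the per-scale decoding above (a single
# 4-column box with toy numbers; the real code clips class-tiled columns via
# box_util.clip_tiled_boxes): predictions made on the resized input are mapped
# back to raw-image coordinates by dividing by the scale, then clipped.
import numpy as np

_raw_h, _raw_w, _im_scale = 480, 640, 1.25
_pred = np.array([[100., 80., 820., 610.]])        # box on the resized image
_box = _pred / _im_scale                           # back to raw-image pixels
_box[:, 0::2] = np.clip(_box[:, 0::2], 0, _raw_w - 1)
_box[:, 1::2] = np.clip(_box[:, 1::2], 0, _raw_h - 1)
# _box -> [[80., 64., 639., 479.]]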
def mask_detect(detector, rois):
k_min = cfg.FPN.ROI_MIN_LEVEL
k_max = cfg.FPN.ROI_MAX_LEVEL
k = k_max - k_min + 1
levels = map_rois_to_levels(rois, k_min, k_max)
level_inds = [np.where(levels == (i + k_min))[0] for i in range(k)]
fpn_rois = map_blobs_to_outputs(
{'rois': rois[:, :5]},
{'rois': np.array([[-1, 0, 0, 1, 1]], 'float32')},
level_inds)['rois']
workspace = detector.graph.workspace
placeholders = detector.graph.placeholders
score_fn = detector.rcnn.compute_mask_score
with workspace.as_default():
if 'rois' not in placeholders:
placeholders['rois'] = \
[framework.new_placeholder(cfg.GPU_ID) for _ in range(k)]
placeholders['mask_inds'] = \
framework.new_placeholder(cfg.GPU_ID)
for i, v in enumerate(fpn_rois):
framework.feed_tensor(placeholders['rois'][i], v.astype('float32'))
with torch.no_grad():
mask_score = score_fn(rois=placeholders['rois'])
nc, i = mask_score.shape[1], 0
mask_inds = {}
for inds in level_inds:
for idx in inds:
cls = int(rois[idx, 5])
mask_inds[idx] = (i * nc + cls)
i += 1
if len(inds) == 0:
i += 1
mask_inds = list(map(mask_inds.get, sorted(mask_inds)))
framework.feed_tensor(
placeholders['mask_inds'],
np.array(mask_inds, 'int64'),
)
with torch.no_grad():
mask_pred = mask_score.index_select(
(0, 1), placeholders['mask_inds'])
return detector.rcnn.sigmoid(mask_pred).numpy(True).copy()
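# An illustrative walk through the flat-index bookkeeping above (toy values,
# not taken from a real run): mask scores are produced level by level, with
# one dummy row per empty FPN level, and each detection then picks the logit
# row of its own predicted class.
_nc = 3                                  # num_classes - 1
_level_inds = [[0, 2], [], [1]]          # detection indices grouped by level
_classes = {0: 1, 2: 0, 1: 2}            # predicted class of each detection
_mask_inds, _i = {}, 0
for _inds in _level_inds:
    for _idx in _inds:
        _mask_inds[_idx] = _i * _nc + _classes[_idx]
        _i += 1
    if len(_inds) == 0:
        _i += 1                          # skip the dummy RoI of an empty level
_flat = list(map(_mask_inds.get, sorted(_mask_inds)))   # -> [1, 11, 3]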
def test_net(weights, num_classes, q_in, q_out, device):
num_classes, cfg.GPU_ID = num_classes, device
detector = new_detector(device, weights)
_t = time_util.new_timers('im_detect', 'mask_detect', 'misc')
while True:
idx, raw_image = q_in.get()
if raw_image is None:
break
rois_this_image = []
boxes_this_image = [[]]
masks_this_image = [[]]
with _t['im_detect'].tic_and_toc():
scores, boxes, batch_inds, ims_scale = \
im_detect(detector, raw_image)
_t['misc'].tic()
for j in range(1, num_classes):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
cls_batch_inds = batch_inds[inds]
cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = nms_wrapper.soft_nms(
cls_detections,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms_wrapper.nms(
cls_detections,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
cls_batch_inds = cls_batch_inds[keep]
boxes_this_image.append(cls_detections)
rois_this_image.append(
np.hstack((
cls_batch_inds,
cls_detections[:, :4] * ims_scale[cls_batch_inds],
np.ones((len(keep), 1)) * (j - 1),
)))
mask_rois = np.concatenate(rois_this_image)
_t['misc'].toc()
if len(mask_rois) > 0:
k = 0
_t['mask_detect'].tic()
mask_pred = mask_detect(detector, mask_rois)
for j in range(1, num_classes):
num_pred = len(boxes_this_image[j])
cls_masks = mask_pred[k:k + num_pred]
masks_this_image.append(cls_masks)
k += num_pred
_t['mask_detect'].toc()
q_out.put((
idx,
{
'im_detect': _t['im_detect'].average_time,
'mask_detect': _t['mask_detect'].average_time,
'misc': _t['misc'].average_time,
},
{
'boxes': boxes_this_image,
'masks': masks_this_image,
},
))
...@@ -14,12 +14,9 @@ from __future__ import division ...@@ -14,12 +14,9 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
# Import custom modules # Import custom modules
from lib.modeling.base import affine
from lib.modeling.base import bn
from lib.modeling.base import conv1x1
from lib.modeling.base import conv3x3
from lib.modeling.fast_rcnn import FastRCNN from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.fpn import FPN from lib.modeling.fpn import FPN
from lib.modeling.mask_rcnn import MaskRCNN
from lib.modeling.retinanet import RetinaNet from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD from lib.modeling.ssd import SSD
...@@ -15,20 +15,19 @@ from __future__ import print_function ...@@ -15,20 +15,19 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.modeling import affine from lib.modules import init
from lib.modeling import conv1x1 from lib.modules import nn
from lib.modeling import conv3x3
class WideResBlock(torch.nn.Module): class WideResBlock(nn.Module):
def __init__(self, dim_in, dim_out, stride=1, downsample=None): def __init__(self, dim_in, dim_out, stride=1, downsample=None):
super(WideResBlock, self).__init__() super(WideResBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride) self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out) self.bn1 = nn.Affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out) self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out) self.bn2 = nn.Affine(dim_out)
self.downsample = downsample self.downsample = downsample
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
def forward(self, x): def forward(self, x):
residual = x residual = x
...@@ -48,20 +47,20 @@ class WideResBlock(torch.nn.Module): ...@@ -48,20 +47,20 @@ class WideResBlock(torch.nn.Module):
return out return out
class InceptionBlock(torch.nn.Module): class InceptionBlock(nn.Module):
def __init__(self, dim_in, dim_out): def __init__(self, dim_in, dim_out):
super(InceptionBlock, self).__init__() super(InceptionBlock, self).__init__()
self.conv1 = conv1x1(dim_in, dim_out) self.conv1 = nn.Conv1x1(dim_in, dim_out)
self.bn1 = affine(dim_out) self.bn1 = nn.Affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out // 2) self.conv2 = nn.Conv3x3(dim_out, dim_out // 2)
self.bn2 = affine(dim_out // 2) self.bn2 = nn.Affine(dim_out // 2)
self.conv3a = conv3x3(dim_out // 2, dim_out) self.conv3a = nn.Conv3x3(dim_out // 2, dim_out)
self.bn3a = affine(dim_out) self.bn3a = nn.Affine(dim_out)
self.conv3b = conv3x3(dim_out, dim_out) self.conv3b = nn.Conv3x3(dim_out, dim_out)
self.bn3b = affine(dim_out) self.bn3b = nn.Affine(dim_out)
self.conv4 = conv3x3(dim_out * 3, dim_out) self.conv4 = nn.Conv3x3(dim_out * 3, dim_out)
self.bn4 = affine(dim_out) self.bn4 = nn.Affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
def forward(self, x): def forward(self, x):
residual = x residual = x
...@@ -82,7 +81,7 @@ class InceptionBlock(torch.nn.Module): ...@@ -82,7 +81,7 @@ class InceptionBlock(torch.nn.Module):
out_3x3_b = self.bn3b(out) out_3x3_b = self.bn3b(out)
out_3x3_b = self.relu(out_3x3_b) out_3x3_b = self.relu(out_3x3_b)
out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], dim=1) out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], 1)
out = self.conv4(out) out = self.conv4(out)
out = self.bn4(out) out = self.bn4(out)
...@@ -91,22 +90,22 @@ class InceptionBlock(torch.nn.Module): ...@@ -91,22 +90,22 @@ class InceptionBlock(torch.nn.Module):
return out return out
class AirNet(torch.nn.Module): class AirNet(nn.Module):
def __init__(self, blocks, num_stages): def __init__(self, blocks, num_stages):
super(AirNet, self).__init__() super(AirNet, self).__init__()
self.dim_in, filters = 64, [64, 128, 256, 384] self.dim_in, filters = 64, [64, 128, 256, 384]
self.feature_dims = [None, None] + \ self.feature_dims = [None, None] + \
filters[1:num_stages - 1] filters[1:num_stages - 1]
self.conv1 = torch.nn.Conv2d( self.conv1 = nn.Conv2d(
3, 64, 3, 64,
kernel_size=7, kernel_size=7,
stride=2, stride=2,
padding=3, padding=3,
bias=False, bias=False,
) )
self.bn1 = affine(self.dim_in) self.bn1 = nn.Affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d( self.maxpool = nn.MaxPool2d(
kernel_size=2, kernel_size=2,
stride=2, stride=2,
padding=0, padding=0,
...@@ -121,19 +120,14 @@ class AirNet(torch.nn.Module): ...@@ -121,19 +120,14 @@ class AirNet(torch.nn.Module):
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
# The Kaiming Initialization
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
torch.nn.init.kaiming_uniform_( init.xaiver(m.weight)
m.weight,
# Fix the gain for [-127, 127]
a=1,
) # Xavier Initialization
def make_blocks(self, dim_out, blocks, stride=1): def make_blocks(self, dim_out, blocks, stride=1):
downsample = torch.nn.Sequential( downsample = nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride), nn.Conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out), nn.Affine(dim_out),
) )
layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)] layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)]
self.dim_in = dim_out self.dim_in = dim_out
...@@ -144,7 +138,7 @@ class AirNet(torch.nn.Module): ...@@ -144,7 +138,7 @@ class AirNet(torch.nn.Module):
layers.append(InceptionBlock(dim_out, dim_out)) layers.append(InceptionBlock(dim_out, dim_out))
else: else:
raise ValueError('Unknown block flag: ' + blocks[i]) raise ValueError('Unknown block flag: ' + blocks[i])
return torch.nn.Sequential(*layers) return nn.Sequential(*layers)
def forward(self, x): def forward(self, x):
x = self.conv1(x) x = self.conv1(x)
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define some basic structures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
def bn(dim_in, eps=1e-5):
"""The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps)
def conv1x1(dim_in, dim_out, stride=1, bias=False):
"""1x1 convolution."""
return torch.nn.Conv2d(
dim_in,
dim_out,
kernel_size=1,
stride=stride,
bias=bias,
)
def conv3x3(dim_in, dim_out, stride=1, bias=False):
"""3x3 convolution with padding."""
return torch.nn.Conv2d(
dim_in,
dim_out,
kernel_size=3,
stride=stride,
padding=1,
bias=bias,
)
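# A minimal usage sketch of the helpers above (illustrative only, assuming
# dragon.vm.torch is installed): the projection shortcut used by the backbones
# is a strided 1x1 convolution followed by the frozen affine transform, i.e.
# the `downsample` branch that make_blocks() assembles in the backbone files.
_example_shortcut = torch.nn.Sequential(
    conv1x1(64, 256, stride=2),
    affine(256),
)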
...@@ -21,14 +21,16 @@ from lib.core.config import cfg ...@@ -21,14 +21,16 @@ from lib.core.config import cfg
from lib.modeling import FPN from lib.modeling import FPN
from lib.modeling import RPN from lib.modeling import RPN
from lib.modeling import FastRCNN from lib.modeling import FastRCNN
from lib.modeling import MaskRCNN
from lib.modeling import RetinaNet from lib.modeling import RetinaNet
from lib.modeling import SSD from lib.modeling import SSD
from lib.modeling.factory import get_body_func from lib.modeling.factory import get_body_func
from lib.ops.modules import Bootstrap from lib.modules import nn
from lib.utils.logger import is_root from lib.modules import vision
from lib.utils import logger
class Detector(torch.nn.Module): class Detector(nn.Module):
"""Organize the detection pipelines. """Organize the detection pipelines.
A bunch of classic algorithms are integrated, see the A bunch of classic algorithms are integrated, see the
...@@ -42,19 +44,20 @@ class Detector(torch.nn.Module): ...@@ -42,19 +44,20 @@ class Detector(torch.nn.Module):
backbone = cfg.MODEL.BACKBONE.lower().split('.') backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:] body, modules = backbone[0], backbone[1:]
# + Data Loader # + DataLoader
self.data_layer = importlib.import_module( self.data_loader_cls = importlib.import_module(
'lib.{}'.format(model)).DataLayer 'lib.{}'.format(model)).DataLoader
self.bootstrap = Bootstrap() self.bootstrap = vision.Bootstrap()
# + Feature Extractor # + FeatureExtractor
self.body = get_body_func(body)() self.body = get_body_func(body)()
feature_dims = self.body.feature_dims feature_dims = self.body.feature_dims
# + Feature Enhancer # + FeatureEnhancer
if 'fpn' in modules: if 'fpn' in modules:
self.fpn = FPN(feature_dims) self.fpn = FPN(feature_dims)
feature_dims = self.fpn.feature_dims feature_dims = self.fpn.feature_dims
elif 'mbox' in modules: elif 'mbox' in modules:
pass # Placeholder pass # Placeholder
else: else:
...@@ -63,7 +66,10 @@ class Detector(torch.nn.Module): ...@@ -63,7 +66,10 @@ class Detector(torch.nn.Module):
# + Detection Modules # + Detection Modules
if 'rcnn' in model: if 'rcnn' in model:
self.rpn = RPN(feature_dims[0]) self.rpn = RPN(feature_dims[0])
self.fast_rcnn = FastRCNN(feature_dims[0]) if 'faster' in model:
self.rcnn = FastRCNN(feature_dims[0])
elif 'mask' in model:
self.rcnn = MaskRCNN(feature_dims[0])
if 'retinanet' in model: if 'retinanet' in model:
self.retinanet = RetinaNet(feature_dims[0]) self.retinanet = RetinaNet(feature_dims[0])
...@@ -85,7 +91,7 @@ class Detector(torch.nn.Module): ...@@ -85,7 +91,7 @@ class Detector(torch.nn.Module):
self.load_state_dict( self.load_state_dict(
torch.load(weights), torch.load(weights),
strict=False, strict=False,
verbose=is_root(), verbose=logger.is_root(),
) )
def forward(self, inputs=None): def forward(self, inputs=None):
...@@ -107,7 +113,7 @@ class Detector(torch.nn.Module): ...@@ -107,7 +113,7 @@ class Detector(torch.nn.Module):
# 1) Training: <= DataLayer # 1) Training: <= DataLayer
# 2) Inference: <= Given # 2) Inference: <= Given
if not hasattr(self, 'data_loader'): if not hasattr(self, 'data_loader'):
self.data_loader = self.data_layer() self.data_loader = self.data_loader_cls()
inputs = self.data_loader() inputs = self.data_loader()
# 1. Extract features # 1. Extract features
...@@ -126,7 +132,7 @@ class Detector(torch.nn.Module): ...@@ -126,7 +132,7 @@ class Detector(torch.nn.Module):
# 3. Collect detection outputs # 3. Collect detection outputs
outputs = collections.OrderedDict() outputs = collections.OrderedDict()
# 3.1 Feature -> RPN -> Fast R-CNN # 3.1 Feature -> RPN -> R-CNN
if hasattr(self, 'rpn'): if hasattr(self, 'rpn'):
outputs.update( outputs.update(
self.rpn( self.rpn(
...@@ -135,7 +141,7 @@ class Detector(torch.nn.Module): ...@@ -135,7 +141,7 @@ class Detector(torch.nn.Module):
) )
) )
outputs.update( outputs.update(
self.fast_rcnn( self.rcnn(
features=features, features=features,
rpn_cls_score=outputs['rpn_cls_score'], rpn_cls_score=outputs['rpn_cls_score'],
rpn_bbox_pred=outputs['rpn_bbox_pred'], rpn_bbox_pred=outputs['rpn_bbox_pred'],
...@@ -174,8 +180,8 @@ class Detector(torch.nn.Module): ...@@ -174,8 +180,8 @@ class Detector(torch.nn.Module):
################################## ##################################
last_module = None last_module = None
for e in self.modules(): for e in self.modules():
if isinstance(e, torch.nn.Affine) and \ if isinstance(e, nn.Affine) and \
isinstance(last_module, torch.nn.Conv2d): isinstance(last_module, nn.Conv2d):
if last_module.bias is None: if last_module.bias is None:
delattr(last_module, 'bias') delattr(last_module, 'bias')
e.forward = lambda x: x e.forward = lambda x: x
...@@ -188,8 +194,8 @@ class Detector(torch.nn.Module): ...@@ -188,8 +194,8 @@ class Detector(torch.nn.Module):
###################################### ######################################
last_module = None last_module = None
for e in self.modules(): for e in self.modules():
if isinstance(e, torch.nn.BatchNorm2d) and \ if isinstance(e, nn.BatchNorm2d) and \
isinstance(last_module, torch.nn.Conv2d): nn.is_conv2d(last_module):
if last_module.bias is None: if last_module.bias is None:
delattr(last_module, 'bias') delattr(last_module, 'bias')
e.forward = lambda x: x e.forward = lambda x: x
...@@ -204,3 +210,17 @@ class Detector(torch.nn.Module): ...@@ -204,3 +210,17 @@ class Detector(torch.nn.Module):
else: else:
last_module.weight.data.mul_(term) last_module.weight.data.mul_(term)
last_module = e last_module = e
def new_detector(device, weights=None, training=False):
detector = Detector().cuda(device)
if weights is not None:
detector.load_weights(weights)
if not training:
detector.eval()
detector.optimize_for_inference()
# Enable fp16 inference if requested; this gives a small
# speedup when TensorCores are available
if cfg.MODEL.PRECISION.lower() == 'float16':
detector.half()
return detector
...@@ -43,14 +43,20 @@ for D in ['', '3b', '4b', '5b']: ...@@ -43,14 +43,20 @@ for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \ _STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D) 'lib.modeling.airnet.make_airnet_{}'.format(D)
# MobileNet
for D in ['a1', 'v2']:
_STORE['BODY']['mobilenet_{}'.format(D)] = \
'lib.modeling.mobilenet.make_mobilenet_{}'.format(D)
def get_template_func(name, sets, desc): def get_template_func(name, sets, desc):
name = name.lower() name = name.lower()
if name not in sets: if name not in sets:
raise ValueError( raise ValueError(
'The {} for {} was not registered.\n' 'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format( 'Registered modules: [{}]'
name, desc, ', '.join(sets.keys()))) .format(name, desc, ', '.join(sets.keys()))
)
module_name = '.'.join(sets[name].split('.')[0:-1]) module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1] func_name = sets[name].split('.')[-1]
try: try:
......
...@@ -14,13 +14,19 @@ from __future__ import division ...@@ -14,13 +14,19 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import functools
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib import faster_rcnn
from lib.core.config import cfg from lib.core.config import cfg
from lib.ops.modules import RPNDecoder from lib.modules import det
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
class FastRCNN(torch.nn.Module): class FastRCNN(nn.Module):
"""Generate proposal regions for R-CNN series. """Generate proposal regions for R-CNN series.
The pipeline is as follows: The pipeline is as follows:
...@@ -32,59 +38,45 @@ class FastRCNN(torch.nn.Module): ...@@ -32,59 +38,45 @@ class FastRCNN(torch.nn.Module):
""" """
def __init__(self, dim_in=256): def __init__(self, dim_in=256):
super(FastRCNN, self).__init__() super(FastRCNN, self).__init__()
if len(cfg.RPN.STRIDES) > 1:
# RPN with multiple strides(i.e. FPN)
from lib.fpn import ProposalLayer, ProposalTargetLayer
else:
# RPN with single stride(i.e. C4)
from lib.faster_rcnn import ProposalLayer, ProposalTargetLayer
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2) self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = torch.nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM) self.fc6 = nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM) self.fc7 = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES) self.cls_score = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4) self.bbox_pred = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.rpn_decoder = RPNDecoder() self.rpn_decoder = det.RPNDecoder()
self.proposal_layer = ProposalLayer() self.proposal = faster_rcnn.Proposal()
self.proposal_target_layer = ProposalTargetLayer() self.proposal_target = faster_rcnn.ProposalTarget()
self.softmax = torch.nn.Softmax(dim=1) self.softmax = nn.Softmax(dim=1)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False) self.sigmoid = nn.Sigmoid()
self.roi_func = { self.box_roi_feature = functools.partial({
'RoIPool': torch.vision.ops.roi_pool, 'RoIPool': vision.roi_pool,
'RoIAlign': torch.vision.ops.roi_align, 'RoIAlign': vision.roi_align
}[cfg.FRCNN.ROI_XFORM_METHOD] }[cfg.FRCNN.ROI_XFORM_METHOD], size=cfg.FRCNN.ROI_XFORM_RESOLUTION)
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1) self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = torch.nn.SmoothL1Loss(reduction='batch_size') self.bbox_loss = nn.SmoothL1Loss()
# Compute spatial scales for multiple strides # Compute spatial scales according to strides
roi_levels = [level for level in range( self.spatial_scales = [
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)] 1. / (2 ** lvl)
self.spatial_scales = [1.0 / (2 ** level) for level in roi_levels] for lvl in range(
cfg.FPN.ROI_MIN_LEVEL,
cfg.FPN.ROI_MAX_LEVEL + 1
)]
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
# Careful initialization for Fast R-CNN # Careful initialization for Fast R-CNN
torch.nn.init.normal_(self.cls_score.weight, std=0.01) init.normal(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001) init.normal(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters(): for name, p in self.named_parameters():
if 'bias' in name: if 'bias' in name:
torch.nn.init.constant_(p, 0) init.constant(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func(
feature, rois,
output_size=(
cfg.FRCNN.ROI_XFORM_RESOLUTION,
cfg.FRCNN.ROI_XFORM_RESOLUTION,
),
spatial_scale=spatial_scale,
)
def forward(self, **kwargs): def forward(self, **kwargs):
# Generate Proposals # Generate proposals
# Apply the CXX implementation during inference proposal_func = self.proposal \
proposal_func = self.proposal_layer \
if self.training else self.rpn_decoder if self.training else self.rpn_decoder
self.rcnn_data = { self.data = {
'rois': proposal_func( 'rois': proposal_func(
kwargs['features'], kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data), self.sigmoid(kwargs['rpn_cls_score'].data),
...@@ -93,66 +85,61 @@ class FastRCNN(torch.nn.Module): ...@@ -93,66 +85,61 @@ class FastRCNN(torch.nn.Module):
) )
} }
# Generate Targets from Proposals # Generate targets from proposals
if self.training: if self.training:
self.rcnn_data.update( self.data.update(
self.proposal_target_layer( self.proposal_target(
rpn_rois=self.rcnn_data['rois'], rpn_rois=self.data['rois'],
gt_boxes=kwargs['gt_boxes'], gt_boxes=kwargs['gt_boxes'],
) )
) )
# Transform RoI Feature # Transform RoI features
roi_features = [] if len(self.data['rois']) > 1:
if len(self.rcnn_data['rois']) > 1: roi_features = \
for i, spatial_scale in enumerate(self.spatial_scales): torch.cat([
roi_features.append( self.box_roi_feature(
self.RoIFeatureTransform(
kwargs['features'][i], kwargs['features'][i],
self.rcnn_data['rois'][i], self.data['rois'][i],
spatial_scale, spatial_scale,
) ) for i, spatial_scale in enumerate(self.spatial_scales)
) ], dim=0)
roi_features = torch.cat(roi_features, dim=0)
else: else:
spatial_scale = 1.0 / cfg.RPN.STRIDES[0]
roi_features = \ roi_features = \
self.RoIFeatureTransform( self.box_roi_feature(
kwargs['features'][0], kwargs['features'][0],
self.rcnn_data['rois'][0], self.data['rois'][0],
spatial_scale, 1. / cfg.RPN.STRIDES[0],
) )
# Apply a simple MLP # Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim) roi_features = roi_features.view(-1, self.roi_head_dim)
rcnn_output = self.relu(self.fc6(roi_features)) roi_features = self.relu(self.fc6(roi_features))
rcnn_output = self.relu(self.fc7(rcnn_output)) roi_features = self.relu(self.fc7(roi_features))
# Compute rcnn logits # Compute logits and losses
cls_score = self.cls_score(rcnn_output).float() outputs = collections.OrderedDict()
outputs = collections.OrderedDict([ cls_score = self.cls_score(roi_features).float()
('bbox_pred', self.bbox_pred(rcnn_output).float()), outputs['bbox_pred'] = self.bbox_pred(roi_features).float()
])
if self.training: if self.training:
# Compute rcnn losses # Compute rcnn losses
outputs.update(collections.OrderedDict([ outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss( ('cls_loss', self.cls_loss(
cls_score, self.rcnn_data['labels'])), cls_score, self.data['labels'])),
('bbox_loss', self.bbox_loss( ('bbox_loss', self.bbox_loss(
outputs['bbox_pred'], outputs['bbox_pred'],
self.rcnn_data['bbox_targets'], self.data['bbox_targets'],
self.rcnn_data['bbox_inside_weights'], self.data['bbox_inside_weights'],
self.rcnn_data['bbox_outside_weights'], self.data['bbox_outside_weights'],
)), )),
])) ]))
else: else:
# Return the rois to decode the refine boxes # Return the rois to decode the refine boxes
if len(self.rcnn_data['rois']) > 1: if len(self.data['rois']) > 1:
outputs['rois'] = torch.cat( outputs['rois'] = torch.cat(self.data['rois'], 0)
self.rcnn_data['rois'], dim=0)
else: else:
outputs['rois'] = self.rcnn_data['rois'][0] outputs['rois'] = self.data['rois'][0]
# Return the classification prob # Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score) outputs['cls_prob'] = self.softmax(cls_score)
......
...@@ -16,43 +16,41 @@ from __future__ import print_function ...@@ -16,43 +16,41 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1 from lib.modules import init
from lib.modeling import conv3x3 from lib.modules import nn
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
class FPN(torch.nn.Module): class FPN(nn.Module):
"""Feature Pyramid Networks for R-CNN and RetinaNet.""" """Feature Pyramid Networks for R-CNN and RetinaNet."""
def __init__(self, feature_dims): def __init__(self, feature_dims):
super(FPN, self).__init__() super(FPN, self).__init__()
self.C = torch.nn.ModuleList() dim = cfg.FPN.DIM
self.P = torch.nn.ModuleList() self.C = nn.ModuleList()
self.P = nn.ModuleList()
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1): for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1):
self.C.append(conv1x1(feature_dims[lvl - 1], cfg.FPN.DIM, bias=True)) self.C.append(nn.Conv1x1(feature_dims[lvl - 1], dim, bias=True))
self.P.append(conv3x3(cfg.FPN.DIM, cfg.FPN.DIM, bias=True)) self.P.append(nn.Conv3x3(dim, dim, bias=True))
if 'rcnn' in cfg.MODEL.TYPE: if 'rcnn' in cfg.MODEL.TYPE:
self.apply_func = self.apply_on_rcnn self.apply_func = self.apply_on_rcnn
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True) self.maxpool = nn.MaxPool2d(1, 2, ceil_mode=True)
else: else:
self.apply_func = self.apply_on_generic self.apply_func = self.apply_on_generic
self.relu = torch.nn.ReLU(inplace=False) self.relu = nn.ReLU(inplace=False)
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1): for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else cfg.FPN.DIM dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else dim
self.P.append(conv3x3(dim_in, cfg.FPN.DIM, stride=2, bias=True)) self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True))
self.feature_dims = [dim]
self.reset_parameters() self.reset_parameters()
self.feature_dims = [cfg.FPN.DIM]
def reset_parameters(self): def reset_parameters(self):
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
torch.nn.init.kaiming_uniform_( init.xaiver(m.weight)
m.weight, init.constant(m.bias, 0)
a=1, # Fix the gain for [-127, 127]
) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0)
def apply_on_rcnn(self, features): def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import functools
import dragon.vm.torch as torch
from lib import mask_rcnn
from lib.core.config import cfg
from lib.modules import det
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
class MaskRCNN(nn.Module):
"""Generate mask regions for R-CNN series.
The pipeline is as follows:
... -> BoxRoIs \ /-> cls_score -> cls_loss
-> RoIFeatureXform -> MLP
... -> Features / \-> bbox_pred -> bbox_loss
... -> MaskRoIs \
-> RoIFeatureXform -> FCN -> mask_score -> mask_loss
... -> Features /
"""
def __init__(self, dim_in=256):
super(MaskRCNN, self).__init__()
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.fcn = nn.ModuleList([nn.Conv3x3(dim_in, dim_in, bias=True) for _ in range(4)])
self.fcn += [nn.ConvTranspose2d(dim_in, dim_in, 2, 2, 0)]
self.cls_score = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.mask_score = nn.Conv1x1(dim_in, cfg.MODEL.NUM_CLASSES - 1, bias=True)
self.rpn_decoder = det.RPNDecoder()
self.proposal = mask_rcnn.Proposal()
self.proposal_target = mask_rcnn.ProposalTarget()
self.sigmoid = nn.Sigmoid()
self.softmax = nn.Softmax(dim=1)
self.relu = nn.ReLU(True)
self.box_roi_feature = functools.partial({
'RoIPool': vision.roi_pool,
'RoIAlign': vision.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD], size=cfg.FRCNN.ROI_XFORM_RESOLUTION)
self.mask_roi_feature = functools.partial({
'RoIPool': vision.roi_pool,
'RoIAlign': vision.roi_align,
}[cfg.MRCNN.ROI_XFORM_METHOD], size=cfg.MRCNN.ROI_XFORM_RESOLUTION)
self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = nn.SmoothL1Loss()
self.mask_loss = nn.BCEWithLogitsLoss()
# Compute spatial scales according to strides
self.spatial_scales = [
1. / (2 ** lvl)
for lvl in range(
cfg.FPN.ROI_MIN_LEVEL,
cfg.FPN.ROI_MAX_LEVEL + 1
)]
self.reset_parameters()
def reset_parameters(self):
# Careful initialization for Fast R-CNN
init.normal(self.cls_score.weight, std=0.01)
init.normal(self.bbox_pred.weight, std=0.001)
# Careful initialization for Mask R-CNN
init.normal(self.mask_score.weight, std=0.001)
for m in self.fcn.modules():
if hasattr(m, 'weight'):
init.kaiming_normal(m.weight)
for name, p in self.named_parameters():
if 'bias' in name:
init.constant(p, 0)
def get_mask_score(self, features, rois):
roi_features = \
torch.cat([
self.mask_roi_feature(
features[i], rois[i], spatial_scale,
) for i, spatial_scale in enumerate(self.spatial_scales)
], dim=0)
for i in range(len(self.fcn)):
roi_features = self.relu(self.fcn[i](roi_features))
return self.mask_score(roi_features).float()
def forward(self, **kwargs):
# Generate proposals
proposal_func = self.proposal \
if self.training else self.rpn_decoder
self.data = {
'rois': proposal_func(
kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data),
kwargs['rpn_bbox_pred'],
kwargs['ims_info'],
)
}
# Generate targets from proposals
if self.training:
self.data.update(
self.proposal_target(
rpn_rois=self.data['rois'],
gt_boxes=kwargs['gt_boxes'],
gt_masks=kwargs['gt_masks'],
ims_info=kwargs['ims_info'],
)
)
# Transform RoI features
roi_features = \
torch.cat([
self.box_roi_feature(
kwargs['features'][i],
self.data['rois'][i],
spatial_scale,
) for i, spatial_scale in enumerate(self.spatial_scales)
], dim=0)
# Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim)
roi_features = self.relu(self.fc6(roi_features))
roi_features = self.relu(self.fc7(roi_features))
# Compute logits and losses
outputs = collections.OrderedDict()
cls_score = self.cls_score(roi_features).float()
outputs['bbox_pred'] = self.bbox_pred(roi_features).float()
if self.training:
# Compute the loss of bbox branch
outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss(
cls_score, self.data['labels'])),
('bbox_loss', self.bbox_loss(
outputs['bbox_pred'],
self.data['bbox_targets'],
self.data['bbox_inside_weights'],
self.data['bbox_outside_weights'],
)),
]))
# Compute the loss of mask branch
mask_score = self.get_mask_score(
kwargs['features'], self.data['mask_rois'])
mask_score = mask_score.index_select(
(0, 1), self.data['mask_indices'])
outputs['mask_loss'] = self.mask_loss(
mask_score, self.data['mask_targets'])
else:
# Return the RoIs to decode the refined boxes
if len(self.data['rois']) > 1:
outputs['rois'] = torch.cat(self.data['rois'], 0)
else:
outputs['rois'] = self.data['rois'][0]
# Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score)
# Set a callback to decode masks from the refined RoIs
self.compute_mask_score = \
functools.partial(
self.get_mask_score,
features=kwargs['features'],
)
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
def conv_triplet(dim_in, dim_out):
"""1x1 convolution + BN + ReLU."""
return [
nn.Conv2d(dim_in, dim_out, 1, bias=False),
nn.Affine(dim_out),
nn.ReLU(True),
]
def conv_quintet(dim_in, dim_out, ks, stride):
"""KxK convolution + BN + ReLU."""
return [
nn.DepthwiseConv2d(
dim_in, dim_in,
kernel_size=ks,
stride=stride,
padding=ks // 2,
bias=False,
),
nn.Affine(dim_in),
nn.ReLU(True),
nn.Conv1x1(dim_in, dim_out),
nn.Affine(dim_out),
]
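# A short sketch of how the two helpers compose (illustrative dimensions):
# an inverted residual with expansion factor 3 is conv_triplet (1x1 expand +
# BN + ReLU) followed by conv_quintet (KxK depthwise + BN + ReLU + 1x1
# project + BN), i.e. 3 + 5 modules, which is how Choice assembles its
# `conv` list below whenever mb != 1.
_example_block = conv_triplet(32, 96) + conv_quintet(96, 24, ks=3, stride=1)
assert len(_example_block) == 8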
class Setting(object):
V2 = (
[2, 3, 4, 3, 3, 1],
[2, 2, 2, 1, 2, 1],
[32, 16, 24, 32, 64, 96, 160, 320, 1280],
)
PROXYLESS_MOBILE = (
[4, 4, 4, 4, 4, 1],
[2, 2, 2, 1, 2, 1],
[32, 16, 32, 40, 80, 96, 192, 320, 1280],
)
PROXYLESS_GPU = (
[4, 4, 4, 4, 4, 1],
[2, 2, 2, 1, 2, 1],
[40, 24, 32, 56, 112, 128, 256, 432, 1280],
)
def Stem(dim_out, stride=1):
return torch.nn.Sequential(
torch.nn.Conv2d(
3, dim_out,
kernel_size=3,
stride=stride,
padding=1,
bias=False,
),
nn.Affine(dim_out),
nn.ReLU(True),
)
class Choice(nn.Module):
def __init__(self, dim_in, dim_out, mb=3, ks=3, stride=1):
super(Choice, self).__init__()
self.mb = mb
dim_hidden = int(round(dim_in * mb))
seq = conv_triplet(dim_in, dim_hidden) if mb != 1 else []
seq += conv_quintet(dim_hidden, dim_out, ks, stride)
self.conv = nn.ModuleList(seq)
self.stride = stride
self.apply_residual = stride == 1 and dim_in == dim_out
def forward(self, x):
residual = x if self.apply_residual else None
for i in range(3):
x = self.conv[i](x)
y = x if self.stride == 2 else None
for i in range(3, len(self.conv)):
x = self.conv[i](x)
if self.apply_residual:
return residual + x, y
else:
return x, y
class NASMobileNet(nn.Module):
def __init__(self, choices, preset=Setting.PROXYLESS_MOBILE):
super(NASMobileNet, self).__init__()
# Pre-defined blocks
def select_block(choice):
return {
0: functools.partial(Choice, mb=3, ks=3),
1: functools.partial(Choice, mb=6, ks=3),
2: functools.partial(Choice, mb=3, ks=5),
3: functools.partial(Choice, mb=6, ks=5),
4: functools.partial(Choice, mb=3, ks=7),
5: functools.partial(Choice, mb=6, ks=7),
6: nn.Identity,
}[choice]
# Hand-crafted configurations
repeats, strides, out_channels = preset
names = ['2!', '3!', '4', '4!', '5', '5!']
self.num_layers = len(choices)
assert sum(repeats) == self.num_layers
# + Stem
self.bootstrap = vision.Bootstrap()
self.conv1 = Stem(out_channels[0], stride=2)
self.stage1 = Choice(out_channels[0], out_channels[1], mb=1, ks=3)
dim_in = out_channels[1]
self.feature_dims = [out_channels[-1]]
# + Body
self.layers = []
for name, rep, dim_out, stride in zip(
names, repeats, out_channels[2:], strides):
self.layers.append(select_block(
choices[len(self.layers)]
)(dim_in, dim_out, stride=stride))
if stride == 2:
self.feature_dims.insert(
-1, dim_in * self.layers[-1].mb)
for i in range(rep - 1):
self.layers.append(select_block(
choices[len(self.layers)]
)(dim_out, dim_out, stride=1))
fullname = 'stage%s' % name.split('!')[0]
seq = getattr(self, fullname, [])
seq += self.layers[-rep:]
seq = nn.Sequential(*seq) if '!' in name else seq
setattr(self, fullname, seq)
dim_in = dim_out
self.conv6 = nn.Sequential(*conv_triplet(dim_in, out_channels[-1]))
self.reset_parameters()
def reset_parameters(self):
for m in self.modules():
if nn.is_conv2d(m):
init.kaiming_normal(m.weight, 'fan_out')
if m.bias is not None:
init.constant(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant(m.weight, 1)
elif isinstance(m, nn.Linear):
if m.bias is not None:
init.constant(m.bias, 0)
# Stop the gradients if necessary
def freeze_func(m):
if nn.is_conv2d(m):
m.weight.requires_grad = False
m._buffers['weight'] = m.weight
del m._parameters['weight']
if cfg.MODEL.FREEZE_AT > 0:
self.conv1.apply(freeze_func)
self.stage1.apply(freeze_func)
for i in range(cfg.MODEL.FREEZE_AT, 1, -1):
getattr(self, 'stage{}'.format(i)).apply(freeze_func)
def forward(self, x):
x = self.conv1(x)
x, _ = self.stage1(x)
outputs = []
for layer in self.layers:
x = layer(x)
x, y = x if isinstance(x, tuple) else (x, None)
if y is not None:
outputs.append(y)
outputs.append(self.conv6(x))
return outputs
def make_mobilenet_a1():
return NASMobileNet([
4, 6, 6, 6,
3, 3, 4, 6,
2, 4, 0, 4, 1, 5, 3, 5,
2, 4, 2, 4,
1,
], Setting.PROXYLESS_MOBILE)
def make_mobilenet_v2():
return NASMobileNet([
1, 1,
1, 1, 1,
1, 1, 1, 1, 1, 1, 1,
1, 1, 1,
1,
], Setting.V2)
...@@ -20,12 +20,11 @@ from __future__ import print_function ...@@ -20,12 +20,11 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import affine from lib.modules import nn
from lib.modeling import conv1x1 from lib.modules import init
from lib.modeling import conv3x3
class BasicBlock(torch.nn.Module): class BasicBlock(nn.Module):
def __init__( def __init__(
self, self,
dim_in, dim_in,
...@@ -35,11 +34,11 @@ class BasicBlock(torch.nn.Module): ...@@ -35,11 +34,11 @@ class BasicBlock(torch.nn.Module):
dropblock=None, dropblock=None,
): ):
super(BasicBlock, self).__init__() super(BasicBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride) self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out) self.bn1 = nn.Affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.conv2 = conv3x3(dim_out, dim_out) self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out) self.bn2 = nn.Affine(dim_out)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock self.dropblock = dropblock
...@@ -83,12 +82,12 @@ class Bottleneck(torch.nn.Module): ...@@ -83,12 +82,12 @@ class Bottleneck(torch.nn.Module):
): ):
super(Bottleneck, self).__init__() super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction) dim = int(dim_out * self.contraction)
self.conv1 = conv1x1(dim_in, dim) self.conv1 = nn.Conv1x1(dim_in, dim)
self.bn1 = affine(dim) self.bn1 = nn.Affine(dim)
self.conv2 = conv3x3(dim, dim, stride=stride) self.conv2 = nn.Conv3x3(dim, dim, stride=stride)
self.bn2 = affine(dim) self.bn2 = nn.Affine(dim)
self.conv3 = conv1x1(dim, dim_out) self.conv3 = nn.Conv1x1(dim, dim_out)
self.bn3 = affine(dim_out) self.bn3 = nn.Affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock self.dropblock = dropblock
...@@ -133,7 +132,7 @@ class ResNet(torch.nn.Module): ...@@ -133,7 +132,7 @@ class ResNet(torch.nn.Module):
padding=3, padding=3,
bias=False, bias=False,
) )
self.bn1 = affine(self.dim_in) self.bn1 = nn.Affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d( self.maxpool = torch.nn.MaxPool2d(
kernel_size=3, kernel_size=3,
...@@ -160,13 +159,9 @@ class ResNet(torch.nn.Module): ...@@ -160,13 +159,9 @@ class ResNet(torch.nn.Module):
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
# The Kaiming Initialization
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
torch.nn.init.kaiming_normal_( init.kaiming_normal(m.weight)
m.weight,
nonlinearity='relu',
)
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
...@@ -184,15 +179,15 @@ class ResNet(torch.nn.Module): ...@@ -184,15 +179,15 @@ class ResNet(torch.nn.Module):
def make_blocks(self, block, dim_out, blocks, stride=1, dropblock=None): def make_blocks(self, block, dim_out, blocks, stride=1, dropblock=None):
downsample = None downsample = None
if stride != 1 or self.dim_in != dim_out: if stride != 1 or self.dim_in != dim_out:
downsample = torch.nn.Sequential( downsample = nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride), nn.Conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out), nn.Affine(dim_out),
) )
layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)] layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)]
self.dim_in = dim_out self.dim_in = dim_out
for i in range(1, blocks): for i in range(1, blocks):
layers.append(block(dim_out, dim_out, dropblock=dropblock)) layers.append(block(dim_out, dim_out, dropblock=dropblock))
return torch.nn.Sequential(*layers) return nn.Sequential(*layers)
def forward(self, x): def forward(self, x):
x = self.conv1(x) x = self.conv1(x)
......