Commit d3ed62db by Ting PAN

Support Mask R-CNN

1 parent 41b3932b
Showing with 2602 additions and 1652 deletions
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.3.0 (20191121)
Dragon Minimum Required (Version 0.3.0.dev20191121)
Changes:
Preview Features:
- New algorithm: Mask R-CNN.
- Add MobileNet (V2 and NAS) as backbones.
- Refactor the testing module; multi-GPU testing is now supported.
Bugs fixed:
- Remove rotated boxes, use Mask R-CNN instead.
------------------------------------------------------------------------
SeetaDet 0.2.3 (20191101)
Dragon Minimum Required (Version 0.3.0.dev20191021)
......
......@@ -12,6 +12,10 @@ while the style of codes is PyTorch.
The torch-style code helps simplify the hierarchical pipeline of modern detection.
## Requirements
seeta-dragon >= 0.3.0.dev20191121
## Installation
#### 1. Install the required python packages
......
......@@ -5,7 +5,6 @@ rm -r build install *.c *.cpp
# Compile cpp modules
python setup.py build_ext --inplace
g++ -o ../lib/utils/ctypes_rbox.so -shared -fPIC -O2 rbox.cc -std=c++11 -fopenmp
# Compile cuda modules
cd build && cmake .. && make install && cd ..
......
......@@ -41,6 +41,9 @@ __C.TRAIN.WEIGHTS = ''
# Database to train
__C.TRAIN.DATABASE = ''
# The number of workers to transform data
__C.TRAIN.NUM_WORKERS = 3
# Scales to use during training (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,)
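A rough sketch (not part of this change) of the shortest-side scaling these options imply; compute_im_scale is a hypothetical helper, while cfg.TRAIN.SCALES and cfg.TRAIN.MAX_SIZE are the options defined in this config:

import numpy as np
from lib.core.config import cfg

def compute_im_scale(im_height, im_width):
    # Pick one of the configured scales for the image's shortest side.
    target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))]
    im_scale = float(target_size) / float(min(im_height, im_width))
    # Cap the longest side at TRAIN.MAX_SIZE when it is positive.
    if cfg.TRAIN.MAX_SIZE > 0:
        im_scale = min(im_scale, float(cfg.TRAIN.MAX_SIZE) / float(max(im_height, im_width)))
    return im_scale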
......@@ -151,10 +154,10 @@ __C.TEST.SOFT_NMS_SIGMA = 0.5
# The top-k prior boxes before nms.
__C.TEST.NMS_TOP_K = 400
# The threshold for prAttrDicting boxes
# The threshold for predicting boxes
__C.TEST.SCORE_THRESH = 0.05
# The threshold for prAttrDicting masks
# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5
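A minimal sketch of how these two test thresholds are commonly applied; the score and mask arrays below are illustrative placeholders, not code from this commit:

import numpy as np
from lib.core.config import cfg

scores = np.array([0.92, 0.04, 0.61], 'float32')           # per-detection scores (example values)
mask_probs = np.random.rand(3, 28, 28).astype('float32')   # soft masks from a mask head
keep = np.where(scores >= cfg.TEST.SCORE_THRESH)[0]         # keep confident detections
binary_masks = (mask_probs[keep] >= cfg.TEST.BINARY_THRESH).astype('uint8')  # binarize their masks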
# NMS threshold used on RPN proposals
......@@ -192,8 +195,9 @@ __C.MODEL = AttrDict()
# The type of the model
# ('faster_rcnn',
# 'ssd',
# 'mask_rcnn',
# 'retinanet',
# 'ssd',
# )
__C.MODEL.TYPE = ''
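For illustration only (values normally come from a config file rather than being set in code), selecting the newly listed model type might look like this; the test driver in this commit resolves the type via importlib to lib.<TYPE>.test:

from lib.core.config import cfg

cfg.MODEL.TYPE = 'mask_rcnn'   # picks lib.mask_rcnn.test in run_test_net below
cfg.MODEL.NUM_CLASSES = 81     # e.g. 80 COCO classes + background (illustrative value)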
......@@ -361,14 +365,14 @@ __C.SSD.NUM_CONVS = 0
# Weight for bbox regression loss
__C.SSD.BBOX_REG_WEIGHT = 1.
__C.SSD.MULTIBOX = AttrDict()
# MultiBox configs
__C.SSD.MULTIBOX = AttrDict()
__C.SSD.MULTIBOX.STRIDES = []
__C.SSD.MULTIBOX.MIN_SIZES = []
__C.SSD.MULTIBOX.MAX_SIZES = []
__C.SSD.MULTIBOX.ASPECT_RATIOS = []
__C.SSD.MULTIBOX.ASPECT_ANGLES = []
# OHEM configs
__C.SSD.OHEM = AttrDict()
# The threshold for selecting negative bbox in hard example mining
__C.SSD.OHEM.NEG_OVERLAP = 0.5
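A hedged sketch of how such a negative-overlap threshold is typically used in hard example mining; mine_hard_negatives, max_overlaps, and cls_loss are assumptions for illustration, not code from this commit:

import numpy as np
from lib.core.config import cfg

def mine_hard_negatives(max_overlaps, cls_loss, num_keep):
    # Candidates: prior boxes whose best IoU with any ground-truth box is below NEG_OVERLAP.
    neg_inds = np.where(max_overlaps < cfg.SSD.OHEM.NEG_OVERLAP)[0]
    # Keep the highest-loss candidates, i.e. the "hard" negatives.
    order = np.argsort(-cls_loss[neg_inds])
    return neg_inds[order[:num_keep]]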
......
......@@ -21,46 +21,56 @@ import cv2
import dragon
from lib.core.config import cfg
from lib.datasets.example import Example
from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data_transformer import DataTransformer
class TestServer(object):
class _Server(object):
def __init__(self, output_dir):
self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.num_images, self.num_classes, self.classes = \
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = dragon.io.DataReader(
dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
self.data_transformer = DataTransformer()
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH * 5)
self.data_reader.start()
self.gt_recs = collections.OrderedDict()
self.output_dir = output_dir
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def evaluate_detections(self, all_boxes):
pass
def evaluate_segmentations(self, all_boxes, all_masks):
pass
def get_image(self):
example = self.data_reader.q_out.get()
image = self.data_transformer.get_image(example)
image_id, objects = self.data_transformer.get_annotations(example)
self.gt_recs[image_id] = {
'objects': objects,
'width': image.shape[1],
'height': image.shape[0],
}
return image_id, image
pass
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
class TestServer(_Server):
def __init__(self, output_dir):
super(TestServer, self).__init__(output_dir)
self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.classes = self.imdb.classes
self.num_images = self.imdb.num_images
self.num_classes = self.imdb.num_classes
self.data_reader = dragon.io.DataReader(
dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH * 5)
self.data_reader.start()
self.gt_recs = collections.OrderedDict()
def get_image(self):
example = Example(self.data_reader.q_out.get())
image, image_id = example.image, example.id
self.gt_recs[image_id] = {
'height': example.height,
'width': example.width,
'objects': example.objects,
}
return image_id, image
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
......@@ -70,7 +80,7 @@ class TestServer(object):
return self.gt_recs
def evaluate_detections(self, all_boxes):
if cfg.TEST.PROTOCOL == 'null':
if cfg.TEST.PROTOCOL == 'dump':
self.imdb.dump_detections(all_boxes, self.output_dir)
else:
self.imdb.evaluate_detections(
......@@ -88,56 +98,20 @@ class TestServer(object):
)
class InferServer(object):
class InferServer(_Server):
def __init__(self, output_dir):
super(InferServer, self).__init__(output_dir)
self.images_dir = cfg.TEST.DATABASE
self.imdb = get_imdb('taas:/empty')
self.images = os.listdir(self.images_dir)
self.num_images, self.num_classes, self.classes = \
len(self.images), cfg.MODEL.NUM_CLASSES, cfg.MODEL.CLASSES
self.data_transformer = DataTransformer()
self.gt_recs = collections.OrderedDict()
self.classes = cfg.MODEL.CLASSES
self.num_images = len(self.images)
self.num_classes = cfg.MODEL.NUM_CLASSES
self.output_dir = output_dir
self.image_idx = 0
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
image_name = self.images[self.image_idx]
image_id = image_name.split('.')[0]
image = cv2.imread(os.path.join(self.images_dir, image_name))
self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = {'width': image.shape[1], 'height': image.shape[0]}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
'Loading {} records, while {} required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import importlib
import multiprocessing
import numpy as np
from lib.core.config import cfg
from lib.utils import time_util
from lib.utils.vis import vis_one_image
def run_test_net(checkpoint, server, devices):
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
devices = devices if devices else [cfg.GPU_ID]
num_workers = len(devices)
test_fn = importlib.import_module(
'lib.%s.test' % cfg.MODEL.TYPE).test_net
_t = time_util.new_timers('im_detect', 'mask_detect', 'misc')
vis_image_dict = {}
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
all_masks = [[[] for _ in range(num_images)] for _ in range(num_classes)]
queues = [
multiprocessing.Queue()
for _ in range(num_workers + 1)
]
workers = [
multiprocessing.Process(
target=test_fn,
kwargs={
'weights': checkpoint,
'num_classes': server.num_classes,
'q_in': queues[i],
'q_out': queues[-1],
'device': devices[i],
}
) for i in range(num_workers)
]
for process in workers:
process.start()
for i in range(num_images):
image_id, raw_image = server.get_image()
queues[i % num_workers].put((i, raw_image))
# Hold the image until the visualization
if cfg.VIS or cfg.VIS_ON_FILE:
vis_image_dict[i] = (image_id, raw_image)
for i in range(num_workers):
queues[i].put((-1, None))
for count in range(num_images):
i, time_diffs, results = queues[-1].get()
# Unpack the diverse results
boxes_this_image = results['boxes']
masks_this_image = results.get('masks', None)
# Disable some collections
if masks_this_image is None:
all_masks = None
# Update time difference
for name, diff in time_diffs.items():
_t[name].add_diff(diff)
# Visualize the results if necessary
if cfg.VIS or cfg.VIS_ON_FILE:
image_id, raw_image = vis_image_dict[i]
vis_one_image(
raw_image,
classes,
boxes_this_image,
masks_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_id),
)
del vis_image_dict[i]
_t['misc'].tic()
# Pack the results in the class-major order
for j in range(1, num_classes):
all_boxes[j][i] = boxes_this_image[j]
if all_masks is not None:
if j < len(masks_this_image):
all_masks[j][i] = masks_this_image[j]
# Limit to max_per_image detections *over all classes*
max_detections = cfg.TEST.DETECTIONS_PER_IM
if max_detections > 0:
scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
scores.append(all_boxes[j][i][:, -1])
if len(scores) > 0:
scores = np.hstack(scores)
if len(scores) > max_detections:
thr = np.sort(scores)[-max_detections]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= thr)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
if all_masks is not None:
all_masks[j][i] = all_masks[j][i][keep]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s|{:.3f}s {:.3f}s'
.format(count + 1, num_images,
_t['im_detect'].average_time,
_t['mask_detect'].average_time,
_t['misc'].average_time),
end='')
print('\n\n>>> Evaluating detections\n')
server.evaluate_detections(all_boxes)
if all_masks is not None:
print('>>> Evaluating segmentations\n')
server.evaluate_segmentations(all_boxes, all_masks)
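A minimal usage sketch for the refactored multi-GPU testing entry; module paths and the checkpoint/output paths are assumptions, while the call signatures are the ones defined above:

from lib.core.test import TestServer            # module path assumed
from lib.core.test_engine import run_test_net   # module path assumed

server = TestServer(output_dir='/path/to/results')
run_test_net('/path/to/checkpoint', server, devices=[0, 1])  # one worker process per device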
......@@ -31,9 +31,9 @@ from lib.utils.stats import SmoothedValue
class SolverWrapper(object):
def __init__(self, coordinator):
self.output_dir = coordinator.checkpoints_dir()
self.solver = SGDSolver()
self.detector = self.solver.detector
self.output_dir = coordinator.checkpoints_dir()
# Setup the detector
self.detector.load_weights(cfg.TRAIN.WEIGHTS)
......@@ -89,7 +89,6 @@ class SolverWrapper(object):
display = self.solver.iter % cfg.SOLVER.DISPLAY == 0
stats = self.solver.one_step()
self.add_metrics(stats)
self.send_metrics(stats)
if display:
logger.info(
......@@ -104,6 +103,7 @@ class SolverWrapper(object):
continue
logger.info(' ' * 10 + 'Train net output({}): {}'
.format(k, v.GetMedianValue()))
self.send_metrics(stats)
def train_model(self):
"""Network training loop."""
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
from lib.pycocotools import mask_utils
class Example(object):
"""Wrapper for annotated example."""
def __init__(self, datum):
"""Create a ``Example``.
Parameters
----------
datum : Dict
            The data loaded from the dataset.
"""
self._datum = datum
@property
def id(self):
"""Return the example id.
Returns
-------
str
The unique id.
"""
return self._datum['id']
@property
def image(self):
"""Return the image data.
Returns
-------
numpy.ndarray
The image.
"""
img = np.frombuffer(self._datum['content'], 'uint8')
return cv2.imdecode(img, 3)
@property
def height(self):
"""Return the image height.
Returns
-------
int
The height of image.
"""
return self._datum['height']
@property
def objects(self):
"""Return the annotated objects.
Returns
-------
Sequence[Dict]
The objects.
"""
objects = []
for ix, obj in enumerate(self._datum['object']):
mask = obj.get('mask', None)
if 'x3' in obj:
poly = np.array([
obj['x1'], obj['y1'],
obj['x2'], obj['y2'],
obj['x3'], obj['y3'],
obj['x4'], obj['y4']
], 'float32')
x, y, w, h = cv2.boundingRect(
poly.reshape((-1, 2)))
bbox = [x, y, x + w, y + h]
mask = mask_utils.poly2bytes(
[poly],
self._datum['height'],
self._datum['width'],
)
elif 'x2' in obj:
bbox = [obj['x1'], obj['y1'], obj['x2'], obj['y2']]
elif 'xmin' in obj:
bbox = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']]
else:
bbox = obj['bbox']
objects.append({
'name': obj['name'],
'bbox': bbox,
'mask': mask,
'difficult': obj.get('difficult', 0),
})
return objects
@property
def width(self):
"""Return the image width.
Returns
-------
int
The width of image.
"""
return self._datum['width']
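A short usage sketch for the new Example wrapper, assuming datum is one record read from a dragon.io.SeetaRecordDataset (as in TestServer.get_image):

from lib.datasets.example import Example

example = Example(datum)
image = example.image                       # decoded numpy.ndarray
print(example.id, example.height, example.width)
for obj in example.objects:                 # normalized dicts: name, bbox, mask, difficult
    print(obj['name'], obj['bbox'])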
......@@ -13,84 +13,118 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil
import dragon
import uuid
from lib.core.config import cfg
from lib.datasets.coco_evaluator import COCOEvaluator
from lib.datasets.voc_evaluator import VOCEvaluator
class imdb(object):
def __init__(self, name):
self._name = name
self._num_classes = 0
self._classes = []
@property
def name(self):
return self._name
def __init__(self, source):
self._source = source
self._num_images = 0
self._classes = cfg.MODEL.CLASSES
self._class_to_ind = self._class_to_cat_id = \
dict(zip(self.classes, range(self.num_classes)))
self._salt = str(uuid.uuid4())
self.config = {'cleanup': True, 'use_salt': True}
@property
def num_classes(self):
return len(self._classes)
def cache_path(self):
cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache'))
if not os.path.exists(cache_path):
os.makedirs(cache_path)
return cache_path
@property
def classes(self):
return self._classes
@property
def cache_path(self):
cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache'))
if not os.path.exists(cache_path):
os.makedirs(cache_path)
return cache_path
def class_to_ind(self):
return self._class_to_ind
@property
def source(self):
        expected_source = os.path.join(self.cache_path, self.name)
        if not os.path.exists(expected_source):
            raise RuntimeError(
                'Expected source at: {}, '
                'but it does not exist.'
                .format(expected_source)
            )
        return expected_source
def comp_id(self):
return '_' + self._salt if self.config['use_salt'] else ''
@property
def num_classes(self):
return len(self._classes)
@property
def num_images(self):
return dragon.io.SeetaRecordDataset(self.source).size
return self._num_images
@property
def source(self):
return self._source
def competition_mode(self, on):
if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
def dump_detections(self, all_boxes, output_dir):
dataset = dragon.io.SeetaRecordDataset(self.source)
for file in ('data.data', 'data.index', 'data.meta'):
file = os.path.join(output_dir, file)
if os.path.exists(file):
os.remove(file)
writer = dragon.io.SeetaRecordWriter(output_dir, dataset.protocol)
for i in range(len(dataset)):
example = dataset.get()
example['object'] = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
detections = all_boxes[cls_ind][i]
if len(detections) == 0:
continue
for k in range(detections.shape[0]):
if detections[k, -1] < cfg.VIS_TH:
continue
example['object'].append({
'name': cls,
'xmin': float(detections[k][0]),
'ymin': float(detections[k][1]),
'xmax': float(detections[k][2]),
'ymax': float(detections[k][3]),
'difficult': 0,
})
writer.write(example)
pass
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
pass
protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
evaluator = VOCEvaluator(self)
evaluator.write_bbox_results(all_boxes, gt_recs, output_dir)
if '!' not in protocol:
for ovr in (0.5, 0.7):
evaluator.do_bbox_eval(
gt_recs,
output_dir,
iou=ovr,
use_07_metric='2007' in protocol,
)
elif 'coco' in protocol:
ann_file = cfg.TEST.JSON_FILE
evaluator = COCOEvaluator(self, ann_file)
if evaluator.coco is None:
ann_file = evaluator \
.write_bbox_annotations(
gt_recs, output_dir)
evaluator = COCOEvaluator(self, ann_file)
res_file = evaluator.write_bbox_results(
all_boxes, gt_recs, output_dir)
if '!' not in protocol:
evaluator.do_bbox_eval(res_file)
def evaluate_masks(self, all_boxes, all_masks, output_dir):
pass
def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir):
protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
evaluator = VOCEvaluator(self)
evaluator.write_segm_results(all_boxes, all_masks, output_dir)
if '!' not in protocol:
for ovr in (0.5, 0.7):
evaluator.do_segm_eval(
gt_recs,
output_dir,
iou=ovr,
use_07_metric='2007' in protocol,
)
elif 'coco' in protocol:
ann_file = cfg.TEST.JSON_FILE
evaluator = COCOEvaluator(self, ann_file)
if evaluator.coco is None:
ann_file = evaluator \
.write_segm_annotations(
gt_recs, output_dir)
evaluator = COCOEvaluator(self, ann_file)
res_file = evaluator.write_segm_results(
all_boxes, all_masks, gt_recs, output_dir)
if '!' not in protocol:
evaluator.do_segm_eval(res_file)
......@@ -20,15 +20,10 @@ from __future__ import print_function
import cv2
import numpy as np
try:
import cPickle
except:
import pickle as cPickle
from lib.core.config import cfg
from lib.pycocotools.mask_utils import mask_rle2im
from lib.utils import rotated_boxes
from lib.utils.boxes import expand_boxes
from lib.pycocotools import mask_utils
from lib.utils import boxes as box_util
from lib.utils.framework import pickle
from lib.utils.mask import mask_overlap
......@@ -66,15 +61,15 @@ def voc_bbox_eval(
det_file,
gt_recs,
cls_name,
IoU=0.5,
iou=0.5,
use_07_metric=False,
):
class_recs, n_pos = {}, 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R])
diff = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
objects = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in objects])
diff = np.array([x['difficult'] for x in objects]).astype(np.bool)
det = [False] * len(objects)
n_pos = n_pos + sum(~diff)
class_recs[image_name] = {'bbox': bbox, 'difficult': diff, 'det': det}
......@@ -100,7 +95,7 @@ def voc_bbox_eval(
nd = len(image_ids)
tp, fp = np.zeros(nd), np.zeros(nd)
def overlaps4(bb, BBGT):
def compute_overlaps(bb, BBGT):
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
......@@ -114,9 +109,6 @@ def voc_bbox_eval(
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
return inters / uni
def overlaps5(bb, BBGT):
return rotated_boxes.bbox_overlaps(bb.reshape((1, 5)), BBGT)[0]
for d in range(nd):
R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float)
......@@ -124,12 +116,11 @@ def voc_bbox_eval(
BBGT = R['bbox'].astype(float)
if BBGT.size > 0:
overlaps = overlaps4(bb, BBGT) \
if len(bb) == 4 else overlaps5(bb, BBGT)
overlaps = compute_overlaps(bb, BBGT)
ov_max = np.max(overlaps)
j_max = np.argmax(overlaps)
if ov_max > IoU:
if ov_max > iou:
if not R['difficult'][j_max]:
if not R['det'][j_max]:
tp[d] = 1.
......@@ -154,23 +145,29 @@ def voc_segm_eval(
seg_file,
gt_recs,
cls_name,
IoU=0.5,
iou=0.5,
use_07_metric=False,
):
# 0. Constants
M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH
scale = (M + 2.0) / M
scale = (M + 2.) / M
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
# 1. Get bbox & mask ground truths
image_names, class_recs, n_pos = [], {}, 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R])
mask = np.array([mask_rle2im([x['mask']], rec['height'], rec['width'])[0] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
objects = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in objects])
mask = np.array([
mask_utils.bytes2img(
x['mask'],
rec['height'],
rec['width']
) for x in objects]
)
difficult = np.array([x['difficult'] for x in objects]).astype(np.bool)
det = [False] * len(objects)
n_pos = n_pos + sum(~difficult)
class_recs[image_name] = {
'bbox': bbox,
......@@ -182,9 +179,9 @@ def voc_segm_eval(
# 2. Get predict pickle file for this class
with open(det_file, 'rb') as f:
boxes_pkl = cPickle.load(f)
boxes_pkl = pickle.load(f)
with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f)
masks_pkl = pickle.load(f)
# 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs)
......@@ -222,7 +219,7 @@ def voc_segm_eval(
fp = np.zeros((num_pred, 1))
tp = np.zeros((num_pred, 1))
ref_boxes = expand_boxes(new_boxes, scale)
ref_boxes = box_util.expand_boxes(new_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
for i in range(num_pred):
......@@ -261,13 +258,19 @@ def voc_segm_eval(
crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask)
ov = \
mask_overlap(
gt_mask_bound,
pred_mask_bound,
crop_mask,
pred_mask,
)
if ov > ovmax:
ovmax = ov
jmax = j
if ovmax > IoU:
if ovmax > iou:
if not R['difficult'][jmax]:
if not R['det'][jmax]:
tp[i] = 1.
......@@ -281,7 +284,7 @@ def voc_segm_eval(
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(n_pos)
# avoid divide by zero in case the first matches a difficult gt
# Avoid divide by zero in case the first matches a difficult gt
prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric=use_07_metric)
return ap
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
from lib.datasets import voc_eval
from lib.utils.framework import pickle
class VOCEvaluator(object):
def __init__(self, imdb):
self.imdb = imdb
def do_bbox_eval(
self,
gt_recs,
output_dir,
iou=0.5,
use_07_metric=True,
):
aps = []
print('~~~~~~ Evaluation IoU@%s ~~~~~~' % str(iou))
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
det_file = self.get_results_file(output_dir).format(cls)
rec, prec, ap = \
voc_eval.voc_bbox_eval(
det_file,
gt_recs, cls,
iou=iou,
use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
def do_segm_eval(
self,
gt_recs,
output_dir,
iou=0.5,
use_07_metric=True,
):
aps = []
print('~~~~~~ Evaluation IoU@%s ~~~~~~' % str(iou))
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
segm_filename = self.get_results_file(output_dir, 'segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
ap = voc_eval.voc_segm_eval(
bbox_filename,
segm_filename,
gt_recs, cls,
iou=iou,
use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
@staticmethod
def get_prefix(type='bbox'):
if type == 'bbox':
return 'detections'
elif type == 'segm':
return 'segmentations'
elif type == 'kpt':
return 'keypoints'
return ''
def get_results_file(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_<comp_id>_<class_name>.txt
if type == 'bbox':
filename = self.get_prefix(type) + self.imdb.comp_id + '_{:s}.txt'
elif type == 'segm':
filename = self.get_prefix(type) + self.imdb.comp_id + '_{:s}.pkl'
else:
raise ValueError('Type of results can be either bbox or segm.')
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def write_bbox_results(self, all_boxes, gt_recs, output_dir):
for cls_ind, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
print('Writing {} VOC format bbox results'.format(cls))
filename = self.get_results_file(output_dir).format(cls)
with open(filename, 'wt') as f:
ix = 0
for image_id, rec in gt_recs.items():
dets = all_boxes[cls_ind][ix]
ix += 1
if len(dets) == 0:
continue
for k in range(dets.shape[0]):
content = '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}' \
.format(image_id, dets[k, -1],
dets[k, 0] + 1, dets[k, 1] + 1,
dets[k, 2] + 1, dets[k, 3] + 1)
if dets.shape[1] == 6:
content += ' {:.2f}'.format(dets[k, 4])
f.write(content + '\n')
def write_segm_results(self, all_boxes, all_masks, output_dir):
for cls_inds, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
print('Writing {} VOC format segm results'.format(cls))
segm_filename = self.get_results_file(output_dir, 'segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
with open(bbox_filename, 'wb') as f:
pickle.dump(all_boxes[cls_inds], f, pickle.HIGHEST_PROTOCOL)
with open(segm_filename, 'wb') as f:
pickle.dump(all_masks[cls_inds], f, pickle.HIGHEST_PROTOCOL)
......@@ -13,7 +13,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.faster_rcnn.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.data_layer import DataLayer
from lib.faster_rcnn.proposal_layer import ProposalLayer
from lib.faster_rcnn.proposal_target_layer import ProposalTargetLayer
from lib.faster_rcnn.anchor_target import AnchorTarget
from lib.faster_rcnn.data_loader import DataLoader
from lib.faster_rcnn.proposal import Proposal
from lib.faster_rcnn.proposal_target import ProposalTarget
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.faster_rcnn.utils import map_returns_to_blobs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.utils import boxes as box_util
from lib.utils.framework import new_tensor
class AnchorTarget(object):
"""Assign ground-truth targets to anchors."""
def __init__(self):
super(AnchorTarget, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
self.num_strides = len(self.strides)
self.allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = []
for i in range(self.num_strides):
self.base_anchors.append(
generate_anchors(
self.strides[i],
self.ratios,
np.array([self.scales[i]])
if self.num_strides > 1
else np.array(self.scales)
)
)
def __call__(self, features, gt_boxes, ims_info):
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
# Generate grid anchors from base
all_anchors = \
generate_grid_anchors(
features,
self.base_anchors,
self.strides,
)
num_anchors = all_anchors.shape[0]
        # Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
labels_wide = -np.ones((num_images, num_anchors,), 'float32')
bbox_targets_wide = np.zeros((num_images, num_anchors, 4), 'float32')
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, ...)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self.allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self.allowed_border) &
(all_anchors[:, 1] >= -self.allowed_border) &
(all_anchors[:, 2] < im_info[1] + self.allowed_border) &
(all_anchors[:, 3] < im_info[0] + self.allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside, anchors = np.arange(num_anchors), all_anchors
num_inside = len(inds_inside)
labels = np.empty((num_inside,), 'float32')
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = box_util.bbox_overlaps(anchors, gt_boxes)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps,
np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(fg_inds, len(fg_inds) - num_fg, False)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(bg_inds, len(bg_inds) - num_bg, False)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), 'float32')
bbox_targets[fg_inds, :] = \
box_util.bbox_transform(
anchors[fg_inds, :],
gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), 'float32')
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), 'float32')
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
if self.num_strides > 1:
labels = labels_wide.reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
else:
A = self.base_anchors[0].shape[0]
height, width = features[0].shape[-2:]
labels = labels_wide \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = bbox_inside_weights_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = bbox_outside_weights_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': new_tensor(labels),
'bbox_targets': new_tensor(bbox_targets),
'bbox_inside_weights': new_tensor(bbox_inside_weights),
'bbox_outside_weights': new_tensor(bbox_outside_weights),
}
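A brief usage sketch for AnchorTarget; the input arrays are placeholders described only by shape, while the call signature and returned keys follow __call__ above:

anchor_target = AnchorTarget()
# features: sequence of feature maps; gt_boxes: packed (N, >=5) boxes with class labels;
# ims_info: one (height, width, scale) row per image.
blobs = anchor_target(features, gt_boxes, ims_info)
rpn_labels = blobs['labels']              # 1 = fg, 0 = bg, -1 = ignored
rpn_bbox_targets = blobs['bbox_targets']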
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
# C4 backbone takes the first stride
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
height, width = features[0].shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
all_anchors = (self.base_anchors.reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A)
        # label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
all_bbox_outside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(anchors, gt_boxes)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels first so that positive labels can clobber them
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels last so that negative labels can clobber positives
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds,
size=len(fg_inds) - num_fg,
replace=False,
)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds,
size=len(bg_inds) - num_bg,
replace=False,
)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
all_labels[ix, inds_inside] = labels # label
all_bbox_targets[ix, inds_inside] = bbox_targets
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
labels = all_labels \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, total_anchors))
bbox_targets = all_bbox_targets \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = all_bbox_inside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = all_bbox_outside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': array2tensor(labels),
'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights),
}
......@@ -27,11 +27,11 @@ from lib.utils import logger
from lib.utils.blob import im_list_to_blob
class DataLayer(torch.nn.Module):
"""Generate a mini-batch of data."""
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLayer, self).__init__()
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
......@@ -39,12 +39,11 @@ class DataLayer(torch.nn.Module):
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def forward(self):
# Get an array blob from the Queue
def __call__(self):
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
......@@ -59,14 +58,16 @@ class DataBatch(mp.Process):
----------
dataset : lambda
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
prefetch : int, optional, default=5
The prefetch count.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
......@@ -83,20 +84,10 @@ class DataBatch(mp.Process):
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', -1)
self._max_transformers = kwargs.get('max_transformers', 3)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
# Io-Aware Policy
if self._num_transformers == -1:
self._num_transformers = 2
# Add 1 transformer for color augmentation
if cfg.TRAIN.USE_COLOR_JITTER:
self._num_transformers += 1
self._num_transformers = min(
self._num_transformers, self._max_transformers)
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
......
......@@ -19,9 +19,9 @@ import cv2
import numpy as np
from lib.core.config import cfg
from lib.utils import rotated_boxes
from lib.datasets.example import Example
from lib.utils import boxes as box_util
from lib.utils.blob import prep_im_for_blob
from lib.utils.boxes import flip_boxes
from lib.utils.image import get_image_with_target_size
......@@ -44,32 +44,32 @@ class DataTransformer(multiprocessing.Process):
apply_flip=False,
offsets=None,
):
n_objects = 0
objects, n_objects = example.objects, 0
height, width = example.height, example.width
if not self._use_diff:
for obj in example['object']:
for obj in objects:
if obj.get('difficult', 0) == 0:
n_objects += 1
else:
n_objects = len(example['object'])
n_objects = len(objects)
roi_dict = {
'width': example['width'],
'height': example['height'],
'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), 'float32'),
'gt_classes': np.zeros((n_objects,), 'int32'),
}
# Filter the difficult instances
object_idx = 0
for obj in example['object']:
for obj in objects:
if not self._use_diff and \
obj.get('difficult', 0) > 0:
continue
bbox = obj['bbox']
roi_dict['boxes'][object_idx, :] = [
max(0, obj['xmin']),
max(0, obj['ymin']),
min(obj['xmax'], example['width'] - 1),
min(obj['ymax'], example['height'] - 1),
max(0, bbox[0]),
max(0, bbox[1]),
min(bbox[2], width - 1),
min(bbox[3], height - 1),
]
roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[obj['name']]
......@@ -77,8 +77,11 @@ class DataTransformer(multiprocessing.Process):
# Flip the boxes if necessary
if apply_flip:
roi_dict['boxes'] = flip_boxes(
roi_dict['boxes'], roi_dict['width'])
roi_dict['boxes'] = \
box_util.flip_boxes(
roi_dict['boxes'],
width,
)
# Scale the boxes to the detecting scale
roi_dict['boxes'] *= im_scale
......@@ -94,61 +97,32 @@ class DataTransformer(multiprocessing.Process):
return roi_dict
@classmethod
def get_image(cls, example):
img = np.frombuffer(example['content'], np.uint8)
return cv2.imdecode(img, -1)
@classmethod
def get_annotations(cls, example):
objects = []
for ix, obj in enumerate(example['object']):
if 'x3' in obj:
bbox = rotated_boxes.vertices2box(
[obj['x1'], obj['y1'],
obj['x2'], obj['y2'],
obj['x3'], obj['y3'],
obj['x4'], obj['y4']]
)
elif 'x2' in obj:
bbox = [obj['x1'], obj['y1'], obj['x2'], obj['y2']]
elif 'xmin' in obj:
bbox = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']]
else:
bbox = obj['bbox']
objects.append({
'name': obj['name'],
'difficult': obj.get('difficult', 0),
'bbox': bbox,
})
return example['id'], objects
def get(self, example):
img = np.frombuffer(example['content'], np.uint8)
img = cv2.imdecode(img, 1)
example = Example(example)
img = example.image
# Scale
scale_indices = np.random.randint(len(cfg.TRAIN.SCALES))
target_size = cfg.TRAIN.SCALES[scale_indices]
im, im_scale, jitter = prep_im_for_blob(img, target_size, cfg.TRAIN.MAX_SIZE)
max_size = cfg.TRAIN.MAX_SIZE
target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))]
img, im_scale, jitter = prep_im_for_blob(img, target_size, max_size)
# Flip
apply_flip = False
if self._use_flipped:
if np.random.randint(2) > 0:
im = im[:, ::-1, :]
img = img[:, ::-1]
apply_flip = True
# Random Crop or RandomPad
offsets = None
if cfg.TRAIN.MAX_SIZE > 0:
if jitter != 1.0:
if jitter != 1:
# To a rectangle (scale, max_size)
target_size = (np.array(im.shape[0:2]) / jitter).astype(np.int)
im, offsets = get_image_with_target_size(target_size, im)
target_size = (np.array(img.shape[:2]) / jitter).astype(np.int32)
img, offsets = get_image_with_target_size(target_size, img)
else:
# To a square (target_size, target_size)
im, offsets = get_image_with_target_size([target_size] * 2, im)
img, offsets = get_image_with_target_size([target_size] * 2, img)
# Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets)
......@@ -158,7 +132,7 @@ class DataTransformer(multiprocessing.Process):
gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
return im, im_scale, gt_boxes
return img, im_scale, gt_boxes
def run(self):
# Fix the process-local random seed
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.nms import nms_wrapper
from lib.utils import boxes as box_util
class Proposal(object):
"""Compute proposals by applying transformations anchors."""
def __init__(self):
super(Proposal, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
self.num_strides = len(self.strides)
self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
])
# Generate base anchors
self.base_anchors = []
for i in range(self.num_strides):
self.base_anchors.append(
generate_anchors(
self.strides[i],
self.ratios,
np.array([self.scales[i]])
if self.num_strides > 1
else np.array(self.scales)
)
)
def __call__(self, features, cls_prob, bbox_pred, ims_info):
pre_nms_top_n = cfg.TRAIN.RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg.TRAIN.RPN_POST_NMS_TOP_N
nms_thresh = cfg.TRAIN.RPN_NMS_THRESH
min_size = cfg.TRAIN.RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
all_anchors = \
generate_grid_anchors(
features,
self.base_anchors,
self.strides,
)
# Prepare for the outputs
batch_rois = []
cls_prob = cls_prob.numpy(True)
bbox_pred = bbox_pred.numpy(True)
if self.num_strides > 1:
# (?, 4, A * K) -> (?, A * K, 4)
bbox_pred = bbox_pred.transpose((0, 2, 1))
else:
# (?, A * 4, H, W) -> (?, H, W, A * 4)
cls_prob = cls_prob.transpose((0, 2, 3, 1))
bbox_pred = bbox_pred.transpose((0, 2, 3, 1))
# Extract RoIs separately
for ix in range(num_images):
# [?, N] -> [? * N, 1]
scores = cls_prob[ix].reshape((-1, 1))
if self.num_strides > 1:
deltas = bbox_pred[ix]
else:
deltas = bbox_pred[ix].reshape((-1, 4))
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# Convert anchors into proposals via bbox transformations
proposals = box_util.bbox_transform_inv(anchors, deltas)
# Clip predicted boxes to image
proposals = box_util.clip_tiled_boxes(proposals, ims_info[ix, :2])
# Remove predicted boxes with either height or width < threshold
keep = box_util.filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# Apply nms (e.g. threshold = 0.7)
# Take after_nms_topN (e.g. 300)
# Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Attach RoIs with batch indices
batch_inds = np.empty((proposals.shape[0], 1), 'float32')
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype('float32', copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
return np.concatenate(batch_rois, 0)
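And a matching usage sketch for Proposal; the inputs are placeholders, while the signature mirrors __call__ above:

proposal = Proposal()
# cls_prob / bbox_pred: RPN outputs; ims_info: one (height, width, scale) row per image.
rois = proposal(features, cls_prob, bbox_pred, ims_info)  # (R, 5): batch_ind, x1, y1, x2, y2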
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms import nms_wrapper
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""Compute proposals by applying transformations to anchors."""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_top_n = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
# Generate proposals from shifted anchors
height, width = cls_prob.shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
anchors = \
self.base_anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = anchors.reshape((K * A, 4))
# Prepare for the outputs
batch_rois = []
# scores & deltas are (1, A, H, W) format
# Transpose to (1, H, W, A)
batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1))
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1))
# Extract RoIs separately
for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix].reshape((-1, 4))
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2])
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_top_n (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN':
return rpn_rois
else:
return [array2tensor(rpn_rois)]
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_returns_to_blobs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.utils import boxes as box_util
from lib.utils.framework import new_tensor
class ProposalTarget(object):
"""Assign ground-truth targets to proposals."""
def __init__(self):
super(ProposalTarget, self).__init__()
self.num_strides = len(cfg.RPN.STRIDES)
self.num_classes = cfg.MODEL.NUM_CLASSES
self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
('labels', np.array([-1], 'float32')),
('bbox_targets', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_inside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_outside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
])
def __call__(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = self.defaults.keys()
blobs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype('int32') == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, :4]))))
# Sample a batch of RoIs for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
map_returns_to_blobs(
sample_rois(
rois,
gt_boxes,
rois_per_image,
fg_rois_per_image,
self.num_classes,
), blobs, keys,
)
# Stack into continuous blobs
for k, v in blobs.items():
blobs[k] = np.concatenate(blobs[k], 0)
if self.num_strides > 1:
# Distribute RoIs into pyramids
min_lvl = cfg.FPN.ROI_MIN_LEVEL
max_lvl = cfg.FPN.ROI_MAX_LEVEL
k = max_lvl - min_lvl + 1
levels = map_rois_to_levels(blobs['rois'], min_lvl, max_lvl)
outputs = map_blobs_to_outputs(
blobs,
self.defaults,
[np.where(levels == (i + min_lvl))[0] for i in range(k)],
)
return {
'rois': [new_tensor(outputs['rois'][i]) for i in range(k)],
'labels': new_tensor(np.concatenate(outputs['labels'], 0)),
'bbox_targets': new_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': new_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': new_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
else:
# Return RoIs directly for CX-stride
return {
'rois': [new_tensor(blobs['rois'])],
'labels': new_tensor(blobs['labels']),
'bbox_targets': new_tensor(blobs['bbox_targets']),
'bbox_inside_weights': new_tensor(blobs['bbox_inside_weights']),
'bbox_outside_weights': new_tensor(blobs['bbox_outside_weights']),
}
def get_targets(ex_rois, gt_rois, gt_labels, num_classes):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
# Compute bbox regression targets
fg_inds = np.where(gt_labels > 0)[0]
targets = box_util.bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
bbox_targets = np.zeros((ex_rois.shape[0], 4 * num_classes), 'float32')
inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
for i in fg_inds:
start = int(4 * gt_labels[i])
bbox_targets[i, start:start + 4] = targets[i]
inside_weights[i, start:start + 4] = (1., 1., 1., 1.)
outside_weights = np.array(inside_weights > 0).astype('float32')
return bbox_targets, inside_weights, outside_weights
def sample_rois(
all_rois,
gt_boxes,
num_rois,
num_fg_rois,
num_classes,
):
"""Sample a batch of RoIs comprising foreground and background examples."""
overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
fg_rois_per_this_image = int(min(num_fg_rois, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, fg_rois_per_this_image, False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image
bg_rois_per_this_image = num_rois - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, bg_rois_per_this_image, False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays
rois, labels = all_rois[keep_inds], labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
# Clamp the image indices for the background RoIs to -1
rois[fg_rois_per_this_image:, 0] = -1
# Compute the target from RoIs
outputs = [rois, labels]
outputs += get_targets(
rois[:, 1:5],
gt_boxes[gt_assignment[keep_inds], :4],
labels,
num_classes,
)
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets."""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
batch_outputs = {key: [] for key in keys}
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return {
'rois': [array2tensor(batch_outputs['rois'])],
'labels': array2tensor(batch_outputs['labels']),
'bbox_targets': array2tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': array2tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': array2tensor(batch_outputs['bbox_outside_weights']),
}
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th).
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois(
all_rois,
gt_boxes,
fg_rois_per_image,
rois_per_image,
num_classes,
):
"""Generate a random sample of RoIs."""
overlaps = bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
......@@ -17,14 +17,13 @@ import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.modeling.detector import new_detector
from lib.nms import nms_wrapper
from lib.utils import boxes as box_util
from lib.utils import framework
from lib.utils import time_util
from lib.utils.blob import im_list_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
......@@ -39,69 +38,65 @@ def im_detect(detector, raw_image):
], dtype=np.float32)
# Do Forward
if not hasattr(detector, 'frozen_graph'):
inputs = {
'data': torch.from_numpy(blobs['data']),
'ims_info': torch.from_numpy(blobs['ims_info']),
}
if not hasattr(detector, 'graph'):
with framework.new_workspace().as_default():
data = torch.from_numpy(blobs['data'])
ims_info = torch.from_numpy(blobs['ims_info'])
with torch.no_grad():
with torch.jit.Recorder(retain_ops=True):
with torch.jit.Tracer(retain_ops=True):
inputs = {'data': data, 'ims_info': ims_info}
outputs = detector.forward(inputs)
detector.frozen_graph = \
framework.FrozenGraph(
{'data': inputs['data'],
'ims_info': inputs['ims_info']},
{'rois': outputs['rois'],
detector.graph = \
framework.Graph(inputs, {
'rois': outputs['rois'],
'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']},
)
outputs = detector.frozen_graph(**blobs)
'bbox_pred': outputs['bbox_pred']
})
outputs = detector.graph(**blobs)
# Decode results
batch_rois = outputs['rois']
batch_scores = outputs['cls_prob']
batch_deltas = outputs['bbox_pred']
batch_boxes = bbox_transform_inv(
batch_rois[:, 1:5],
batch_deltas,
rois = outputs['rois']
scores, boxes, batch_inds = [], [], []
pred_boxes = \
box_util.bbox_transform_inv(
rois[:, 1:5],
outputs['bbox_pred'],
cfg.BBOX_REG_WEIGHTS,
)
scores_wide, boxes_wide = [], []
for i in range(len(ims)):
inds = np.where(rois[:, 0].astype(np.int32) == i)[0]
im_boxes = pred_boxes[inds] / ims_scale[i]
scores.append(outputs['cls_prob'][inds])
boxes.append(box_util.clip_tiled_boxes(im_boxes, raw_image.shape))
for im_idx in range(len(ims)):
indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0]
boxes = batch_boxes[indices]
boxes /= ims_scale[im_idx]
clip_tiled_boxes(boxes, raw_image.shape)
scores_wide.append(batch_scores[indices])
boxes_wide.append(boxes)
return (
np.vstack(scores) if len(ims) > 0 else scores[0],
np.vstack(boxes) if len(ims) > 0 else boxes[0],
)
return (np.vstack(scores_wide), np.vstack(boxes_wide)) \
if len(scores_wide) > 1 else (scores_wide[0], boxes_wide[0])
def test_net(weights, num_classes, q_in, q_out, device):
num_classes, cfg.GPU_ID = num_classes, device
detector = new_detector(device, weights)
def test_net(detector, server):
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = time_util.new_timers('im_detect', 'misc')
_t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()}
while True:
idx, raw_image = q_in.get()
if raw_image is None:
break
for i in range(num_images):
image_id, raw_image = server.get_image()
boxes_this_image = [[]]
with _t['im_detect'].tic_and_toc():
scores, boxes = im_detect(detector, raw_image)
_t['misc'].tic()
boxes_this_image = [[]]
for j in range(1, num_classes):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4]
cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
......@@ -119,43 +114,16 @@ def test_net(detector, server):
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(
raw_image,
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_id),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(i + 1, num_images,
_t['im_detect'].average_time,
_t['misc'].average_time),
end='')
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
print('Evaluating detections')
server.evaluate_detections(all_boxes)
q_out.put((
idx,
{
'im_detect': _t['im_detect'].average_time,
'misc': _t['misc'].average_time,
},
{
'boxes': boxes_this_image,
},
))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
from lib.core.config import cfg
def generate_grid_anchors(features, base_anchors, strides):
num_strides = len(strides)
if len(features) != num_strides:
raise ValueError(
'Given %d features for %d strides.'
% (len(features), num_strides)
)
# Generate proposals from shifted anchors
anchors_to_pack = []
for i in range(len(features)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * strides[i]
shift_y = np.arange(0, height) * strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
if num_strides > 1:
# Transpose from (K, A, 4) to (A, K, 4)
# We will pack it with other strides to
# match the data format of (N, C, H, W)
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
anchors_to_pack.append(anchors)
else:
# Original order of Faster R-CNN
return anchors.reshape((K * A, 4))
return np.vstack(anchors_to_pack)
def map_returns_to_blobs(returns, blobs, keys):
"""Map returns of image to blobs."""
for i, key in enumerate(keys):
blobs[key].append(returns[i])
def map_rois_to_levels(rois, k_min, k_max):
"""Map rois to fpn levels."""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
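# FPN paper heuristic: k = floor(k0 + log2(sqrt(w * h) / 224));
# the 1e-6 term guards against log2(0) for degenerate boxes.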
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
def map_blobs_to_outputs(blobs, defaults, lvl_inds):
"""Map blobs to outputs according to fpn indices."""
outputs = collections.defaultdict(list)
for inds in lvl_inds:
for key, blob in blobs.items():
outputs[key].append(
blob[inds]
if len(inds) > 0
else defaults[key]
)
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))
)
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
ratios=self.ratios,
scales=np.array(scale),
)
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
all_anchors, total_anchors = [], 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors)
total_anchors += anchors.shape[0]
all_anchors = np.vstack(all_anchors)
# label: 1 is positive, 0 is negative, -1 is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32)
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(anchors, gt_boxes)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps,
np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :],
gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
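# Uniform 1 / RPN_BATCHSIZE outside weights average the loss over the sampled
# anchors; the inside weights above restrict the regression term to positives.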
labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': array2tensor(labels),
'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights),
}
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms import nms_wrapper
from lib.utils import logger
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""Compute proposals by applying transformations anchors."""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))
)
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
ratios=self.ratios,
scales=np.array(scale),
)
)
def generate_grid_anchors(self, features):
# Generate proposals from shifted anchors
anchors_wide = []
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
anchors_wide.append(anchors)
return np.vstack(anchors_wide)
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_top_n = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
all_anchors = self.generate_grid_anchors(features) # [n, 4]
if cls_prob.shape[0] != num_images or \
bbox_pred.shape[0] != num_images:
logger.fatal('Incorrect num of images: {}'.format(num_images))
# Prepare for the outputs
batch_rois = []
batch_scores = cls_prob.numpy(True)
batch_deltas = bbox_pred.numpy(True) \
.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
# Extract RoIs separately
for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix] # [n, 4]
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; first partition to get the top K
# unsorted, then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. Remove predicted boxes with either height or width < threshold
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# 4. Apply nms (e.g. threshold = 0.7)
# 5. Take after_nms_topN (e.g. 300)
# 6. Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN':
return rpn_rois
else:
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
k = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level)
all_rois = []
for i in range(k):
lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling
all_rois.append(array2tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else:
all_rois.append(array2tensor(rpn_rois[lv_indices]))
return all_rois
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets.
"""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
self.fake_outputs = {
'rois': np.array([[0, 0, 0, 1, 1]], dtype=np.float32),
'labels': np.array([-1], dtype=np.float32),
'bbox_targets': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_inside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
}
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
outputs = {key: [] for key in keys}
batch_outputs = {key: [] for key in keys}
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
k = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(k)]
_fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
outputs=outputs,
keys=keys,
levels=lvs_indices,
)
return {
'rois': [array2tensor(outputs['rois'][i]) for i in range(k)],
'labels': array2tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': array2tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': array2tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': array2tensor(np.vstack(outputs['bbox_outside_weights'])),
}
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th).
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Sample a batch of RoIs comprising foreground and background examples."""
# overlaps: (rois x gt_boxes)
overlaps = bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
def _fmap_rois(inputs, fake_outputs, outputs, keys, levels):
def impl(a, b, indices):
return a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(impl(inputs[i], fake_outputs[key], inds))
......@@ -13,6 +13,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.fpn.anchor_target_layer import AnchorTargetLayer
from lib.fpn.proposal_layer import ProposalLayer
from lib.fpn.proposal_target_layer import ProposalTargetLayer
from lib.faster_rcnn.anchor_target import AnchorTarget
from lib.faster_rcnn.proposal import Proposal
from lib.mask_rcnn.data_loader import DataLoader
from lib.mask_rcnn.proposal_target import ProposalTarget
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.mask_rcnn.data_transformer import DataTransformer
from lib.datasets.factory import get_imdb
from lib.utils import logger
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import mask_list_to_blob
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def __call__(self):
outputs = self.data_batch.get()
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
dataset : lambda
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q21 = mp.Queue(num_batches * self._batch_size)
self.Q22 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
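# Q1 holds raw examples from the readers, Q21/Q22 hold transformed samples
# split by aspect ratio (see DataTransformer.run), and Q3 holds assembled batches.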
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1
transformer.q1_out, transformer.q2_out = self.Q21, self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q3.get()
def run(self):
"""Start the process to produce batches."""
def produce(q_in):
processed_ims, ims_info = [], []
packed_boxes, packed_masks = [], []
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes, gt_masks = q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[:2]) + [im_scale])
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, image_index
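# Append the image index as the last column so gt boxes from different
# images can be dismantled again downstream (see dismantle_masks).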
packed_boxes.append(im_boxes)
packed_masks.append(gt_masks)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, 'float32'),
'gt_boxes': np.concatenate(packed_boxes, 0),
'gt_masks': mask_list_to_blob(packed_masks),
}
# Two queues implement aspect grouping, which avoids
# padding a mini-batch into a huge square blob and
# thus reduces GPU memory usage
q1, q2 = self.Q21, self.Q22
# Main prefetch loop
while True:
if q1.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q1))
elif q2.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q2))
q1, q2 = q2, q1 # Uniform sampling trick
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from lib.core.config import cfg
from lib.datasets.example import Example
from lib.pycocotools import mask_utils
from lib.utils import boxes as box_util
from lib.utils.blob import prep_im_for_blob
from lib.utils.image import get_image_with_target_size
class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._seed = cfg.RNG_SEED
self._use_flipped = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF
self._classes = kwargs.get('classes', ('__background__',))
self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self.q_in = self.q1_out = self.q2_out = None
self.daemon = True
def make_roi_dict(self, example, im_scale, apply_flip=False):
objects, n_objects = example.objects, 0
height, width = example.height, example.width
if not self._use_diff:
for obj in objects:
if obj.get('difficult', 0) == 0:
n_objects += 1
else:
n_objects = len(objects)
roi_dict = {
'boxes': np.zeros((n_objects, 4), 'float32'),
'masks': np.empty((n_objects, height, width), 'uint8'),
'gt_classes': np.zeros((n_objects, 1), 'int32'),
'mask_flags': np.ones((n_objects, 1), 'float32'),
}
# Filter the difficult instances
object_idx = 0
for obj in objects:
if not self._use_diff and \
obj.get('difficult', 0) > 0:
continue
bbox, mask = obj['bbox'], obj['mask']
roi_dict['boxes'][object_idx, :] = [
max(0, bbox[0]),
max(0, bbox[1]),
min(bbox[2], width - 1),
min(bbox[3], height - 1),
]
if mask is not None:
roi_dict['masks'][object_idx] = (
mask_utils.bytes2img(
obj['mask'],
height,
width,
))
else:
roi_dict['mask_flags'][object_idx] = 0.
roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[obj['name']]
object_idx += 1
# Flip the boxes if necessary
if apply_flip:
roi_dict['boxes'] = \
box_util.flip_boxes(
roi_dict['boxes'],
width,
)
# Scale the boxes to the detection scale
roi_dict['boxes'] *= im_scale
return roi_dict
def get(self, example):
example = Example(example)
img = example.image
# Scale
max_size = cfg.TRAIN.MAX_SIZE
target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))]
img, im_scale, jitter = prep_im_for_blob(img, target_size, max_size)
# Flip
apply_flip = False
if self._use_flipped:
if np.random.randint(2) > 0:
img = img[:, ::-1]
apply_flip = True
# Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls, flag}]
gt_boxes = \
np.concatenate([
roi_dict['boxes'],
roi_dict['gt_classes'],
roi_dict['mask_flags']
], axis=1)
# Post-Process for gt masks
# Shape like: [num_objects, im_h, im_w]
if gt_boxes.shape[0] > 0:
gt_masks = roi_dict['masks']
if apply_flip:
gt_masks = gt_masks[:, :, ::-1]
else:
gt_masks = None
return img, im_scale, gt_boxes, gt_masks
def run(self):
# Fix the process-local random seed
np.random.seed(self._seed)
# Main prefetch loop
while True:
outputs = self.get(self.q_in.get())
if len(outputs[2]) < 1:
continue # Ignore the non-object image
aspect_ratio = float(outputs[0].shape[0]) / outputs[0].shape[1]
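# Route portrait (h > w) and landscape samples into separate queues so that
# each mini-batch groups images with similar aspect ratios.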
if aspect_ratio > 1.:
self.q1_out.put(outputs)
else:
self.q2_out.put(outputs)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_returns_to_blobs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.utils import boxes as box_util
from lib.utils import mask as mask_util
from lib.utils.framework import new_tensor
class ProposalTarget(object):
"""Assign proposals to ground-truth targets."""
def __init__(self):
super(ProposalTarget, self).__init__()
self.resolution = cfg.MRCNN.RESOLUTION
self.num_classes = cfg.MODEL.NUM_CLASSES
self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
('labels', np.array([-1], 'float32')),
('bbox_targets', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_inside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_outside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('mask_targets', -np.ones((1, self.resolution, self.resolution), 'float32')),
])
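# A mask target of -1 marks entries ignored by the mask loss; like the other
# defaults, it also pads FPN levels that receive no sampled RoIs.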
def __call__(self, rpn_rois, gt_boxes, gt_masks, ims_info):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
# GT masks (num_objects, im_h, im_w)
gt_boxes_wide, gt_masks_wide = \
mask_util.dismantle_masks(
gt_boxes,
gt_masks,
num_images,
)
# Prepare for the outputs
keys = self.defaults.keys()
blobs = {key: [] for key in keys}
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
gt_masks = gt_masks_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype('int32') == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, :4]))))
# Sample a batch of RoIs for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
map_returns_to_blobs(
sample_rois(
rois,
gt_boxes,
gt_masks,
rois_per_image,
fg_rois_per_image,
self.num_classes,
ims_info[ix][2],
), blobs, keys,
)
# Stack into contiguous blobs
for k, v in blobs.items():
blobs[k] = np.concatenate(blobs[k], 0)
# Distribute rois into pyramids
k_min = cfg.FPN.ROI_MIN_LEVEL
k_max = cfg.FPN.ROI_MAX_LEVEL
k = k_max - k_min + 1
levels = map_rois_to_levels(blobs['rois'], k_min, k_max)
outputs = \
map_blobs_to_outputs(
blobs,
self.defaults,
[np.where(levels == (i + k_min))[0] for i in range(k)],
)
# Select only the foreground RoIs for the mask branch
for i in range(k):
inds = np.where(outputs['labels'][i] > 0)[0]
inds = inds if len(inds) > 0 else np.array([0], 'int64')
outputs['mask_rois'].append(outputs['rois'][i][inds])
outputs['mask_targets'][i] = outputs['mask_targets'][i][inds]
outputs['mask_labels'].append(outputs['labels'][i][inds].astype('int64') - 1)
# Use the sparse indices to select logits
# Reduce the overhead of feeding dense class-specific targets
mask_labels = np.concatenate(outputs['mask_labels'], 0)
mask_indices = np.arange(len(mask_labels)) * (self.num_classes - 1)
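# Adding mask_labels to these strided offsets flattens (roi, class) pairs into
# indices over the (num_fg_rois, num_classes - 1) mask logits, so only the
# matched class channel of each foreground RoI is supervised.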
return {
'rois': [new_tensor(outputs['rois'][i]) for i in range(k)],
'labels': new_tensor(np.concatenate(outputs['labels'], 0)),
'bbox_targets': new_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': new_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': new_tensor(np.vstack(outputs['bbox_outside_weights'])),
'mask_rois': [new_tensor(outputs['mask_rois'][i]) for i in range(k)],
'mask_targets': new_tensor(np.vstack(outputs['mask_targets'])),
'mask_indices': new_tensor(mask_indices + mask_labels),
}
def get_targets(
ex_rois,
gt_rois,
gt_labels,
gt_masks,
mask_flags,
mask_size,
num_classes,
im_scale,
):
"""Compute the bounding-box regression targets."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
# Compute bbox regression targets
fg_inds = np.where(gt_labels > 0)[0]
targets = box_util.bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
bbox_targets = np.zeros((ex_rois.shape[0], 4 * num_classes), 'float32')
inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
for i in fg_inds:
start = int(4 * gt_labels[i])
bbox_targets[i, start:start + 4] = targets[i]
inside_weights[i, start:start + 4] = (1., 1., 1., 1.)
outside_weights = np.array(inside_weights > 0).astype('float32')
# Compute mask classification targets
mask_shape = [mask_size] * 2
ex_rois_ori = np.round(ex_rois / im_scale).astype(int)
gt_rois_ori = np.round(gt_rois / im_scale).astype(int)
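# Ground-truth masks are stored at the original image resolution, so project
# the RoIs back by the inverse image scale before cropping mask targets.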
mask_targets = -np.ones([len(gt_labels)] + mask_shape, 'float32')
for i in fg_inds:
if mask_flags[i] > 0:
box_mask = \
mask_util.intersect_box_mask(
ex_rois_ori[i],
gt_rois_ori[i],
gt_masks[i],
)
if box_mask is not None:
mask_targets[i] = \
mask_util.resize_mask(
mask=box_mask,
size=mask_shape,
)
return bbox_targets, inside_weights, outside_weights, mask_targets
def sample_rois(
all_rois,
gt_boxes,
gt_masks,
num_rois,
num_fg_rois,
num_classes,
im_scale,
):
"""Sample a batch of RoIs comprising foreground and background examples."""
overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
fg_rois_per_this_image = int(min(num_fg_rois, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, fg_rois_per_this_image, False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image
bg_rois_per_this_image = num_rois - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, bg_rois_per_this_image, False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays
rois, labels = all_rois[keep_inds], labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
# Clamp the image indices for the background RoIs to -1
rois[fg_rois_per_this_image:, 0] = -1
# Compute the target from RoIs
outputs = [rois, labels]
outputs += get_targets(
rois[:, 1:5],
gt_boxes[gt_assignment[keep_inds], :4],
labels,
gt_masks[gt_assignment[fg_inds]],
gt_boxes[gt_assignment[fg_inds], 5],
cfg.MRCNN.RESOLUTION,
num_classes,
im_scale,
)
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn import map_rois_to_levels
from lib.faster_rcnn import map_blobs_to_outputs
from lib.modeling.detector import new_detector
from lib.nms import nms_wrapper
from lib.utils import framework
from lib.utils import time_util
from lib.utils import boxes as box_util
from lib.utils.blob import im_list_to_blob
from lib.utils.image import scale_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales."""
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale
], dtype=np.float32)
# Do Forward
if not hasattr(detector, 'graph'):
with framework.new_workspace().as_default():
data = torch.from_numpy(blobs['data'])
ims_info = torch.from_numpy(blobs['ims_info'])
with torch.no_grad():
with torch.jit.Tracer(retain_ops=True):
inputs = {'data': data, 'ims_info': ims_info}
outputs = detector.forward(inputs)
detector.graph = \
framework.Graph(inputs, {
'rois': outputs['rois'],
'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']
})
outputs = detector.graph(**blobs)
# Decode results
rois = outputs['rois']
scores, boxes, batch_inds = [], [], []
pred_boxes = \
box_util.bbox_transform_inv(
rois[:, 1:5],
outputs['bbox_pred'],
cfg.BBOX_REG_WEIGHTS,
)
for i in range(len(ims)):
inds = np.where(rois[:, 0].astype(np.int32) == i)[0]
im_boxes = pred_boxes[inds] / ims_scale[i]
scores.append(outputs['cls_prob'][inds])
boxes.append(box_util.clip_tiled_boxes(im_boxes, raw_image.shape))
batch_inds.append(np.ones((len(inds), 1), 'int32') * i)
return (
np.vstack(scores) if len(ims) > 0 else scores[0],
np.vstack(boxes) if len(ims) > 0 else boxes[0],
np.vstack(batch_inds) if len(ims) > 0 else batch_inds[0],
np.array(ims_scale, 'float64'),
)
def mask_detect(detector, rois):
k_min = cfg.FPN.ROI_MIN_LEVEL
k_max = cfg.FPN.ROI_MAX_LEVEL
k = k_max - k_min + 1
levels = map_rois_to_levels(rois, k_min, k_max)
level_inds = [np.where(levels == (i + k_min))[0] for i in range(k)]
fpn_rois = map_blobs_to_outputs(
{'rois': rois[:, :5]},
{'rois': np.array([[-1, 0, 0, 1, 1]], 'float32')},
level_inds)['rois']
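# Pad empty pyramid levels with a fake RoI so every level feeds RoIAlign a
# non-empty blob; these fake entries are skipped when building mask_inds below.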
workspace = detector.graph.workspace
placeholders = detector.graph.placeholders
score_fn = detector.rcnn.compute_mask_score
with workspace.as_default():
if 'rois' not in placeholders:
placeholders['rois'] = \
[framework.new_placeholder(cfg.GPU_ID) for _ in range(k)]
placeholders['mask_inds'] = \
framework.new_placeholder(cfg.GPU_ID)
for i, v in enumerate(fpn_rois):
framework.feed_tensor(placeholders['rois'][i], v.astype('float32'))
with torch.no_grad():
mask_score = score_fn(rois=placeholders['rois'])
nc, i = mask_score.shape[1], 0
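# mask_score is laid out as (num_rois, num_classes - 1, M, M); build flattened
# (roi * nc + class) indices so index_select over axes (0, 1) picks the
# class-specific mask for each RoI.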
mask_inds = {}
for inds in level_inds:
for idx in inds:
cls = int(rois[idx, 5])
mask_inds[idx] = (i * nc + cls)
i += 1
if len(inds) == 0:
i += 1
mask_inds = list(map(mask_inds.get, sorted(mask_inds)))
framework.feed_tensor(
placeholders['mask_inds'],
np.array(mask_inds, 'int64'),
)
with torch.no_grad():
mask_pred = mask_score.index_select(
(0, 1), placeholders['mask_inds'])
return detector.rcnn.sigmoid(mask_pred).numpy(True).copy()
def test_net(weights, num_classes, q_in, q_out, device):
num_classes, cfg.GPU_ID = num_classes, device
detector = new_detector(device, weights)
_t = time_util.new_timers('im_detect', 'mask_detect', 'misc')
while True:
idx, raw_image = q_in.get()
if raw_image is None:
break
rois_this_image = []
boxes_this_image = [[]]
masks_this_image = [[]]
with _t['im_detect'].tic_and_toc():
scores, boxes, batch_inds, ims_scale = \
im_detect(detector, raw_image)
_t['misc'].tic()
for j in range(1, num_classes):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
cls_batch_inds = batch_inds[inds]
cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = nms_wrapper.soft_nms(
cls_detections,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms_wrapper.nms(
cls_detections,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
cls_batch_inds = cls_batch_inds[keep]
boxes_this_image.append(cls_detections)
rois_this_image.append(
np.hstack((
cls_batch_inds,
cls_detections[:, :4] * ims_scale[cls_batch_inds],
np.ones((len(keep), 1)) * (j - 1),
)))
mask_rois = np.concatenate(rois_this_image)
_t['misc'].toc()
if len(mask_rois) > 0:
k = 0
_t['mask_detect'].tic()
mask_pred = mask_detect(detector, mask_rois)
for j in range(1, num_classes):
num_pred = len(boxes_this_image[j])
cls_masks = mask_pred[k:k + num_pred]
masks_this_image.append(cls_masks)
k += num_pred
_t['mask_detect'].toc()
q_out.put((
idx,
{
'im_detect': _t['im_detect'].average_time,
'mask_detect': _t['mask_detect'].average_time,
'misc': _t['misc'].average_time,
},
{
'boxes': boxes_this_image,
'masks': masks_this_image,
},
))
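# A hedged sketch of how a driver could feed `test_net` workers through the
# queues; the harness below (names, poison pills, result ordering) is an
# assumption, not the repository's actual test runner.
import multiprocessing as mp

def run_test_workers(weights, num_classes, images, devices):
    q_in, q_out = mp.Queue(), mp.Queue()
    workers = [
        mp.Process(target=test_net, args=(weights, num_classes, q_in, q_out, dev))
        for dev in devices
    ]
    for p in workers:
        p.start()
    for idx, img in enumerate(images):
        q_in.put((idx, img))
    for _ in devices:
        q_in.put((-1, None))                  # poison pill stops each worker loop
    results = [q_out.get() for _ in range(len(images))]
    for p in workers:
        p.join()
    return sorted(results, key=lambda r: r[0])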
......@@ -14,12 +14,9 @@ from __future__ import division
from __future__ import print_function
# Import custom modules
from lib.modeling.base import affine
from lib.modeling.base import bn
from lib.modeling.base import conv1x1
from lib.modeling.base import conv3x3
from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.fpn import FPN
from lib.modeling.mask_rcnn import MaskRCNN
from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD
......@@ -15,20 +15,19 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
from lib.modules import init
from lib.modules import nn
class WideResBlock(torch.nn.Module):
class WideResBlock(nn.Module):
def __init__(self, dim_in, dim_out, stride=1, downsample=None):
super(WideResBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out)
self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = nn.Affine(dim_out)
self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = nn.Affine(dim_out)
self.downsample = downsample
self.relu = torch.nn.ReLU(inplace=True)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
residual = x
......@@ -48,20 +47,20 @@ class WideResBlock(torch.nn.Module):
return out
class InceptionBlock(torch.nn.Module):
class InceptionBlock(nn.Module):
def __init__(self, dim_in, dim_out):
super(InceptionBlock, self).__init__()
self.conv1 = conv1x1(dim_in, dim_out)
self.bn1 = affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out // 2)
self.bn2 = affine(dim_out // 2)
self.conv3a = conv3x3(dim_out // 2, dim_out)
self.bn3a = affine(dim_out)
self.conv3b = conv3x3(dim_out, dim_out)
self.bn3b = affine(dim_out)
self.conv4 = conv3x3(dim_out * 3, dim_out)
self.bn4 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.conv1 = nn.Conv1x1(dim_in, dim_out)
self.bn1 = nn.Affine(dim_out)
self.conv2 = nn.Conv3x3(dim_out, dim_out // 2)
self.bn2 = nn.Affine(dim_out // 2)
self.conv3a = nn.Conv3x3(dim_out // 2, dim_out)
self.bn3a = nn.Affine(dim_out)
self.conv3b = nn.Conv3x3(dim_out, dim_out)
self.bn3b = nn.Affine(dim_out)
self.conv4 = nn.Conv3x3(dim_out * 3, dim_out)
self.bn4 = nn.Affine(dim_out)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
residual = x
......@@ -82,7 +81,7 @@ class InceptionBlock(torch.nn.Module):
out_3x3_b = self.bn3b(out)
out_3x3_b = self.relu(out_3x3_b)
out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], dim=1)
out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], 1)
out = self.conv4(out)
out = self.bn4(out)
......@@ -91,22 +90,22 @@ class InceptionBlock(torch.nn.Module):
return out
class AirNet(torch.nn.Module):
class AirNet(nn.Module):
def __init__(self, blocks, num_stages):
super(AirNet, self).__init__()
self.dim_in, filters = 64, [64, 128, 256, 384]
self.feature_dims = [None, None] + \
filters[1:num_stages - 1]
self.conv1 = torch.nn.Conv2d(
self.conv1 = nn.Conv2d(
3, 64,
kernel_size=7,
stride=2,
padding=3,
bias=False,
)
self.bn1 = affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d(
self.bn1 = nn.Affine(self.dim_in)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(
kernel_size=2,
stride=2,
padding=0,
......@@ -121,19 +120,14 @@ class AirNet(torch.nn.Module):
self.reset_parameters()
def reset_parameters(self):
# The Kaiming Initialization
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_(
m.weight,
# Fix the gain for [-127, 127]
a=1,
) # Xavier Initialization
if isinstance(m, nn.Conv2d):
init.xaiver(m.weight)
def make_blocks(self, dim_out, blocks, stride=1):
downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out),
downsample = nn.Sequential(
nn.Conv1x1(self.dim_in, dim_out, stride=stride),
nn.Affine(dim_out),
)
layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)]
self.dim_in = dim_out
......@@ -144,7 +138,7 @@ class AirNet(torch.nn.Module):
layers.append(InceptionBlock(dim_out, dim_out))
else:
raise ValueError('Unknown block flag: ' + blocks[i])
return torch.nn.Sequential(*layers)
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define some basic structures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
def bn(dim_in, eps=1e-5):
"""The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps)
def conv1x1(dim_in, dim_out, stride=1, bias=False):
"""1x1 convolution."""
return torch.nn.Conv2d(
dim_in,
dim_out,
kernel_size=1,
stride=stride,
bias=bias,
)
def conv3x3(dim_in, dim_out, stride=1, bias=False):
"""3x3 convolution with padding."""
return torch.nn.Conv2d(
dim_in,
dim_out,
kernel_size=3,
stride=stride,
padding=1,
bias=bias,
)
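# A minimal usage sketch (an assumption, mirroring how the backbones compose
# these helpers): a 3x3 convolution followed by the frozen affine transform
# and a ReLU.
import dragon.vm.torch as torch

def conv_bn_relu(dim_in, dim_out, stride=1):
    return torch.nn.Sequential(
        conv3x3(dim_in, dim_out, stride=stride),
        affine(dim_out),
        torch.nn.ReLU(inplace=True),
    )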
......@@ -21,14 +21,16 @@ from lib.core.config import cfg
from lib.modeling import FPN
from lib.modeling import RPN
from lib.modeling import FastRCNN
from lib.modeling import MaskRCNN
from lib.modeling import RetinaNet
from lib.modeling import SSD
from lib.modeling.factory import get_body_func
from lib.ops.modules import Bootstrap
from lib.utils.logger import is_root
from lib.modules import nn
from lib.modules import vision
from lib.utils import logger
class Detector(torch.nn.Module):
class Detector(nn.Module):
"""Organize the detection pipelines.
A bunch of classic algorithms are integrated, see the
......@@ -42,19 +44,20 @@ class Detector(torch.nn.Module):
backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:]
# + Data Loader
self.data_layer = importlib.import_module(
'lib.{}'.format(model)).DataLayer
self.bootstrap = Bootstrap()
# + DataLoader
self.data_loader_cls = importlib.import_module(
'lib.{}'.format(model)).DataLoader
self.bootstrap = vision.Bootstrap()
# + Feature Extractor
# + FeatureExtractor
self.body = get_body_func(body)()
feature_dims = self.body.feature_dims
# + Feature Enhancer
# + FeatureEnhancer
if 'fpn' in modules:
self.fpn = FPN(feature_dims)
feature_dims = self.fpn.feature_dims
elif 'mbox' in modules:
pass # Placeholder
else:
......@@ -63,7 +66,10 @@ class Detector(torch.nn.Module):
# + Detection Modules
if 'rcnn' in model:
self.rpn = RPN(feature_dims[0])
self.fast_rcnn = FastRCNN(feature_dims[0])
if 'faster' in model:
self.rcnn = FastRCNN(feature_dims[0])
elif 'mask' in model:
self.rcnn = MaskRCNN(feature_dims[0])
if 'retinanet' in model:
self.retinanet = RetinaNet(feature_dims[0])
......@@ -85,7 +91,7 @@ class Detector(torch.nn.Module):
self.load_state_dict(
torch.load(weights),
strict=False,
verbose=is_root(),
verbose=logger.is_root(),
)
def forward(self, inputs=None):
......@@ -107,7 +113,7 @@ class Detector(torch.nn.Module):
# 1) Training: <= DataLayer
# 2) Inference: <= Given
if not hasattr(self, 'data_loader'):
self.data_loader = self.data_layer()
self.data_loader = self.data_loader_cls()
inputs = self.data_loader()
# 1. Extract features
......@@ -126,7 +132,7 @@ class Detector(torch.nn.Module):
# 3. Collect detection outputs
outputs = collections.OrderedDict()
# 3.1 Feature -> RPN -> Fast R-CNN
# 3.1 Feature -> RPN -> R-CNN
if hasattr(self, 'rpn'):
outputs.update(
self.rpn(
......@@ -135,7 +141,7 @@ class Detector(torch.nn.Module):
)
)
outputs.update(
self.fast_rcnn(
self.rcnn(
features=features,
rpn_cls_score=outputs['rpn_cls_score'],
rpn_bbox_pred=outputs['rpn_bbox_pred'],
......@@ -174,8 +180,8 @@ class Detector(torch.nn.Module):
##################################
last_module = None
for e in self.modules():
if isinstance(e, torch.nn.Affine) and \
isinstance(last_module, torch.nn.Conv2d):
if isinstance(e, nn.Affine) and \
isinstance(last_module, nn.Conv2d):
if last_module.bias is None:
delattr(last_module, 'bias')
e.forward = lambda x: x
......@@ -188,8 +194,8 @@ class Detector(torch.nn.Module):
######################################
last_module = None
for e in self.modules():
if isinstance(e, torch.nn.BatchNorm2d) and \
isinstance(last_module, torch.nn.Conv2d):
if isinstance(e, nn.BatchNorm2d) and \
nn.is_conv2d(last_module):
if last_module.bias is None:
delattr(last_module, 'bias')
e.forward = lambda x: x
......@@ -204,3 +210,17 @@ class Detector(torch.nn.Module):
else:
last_module.weight.data.mul_(term)
last_module = e
def new_detector(device, weights=None, training=False):
detector = Detector().cuda(device)
if weights is not None:
detector.load_weights(weights)
if not training:
detector.eval()
detector.optimize_for_inference()
# Enable the fp16 inference support if necessary
# Boost a little if TensorCore is available
if cfg.MODEL.PRECISION.lower() == 'float16':
detector.half()
return detector
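# Hypothetical usage (the weights path is a placeholder, not from this commit):
# build an inference detector on one GPU and reuse it across images, as
# `test_net` does via `new_detector`.
#
#   detector = new_detector(device=0, weights='/path/to/model_final.pth')
#   scores, boxes, batch_inds, ims_scale = im_detect(detector, raw_image)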
......@@ -43,14 +43,20 @@ for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D)
# MobileNet
for D in ['a1', 'v2']:
_STORE['BODY']['mobilenet_{}'.format(D)] = \
'lib.modeling.mobilenet.make_mobilenet_{}'.format(D)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format(
name, desc, ', '.join(sets.keys())))
'Registered modules: [{}]'
.format(name, desc, ', '.join(sets.keys()))
)
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
......
......@@ -14,13 +14,19 @@ from __future__ import division
from __future__ import print_function
import collections
import functools
import dragon.vm.torch as torch
from lib import faster_rcnn
from lib.core.config import cfg
from lib.ops.modules import RPNDecoder
from lib.modules import det
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
class FastRCNN(torch.nn.Module):
class FastRCNN(nn.Module):
"""Generate proposal regions for R-CNN series.
The pipeline is as follows:
......@@ -32,59 +38,45 @@ class FastRCNN(torch.nn.Module):
"""
def __init__(self, dim_in=256):
super(FastRCNN, self).__init__()
if len(cfg.RPN.STRIDES) > 1:
# RPN with multiple strides(i.e. FPN)
from lib.fpn import ProposalLayer, ProposalTargetLayer
else:
# RPN with single stride(i.e. C4)
from lib.faster_rcnn import ProposalLayer, ProposalTargetLayer
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = torch.nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.rpn_decoder = RPNDecoder()
self.proposal_layer = ProposalLayer()
self.proposal_target_layer = ProposalTargetLayer()
self.softmax = torch.nn.Softmax(dim=1)
self.relu = torch.nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False)
self.roi_func = {
'RoIPool': torch.vision.ops.roi_pool,
'RoIAlign': torch.vision.ops.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD]
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss(reduction='batch_size')
# Compute spatial scales for multiple strides
roi_levels = [level for level in range(
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)]
self.spatial_scales = [1.0 / (2 ** level) for level in roi_levels]
self.fc6 = nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.cls_score = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.rpn_decoder = det.RPNDecoder()
self.proposal = faster_rcnn.Proposal()
self.proposal_target = faster_rcnn.ProposalTarget()
self.softmax = nn.Softmax(dim=1)
self.relu = nn.ReLU(inplace=True)
self.sigmoid = nn.Sigmoid()
self.box_roi_feature = functools.partial({
'RoIPool': vision.roi_pool,
'RoIAlign': vision.roi_align
}[cfg.FRCNN.ROI_XFORM_METHOD], size=cfg.FRCNN.ROI_XFORM_RESOLUTION)
self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = nn.SmoothL1Loss()
# Compute spatial scales according to strides
self.spatial_scales = [
1. / (2 ** lvl)
for lvl in range(
cfg.FPN.ROI_MIN_LEVEL,
cfg.FPN.ROI_MAX_LEVEL + 1
)]
self.reset_parameters()
def reset_parameters(self):
# Careful initialization for Fast R-CNN
torch.nn.init.normal_(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001)
init.normal(self.cls_score.weight, std=0.01)
init.normal(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters():
if 'bias' in name:
torch.nn.init.constant_(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func(
feature, rois,
output_size=(
cfg.FRCNN.ROI_XFORM_RESOLUTION,
cfg.FRCNN.ROI_XFORM_RESOLUTION,
),
spatial_scale=spatial_scale,
)
init.constant(p, 0)
def forward(self, **kwargs):
# Generate Proposals
# Apply the CXX implementation during inference
proposal_func = self.proposal_layer \
# Generate proposals
proposal_func = self.proposal \
if self.training else self.rpn_decoder
self.rcnn_data = {
self.data = {
'rois': proposal_func(
kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data),
......@@ -93,66 +85,61 @@ class FastRCNN(torch.nn.Module):
)
}
# Generate Targets from Proposals
# Generate targets from proposals
if self.training:
self.rcnn_data.update(
self.proposal_target_layer(
rpn_rois=self.rcnn_data['rois'],
self.data.update(
self.proposal_target(
rpn_rois=self.data['rois'],
gt_boxes=kwargs['gt_boxes'],
)
)
# Transform RoI Feature
roi_features = []
if len(self.rcnn_data['rois']) > 1:
for i, spatial_scale in enumerate(self.spatial_scales):
roi_features.append(
self.RoIFeatureTransform(
# Transform RoI features
if len(self.data['rois']) > 1:
roi_features = \
torch.cat([
self.box_roi_feature(
kwargs['features'][i],
self.rcnn_data['rois'][i],
self.data['rois'][i],
spatial_scale,
)
)
roi_features = torch.cat(roi_features, dim=0)
) for i, spatial_scale in enumerate(self.spatial_scales)
], dim=0)
else:
spatial_scale = 1.0 / cfg.RPN.STRIDES[0]
roi_features = \
self.RoIFeatureTransform(
self.box_roi_feature(
kwargs['features'][0],
self.rcnn_data['rois'][0],
spatial_scale,
self.data['rois'][0],
1. / cfg.RPN.STRIDES[0],
)
# Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim)
rcnn_output = self.relu(self.fc6(roi_features))
rcnn_output = self.relu(self.fc7(rcnn_output))
roi_features = self.relu(self.fc6(roi_features))
roi_features = self.relu(self.fc7(roi_features))
# Compute rcnn logits
cls_score = self.cls_score(rcnn_output).float()
outputs = collections.OrderedDict([
('bbox_pred', self.bbox_pred(rcnn_output).float()),
])
# Compute logits and losses
outputs = collections.OrderedDict()
cls_score = self.cls_score(roi_features).float()
outputs['bbox_pred'] = self.bbox_pred(roi_features).float()
if self.training:
# Compute rcnn losses
outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss(
cls_score, self.rcnn_data['labels'])),
cls_score, self.data['labels'])),
('bbox_loss', self.bbox_loss(
outputs['bbox_pred'],
self.rcnn_data['bbox_targets'],
self.rcnn_data['bbox_inside_weights'],
self.rcnn_data['bbox_outside_weights'],
self.data['bbox_targets'],
self.data['bbox_inside_weights'],
self.data['bbox_outside_weights'],
)),
]))
else:
            # Return the RoIs to decode the refined boxes
if len(self.rcnn_data['rois']) > 1:
outputs['rois'] = torch.cat(
self.rcnn_data['rois'], dim=0)
if len(self.data['rois']) > 1:
outputs['rois'] = torch.cat(self.data['rois'], 0)
else:
outputs['rois'] = self.rcnn_data['rois'][0]
outputs['rois'] = self.data['rois'][0]
# Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score)
......
......@@ -16,43 +16,41 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1
from lib.modeling import conv3x3
from lib.modules import init
from lib.modules import nn
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
class FPN(torch.nn.Module):
class FPN(nn.Module):
"""Feature Pyramid Networks for R-CNN and RetinaNet."""
def __init__(self, feature_dims):
super(FPN, self).__init__()
self.C = torch.nn.ModuleList()
self.P = torch.nn.ModuleList()
dim = cfg.FPN.DIM
self.C = nn.ModuleList()
self.P = nn.ModuleList()
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1):
self.C.append(conv1x1(feature_dims[lvl - 1], cfg.FPN.DIM, bias=True))
self.P.append(conv3x3(cfg.FPN.DIM, cfg.FPN.DIM, bias=True))
self.C.append(nn.Conv1x1(feature_dims[lvl - 1], dim, bias=True))
self.P.append(nn.Conv3x3(dim, dim, bias=True))
if 'rcnn' in cfg.MODEL.TYPE:
self.apply_func = self.apply_on_rcnn
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True)
self.maxpool = nn.MaxPool2d(1, 2, ceil_mode=True)
else:
self.apply_func = self.apply_on_generic
self.relu = torch.nn.ReLU(inplace=False)
self.relu = nn.ReLU(inplace=False)
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else cfg.FPN.DIM
self.P.append(conv3x3(dim_in, cfg.FPN.DIM, stride=2, bias=True))
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else dim
self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True))
self.feature_dims = [dim]
self.reset_parameters()
self.feature_dims = [cfg.FPN.DIM]
def reset_parameters(self):
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_(
m.weight,
a=1, # Fix the gain for [-127, 127]
) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0)
if isinstance(m, nn.Conv2d):
init.xaiver(m.weight)
init.constant(m.bias, 0)
def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1])
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import functools
import dragon.vm.torch as torch
from lib import mask_rcnn
from lib.core.config import cfg
from lib.modules import det
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
class MaskRCNN(nn.Module):
def __init__(self, dim_in=256):
"""Generate mask regions for R-CNN series.
The pipeline is as follows:
        ... -> BoxRoIs  \                      /-> cls_score -> cls_loss
                         -> RoIFeatureXform -> MLP
        ... -> Features /                      \-> bbox_pred -> bbox_loss

        ... -> MaskRoIs \
                         -> RoIFeatureXform -> FCN -> mask_score -> mask_loss
        ... -> Features /

"""
super(MaskRCNN, self).__init__()
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.fcn = nn.ModuleList([nn.Conv3x3(dim_in, dim_in, bias=True) for _ in range(4)])
self.fcn += [nn.ConvTranspose2d(dim_in, dim_in, 2, 2, 0)]
self.cls_score = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.mask_score = nn.Conv1x1(dim_in, cfg.MODEL.NUM_CLASSES - 1, bias=True)
self.rpn_decoder = det.RPNDecoder()
self.proposal = mask_rcnn.Proposal()
self.proposal_target = mask_rcnn.ProposalTarget()
self.sigmoid = nn.Sigmoid()
self.softmax = nn.Softmax(dim=1)
self.relu = nn.ReLU(True)
self.box_roi_feature = functools.partial({
'RoIPool': vision.roi_pool,
'RoIAlign': vision.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD], size=cfg.FRCNN.ROI_XFORM_RESOLUTION)
self.mask_roi_feature = functools.partial({
'RoIPool': vision.roi_pool,
'RoIAlign': vision.roi_align,
}[cfg.MRCNN.ROI_XFORM_METHOD], size=cfg.MRCNN.ROI_XFORM_RESOLUTION)
self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = nn.SmoothL1Loss()
self.mask_loss = nn.BCEWithLogitsLoss()
# Compute spatial scales according to strides
self.spatial_scales = [
1. / (2 ** lvl)
for lvl in range(
cfg.FPN.ROI_MIN_LEVEL,
cfg.FPN.ROI_MAX_LEVEL + 1
)]
self.reset_parameters()
def reset_parameters(self):
# Careful initialization for Fast R-CNN
init.normal(self.cls_score.weight, std=0.01)
init.normal(self.bbox_pred.weight, std=0.001)
# Careful initialization for Mask R-CNN
init.normal(self.mask_score.weight, std=0.001)
for m in self.fcn.modules():
if hasattr(m, 'weight'):
init.kaiming_normal(m.weight)
for name, p in self.named_parameters():
if 'bias' in name:
init.constant(p, 0)
def get_mask_score(self, features, rois):
roi_features = \
torch.cat([
self.mask_roi_feature(
features[i], rois[i], spatial_scale,
) for i, spatial_scale in enumerate(self.spatial_scales)
], dim=0)
for i in range(len(self.fcn)):
roi_features = self.relu(self.fcn[i](roi_features))
return self.mask_score(roi_features).float()
def forward(self, **kwargs):
# Generate proposals
proposal_func = self.proposal \
if self.training else self.rpn_decoder
self.data = {
'rois': proposal_func(
kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data),
kwargs['rpn_bbox_pred'],
kwargs['ims_info'],
)
}
# Generate targets from proposals
if self.training:
self.data.update(
self.proposal_target(
rpn_rois=self.data['rois'],
gt_boxes=kwargs['gt_boxes'],
gt_masks=kwargs['gt_masks'],
ims_info=kwargs['ims_info'],
)
)
# Transform RoI features
roi_features = \
torch.cat([
self.box_roi_feature(
kwargs['features'][i],
self.data['rois'][i],
spatial_scale,
) for i, spatial_scale in enumerate(self.spatial_scales)
], dim=0)
# Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim)
roi_features = self.relu(self.fc6(roi_features))
roi_features = self.relu(self.fc7(roi_features))
# Compute logits and losses
outputs = collections.OrderedDict()
cls_score = self.cls_score(roi_features).float()
outputs['bbox_pred'] = self.bbox_pred(roi_features).float()
if self.training:
# Compute the loss of bbox branch
outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss(
cls_score, self.data['labels'])),
('bbox_loss', self.bbox_loss(
outputs['bbox_pred'],
self.data['bbox_targets'],
self.data['bbox_inside_weights'],
self.data['bbox_outside_weights'],
)),
]))
# Compute the loss of mask branch
mask_score = self.get_mask_score(
kwargs['features'], self.data['mask_rois'])
mask_score = mask_score.index_select(
(0, 1), self.data['mask_indices'])
outputs['mask_loss'] = self.mask_loss(
mask_score, self.data['mask_targets'])
else:
            # Return the RoIs to decode the refined boxes
if len(self.data['rois']) > 1:
outputs['rois'] = torch.cat(self.data['rois'], 0)
else:
outputs['rois'] = self.data['rois'][0]
# Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score)
            # Set a callback to decode masks from the refined RoIs
self.compute_mask_score = \
functools.partial(
self.get_mask_score,
features=kwargs['features'],
)
return outputs
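# A hedged sketch (not part of this commit) of pasting one 28x28 mask
# probability map back onto the original image; the resize-and-threshold
# scheme mirrors cfg.TEST.BINARY_THRESH but is an assumption about the
# post-processing, not code from this repository.
import cv2
import numpy as np

def paste_mask(mask_prob, box, im_h, im_w, thresh=0.5):
    # mask_prob: (M, M) sigmoid outputs; box: (x1, y1, x2, y2) in image pixels
    x1 = min(max(int(np.floor(box[0])), 0), im_w - 1)
    y1 = min(max(int(np.floor(box[1])), 0), im_h - 1)
    x2 = min(max(int(np.ceil(box[2])), x1), im_w - 1)
    y2 = min(max(int(np.ceil(box[3])), y1), im_h - 1)
    w, h = x2 - x1 + 1, y2 - y1 + 1
    mask = cv2.resize(mask_prob.astype(np.float32), (w, h))
    full = np.zeros((im_h, im_w), dtype=np.uint8)
    full[y1:y1 + h, x1:x1 + w] = (mask >= thresh).astype(np.uint8)
    return full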
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
def conv_triplet(dim_in, dim_out):
"""1x1 convolution + BN + ReLU."""
return [
nn.Conv2d(dim_in, dim_out, 1, bias=False),
nn.Affine(dim_out),
nn.ReLU(True),
]
def conv_quintet(dim_in, dim_out, ks, stride):
"""KxK convolution + BN + ReLU."""
return [
nn.DepthwiseConv2d(
dim_in, dim_in,
kernel_size=ks,
stride=stride,
padding=ks // 2,
bias=False,
),
nn.Affine(dim_in),
nn.ReLU(True),
nn.Conv1x1(dim_in, dim_out),
nn.Affine(dim_out),
]
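# Quick illustration (assumption, for intuition only) of why the depthwise KxK
# + pointwise 1x1 pair above is cheaper than a dense KxK convolution.
def conv_weight_count(dim_in, dim_out, ks, separable):
    if separable:
        return dim_in * ks * ks + dim_in * dim_out   # depthwise KxK + pointwise 1x1
    return dim_in * dim_out * ks * ks                # dense KxK

print(conv_weight_count(32, 64, 3, True), conv_weight_count(32, 64, 3, False))  # 2336 vs 18432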
class Setting(object):
V2 = (
[2, 3, 4, 3, 3, 1],
[2, 2, 2, 1, 2, 1],
[32, 16, 24, 32, 64, 96, 160, 320, 1280],
)
PROXYLESS_MOBILE = (
[4, 4, 4, 4, 4, 1],
[2, 2, 2, 1, 2, 1],
[32, 16, 32, 40, 80, 96, 192, 320, 1280],
)
PROXYLESS_GPU = (
[4, 4, 4, 4, 4, 1],
[2, 2, 2, 1, 2, 1],
[40, 24, 32, 56, 112, 128, 256, 432, 1280],
)
def Stem(dim_out, stride=1):
return torch.nn.Sequential(
torch.nn.Conv2d(
3, dim_out,
kernel_size=3,
stride=stride,
padding=1,
bias=False,
),
nn.Affine(dim_out),
nn.ReLU(True),
)
class Choice(nn.Module):
def __init__(self, dim_in, dim_out, mb=3, ks=3, stride=1):
super(Choice, self).__init__()
self.mb = mb
dim_hidden = int(round(dim_in * mb))
seq = conv_triplet(dim_in, dim_hidden) if mb != 1 else []
seq += conv_quintet(dim_hidden, dim_out, ks, stride)
self.conv = nn.ModuleList(seq)
self.stride = stride
self.apply_residual = stride == 1 and dim_in == dim_out
def forward(self, x):
residual = x if self.apply_residual else None
for i in range(3):
x = self.conv[i](x)
y = x if self.stride == 2 else None
for i in range(3, len(self.conv)):
x = self.conv[i](x)
if self.apply_residual:
return residual + x, y
else:
return x, y
class NASMobileNet(nn.Module):
def __init__(self, choices, preset=Setting.PROXYLESS_MOBILE):
super(NASMobileNet, self).__init__()
# Pre-defined blocks
def select_block(choice):
return {
0: functools.partial(Choice, mb=3, ks=3),
1: functools.partial(Choice, mb=6, ks=3),
2: functools.partial(Choice, mb=3, ks=5),
3: functools.partial(Choice, mb=6, ks=5),
4: functools.partial(Choice, mb=3, ks=7),
5: functools.partial(Choice, mb=6, ks=7),
6: nn.Identity,
}[choice]
        # Hand-crafted configurations
repeats, strides, out_channels = preset
names = ['2!', '3!', '4', '4!', '5', '5!']
self.num_layers = len(choices)
assert sum(repeats) == self.num_layers
# + Stem
self.bootstrap = vision.Bootstrap()
self.conv1 = Stem(out_channels[0], stride=2)
self.stage1 = Choice(out_channels[0], out_channels[1], mb=1, ks=3)
dim_in = out_channels[1]
self.feature_dims = [out_channels[-1]]
# + Body
self.layers = []
for name, rep, dim_out, stride in zip(
names, repeats, out_channels[2:], strides):
self.layers.append(select_block(
choices[len(self.layers)]
)(dim_in, dim_out, stride=stride))
if stride == 2:
self.feature_dims.insert(
-1, dim_in * self.layers[-1].mb)
for i in range(rep - 1):
self.layers.append(select_block(
choices[len(self.layers)]
)(dim_out, dim_out, stride=1))
fullname = 'stage%s' % name.split('!')[0]
seq = getattr(self, fullname, [])
seq += self.layers[-rep:]
seq = nn.Sequential(*seq) if '!' in name else seq
setattr(self, fullname, seq)
dim_in = dim_out
self.conv6 = nn.Sequential(*conv_triplet(dim_in, out_channels[-1]))
self.reset_parameters()
def reset_parameters(self):
for m in self.modules():
if nn.is_conv2d(m):
init.kaiming_normal(m.weight, 'fan_out')
if m.bias is not None:
init.constant(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant(m.weight, 1)
elif isinstance(m, nn.Linear):
if m.bias is not None:
init.constant(m.bias, 0)
# Stop the gradients if necessary
def freeze_func(m):
if nn.is_conv2d(m):
m.weight.requires_grad = False
m._buffers['weight'] = m.weight
del m._parameters['weight']
if cfg.MODEL.FREEZE_AT > 0:
self.conv1.apply(freeze_func)
self.stage1.apply(freeze_func)
for i in range(cfg.MODEL.FREEZE_AT, 1, -1):
getattr(self, 'stage{}'.format(i)).apply(freeze_func)
def forward(self, x):
x = self.conv1(x)
x, _ = self.stage1(x)
outputs = []
for layer in self.layers:
x = layer(x)
x, y = x if isinstance(x, tuple) else (x, None)
if y is not None:
outputs.append(y)
outputs.append(self.conv6(x))
return outputs
def make_mobilenet_a1():
return NASMobileNet([
4, 6, 6, 6,
3, 3, 4, 6,
2, 4, 0, 4, 1, 5, 3, 5,
2, 4, 2, 4,
1,
], Setting.PROXYLESS_MOBILE)
def make_mobilenet_v2():
return NASMobileNet([
1, 1,
1, 1, 1,
1, 1, 1, 1, 1, 1, 1,
1, 1, 1,
1,
], Setting.V2)
......@@ -20,12 +20,11 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
from lib.modules import nn
from lib.modules import init
class BasicBlock(torch.nn.Module):
class BasicBlock(nn.Module):
def __init__(
self,
dim_in,
......@@ -35,11 +34,11 @@ class BasicBlock(torch.nn.Module):
dropblock=None,
):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out)
self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = nn.Affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out)
self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = nn.Affine(dim_out)
self.downsample = downsample
self.dropblock = dropblock
......@@ -83,12 +82,12 @@ class Bottleneck(torch.nn.Module):
):
super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction)
self.conv1 = conv1x1(dim_in, dim)
self.bn1 = affine(dim)
self.conv2 = conv3x3(dim, dim, stride=stride)
self.bn2 = affine(dim)
self.conv3 = conv1x1(dim, dim_out)
self.bn3 = affine(dim_out)
self.conv1 = nn.Conv1x1(dim_in, dim)
self.bn1 = nn.Affine(dim)
self.conv2 = nn.Conv3x3(dim, dim, stride=stride)
self.bn2 = nn.Affine(dim)
self.conv3 = nn.Conv1x1(dim, dim_out)
self.bn3 = nn.Affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.downsample = downsample
self.dropblock = dropblock
......@@ -133,7 +132,7 @@ class ResNet(torch.nn.Module):
padding=3,
bias=False,
)
self.bn1 = affine(self.dim_in)
self.bn1 = nn.Affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d(
kernel_size=3,
......@@ -160,13 +159,9 @@ class ResNet(torch.nn.Module):
self.reset_parameters()
def reset_parameters(self):
# The Kaiming Initialization
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_normal_(
m.weight,
nonlinearity='relu',
)
if isinstance(m, nn.Conv2d):
init.kaiming_normal(m.weight)
# Stop the gradients if necessary
def freeze_func(m):
......@@ -184,15 +179,15 @@ class ResNet(torch.nn.Module):
def make_blocks(self, block, dim_out, blocks, stride=1, dropblock=None):
downsample = None
if stride != 1 or self.dim_in != dim_out:
downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out),
downsample = nn.Sequential(
nn.Conv1x1(self.dim_in, dim_out, stride=stride),
nn.Affine(dim_out),
)
layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)]
self.dim_in = dim_out
for i in range(1, blocks):
layers.append(block(dim_out, dim_out, dropblock=dropblock))
return torch.nn.Sequential(*layers)
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
......