Commit f8359d17 by Ting PAN

Adapt to SeetaRecord

1 parent ca255ea0
Showing with 5417 additions and 6186 deletions
......@@ -47,4 +47,4 @@ __pycache__
.idea
# OSX dir files
.DS_Store
\ No newline at end of file
.DS_Store
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.2.0 (20190929)
Dragon Minimum Required (Version 0.3.0.dev20190929)
Changes:
Preview Features:
- Use SeetaRecord instead of LMDB.
- Flatten the implementation of layers.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.1.2 (20190723)
Dragon Minimum Required (Version 0.3.0.0)
......
#!/bin/sh
# delete cache
rm -r build install *.c *.cpp
# compile proto files
protoc -I ../lib/proto --python_out=../lib/proto ../lib/proto/anno.proto
# compile cython modules
python setup.py build_ext --inplace
# compile cuda modules
cd build
cmake .. && make install && cd ..
cd build && cmake .. && make install && cd ..
# setup
cp -r install/lib ../
......@@ -32,15 +32,15 @@ FRCNN:
ROI_XFORM_METHOD: RoIAlign
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-101.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k_lmdb'
WEIGHTS: '/model/R-101.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k'
IMS_PER_BATCH: 2
USE_DIFF: False # Do not use crowd objects
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
TEST:
DATABASE: '/data/coco_2014_minival_lmdb'
DATABASE: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
......
......@@ -32,15 +32,15 @@ FRCNN:
ROI_XFORM_METHOD: RoIAlign
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-101.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k_lmdb'
WEIGHTS: '/model/R-101.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k'
IMS_PER_BATCH: 2
USE_DIFF: False # Do not use crowd objects
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
TEST:
DATABASE: '/data/coco_2014_minival_lmdb'
DATABASE: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
......
......@@ -23,14 +23,14 @@ FRCNN:
ROI_XFORM_METHOD: RoIAlign
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
WEIGHTS: '/model/R-50.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 2
BATCH_SIZE: 128
SCALES: [600]
MAX_SIZE: 1000
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [600]
......
......@@ -28,15 +28,15 @@ FRCNN:
ROI_XFORM_RESOLUTION: 7
MLP_HEAD_DIM: 4096
TRAIN:
WEIGHTS: '/data/models/imagenet/VGG16.RCNN.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
WEIGHTS: '/model/VGG16.RCNN.pth'
DATABASE: '/data/voc_0712_trainval'
RPN_MIN_SIZE: 16
IMS_PER_BATCH: 2
BATCH_SIZE: 128
SCALES: [600]
MAX_SIZE: 1000
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_MIN_SIZE: 16
RPN_POST_NMS_TOP_N: 300
......
......@@ -32,13 +32,13 @@ FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k_lmdb'
WEIGHTS: '/model/R-50.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k'
IMS_PER_BATCH: 8
SCALES: [400]
MAX_SIZE: 666
TEST:
DATABASE: '/data/coco_2014_minival_lmdb'
DATABASE: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
......
......@@ -36,8 +36,8 @@ DROPBLOCK:
DROP_ON: True
DECREMENT: 0.000005 # * 20000 = 0.1
TRAIN:
WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k_lmdb'
WEIGHTS: '/model/R-50.Affine.pth'
DATABASE: '/data/coco_2014_trainval35k'
IMS_PER_BATCH: 8
SCALES: [400]
MAX_SIZE: 666
......@@ -45,7 +45,7 @@ TRAIN:
COLOR_JITTERING: True
SCALE_RANGE: [0.75, 1.33]
TEST:
DATABASE: '/data/coco_2014_minival_lmdb'
DATABASE: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
......
......@@ -23,8 +23,8 @@ FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/AirNet.Affine.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
WEIGHTS: '/model/AirNet.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
......@@ -32,7 +32,7 @@ TRAIN:
SCALE_JITTERING: True
COLOR_JITTERING: True
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 1
SCALES: [300]
......
......@@ -24,8 +24,8 @@ FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-18.Affine.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
WEIGHTS: '/model/R-18.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
......@@ -33,7 +33,7 @@ TRAIN:
SCALE_JITTERING: True
COLOR_JITTERING: True
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 1
SCALES: [300]
......
......@@ -24,8 +24,8 @@ FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-34.Affine.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
WEIGHTS: '/model/R-34.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
......@@ -33,7 +33,7 @@ TRAIN:
SCALE_JITTERING: True
COLOR_JITTERING: True
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 1
SCALES: [300]
......
......@@ -29,11 +29,11 @@ SSD:
STRIDES: [8, 16, 32]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
WEIGHTS: '/data/models/imagenet/AirNet.Affine.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
WEIGHTS: '/model/AirNet.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 8
NMS_TOP_K: 400
......
......@@ -32,11 +32,11 @@ SSD:
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5, 3, 0.33], [1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
WEIGHTS: '/data/models/imagenet/VGG16.SSD.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
WEIGHTS: '/model/VGG16.SSD.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 8
NMS_TOP_K: 400
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os.path as osp
sys.path.insert(0, '../../../')
from database.frcnn.utils.make_from_xml import make_db
if __name__ == '__main__':
VOC_ROOT_DIR = '/home/workspace/datasets/VOC'
# train database: voc_2007_trainval + voc_2012_trainval
make_db(database_file=osp.join(VOC_ROOT_DIR, 'cache/voc_0712_trainval_lmdb'),
images_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/JPEGImages'),
osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/JPEGImages')],
annotations_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/Annotations'),
osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/Annotations')],
imagesets_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
splits=['trainval', 'trainval'])
# test database: voc_2007_test
make_db(database_file=osp.join(VOC_ROOT_DIR, 'cache/voc_2007_test_lmdb'),
images_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/JPEGImages'),
annotations_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/Annotations'),
imagesets_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
splits=['test'])
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import time
import cv2
from dragon.tools.db import LMDB
sys.path.insert(0, '../../..')
from lib.proto import anno_pb2 as pb
ZFILL = 8
ENCODE_QUALITY = 95
def set_zfill(value):
global ZFILL
ZFILL = value
def set_quality(value):
global ENCODE_QUALITY
ENCODE_QUALITY = value
def make_datum(image_id, image_file, objects):
anno_datum = pb.AnnotatedDatum()
datum = pb.Datum()
im = cv2.imread(image_file)
datum.height, datum.width, datum.channels = im.shape
datum.encoded = ENCODE_QUALITY != 100
if datum.encoded:
result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
    datum.data = im.tobytes()  # tostring() is deprecated in modern NumPy
anno_datum.datum.CopyFrom(datum)
anno_datum.filename = image_id
for ix, obj in enumerate(objects):
anno = pb.Annotation()
anno.x1, anno.y1, anno.x2, anno.y2 = obj['bbox']
anno.name = obj['name']
anno.difficult = obj['difficult']
anno_datum.annotation.add().CopyFrom(anno)
return anno_datum
def make_db(database_file, images_path, gt_recs, ext='.png'):
    if os.path.isdir(database_file):
        raise ValueError('The database path already exists.')
else:
root_dir = database_file[:database_file.rfind('/')]
if not os.path.exists(root_dir):
os.makedirs(root_dir)
print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
db = LMDB(max_commit=10000)
db.open(database_file, mode='w')
count = 0
total_line = len(gt_recs)
start_time = time.time()
zfill_flag = '{0:0%d}' % (ZFILL)
for image_id, objects in gt_recs.items():
count += 1
if count % 10000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
image_file = os.path.join(images_path, image_id + ext)
datum = make_datum(image_id, image_file, objects)
db.put(zfill_flag.format(count - 1), datum.SerializeToString())
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
db.commit()
db.close()
end_time = time.time()
print('{0} images have been stored in the database.'.format(total_line))
print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
print('The size of database is {0} MB.'.format(
float(os.path.getsize(database_file + '/data.mdb') / 1000 / 1000)))
\ No newline at end of file
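# A minimal usage sketch of make_db above (hypothetical paths and records;
# each object dict carries the 'bbox', 'name' and 'difficult' fields read
# by make_datum):
#   gt_recs = {
#       'img_000001': [
#           {'bbox': (48., 240., 195., 371.), 'name': 'dog', 'difficult': False},
#       ],
#   }
#   make_db(database_file='/data/my_dataset_lmdb',
#           images_path='/data/my_dataset/images',
#           gt_recs=gt_recs)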
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import time
import cv2
import xml.etree.ElementTree as ET
from dragon.tools.db import LMDB
sys.path.insert(0, '../../..')
from lib.proto import anno_pb2 as pb
ZFILL = 8
ENCODE_QUALITY = 95
class_name_set = set()  # collects the distinct class names seen in make_datum below
def set_zfill(value):
global ZFILL
ZFILL = value
def set_quality(value):
global ENCODE_QUALITY
ENCODE_QUALITY = value
def make_datum(image_file, xml_file):
tree = ET.parse(xml_file)
filename = os.path.split(xml_file)[-1]
objs = tree.findall('object')
anno_datum = pb.AnnotatedDatum()
datum = pb.Datum()
im = cv2.imread(image_file)
    if im is None or im.shape[0] == 0 or im.shape[1] == 0:
        print('Invalid image, ignored:', image_file)
        return None
datum.height, datum.width, datum.channels = im.shape
datum.encoded = ENCODE_QUALITY != 100
if datum.encoded:
result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
        if not result or im is None:
            print('Failed to encode image, ignored:', image_file)
            return None
    datum.data = im.tobytes()  # tostring() is deprecated in modern NumPy
anno_datum.datum.CopyFrom(datum)
anno_datum.filename = filename.split('.')[0]
if len(objs) == 0:
return None
for ix, obj in enumerate(objs):
anno = pb.Annotation()
bbox = obj.find('bndbox')
x1 = float(bbox.find('xmin').text)
y1 = float(bbox.find('ymin').text)
x2 = float(bbox.find('xmax').text)
y2 = float(bbox.find('ymax').text)
cls = obj.find('name').text.strip()
anno.x1, anno.y1, anno.x2, anno.y2 = (x1, y1, x2, y2)
anno.name = cls
class_name_set.add(cls)
anno.difficult = False
if obj.find('difficult') is not None:
anno.difficult = int(obj.find('difficult').text) == 1
anno_datum.annotation.add().CopyFrom(anno)
return anno_datum
def make_db(
database_file,
images_path,
annotations_path,
imagesets_path,
splits,
):
    if os.path.isdir(database_file):
        print('Warning: The database path already exists.')
else:
root_dir = database_file[:database_file.rfind('/')]
if not os.path.exists(root_dir):
os.makedirs(root_dir)
if not isinstance(images_path, list):
images_path = [images_path]
if not isinstance(annotations_path, list):
annotations_path = [annotations_path]
if not isinstance(imagesets_path, list):
imagesets_path = [imagesets_path]
assert len(splits) == len(imagesets_path)
assert len(splits) == len(images_path)
assert len(splits) == len(annotations_path)
print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
db = LMDB(max_commit=1000)
db.open(database_file, mode='w')
count = 0
total_line = 0
start_time = time.time()
zfill_flag = '{0:0%d}' % ZFILL
for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
assert os.path.exists(split_file)
with open(split_file, 'r') as f:
lines = f.readlines()
total_line += len(lines)
for line in lines:
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
datum = make_datum(image_file, xml_file)
if datum is not None:
count += 1
db.put(zfill_flag.format(count - 1), datum.SerializeToString())
if count % 1000 == 0:
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(
count, total_line, now_time - start_time))
db.commit()
now_time = time.time()
print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
db.commit()
db.close()
end_time = time.time()
print('{0} images have been stored in the database.'.format(total_line))
print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
print('The size of database is {0} MB.'.format(
float(os.path.getsize(database_file + '/data.mdb') / 1000 / 1000)))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/facebookresearch/Detectron/blob/master/lib/core/config.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path as osp
import numpy as np
from lib.utils.attrdict import AttrDict as edict
__C = edict()
cfg = __C
###########################################
# #
# Training Options #
# #
###########################################
__C.TRAIN = edict()
# Initialize network with weights from this file
__C.TRAIN.WEIGHTS = ''
# Database to train
__C.TRAIN.DATABASE = ''
# Scales to use during training (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,)
# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TRAIN.MAX_SIZE = 1000
# Images to use per mini-batch
__C.TRAIN.IMS_PER_BATCH = 1
# Minibatch size (number of regions of interest [ROIs])
__C.TRAIN.BATCH_SIZE = 128
# Fraction of minibatch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25
# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5
# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.0
# Use shuffle after each epoch
__C.TRAIN.USE_SHUFFLE = True
# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = True
# Use the difficult (e.g. occluded) objects
__C.TRAIN.USE_DIFF = True
# Overlap required between a ROI and ground-truth box in order for that ROI to
# be used as a bounding-box regression training example
__C.TRAIN.BBOX_THRESH = 0.5
# If True, randomly scale the image by scale range
__C.TRAIN.SCALE_JITTERING = False
__C.TRAIN.SCALE_RANGE = [0.75, 1.0]
# If True, randomly distort the image by brightness, contrast, and saturation
__C.TRAIN.COLOR_JITTERING = False
# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor satisfies both the positive and negative conditions, set it to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
__C.TRAIN.RPN_FG_FRACTION = 0.5
# Total number of examples
__C.TRAIN.RPN_BATCHSIZE = 256
# NMS threshold used on RPN proposals
__C.TRAIN.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TRAIN.RPN_MIN_SIZE = 0
# Remove RPN anchors that go outside the image by RPN_STRADDLE_THRESH pixels
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
__C.TRAIN.RPN_STRADDLE_THRESH = 0
###########################################
# #
# Testing Options #
# #
###########################################
__C.TEST = edict()
# Database to test
__C.TEST.DATABASE = ''
# Original json ground-truth file to use
# Records in the Database file will be used instead
__C.TEST.JSON_FILE = ''
# Scales to use during testing (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TEST.SCALES = (600,)
# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TEST.MAX_SIZE = 1000
# Images to use per mini-batch
__C.TEST.IMS_PER_BATCH = 1
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
__C.TEST.NMS = 0.3
# Use Soft-NMS instead of standard NMS?
# For the soft NMS overlap threshold, we simply use TEST.NMS
__C.TEST.USE_SOFT_NMS = False
__C.TEST.SOFT_NMS_METHOD = 'linear'
__C.TEST.SOFT_NMS_SIGMA = 0.5
# The number of top scoring prior boxes to keep before NMS
__C.TEST.NMS_TOP_K = 400
# The threshold for predicting boxes
__C.TEST.SCORE_THRESH = 0.05
# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5
# NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 0
# Save detection results files if True
# If false, results files are cleaned up (they can be large) after local
# evaluation
__C.TEST.COMPETITION_MODE = True
# The optional test protocol for custom datasets
# Ignored by the VOC and COCO datasets
# Available protocols: 'voc2007', 'voc2010', 'coco'
__C.TEST.PROTOCOL = 'voc2007'
# Maximum number of detections to return per image (100 is based on the limit
# established for the COCO dataset)
__C.TEST.DETECTIONS_PER_IM = 100
###########################################
# #
# Model Options #
# #
###########################################
__C.MODEL = edict()
# The type of the model
# ('faster_rcnn',
# 'mask_rcnn',
# 'ssd',
# 'rssd',
# 'retinanet',
# )
__C.MODEL.TYPE = ''
# The float precision for training and inference
# (FLOAT32, FLOAT16,)
__C.MODEL.DATA_TYPE = 'FLOAT32'
# The backbone
__C.MODEL.BACKBONE = ''
# The number of classes in the dataset
__C.MODEL.NUM_CLASSES = -1
# Keep it for TaaS DataSet
__C.MODEL.CLASSES = ['__background__']
# Add StopGrad at a specified stage so the bottom layers are frozen
__C.MODEL.FREEZE_AT = 2
# Whether to use focal loss for one-stage detectors?
# Enabled if model type in ('ssd',)
# RetinaNet is forced to use focal loss
__C.MODEL.USE_FOCAL_LOSS = False
__C.MODEL.FOCAL_LOSS_ALPHA = 0.25
__C.MODEL.FOCAL_LOSS_GAMMA = 2.0
# Stride of the coarsest Feature level
# This is needed so the input can be padded properly
__C.MODEL.COARSEST_STRIDE = -1
###########################################
# #
# RPN Options #
# #
###########################################
__C.RPN = edict()
# Strides for multiple rpn heads
__C.RPN.STRIDES = [4, 8, 16, 32, 64]
# Scales for multiple anchors
__C.RPN.SCALES = [8, 8, 8, 8, 8]
# RPN anchor aspect ratios
__C.RPN.ASPECT_RATIOS = [0.5, 1, 2]
###########################################
# #
# Retina-Net Options #
# #
###########################################
__C.RETINANET = edict()
# Anchor aspect ratios to use
__C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0)
# Anchor scales per octave
__C.RETINANET.SCALES_PER_OCTAVE = 3
# At each FPN level, we generate anchors based on their scale, aspect_ratio,
# stride of the level, and we multiply the resulting anchor by ANCHOR_SCALE
__C.RETINANET.ANCHOR_SCALE = 4
# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
__C.RETINANET.NUM_CONVS = 4
# During inference, #locs to select based on cls score before NMS is performed
__C.RETINANET.PRE_NMS_TOP_N = 5000
# IoU overlap ratio for labeling an anchor as positive
# Anchors with >= iou overlap are labeled positive
__C.RETINANET.POSITIVE_OVERLAP = 0.5
# IoU overlap ratio for labeling an anchor as negative
# Anchors with < iou overlap are labeled negative
__C.RETINANET.NEGATIVE_OVERLAP = 0.4
###########################################
# #
# FPN Options #
# #
###########################################
__C.FPN = edict()
# Channel dimension of the FPN feature levels
__C.FPN.DIM = 256
# Coarsest level of the FPN pyramid
__C.FPN.RPN_MAX_LEVEL = 6
# Finest level of the FPN pyramid
__C.FPN.RPN_MIN_LEVEL = 2
# Hyper-Parameters for the RoI-to-FPN level mapping heuristic
__C.FPN.ROI_CANONICAL_SCALE = 224
__C.FPN.ROI_CANONICAL_LEVEL = 4
# Coarsest level of the FPN pyramid
__C.FPN.ROI_MAX_LEVEL = 5
# Finest level of the FPN pyramid
__C.FPN.ROI_MIN_LEVEL = 2
###########################################
# #
# Fast R-CNN Options #
# #
###########################################
__C.FRCNN = edict()
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.FRCNN.ROI_XFORM_METHOD = 'RoIPool'
# Hidden layer dimension when using an MLP for the RoI box head
__C.FRCNN.MLP_HEAD_DIM = 1024
# RoI transform output resolution
# Note: some models may have constraints on what they can use, e.g. they use
# pretrained FC layers like in VGG16, and will ignore this option
__C.FRCNN.ROI_XFORM_RESOLUTION = 7
###########################################
# #
# Mask R-CNN Options #
# #
###########################################
__C.MRCNN = edict()
# Resolution of mask predictions
__C.MRCNN.RESOLUTION = 28
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.MRCNN.ROI_XFORM_METHOD = 'RoIAlign'
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.MRCNN.ROI_XFORM_RESOLUTION = 14
###########################################
# #
# SSD Options #
# #
###########################################
__C.SSD = edict()
# Whether to enable FPN enhancement?
__C.SSD.FPN_ON = False
__C.SSD.MULTIBOX = edict()
# MultiBox configs
__C.SSD.MULTIBOX.STRIDES = []
__C.SSD.MULTIBOX.MIN_SIZES = []
__C.SSD.MULTIBOX.MAX_SIZES = []
__C.SSD.MULTIBOX.ASPECT_RATIOS = []
__C.SSD.MULTIBOX.ASPECT_ANGLES = []
__C.SSD.OHEM = edict()
# The threshold for selecting negative bbox in hard example mining
__C.SSD.OHEM.NEG_OVERLAP = 0.5
# The ratio used in hard example mining
__C.SSD.OHEM.NEG_POS_RATIO = 3.0
# Distort the image?
__C.SSD.DISTORT = edict()
__C.SSD.DISTORT.BRIGHTNESS_PROB = 0.5
__C.SSD.DISTORT.CONTRAST_PROB = 0.5
__C.SSD.DISTORT.SATURATION_PROB = 0.5
# Expand the image?
__C.SSD.EXPAND = edict()
__C.SSD.EXPAND.PROB = 0.5
__C.SSD.EXPAND.MAX_RATIO = 4.0
# Resize the image?
__C.SSD.RESIZE = edict()
__C.SSD.RESIZE.HEIGHT = 300
__C.SSD.RESIZE.WIDTH = 300
__C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']
# Samplers
# Format as (min_scale, max_scale,
# min_aspect_ratio, max_aspect_ratio,
# min_jaccard_overlap, max_jaccard_overlap,
# max_trials, max_sample)
__C.SSD.SAMPLERS = [
(1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1, 1), # Entire image
(0.3, 1.0, 0.5, 2.0, 0.1, 1.0, 10, 1), # IoU >= 0.1
(0.3, 1.0, 0.5, 2.0, 0.3, 1.0, 10, 1), # IoU >= 0.3
(0.3, 1.0, 0.5, 2.0, 0.5, 1.0, 5, 1), # IoU >= 0.5
(0.3, 1.0, 0.5, 2.0, 0.7, 1.0, 5, 1), # IoU >= 0.7
(0.3, 1.0, 0.5, 2.0, 0.9, 1.0, 5, 1), # IoU >= 0.9
(0.3, 1.0, 0.5, 2.0, 0.0, 1.0, 1, 1), # Any patches
]
###########################################
# #
# ResNet Options #
# #
###########################################
__C.RESNET = edict()
# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
__C.RESNET.NUM_GROUPS = 1
# Baseline width of each group
__C.RESNET.GROUP_WIDTH = 64
###########################################
# #
# DropBlock Options #
# #
###########################################
__C.DROPBLOCK = edict()
# Whether to use DropBlock for more regularization
__C.DROPBLOCK.DROP_ON = False
# Decrement for scheduling keep prob after each iteration
__C.DROPBLOCK.DECREMENT = 1e-6
###########################################
# #
# Solver Options #
# #
###########################################
__C.SOLVER = edict()
# Base learning rate for the specified schedule
__C.SOLVER.BASE_LR = 0.001
# Optional scaling factor for total loss
# This option is helpful to scale the magnitude
# of gradients during FP16 training
__C.SOLVER.LOSS_SCALING = 1.
# Schedule type (see functions in utils.lr_policy for options)
# E.g., 'step', 'steps_with_decay', ...
__C.SOLVER.LR_POLICY = 'steps_with_decay'
# Hyperparameter used by the specified policy
# For 'step', the current LR is multiplied by SOLVER.GAMMA at each step
__C.SOLVER.GAMMA = 0.1
# Uniform step size for 'steps' policy
__C.SOLVER.STEP_SIZE = 30000
__C.SOLVER.STEPS = []
# Maximum number of SGD iterations
__C.SOLVER.MAX_ITERS = 40000
# Momentum to use with SGD
__C.SOLVER.MOMENTUM = 0.9
# L2 regularization hyper parameters
__C.SOLVER.WEIGHT_DECAY = 0.0005
# L2 norm factor for clipping gradients
__C.SOLVER.CLIP_NORM = -1.0
# Warm up to SOLVER.BASE_LR over this number of SGD iterations
__C.SOLVER.WARM_UP_ITERS = 500
# Start the warm up from SOLVER.BASE_LR * SOLVER.WARM_UP_FACTOR
__C.SOLVER.WARM_UP_FACTOR = 1.0 / 3.0
# The steps for accumulating gradients
__C.SOLVER.ITER_SIZE = 1
# The interval to display logs
__C.SOLVER.DISPLAY = 20
# The interval to snapshot a model
__C.SOLVER.SNAPSHOT_ITERS = 5000
# Prefix to yield the path: <prefix>_iters_XYZ.caffemodel
__C.SOLVER.SNAPSHOT_PREFIX = ''
###########################################
# #
# Misc Options #
# #
###########################################
# Number of GPUs to use (applies to both training and testing)
__C.NUM_GPUS = 1
# Use NCCL for all-reduce, otherwise use CUDA-aware MPI
__C.USE_NCCL = True
# Hosts for Inter-Machine communication
__C.HOSTS = []
# Pixel mean values (BGR order)
__C.PIXEL_MEANS = [102., 115., 122.]
# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
# These are empirically chosen to approximately lead to unit variance targets
__C.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
# Default weights on (dx, dy, dw, dh, da) for normalizing rbox regression targets
__C.RBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0, 10.0)
# Prior prob for the positives at the beginning of training.
# This is used to set the bias init for the logits layer
__C.PRIOR_PROB = 0.01
# For reproducibility
__C.RNG_SEED = 3
# Root directory of project
__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
# Data directory
__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))
# Place outputs under an experiments directory
__C.EXP_DIR = ''
# Use GPU implementation of non-maximum suppression
__C.USE_GPU_NMS = True
# Default GPU device id
__C.GPU_ID = 0
# Dump detection visualizations
__C.VIS = False
__C.VIS_ON_FILE = False
# Score threshold for visualization
__C.VIS_TH = 0.7
# Write summaries by tensor board
__C.ENABLE_TENSOR_BOARD = False
def _merge_a_into_b(a, b):
"""Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a.
"""
if not isinstance(a, dict):
return
for k, v in a.items():
# a must specify keys that are in b
if k not in b:
raise KeyError('{} is not a valid config key'.format(k))
# the types must match, too
v = _check_and_coerce_cfg_value_type(v, b[k], k)
# recursively merge dicts
if type(v) is edict:
try:
_merge_a_into_b(a[k], b[k])
except:
print('Error under config key: {}'.format(k))
raise
else:
b[k] = v
def cfg_from_file(filename):
"""Load a config file and merge it into the default options."""
import yaml
with open(filename, 'r') as f:
        yaml_cfg = edict(yaml.safe_load(f))
global __C
_merge_a_into_b(yaml_cfg, __C)
def cfg_from_list(cfg_list):
"""Set config keys via list (e.g., from command line)."""
from ast import literal_eval
assert len(cfg_list) % 2 == 0
for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
key_list = k.split('.')
d = __C
for subkey in key_list[:-1]:
            assert subkey in d  # dict.has_key() was removed in Python 3
d = d[subkey]
subkey = key_list[-1]
assert subkey in d
try:
value = literal_eval(v)
except:
# Handle the case when v is a string literal
value = v
assert type(value) == type(d[subkey]), \
'type {} does not match original type {}'\
.format(type(value), type(d[subkey]))
d[subkey] = value
def _check_and_coerce_cfg_value_type(value_a, value_b, key):
"""Checks that `value_a`, which is intended to replace `value_b` is of the
right type. The type is correct if it matches exactly or is one of a few
cases in which the type can be easily coerced.
"""
# The types must match (with some exceptions)
type_b = type(value_b)
type_a = type(value_a)
if type_a is type_b:
return value_a
if type_b is float and type_a is int:
return float(value_a)
# Exceptions: numpy arrays, strings, tuple<->list
if isinstance(value_b, np.ndarray):
value_a = np.array(value_a, dtype=value_b.dtype)
elif isinstance(value_a, tuple) and isinstance(value_b, list):
value_a = list(value_a)
elif isinstance(value_a, list) and isinstance(value_b, tuple):
value_a = tuple(value_a)
elif isinstance(value_a, dict) and isinstance(value_b, edict):
value_a = edict(value_a)
else:
raise ValueError(
'Type mismatch ({} vs. {}) with values ({} vs. {}) for config '
'key: {}'.format(type_b, type_a, value_b, value_a, key)
)
return value_a
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/facebookresearch/Detectron/blob/master/lib/core/config.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path as osp
import numpy as np
from lib.utils.attrdict import AttrDict as edict
__C = edict()
cfg = __C
###########################################
# #
# Training Options #
# #
###########################################
__C.TRAIN = edict()
# Initialize network with weights from this file
__C.TRAIN.WEIGHTS = ''
# Database to train
__C.TRAIN.DATABASE = ''
# Scales to use during training (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,)
# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TRAIN.MAX_SIZE = 1000
# Images to use per mini-batch
__C.TRAIN.IMS_PER_BATCH = 1
# Minibatch size (number of regions of interest [ROIs])
__C.TRAIN.BATCH_SIZE = 128
# Fraction of minibatch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25
# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5
# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.0
# Use shuffle after each epoch
__C.TRAIN.USE_SHUFFLE = True
# The number of chunks to shuffle
__C.TRAIN.NUM_SHUFFLE_CHUNKS = 0
# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = True
# Use the difficult (e.g. occluded) objects
__C.TRAIN.USE_DIFF = True
# Overlap required between a ROI and ground-truth box in order for that ROI to
# be used as a bounding-box regression training example
__C.TRAIN.BBOX_THRESH = 0.5
# If True, randomly scale the image by scale range
__C.TRAIN.SCALE_JITTERING = False
__C.TRAIN.SCALE_RANGE = [0.75, 1.0]
# If True, randomly distort the image by brightness, contrast, and saturation
__C.TRAIN.COLOR_JITTERING = False
# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor satisfies both the positive and negative conditions, set it to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
__C.TRAIN.RPN_FG_FRACTION = 0.5
# Total number of examples
__C.TRAIN.RPN_BATCHSIZE = 256
# NMS threshold used on RPN proposals
__C.TRAIN.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TRAIN.RPN_MIN_SIZE = 0
# Remove RPN anchors that go outside the image by RPN_STRADDLE_THRESH pixels
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
__C.TRAIN.RPN_STRADDLE_THRESH = 0
###########################################
# #
# Testing Options #
# #
###########################################
__C.TEST = edict()
# Database to test
__C.TEST.DATABASE = ''
# Original json ground-truth file to use
# Records in the Database file will be used instead
__C.TEST.JSON_FILE = ''
# Scales to use during testing (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TEST.SCALES = (600,)
# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TEST.MAX_SIZE = 1000
# Images to use per mini-batch
__C.TEST.IMS_PER_BATCH = 1
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
__C.TEST.NMS = 0.3
# Use Soft-NMS instead of standard NMS?
# For the soft NMS overlap threshold, we simply use TEST.NMS
__C.TEST.USE_SOFT_NMS = False
__C.TEST.SOFT_NMS_METHOD = 'linear'
__C.TEST.SOFT_NMS_SIGMA = 0.5
# The number of top scoring prior boxes to keep before NMS
__C.TEST.NMS_TOP_K = 400
# The threshold for predicting boxes
__C.TEST.SCORE_THRESH = 0.05
# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5
# NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 0
# Save detection results files if True
# If false, results files are cleaned up (they can be large) after local
# evaluation
__C.TEST.COMPETITION_MODE = True
# The optional test protocol for custom datasets
# Ignored by the VOC and COCO datasets
# Available protocols: 'voc2007', 'voc2010', 'coco'
__C.TEST.PROTOCOL = 'voc2007'
# Maximum number of detections to return per image (100 is based on the limit
# established for the COCO dataset)
__C.TEST.DETECTIONS_PER_IM = 100
###########################################
# #
# Model Options #
# #
###########################################
__C.MODEL = edict()
# The type of the model
# ('faster_rcnn',
# 'mask_rcnn',
# 'ssd',
# 'rssd',
# 'retinanet',
# )
__C.MODEL.TYPE = ''
# The float precision for training and inference
# (FLOAT32, FLOAT16,)
__C.MODEL.DATA_TYPE = 'FLOAT32'
# The backbone
__C.MODEL.BACKBONE = ''
# The number of classes in the dataset
__C.MODEL.NUM_CLASSES = -1
# Keep it for TaaS DataSet
__C.MODEL.CLASSES = ['__background__']
# Add StopGrad at a specified stage so the bottom layers are frozen
__C.MODEL.FREEZE_AT = 2
# Whether to use focal loss for one-stage detectors?
# Enabled if model type in ('ssd',)
# RetinaNet is forced to use focal loss
__C.MODEL.USE_FOCAL_LOSS = False
__C.MODEL.FOCAL_LOSS_ALPHA = 0.25
__C.MODEL.FOCAL_LOSS_GAMMA = 2.0
# Stride of the coarsest Feature level
# This is needed so the input can be padded properly
__C.MODEL.COARSEST_STRIDE = -1
###########################################
# #
# RPN Options #
# #
###########################################
__C.RPN = edict()
# Strides for multiple rpn heads
__C.RPN.STRIDES = [4, 8, 16, 32, 64]
# Scales for multiple anchors
__C.RPN.SCALES = [8, 8, 8, 8, 8]
# RPN anchor aspect ratios
__C.RPN.ASPECT_RATIOS = [0.5, 1, 2]
###########################################
# #
# Retina-Net Options #
# #
###########################################
__C.RETINANET = edict()
# Anchor aspect ratios to use
__C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0)
# Anchor scales per octave
__C.RETINANET.SCALES_PER_OCTAVE = 3
# At each FPN level, we generate anchors based on their scale, aspect_ratio,
# stride of the level, and we multiply the resulting anchor by ANCHOR_SCALE
__C.RETINANET.ANCHOR_SCALE = 4
# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
__C.RETINANET.NUM_CONVS = 4
# During inference, #locs to select based on cls score before NMS is performed
__C.RETINANET.PRE_NMS_TOP_N = 5000
# IoU overlap ratio for labeling an anchor as positive
# Anchors with >= iou overlap are labeled positive
__C.RETINANET.POSITIVE_OVERLAP = 0.5
# IoU overlap ratio for labeling an anchor as negative
# Anchors with < iou overlap are labeled negative
__C.RETINANET.NEGATIVE_OVERLAP = 0.4
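# A sketch of the anchor-size rule described above, assuming the standard
# RetinaNet formula (size = stride * ANCHOR_SCALE * 2**(i / SCALES_PER_OCTAVE));
# illustrative only, not consumed by this config.
def _retinanet_anchor_sizes(stride, anchor_scale=4, scales_per_octave=3):
    return [stride * anchor_scale * 2. ** (i / float(scales_per_octave))
            for i in range(scales_per_octave)]
# e.g. _retinanet_anchor_sizes(8) -> [32.0, ~40.3, ~50.8]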
###########################################
# #
# FPN Options #
# #
###########################################
__C.FPN = edict()
# Channel dimension of the FPN feature levels
__C.FPN.DIM = 256
# Coarsest level of the FPN pyramid
__C.FPN.RPN_MAX_LEVEL = 6
# Finest level of the FPN pyramid
__C.FPN.RPN_MIN_LEVEL = 2
# Hyper-Parameters for the RoI-to-FPN level mapping heuristic
__C.FPN.ROI_CANONICAL_SCALE = 224
__C.FPN.ROI_CANONICAL_LEVEL = 4
# Coarsest level of the FPN pyramid
__C.FPN.ROI_MAX_LEVEL = 5
# Finest level of the FPN pyramid
__C.FPN.ROI_MIN_LEVEL = 2
###########################################
# #
# Fast R-CNN Options #
# #
###########################################
__C.FRCNN = edict()
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.FRCNN.ROI_XFORM_METHOD = 'RoIPool'
# Hidden layer dimension when using an MLP for the RoI box head
__C.FRCNN.MLP_HEAD_DIM = 1024
# RoI transform output resolution
# Note: some models may have constraints on what they can use, e.g. they use
# pretrained FC layers like in VGG16, and will ignore this option
__C.FRCNN.ROI_XFORM_RESOLUTION = 7
###########################################
# #
# Mask R-CNN Options #
# #
###########################################
__C.MRCNN = edict()
# Resolution of mask predictions
__C.MRCNN.RESOLUTION = 28
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.MRCNN.ROI_XFORM_METHOD = 'RoIAlign'
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.MRCNN.ROI_XFORM_RESOLUTION = 14
###########################################
# #
# SSD Options #
# #
###########################################
__C.SSD = edict()
# Whether to enable FPN enhancement?
__C.SSD.FPN_ON = False
__C.SSD.MULTIBOX = edict()
# MultiBox configs
__C.SSD.MULTIBOX.STRIDES = []
__C.SSD.MULTIBOX.MIN_SIZES = []
__C.SSD.MULTIBOX.MAX_SIZES = []
__C.SSD.MULTIBOX.ASPECT_RATIOS = []
__C.SSD.MULTIBOX.ASPECT_ANGLES = []
__C.SSD.OHEM = edict()
# The threshold for selecting negative bbox in hard example mining
__C.SSD.OHEM.NEG_OVERLAP = 0.5
# The ratio used in hard example mining
__C.SSD.OHEM.NEG_POS_RATIO = 3.0
# Distort the image?
__C.SSD.DISTORT = edict()
__C.SSD.DISTORT.BRIGHTNESS_PROB = 0.5
__C.SSD.DISTORT.CONTRAST_PROB = 0.5
__C.SSD.DISTORT.SATURATION_PROB = 0.5
# Expand the image?
__C.SSD.EXPAND = edict()
__C.SSD.EXPAND.PROB = 0.5
__C.SSD.EXPAND.MAX_RATIO = 4.0
# Resize the image?
__C.SSD.RESIZE = edict()
__C.SSD.RESIZE.HEIGHT = 300
__C.SSD.RESIZE.WIDTH = 300
__C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']
# Samplers
# Format as (min_scale, max_scale,
# min_aspect_ratio, max_aspect_ratio,
# min_jaccard_overlap, max_jaccard_overlap,
# max_trials, max_sample)
__C.SSD.SAMPLERS = [
(1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1, 1), # Entire image
(0.3, 1.0, 0.5, 2.0, 0.1, 1.0, 10, 1), # IoU >= 0.1
(0.3, 1.0, 0.5, 2.0, 0.3, 1.0, 10, 1), # IoU >= 0.3
(0.3, 1.0, 0.5, 2.0, 0.5, 1.0, 5, 1), # IoU >= 0.5
(0.3, 1.0, 0.5, 2.0, 0.7, 1.0, 5, 1), # IoU >= 0.7
(0.3, 1.0, 0.5, 2.0, 0.9, 1.0, 5, 1), # IoU >= 0.9
(0.3, 1.0, 0.5, 2.0, 0.0, 1.0, 1, 1), # Any patches
]
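# A reading aid for the sampler tuples above, mirroring the field order
# documented in the comment; illustrative only.
import collections as _collections
_Sampler = _collections.namedtuple('_Sampler', [
    'min_scale', 'max_scale', 'min_aspect_ratio', 'max_aspect_ratio',
    'min_jaccard_overlap', 'max_jaccard_overlap', 'max_trials', 'max_sample'])
# e.g. _Sampler(*__C.SSD.SAMPLERS[1]).min_jaccard_overlap == 0.1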
###########################################
# #
# ResNet Options #
# #
###########################################
__C.RESNET = edict()
# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
__C.RESNET.NUM_GROUPS = 1
# Baseline width of each group
__C.RESNET.GROUP_WIDTH = 64
###########################################
# #
# DropBlock Options #
# #
###########################################
__C.DROPBLOCK = edict()
# Whether to use DropBlock for more regularization
__C.DROPBLOCK.DROP_ON = False
# Decrement for scheduling keep prob after each iteration
__C.DROPBLOCK.DECREMENT = 1e-6
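# A sketch of the linear keep_prob schedule implied by the comment above;
# 'start' and 'floor' are assumptions, not options in this config
# (cf. the YAML note "0.000005 * 20000 = 0.1").
def _dropblock_keep_prob(iter_, decrement=1e-6, start=1.0, floor=0.9):
    return max(floor, start - decrement * iter_)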
###########################################
# #
# Solver Options #
# #
###########################################
__C.SOLVER = edict()
# Base learning rate for the specified schedule
__C.SOLVER.BASE_LR = 0.001
# Optional scaling factor for total loss
# This option is helpful to scale the magnitude
# of gradients during FP16 training
__C.SOLVER.LOSS_SCALING = 1.
# Schedule type (see functions in utils.lr_policy for options)
# E.g., 'step', 'steps_with_decay', ...
__C.SOLVER.LR_POLICY = 'steps_with_decay'
# Hyperparameter used by the specified policy
# For 'step', the current LR is multiplied by SOLVER.GAMMA at each step
__C.SOLVER.GAMMA = 0.1
# Uniform step size for 'steps' policy
__C.SOLVER.STEP_SIZE = 30000
__C.SOLVER.STEPS = []
# Maximum number of SGD iterations
__C.SOLVER.MAX_ITERS = 40000
# Momentum to use with SGD
__C.SOLVER.MOMENTUM = 0.9
# L2 regularization hyper parameters
__C.SOLVER.WEIGHT_DECAY = 0.0005
# L2 norm factor for clipping gradients
__C.SOLVER.CLIP_NORM = -1.0
# Warm up to SOLVER.BASE_LR over this number of SGD iterations
__C.SOLVER.WARM_UP_ITERS = 500
# Start the warm up from SOLVER.BASE_LR * SOLVER.WARM_UP_FACTOR
__C.SOLVER.WARM_UP_FACTOR = 1.0 / 3.0
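# A sketch of the linear warm-up the two options above describe; the actual
# policy lives in lib.utils.lr_policy, this merely mirrors the comments.
def _warm_up_lr(iter_, base_lr=0.001, warm_up_iters=500, factor=1.0 / 3.0):
    alpha = min(iter_ / float(warm_up_iters), 1.0)
    return base_lr * (factor * (1.0 - alpha) + alpha)
# _warm_up_lr(0) == base_lr * factor, _warm_up_lr(warm_up_iters) == base_lr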
# The steps for accumulating gradients
__C.SOLVER.ITER_SIZE = 1
# The interval to display logs
__C.SOLVER.DISPLAY = 20
# The interval to snapshot a model
__C.SOLVER.SNAPSHOT_ITERS = 5000
# Prefix to yield the path: <prefix>_iters_XYZ.caffemodel
__C.SOLVER.SNAPSHOT_PREFIX = ''
###########################################
# #
# Misc Options #
# #
###########################################
# Number of GPUs to use (applies to both training and testing)
__C.NUM_GPUS = 1
# Use NCCL for all-reduce, otherwise use CUDA-aware MPI
__C.USE_NCCL = True
# Hosts for Inter-Machine communication
__C.HOSTS = []
# Pixel mean values (BGR order)
__C.PIXEL_MEANS = [102., 115., 122.]
# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
# These are empirically chosen to approximately lead to unit variance targets
__C.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
# Default weights on (dx, dy, dw, dh, da) for normalizing rbox regression targets
__C.RBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0, 10.0)
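# A hedged sketch of the conventional R-CNN encoding that BBOX_REG_WEIGHTS
# normalize; boxes are (x1, y1, x2, y2). The project's own target code may
# differ in details such as the +1 box extents.
def _encode_bbox_target(roi, gt, weights=(10., 10., 5., 5.)):
    wx, wy, ww, wh = weights
    rw, rh = roi[2] - roi[0] + 1., roi[3] - roi[1] + 1.
    gw, gh = gt[2] - gt[0] + 1., gt[3] - gt[1] + 1.
    dx = wx * ((gt[0] + 0.5 * gw) - (roi[0] + 0.5 * rw)) / rw
    dy = wy * ((gt[1] + 0.5 * gh) - (roi[1] + 0.5 * rh)) / rh
    dw = ww * np.log(gw / rw)
    dh = wh * np.log(gh / rh)
    return dx, dy, dw, dh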
# Prior prob for the positives at the beginning of training.
# This is used to set the bias init for the logits layer
__C.PRIOR_PROB = 0.01
# For reproducibility
__C.RNG_SEED = 3
# Root directory of project
__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
# Data directory
__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))
# Place outputs under an experiments directory
__C.EXP_DIR = ''
# Use GPU implementation of non-maximum suppression
__C.USE_GPU_NMS = True
# Default GPU device id
__C.GPU_ID = 0
# Dump detection visualizations
__C.VIS = False
__C.VIS_ON_FILE = False
# Score threshold for visualization
__C.VIS_TH = 0.7
# Write summaries by tensor board
__C.ENABLE_TENSOR_BOARD = False
def _merge_a_into_b(a, b):
"""Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a.
"""
if not isinstance(a, dict):
return
for k, v in a.items():
# a must specify keys that are in b
if k not in b:
raise KeyError('{} is not a valid config key'.format(k))
# the types must match, too
v = _check_and_coerce_cfg_value_type(v, b[k], k)
# recursively merge dicts
if type(v) is edict:
try:
_merge_a_into_b(a[k], b[k])
except:
print('Error under config key: {}'.format(k))
raise
else:
b[k] = v
def cfg_from_file(filename):
"""Load a config file and merge it into the default options."""
import yaml
with open(filename, 'r') as f:
        yaml_cfg = edict(yaml.safe_load(f))
global __C
_merge_a_into_b(yaml_cfg, __C)
def cfg_from_list(cfg_list):
"""Set config keys via list (e.g., from command line)."""
from ast import literal_eval
assert len(cfg_list) % 2 == 0
for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
key_list = k.split('.')
d = __C
for subkey in key_list[:-1]:
            assert subkey in d  # dict.has_key() was removed in Python 3
d = d[subkey]
subkey = key_list[-1]
assert subkey in d
try:
value = literal_eval(v)
except:
# Handle the case when v is a string literal
value = v
assert type(value) == type(d[subkey]), \
'type {} does not match original type {}'\
.format(type(value), type(d[subkey]))
d[subkey] = value
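# A minimal usage sketch (keys must already exist in the defaults above):
#   cfg_from_list(['TRAIN.IMS_PER_BATCH', '2', 'MODEL.NUM_CLASSES', '21'])
#   assert cfg.TRAIN.IMS_PER_BATCH == 2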
def _check_and_coerce_cfg_value_type(value_a, value_b, key):
"""Checks that `value_a`, which is intended to replace `value_b` is of the
right type. The type is correct if it matches exactly or is one of a few
cases in which the type can be easily coerced.
"""
# The types must match (with some exceptions)
type_b = type(value_b)
type_a = type(value_a)
if type_a is type_b:
return value_a
if type_b is float and type_a is int:
return float(value_a)
# Exceptions: numpy arrays, strings, tuple<->list
if isinstance(value_b, np.ndarray):
value_a = np.array(value_a, dtype=value_b.dtype)
elif isinstance(value_a, tuple) and isinstance(value_b, list):
value_a = list(value_a)
elif isinstance(value_a, list) and isinstance(value_b, tuple):
value_a = tuple(value_a)
elif isinstance(value_a, dict) and isinstance(value_b, edict):
value_a = edict(value_a)
else:
raise ValueError(
'Type mismatch ({} vs. {}) with values ({} vs. {}) for config '
'key: {}'.format(type_b, type_a, value_b, value_a, key)
)
return value_a
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil
import time
import numpy as np
from lib.core.config import cfg
from lib.core.config import cfg_from_file
class Coordinator(object):
"""Coordinator is a simple tool to manage the
unique experiments from the YAML configurations.
"""
def __init__(self, cfg_file, exp_dir=None):
# Override the default configs
cfg_from_file(cfg_file)
if cfg.EXP_DIR != '':
exp_dir = cfg.EXP_DIR
if exp_dir is None:
model_id = time.strftime(
'%Y%m%d_%H%M%S', time.localtime(time.time()))
self.experiment_dir = '../experiments/{}'.format(model_id)
if not os.path.exists(self.experiment_dir):
os.makedirs(self.experiment_dir)
else:
if not os.path.exists(exp_dir):
raise ValueError('ExperimentDir({}) does not exist.'.format(exp_dir))
self.experiment_dir = exp_dir
def _path_at(self, file, auto_create=True):
path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path):
os.makedirs(path)
return path
def checkpoints_dir(self):
return self._path_at('checkpoints')
def exports_dir(self):
return self._path_at('exports')
def results_dir(self, checkpoint=None):
sub_dir = os.path.splitext(os.path.basename(checkpoint))[0] if checkpoint else ''
return self._path_at(os.path.join('results', sub_dir))
def checkpoint(self, global_step=None, wait=True):
def locate():
files = os.listdir(self.checkpoints_dir())
steps = []
for ix, file in enumerate(files):
step = int(file.split('_iter_')[-1].split('.')[0])
if global_step == step:
return os.path.join(self.checkpoints_dir(), files[ix]), step
steps.append(step)
if global_step is None:
if len(files) == 0:
return None, 0
last_idx = int(np.argmax(steps))
last_step = steps[last_idx]
return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step
return None, 0
result = locate()
while result[0] is None and wait:
print('\rWaiting for step_{}.checkpoint to exist...'.format(global_step), end='')
time.sleep(10)
result = locate()
return result
def delete_experiment(self):
if os.path.exists(self.experiment_dir):
shutil.rmtree(self.experiment_dir)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil
import time
import numpy as np
from lib.core.config import cfg
from lib.core.config import cfg_from_file
class Coordinator(object):
"""Coordinator is a simple tool to manage the
unique experiments from the YAML configurations.
"""
def __init__(self, cfg_file, exp_dir=None):
# Override the default configs
cfg_from_file(cfg_file)
if cfg.EXP_DIR != '':
exp_dir = cfg.EXP_DIR
if exp_dir is None:
model_id = time.strftime(
'%Y%m%d_%H%M%S', time.localtime(time.time()))
self.experiment_dir = '../experiments/{}'.format(model_id)
if not os.path.exists(self.experiment_dir):
os.makedirs(self.experiment_dir)
else:
if not os.path.exists(exp_dir):
raise ValueError('ExperimentDir({}) does not exist.'.format(exp_dir))
self.experiment_dir = exp_dir
def _path_at(self, file, auto_create=True):
path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path):
os.makedirs(path)
return path
def checkpoints_dir(self):
return self._path_at('checkpoints')
def exports_dir(self):
return self._path_at('exports')
def results_dir(self, checkpoint=None):
sub_dir = os.path.splitext(os.path.basename(checkpoint))[0] if checkpoint else ''
return self._path_at(os.path.join('results', sub_dir))
def checkpoint(self, global_step=None, wait=True):
def locate():
files = os.listdir(self.checkpoints_dir())
steps = []
for ix, file in enumerate(files):
step = int(file.split('_iter_')[-1].split('.')[0])
if global_step == step:
return os.path.join(self.checkpoints_dir(), files[ix]), step
steps.append(step)
if global_step is None:
if len(files) == 0:
return None, 0
last_idx = int(np.argmax(steps))
last_step = steps[last_idx]
return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step
return None, 0
result = locate()
while result[0] is None and wait:
print('\rWaiting for step_{}.checkpoint to exist...'.format(global_step), end='')
time.sleep(10)
result = locate()
return result
def delete_experiment(self):
if os.path.exists(self.experiment_dir):
shutil.rmtree(self.experiment_dir)
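# A minimal usage sketch (the config path is a placeholder):
#   coordinator = Coordinator('configs/faster_rcnn.yml')
#   last_checkpoint, last_step = coordinator.checkpoint(wait=False)
#   results_dir = coordinator.results_dir(last_checkpoint)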
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import cv2
from multiprocessing import Queue
from collections import OrderedDict
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
# All detectors share the same reader/transformer during testing
from lib.faster_rcnn.data.data_reader import DataReader
from lib.faster_rcnn.data.data_transformer import DataTransformer
class TestServer(object):
def __init__(self, output_dir):
self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.num_images, self.num_classes, self.classes = \
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = DataReader(**{'source': self.imdb.source})
self.data_transformer = DataTransformer()
self.data_reader.q_out = Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.start()
self.gt_recs = OrderedDict()
self.output_dir = output_dir
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
serialized = self.data_reader.q_out.get()
image = self.data_transformer.get_image(serialized)
image_id, objects = self.data_transformer.get_annotations(serialized)
self.gt_recs[image_id] = {
'objects': objects,
'width': image.shape[1],
'height': image.shape[0],
}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
                'Loaded {} records, but {} are required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes, self.get_records(), self.output_dir)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes, all_masks, self.get_records(), self.output_dir)
class InferServer(object):
def __init__(self, output_dir):
self.images_dir = cfg.TEST.DATABASE
self.imdb = get_imdb('taas:/empty')
self.images = os.listdir(self.images_dir)
self.num_images, self.num_classes, self.classes = \
len(self.images), cfg.MODEL.NUM_CLASSES, cfg.MODEL.CLASSES
self.data_transformer = DataTransformer()
self.gt_recs = OrderedDict()
self.output_dir = output_dir
self.image_idx = 0
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
image_name = self.images[self.image_idx]
image_id = os.path.splitext(image_name)[0]
image = cv2.imread(os.path.join(self.images_dir, image_name))
self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = {
'width': image.shape[1],
'height': image.shape[0],
}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
'Loaded {} records, but {} are required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import multiprocessing as mp
import os
import cv2
import dragon
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data_transformer import DataTransformer
class TestServer(object):
def __init__(self, output_dir):
self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.num_images, self.num_classes, self.classes = \
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = dragon.io.DataReader(
dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
self.data_transformer = DataTransformer()
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.start()
self.gt_recs = collections.OrderedDict()
self.output_dir = output_dir
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
example = self.data_reader.q_out.get()
image = self.data_transformer.get_image(example)
image_id, objects = self.data_transformer.get_annotations(example)
self.gt_recs[image_id] = {
'objects': objects,
'width': image.shape[1],
'height': image.shape[0],
}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
'Loaded {} records, but {} are required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
class InferServer(object):
def __init__(self, output_dir):
self.images_dir = cfg.TEST.DATABASE
self.imdb = get_imdb('taas:/empty')
self.images = os.listdir(self.images_dir)
self.num_images, self.num_classes, self.classes = \
len(self.images), cfg.MODEL.NUM_CLASSES, cfg.MODEL.CLASSES
self.data_transformer = DataTransformer()
self.gt_recs = collections.OrderedDict()
self.output_dir = output_dir
self.image_idx = 0
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
image_name = self.images[self.image_idx]
image_id = os.path.splitext(image_name)[0]
image = cv2.imread(os.path.join(self.images_dir, image_name))
self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = {'width': image.shape[1], 'height': image.shape[0]}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
'Loaded {} records, but {} are required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/train.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import datetime
import os
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.core.solver import get_solver_func
from lib.utils import logger
from lib.utils.stats import SmoothedValue
from lib.utils.timer import Timer
class SolverWrapper(object):
def __init__(self, coordinator):
self.output_dir = coordinator.checkpoints_dir()
self.solver = get_solver_func('MomentumSGD')()
# Load the pre-trained weights
init_weights = cfg.TRAIN.WEIGHTS
if init_weights != '':
if os.path.exists(init_weights):
logger.info('Loading weights from {}.'.format(init_weights))
self.solver.detector.load_weights(init_weights)
else:
raise ValueError('Invalid weights path: {}'.format(init_weights))
# Mixed precision training?
if cfg.MODEL.DATA_TYPE.lower() == 'float16':
self.solver.detector.half()  # Cast the detector weights to FP16
self.solver.detector.cuda(cfg.GPU_ID)
# Plan the metrics
self.metrics = collections.OrderedDict()
if cfg.ENABLE_TENSOR_BOARD:
from dragon.tools.tensorboard import TensorBoard
self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs')
def snapshot(self):
if not logger.is_root():
return None
filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}'
.format(self.solver.iter) + '.pth')
filename = os.path.join(self.output_dir, filename)
torch.save(self.solver.detector.state_dict(), filename)
logger.info('Wrote snapshot to: {:s}'.format(filename))
return filename
def add_metrics(self, stats):
for k, v in stats['loss'].items():
if k not in self.metrics:
self.metrics[k] = SmoothedValue(20)
self.metrics[k].AddValue(v)
def send_metrics(self, stats):
if hasattr(self, 'board'):
self.board.scalar_summary('lr', stats['lr'], stats['iter'])
self.board.scalar_summary('time', stats['time'], stats['iter'])
for k, v in self.metrics.items():
if k == 'total':
self.board.scalar_summary(
'total_loss',
v.GetMedianValue(),
stats['iter'],
)
else:
self.board.scalar_summary(
k,
v.GetMedianValue(),
stats['iter'],
)
def step(self, display=False):
stats = self.solver.one_step()
self.add_metrics(stats)
self.send_metrics(stats)
if display:
logger.info(
'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (
stats['iter'], stats['lr'],
self.metrics['total'].GetMedianValue(),
stats['time'],
)
)
for k, v in self.metrics.items():
if k == 'total':
continue
logger.info(' ' * 10 + 'Train net output ({}): {}'
.format(k, v.GetMedianValue()))
def train_model(self):
"""Network training loop."""
last_snapshot_iter = -1
timer = Timer()
model_paths = []
start_lr = self.solver.base_lr
while self.solver.iter < cfg.SOLVER.MAX_ITERS:
if self.solver.iter < cfg.SOLVER.WARM_UP_ITERS:
alpha = (self.solver.iter + 1.0) / cfg.SOLVER.WARM_UP_ITERS
self.solver.base_lr = \
start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha)
# Apply 1-step SGD update
with timer.tic_and_toc():
self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0)
if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0:
average_time = timer.average_time
eta_seconds = average_time * (
cfg.SOLVER.MAX_ITERS - self.solver.iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS
logger.info(
'< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta)
)
if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0:
last_snapshot_iter = self.solver.iter
model_paths.append(self.snapshot())
if last_snapshot_iter != self.solver.iter:
model_paths.append(self.snapshot())
return model_paths
def train_net(coordinator, start_iter=0):
sw = SolverWrapper(coordinator)
sw.solver.iter = start_iter
logger.info('Solving...')
model_paths = sw.train_model()
return model_paths
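# A worked sketch of the linear warm-up in train_model() above (hedged:
# the numeric values are illustrative, not this repository's defaults).
def warm_up_lr(base_lr, it, warm_up_iters=500, warm_up_factor=1. / 3.):
    if it >= warm_up_iters:
        return base_lr
    alpha = (it + 1.) / warm_up_iters
    # Interpolate from base_lr * warm_up_factor towards base_lr
    return base_lr * (warm_up_factor * (1 - alpha) + alpha)

# warm_up_lr(0.02, 0) ~= 0.0067, warm_up_lr(0.02, 499) == 0.02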
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/factory.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from lib.datasets.taas import TaaS
# TaaS DataSet
_GLOBAL_DATA_SETS = {'taas': lambda source: TaaS(source)}
def get_imdb(name):
"""Get an imdb (image database) by name."""
keys = name.split(':')
if len(keys) >= 2:
cls, source = keys[0], ':'.join(keys[1:])
if cls not in _GLOBAL_DATA_SETS:
raise KeyError('Unknown DataSet: {}'.format(cls))
return _GLOBAL_DATA_SETS[cls](source)
elif os.path.exists(name):
return _GLOBAL_DATA_SETS['taas'](name)
else:
raise ValueError('Illegal database: {}'.format(name))
def list_imdbs():
"""List all registered imdbs."""
return _GLOBAL_DATA_SETS.keys()
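# Usage sketch for the factory above (the dataset path is a placeholder):
# both forms resolve to a TaaS dataset, via the explicit 'taas:' prefix
# or a bare path that exists on disk.
if __name__ == '__main__':
    dataset = get_imdb('taas:/data/voc_0712_trainval')
    print(dataset.name, dataset.num_classes)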
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/imdb.py>
#
# ------------------------------------------------------------
import os
from dragon.tools.db import LMDB
from lib.core.config import cfg
class imdb(object):
def __init__(self, name):
self._name = name
self._num_classes = 0
self._classes = []
@property
def name(self):
return self._name
@property
def num_classes(self):
return len(self._classes)
@property
def classes(self):
return self._classes
@property
def cache_path(self):
cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache'))
if not os.path.exists(cache_path):
os.makedirs(cache_path)
return cache_path
@property
def source(self):
expected_source = os.path.join(self.cache_path, self.name + '_lmdb')
if not os.path.exists(expected_source):
    raise RuntimeError('Expected LMDB source at: {}, '
                       'but it does not exist.'.format(expected_source))
return expected_source
@property
def num_images(self):
self._db = LMDB()
self._db.open(self.source)
num_entries = self._db.num_entries()
self._db.close()
return num_entries
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
pass
def evaluate_masks(self, all_boxes, all_masks, output_dir):
pass
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/imdb.py>
#
# ------------------------------------------------------------
import os
import dragon
from lib.core.config import cfg
class imdb(object):
def __init__(self, name):
self._name = name
self._num_classes = 0
self._classes = []
@property
def name(self):
return self._name
@property
def num_classes(self):
return len(self._classes)
@property
def classes(self):
return self._classes
@property
def cache_path(self):
cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache'))
if not os.path.exists(cache_path):
os.makedirs(cache_path)
return cache_path
@property
def source(self):
expected_source = os.path.join(self.cache_path, self.name)
if not os.path.exists(expected_source):
    raise RuntimeError(
        'Expected source at: {}, '
        'but it does not exist.'
        .format(expected_source)
    )
return expected_source
@property
def num_images(self):
return dragon.io.SeetaRecordDataset(self.source).size
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
pass
def evaluate_masks(self, all_boxes, all_masks, output_dir):
pass
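# A minimal sketch of what 'num_images' above relies on (assuming only the
# dragon.io API already used in this file; the dataset path is a
# placeholder): the record dataset reports its size without decoding any
# image data.
if __name__ == '__main__':
    dataset = dragon.io.SeetaRecordDataset('/data/voc_0712_trainval')
    print(dataset.size)  # number of serialized examples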
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import json
import numpy as np
import uuid
import cv2
try:
import cPickle
except ImportError:
import pickle as cPickle
from .imdb import imdb
from .voc_eval import voc_bbox_eval, voc_segm_eval
from lib.core.config import cfg
from lib.utils import boxes as box_utils
from lib.pycocotools.mask import encode as encode_masks
class TaaS(imdb):
def __init__(self, source):
imdb.__init__(self, 'taas')
self._classes = cfg.MODEL.CLASSES
self._source = source
self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
self._class_to_cat_id = self._class_to_ind
self._salt = str(uuid.uuid4())
self.config = {'cleanup': True, 'use_salt': True}
@property
def source(self):
expected_source = self._source
if not os.path.exists(expected_source):
    raise RuntimeError('Expected LMDB source at: {}, '
                       'but it does not exist.'.format(expected_source))
return expected_source
##############################################
# #
# UTILS #
# #
##############################################
def _get_comp_id(self):
return '_' + self._salt if self.config['use_salt'] else ''
@classmethod
def _get_prefix(cls, type='bbox'):
if type == 'bbox':
return 'detections_'
elif type == 'segm':
return 'segmentations_'
elif type == 'kpt':
return 'keypoints_'
return ''
def _get_voc_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>_aeroplane.txt
if type == 'bbox':
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.txt'
elif type == 'segm':
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.pkl'
else:
raise ValueError('Type of results can be either bbox or segm.')
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _get_coco_annotations_T(self, results_folder, type='bbox'):
# experiments/model_id/results/[GT]_detections_taas.json
filename = '[GT]_' + self._get_prefix(type) + self._name + '.json'
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _get_coco_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>.json
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '.json'
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
##############################################
# #
# VOC #
# #
##############################################
def _write_xml_bbox_results(self, all_boxes, gt_recs, output_dir):
from xml.dom import minidom
import xml.etree.ElementTree as ET
ix = 0
for image_id, rec in gt_recs.items():
root = ET.Element('annotation')
ET.SubElement(root, 'filename').text = str(image_id)
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
detections = all_boxes[cls_ind][ix]
if len(detections) == 0:
continue
for k in range(detections.shape[0]):
if detections[k, -1] < cfg.VIS_TH:
continue
obj_elem = ET.SubElement(root, 'object')  # avoid shadowing builtin 'object'
ET.SubElement(obj_elem, 'name').text = cls
ET.SubElement(obj_elem, 'difficult').text = '0'
bnd_box = ET.SubElement(obj_elem, 'bndbox')
ET.SubElement(bnd_box, 'xmin').text = str(detections[k][0])
ET.SubElement(bnd_box, 'ymin').text = str(detections[k][1])
ET.SubElement(bnd_box, 'xmax').text = str(detections[k][2])
ET.SubElement(bnd_box, 'ymax').text = str(detections[k][3])
ix += 1
raw_text = ET.tostring(root)
dom = minidom.parseString(raw_text)
with open('{}/{}.xml'.format(output_dir, image_id), 'w') as f:
dom.writexml(f, "", "\t", "\n", "utf-8")
def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir):
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Writing {} VOC format bbox results'.format(cls))
filename = self._get_voc_results_T(output_dir).format(cls)
with open(filename, 'wt') as f:
ix = 0
for image_id, rec in gt_recs.items():
dets = all_boxes[cls_ind][ix]
ix += 1
if len(dets) == 0:
continue
for k in range(dets.shape[0]):
f.write(
'{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'
.format(image_id, dets[k, -1],
dets[k, 0] + 1, dets[k, 1] + 1,
dets[k, 2] + 1, dets[k, 3] + 1))
def _write_voc_segm_results(self, all_boxes, all_masks, output_dir):
for cls_inds, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Writing {} VOC format segm results'.format(cls))
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
with open(bbox_filename, 'wb') as f:
cPickle.dump(all_boxes[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
with open(segm_filename, 'wb') as f:
cPickle.dump(all_masks[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
def _do_voc_bbox_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self._classes):
if cls == '__background__':
continue
det_file = self._get_voc_results_T(output_dir).format(cls)
rec, prec, ap = voc_bbox_eval(
det_file, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
def _do_voc_segm_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.classes):
if cls == '__background__':
continue
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
ap = voc_segm_eval(
bbox_filename, segm_filename, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
##############################################
# #
# COCO #
# #
##############################################
@classmethod
def _get_coco_image_id(cls, image_name):
image_id = image_name.split('_')[-1].split('.')[0]
try:
return int(image_id)
except ValueError:
return image_name
@classmethod
def _encode_coco_masks(cls, masks, boxes, im_h, im_w):
num_pred = len(boxes)
assert len(masks) == num_pred
mask_image = np.zeros((im_h, im_w, num_pred), dtype=np.uint8, order='F')
M = masks[0].shape[0]
scale = (M + 2.0) / M
ref_boxes = box_utils.expand_boxes(boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
for i in range(num_pred):
ref_box = ref_boxes[i, :4]
mask = masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > cfg.TEST.BINARY_THRESH, dtype=np.uint8)
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
mask_image[y1:y2, x1:x2, i] = \
mask[(y1 - ref_box[1]):(y2 - ref_box[1]),
(x1 - ref_box[0]):(x2 - ref_box[0])]
return encode_masks(mask_image)
def _write_coco_bbox_annotations(self, gt_recs, output_dir):
# Build images
dataset = {'images': []}
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width'],
})
# Build categories
dataset['categories'] = []
for cls in self._classes:
if cls == '__background__':
continue
dataset['categories'].append({
'name': cls,
'id': self._class_to_ind[cls],
})
# Build annotations
dataset['annotations'] = []
ann_id = 0
for image_name, rec in gt_recs.items():
for obj in rec['objects']:
x, y = obj['bbox'][0], obj['bbox'][1]
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
dataset['annotations'].append({
'id': str(ann_id),
'bbox': [x, y, w, h],
'area': w * h,
'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']],
})
ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='bbox')
with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file
def _write_coco_segm_annotations(self, gt_recs, output_dir):
# Build images
dataset = {'images': []}
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width'],
})
# Build categories
dataset['categories'] = []
for cls in self._classes:
if cls == '__background__':
continue
dataset['categories'].append({
'name': cls,
'id': self._class_to_ind[cls],
})
# Build annotations
dataset['annotations'] = []
ann_id = 0
for image_name, rec in gt_recs.items():
for obj in rec['objects']:
x, y = obj['bbox'][0], obj['bbox'][1]
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
dataset['annotations'].append({
'id': str(ann_id),
'bbox': [x, y, w, h],
'area': w * h,
'segmentation': {
'size': [rec['height'], rec['width']],
'counts': obj['mask'],
},
'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']],
})
ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='segm')
with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file
def _coco_bbox_results_one_category(self, boxes, cat_id, gt_recs):
ix, results = 0, []
for image_name, rec in gt_recs.items():
dets = boxes[ix]
ix += 1
if isinstance(dets, list) and len(dets) == 0:
continue
dets = dets.astype(np.float)
scores = dets[:, -1]
xs = dets[:, 0]
ys = dets[:, 1]
ws = dets[:, 2] - xs + 1
hs = dets[:, 3] - ys + 1
results.extend(
[{'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
'bbox': [xs[k], ys[k], ws[k], hs[k]],
'score': scores[k],
} for k in range(dets.shape[0])]
)
return results
def _coco_segm_results_one_category(self, boxes, masks, cat_id, gt_recs):
def filter_boxes(dets):
boxes = dets[:, :4]
ws = boxes[:, 2] - boxes[:, 0]
hs = boxes[:, 3] - boxes[:, 1]
keep = np.where((ws >= 1) & (hs >= 1))[0]
return keep
results = []
ix = 0
for image_name, rec in gt_recs.items():
dets = boxes[ix].astype(np.float)
msks = masks[ix]
ix += 1
keep = filter_boxes(dets)
im_h, im_w = rec['height'], rec['width']
if len(keep) == 0:
continue
scores = dets[:, -1]
mask_encode = self._encode_coco_masks(
msks[keep], dets[keep, :4], im_h, im_w)
for k in range(dets[keep].shape[0]):
rle = mask_encode[k]
if sys.version_info >= (3, 0):
rle['counts'] = rle['counts'].decode()
results.append({
'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
'segmentation': rle,
'score': scores[k],
})
return results
def _write_coco_bbox_results(self, all_boxes, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir)
results = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_bbox_results_one_category(
all_boxes[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid:
json.dump(results, fid)
return filename
def _write_coco_segm_results(self, all_boxes, all_masks, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir, type='segm')
results = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_segm_results_one_category(
all_boxes[cls_ind], all_masks[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid:
json.dump(results, fid)
return filename
def _do_coco_bbox_eval(self, coco, res_file):
from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
coco_eval = COCOeval(coco, coco_dt, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
self._print_coco_eval_results(coco_eval)
def _do_coco_segm_eval(self, coco, res_file):
from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
coco_eval = COCOeval(coco, coco_dt, 'segm')
coco_eval.evaluate()
coco_eval.accumulate()
self._print_coco_eval_results(coco_eval)
def _print_coco_eval_results(self, coco_eval):
IoU_lo_thresh = 0.5
IoU_hi_thresh = 0.95
def _get_thr_ind(coco_eval, thr):
ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
(coco_eval.params.iouThrs < thr + 1e-5))[0][0]
iou_thr = coco_eval.params.iouThrs[ind]
assert np.isclose(iou_thr, thr)
return ind
ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
# Precision has dims (iou, recall, cls, area range, max dets)
# Area range index 0: all area ranges
# Max dets index 2: 100 per image
precision = \
coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
ap_default = np.mean(precision[precision > -1])
print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
'~~~~'.format(IoU_lo_thresh, IoU_hi_thresh))
print('{:.1f}'.format(100 * ap_default))
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
# Minus 1 because of __background__
precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2]
ap = np.mean(precision[precision > -1])
print('{:.1f}'.format(100 * ap))
print('~~~~ Summary metrics ~~~~')
coco_eval.summarize()
##############################################
# #
# EVAL-API #
# #
##############################################
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
protocol = cfg.TEST.PROTOCOL
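# Protocols containing 'wo' (read: "without evaluation") only write
# result files and skip the metric computation below.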
if 'voc' in protocol:
self._write_voc_bbox_results(all_boxes, gt_recs, output_dir)
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
self._do_voc_bbox_eval(
gt_recs, output_dir, IoU=0.5,
use_07_metric='2007' in protocol)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
self._do_voc_bbox_eval(
gt_recs, output_dir, IoU=0.7,
use_07_metric='2007' in protocol)
elif 'xml' in protocol:
if cfg.EXP_DIR != '':
output_dir = cfg.EXP_DIR
self._write_xml_bbox_results(all_boxes, gt_recs, output_dir)
elif 'coco' in protocol:
from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE):
coco = COCO(cfg.TEST.JSON_FILE)
# Override the category ids with those from the JSON file before writing results
cats = coco.loadCats(coco.getCatIds())
self._class_to_cat_id = dict(zip(
[c['name'] for c in cats], coco.getCatIds()))
else:
coco = None
res_file = self._write_coco_bbox_results(
all_boxes, gt_recs, output_dir)
if 'wo' not in protocol:
if coco is None:
ann_file = self._write_coco_bbox_annotations(gt_recs, output_dir)
coco = COCO(ann_file)
self._do_coco_bbox_eval(coco, res_file)
def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir):
protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
self._write_voc_segm_results(all_boxes, all_masks, output_dir)
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
self._do_voc_segm_eval(
gt_recs, output_dir, IoU=0.5,
use_07_metric='2007' in protocol)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
self._do_voc_segm_eval(
gt_recs, output_dir, IoU=0.7,
use_07_metric='2007' in protocol)
elif 'coco' in protocol:
from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE):
coco = COCO(cfg.TEST.JSON_FILE)
# Override the category ids with those from the JSON file before writing results
cats = coco.loadCats(coco.getCatIds())
self._class_to_cat_id = dict(
zip([c['name'] for c in cats], coco.getCatIds()))
else:
coco = None
res_file = self._write_coco_segm_results(all_boxes, all_masks, gt_recs, output_dir)
if 'wo' not in protocol:
if coco is None:
coco = COCO(self._write_coco_segm_annotations(gt_recs, output_dir))
self._do_coco_segm_eval(coco, res_file)
def competition_mode(self, on):
if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import os
import sys
import uuid
import cv2
import numpy as np
try:
import cPickle
except ImportError:
import pickle as cPickle
from lib.core.config import cfg
from lib.datasets.imdb import imdb
from lib.datasets.voc_eval import voc_bbox_eval
from lib.datasets.voc_eval import voc_segm_eval
from lib.pycocotools.mask import encode as encode_masks
from lib.utils import boxes as box_utils
class TaaS(imdb):
def __init__(self, source):
imdb.__init__(self, 'taas')
self._classes = cfg.MODEL.CLASSES
self._source = source
self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
self._class_to_cat_id = self._class_to_ind
self._salt = str(uuid.uuid4())
self.config = {'cleanup': True, 'use_salt': True}
@property
def source(self):
expected_source = self._source
if not os.path.exists(expected_source):
    raise RuntimeError(
        'Expected source at: {}, '
        'but it does not exist.'
        .format(expected_source)
    )
return expected_source
##############################################
# #
# UTILS #
# #
##############################################
def _get_comp_id(self):
return '_' + self._salt if self.config['use_salt'] else ''
@classmethod
def _get_prefix(cls, type='bbox'):
if type == 'bbox':
return 'detections_'
elif type == 'segm':
return 'segmentations_'
elif type == 'kpt':
return 'keypoints_'
return ''
def _get_voc_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>_aeroplane.txt
if type == 'bbox':
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.txt'
elif type == 'segm':
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.pkl'
else:
raise ValueError('Type of results can be either bbox or segm.')
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _get_coco_annotations_T(self, results_folder, type='bbox'):
# experiments/model_id/results/[GT]_detections_taas.json
filename = '[GT]_' + self._get_prefix(type) + self._name + '.json'
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _get_coco_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>.json
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '.json'
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
##############################################
# #
# VOC #
# #
##############################################
def _write_xml_bbox_results(self, all_boxes, gt_recs, output_dir):
from xml.dom import minidom
import xml.etree.ElementTree as ET
ix = 0
for image_id, rec in gt_recs.items():
root = ET.Element('annotation')
ET.SubElement(root, 'filename').text = str(image_id)
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
detections = all_boxes[cls_ind][ix]
if len(detections) == 0:
continue
for k in range(detections.shape[0]):
if detections[k, -1] < cfg.VIS_TH:
continue
obj_elem = ET.SubElement(root, 'object')  # avoid shadowing builtin 'object'
ET.SubElement(obj_elem, 'name').text = cls
ET.SubElement(obj_elem, 'difficult').text = '0'
bnd_box = ET.SubElement(obj_elem, 'bndbox')
ET.SubElement(bnd_box, 'xmin').text = str(detections[k][0])
ET.SubElement(bnd_box, 'ymin').text = str(detections[k][1])
ET.SubElement(bnd_box, 'xmax').text = str(detections[k][2])
ET.SubElement(bnd_box, 'ymax').text = str(detections[k][3])
ix += 1
raw_text = ET.tostring(root)
dom = minidom.parseString(raw_text)
with open('{}/{}.xml'.format(output_dir, image_id), 'w') as f:
dom.writexml(f, "", "\t", "\n", "utf-8")
def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir):
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Writing {} VOC format bbox results'.format(cls))
filename = self._get_voc_results_T(output_dir).format(cls)
with open(filename, 'wt') as f:
ix = 0
for image_id, rec in gt_recs.items():
dets = all_boxes[cls_ind][ix]
ix += 1
if len(dets) == 0:
continue
for k in range(dets.shape[0]):
f.write(
'{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'
.format(image_id, dets[k, -1],
dets[k, 0] + 1, dets[k, 1] + 1,
dets[k, 2] + 1, dets[k, 3] + 1))
def _write_voc_segm_results(self, all_boxes, all_masks, output_dir):
for cls_inds, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Writing {} VOC format segm results'.format(cls))
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
with open(bbox_filename, 'wb') as f:
cPickle.dump(all_boxes[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
with open(segm_filename, 'wb') as f:
cPickle.dump(all_masks[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
def _do_voc_bbox_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self._classes):
if cls == '__background__':
continue
det_file = self._get_voc_results_T(output_dir).format(cls)
rec, prec, ap = voc_bbox_eval(
det_file, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
def _do_voc_segm_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.classes):
if cls == '__background__':
continue
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
ap = voc_segm_eval(
bbox_filename, segm_filename, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
##############################################
# #
# COCO #
# #
##############################################
@classmethod
def _get_coco_image_id(cls, image_name):
image_id = image_name.split('_')[-1].split('.')[0]
try:
return int(image_id)
except ValueError:
return image_name
@classmethod
def _encode_coco_masks(cls, masks, boxes, im_h, im_w):
num_pred = len(boxes)
assert len(masks) == num_pred
mask_image = np.zeros((im_h, im_w, num_pred), dtype=np.uint8, order='F')
M = masks[0].shape[0]
scale = (M + 2.0) / M
ref_boxes = box_utils.expand_boxes(boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
for i in range(num_pred):
ref_box = ref_boxes[i, :4]
mask = masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > cfg.TEST.BINARY_THRESH, dtype=np.uint8)
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
mask_image[y1:y2, x1:x2, i] = \
mask[(y1 - ref_box[1]):(y2 - ref_box[1]),
(x1 - ref_box[0]):(x2 - ref_box[0])]
return encode_masks(mask_image)
def _write_coco_bbox_annotations(self, gt_recs, output_dir):
# Build images
dataset = {'images': []}
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width'],
})
# Build categories
dataset['categories'] = []
for cls in self._classes:
if cls == '__background__':
continue
dataset['categories'].append({
'name': cls,
'id': self._class_to_ind[cls],
})
# Build annotations
dataset['annotations'] = []
ann_id = 0
for image_name, rec in gt_recs.items():
for obj in rec['objects']:
x, y = obj['bbox'][0], obj['bbox'][1]
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
dataset['annotations'].append({
'id': str(ann_id),
'bbox': [x, y, w, h],
'area': w * h,
'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']],
})
ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='bbox')
with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file
def _write_coco_segm_annotations(self, gt_recs, output_dir):
# Build images
dataset = {'images': []}
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width'],
})
# Build categories
dataset['categories'] = []
for cls in self._classes:
if cls == '__background__':
continue
dataset['categories'].append({
'name': cls,
'id': self._class_to_ind[cls],
})
# Build annotations
dataset['annotations'] = []
ann_id = 0
for image_name, rec in gt_recs.items():
for obj in rec['objects']:
x, y = obj['bbox'][0], obj['bbox'][1]
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
dataset['annotations'].append({
'id': str(ann_id),
'bbox': [x, y, w, h],
'area': w * h,
'segmentation': {
'size': [rec['height'], rec['width']],
'counts': obj['mask'],
},
'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']],
})
ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='segm')
with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file
def _coco_bbox_results_one_category(self, boxes, cat_id, gt_recs):
ix, results = 0, []
for image_name, rec in gt_recs.items():
dets = boxes[ix]
ix += 1
if isinstance(dets, list) and len(dets) == 0:
continue
dets = dets.astype(np.float)
scores = dets[:, -1]
xs = dets[:, 0]
ys = dets[:, 1]
ws = dets[:, 2] - xs + 1
hs = dets[:, 3] - ys + 1
results.extend(
[{'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
'bbox': [xs[k], ys[k], ws[k], hs[k]],
'score': scores[k],
} for k in range(dets.shape[0])]
)
return results
def _coco_segm_results_one_category(self, boxes, masks, cat_id, gt_recs):
def filter_boxes(dets):
boxes = dets[:, :4]
ws = boxes[:, 2] - boxes[:, 0]
hs = boxes[:, 3] - boxes[:, 1]
keep = np.where((ws >= 1) & (hs >= 1))[0]
return keep
results = []
ix = 0
for image_name, rec in gt_recs.items():
dets = boxes[ix].astype(np.float)
msks = masks[ix]
ix += 1
keep = filter_boxes(dets)
im_h, im_w = rec['height'], rec['width']
if len(keep) == 0:
continue
scores = dets[:, -1]
mask_encode = self._encode_coco_masks(
msks[keep], dets[keep, :4], im_h, im_w)
for k in range(dets[keep].shape[0]):
rle = mask_encode[k]
if sys.version_info >= (3, 0):
rle['counts'] = rle['counts'].decode()
results.append({
'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
'segmentation': rle,
'score': scores[k],
})
return results
def _write_coco_bbox_results(self, all_boxes, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir)
results = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_bbox_results_one_category(
all_boxes[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid:
json.dump(results, fid)
return filename
def _write_coco_segm_results(self, all_boxes, all_masks, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir, type='segm')
results = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_segm_results_one_category(
all_boxes[cls_ind], all_masks[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid:
json.dump(results, fid)
return filename
def _do_coco_bbox_eval(self, coco, res_file):
from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
coco_eval = COCOeval(coco, coco_dt, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
self._print_coco_eval_results(coco_eval)
def _do_coco_segm_eval(self, coco, res_file):
from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
coco_eval = COCOeval(coco, coco_dt, 'segm')
coco_eval.evaluate()
coco_eval.accumulate()
self._print_coco_eval_results(coco_eval)
def _print_coco_eval_results(self, coco_eval):
IoU_lo_thresh = 0.5
IoU_hi_thresh = 0.95
def _get_thr_ind(coco_eval, thr):
ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
(coco_eval.params.iouThrs < thr + 1e-5))[0][0]
iou_thr = coco_eval.params.iouThrs[ind]
assert np.isclose(iou_thr, thr)
return ind
ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
# Precision has dims (iou, recall, cls, area range, max dets)
# Area range index 0: all area ranges
# Max dets index 2: 100 per image
precision = \
coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
ap_default = np.mean(precision[precision > -1])
print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
'~~~~'.format(IoU_lo_thresh, IoU_hi_thresh))
print('{:.1f}'.format(100 * ap_default))
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
# Minus 1 because of __background__
precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2]
ap = np.mean(precision[precision > -1])
print('{:.1f}'.format(100 * ap))
print('~~~~ Summary metrics ~~~~')
coco_eval.summarize()
##############################################
# #
# EVAL-API #
# #
##############################################
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
protocol = cfg.TEST.PROTOCOL
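# Protocols containing 'wo' (read: "without evaluation") only write
# result files and skip the metric computation below.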
if 'voc' in protocol:
self._write_voc_bbox_results(all_boxes, gt_recs, output_dir)
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
self._do_voc_bbox_eval(
gt_recs, output_dir, IoU=0.5,
use_07_metric='2007' in protocol)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
self._do_voc_bbox_eval(
gt_recs, output_dir, IoU=0.7,
use_07_metric='2007' in protocol)
elif 'xml' in protocol:
if cfg.EXP_DIR != '':
output_dir = cfg.EXP_DIR
self._write_xml_bbox_results(all_boxes, gt_recs, output_dir)
elif 'coco' in protocol:
from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE):
coco = COCO(cfg.TEST.JSON_FILE)
# Override the category ids with those from the JSON file before writing results
cats = coco.loadCats(coco.getCatIds())
self._class_to_cat_id = dict(zip(
[c['name'] for c in cats], coco.getCatIds()))
else:
coco = None
res_file = self._write_coco_bbox_results(
all_boxes, gt_recs, output_dir)
if 'wo' not in protocol:
if coco is None:
ann_file = self._write_coco_bbox_annotations(gt_recs, output_dir)
coco = COCO(ann_file)
self._do_coco_bbox_eval(coco, res_file)
def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir):
protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
self._write_voc_segm_results(all_boxes, all_masks, output_dir)
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
self._do_voc_segm_eval(
gt_recs, output_dir, IoU=0.5,
use_07_metric='2007' in protocol)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
self._do_voc_segm_eval(
gt_recs, output_dir, IoU=0.7,
use_07_metric='2007' in protocol)
elif 'coco' in protocol:
from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE):
coco = COCO(cfg.TEST.JSON_FILE)
# Override the category ids with those from the JSON file before writing results
cats = coco.loadCats(coco.getCatIds())
self._class_to_cat_id = dict(
zip([c['name'] for c in cats], coco.getCatIds()))
else:
coco = None
res_file = self._write_coco_segm_results(all_boxes, all_masks, gt_recs, output_dir)
if 'wo' not in protocol:
if coco is None:
coco = COCO(self._write_coco_segm_annotations(gt_recs, output_dir))
self._do_coco_segm_eval(coco, res_file)
def competition_mode(self, on):
if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
try:
import cPickle
except ImportError:
import pickle as cPickle
from lib.core.config import cfg
from lib.pycocotools.mask_utils import mask_rle2im
from lib.utils.boxes import expand_boxes
from lib.utils.mask_transform import mask_overlap
def voc_ap(rec, prec, use_07_metric=False):
""" ap = voc_ap(rec, prec, [use_07_metric])
Compute VOC AP given precision and recall.
If use_07_metric is true, uses the
VOC 07 11 point method (default:False).
"""
if use_07_metric:
# 11 point metric
ap = 0.
for t in np.arange(0., 1.1, 0.1):
if np.sum(rec >= t) == 0:
p = 0
else:
p = np.max(prec[rec >= t])
ap = ap + p / 11.
else:
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], rec, [1.]))
mpre = np.concatenate(([0.], prec, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
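# Worked example (toy values): for rec = [0.25, 0.5, 0.75, 1.0] and
# prec = [1.0, 1.0, 0.67, 0.5], the exact-area branch integrates the
# precision envelope: 0.5 * 1.0 + 0.25 * 0.67 + 0.25 * 0.5 = 0.7925,
# while the 11-point branch averages the max precision at recall
# thresholds 0.0, 0.1, ..., 1.0: (6 * 1.0 + 2 * 0.67 + 3 * 0.5) / 11 ~= 0.804.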
def voc_bbox_eval(
det_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
class_recs = {}
n_pos = 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
n_pos = n_pos + sum(~difficult)
class_recs[image_name] = {
'bbox': bbox,
'difficult': difficult,
'det': det
}
# Read detections
with open(det_file, 'r') as f:
lines = f.readlines()
splitlines = [x.strip().split(' ') for x in lines]
image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
# Avoid IndexError if detecting nothing
if len(BB) == 0:
return 0, 0, -1
# Sort by confidence
sorted_ind = np.argsort(-confidence)
BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind]
# Go down detections and mark TPs and FPs
nd = len(image_ids)
tp, fp = np.zeros(nd), np.zeros(nd)
for d in range(nd):
R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float)
ovmax, jmax = -np.inf, 0
BBGT = R['bbox'].astype(float)
if BBGT.size > 0:
# Compute overlaps intersection
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
iymax = np.minimum(BBGT[:, 3], bb[3])
iw = np.maximum(ixmax - ixmin + 1., 0.)
ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih
# Union
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
(BBGT[:, 2] - BBGT[:, 0] + 1.) *
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
overlaps = inters / uni
ovmax = np.max(overlaps)
jmax = np.argmax(overlaps)
if ovmax > IoU:
if not R['difficult'][jmax]:
if not R['det'][jmax]:
tp[d] = 1.
R['det'][jmax] = 1
else:
fp[d] = 1.
else:
fp[d] = 1.
# compute precision recall
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(n_pos)
# avoid divide by zero in case the first detection matches a difficult
# ground truth
prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric)
return rec, prec, ap
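# A small check of the inclusive-pixel IoU convention above (the '+1'
# terms treat boxes as closed pixel ranges): bb = [0, 0, 9, 9] and a
# ground truth [5, 0, 14, 9] are both 10 x 10 = 100 px; their
# intersection is 5 * 10 = 50 px, the union 100 + 100 - 50 = 150,
# so IoU = 1/3.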
def voc_segm_eval(
det_file,
seg_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
# 0. Constants
M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH
scale = (M + 2.0) / M
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
# 1. Get bbox & mask ground truths
image_names, class_recs, n_pos = [], {}, 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R])
mask = np.array([mask_rle2im([x['mask']], rec['height'], rec['width'])[0] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
n_pos = n_pos + sum(~difficult)
class_recs[image_name] = {
'bbox': bbox,
'mask': mask,
'difficult': difficult,
'det': det
}
image_names.append(image_name)
# 2. Get predict pickle file for this class
with open(det_file, 'rb') as f:
boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f)
# 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs)
box_num = 0
for im_i in range(num_images):
box_num += len(boxes_pkl[im_i])
    # Avoid IndexError if detecting nothing; return a single AP value,
    # consistent with the normal return path of this function
    if box_num == 0:
        return -1
# 4. Re-organize all the predicted boxes
new_boxes = np.zeros((box_num, 5))
new_masks = np.zeros((box_num, M, M))
new_images = []
cnt = 0
for image_ind in range(num_images):
boxes = boxes_pkl[image_ind]
masks = masks_pkl[image_ind]
num_instance = len(boxes)
for box_ind in range(num_instance):
new_boxes[cnt] = boxes[box_ind]
new_masks[cnt] = masks[box_ind]
new_images.append(image_names[image_ind])
cnt += 1
# 5. Rearrange boxes according to their scores
seg_scores = new_boxes[:, -1]
keep_inds = np.argsort(-seg_scores)
new_boxes = new_boxes[keep_inds, :]
new_masks = new_masks[keep_inds, :, :]
num_pred = new_boxes.shape[0]
# 6. Calculate t/f positive
fp = np.zeros((num_pred, 1))
tp = np.zeros((num_pred, 1))
ref_boxes = expand_boxes(new_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
for i in range(num_pred):
image_name = new_images[keep_inds[i]]
if image_name not in class_recs:
print('Warning: {} does not exist in the ground-truths.'.format(image_name))
fp[i] = 1
continue
R = class_recs[image_name]
im_h = gt_recs[image_name]['height']
im_w = gt_recs[image_name]['width']
# Decode mask
ref_box = ref_boxes[i, :4]
mask = new_masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > binary_thresh, dtype=np.uint8)
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]),
(x1 - ref_box[0]): (x2 - ref_box[0])]
# Calculate max region overlap
ovmax, jmax = -1, -1
for j in range(len(R['det'])):
gt_mask_bound = R['bbox'][j].astype(int)
pred_mask_bound = new_boxes[i, :4].astype(int)
crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask)
if ov > ovmax:
ovmax = ov
jmax = j
if ovmax > IoU:
if not R['difficult'][jmax]:
if not R['det'][jmax]:
tp[i] = 1.
R['det'][jmax] = 1
else:
fp[i] = 1.
else:
fp[i] = 1
# 7. Calculate precision
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(n_pos)
# avoid divide by zero in case the first matches a difficult gt
prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric=use_07_metric)
return ap
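# Why scale = (M + 2) / M above: the M x M mask is zero-padded by one
# pixel per side (to M + 2) before cv2.resize, which keeps the bilinear
# interpolation from bleeding at the borders; the reference box must
# grow by the same factor to stay aligned. A sketch with toy numbers,
# assuming expand_boxes scales a box around its center:
if __name__ == '__main__':
    M = 4
    x1, y1, x2, y2 = 10., 10., 29., 29.      # a 20 x 20 pixel box
    ctr_x, ctr_y = (x1 + x2) / 2., (y1 + y2) / 2.
    half_w = (x2 - x1) / 2. * (M + 2.) / M   # grow by 1.5x when M = 4
    half_h = (y2 - y1) / 2. * (M + 2.) / M
    print([ctr_x - half_w, ctr_y - half_h, ctr_x + half_w, ctr_y + half_h])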
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
try:
import cPickle
except ImportError:
import pickle as cPickle
from lib.core.config import cfg
from lib.pycocotools.mask_utils import mask_rle2im
from lib.utils.boxes import expand_boxes
from lib.utils.mask import mask_overlap
def voc_ap(rec, prec, use_07_metric=False):
""" ap = voc_ap(rec, prec, [use_07_metric])
Compute VOC AP given precision and recall.
If use_07_metric is true, uses the
    VOC 07 11-point method (default: False).
"""
if use_07_metric:
# 11 point metric
ap = 0.
for t in np.arange(0., 1.1, 0.1):
if np.sum(rec >= t) == 0:
p = 0
else:
p = np.max(prec[rec >= t])
ap = ap + p / 11.
else:
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], rec, [1.]))
mpre = np.concatenate(([0.], prec, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
def voc_bbox_eval(
det_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
class_recs = {}
n_pos = 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
n_pos = n_pos + sum(~difficult)
class_recs[image_name] = {
'bbox': bbox,
'difficult': difficult,
'det': det
}
# Read detections
with open(det_file, 'r') as f:
lines = f.readlines()
splitlines = [x.strip().split(' ') for x in lines]
image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
# Avoid IndexError if detecting nothing
if len(BB) == 0:
return 0, 0, -1
# Sort by confidence
sorted_ind = np.argsort(-confidence)
BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind]
# Go down detections and mark TPs and FPs
nd = len(image_ids)
tp, fp = np.zeros(nd), np.zeros(nd)
for d in range(nd):
R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float)
ovmax, jmax = -np.inf, 0
BBGT = R['bbox'].astype(float)
if BBGT.size > 0:
# Compute overlaps intersection
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
iymax = np.minimum(BBGT[:, 3], bb[3])
iw = np.maximum(ixmax - ixmin + 1., 0.)
ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih
# Union
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
(BBGT[:, 2] - BBGT[:, 0] + 1.) *
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
overlaps = inters / uni
ovmax = np.max(overlaps)
jmax = np.argmax(overlaps)
if ovmax > IoU:
if not R['difficult'][jmax]:
if not R['det'][jmax]:
tp[d] = 1.
R['det'][jmax] = 1
else:
fp[d] = 1.
else:
fp[d] = 1.
# compute precision recall
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(n_pos)
# avoid divide by zero in case the first detection matches a difficult
# ground truth
prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric)
return rec, prec, ap
def voc_segm_eval(
det_file,
seg_file,
gt_recs,
cls_name,
IoU=0.5,
use_07_metric=False,
):
# 0. Constants
M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH
scale = (M + 2.0) / M
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
# 1. Get bbox & mask ground truths
image_names, class_recs, n_pos = [], {}, 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R])
mask = np.array([mask_rle2im([x['mask']], rec['height'], rec['width'])[0] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
n_pos = n_pos + sum(~difficult)
class_recs[image_name] = {
'bbox': bbox,
'mask': mask,
'difficult': difficult,
'det': det
}
image_names.append(image_name)
# 2. Get predict pickle file for this class
with open(det_file, 'rb') as f:
boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f)
# 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs)
box_num = 0
for im_i in range(num_images):
box_num += len(boxes_pkl[im_i])
    # Avoid IndexError if detecting nothing; return a single AP value,
    # consistent with the normal return path of this function
    if box_num == 0:
        return -1
# 4. Re-organize all the predicted boxes
new_boxes = np.zeros((box_num, 5))
new_masks = np.zeros((box_num, M, M))
new_images = []
cnt = 0
for image_ind in range(num_images):
boxes = boxes_pkl[image_ind]
masks = masks_pkl[image_ind]
num_instance = len(boxes)
for box_ind in range(num_instance):
new_boxes[cnt] = boxes[box_ind]
new_masks[cnt] = masks[box_ind]
new_images.append(image_names[image_ind])
cnt += 1
# 5. Rearrange boxes according to their scores
seg_scores = new_boxes[:, -1]
keep_inds = np.argsort(-seg_scores)
new_boxes = new_boxes[keep_inds, :]
new_masks = new_masks[keep_inds, :, :]
num_pred = new_boxes.shape[0]
# 6. Calculate t/f positive
fp = np.zeros((num_pred, 1))
tp = np.zeros((num_pred, 1))
ref_boxes = expand_boxes(new_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
for i in range(num_pred):
image_name = new_images[keep_inds[i]]
if image_name not in class_recs:
print('Warning: {} does not exist in the ground-truths.'.format(image_name))
fp[i] = 1
continue
R = class_recs[image_name]
im_h = gt_recs[image_name]['height']
im_w = gt_recs[image_name]['width']
# Decode mask
ref_box = ref_boxes[i, :4]
mask = new_masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > binary_thresh, dtype=np.uint8)
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]),
(x1 - ref_box[0]): (x2 - ref_box[0])]
# Calculate max region overlap
ovmax, jmax = -1, -1
for j in range(len(R['det'])):
gt_mask_bound = R['bbox'][j].astype(int)
pred_mask_bound = new_boxes[i, :4].astype(int)
crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask)
if ov > ovmax:
ovmax = ov
jmax = j
if ovmax > IoU:
if not R['difficult'][jmax]:
if not R['det'][jmax]:
tp[i] = 1.
R['det'][jmax] = 1
else:
fp[i] = 1.
else:
fp[i] = 1
# 7. Calculate precision
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(n_pos)
# avoid divide by zero in case the first matches a difficult gt
prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric=use_07_metric)
return ap
......@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.faster_rcnn.layers.proposal_layer import ProposalLayer
from lib.faster_rcnn.layers.proposal_target_layer import ProposalTargetLayer
from lib.faster_rcnn.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.data_layer import DataLayer
from lib.faster_rcnn.proposal_layer import ProposalLayer
from lib.faster_rcnn.proposal_target_layer import ProposalTargetLayer
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.faster_rcnn.generate_anchors import generate_anchors
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
# C4 backbone takes the first stride
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
height, width = features[0].shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
all_anchors = (self.base_anchors.reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A)
        # label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
all_bbox_outside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels first so that positive labels can clobber them
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels last so that negative labels can clobber positives
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds,
size=len(fg_inds) - num_fg,
replace=False,
)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds,
size=len(bg_inds) - num_bg,
replace=False,
)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], 0:4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
all_labels[ix, inds_inside] = labels # label
all_bbox_targets[ix, inds_inside] = bbox_targets
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
labels = all_labels \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, total_anchors))
bbox_targets = all_bbox_targets \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = all_bbox_inside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = all_bbox_outside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
from lib.faster_rcnn.generate_anchors import generate_anchors
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
# C4 backbone takes the first stride
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
height, width = features[0].shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
all_anchors = (self.base_anchors.reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A)
        # label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
all_bbox_outside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels first so that positive labels can clobber them
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels last so that negative labels can clobber positives
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds,
size=len(fg_inds) - num_fg,
replace=False,
)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds,
size=len(bg_inds) - num_bg,
replace=False,
)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
all_labels[ix, inds_inside] = labels # label
all_bbox_targets[ix, inds_inside] = bbox_targets
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
labels = all_labels \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, total_anchors))
bbox_targets = all_bbox_targets \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = all_bbox_inside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = all_bbox_outside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
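# A minimal sketch of the (1, A, 4) + (K, 1, 4) broadcast above: each of
# the A base anchors is replicated at each of the K feature-map cells,
# shifted by that cell's offset in image pixels. Sizes are illustrative.
if __name__ == '__main__':
    stride = 16
    base = np.array([[-8., -8., 8., 8.],           # A = 2 toy anchors
                     [-16., -16., 16., 16.]])
    h, w = 2, 3                                    # tiny feature map, K = 6
    sx, sy = np.meshgrid(np.arange(w) * stride, np.arange(h) * stride)
    shifts = np.vstack((sx.ravel(), sy.ravel(),
                        sx.ravel(), sy.ravel())).transpose()
    A, K = base.shape[0], shifts.shape[0]
    grid = (base.reshape((1, A, 4)) +
            shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    print(grid.reshape((K * A, 4)).shape)          # (12, 4) == (K * A, 4)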
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from lib.core.config import cfg
from lib.utils.blob import im_list_to_blob
class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self.q1_in = self.q2_in = self.q_out = None
self.daemon = True
def get(self, Q_in):
processed_ims, ims_info, all_boxes = [], [], []
for ix in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = Q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[0:2]) + [im_scale])
# Encode boxes by adding the idx of images
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), dtype=np.float32)
im_boxes[:, 0:gt_boxes.shape[1]] = gt_boxes
im_boxes[:, -1] = ix
all_boxes.append(im_boxes)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, dtype=np.float32),
'gt_boxes': np.concatenate(all_boxes, axis=0),
}
def run(self):
while True:
if self.q1_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.q_out.put(self.get(self.q1_in))
elif self.q2_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.q_out.put(self.get(self.q2_in))
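# A toy illustration of the packing in get() above: every image's
# (N_i, 5) gt_boxes gain a trailing image-index column, so the whole
# batch concatenates into one (sum N_i, 6) blob that downstream layers
# can split again by that last column. Values are made up.
if __name__ == '__main__':
    batch_gt = [np.array([[10., 10., 50., 50., 1.]]),
                np.array([[5., 5., 20., 20., 2.],
                          [30., 30., 60., 60., 3.]])]
    packed = []
    for ix, gt_boxes in enumerate(batch_gt):
        im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
        im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, ix
        packed.append(im_boxes)
    print(np.concatenate(packed, axis=0)[:, -1])  # [0. 1. 1.]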
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import multiprocessing
import numpy
from dragon.tools import db
from lib.core.config import cfg
class DataReader(multiprocessing.Process):
"""Collect encoded str from `LMDB`_.
Partition and shuffle records over distributed nodes.
Parameters
----------
source : str
The path of database.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=2048
The number of chunks to split.
"""
def __init__(self, **kwargs):
"""Create a DataReader."""
super(DataReader, self).__init__()
self._source = kwargs.get('source', '')
self._use_shuffle = kwargs.get('shuffle', False)
self._num_chunks = kwargs.get('num_chunks', 2048)
self._part_idx, self._num_parts = 0, 1
self._cursor, self._chunk_cursor = 0, 0
self._chunk_size, self._perm_size = 0, 0
self._head, self._tail, self._num_entries = 0, 0, 0
self._db, self._zfill, self._perm = None, None, None
self._rng_seed = cfg.RNG_SEED
self.q_out = None
self.daemon = True
def element(self):
"""Get the value of current record.
Returns
-------
str
The encoded str.
"""
return self._db.value()
def redirect(self, target):
"""Redirect to the target position.
Parameters
----------
target : int
The key of the record.
Notes
-----
The redirection reopens the database.
You can drop caches by ``echo 3 > /proc/sys/vm/drop_caches``.
        This helps avoid getting stuck when *Database Size* >> *RAM Size*.
"""
self._db.close()
self._db.open(self._source)
self._cursor = target
self._db.set(str(target).zfill(self._zfill))
def reset(self):
"""Reset the cursor and environment."""
if self._num_parts > 1 or self._use_shuffle:
self._chunk_cursor = 0
self._part_idx = (self._part_idx + 1) % self._num_parts
if self._use_shuffle:
self._perm = numpy.random.permutation(self._perm_size)
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
            self._head = self._head * self._chunk_size
if self._head >= self._num_entries: self.next_chunk()
self._tail = self._head + self._chunk_size
self._tail = min(self._num_entries, self._tail)
else:
self._head, self._tail = 0, self._num_entries
self.redirect(self._head)
def next_record(self):
"""Step the cursor of records."""
self._db.next()
self._cursor += 1
def next_chunk(self):
"""Step the cursor of chunks."""
self._chunk_cursor += 1
if self._chunk_cursor >= self._perm_size:
self.reset()
else:
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._head = self._head * self._chunk_size
if self._head >= self._num_entries:
self.next_chunk()
else:
self._tail = self._head + self._chunk_size
self._tail = min(self._num_entries, self._tail)
self.redirect(self._head)
def run(self):
"""Start the process."""
# Fix seed
numpy.random.seed(self._rng_seed)
# Init db
self._db = db.LMDB()
self._db.open(self._source)
self._zfill = self._db.zfill()
self._num_entries = self._db.num_entries()
epoch_size = self._num_entries // self._num_parts + 1
if self._use_shuffle:
if self._num_chunks <= 0:
# Each chunk has at most 1 record (Record-Wise)
self._chunk_size, self._perm_size = 1, epoch_size
else:
                # Search for an optimal chunk size (Chunk-Wise)
min_size, max_size = \
1, self._db._total_size * 1.0 \
/ (self._num_chunks * (1 << 20))
while min_size * 2 < max_size: min_size *= 2
self._perm_size = int(math.ceil(
self._db._total_size * 1.1 /
(self._num_parts * min_size << 20)))
self._chunk_size = int(
self._num_entries * 1.0 /
(self._perm_size * self._num_parts) + 1)
limit = (self._num_parts - 0.5) * self._perm_size * self._chunk_size
if self._num_entries <= limit:
# Roll back to Record-Wise shuffle
self._chunk_size, self._perm_size = 1, epoch_size
else:
# One chunk has at most K records
self._chunk_size, self._perm_size = epoch_size, 1
self._perm = numpy.arange(self._perm_size)
# Init env
self.reset()
# Run!
while True:
self.q_out.put(self.element())
self.next_record()
if self._cursor >= self._tail:
if self._num_parts > 1 or self._use_shuffle:
self.next_chunk()
else:
self.reset()
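# A back-of-the-envelope sketch of the chunk partition driven above:
# each of num_parts nodes owns perm_size chunks of chunk_size records,
# shuffles its chunk order every epoch, and scans [head, tail) within a
# chunk. The numbers below are illustrative, not from a real database.
if __name__ == '__main__':
    num_entries, num_parts, chunk_size = 100, 2, 10
    perm_size = -(-num_entries // (num_parts * chunk_size))  # ceil -> 5
    part_idx = 1                               # this node's partition
    perm = numpy.random.permutation(perm_size)
    head = (part_idx * perm_size + perm[0]) * chunk_size
    tail = min(num_entries, head + chunk_size)
    print(head, tail)                          # a 10-record window on node 1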
......@@ -13,55 +13,70 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from multiprocessing import Queue
import multiprocessing as mp
import time
import dragon
import pprint
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.data.data_reader import DataReader
from lib.faster_rcnn.data.data_transformer import DataTransformer
from lib.faster_rcnn.data.blob_fetcher import BlobFetcher
from lib.faster_rcnn.data_transformer import DataTransformer
from lib.datasets.factory import get_imdb
from lib.utils import logger
from lib.utils.blob import im_list_to_blob
class DataBatch(object):
"""DataBatch aims to prefetch data by ``Triple-Buffering``.
class DataLayer(torch.nn.Module):
"""Generate a mini-batch of data."""
    It takes full advantage of Python's processes and threads,
def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
def forward(self):
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
    which provides a remarkable I/O speed-up for scalable distributed training.
"""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
source : str
The path of database.
dataset : lambda
The creator of a dataset.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=2048
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=128
batch_size : int, optional, default=2
The size of a mini-batch.
prefetch : int, optional, default=5
The prefetch count.
"""
super(DataBatch, self).__init__()
# Init mpi
global_rank, local_rank, group_size = 0, 0, 1
if dragon.mpi.is_init():
group = dragon.mpi.is_parallel()
if group is not None: # DataParallel
global_rank = dragon.mpi.rank()
group_size = len(group)
for i, node in enumerate(group):
if global_rank == node:
local_rank = i
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_default_process_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
......@@ -71,6 +86,7 @@ class DataBatch(object):
self._num_transformers = kwargs.get('num_transformers', -1)
self._max_transformers = kwargs.get('max_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
        # I/O-aware policy
if self._num_transformers == -1:
......@@ -81,66 +97,52 @@ class DataBatch(object):
self._num_transformers = min(
self._num_transformers, self._max_transformers)
# Init queues
self.Q1 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q21 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q22 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q3 = Queue(self._prefetch * self._num_readers)
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q21 = mp.Queue(num_batches * self._batch_size)
self.Q22 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Init readers
# Initialize readers
self._readers = []
for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].q_out = self.Q1
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += local_rank * self._num_readers
self._readers[i]._num_parts = num_parts
self._readers[i]._part_idx = part_idx
self._readers[i]._rng_seed += part_idx
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Init transformers
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers)
transformer._rng_seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1
transformer.q1_out = self.Q21
transformer.q2_out = self.Q22
transformer.q1_out, transformer.q2_out = self.Q21, self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Init blob fetchers
self._fetchers = []
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.q1_in = self.Q21
fetcher.q2_in = self.Q22
fetcher.q_out = self.Q3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
        # Prevent echoing from multiple nodes
if local_rank == 0:
self.echo()
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate(self._fetchers)
logger.info('Terminating BlobFetcher ......')
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminating DataTransformer ......')
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminating DataReader......')
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
......@@ -156,20 +158,27 @@ class DataBatch(object):
"""
return self.Q3.get()
def echo(self):
"""Print I/O Information.
Returns
-------
None
"""
print('---------------------------------------------------------')
print('BatchFetcher({} Threads), Using config:'.format(
self._num_readers + self._num_transformers + self._num_fetchers))
params = {'queue_size': self._prefetch,
'n_readers': self._num_readers,
'n_transformers': self._num_transformers,
'n_fetchers': self._num_fetchers}
pprint.pprint(params)
print('---------------------------------------------------------')
def run(self):
"""Start the process to produce batches."""
def produce(q_in):
processed_ims, ims_info, all_boxes = [], [], []
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[:2]) + [im_scale])
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, image_index
all_boxes.append(im_boxes)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, dtype=np.float32),
'gt_boxes': np.concatenate(all_boxes, axis=0),
}
q1, q2 = self.Q21, self.Q22
while True:
if q1.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q1))
elif q2.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q2))
            q1, q2 = q2, q1  # Swap the queues so both are drained evenly
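# Why two output queues: DataTransformer routes portrait images
# (aspect ratio > 1) to Q21 and landscape images to Q22, so each batch
# mixes only one orientation and im_list_to_blob pads less. A toy sketch
# of the padding saved, with made-up image shapes:
if __name__ == '__main__':
    portrait = [(800, 600), (790, 610)]
    landscape = [(600, 800), (610, 790)]
    pad_hw = lambda shapes: tuple(np.max(np.array(shapes), axis=0))
    print(pad_hw(portrait + landscape))         # (800, 800): mixed batch
    print(pad_hw(portrait), pad_hw(landscape))  # (800, 610) (610, 800)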
......@@ -14,22 +14,13 @@ from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
import numpy.random as npr
try:
import cv2
except ImportError as e:
print('Failed to import cv2. Error: {0}'.format(str(e)))
try:
import PIL.Image
except ImportError as e:
print('Failed to import PIL. Error: {0}'.format(str(e)))
import cv2
import numpy as np
from lib.core.config import cfg
from lib.proto import anno_pb2 as pb
from lib.utils import logger
from lib.utils.blob import prep_im_for_blob
from lib.utils.boxes import flip_boxes
class DataTransformer(multiprocessing.Process):
......@@ -47,44 +38,45 @@ class DataTransformer(multiprocessing.Process):
def make_roi_dict(
self,
ann_datum,
example,
im_scale,
apply_flip=False,
offsets=None,
):
annotations = ann_datum.annotation
n_objects = 0
if not self._use_diff:
for ann in annotations:
if not ann.difficult:
for obj in example['object']:
if obj.get('difficult', 0) == 0:
n_objects += 1
else:
n_objects = len(annotations)
n_objects = len(example['object'])
roi_dict = {
'width': ann_datum.datum.width,
'height': ann_datum.datum.height,
'width': example['width'],
'height': example['height'],
'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), 'float32'),
}
# Filter the difficult instances
rec_idx = 0
for ann in annotations:
if not self._use_diff and ann.difficult:
object_idx = 0
for obj in example['object']:
if not self._use_diff and \
obj.get('difficult', 0) > 0:
continue
roi_dict['boxes'][rec_idx, :] = [
max(0, ann.x1),
max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1),
roi_dict['boxes'][object_idx, :] = [
max(0, obj['xmin']),
max(0, obj['ymin']),
min(obj['xmax'], example['width'] - 1),
min(obj['ymax'], example['height'] - 1),
]
roi_dict['gt_classes'][rec_idx] = self._class_to_ind[ann.name]
rec_idx += 1
roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[obj['name']]
object_idx += 1
# Flip the boxes if necessary
if apply_flip:
roi_dict['boxes'] = _flip_boxes(
roi_dict['boxes'] = flip_boxes(
roi_dict['boxes'], roi_dict['width'])
# Scale the boxes to the detecting scale
......@@ -102,50 +94,34 @@ class DataTransformer(multiprocessing.Process):
return roi_dict
@classmethod
def get_image(cls, serialized):
datum = pb.AnnotatedDatum()
datum.ParseFromString(serialized)
datum = datum.datum
im = np.fromstring(datum.data, np.uint8)
return cv2.imdecode(im, -1) if datum.encoded is True else \
im.reshape((datum.height, datum.width, datum.channels))
def get_image(cls, example):
img = np.frombuffer(example['content'], np.uint8)
return cv2.imdecode(img, -1)
@classmethod
def get_annotations(cls, serialized):
datum = pb.AnnotatedDatum()
datum.ParseFromString(serialized)
filename = datum.filename
annotations = datum.annotation
def get_annotations(cls, example):
objects = []
for ix, ann in enumerate(annotations):
for ix, obj in enumerate(example['object']):
objects.append({
'name': ann.name,
'difficult': int(ann.difficult),
'bbox': [ann.x1, ann.y1, ann.x2, ann.y2],
'mask': ann.mask,
'name': obj['name'],
'difficult': obj.get('difficult', 0),
'bbox': [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']],
})
return filename, objects
def get(self, serialized):
datum = pb.AnnotatedDatum()
datum.ParseFromString(serialized)
im_datum = datum.datum
im = np.fromstring(im_datum.data, np.uint8)
if im_datum.encoded is True:
im = cv2.imdecode(im, -1)
else:
h, w = im_datum.height, im_datum.width
im = im.reshape((h, w, im_datum.channels))
return example['id'], objects
def get(self, example):
img = np.frombuffer(example['content'], np.uint8)
img = cv2.imdecode(img, -1)
# Scale
scale_indices = npr.randint(len(cfg.TRAIN.SCALES))
scale_indices = np.random.randint(len(cfg.TRAIN.SCALES))
target_size = cfg.TRAIN.SCALES[scale_indices]
im, im_scale, jitter = prep_im_for_blob(im, target_size, cfg.TRAIN.MAX_SIZE)
im, im_scale, jitter = prep_im_for_blob(img, target_size, cfg.TRAIN.MAX_SIZE)
# Flip
apply_flip = False
if self._use_flipped:
if npr.randint(0, 2) > 0:
if np.random.randint(2) > 0:
im = im[:, ::-1, :]
apply_flip = True
......@@ -160,8 +136,8 @@ class DataTransformer(multiprocessing.Process):
# To a square (target_size, target_size)
im, offsets = _get_image_with_target_size([target_size] * 2, im)
# Datum -> RoIDict
roi_dict = self.make_roi_dict(datum, im_scale, apply_flip, offsets)
# Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
......@@ -171,29 +147,16 @@ class DataTransformer(multiprocessing.Process):
return im, im_scale, gt_boxes
def run(self):
npr.seed(self._rng_seed)
np.random.seed(self._rng_seed)
while True:
serialized = self.q_in.get()
data = self.get(serialized)
            # Ensure that there is at least 1 ground-truth
if len(data[2]) < 1:
continue
aspect_ratio = float(data[0].shape[0]) / data[0].shape[1]
if aspect_ratio > 1.0:
self.q1_out.put(data)
outputs = self.get(self.q_in.get())
if len(outputs[2]) < 1:
continue # Ignore the non-object image
aspect_ratio = float(outputs[0].shape[0]) / outputs[0].shape[1]
if aspect_ratio > 1.:
self.q1_out.put(outputs)
else:
self.q2_out.put(data)
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes
self.q2_out.put(outputs)
def _get_image_with_target_size(target_size, img):
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py>
#
# ------------------------------------------------------------
import numpy as np
# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
# >> anchors
#
# anchors =
#
# -83 -39 100 56
# -175 -87 192 104
# -359 -183 376 200
# -55 -55 72 72
# -119 -119 136 136
# -247 -247 264 264
# -35 -79 52 96
# -79 -167 96 184
# -167 -343 184 360
# array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.],
# [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.],
# [-119., -119., 136., 136.],
# [-247., -247., 264., 264.],
# [ -35., -79., 52., 96.],
# [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]])
def generate_anchors(
base_size=16,
ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6),
):
"""
Generate anchor (reference) windows by enumerating aspect ratios X
scales wrt a reference (0, 0, 15, 15) window.
"""
base_anchor = np.array([1, 1, base_size, base_size]) - 1
ratio_anchors = _ratio_enum(base_anchor, ratios)
anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
for i in range(ratio_anchors.shape[0])])
return anchors
def generate_anchors_v2(
stride=16,
ratios=(0.5, 1, 2),
sizes=(32, 64, 128, 256, 512),
):
"""
Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
are centered on stride / 2, have (approximate) sqrt areas of the specified
sizes, and aspect ratios as given.
"""
return generate_anchors(
base_size=stride,
ratios=ratios,
scales=np.array(sizes, dtype=np.float) / stride,
)
def _whctrs(anchor):
"""Return width, height, x center, and y center for an anchor (window)."""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
y_ctr = anchor[1] + 0.5 * (h - 1)
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
"""
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1),
y_ctr + 0.5 * (hs - 1)))
return anchors
def _ratio_enum(anchor, ratios):
"""Enumerate a set of anchors for each aspect ratio wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
ws = np.round(np.sqrt(size_ratios))
hs = np.round(ws * ratios)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
def _scale_enum(anchor, scales):
"""Enumerate a set of anchors for each scale wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
if __name__ == '__main__':
print(generate_anchors())
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
# >> anchors
#
# anchors =
#
# -83 -39 100 56
# -175 -87 192 104
# -359 -183 376 200
# -55 -55 72 72
# -119 -119 136 136
# -247 -247 264 264
# -35 -79 52 96
# -79 -167 96 184
# -167 -343 184 360
# array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.],
# [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.],
# [-119., -119., 136., 136.],
# [-247., -247., 264., 264.],
# [ -35., -79., 52., 96.],
# [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]])
def generate_anchors(
base_size=16,
ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6),
):
"""
Generate anchor (reference) windows by enumerating aspect ratios X
scales wrt a reference (0, 0, 15, 15) window.
"""
base_anchor = np.array([1, 1, base_size, base_size]) - 1
ratio_anchors = _ratio_enum(base_anchor, ratios)
anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
for i in range(ratio_anchors.shape[0])])
return anchors
def generate_anchors_v2(
stride=16,
ratios=(0.5, 1, 2),
sizes=(32, 64, 128, 256, 512),
):
"""
Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
are centered on stride / 2, have (approximate) sqrt areas of the specified
sizes, and aspect ratios as given.
"""
return generate_anchors(
base_size=stride,
ratios=ratios,
scales=np.array(sizes, dtype=np.float) / stride,
)
def _whctrs(anchor):
"""Return width, height, x center, and y center for an anchor (window)."""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
y_ctr = anchor[1] + 0.5 * (h - 1)
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
"""
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1),
y_ctr + 0.5 * (hs - 1)))
return anchors
def _ratio_enum(anchor, ratios):
"""Enumerate a set of anchors for each aspect ratio wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
ws = np.round(np.sqrt(size_ratios))
hs = np.round(ws * ratios)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
def _scale_enum(anchor, scales):
"""Enumerate a set of anchors for each scale wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
if __name__ == '__main__':
print(generate_anchors())
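# A quick consistency check, using only this module's definitions: the
# v2 helper reproduces the classic anchors whenever sizes / stride
# equals the classic scales (here 128/16, 256/16, 512/16 = 8, 16, 32).
if __name__ == '__main__':
    classic = generate_anchors()  # scales = 2 ** np.arange(3, 6)
    v2 = generate_anchors_v2(stride=16, sizes=(128, 256, 512))
    print(np.allclose(classic, v2))  # expected: True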
# --------------------------------------------------------
# Mask R-CNN @ Detectron
# Copyright (c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data.data_batch import DataBatch
class DataLayer(torch.nn.Module):
def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'source': database.source,
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': 0, # Record-Wise Shuffle
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
def forward(self):
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
# Generate proposals from shifted anchors
height, width = cls_prob.shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
anchors = \
self.base_anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = anchors.reshape((K * A, 4))
# Prepare for the outputs
batch_rois = []
# scores & deltas are (1, A, H, W) format
# Transpose to (1, H, W, A)
batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1))
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1))
# Extract RoIs separately
for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix].reshape((-1, 4))
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
order = np.argsort(-scores.squeeze())
else:
                # Avoid sorting possibly large arrays; first partition to get the top K
                # unsorted, then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2])
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :]
# Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN':
return rpn_rois
else:
return [blob_to_tensor(rpn_rois)]
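The pre-NMS top-K selection above avoids a full sort by first partitioning with np.argpartition and then sorting only the K survivors. A self-contained sketch of the same pattern, with toy scores and an assumed K of 2:
import numpy as np
scores = np.array([0.1, 0.9, 0.3, 0.7])
top_k = 2
# argpartition places the top-k entries (unordered) in the first k slots in O(n).
inds = np.argpartition(-scores, top_k)[:top_k]
# Sort only those k entries to get a descending-score order.
order = inds[np.argsort(-scores[inds])]
print(order)          # [1 3]
print(scores[order])  # [0.9 0.7]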
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets."""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return {
'rois': [blob_to_tensor(batch_outputs['rois'])],
'labels': blob_to_tensor(batch_outputs['labels']),
'bbox_targets': blob_to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': blob_to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': blob_to_tensor(batch_outputs['bbox_outside_weights']),
}
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois(
all_rois,
gt_boxes,
fg_rois_per_image,
rois_per_image,
num_classes,
):
"""Generate a random sample of RoIs."""
overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float),
)
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
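The compact-to-expanded conversion in _get_bbox_regression_labels above can be traced with a tiny standalone example, assuming num_classes = 3 and one foreground RoI of class 2 (toy target values); only columns 8..11 of the 4K-wide blob receive targets and unit inside-weights:
import numpy as np
num_classes = 3
# Compact form: one row per RoI, (class, tx, ty, tw, th).
bbox_target_data = np.array([[2.0, 0.1, -0.2, 0.3, 0.0],   # fg RoI, class 2
                             [0.0, 0.0, 0.0, 0.0, 0.0]])   # bg RoI, class 0
bbox_targets = np.zeros((2, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros_like(bbox_targets)
for ind in np.where(bbox_target_data[:, 0] > 0)[0]:
    start = int(4 * bbox_target_data[ind, 0])
    bbox_targets[ind, start:start + 4] = bbox_target_data[ind, 1:]
    bbox_inside_weights[ind, start:start + 4] = 1.0
print(bbox_targets[0])         # non-zero only in columns 8..11
print(bbox_inside_weights[0])  # 1.0 only in columns 8..11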
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image
from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32)
blobs['data'] = torch.from_numpy(blobs['data'])
    # Forward pass
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Decode results
batch_rois = tensor_to_blob(outputs['rois'])
batch_scores = tensor_to_blob(outputs['cls_prob'])
batch_deltas = tensor_to_blob(outputs['bbox_pred'])
batch_boxes = bbox_transform_inv(
boxes=batch_rois[:, 1:5],
deltas=batch_deltas,
weights=cfg.BBOX_REG_WEIGHTS,
)
scores_wide, boxes_wide = [], []
for im_idx in range(len(ims)):
indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0]
boxes = batch_boxes[indices]
boxes /= ims_scale[im_idx]
clip_tiled_boxes(boxes, raw_image.shape)
scores_wide.append(batch_scores[indices])
boxes_wide.append(boxes)
return (np.vstack(scores_wide), np.vstack(boxes_wide)) \
if len(scores_wide) > 1 else (scores_wide[0], boxes_wide[0])
def test_net(detector, server):
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()}
for i in range(num_images):
image_id, raw_image = server.get_image()
_t['im_detect'].tic()
scores, boxes = im_detect(detector, raw_image)
_t['im_detect'].toc()
_t['misc'].tic()
boxes_this_image = [[]]
for j in range(1, num_classes):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4]
cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(
cls_detections, cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(cls_detections, cfg.TEST.NMS, force_cpu=True)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(
raw_image, classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
filename=server.get_save_filename(image_id),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(i + 1, num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='')
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
print('Evaluating detections')
server.evaluate_detections(all_boxes)
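The per-image cap in test_net above keeps the DETECTIONS_PER_IM highest-scoring boxes across all classes by thresholding at the N-th best score. A minimal sketch of that thresholding with toy per-class scores and an assumed cap of 3:
import numpy as np
detections_per_im = 3
# Hypothetical per-class score arrays for one image.
class_scores = [np.array([0.9, 0.4]), np.array([0.8, 0.6, 0.2])]
image_scores = np.hstack(class_scores)
if len(image_scores) > detections_per_im:
    # The N-th highest score becomes the keep threshold.
    image_thresh = np.sort(image_scores)[-detections_per_im]
    kept = [s[s >= image_thresh] for s in class_scores]
    print(image_thresh)  # 0.6
    print(kept)          # [array([0.9]), array([0.8, 0.6])]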
......@@ -13,6 +13,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.fpn.layers.anchor_target_layer import AnchorTargetLayer
from lib.fpn.layers.proposal_layer import ProposalLayer
from lib.fpn.layers.proposal_target_layer import ProposalTargetLayer
from lib.fpn.anchor_target_layer import AnchorTargetLayer
from lib.fpn.proposal_layer import ProposalLayer
from lib.fpn.proposal_target_layer import ProposalTargetLayer
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))
)
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
ratios=self.ratios,
scales=np.array(scale),
)
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
all_anchors, total_anchors = [], 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors)
total_anchors += anchors.shape[0]
all_anchors = np.vstack(all_anchors)
# label: 1 is positive, 0 is negative, -1 is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32)
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps,
np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :],
gt_boxes[argmax_overlaps[fg_inds], 0:4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
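The labeling above marks an anchor positive when its best IoU clears RPN_POSITIVE_OVERLAP, negative when it falls below RPN_NEGATIVE_OVERLAP, and -1 (ignored) otherwise, then randomly disables surplus positives. A standalone sketch with toy overlaps and hypothetical thresholds of 0.7 and 0.3:
import numpy as np
import numpy.random as npr
pos_thresh, neg_thresh = 0.7, 0.3  # stand-ins for the cfg.TRAIN.RPN_*_OVERLAP values
max_overlaps = np.array([0.85, 0.5, 0.1, 0.72])  # best IoU per anchor
labels = np.full(max_overlaps.shape, -1.0)  # -1: don't care
labels[max_overlaps >= pos_thresh] = 1      # foreground
labels[max_overlaps < neg_thresh] = 0       # background
print(labels)  # [ 1. -1.  0.  1.]
# Randomly disable surplus positives, as the npr.choice calls above do.
num_fg = 1
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
    disable = npr.choice(fg_inds, size=len(fg_inds) - num_fg, replace=False)
    labels[disable] = -1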
# --------------------------------------------------------
# Mask R-CNN @ Detectron
# Copyright (c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))
)
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
ratios=self.ratios,
scales=np.array(scale),
)
)
def generate_grid_anchors(self, features):
# Generate proposals from shifted anchors
anchors_wide = []
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
anchors_wide.append(anchors)
return np.vstack(anchors_wide)
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
all_anchors = self.generate_grid_anchors(features) # [n, 4]
if cls_prob.shape[0] != num_images or \
bbox_pred.shape[0] != num_images:
logger.fatal('Incorrect num of images: {}'.format(num_images))
# Prepare for the outputs
batch_rois = []
batch_scores = cls_prob.numpy(True)
batch_deltas = bbox_pred.numpy(True) \
.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
# Extract RoIs separately
for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix] # [n, 4]
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
order = np.argsort(-scores.squeeze())
else:
                # Avoid sorting possibly large arrays; first partition to get
                # the top K unsorted, then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
            # 3. Remove predicted boxes with either height or width < threshold
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
            # 4. Apply nms (e.g. threshold = 0.7)
            # 5. Take post_nms_topN (e.g. 300)
            # 6. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :]
# Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN':
return rpn_rois
else:
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level)
all_rois = []
for i in range(K):
lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling
all_rois.append(blob_to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else:
all_rois.append(blob_to_tensor(rpn_rois[lv_indices]))
return all_rois
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
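_map_rois_to_fpn_levels above implements the FPN paper heuristic k = floor(k0 + log2(sqrt(w * h) / s0)). A quick standalone check with three square RoIs whose scales are exactly 112, 224 and 448, assuming the default canonical scale 224, canonical level 4, and levels clipped to [2, 5]:
import numpy as np
# RoIs as (batch_ind, x1, y1, x2, y2); sizes chosen to hit scales
# of 112, 224 and 448 exactly (toy values).
rois = np.array([[0, 0, 0, 111, 111],
                 [0, 0, 0, 223, 223],
                 [0, 0, 0, 447, 447]], dtype=np.float32)
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)                           # [112, 224, 448]
levels = np.floor(4 + np.log2(s / 224 + 1e-6))
print(np.clip(levels, 2, 5))                   # [3. 4. 5.]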
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, see
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets.
"""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
self.fake_outputs = {
'rois': np.array([[0, 0, 0, 1, 1]], dtype=np.float32),
'labels': np.array([-1], dtype=np.float32),
'bbox_targets': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_inside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
}
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)]
_fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
outputs=outputs,
keys=keys,
levels=lvs_indices,
)
return {
'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Sample a batch of RoIs comprising foreground and background examples."""
# overlaps: (rois x gt_boxes)
overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
def _fmap_rois(inputs, fake_outputs, outputs, keys, levels):
def impl(a, b, indices):
return a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(impl(inputs[i], fake_outputs[key], inds))
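_fmap_rois above substitutes a pre-built fake entry whenever a pyramid level receives no RoIs, so the downstream RoI pooling never sees an empty blob. A minimal sketch of that substitution, with toy RoIs and hypothetical per-level index lists:
import numpy as np
rois = np.array([[0, 0, 0, 20, 20],
                 [0, 5, 5, 400, 400]], dtype=np.float32)
fake_roi = np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)  # batch_ind -1
# Hypothetical level assignment: level 0 gets both RoIs, level 1 gets none.
levels = [np.array([0, 1]), np.array([], dtype=np.int64)]
per_level = [rois[inds] if len(inds) > 0 else fake_roi for inds in levels]
print(per_level[0].shape)  # (2, 5)
print(per_level[1])        # [[-1.  0.  0.  1.  1.]]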
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets.
"""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
self.fake_outputs = {
'rois': np.array([[0, 0, 0, 1, 1]], dtype=np.float32),
'labels': np.array([-1], dtype=np.float32),
'bbox_targets': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_inside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
}
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
        outputs = {key: [] for key in keys}
        batch_outputs = {key: [] for key in keys}
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)]
_fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
outputs=outputs,
keys=keys,
levels=lvs_indices,
)
return {
'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
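# Output structure note (editorial): 'rois' is a K-element list with one
# blob per FPN level, while 'labels' and the bbox blobs are concatenated
# across levels in the same level-major order, e.g. for K=4:
#     rois:   [P2_rois, P3_rois, P4_rois, P5_rois]
#     labels: concat(P2_labels, P3_labels, P4_labels, P5_labels)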
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Import custom modules
from lib.modeling.base import affine
from lib.modeling.base import bn
from lib.modeling.base import conv1x1
from lib.modeling.base import conv3x3
from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.fpn import FPN
from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class WideResBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out, stride=1, downsample=None):
super(WideResBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out)
self.downsample = downsample
self.relu = torch.nn.ReLU(inplace=True)
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(residual)
out += residual
out = self.relu(out)
return out
class InceptionBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out):
super(InceptionBlock, self).__init__()
self.conv1 = conv1x1(dim_in, dim_out)
self.bn1 = affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out // 2)
self.bn2 = affine(dim_out // 2)
self.conv3a = conv3x3(dim_out // 2, dim_out)
self.bn3a = affine(dim_out)
self.conv3b = conv3x3(dim_out, dim_out)
self.bn3b = affine(dim_out)
self.conv4 = conv3x3(dim_out * 3, dim_out)
self.bn4 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
def forward(self, x):
residual = x
out = self.conv1(x)
out_1x1 = self.bn1(out)
out_1x1 = self.relu(out_1x1)
out = self.conv2(out_1x1)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3a(out)
out_3x3_a = self.bn3a(out)
out_3x3_a = self.relu(out_3x3_a)
out = self.conv3b(out_1x1)
out_3x3_b = self.bn3b(out)
out_3x3_b = self.relu(out_3x3_b)
out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], dim=1)
out = self.conv4(out)
out = self.bn4(out)
out += residual
out = self.relu(out)
return out
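# Channel bookkeeping for the block above (editorial check): the 1x1 branch,
# the stacked 3x3 branch (conv2 -> conv3a), and the single 3x3 branch (conv3b
# applied to the 1x1 output) each yield dim_out channels, which is why conv4
# expects dim_out * 3 input channels.
import numpy as np

_branches = [np.zeros((1, 64, 8, 8))] * 3   # three dim_out-channel branches
assert np.concatenate(_branches, axis=1).shape[1] == 64 * 3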
class AirNet(torch.nn.Module):
def __init__(self, blocks, num_stages):
super(AirNet, self).__init__()
self.dim_in, filters = 64, [64, 128, 256, 384]
self.feature_dims = [None, None] + \
filters[1:num_stages - 1]
self.conv1 = torch.nn.Conv2d(
3, 64,
kernel_size=7,
stride=2,
padding=3,
bias=False,
)
self.bn1 = affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d(
kernel_size=2,
stride=2,
padding=0,
ceil_mode=True,
)
self.layer1 = self.make_blocks(filters[0], blocks[0])
self.layer2 = self.make_blocks(filters[1], blocks[1], 2)
if num_stages >= 4:
self.layer3 = self.make_blocks(filters[2], blocks[2], 2)
if num_stages >= 5:
self.layer4 = self.make_blocks(filters[3], blocks[3], 2)
self.reset_parameters()
def reset_parameters(self):
# The Kaiming Initialization
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_(
m.weight,
# Fix the gain for [-127, 127]
a=1,
) # Xavier Initialization
def make_blocks(self, dim_out, blocks, stride=1):
downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out),
)
layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)]
self.dim_in = dim_out
for i in range(1, len(blocks)):
if blocks[i] == 'r':
layers.append(WideResBlock(dim_out, dim_out))
elif blocks[i] == 'i':
layers.append(InceptionBlock(dim_out, dim_out))
else:
raise ValueError('Unknown block flag: ' + blocks[i])
return torch.nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
outputs = [None, None, self.layer2(x)]
if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])]
if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])]
return outputs
def airnet(num_stages):
blocks = (
('r', 'r'), # conv2
('r', 'i'), # conv3
('r', 'i'), # conv4
('r', 'i'), # conv5
)
return AirNet(blocks, num_stages)
def make_airnet_(): return airnet(5)
def make_airnet_3b(): return airnet(3)
def make_airnet_4b(): return airnet(4)
def make_airnet_5b(): return airnet(5)
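# How the stage spec decodes (editorial sketch): 'r' -> WideResBlock,
# 'i' -> InceptionBlock; the first block of each stage is always a
# WideResBlock, and num_stages=N keeps stages layer1..layer(N-1).
_blocks = (('r', 'r'), ('r', 'i'), ('r', 'i'), ('r', 'i'))
for _stage, _spec in enumerate(_blocks[:3], start=1):   # airnet(4)
    _kinds = ['WideResBlock' if _f == 'r' else 'InceptionBlock'
              for _f in _spec[1:]]
    print('layer{}: WideResBlock -> {}'.format(_stage, ' -> '.join(_kinds)))
# layer1: WideResBlock -> WideResBlock
# layer2: WideResBlock -> InceptionBlock
# layer3: WideResBlock -> InceptionBlock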
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define some basic structures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
def bn(dim_in, eps=1e-5):
"""The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps)
def conv1x1(dim_in, dim_out, stride=1, bias=False):
"""1x1 convolution."""
return torch.nn.Conv2d(
dim_in,
dim_out,
kernel_size=1,
stride=stride,
bias=bias,
)
def conv3x3(dim_in, dim_out, stride=1, bias=False):
"""3x3 convolution with padding."""
return torch.nn.Conv2d(
dim_in,
dim_out,
kernel_size=3,
stride=stride,
padding=1,
bias=bias,
)
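# Typical composition of these helpers (editorial sketch; runnable only with
# the dragon torch shim installed): the projection shortcut of a residual
# block, i.e. a strided 1x1 conv followed by the frozen affine.
import dragon.vm.torch as torch

_downsample = torch.nn.Sequential(
    conv1x1(64, 128, stride=2),   # halve resolution, widen channels
    affine(128),                  # frozen per-channel scale/shift
)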
......@@ -35,11 +35,13 @@ class Detector(torch.nn.Module):
``lib.core.config`` for their hyper-parameters.
"""
def __init__(self):
super(Detector, self).__init__()
model = cfg.MODEL.TYPE
backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:]
self.recorder = None
# + Data Loader
self.data_layer = importlib.import_module(
......@@ -92,9 +94,14 @@ class Detector(torch.nn.Module):
Parameters
----------
inputs : dict or None
inputs : dict, optional
The inputs.
Returns
-------
dict
The outputs.
"""
# 0. Get the inputs
if inputs is None:
......@@ -161,7 +168,6 @@ class Detector(torch.nn.Module):
"""Optimize the graph for the inference.
It usually involves the removing of BN or Affine.
"""
##################################
# Merge Affine into Convolution #
......
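# The "Merge Affine into Convolution" pass boils down to weight algebra: for
# y = alpha * conv(x) + beta, scale each output filter by its alpha and absorb
# beta into the bias. An editorial numpy sketch of that rewrite (illustrative,
# not the repo's actual implementation):
import numpy as np

def _fuse_conv_affine(w, b, alpha, beta):
    # w: (C_out, C_in, kH, kW); b: (C_out,) or None
    w_fused = w * alpha.reshape(-1, 1, 1, 1)
    b_fused = (b if b is not None else 0.) * alpha + beta
    return w_fused, b_fused

_w = np.random.randn(8, 3, 3, 3).astype(np.float32)
_a, _b = np.random.rand(8), np.random.rand(8)
_wf, _bf = _fuse_conv_affine(_w, None, _a, _b)
assert _wf.shape == _w.shape and _bf.shape == (8,)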
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import importlib
_STORE = collections.defaultdict(dict)
###########################################
# #
# Body #
# #
###########################################
# ResNet
for D in [18, 34, 50, 101, 152, 200, 269]:
_STORE['BODY']['resnet{}'.format(D)] = \
'lib.modeling.resnet.make_resnet_{}'.format(D)
# VGG
for D in [16, 19]:
for T in ['', '_reduced_300', '_reduced_512']:
_STORE['BODY']['vgg{}{}'.format(D, T)] = \
'lib.modeling.vgg.make_vgg_{}{}'.format(D, T)
# AirNet
for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D)
def get_template_func(name, sets, desc):
name = name.lower()
if name not in sets:
raise ValueError(
            'The {} `{}` was not registered.\n'
            'Registered modules: [{}]'.format(
                desc, name, ', '.join(sets.keys())))
module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1]
try:
module = importlib.import_module(module_name)
return getattr(module, func_name)
    except ImportError:
        raise ValueError('Cannot import module from: ' + module_name)
def get_body_func(name):
return get_template_func(
name, _STORE['BODY'], 'Body')
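# Editorial usage sketch: backbones are resolved by registry name at run
# time (the module path lib.modeling.factory is assumed here):
from lib.modeling.factory import get_body_func

_body_fn = get_body_func('resnet50')   # -> lib.modeling.resnet.make_resnet_50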
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.ops.modules import RPNDecoder
class FastRCNN(torch.nn.Module):
"""Generate proposal regions for R-CNN series.
The pipeline is as follows:
... -> RoIs \ /-> cls_score -> cls_loss
-> RoIFeatureXform -> MLP
... -> Features / \-> bbox_pred -> bbox_loss
"""
def __init__(self, dim_in=256):
super(FastRCNN, self).__init__()
if len(cfg.RPN.STRIDES) > 1:
            # RPN with multiple strides (i.e., FPN)
from lib.fpn import ProposalLayer, ProposalTargetLayer
else:
            # RPN with single stride (i.e., C4)
from lib.faster_rcnn import ProposalLayer, ProposalTargetLayer
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = torch.nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.rpn_decoder = RPNDecoder()
self.proposal_layer = ProposalLayer()
self.proposal_target_layer = ProposalTargetLayer()
self.softmax = torch.nn.Softmax(dim=1)
self.relu = torch.nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False)
self.roi_func = {
'RoIPool': torch.vision.ops.roi_pool,
'RoIAlign': torch.vision.ops.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD]
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1., reduction='batch_size')
# Compute spatial scales for multiple strides
roi_levels = [level for level in range(
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)]
self.spatial_scales = [1.0 / (2 ** level) for level in roi_levels]
self.reset_parameters()
def reset_parameters(self):
# Careful initialization for Fast R-CNN
torch.nn.init.normal_(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters():
if 'bias' in name:
torch.nn.init.constant_(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func(
feature, rois,
output_size=(
cfg.FRCNN.ROI_XFORM_RESOLUTION,
cfg.FRCNN.ROI_XFORM_RESOLUTION,
),
spatial_scale=spatial_scale,
)
def forward(self, **kwargs):
# Generate Proposals
# Apply the CXX implementation during inference
proposal_func = self.proposal_layer \
if self.training else self.rpn_decoder
self.rcnn_data = {
'rois': proposal_func(
kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data),
kwargs['rpn_bbox_pred'],
kwargs['ims_info'],
)
}
# Generate Targets from Proposals
if self.training:
self.rcnn_data.update(
self.proposal_target_layer(
rpn_rois=self.rcnn_data['rois'],
gt_boxes=kwargs['gt_boxes'],
)
)
# Transform RoI Feature
roi_features = []
if len(self.rcnn_data['rois']) > 1:
for i, spatial_scale in enumerate(self.spatial_scales):
roi_features.append(
self.RoIFeatureTransform(
kwargs['features'][i],
self.rcnn_data['rois'][i],
spatial_scale,
)
)
roi_features = torch.cat(roi_features, dim=0)
else:
spatial_scale = 1.0 / cfg.RPN.STRIDES[0]
roi_features = \
self.RoIFeatureTransform(
kwargs['features'][0],
self.rcnn_data['rois'][0],
spatial_scale,
)
# Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim)
rcnn_output = self.relu(self.fc6(roi_features))
rcnn_output = self.relu(self.fc7(rcnn_output))
# Compute rcnn logits
cls_score = self.cls_score(rcnn_output).float()
outputs = collections.OrderedDict({
'bbox_pred':
self.bbox_pred(rcnn_output).float(),
})
if self.training:
# Compute rcnn losses
outputs.update(collections.OrderedDict({
'cls_loss': self.cls_loss(
cls_score,
self.rcnn_data['labels'],
),
'bbox_loss': self.bbox_loss(
outputs['bbox_pred'],
self.rcnn_data['bbox_targets'],
self.rcnn_data['bbox_inside_weights'],
self.rcnn_data['bbox_outside_weights'],
),
}))
else:
# Return the rois to decode the refine boxes
if len(self.rcnn_data['rois']) > 1:
outputs['rois'] = torch.cat(
self.rcnn_data['rois'], dim=0)
else:
outputs['rois'] = self.rcnn_data['rois'][0]
# Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score)
return outputs
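# Two bits of arithmetic from the head above (editorial check): fc6 consumes
# dim_in * R^2 features (256 * 7 * 7 with the configs in this commit), and
# FPN level k has spatial scale 1 / 2^k (RoI levels 2..5 assumed as typical).
assert 256 * 7 ** 2 == 12544                       # self.roi_head_dim
assert [1.0 / 2 ** lvl for lvl in range(2, 6)] == \
    [0.25, 0.125, 0.0625, 0.03125]                 # self.spatial_scales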
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.ops.modules import RPNDecoder
class FastRCNN(torch.nn.Module):
"""Generate proposal regions for R-CNN series.
The pipeline is as follows:
... -> RoIs \ /-> cls_score -> cls_loss
-> RoIFeatureXform -> MLP
... -> Features / \-> bbox_pred -> bbox_loss
"""
def __init__(self, dim_in=256):
super(FastRCNN, self).__init__()
if len(cfg.RPN.STRIDES) > 1:
            # RPN with multiple strides (i.e., FPN)
from lib.fpn import ProposalLayer, ProposalTargetLayer
else:
            # RPN with single stride (i.e., C4)
from lib.faster_rcnn import ProposalLayer, ProposalTargetLayer
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = torch.nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.rpn_decoder = RPNDecoder()
self.proposal_layer = ProposalLayer()
self.proposal_target_layer = ProposalTargetLayer()
self.softmax = torch.nn.Softmax(dim=1)
self.relu = torch.nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False)
self.roi_func = {
'RoIPool': torch.vision.ops.roi_pool,
'RoIAlign': torch.vision.ops.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD]
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss(reduction='batch_size')
# Compute spatial scales for multiple strides
roi_levels = [level for level in range(
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)]
self.spatial_scales = [1.0 / (2 ** level) for level in roi_levels]
self.reset_parameters()
def reset_parameters(self):
# Careful initialization for Fast R-CNN
torch.nn.init.normal_(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters():
if 'bias' in name:
torch.nn.init.constant_(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func(
feature, rois,
output_size=(
cfg.FRCNN.ROI_XFORM_RESOLUTION,
cfg.FRCNN.ROI_XFORM_RESOLUTION,
),
spatial_scale=spatial_scale,
)
def forward(self, **kwargs):
# Generate Proposals
# Apply the CXX implementation during inference
proposal_func = self.proposal_layer \
if self.training else self.rpn_decoder
self.rcnn_data = {
'rois': proposal_func(
kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data),
kwargs['rpn_bbox_pred'],
kwargs['ims_info'],
)
}
# Generate Targets from Proposals
if self.training:
self.rcnn_data.update(
self.proposal_target_layer(
rpn_rois=self.rcnn_data['rois'],
gt_boxes=kwargs['gt_boxes'],
)
)
# Transform RoI Feature
roi_features = []
if len(self.rcnn_data['rois']) > 1:
for i, spatial_scale in enumerate(self.spatial_scales):
roi_features.append(
self.RoIFeatureTransform(
kwargs['features'][i],
self.rcnn_data['rois'][i],
spatial_scale,
)
)
roi_features = torch.cat(roi_features, dim=0)
else:
spatial_scale = 1.0 / cfg.RPN.STRIDES[0]
roi_features = \
self.RoIFeatureTransform(
kwargs['features'][0],
self.rcnn_data['rois'][0],
spatial_scale,
)
# Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim)
rcnn_output = self.relu(self.fc6(roi_features))
rcnn_output = self.relu(self.fc7(rcnn_output))
# Compute rcnn logits
cls_score = self.cls_score(rcnn_output).float()
outputs = collections.OrderedDict([
('bbox_pred', self.bbox_pred(rcnn_output).float()),
])
if self.training:
# Compute rcnn losses
outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss(
cls_score, self.rcnn_data['labels'])),
('bbox_loss', self.bbox_loss(
outputs['bbox_pred'],
self.rcnn_data['bbox_targets'],
self.rcnn_data['bbox_inside_weights'],
self.rcnn_data['bbox_outside_weights'],
)),
]))
else:
# Return the rois to decode the refine boxes
if len(self.rcnn_data['rois']) > 1:
outputs['rois'] = torch.cat(
self.rcnn_data['rois'], dim=0)
else:
outputs['rois'] = self.rcnn_data['rois'][0]
# Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score)
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1
from lib.modeling import conv3x3
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
class FPN(torch.nn.Module):
"""Feature Pyramid Networks for R-CNN and RetinaNet."""
def __init__(self, feature_dims):
super(FPN, self).__init__()
self.C = torch.nn.ModuleList()
self.P = torch.nn.ModuleList()
self.apply_func = self.apply_on_rcnn
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1):
self.C.append(conv1x1(feature_dims[lvl - 1], cfg.FPN.DIM, bias=True))
self.P.append(conv3x3(cfg.FPN.DIM, cfg.FPN.DIM, bias=True))
if 'retinanet' in cfg.MODEL.TYPE:
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else cfg.FPN.DIM
self.P.append(conv3x3(dim_in, cfg.FPN.DIM, stride=2, bias=True))
self.apply_func = self.apply_on_retinanet
self.relu = torch.nn.ReLU(inplace=False)
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True)
self.reset_parameters()
self.feature_dims = [cfg.FPN.DIM]
def reset_parameters(self):
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_(
m.weight,
a=1, # Fix the gain for [-127, 127]
) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0)
def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Apply MaxPool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1]))
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
def apply_on_retinanet(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
        outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Add extra convolutions for higher features
extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl:
extra_input = self.relu(outputs[-1])
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
def forward(self, features):
return self.apply_func(features)
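# The top-down merge above is "resize the coarser map to the lateral's size,
# then add in place". An editorial numpy sketch of one step, with a 2x
# nearest-neighbor repeat standing in for torch.vision.ops.nn_resize
# (shapes are illustrative):
import numpy as np

_top = np.random.rand(1, 256, 16, 16).astype(np.float32)   # coarser level
_lat = np.random.rand(1, 256, 32, 32).astype(np.float32)   # lateral 1x1 output
_up = _top.repeat(2, axis=2).repeat(2, axis=3)             # nearest 2x resize
assert (_lat + _up).shape == _lat.shape                    # next fpn_input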
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import affine
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class BasicBlock(torch.nn.Module):
def __init__(
self,
dim_in,
dim_out,
stride=1,
downsample=None,
dropblock=None,
):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out)
self.downsample = downsample
self.dropblock = dropblock
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
if self.dropblock is not None:
out = self.dropblock(out)
out = self.conv2(out)
out = self.bn2(out)
if self.dropblock is not None:
residual = self.dropblock(residual)
if self.downsample is not None:
residual = self.downsample(residual)
out += residual
out = self.relu(out)
return out
class Bottleneck(torch.nn.Module):
# 1x64d => 0.25 (ResNet)
# 32x8d, 64x4d => 1.0 (ResNeXt)
contraction = cfg.RESNET.NUM_GROUPS \
* cfg.RESNET.GROUP_WIDTH / 256.0
def __init__(
self,
dim_in,
dim_out,
stride=1,
downsample=None,
dropblock=None,
):
super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction)
self.conv1 = conv1x1(dim_in, dim)
self.bn1 = affine(dim)
self.conv2 = conv3x3(dim, dim, stride=stride)
self.bn2 = affine(dim)
self.conv3 = conv1x1(dim, dim_out)
self.bn3 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.downsample = downsample
self.dropblock = dropblock
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
if self.dropblock is not None:
out = self.dropblock(out)
out = self.conv3(out)
out = self.bn3(out)
if self.dropblock is not None:
residual = self.dropblock(residual)
if self.downsample is not None:
residual = self.downsample(residual)
out += residual
out = self.relu(out)
return out
class ResNet(torch.nn.Module):
def __init__(self, block, layers, filters):
super(ResNet, self).__init__()
self.dim_in, filters = filters[0], filters[1:]
self.feature_dims = [self.dim_in] + filters
self.conv1 = torch.nn.Conv2d(
3, 64,
kernel_size=7,
stride=2,
padding=3,
bias=False,
)
self.bn1 = affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d(
kernel_size=3,
stride=2,
padding=0,
ceil_mode=True,
)
self.drop3 = torch.nn.DropBlock2d(
kp=0.9,
block_size=7,
alpha=0.25,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.drop4 = torch.nn.DropBlock2d(
kp=0.9,
block_size=7,
alpha=1.00,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3)
self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, self.drop4)
self.reset_parameters()
def reset_parameters(self):
# The Kaiming Initialization
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_normal_(
m.weight,
nonlinearity='relu',
)
# Stop the gradients if necessary
def freeze_func(m):
if isinstance(m, torch.nn.Conv2d):
m.weight.requires_grad = False
m._buffers['weight'] = m.weight
del m._parameters['weight']
if cfg.MODEL.FREEZE_AT > 0:
self.conv1.apply(freeze_func)
for i in range(cfg.MODEL.FREEZE_AT, 1, -1):
getattr(self, 'layer{}'.format(i - 1)).apply(freeze_func)
def make_blocks(self, block, dim_out, blocks, stride=1, dropblock=None):
downsample = None
if stride != 1 or self.dim_in != dim_out:
downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out),
)
layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)]
self.dim_in = dim_out
for i in range(1, blocks):
layers.append(block(dim_out, dim_out, dropblock=dropblock))
return torch.nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
outputs = [x]
outputs += [self.layer1(outputs[-1])]
outputs += [self.layer2(outputs[-1])]
outputs += [self.layer3(outputs[-1])]
outputs += [self.layer4(outputs[-1])]
return outputs
def resnet(depth):
if depth == 18:
units = [2, 2, 2, 2]
elif depth == 34:
units = [3, 4, 6, 3]
elif depth == 50:
units = [3, 4, 6, 3]
elif depth == 101:
units = [3, 4, 23, 3]
elif depth == 152:
units = [3, 8, 36, 3]
elif depth == 200:
units = [3, 24, 36, 3]
elif depth == 269:
units = [3, 30, 48, 8]
else:
raise ValueError('Unsupported depth: %d' % depth)
block = Bottleneck if depth >= 50 else BasicBlock
filters = [64, 256, 512, 1024, 2048] \
if depth >= 50 else [64, 64, 128, 256, 512]
return ResNet(block, units, filters)
def make_resnet_18(): return resnet(18)
def make_resnet_34(): return resnet(34)
def make_resnet_50(): return resnet(50)
def make_resnet_101(): return resnet(101)
def make_resnet_152(): return resnet(152)
# Depths 200 and 269 are supported by resnet() and registered in the factory.
def make_resnet_200(): return resnet(200)
def make_resnet_269(): return resnet(269)
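# The Bottleneck contraction in numbers (editorial check): with the ResNet
# defaults NUM_GROUPS=1, GROUP_WIDTH=64 the factor is 64/256 = 0.25, so a
# 256-channel stage squeezes to 64 internal channels; 32x8d ResNeXt gives 1.0.
assert int(256 * (1 * 64 / 256.0)) == 64     # ResNet, 1x64d
assert int(256 * (32 * 8 / 256.0)) == 256    # ResNeXt, 32x8d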
......@@ -59,8 +59,7 @@ class RetinaNet(torch.nn.Module):
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
)
self.bbox_loss = torch.nn.SmoothL1Loss(
beta=1. / 9., reduction='batch_size',
)
beta=.11, reduction='batch_size')
self.reset_parameters()
def reset_parameters(self):
......@@ -133,26 +132,22 @@ class RetinaNet(torch.nn.Module):
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return collections.OrderedDict({
'cls_loss':
self.cls_loss(
cls_score,
self.retinanet_data['labels'],
),
'bbox_loss':
self.bbox_loss(
bbox_pred,
self.retinanet_data['bbox_targets'],
self.retinanet_data['bbox_inside_weights'],
self.retinanet_data['bbox_outside_weights'],
)
})
return collections.OrderedDict([
('cls_loss', self.cls_loss(
cls_score, self.retinanet_data['labels'])),
('bbox_loss', self.bbox_loss(
bbox_pred,
self.retinanet_data['bbox_targets'],
self.retinanet_data['bbox_inside_weights'],
self.retinanet_data['bbox_outside_weights'],
)),
])
def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict({'bbox_pred': bbox_pred})
outputs = collections.OrderedDict([('bbox_pred', bbox_pred)])
if self.training:
outputs.update(
......
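# For reference (editorial sketch of the standard formula, not dragon's
# kernel): smooth L1 with threshold beta is 0.5*x^2/beta for |x| < beta and
# |x| - 0.5*beta otherwise, so moving beta from 1/9 (~0.111) to 0.11 only
# nudges the quadratic region slightly.
import numpy as np

def _smooth_l1(x, beta):
    x = np.abs(x)
    return np.where(x < beta, 0.5 * x * x / beta, x - 0.5 * beta)

print(_smooth_l1(np.array([0.05, 0.11, 1.0]), 1. / 9.))
print(_smooth_l1(np.array([0.05, 0.11, 1.0]), 0.11))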
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class RPN(torch.nn.Module):
"""Region Proposal Networks for R-CNN series."""
def __init__(self, dim_in=256):
super(RPN, self).__init__()
##################################
# RPN outputs #
##################################
num_anchors = len(cfg.RPN.ASPECT_RATIOS) * (
len(cfg.RPN.SCALES) if len(cfg.RPN.STRIDES) == 1 else 1)
self.output = conv3x3(dim_in, dim_in, bias=True)
self.cls_score = conv1x1(dim_in, num_anchors, bias=True)
self.bbox_pred = conv1x1(dim_in, num_anchors * 4, bias=True)
self.relu = torch.nn.ReLU(inplace=True)
##################################
# RPN losses #
##################################
if len(cfg.RPN.STRIDES) > 1:
            # RPN with multiple strides (i.e., FPN)
from lib.fpn.layers.anchor_target_layer import AnchorTargetLayer
else:
            # RPN with single stride (i.e., C4)
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer
self.anchor_target_layer = AnchorTargetLayer()
self.cls_loss = torch.nn.BCEWithLogitsLoss()
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1. / 9.)
self.reset_parameters()
def reset_parameters(self):
# Initialization for the RPN
# Weight ~ Normal(0, 0.01)
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.normal_(m.weight, std=0.01)
def compute_outputs(self, features):
"""Compute the RPN logits.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
"""
# Compute rpn logits
cls_score_wide, bbox_pred_wide = [], []
for feature in features:
x = self.relu(self.output(feature))
if len(features) > 1:
cls_score = self.cls_score(x).view(0, -1)
bbox_pred = self.bbox_pred(x).view(0, 4, -1)
else:
cls_score = self.cls_score(x)
bbox_pred = self.bbox_pred(x)
cls_score_wide.append(cls_score)
bbox_pred_wide.append(bbox_pred)
if len(features) > 1:
# Concat them if necessary
return torch.cat(cls_score_wide, dim=1), \
torch.cat(bbox_pred_wide, dim=2)
else:
return cls_score_wide[0], bbox_pred_wide[0]
def compute_losses(
self,
features,
cls_score,
bbox_pred,
gt_boxes,
ims_info,
):
"""Compute the RPN classification loss and regression loss.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
cls_score : dragon.vm.torch.Tensor
The (binary) classification logits.
bbox_pred : dragon.vm.torch.Tensor
The bbox regression logits.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
self.rpn_data = \
self.anchor_target_layer(
features=features,
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return collections.OrderedDict({
'rpn_cls_loss':
self.cls_loss(cls_score, self.rpn_data['labels']),
'rpn_bbox_loss':
self.bbox_loss(
bbox_pred,
self.rpn_data['bbox_targets'],
self.rpn_data['bbox_inside_weights'],
self.rpn_data['bbox_outside_weights'],
)
})
def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict({
'rpn_cls_score': cls_score,
'rpn_bbox_pred': bbox_pred,
})
if self.training:
outputs.update(
self.compute_losses(
kwargs['features'],
cls_score,
bbox_pred,
kwargs['gt_boxes'],
kwargs['ims_info'],
)
)
return outputs
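# Anchor-count arithmetic from __init__ above (editorial check; the ratio
# and scale lists are illustrative defaults, not read from this commit):
_ratios, _scales = [0.5, 1.0, 2.0], [32, 64, 128, 256, 512]
_n = lambda strides: len(_ratios) * (len(_scales) if strides == 1 else 1)
assert _n(1) == 15   # single-stride (C4) RPN: ratios x scales per location
assert _n(5) == 3    # FPN: one scale per level, so ratios only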
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling import conv1x1
from lib.modeling import conv3x3
class RPN(torch.nn.Module):
"""Region Proposal Networks for R-CNN series."""
def __init__(self, dim_in=256):
super(RPN, self).__init__()
##################################
# RPN outputs #
##################################
num_anchors = len(cfg.RPN.ASPECT_RATIOS) * (
len(cfg.RPN.SCALES) if len(cfg.RPN.STRIDES) == 1 else 1)
self.output = conv3x3(dim_in, dim_in, bias=True)
self.cls_score = conv1x1(dim_in, num_anchors, bias=True)
self.bbox_pred = conv1x1(dim_in, num_anchors * 4, bias=True)
self.relu = torch.nn.ReLU(inplace=True)
##################################
# RPN losses #
##################################
if len(cfg.RPN.STRIDES) > 1:
# RPN with multiple strides (i.e., FPN)
from lib.fpn.anchor_target_layer import AnchorTargetLayer
else:
# RPN with a single stride (i.e., C4)
from lib.faster_rcnn.anchor_target_layer import AnchorTargetLayer
self.anchor_target_layer = AnchorTargetLayer()
self.cls_loss = torch.nn.BCEWithLogitsLoss()
self.bbox_loss = torch.nn.SmoothL1Loss(
beta=.11, reduction='batch_size')
self.reset_parameters()
def reset_parameters(self):
# Initialization for the RPN
# Weight ~ Normal(0, 0.01)
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.normal_(m.weight, std=0.01)
def compute_outputs(self, features):
"""Compute the RPN logits.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
"""
# Compute rpn logits
cls_score_wide, bbox_pred_wide = [], []
for feature in features:
x = self.relu(self.output(feature))
if len(features) > 1:
cls_score = self.cls_score(x).view(0, -1)
bbox_pred = self.bbox_pred(x).view(0, 4, -1)
else:
cls_score = self.cls_score(x)
bbox_pred = self.bbox_pred(x)
cls_score_wide.append(cls_score)
bbox_pred_wide.append(bbox_pred)
if len(features) > 1:
# Concat them if necessary
return torch.cat(cls_score_wide, dim=1), \
torch.cat(bbox_pred_wide, dim=2)
else:
return cls_score_wide[0], bbox_pred_wide[0]
def compute_losses(
self,
features,
cls_score,
bbox_pred,
gt_boxes,
ims_info,
):
"""Compute the RPN classification loss and regression loss.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
cls_score : dragon.vm.torch.Tensor
The (binary) classification logits.
bbox_pred : dragon.vm.torch.Tensor
The bbox regression logits.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
self.rpn_data = \
self.anchor_target_layer(
features=features,
gt_boxes=gt_boxes,
ims_info=ims_info,
)
return collections.OrderedDict([
('rpn_cls_loss', self.cls_loss(
cls_score, self.rpn_data['labels'])),
('rpn_bbox_loss', self.bbox_loss(
bbox_pred,
self.rpn_data['bbox_targets'],
self.rpn_data['bbox_inside_weights'],
self.rpn_data['bbox_outside_weights'],
)),
])
def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict([
('rpn_cls_score', cls_score),
('rpn_bbox_pred', bbox_pred),
])
if self.training:
outputs.update(
self.compute_losses(
kwargs['features'],
cls_score,
bbox_pred,
kwargs['gt_boxes'],
kwargs['ims_info'],
)
)
return outputs
......@@ -136,32 +136,29 @@ class SSD(torch.nn.Module):
gt_boxes=gt_boxes,
)
)
return collections.OrderedDict({
return collections.OrderedDict([
# A compensating factor of 4.0 is used,
# as we normalize both the pos and neg samples
'cls_loss':
self.cls_loss(
cls_score.view(-1, cfg.MODEL.NUM_CLASSES),
self.ssd_data['labels']
) * 4.,
'bbox_loss':
self.bbox_loss(
bbox_pred,
self.ssd_data['bbox_targets'],
self.ssd_data['bbox_inside_weights'],
self.ssd_data['bbox_outside_weights'],
)
})
('cls_loss', self.cls_loss(
cls_score.view(-1, cfg.MODEL.NUM_CLASSES),
self.ssd_data['labels']) * 4.),
('bbox_loss', self.bbox_loss(
bbox_pred,
self.ssd_data['bbox_targets'],
self.ssd_data['bbox_inside_weights'],
self.ssd_data['bbox_outside_weights'],
)),
])
def forward(self, *args, **kwargs):
prior_boxes = self.prior_box_layer(kwargs['features'])
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict({
'prior_boxes': prior_boxes,
'bbox_pred': bbox_pred,
})
outputs = collections.OrderedDict([
('bbox_pred', bbox_pred),
('prior_boxes', prior_boxes),
])
if self.training:
outputs.update(
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/nms_wrapper.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.core.config import cfg
from lib.utils import logger
try:
from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms
except ImportError as e:
print('Failed to import cpu nms. Error: {0}'.format(str(e)))
try:
from lib.nms.gpu_nms import gpu_nms
except ImportError as e:
print('Failed to import gpu nms. Error: {0}'.format(str(e)))
def nms(detections, thresh, force_cpu=False):
"""Perform either CPU or GPU Hard-NMS."""
if detections.shape[0] == 0:
return []
if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
else:
return cpu_nms(detections, thresh)
def soft_nms(
detections,
thresh,
method='linear',
sigma=0.5,
score_thresh=0.001,
):
"""Perform CPU Soft-NMS."""
if detections.shape[0] == 0:
return []
methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
if method not in methods:
logger.fatal('Unknown soft nms method: {}'.format(method))
return cpu_soft_nms(
detections,
thresh,
methods[method],
sigma,
score_thresh,
)
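A minimal usage sketch (hedged; it assumes detections is a float32 array of shape (N, 5) laid out as [x1, y1, x2, y2, score], which is how the test code below builds cls_detections):
import numpy as np
from lib.nms.nms_wrapper import nms, soft_nms
dets = np.array([
    [10, 10, 50, 50, 0.9],
    [12, 12, 52, 52, 0.8],   # heavily overlaps the first box
    [80, 80, 120, 120, 0.7],
], dtype=np.float32)
keep = nms(dets, thresh=0.5, force_cpu=True)  # indices of the kept boxes
keep_soft = soft_nms(dets, thresh=0.5, method='linear')  # indices again, after rescoring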
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
syntax = "proto2";
message Datum {
optional int32 channels = 1;
optional int32 height = 2;
optional int32 width = 3;
optional bytes data = 4;
optional int32 label = 5;
repeated float float_data = 6;
optional bool encoded = 7 [default = false];
repeated int32 labels = 8;
}
message Annotation {
optional float x1 = 1;
optional float y1 = 2;
optional float x2 = 3;
optional float y2 = 4;
optional string name = 5;
optional bool difficult = 6 [default = false];
optional string mask = 7;
}
message AnnotatedDatum {
optional Datum datum = 1;
optional string filename = 2;
repeated Annotation annotation = 3;
}
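For reference, a hedged sketch of building and serializing one record with the generated bindings (assuming protoc has produced lib/proto/anno_pb2.py, as done by the build script; the image file name and box values are hypothetical):
from lib.proto import anno_pb2 as pb
with open('000001.jpg', 'rb') as f:
    datum = pb.Datum(channels=3, height=375, width=500,
                     data=f.read(), encoded=True)
box = pb.Annotation(x1=48., y1=240., x2=195., y2=371.,
                    name='dog', difficult=False)
example = pb.AnnotatedDatum(datum=datum, filename='000001.jpg',
                            annotation=[box])
serialized = example.SerializeToString()  # bytes stored in a record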
......@@ -13,5 +13,5 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer
from lib.retinanet.layers.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.data_layer import DataLayer
from lib.retinanet.anchor_target_layer import AnchorTargetLayer
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
anchor_scale = cfg.RETINANET.ANCHOR_SCALE
self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)]
self.ratios = cfg.RETINANET.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = []
for stride in self.strides:
sizes = [stride * anchor_scale *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
self.base_anchors.append(
generate_anchors_v2(
stride=stride,
ratios=self.ratios,
sizes=sizes,
))
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
all_anchors, total_anchors = [], 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors)
total_anchors += anchors.shape[0]
all_anchors = np.concatenate(all_anchors, axis=0)
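# Shape walk-through (a hedged example): with 3 aspect ratios and
# 3 scales per octave, A = 9 base anchors per level; a 100 x 152
# feature map gives K = 15200 cells, so that level alone contributes
# A * K = 136800 rows to all_anchors.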
# label: 1 is positive, 0 is negative, -1 is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32)
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
anchors = all_anchors
inds_inside = np.arange(all_anchors.shape[0])
num_inside = len(inds_inside)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix]
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float),
)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
# fg label: for each gt, anchor with highest overlap
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
gt_inds = argmax_overlaps[gt_argmax_overlaps]
labels[gt_argmax_overlaps] = gt_boxes[gt_inds, 4]
# fg label: above threshold IOU
inds = max_overlaps >= cfg.RETINANET.POSITIVE_OVERLAP
gt_inds = argmax_overlaps[inds]
labels[inds] = gt_boxes[gt_inds, 4]
fg_inds = np.where(labels > 0)[0]
# bg label: below threshold IOU
labels[max_overlaps < cfg.RETINANET.NEGATIVE_OVERLAP] = 0
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4])
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[fg_inds, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1)
labels_wide[ix, inds_inside] = labels
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
}
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.image import scale_image
from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32,
)
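# Each ims_info row is [blob_height, blob_width, im_scale] for one
# scale (assuming the NHWC blob layout produced by im_list_to_blob).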
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Unpack results
return tensor_to_blob(outputs['detections'])[:, 1:]
def ims_detect(detector, raw_images):
"""Detect images, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_images[0])
num_scales = len(ims_scale)
ims_shape = [im.shape for im in raw_images]
for item_idx in range(1, len(raw_images)):
ims_ext, ims_scale_ext = scale_image(raw_images[item_idx])
ims += ims_ext
ims_scale += ims_scale_ext
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32,
)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Unpack results
results = tensor_to_blob(outputs['detections'])
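# Rows of results are [im_idx, x1, y1, x2, y2, score, cls]; the
# division i // num_scales below regroups the scales of each image.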
detections_wide = [[] for _ in range(len(ims_shape))]
for i in range(len(ims)):
indices = np.where(results[:, 0].astype(np.int32) == i)[0]
detections = results[indices, 1:]
detections_wide[i // num_scales].append(detections)
for i in range(len(ims_shape)):
detections_wide[i] = np.vstack(detections_wide[i]) \
if len(detections_wide[i]) > 1 else detections_wide[i][0]
return detections_wide
def test_net(net, server):
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths
image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images: continue
image_id, raw_image = server.get_image()
image_ids.append(image_id)
raw_images.append(raw_image)
# Run detection on specific scales
_t['im_detect'].tic()
if cfg.TEST.IMS_PER_BATCH > 1:
results = ims_detect(net, raw_images)
else:
results = [im_detect(net, raw_images[0])]
_t['im_detect'].toc()
# Post-Processing
_t['misc'].tic()
for item_idx, detections in enumerate(results):
i = batch_idx + item_idx
boxes_this_image = [[]]
# {x1, y1, x2, y2, score, cls}
detections = np.array(detections)
for j in range(1, num_classes):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4]
cls_scores = detections[cls_indices, 4]
cls_detections = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(
cls_detections,
cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(
cls_detections,
cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(
raw_images[item_idx],
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_ids[item_idx]),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images, _t['im_detect'].average_time,
_t['misc'].average_time), end='')
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
print('Evaluating detections')
server.evaluate_detections(all_boxes)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob
from lib.utils.image import scale_image
from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32,
)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Unpack results
return tensor_to_blob(outputs['detections'])[:, 1:]
def ims_detect(detector, raw_images):
"""Detect images, with single or multiple scales."""
# Prepare images
ims, ims_scale = scale_image(raw_images[0])
num_scales = len(ims_scale)
ims_shape = [im.shape for im in raw_images]
for item_idx in range(1, len(raw_images)):
ims_ext, ims_scale_ext = scale_image(raw_images[item_idx])
ims += ims_ext
ims_scale += ims_scale_ext
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32,
)
blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs=blobs)
# Unpack results
results = tensor_to_blob(outputs['detections'])
detections_wide = [[] for _ in range(len(ims_shape))]
for i in range(len(ims)):
indices = np.where(results[:, 0].astype(np.int32) == i)[0]
detections = results[indices, 1:]
detections_wide[i // num_scales].append(detections)
for i in range(len(ims_shape)):
detections_wide[i] = np.vstack(detections_wide[i]) \
if len(detections_wide[i]) > 1 else detections_wide[i][0]
return detections_wide
def test_net(net, server):
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths
image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images: continue
image_id, raw_image = server.get_image()
image_ids.append(image_id)
raw_images.append(raw_image)
# Run detection on specific scales
_t['im_detect'].tic()
if cfg.TEST.IMS_PER_BATCH > 1:
results = ims_detect(net, raw_images)
else:
results = [im_detect(net, raw_images[0])]
_t['im_detect'].toc()
# Post-Processing
_t['misc'].tic()
for item_idx, detections in enumerate(results):
i = batch_idx + item_idx
boxes_this_image = [[]]
# {x1, y1, x2, y2, score, cls}
detections = np.array(detections)
for j in range(1, num_classes):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4]
cls_scores = detections[cls_indices, 4]
cls_detections = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(
cls_detections,
cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(
cls_detections,
cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(
raw_images[item_idx],
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_ids[item_idx]),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images,
_t['im_detect'].average_time,
_t['misc'].average_time),
end='')
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
print('Evaluating detections')
server.evaluate_detections(all_boxes)
......@@ -13,8 +13,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from lib.ssd.layers.data_layer import DataLayer
from lib.ssd.layers.hard_mining_layer import HardMiningLayer
from lib.ssd.layers.multibox_layer import MultiBoxMatchLayer
from lib.ssd.layers.multibox_layer import MultiBoxTargetLayer
from lib.ssd.layers.priorbox_layer import PriorBoxLayer
from lib.ssd.data_layer import DataLayer
from lib.ssd.hard_mining_layer import HardMiningLayer
from lib.ssd.multibox_layer import MultiBoxMatchLayer
from lib.ssd.multibox_layer import MultiBoxTargetLayer
from lib.ssd.priorbox_layer import PriorBoxLayer
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from lib.core.config import cfg
class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self._img_blob_size = (
cfg.TRAIN.IMS_PER_BATCH,
cfg.SSD.RESIZE.HEIGHT,
cfg.SSD.RESIZE.WIDTH, 3,
)
self.q_in = self.q_out = None
self.daemon = True
def get(self):
img_blob, boxes_blob = np.zeros(self._img_blob_size, 'uint8'), []
for i in range(cfg.TRAIN.IMS_PER_BATCH):
img_blob[i], gt_boxes = self.q_in.get()
# Pack the boxes by adding the index of images
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), np.float32)
boxes[:, :gt_boxes.shape[1]] = gt_boxes
boxes[:, -1] = i
boxes_blob.append(boxes)
return {
'data': img_blob,
'gt_boxes': np.concatenate(boxes_blob, 0),
}
def run(self):
while True:
self.q_out.put(self.get())
......@@ -13,54 +13,69 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from multiprocessing import Queue
import multiprocessing as mp
import time
import dragon
import pprint
import dragon.vm.torch as torch
import numpy as np
from lib.faster_rcnn.data.data_reader import DataReader
from lib.ssd.data.data_transformer import DataTransformer
from lib.ssd.data.blob_fetcher import BlobFetcher
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.ssd.data_transformer import DataTransformer
from lib.utils import logger
class DataBatch(object):
"""DataBatch aims to prefetch data by ``Triple-Buffering``.
class DataLayer(torch.nn.Module):
"""Generate a mini-batch of data."""
It takes full advantage of the Process/Thread of Python,
def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
which provides a remarkable I/O speed-up for scalable distributed training.
def forward(self):
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
"""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
source : str
The path of database.
dataset : lambda
The creator of a dataset.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=2048
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=128
batch_size : int, optional, default=32
The size of a mini-batch.
prefetch : int, optional, default=5
The prefetch count.
"""
super(DataBatch, self).__init__()
# Init mpi
global_rank, local_rank, group_size = 0, 0, 1
if dragon.mpi.is_init():
group = dragon.mpi.is_parallel()
if group is not None: # DataParallel
global_rank = dragon.mpi.rank()
group_size = len(group)
for i, node in enumerate(group):
if global_rank == node:
local_rank = i
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_default_process_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
......@@ -77,63 +92,50 @@ class DataBatch(object):
self._num_transformers = min(
self._num_transformers, self._max_transformers)
# Init queues
self.Q1 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q2 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q3 = Queue(self._prefetch * self._num_readers)
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q2 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
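# Data flow (sketch): DataReader -> Q1 (serialized examples)
# -> DataTransformer -> Q2 ((image, gt_boxes) pairs)
# -> DataBatch.run() -> Q3 (packed mini-batches)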
# Init readers
# Initialize readers
self._readers = []
for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].q_out = self.Q1
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += local_rank * self._num_readers
self._readers[i]._num_parts = num_parts
self._readers[i]._part_idx = part_idx
self._readers[i]._rng_seed += part_idx
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Init transformers
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers)
transformer.q_in = self.Q1
transformer.q_out = self.Q2
transformer._rng_seed += (i + rank * self._num_transformers)
transformer.q_in, transformer.q_out = self.Q1, self.Q2
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Init blob fetchers
self._fetchers = []
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.q_in = self.Q2
fetcher.q_out = self.Q3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
# Avoid echoing from multiple nodes
if local_rank == 0:
self.echo()
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate(self._fetchers)
logger.info('Terminating BlobFetcher ......')
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminating DataTransformer ......')
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminating DataReader......')
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
......@@ -149,14 +151,24 @@ class DataBatch(object):
"""
return self.Q3.get()
def echo(self):
"""Print I/O Information."""
print('---------------------------------------------------------')
print('BatchFetcher({} Threads), Using config:'.format(
self._num_readers + self._num_transformers + self._num_fetchers))
params = {'queue_size': self._prefetch,
'n_readers': self._num_readers,
'n_transformers': self._num_transformers,
'n_fetchers': self._num_fetchers}
pprint.pprint(params)
print('---------------------------------------------------------')
def run(self):
"""Start the process to produce batches."""
image_batch_shape = (
cfg.TRAIN.IMS_PER_BATCH,
cfg.SSD.RESIZE.HEIGHT,
cfg.SSD.RESIZE.WIDTH, 3,
)
while True:
boxes_to_pack = []
image_batch = np.zeros(image_batch_shape, 'uint8')
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
image_batch[image_index], gt_boxes = self.Q2.get()
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
boxes[:, :gt_boxes.shape[1]], boxes[:, -1] = gt_boxes, image_index
boxes_to_pack.append(boxes)
self.Q3.put({
'data': image_batch,
'gt_boxes': np.concatenate(boxes_to_pack),
})
......@@ -13,14 +13,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import multiprocessing
import cv2
import numpy as np
from lib.core.config import cfg
from lib.proto import anno_pb2 as pb
from lib.ssd.data import transforms
from lib.utils import logger
from lib.ssd import transforms
from lib.utils.boxes import flip_boxes
class DataTransformer(multiprocessing.Process):
......@@ -41,38 +41,41 @@ class DataTransformer(multiprocessing.Process):
self.q_in = self.q_out = None
self.daemon = True
def make_roi_dict(self, ann_datum, flip=False):
annotations = ann_datum.annotation
def make_roi_dict(self, example, flip=False):
n_objects = 0
if not self._use_diff:
for ann in annotations:
if not ann.difficult: n_objects += 1
else: n_objects = len(annotations)
for obj in example['object']:
if obj.get('difficult', 0) == 0:
n_objects += 1
else:
n_objects = len(example['object'])
roi_dict = {
'width': ann_datum.datum.width,
'height': ann_datum.datum.height,
'gt_classes': np.zeros((n_objects,), dtype=np.int32),
'boxes': np.zeros((n_objects, 4), dtype=np.float32),
'normalized_boxes': np.zeros((n_objects, 4), dtype=np.float32),
'width': example['width'],
'height': example['height'],
'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), 'float32'),
'normalized_boxes': np.zeros((n_objects, 4), 'float32'),
}
rec_idx = 0
for ann in annotations:
if not self._use_diff and ann.difficult:
# Filter the difficult instances
object_idx = 0
for obj in example['object']:
if not self._use_diff and \
obj.get('difficult', 0) > 0:
continue
roi_dict['boxes'][rec_idx, :] = [
max(0, ann.x1),
max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1),
roi_dict['boxes'][object_idx, :] = [
max(0, obj['xmin']),
max(0, obj['ymin']),
min(obj['xmax'], example['width'] - 1),
min(obj['ymax'], example['height'] - 1),
]
roi_dict['gt_classes'][rec_idx] = \
self._class_to_ind[ann.name]
rec_idx += 1
roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[obj['name']]
object_idx += 1
if flip:
roi_dict['boxes'] = _flip_boxes(
roi_dict['boxes'] = flip_boxes(
roi_dict['boxes'], roi_dict['width'])
roi_dict['boxes'][:, 0::2] /= roi_dict['width']
......@@ -80,26 +83,19 @@ class DataTransformer(multiprocessing.Process):
return roi_dict
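# A hedged sketch of the example dict consumed above (field names are
# inferred from the accessors; 'content' holds the encoded image bytes):
# example = {
#     'width': 500, 'height': 375, 'content': b'\xff\xd8...',
#     'object': [{'name': 'dog', 'xmin': 48, 'ymin': 240,
#                 'xmax': 195, 'ymax': 371, 'difficult': 0}],
# }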
def get(self, serialized):
ann_datum = pb.AnnotatedDatum()
ann_datum.ParseFromString(serialized)
img_datum = ann_datum.datum
img = np.fromstring(img_datum.data, np.uint8)
if img_datum.encoded is True:
img = cv2.imdecode(img, -1)
else:
h, w = img_datum.height, img_datum.width
img = img.reshape((h, w, img_datum.channels))
def get(self, example):
img = np.frombuffer(example['content'], np.uint8)
img = cv2.imdecode(img, -1)
# Flip
flip = False
if self._mirror:
if np.random.randint(0, 2) > 0:
if np.random.randint(2) > 0:
img = img[:, ::-1, :]
flip = True
# Datum -> RoIDB
roi_dict = self.make_roi_dict(ann_datum, flip)
# Example -> RoIDict
roi_dict = self.make_roi_dict(example, flip)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
......@@ -120,19 +116,7 @@ class DataTransformer(multiprocessing.Process):
def run(self):
np.random.seed(self._rng_seed)
while True:
serialized = self.q_in.get()
im, gt_boxes = self.get(serialized)
if len(gt_boxes) < 1:
continue
self.q_out.put((im, gt_boxes))
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encountered invalid coordinates after flipping boxes.')
return flip_boxes
outputs = self.get(self.q_in.get())
if len(outputs[1]) < 1:
continue # Ignore images without objects
self.q_out.put(outputs)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
def generate_anchors(min_sizes, max_sizes, ratios):
"""
Generate anchor (reference) windows by enumerating
aspect ratios, min_sizes, max_sizes wrt a reference ctr (x, y, w, h).
"""
total_anchors = []
for idx, min_size in enumerate(min_sizes):
# Note that SSD assumes it is a ctr-anchor
base_anchor = np.array([0, 0, min_size, min_size])
anchors = _ratio_enum(base_anchor, ratios)
if len(max_sizes) > 0:
max_size = max_sizes[idx]
_anchors = anchors[0].reshape((1, 4))
_anchors = np.vstack([_anchors, _max_size_enum(
base_anchor, min_size, max_size)])
anchors = np.vstack([_anchors, anchors[1:]])
total_anchors.append(anchors)
return np.vstack(total_anchors)
def _whctrs(anchor):
"""Return width, height, x center, and y center for an anchor (window)."""
w, h = anchor[2], anchor[3]
x_ctr, y_ctr = anchor[0], anchor[1]
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
"""
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * ws,
y_ctr - 0.5 * hs,
x_ctr + 0.5 * ws,
y_ctr + 0.5 * hs))
return anchors
def _ratio_enum(anchor, ratios):
"""Enumerate a set of anchors for each aspect ratio wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
hs = np.round(np.sqrt(size_ratios))
ws = np.round(hs * ratios)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
def _max_size_enum(base_anchor, min_size, max_size):
"""Enumerate a anchor for max_size wrt base_anchor."""
w, h, x_ctr, y_ctr = _whctrs(base_anchor)
ws = hs = np.sqrt([min_size * max_size])
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
if __name__ == '__main__':
print(generate_anchors(min_sizes=[30], max_sizes=[60], ratios=[1, 0.5, 2]))
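# The demo above prints, approximately:
# [[-15.     -15.      15.      15.    ]
#  [-21.2132 -21.2132  21.2132  21.2132]
#  [-10.5    -21.      10.5     21.    ]
#  [-21.     -10.5     21.      10.5   ]]
# i.e. the ratio-1 anchor, the sqrt(min_size * max_size) anchor, and
# the remaining aspect-ratio anchors, all centered at (0, 0).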
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
class HardMiningLayer(torch.nn.Module):
def __init__(self):
super(HardMiningLayer, self).__init__()
def forward(self, conf_prob, match_labels, max_overlaps):
# Confidence of each matched box
conf_prob_wide = conf_prob.numpy(True)
# Label of each matched box
match_labels_wide = match_labels
# Max overlaps between default boxes and gt boxes
max_overlaps_wide = max_overlaps
# label ``-1`` will be ignored
labels_wide = -np.ones(match_labels_wide.shape, dtype=np.int64)
for ix in range(match_labels_wide.shape[0]):
match_labels = match_labels_wide[ix]
max_overlaps = max_overlaps_wide[ix]
conf_prob = conf_prob_wide[ix]
conf_loss = np.zeros(match_labels.shape, dtype=np.float32)
inds = np.where(match_labels >= 0)[0]
flt_min = np.finfo(float).eps
# Softmax cross-entropy
conf_loss[inds] = -np.log(np.maximum(
conf_prob[inds, match_labels[inds]], flt_min))
# Filter negatives
fg_inds = np.where(match_labels > 0)[0]
neg_inds = np.where(match_labels == 0)[0]
neg_overlaps = max_overlaps[neg_inds]
eligible_neg_inds = np.where(neg_overlaps < cfg.SSD.OHEM.NEG_OVERLAP)[0]
sel_inds = neg_inds[eligible_neg_inds]
# Do Mining
sel_loss = conf_loss[sel_inds]
num_pos = len(fg_inds)
num_sel = min(int(num_pos * cfg.SSD.OHEM.NEG_POS_RATIO), len(sel_inds))
sorted_sel_inds = sel_inds[np.argsort(-sel_loss)]
bg_inds = sorted_sel_inds[:num_sel]
labels_wide[ix][fg_inds] = match_labels[fg_inds] # Keep fg indices
labels_wide[ix][bg_inds] = 0 # Use hard negatives as bg indices
# Feed labels to compute cls loss
return {'labels': blob_to_tensor(labels_wide)}
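# A worked example (hedged): with 10 positives and NEG_POS_RATIO = 3,
# at most the 30 highest-loss eligible negatives become background;
# every other anchor keeps label -1 and is ignored by the cls loss.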
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.ssd.data.data_batch import DataBatch
class DataLayer(torch.nn.Module):
def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'source': database.source,
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': 2048, # Chunk-Wise Shuffle
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
def forward(self):
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image
def get_images(ims):
target_h = cfg.SSD.RESIZE.HEIGHT
target_w = cfg.SSD.RESIZE.WIDTH
processed_ims, im_scales = [], []
for im in ims:
im_scales.append((float(target_h) / im.shape[0],
float(target_w) / im.shape[1]))
processed_ims.append(cv2.resize(im, (target_w, target_h)))
ims_blob = np.array(processed_ims, dtype=np.uint8)
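# Note: im_scales holds (h_scale, w_scale) per image; the caller
# divides decoded boxes back by these factors.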
return ims_blob, im_scales
def ims_detect(detector, ims):
"""Detect images, with the single scale."""
# Prepare blobs
data, im_scales = get_images(ims)
data = torch.from_numpy(data).cuda(cfg.GPU_ID)
# Do Forward
with torch.no_grad():
outputs = detector.forward(inputs={'data': data})
# Decode results
batch_boxes = []
scores = tensor_to_blob(outputs['cls_prob'])
prior_boxes = tensor_to_blob(outputs['prior_boxes'])
box_deltas = tensor_to_blob(outputs['bbox_pred'])
for i in range(box_deltas.shape[0]):
boxes = bbox_transform_inv(
boxes=prior_boxes,
deltas=box_deltas[i],
weights=cfg.BBOX_REG_WEIGHTS,
)
boxes[:, 0::2] /= im_scales[i][1]
boxes[:, 1::2] /= im_scales[i][0]
batch_boxes.append(clip_tiled_boxes(boxes, ims[i].shape))
return scores, batch_boxes
def test_net(net, server):
    """Test the detector over all images provided by the server."""
    # Load settings
    classes = server.classes
    num_images = server.num_images
    num_classes = server.num_classes
    # all_boxes[cls][image] accumulates the detections per (class, image) pair
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    _t = {'im_detect': Timer(), 'misc': Timer()}
    for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
        # Collect raw images and their ids
        image_ids, raw_images = [], []
        for item_idx in range(cfg.TEST.IMS_PER_BATCH):
            if batch_idx + item_idx >= num_images:
                continue
            image_id, raw_image = server.get_image()
            image_ids.append(image_id)
            raw_images.append(raw_image)
        _t['im_detect'].tic()
        batch_scores, batch_boxes = ims_detect(net, raw_images)
        _t['im_detect'].toc()
        _t['misc'].tic()
        for item_idx in range(len(batch_scores)):
            i = batch_idx + item_idx
            scores = batch_scores[item_idx]
            boxes = batch_boxes[item_idx]
            boxes_this_image = [[]]
            for j in range(1, num_classes):
                # Select the candidates above the score threshold
                inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds]
                # Keep the top-k scoring candidates before applying NMS
                pre_nms_inds = np.argsort(-cls_scores)[:cfg.TEST.NMS_TOP_K]
                cls_scores = cls_scores[pre_nms_inds]
                cls_boxes = cls_boxes[pre_nms_inds]
                cls_detections = np.hstack(
                    (cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                if cfg.TEST.USE_SOFT_NMS:
                    keep = soft_nms(
                        cls_detections,
                        cfg.TEST.NMS,
                        method=cfg.TEST.SOFT_NMS_METHOD,
                        sigma=cfg.TEST.SOFT_NMS_SIGMA,
                    )
                else:
                    keep = nms(
                        cls_detections,
                        cfg.TEST.NMS,
                        force_cpu=True,
                    )
                cls_detections = cls_detections[keep, :]
                all_boxes[j][i] = cls_detections
                boxes_this_image.append(cls_detections)
            if cfg.VIS or cfg.VIS_ON_FILE:
                vis_one_image(
                    raw_images[item_idx],
                    classes,
                    boxes_this_image,
                    thresh=cfg.VIS_TH,
                    box_alpha=1.0,
                    show_class=True,
                    filename=server.get_save_filename(image_ids[item_idx]),
                )
            # Limit to max_per_image detections *over all classes*
            if cfg.TEST.DETECTIONS_PER_IM > 0:
                image_scores = []
                for j in range(1, num_classes):
                    if len(all_boxes[j][i]) < 1:
                        continue
                    image_scores.append(all_boxes[j][i][:, -1])
                if len(image_scores) > 0:
                    image_scores = np.hstack(image_scores)
                    if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
                        image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
                        for j in range(1, num_classes):
                            keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                            all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()
        print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(min(batch_idx + cfg.TEST.IMS_PER_BATCH, num_images),
                      num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time), end='')
    print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
    print('Evaluating detections')
    server.evaluate_detections(all_boxes)
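# A minimal driver sketch; `make_test_server` is a hypothetical stand-in
# for however the dataset server is constructed elsewhere in this repo:
#
#   server = make_test_server(cfg.TEST.DATABASE)  # supplies images and ids
#   test_net(detector, server)                    # detects, then evaluates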
......@@ -19,7 +19,7 @@ sys.path.append('../../')
import cv2
import numpy as np
from lib.ssd.data import transforms
from lib.ssd import transforms
if __name__ == '__main__':
......
......@@ -201,6 +201,16 @@ def expand_boxes(boxes, scale):
    return boxes_exp
def flip_boxes(boxes, width):
    """Flip the boxes horizontally."""
    # Use a distinct name to avoid shadowing this function inside its body
    flipped_boxes = boxes.copy()
    old_x1 = boxes[:, 0].copy()
    old_x2 = boxes[:, 2].copy()
    flipped_boxes[:, 0] = width - old_x2 - 1
    flipped_boxes[:, 2] = width - old_x1 - 1
    return flipped_boxes
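# A worked example (sketch): flipping twice is the identity. With width = 10
# and a box [2, 3, 5, 7] in (x1, y1, x2, y2) order:
#   first flip  -> [10 - 5 - 1, 3, 10 - 2 - 1, 7] = [4, 3, 7, 7]
#   second flip -> [10 - 7 - 1, 3, 10 - 4 - 1, 7] = [2, 3, 5, 7]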
def filter_boxes(boxes, min_size):
    """Remove all boxes with any side smaller than min size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
......
......@@ -62,22 +62,20 @@ if __name__ == '__main__':
    if checkpoint is not None:
        cfg.TRAIN.WEIGHTS = checkpoint
    # Setup MPI
    if cfg.NUM_GPUS != dragon.mpi.size():
    # Setup the distributed environment
    world_rank = dragon.distributed.get_rank()
    world_size = dragon.distributed.get_world_size()
    if cfg.NUM_GPUS != world_size:
        raise ValueError(
            'Excepted {} mpi nodes, but got {}.'
            .format(len(args.gpus), dragon.mpi.size())
            'Expected to start {} processes, got {}.'
            .format(cfg.NUM_GPUS, world_size)
        )
    GPUs = [i for i in range(cfg.NUM_GPUS)]
    cfg.GPU_ID = GPUs[dragon.mpi.rank()]
    dragon.mpi.add_parallel_group([i for i in range(cfg.NUM_GPUS)])
    dragon.mpi.set_parallel_mode('NCCL' if cfg.USE_NCCL else 'MPI')
    logger.set_root_logger(world_rank == 0)
    # Setup logger
    if dragon.mpi.rank() != 0:
        logger.set_root_logger(False)
    # Select the GPU according to the rank of the process
    cfg.GPU_ID = [i for i in range(cfg.NUM_GPUS)][world_rank]
    # Fix the random seeds (numpy and dragon) for reproducibility
    # Fix the random seed for reproducibility
    numpy.random.seed(cfg.RNG_SEED)
    dragon.config.set_random_seed(cfg.RNG_SEED)
......@@ -89,7 +87,8 @@ if __name__ == '__main__':
    # Ready to train the network
    logger.info('Output will be saved to `{:s}`'
                .format(coordinator.checkpoints_dir()))
    train_net(coordinator, start_iter)
    # Finalize mpi
    dragon.mpi.finalize()
    with dragon.distributed.new_group(
            ranks=[i for i in range(cfg.NUM_GPUS)],
            backend='NCCL' if cfg.USE_NCCL else 'MPI',
            verbose=True).as_default():
        train_net(coordinator, start_iter)
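# A hypothetical launch example: the world-size check above expects one
# process per GPU, typically started by an MPI-style launcher
# (the script name is assumed):
#
#   mpirun -n 4 python train.py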
......@@ -82,7 +82,7 @@ if __name__ == '__main__':
    if checkpoint is not None:
        cfg.TRAIN.WEIGHTS = checkpoint
    # Fix the random seeds (numpy and dragon) for reproducibility
    # Fix the random seed for reproducibility
    numpy.random.seed(cfg.RNG_SEED)
    dragon.config.set_random_seed(cfg.RNG_SEED)
......