Commit f8359d17 by Ting PAN

Adapt to SeetaRecord

1 parent ca255ea0
Showing with 5417 additions and 6186 deletions
@@ -47,4 +47,4 @@ __pycache__
.idea
# OSX dir files
.DS_Store
\ No newline at end of file
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.2.0 (20190929)
Dragon Minimum Required (Version 0.3.0.dev20190929)
Changes:
Preview Features:
- Use SeetaRecord instead of LMDB.
- Flatten the implementation of layers.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.1.2 (20190723)
Dragon Minimum Required (Version 0.3.0.0)
......
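The headline change of this release, "Use SeetaRecord instead of LMDB", is visible in the TestServer diff near the end of this changeset: the per-detector `DataReader` over an LMDB source is replaced by `dragon.io.DataReader` over a `dragon.io.SeetaRecordDataset`. A minimal sketch of the new reading path, assuming a SeetaRecord database directory such as `/data/voc_0712_trainval` and that one serialized record is pulled per queue item:

```python
import multiprocessing as mp
import dragon

# Sketch only: the source is a SeetaRecord directory (no *_lmdb suffix),
# matching the DATABASE paths in the updated YAML configs below.
source = '/data/voc_0712_trainval'

# The dataset is passed as a factory so each reader process opens its own handle,
# mirroring the TestServer change in this commit.
reader = dragon.io.DataReader(
    dataset=lambda: dragon.io.SeetaRecordDataset(source))
reader.q_out = mp.Queue(8)    # buffered examples, as TestServer does with IMS_PER_BATCH
reader.start()

example = reader.q_out.get()  # assumption: one serialized record per queue item
```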
#!/bin/sh
# delete cache
rm -r build install *.c *.cpp
-# compile proto files
-protoc -I ../lib/proto --python_out=../lib/proto ../lib/proto/anno.proto
# compile cython modules
python setup.py build_ext --inplace
# compile cuda modules
-cd build
-cmake .. && make install && cd ..
+cd build && cmake .. && make install && cd ..
# setup
cp -r install/lib ../
@@ -32,15 +32,15 @@ FRCNN:
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
TRAIN:
-  WEIGHTS: '/data/models/imagenet/R-101.Affine.pth'
-  DATABASE: '/data/coco_2014_trainval35k_lmdb'
+  WEIGHTS: '/model/R-101.Affine.pth'
+  DATABASE: '/data/coco_2014_trainval35k'
  IMS_PER_BATCH: 2
  USE_DIFF: False  # Do not use crowd objects
  BATCH_SIZE: 512
  SCALES: [800]
  MAX_SIZE: 1333
TEST:
-  DATABASE: '/data/coco_2014_minival_lmdb'
+  DATABASE: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  RPN_POST_NMS_TOP_N: 1000
......
@@ -32,15 +32,15 @@ FRCNN:
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
TRAIN:
-  WEIGHTS: '/data/models/imagenet/R-101.Affine.pth'
-  DATABASE: '/data/coco_2014_trainval35k_lmdb'
+  WEIGHTS: '/model/R-101.Affine.pth'
+  DATABASE: '/data/coco_2014_trainval35k'
  IMS_PER_BATCH: 2
  USE_DIFF: False  # Do not use crowd objects
  BATCH_SIZE: 512
  SCALES: [800]
  MAX_SIZE: 1333
TEST:
-  DATABASE: '/data/coco_2014_minival_lmdb'
+  DATABASE: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  RPN_POST_NMS_TOP_N: 1000
......
@@ -23,14 +23,14 @@ FRCNN:
  ROI_XFORM_METHOD: RoIAlign
  ROI_XFORM_RESOLUTION: 7
TRAIN:
-  WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
-  DATABASE: '/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/model/R-50.Affine.pth'
+  DATABASE: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 2
  BATCH_SIZE: 128
  SCALES: [600]
  MAX_SIZE: 1000
TEST:
-  DATABASE: '/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  RPN_POST_NMS_TOP_N: 1000
  SCALES: [600]
......
@@ -28,15 +28,15 @@ FRCNN:
  ROI_XFORM_RESOLUTION: 7
  MLP_HEAD_DIM: 4096
TRAIN:
-  WEIGHTS: '/data/models/imagenet/VGG16.RCNN.pth'
-  DATABASE: '/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/model/VGG16.RCNN.pth'
+  DATABASE: '/data/voc_0712_trainval'
  RPN_MIN_SIZE: 16
  IMS_PER_BATCH: 2
  BATCH_SIZE: 128
  SCALES: [600]
  MAX_SIZE: 1000
TEST:
-  DATABASE: '/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  RPN_MIN_SIZE: 16
  RPN_POST_NMS_TOP_N: 300
......
@@ -32,13 +32,13 @@ FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
-  WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
-  DATABASE: '/data/coco_2014_trainval35k_lmdb'
+  WEIGHTS: '/model/R-50.Affine.pth'
+  DATABASE: '/data/coco_2014_trainval35k'
  IMS_PER_BATCH: 8
  SCALES: [400]
  MAX_SIZE: 666
TEST:
-  DATABASE: '/data/coco_2014_minival_lmdb'
+  DATABASE: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  IMS_PER_BATCH: 1
......
@@ -36,8 +36,8 @@ DROPBLOCK:
  DROP_ON: True
  DECREMENT: 0.000005  # * 20000 = 0.1
TRAIN:
-  WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
-  DATABASE: '/data/coco_2014_trainval35k_lmdb'
+  WEIGHTS: '/model/R-50.Affine.pth'
+  DATABASE: '/data/coco_2014_trainval35k'
  IMS_PER_BATCH: 8
  SCALES: [400]
  MAX_SIZE: 666
@@ -45,7 +45,7 @@ TRAIN:
  COLOR_JITTERING: True
  SCALE_RANGE: [0.75, 1.33]
TEST:
-  DATABASE: '/data/coco_2014_minival_lmdb'
+  DATABASE: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
  IMS_PER_BATCH: 1
......
@@ -23,8 +23,8 @@ FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
-  WEIGHTS: '/data/models/imagenet/AirNet.Affine.pth'
-  DATABASE: '/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/model/AirNet.Affine.pth'
+  DATABASE: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 32
  SCALES: [300]
  MAX_SIZE: 500
@@ -32,7 +32,7 @@ TRAIN:
  SCALE_JITTERING: True
  COLOR_JITTERING: True
TEST:
-  DATABASE: '/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 1
  SCALES: [300]
......
@@ -24,8 +24,8 @@ FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
-  WEIGHTS: '/data/models/imagenet/R-18.Affine.pth'
-  DATABASE: '/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/model/R-18.Affine.pth'
+  DATABASE: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 32
  SCALES: [300]
  MAX_SIZE: 500
@@ -33,7 +33,7 @@ TRAIN:
  SCALE_JITTERING: True
  COLOR_JITTERING: True
TEST:
-  DATABASE: '/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 1
  SCALES: [300]
......
@@ -24,8 +24,8 @@ FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
-  WEIGHTS: '/data/models/imagenet/R-34.Affine.pth'
-  DATABASE: '/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/model/R-34.Affine.pth'
+  DATABASE: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 32
  SCALES: [300]
  MAX_SIZE: 500
@@ -33,7 +33,7 @@ TRAIN:
  SCALE_JITTERING: True
  COLOR_JITTERING: True
TEST:
-  DATABASE: '/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 1
  SCALES: [300]
......
@@ -29,11 +29,11 @@ SSD:
  STRIDES: [8, 16, 32]
  ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
-  WEIGHTS: '/data/models/imagenet/AirNet.Affine.pth'
-  DATABASE: '/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/model/AirNet.Affine.pth'
+  DATABASE: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 32
TEST:
-  DATABASE: '/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 8
  NMS_TOP_K: 400
......
@@ -32,11 +32,11 @@ SSD:
  ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5, 3, 0.33], [1, 2, 0.5, 3, 0.33],
                  [1, 2, 0.5, 3, 0.33], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
-  WEIGHTS: '/data/models/imagenet/VGG16.SSD.pth'
-  DATABASE: '/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/model/VGG16.SSD.pth'
+  DATABASE: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 32
TEST:
-  DATABASE: '/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 8
  NMS_TOP_K: 400
......
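Every config file above follows the same two substitutions: `DATABASE` drops the `_lmdb` suffix and now points at a SeetaRecord directory, and pretrained `WEIGHTS` move from `/data/models/imagenet` to `/model`. These YAML files are merged into the defaults by `cfg_from_file` from `lib/core/config.py` (shown later in this changeset). A small sketch of how the changed keys are consumed; the YAML filename here is hypothetical, the keys come from the configs above:

```python
from lib.core.config import cfg, cfg_from_file

# Sketch: merge one of the updated YAML files into the default options and
# read back the two paths that changed in this commit.
cfg_from_file('configs/voc_faster_rcnn_R-50-FPN.yml')   # hypothetical filename
print(cfg.TRAIN.WEIGHTS)    # e.g. '/model/R-50.Affine.pth'
print(cfg.TRAIN.DATABASE)   # e.g. '/data/voc_0712_trainval' (SeetaRecord dir, no _lmdb)
```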
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
import os.path as osp

sys.path.insert(0, '../../../')

from database.frcnn.utils.make_from_xml import make_db

if __name__ == '__main__':
    VOC_ROOT_DIR = '/home/workspace/datasets/VOC'

    # train database: voc_2007_trainval + voc_2012_trainval
    make_db(database_file=osp.join(VOC_ROOT_DIR, 'cache/voc_0712_trainval_lmdb'),
            images_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/JPEGImages'),
                         osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/JPEGImages')],
            annotations_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/Annotations'),
                              osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/Annotations')],
            imagesets_path=[osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
                            osp.join(VOC_ROOT_DIR, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
            splits=['trainval', 'trainval'])

    # test database: voc_2007_test
    make_db(database_file=osp.join(VOC_ROOT_DIR, 'cache/voc_2007_test_lmdb'),
            images_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/JPEGImages'),
            annotations_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/Annotations'),
            imagesets_path=osp.join(VOC_ROOT_DIR, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
            splits=['test'])
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import time
import cv2

from dragon.tools.db import LMDB

sys.path.insert(0, '../../..')

from lib.proto import anno_pb2 as pb

ZFILL = 8
ENCODE_QUALITY = 95


def set_zfill(value):
    global ZFILL
    ZFILL = value


def set_quality(value):
    global ENCODE_QUALITY
    ENCODE_QUALITY = value


def make_datum(image_id, image_file, objects):
    anno_datum = pb.AnnotatedDatum()
    datum = pb.Datum()
    im = cv2.imread(image_file)
    datum.height, datum.width, datum.channels = im.shape
    datum.encoded = ENCODE_QUALITY != 100
    if datum.encoded:
        result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
    datum.data = im.tostring()
    anno_datum.datum.CopyFrom(datum)
    anno_datum.filename = image_id
    for ix, obj in enumerate(objects):
        anno = pb.Annotation()
        anno.x1, anno.y1, anno.x2, anno.y2 = obj['bbox']
        anno.name = obj['name']
        anno.difficult = obj['difficult']
        anno_datum.annotation.add().CopyFrom(anno)
    return anno_datum


def make_db(database_file, images_path, gt_recs, ext='.png'):
    if os.path.isdir(database_file) is True:
        raise ValueError('The database path already exists.')
    else:
        root_dir = database_file[:database_file.rfind('/')]
        if not os.path.exists(root_dir):
            os.makedirs(root_dir)
    print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
    db = LMDB(max_commit=10000)
    db.open(database_file, mode='w')
    count = 0
    total_line = len(gt_recs)
    start_time = time.time()
    zfill_flag = '{0:0%d}' % ZFILL
    for image_id, objects in gt_recs.items():
        count += 1
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()
        image_file = os.path.join(images_path, image_id + ext)
        datum = make_datum(image_id, image_file, objects)
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())
    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
    db.commit()
    db.close()
    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
    print('The size of database is {0} MB.'.format(
        float(os.path.getsize(database_file + '/data.mdb') / 1000 / 1000)))
\ No newline at end of file
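For reference, the removed `make_db` above consumed ground-truth records keyed by image id. The shape below is reconstructed from how `make_datum` indexes each object (`obj['bbox']`, `obj['name']`, `obj['difficult']`); the literal ids, boxes, and paths are only illustrative:

```python
from collections import OrderedDict

# Illustrative only: image_id -> list of objects, as indexed by make_datum above.
gt_recs = OrderedDict([
    ('000005', [
        {'bbox': (263, 211, 324, 339), 'name': 'chair', 'difficult': False},
        {'bbox': (165, 264, 253, 372), 'name': 'chair', 'difficult': True},
    ]),
])

# Hypothetical invocation of the removed helper:
# make_db(database_file='/data/cache/voc_demo_lmdb',
#         images_path='/data/VOCdevkit2007/VOC2007/JPEGImages',
#         gt_recs=gt_recs, ext='.jpg')
```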
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import time
import cv2
import xml.etree.ElementTree as ET

from dragon.tools.db import LMDB

sys.path.insert(0, '../../..')

from lib.proto import anno_pb2 as pb

ZFILL = 8
ENCODE_QUALITY = 95
# Collects the class names seen while building the database
class_name_set = set()


def set_zfill(value):
    global ZFILL
    ZFILL = value


def set_quality(value):
    global ENCODE_QUALITY
    ENCODE_QUALITY = value


def make_datum(image_file, xml_file):
    tree = ET.parse(xml_file)
    filename = os.path.split(xml_file)[-1]
    objs = tree.findall('object')
    anno_datum = pb.AnnotatedDatum()
    datum = pb.Datum()
    im = cv2.imread(image_file)
    if im is None or im.shape[0] == 0 or im.shape[1] == 0:
        print('Invalid image, ignored: ', xml_file)
        return None
    datum.height, datum.width, datum.channels = im.shape
    datum.encoded = ENCODE_QUALITY != 100
    if datum.encoded:
        result, im = cv2.imencode('.jpg', im, [int(cv2.IMWRITE_JPEG_QUALITY), ENCODE_QUALITY])
        if im is None or im.shape[0] == 0 or im.shape[1] == 0:
            print('Invalid image, ignored: ', xml_file)
            return None
    datum.data = im.tostring()
    anno_datum.datum.CopyFrom(datum)
    anno_datum.filename = filename.split('.')[0]
    if len(objs) == 0:
        return None
    for ix, obj in enumerate(objs):
        anno = pb.Annotation()
        bbox = obj.find('bndbox')
        x1 = float(bbox.find('xmin').text)
        y1 = float(bbox.find('ymin').text)
        x2 = float(bbox.find('xmax').text)
        y2 = float(bbox.find('ymax').text)
        cls = obj.find('name').text.strip()
        anno.x1, anno.y1, anno.x2, anno.y2 = (x1, y1, x2, y2)
        anno.name = cls
        class_name_set.add(cls)
        anno.difficult = False
        if obj.find('difficult') is not None:
            anno.difficult = int(obj.find('difficult').text) == 1
        anno_datum.annotation.add().CopyFrom(anno)
    return anno_datum


def make_db(
    database_file,
    images_path,
    annotations_path,
    imagesets_path,
    splits,
):
    if os.path.isdir(database_file) is True:
        print('Warning: The database path already exists.')
    else:
        root_dir = database_file[:database_file.rfind('/')]
        if not os.path.exists(root_dir):
            os.makedirs(root_dir)
    if not isinstance(images_path, list):
        images_path = [images_path]
    if not isinstance(annotations_path, list):
        annotations_path = [annotations_path]
    if not isinstance(imagesets_path, list):
        imagesets_path = [imagesets_path]
    assert len(splits) == len(imagesets_path)
    assert len(splits) == len(images_path)
    assert len(splits) == len(annotations_path)
    print('Start Time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
    db = LMDB(max_commit=1000)
    db.open(database_file, mode='w')
    count = 0
    total_line = 0
    start_time = time.time()
    zfill_flag = '{0:0%d}' % ZFILL
    for db_idx, split in enumerate(splits):
        split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
        assert os.path.exists(split_file)
        with open(split_file, 'r') as f:
            lines = f.readlines()
        total_line += len(lines)
        for line in lines:
            filename = line.strip()
            image_file = os.path.join(images_path[db_idx], filename + '.jpg')
            xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
            datum = make_datum(image_file, xml_file)
            if datum is not None:
                count += 1
                db.put(zfill_flag.format(count - 1), datum.SerializeToString())
                if count % 1000 == 0:
                    now_time = time.time()
                    print('{0} / {1} in {2:.2f} sec'.format(
                        count, total_line, now_time - start_time))
                    db.commit()
    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
    db.commit()
    db.close()
    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
    print('The size of database is {0} MB.'.format(
        float(os.path.getsize(database_file + '/data.mdb') / 1000 / 1000)))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
#    <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
#    <https://github.com/facebookresearch/Detectron/blob/master/lib/core/config.py>
#
# ------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os.path as osp
import numpy as np

from lib.utils.attrdict import AttrDict as edict

__C = edict()
cfg = __C

###########################################
#                                         #
#            Training Options             #
#                                         #
###########################################

__C.TRAIN = edict()

# Initialize network with weights from this file
__C.TRAIN.WEIGHTS = ''

# Database to train
__C.TRAIN.DATABASE = ''

# Scales to use during training (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,)

# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TRAIN.MAX_SIZE = 1000

# Images to use per mini-batch
__C.TRAIN.IMS_PER_BATCH = 1

# Minibatch size (number of regions of interest [ROIs])
__C.TRAIN.BATCH_SIZE = 128

# Fraction of minibatch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25

# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5

# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.0

# Use shuffle after each epoch
__C.TRAIN.USE_SHUFFLE = True

+# The number of chunks to shuffle
+__C.TRAIN.NUM_SHUFFLE_CHUNKS = 0

# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = True

# Use the difficult(under occlusion) objects
__C.TRAIN.USE_DIFF = True

# Overlap required between a ROI and ground-truth box in order for that ROI to
# be used as a bounding-box regression training example
__C.TRAIN.BBOX_THRESH = 0.5

# If True, randomly scale the image by scale range
__C.TRAIN.SCALE_JITTERING = False
__C.TRAIN.SCALE_RANGE = [0.75, 1.0]

# If True, randomly distort the image by brightness, contrast, and saturation
__C.TRAIN.COLOR_JITTERING = False

# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor satisfied both positive and negative conditions, set it to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
__C.TRAIN.RPN_FG_FRACTION = 0.5
# Total number of examples
__C.TRAIN.RPN_BATCHSIZE = 256
# NMS threshold used on RPN proposals
__C.TRAIN.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TRAIN.RPN_MIN_SIZE = 0
# Remove RPN anchors that go outside the image by RPN_STRADDLE_THRESH pixels
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
__C.TRAIN.RPN_STRADDLE_THRESH = 0

###########################################
#                                         #
#            Testing Options              #
#                                         #
###########################################

__C.TEST = edict()

# Database to test
__C.TEST.DATABASE = ''

# Original json ground-truth file to use
# Records in the Database file will be used instead
__C.TEST.JSON_FILE = ''

# Scales to use during testing (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TEST.SCALES = (600,)

# Max pixel size of the longest side of a scaled input image
# A square will be used if value < 1
__C.TEST.MAX_SIZE = 1000

# Images to use per mini-batch
__C.TEST.IMS_PER_BATCH = 1

# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
__C.TEST.NMS = 0.3

# Use Soft-NMS instead of standard NMS?
# For the soft NMS overlap threshold, we simply use TEST.NMS
__C.TEST.USE_SOFT_NMS = False
__C.TEST.SOFT_NMS_METHOD = 'linear'
__C.TEST.SOFT_NMS_SIGMA = 0.5

# The top-k prior boxes before nms.
__C.TEST.NMS_TOP_K = 400

# The threshold for predicting boxes
__C.TEST.SCORE_THRESH = 0.05

# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5

# NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 0

# Save detection results files if True
# If false, results files are cleaned up (they can be large) after local
# evaluation
__C.TEST.COMPETITION_MODE = True

# The optional test protocol for custom dataSet
# Ignored by VOC, COCO dataSets
# Available protocols: 'voc2007', 'voc2010', 'coco'
__C.TEST.PROTOCOL = 'voc2007'

# Maximum number of detections to return per image (100 is based on the limit
# established for the COCO dataset)
__C.TEST.DETECTIONS_PER_IM = 100

###########################################
#                                         #
#             Model Options               #
#                                         #
###########################################

__C.MODEL = edict()

# The type of the model
# ('faster_rcnn',
#  'mask_rcnn',
#  'ssd',
#  'rssd',
#  'retinanet',
# )
__C.MODEL.TYPE = ''

# The float precision for training and inference
# (FLOAT32, FLOAT16,)
__C.MODEL.DATA_TYPE = 'FLOAT32'

# The backbone
__C.MODEL.BACKBONE = ''

# The number of classes in the dataset
__C.MODEL.NUM_CLASSES = -1

# Keep it for TaaS DataSet
__C.MODEL.CLASSES = ['__background__']

# Add StopGrad at a specified stage so the bottom layers are frozen
__C.MODEL.FREEZE_AT = 2

# Whether to use focal loss for one-stage detectors?
# Enabled if model type in ('ssd',)
# Retinanet is forced to use focal loss
__C.MODEL.USE_FOCAL_LOSS = False
__C.MODEL.FOCAL_LOSS_ALPHA = 0.25
__C.MODEL.FOCAL_LOSS_GAMMA = 2.0

# Stride of the coarsest Feature level
# This is needed so the input can be padded properly
__C.MODEL.COARSEST_STRIDE = -1

###########################################
#                                         #
#              RPN Options                #
#                                         #
###########################################

__C.RPN = edict()

# Strides for multiple rpn heads
__C.RPN.STRIDES = [4, 8, 16, 32, 64]

# Scales for multiple anchors
__C.RPN.SCALES = [8, 8, 8, 8, 8]

# RPN anchor aspect ratios
__C.RPN.ASPECT_RATIOS = [0.5, 1, 2]

###########################################
#                                         #
#          Retina-Net Options             #
#                                         #
###########################################

__C.RETINANET = edict()

# Anchor aspect ratios to use
__C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0)

# Anchor scales per octave
__C.RETINANET.SCALES_PER_OCTAVE = 3

# At each FPN level, we generate anchors based on their scale, aspect_ratio,
# stride of the level, and we multiply the resulting anchor by ANCHOR_SCALE
__C.RETINANET.ANCHOR_SCALE = 4

# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
__C.RETINANET.NUM_CONVS = 4

# During inference, #locs to select based on cls score before NMS is performed
__C.RETINANET.PRE_NMS_TOP_N = 5000

# IoU overlap ratio for labeling an anchor as positive
# Anchors with >= iou overlap are labeled positive
__C.RETINANET.POSITIVE_OVERLAP = 0.5

# IoU overlap ratio for labeling an anchor as negative
# Anchors with < iou overlap are labeled negative
__C.RETINANET.NEGATIVE_OVERLAP = 0.4

###########################################
#                                         #
#              FPN Options                #
#                                         #
###########################################

__C.FPN = edict()

# Channel dimension of the FPN feature levels
__C.FPN.DIM = 256

# Coarsest level of the FPN pyramid
__C.FPN.RPN_MAX_LEVEL = 6
# Finest level of the FPN pyramid
__C.FPN.RPN_MIN_LEVEL = 2

# Hyper-Parameters for the RoI-to-FPN level mapping heuristic
__C.FPN.ROI_CANONICAL_SCALE = 224
__C.FPN.ROI_CANONICAL_LEVEL = 4
# Coarsest level of the FPN pyramid
__C.FPN.ROI_MAX_LEVEL = 5
# Finest level of the FPN pyramid
__C.FPN.ROI_MIN_LEVEL = 2

###########################################
#                                         #
#          Fast R-CNN Options             #
#                                         #
###########################################

__C.FRCNN = edict()

# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.FRCNN.ROI_XFORM_METHOD = 'RoIPool'

# Hidden layer dimension when using an MLP for the RoI box head
__C.FRCNN.MLP_HEAD_DIM = 1024

# RoI transform output resolution
# Note: some models may have constraints on what they can use, e.g. they use
# pretrained FC layers like in VGG16, and will ignore this option
__C.FRCNN.ROI_XFORM_RESOLUTION = 7

###########################################
#                                         #
#          Mask R-CNN Options             #
#                                         #
###########################################

__C.MRCNN = edict()

# Resolution of mask predictions
__C.MRCNN.RESOLUTION = 28

# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.MRCNN.ROI_XFORM_METHOD = 'RoIAlign'

# RoI transform output resolution
__C.MRCNN.ROI_XFORM_RESOLUTION = 14

###########################################
#                                         #
#              SSD Options                #
#                                         #
###########################################

__C.SSD = edict()

# Whether to enable FPN enhancement?
__C.SSD.FPN_ON = False

__C.SSD.MULTIBOX = edict()
# MultiBox configs
__C.SSD.MULTIBOX.STRIDES = []
__C.SSD.MULTIBOX.MIN_SIZES = []
__C.SSD.MULTIBOX.MAX_SIZES = []
__C.SSD.MULTIBOX.ASPECT_RATIOS = []
__C.SSD.MULTIBOX.ASPECT_ANGLES = []

__C.SSD.OHEM = edict()
# The threshold for selecting negative bbox in hard example mining
__C.SSD.OHEM.NEG_OVERLAP = 0.5
# The ratio used in hard example mining
__C.SSD.OHEM.NEG_POS_RATIO = 3.0

# Distort the image?
__C.SSD.DISTORT = edict()
__C.SSD.DISTORT.BRIGHTNESS_PROB = 0.5
__C.SSD.DISTORT.CONTRAST_PROB = 0.5
__C.SSD.DISTORT.SATURATION_PROB = 0.5

# Expand the image?
__C.SSD.EXPAND = edict()
__C.SSD.EXPAND.PROB = 0.5
__C.SSD.EXPAND.MAX_RATIO = 4.0

# Resize the image?
__C.SSD.RESIZE = edict()
__C.SSD.RESIZE.HEIGHT = 300
__C.SSD.RESIZE.WIDTH = 300
__C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']

# Samplers
# Format as (min_scale, max_scale,
#            min_aspect_ratio, max_aspect_ratio,
#            min_jaccard_overlap, max_jaccard_overlap,
#            max_trials, max_sample)
__C.SSD.SAMPLERS = [
    (1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1, 1),   # Entire image
    (0.3, 1.0, 0.5, 2.0, 0.1, 1.0, 10, 1),  # IoU >= 0.1
    (0.3, 1.0, 0.5, 2.0, 0.3, 1.0, 10, 1),  # IoU >= 0.3
    (0.3, 1.0, 0.5, 2.0, 0.5, 1.0, 5, 1),   # IoU >= 0.5
    (0.3, 1.0, 0.5, 2.0, 0.7, 1.0, 5, 1),   # IoU >= 0.7
    (0.3, 1.0, 0.5, 2.0, 0.9, 1.0, 5, 1),   # IoU >= 0.9
    (0.3, 1.0, 0.5, 2.0, 0.0, 1.0, 1, 1),   # Any patches
]

###########################################
#                                         #
#            ResNet Options               #
#                                         #
###########################################

__C.RESNET = edict()

# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
__C.RESNET.NUM_GROUPS = 1

# Baseline width of each group
__C.RESNET.GROUP_WIDTH = 64

###########################################
#                                         #
#           DropBlock Options             #
#                                         #
###########################################

__C.DROPBLOCK = edict()

# Whether to use drop block for more regularization
__C.DROPBLOCK.DROP_ON = False

# Decrement for scheduling keep prob after each iteration
__C.DROPBLOCK.DECREMENT = 1e-6

###########################################
#                                         #
#            Solver Options               #
#                                         #
###########################################

__C.SOLVER = edict()

# Base learning rate for the specified schedule
__C.SOLVER.BASE_LR = 0.001

# Optional scaling factor for total loss
# This option is helpful to scale the magnitude
# of gradients during FP16 training
__C.SOLVER.LOSS_SCALING = 1.

# Schedule type (see functions in utils.lr_policy for options)
# E.g., 'step', 'steps_with_decay', ...
__C.SOLVER.LR_POLICY = 'steps_with_decay'

# Hyperparameter used by the specified policy
# For 'step', the current LR is multiplied by SOLVER.GAMMA at each step
__C.SOLVER.GAMMA = 0.1

# Uniform step size for 'steps' policy
__C.SOLVER.STEP_SIZE = 30000
__C.SOLVER.STEPS = []

# Maximum number of SGD iterations
__C.SOLVER.MAX_ITERS = 40000

# Momentum to use with SGD
__C.SOLVER.MOMENTUM = 0.9

# L2 regularization hyper parameters
__C.SOLVER.WEIGHT_DECAY = 0.0005

# L2 norm factor for clipping gradients
__C.SOLVER.CLIP_NORM = -1.0

# Warm up to SOLVER.BASE_LR over this number of SGD iterations
__C.SOLVER.WARM_UP_ITERS = 500

# Start the warm up from SOLVER.BASE_LR * SOLVER.WARM_UP_FACTOR
__C.SOLVER.WARM_UP_FACTOR = 1.0 / 3.0

# The steps for accumulating gradients
__C.SOLVER.ITER_SIZE = 1

# The interval to display logs
__C.SOLVER.DISPLAY = 20

# The interval to snapshot a model
__C.SOLVER.SNAPSHOT_ITERS = 5000

# prefix to yield the path: <prefix>_iters_XYZ.caffemodel
__C.SOLVER.SNAPSHOT_PREFIX = ''

###########################################
#                                         #
#             Misc Options                #
#                                         #
###########################################

# Number of GPUs to use (applies to both training and testing)
__C.NUM_GPUS = 1

# Use NCCL for all reduce, otherwise use cuda-aware mpi
__C.USE_NCCL = True

# Hosts for Inter-Machine communication
__C.HOSTS = []

# Pixel mean values (BGR order)
__C.PIXEL_MEANS = [102., 115., 122.]

# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
# These are empirically chosen to approximately lead to unit variance targets
__C.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)

# Default weights on (dx, dy, dw, dh, da) for normalizing rbox regression targets
__C.RBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0, 10.0)

# Prior prob for the positives at the beginning of training.
# This is used to set the bias init for the logits layer
__C.PRIOR_PROB = 0.01

# For reproducibility
__C.RNG_SEED = 3

# Root directory of project
__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))

# Data directory
__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))

# Place outputs under an experiments directory
__C.EXP_DIR = ''

# Use GPU implementation of non-maximum suppression
__C.USE_GPU_NMS = True

# Default GPU device id
__C.GPU_ID = 0

# Dump detection visualizations
__C.VIS = False
__C.VIS_ON_FILE = False

# Score threshold for visualization
__C.VIS_TH = 0.7

# Write summaries by tensor board
__C.ENABLE_TENSOR_BOARD = False


def _merge_a_into_b(a, b):
    """Merge config dictionary a into config dictionary b, clobbering the
    options in b whenever they are also specified in a.
    """
    if not isinstance(a, dict):
        return
    for k, v in a.items():
        # a must specify keys that are in b
        if k not in b:
            raise KeyError('{} is not a valid config key'.format(k))
        # the types must match, too
        v = _check_and_coerce_cfg_value_type(v, b[k], k)
        # recursively merge dicts
        if type(v) is edict:
            try:
                _merge_a_into_b(a[k], b[k])
            except:
                print('Error under config key: {}'.format(k))
                raise
        else:
            b[k] = v


def cfg_from_file(filename):
    """Load a config file and merge it into the default options."""
    import yaml
    with open(filename, 'r') as f:
        yaml_cfg = edict(yaml.load(f))
    global __C
    _merge_a_into_b(yaml_cfg, __C)


def cfg_from_list(cfg_list):
    """Set config keys via list (e.g., from command line)."""
    from ast import literal_eval
    assert len(cfg_list) % 2 == 0
    for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
        key_list = k.split('.')
        d = __C
        for subkey in key_list[:-1]:
            assert subkey in d
            d = d[subkey]
        subkey = key_list[-1]
        assert subkey in d
        try:
            value = literal_eval(v)
        except:
            # Handle the case when v is a string literal
            value = v
        assert type(value) == type(d[subkey]), \
            'type {} does not match original type {}'\
            .format(type(value), type(d[subkey]))
        d[subkey] = value


def _check_and_coerce_cfg_value_type(value_a, value_b, key):
    """Checks that `value_a`, which is intended to replace `value_b` is of the
    right type. The type is correct if it matches exactly or is one of a few
    cases in which the type can be easily coerced.
    """
    # The types must match (with some exceptions)
    type_b = type(value_b)
    type_a = type(value_a)
    if type_a is type_b:
        return value_a
    if type_b is float and type_a is int:
        return float(value_a)

    # Exceptions: numpy arrays, strings, tuple<->list
    if isinstance(value_b, np.ndarray):
        value_a = np.array(value_a, dtype=value_b.dtype)
    elif isinstance(value_a, tuple) and isinstance(value_b, list):
        value_a = list(value_a)
    elif isinstance(value_a, list) and isinstance(value_b, tuple):
        value_a = tuple(value_a)
    elif isinstance(value_a, dict) and isinstance(value_b, edict):
        value_a = edict(value_a)
    else:
        raise ValueError(
            'Type mismatch ({} vs. {}) with values ({} vs. {}) for config '
            'key: {}'.format(type_b, type_a, value_b, value_a, key)
        )
    return value_a
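Both override helpers defined above can be exercised directly. A small usage sketch; the key/value pairs are arbitrary examples, not values used by the repository:

```python
from lib.core.config import cfg, cfg_from_list

# cfg_from_list expects a flat [key1, value1, key2, value2, ...] list,
# typically collected from argparse; values are parsed with literal_eval
# and must keep the type of the default they override.
cfg_from_list(['TRAIN.IMS_PER_BATCH', '4',
               'SOLVER.BASE_LR', '0.002',
               'MODEL.NUM_CLASSES', '21'])
assert cfg.TRAIN.IMS_PER_BATCH == 4
```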
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os
import shutil import shutil
import time import time
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.core.config import cfg_from_file from lib.core.config import cfg_from_file
class Coordinator(object): class Coordinator(object):
"""Coordinator is a simple tool to manage the """Coordinator is a simple tool to manage the
unique experiments from the YAML configurations. unique experiments from the YAML configurations.
""" """
def __init__(self, cfg_file, exp_dir=None): def __init__(self, cfg_file, exp_dir=None):
# Override the default configs # Override the default configs
cfg_from_file(cfg_file) cfg_from_file(cfg_file)
if cfg.EXP_DIR != '': if cfg.EXP_DIR != '':
exp_dir = cfg.EXP_DIR exp_dir = cfg.EXP_DIR
if exp_dir is None: if exp_dir is None:
model_id = time.strftime( model_id = time.strftime(
'%Y%m%d_%H%M%S', time.localtime(time.time())) '%Y%m%d_%H%M%S', time.localtime(time.time()))
self.experiment_dir = '../experiments/{}'.format(model_id) self.experiment_dir = '../experiments/{}'.format(model_id)
if not os.path.exists(self.experiment_dir): if not os.path.exists(self.experiment_dir):
os.makedirs(self.experiment_dir) os.makedirs(self.experiment_dir)
else: else:
if not os.path.exists(exp_dir): if not os.path.exists(exp_dir):
raise ValueError('ExperimentDir({}) does not exist.'.format(exp_dir)) raise ValueError('ExperimentDir({}) does not exist.'.format(exp_dir))
self.experiment_dir = exp_dir self.experiment_dir = exp_dir
def _path_at(self, file, auto_create=True): def _path_at(self, file, auto_create=True):
path = os.path.abspath(os.path.join(self.experiment_dir, file)) path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path): if auto_create and not os.path.exists(path):
os.makedirs(path) os.makedirs(path)
return path return path
def checkpoints_dir(self): def checkpoints_dir(self):
return self._path_at('checkpoints') return self._path_at('checkpoints')
def exports_dir(self): def exports_dir(self):
return self._path_at('exports') return self._path_at('exports')
def results_dir(self, checkpoint=None): def results_dir(self, checkpoint=None):
sub_dir = os.path.splitext(os.path.basename(checkpoint))[0] if checkpoint else '' sub_dir = os.path.splitext(os.path.basename(checkpoint))[0] if checkpoint else ''
return self._path_at(os.path.join('results', sub_dir)) return self._path_at(os.path.join('results', sub_dir))
def checkpoint(self, global_step=None, wait=True): def checkpoint(self, global_step=None, wait=True):
def locate(): def locate():
files = os.listdir(self.checkpoints_dir()) files = os.listdir(self.checkpoints_dir())
steps = [] steps = []
for ix, file in enumerate(files): for ix, file in enumerate(files):
step = int(file.split('_iter_')[-1].split('.')[0]) step = int(file.split('_iter_')[-1].split('.')[0])
if global_step == step: if global_step == step:
return os.path.join(self.checkpoints_dir(), files[ix]), step return os.path.join(self.checkpoints_dir(), files[ix]), step
steps.append(step) steps.append(step)
if global_step is None: if global_step is None:
if len(files) == 0: if len(files) == 0:
return None, 0 return None, 0
last_idx = int(np.argmax(steps)) last_idx = int(np.argmax(steps))
last_step = steps[last_idx] last_step = steps[last_idx]
return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step return os.path.join(self.checkpoints_dir(), files[last_idx]), last_step
return None, 0 return None, 0
result = locate() result = locate()
while result[0] is None and wait: while result[0] is None and wait:
print('\rWaiting for the checkpoint at step {} to exist...'.format(global_step), end='') print('\rWaiting for the checkpoint at step {} to exist...'.format(global_step), end='')
time.sleep(10) time.sleep(10)
result = locate() result = locate()
return result return result
def delete_experiment(self): def delete_experiment(self):
if os.path.exists(self.experiment_dir): if os.path.exists(self.experiment_dir):
shutil.rmtree(self.experiment_dir) shutil.rmtree(self.experiment_dir)
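# --- Editor's note: a minimal, hedged usage sketch of the Coordinator above.
# The YAML path is a placeholder, not a file shipped with this repository.
# checkpoint(wait=False) returns (None, 0) until a '<prefix>_iter_<step>.pth'
# snapshot has been written under <experiment_dir>/checkpoints.
if __name__ == '__main__':
    coordinator = Coordinator('configs/faster_rcnn.yml')
    print('experiment dir :', coordinator.experiment_dir)
    print('checkpoints dir:', coordinator.checkpoints_dir())
    last_file, last_step = coordinator.checkpoint(wait=False)
    print('latest snapshot:', last_file, '@ step', last_step)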
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import collections
import cv2 import multiprocessing as mp
from multiprocessing import Queue import os
from collections import OrderedDict
import cv2
from lib.core.config import cfg import dragon
from lib.datasets.factory import get_imdb
# All detectors share the same reader/transformer during testing from lib.core.config import cfg
from lib.faster_rcnn.data.data_reader import DataReader from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data.data_transformer import DataTransformer from lib.faster_rcnn.data_transformer import DataTransformer
class TestServer(object): class TestServer(object):
def __init__(self, output_dir): def __init__(self, output_dir):
self.imdb = get_imdb(cfg.TEST.DATABASE) self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE) self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.num_images, self.num_classes, self.classes = \ self.num_images, self.num_classes, self.classes = \
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = DataReader(**{'source': self.imdb.source}) self.data_reader = dragon.io.DataReader(
self.data_transformer = DataTransformer() dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
self.data_reader.q_out = Queue(cfg.TEST.IMS_PER_BATCH) self.data_transformer = DataTransformer()
self.data_reader.start() self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH)
self.gt_recs = OrderedDict() self.data_reader.start()
self.output_dir = output_dir self.gt_recs = collections.OrderedDict()
if cfg.VIS_ON_FILE: self.output_dir = output_dir
self.vis_dir = os.path.join(self.output_dir, 'vis') if cfg.VIS_ON_FILE:
if not os.path.exists(self.vis_dir): self.vis_dir = os.path.join(self.output_dir, 'vis')
os.makedirs(self.vis_dir) if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls() def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
serialized = self.data_reader.q_out.get() def get_image(self):
image = self.data_transformer.get_image(serialized) example = self.data_reader.q_out.get()
image_id, objects = self.data_transformer.get_annotations(serialized) image = self.data_transformer.get_image(example)
self.gt_recs[image_id] = { image_id, objects = self.data_transformer.get_annotations(example)
'objects': objects, self.gt_recs[image_id] = {
'width': image.shape[1], 'objects': objects,
'height': image.shape[0], 'width': image.shape[1],
} 'height': image.shape[0],
return image_id, image }
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \ def get_save_filename(self, image_id, ext='.jpg'):
if cfg.VIS_ON_FILE else None return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images: def get_records(self):
raise RuntimeError( if len(self.gt_recs) != self.num_images:
'Loaded {} records, but {} are required.' raise RuntimeError(
.format(len(self.gt_recs), self.num_images), 'Loaded {} records, but {} are required.'
) .format(len(self.gt_recs), self.num_images),
return self.gt_recs )
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections( def evaluate_detections(self, all_boxes):
all_boxes, self.get_records(), self.output_dir) self.imdb.evaluate_detections(
all_boxes,
def evaluate_segmentations(self, all_boxes, all_masks): self.get_records(),
self.imdb.evaluate_segmentations( self.output_dir,
all_boxes, all_masks, self.get_records(), self.output_dir) )
def evaluate_segmentations(self, all_boxes, all_masks):
class InferServer(object): self.imdb.evaluate_segmentations(
def __init__(self, output_dir): all_boxes,
self.images_dir = cfg.TEST.DATABASE all_masks,
self.imdb = get_imdb('taas:/empty') self.get_records(),
self.images = os.listdir(self.images_dir) self.output_dir,
self.num_images, self.num_classes, self.classes = \ )
len(self.images), cfg.MODEL.NUM_CLASSES, cfg.MODEL.CLASSES
self.data_transformer = DataTransformer()
self.gt_recs = OrderedDict() class InferServer(object):
self.output_dir = output_dir def __init__(self, output_dir):
self.image_idx = 0 self.images_dir = cfg.TEST.DATABASE
if cfg.VIS_ON_FILE: self.imdb = get_imdb('taas:/empty')
self.vis_dir = os.path.join(self.output_dir, 'vis') self.images = os.listdir(self.images_dir)
if not os.path.exists(self.vis_dir): self.num_images, self.num_classes, self.classes = \
os.makedirs(self.vis_dir) len(self.images), cfg.MODEL.NUM_CLASSES, cfg.MODEL.CLASSES
self.data_transformer = DataTransformer()
def set_transformer(self, transformer_cls): self.gt_recs = collections.OrderedDict()
self.data_transformer = transformer_cls() self.output_dir = output_dir
self.image_idx = 0
def get_image(self): if cfg.VIS_ON_FILE:
image_name = self.images[self.image_idx] self.vis_dir = os.path.join(self.output_dir, 'vis')
image_id = image_name.split('.')[0] if not os.path.exists(self.vis_dir):
image = cv2.imread(os.path.join(self.images_dir, image_name)) os.makedirs(self.vis_dir)
self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = { def set_transformer(self, transformer_cls):
'width': image.shape[1], self.data_transformer = transformer_cls()
'height': image.shape[0],
} def get_image(self):
return image_id, image image_name = self.images[self.image_idx]
image_id = image_name.split('.')[0]
def get_save_filename(self, image_id, ext='.jpg'): image = cv2.imread(os.path.join(self.images_dir, image_name))
return os.path.join(self.vis_dir, image_id + ext) \ self.image_idx = (self.image_idx + 1) % self.num_images
if cfg.VIS_ON_FILE else None self.gt_recs[image_id] = {'width': image.shape[1], 'height': image.shape[0]}
return image_id, image
def get_records(self):
if len(self.gt_recs) != self.num_images: def get_save_filename(self, image_id, ext='.jpg'):
raise RuntimeError( return os.path.join(self.vis_dir, image_id + ext) \
'Loaded {} records, but {} are required.' if cfg.VIS_ON_FILE else None
.format(len(self.gt_recs), self.num_images),
) def get_records(self):
return self.gt_recs if len(self.gt_recs) != self.num_images:
raise RuntimeError(
def evaluate_detections(self, all_boxes): 'Loaded {} records, but {} are required.'
self.imdb.evaluate_detections( .format(len(self.gt_recs), self.num_images),
all_boxes, )
self.get_records(), return self.gt_recs
self.output_dir,
) def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
def evaluate_segmentations(self, all_boxes, all_masks): all_boxes,
self.imdb.evaluate_segmentations( self.get_records(),
all_boxes, self.output_dir,
all_masks, )
self.get_records(),
self.output_dir, def evaluate_segmentations(self, all_boxes, all_masks):
) self.imdb.evaluate_segmentations(
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
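# --- Editor's note: a hedged sketch of how a test loop could drive the
# TestServer above; it is not the project's actual test driver. 'run_detector'
# is a hypothetical callable returning per-class [x1, y1, x2, y2, score] arrays.
def run_test_loop(output_dir, run_detector):
    server = TestServer(output_dir)
    # all_boxes[cls][i] holds the detections of class `cls` on the i-th image.
    all_boxes = [[[] for _ in range(server.num_images)]
                 for _ in range(server.num_classes)]
    for i in range(server.num_images):
        image_id, image = server.get_image()
        boxes_per_class = run_detector(image)
        for cls_ind in range(1, server.num_classes):
            all_boxes[cls_ind][i] = boxes_per_class[cls_ind]
    # get_records() requires every image to have been pulled exactly once.
    server.evaluate_detections(all_boxes)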
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/train.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/train.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import datetime import datetime
import os import os
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.core.solver import get_solver_func from lib.core.solver import get_solver_func
from lib.utils import logger from lib.utils import logger
from lib.utils.stats import SmoothedValue from lib.utils.stats import SmoothedValue
from lib.utils.timer import Timer from lib.utils.timer import Timer
class SolverWrapper(object): class SolverWrapper(object):
def __init__(self, coordinator): def __init__(self, coordinator):
self.output_dir = coordinator.checkpoints_dir() self.output_dir = coordinator.checkpoints_dir()
self.solver = get_solver_func('MomentumSGD')() self.solver = get_solver_func('MomentumSGD')()
# Load the pre-trained weights # Load the pre-trained weights
init_weights = cfg.TRAIN.WEIGHTS init_weights = cfg.TRAIN.WEIGHTS
if init_weights != '': if init_weights != '':
if os.path.exists(init_weights): if os.path.exists(init_weights):
logger.info('Loading weights from {}.'.format(init_weights)) logger.info('Loading weights from {}.'.format(init_weights))
self.solver.detector.load_weights(init_weights) self.solver.detector.load_weights(init_weights)
else: else:
raise ValueError('Invalid path of weights: {}'.format(init_weights)) raise ValueError('Invalid path of weights: {}'.format(init_weights))
# Mixed precision training? # Mixed precision training?
if cfg.MODEL.DATA_TYPE.lower() == 'float16': if cfg.MODEL.DATA_TYPE.lower() == 'float16':
self.solver.detector.half() # Powerful FP16 Support self.solver.detector.half() # Powerful FP16 Support
self.solver.detector.cuda(cfg.GPU_ID) self.solver.detector.cuda(cfg.GPU_ID)
# Plan the metrics # Plan the metrics
self.metrics = collections.OrderedDict() self.metrics = collections.OrderedDict()
if cfg.ENABLE_TENSOR_BOARD: if cfg.ENABLE_TENSOR_BOARD:
from dragon.tools.tensorboard import TensorBoard from dragon.tools.tensorboard import TensorBoard
self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs') self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs')
def snapshot(self): def snapshot(self):
if not logger.is_root(): if not logger.is_root():
return None return None
filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}' filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}'
.format(self.solver.iter) + '.pth') .format(self.solver.iter) + '.pth')
filename = os.path.join(self.output_dir, filename) filename = os.path.join(self.output_dir, filename)
torch.save(self.solver.detector.state_dict(), filename) torch.save(self.solver.detector.state_dict(), filename)
logger.info('Wrote snapshot to: {:s}'.format(filename)) logger.info('Wrote snapshot to: {:s}'.format(filename))
return filename return filename
def add_metrics(self, stats): def add_metrics(self, stats):
for k, v in stats['loss'].items(): for k, v in stats['loss'].items():
if k not in self.metrics: if k not in self.metrics:
self.metrics[k] = SmoothedValue(20) self.metrics[k] = SmoothedValue(20)
self.metrics[k].AddValue(v) self.metrics[k].AddValue(v)
def send_metrics(self, stats): def send_metrics(self, stats):
if hasattr(self, 'board'): if hasattr(self, 'board'):
self.board.scalar_summary('lr', stats['lr'], stats['iter']) self.board.scalar_summary('lr', stats['lr'], stats['iter'])
self.board.scalar_summary('time', stats['time'], stats['iter']) self.board.scalar_summary('time', stats['time'], stats['iter'])
for k, v in self.metrics.items(): for k, v in self.metrics.items():
if k == 'total': if k == 'total':
self.board.scalar_summary( self.board.scalar_summary(
'total_loss', 'total_loss',
v.GetMedianValue(), v.GetMedianValue(),
stats['iter'], stats['iter'],
) )
else: else:
self.board.scalar_summary( self.board.scalar_summary(
k, k,
v.GetMedianValue(), v.GetMedianValue(),
stats['iter'], stats['iter'],
) )
def step(self, display=False): def step(self, display=False):
stats = self.solver.one_step() stats = self.solver.one_step()
self.add_metrics(stats) self.add_metrics(stats)
self.send_metrics(stats) self.send_metrics(stats)
if display: if display:
logger.info( logger.info(
'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % ( 'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (
stats['iter'], stats['lr'], stats['iter'], stats['lr'],
self.metrics['total'].GetMedianValue(), self.metrics['total'].GetMedianValue(),
stats['time'], stats['time'],
) )
) )
for k, v in self.metrics.items(): for k, v in self.metrics.items():
if k == 'total': if k == 'total':
continue continue
logger.info(' ' * 10 + 'Train net output({}): {}' logger.info(' ' * 10 + 'Train net output({}): {}'
.format(k, v.GetMedianValue())) .format(k, v.GetMedianValue()))
def train_model(self): def train_model(self):
"""Network training loop.""" """Network training loop."""
last_snapshot_iter = -1 last_snapshot_iter = -1
timer = Timer() timer = Timer()
model_paths = [] model_paths = []
start_lr = self.solver.base_lr start_lr = self.solver.base_lr
while self.solver.iter < cfg.SOLVER.MAX_ITERS: while self.solver.iter < cfg.SOLVER.MAX_ITERS:
if self.solver.iter < cfg.SOLVER.WARM_UP_ITERS: if self.solver.iter < cfg.SOLVER.WARM_UP_ITERS:
alpha = (self.solver.iter + 1.0) / cfg.SOLVER.WARM_UP_ITERS alpha = (self.solver.iter + 1.0) / cfg.SOLVER.WARM_UP_ITERS
self.solver.base_lr = \ self.solver.base_lr = \
start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha) start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha)
# Apply 1-step SGD update # Apply 1-step SGD update
with timer.tic_and_toc(): with timer.tic_and_toc():
self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0) self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0)
if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0: if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0:
average_time = timer.average_time average_time = timer.average_time
eta_seconds = average_time * ( eta_seconds = average_time * (
cfg.SOLVER.MAX_ITERS - self.solver.iter) cfg.SOLVER.MAX_ITERS - self.solver.iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds))) eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS
logger.info( logger.info(
'< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >' '< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta) .format(progress, timer.average_time, eta)
) )
if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0: if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0:
last_snapshot_iter = self.solver.iter last_snapshot_iter = self.solver.iter
model_paths.append(self.snapshot()) model_paths.append(self.snapshot())
if last_snapshot_iter != self.solver.iter: if last_snapshot_iter != self.solver.iter:
model_paths.append(self.snapshot()) model_paths.append(self.snapshot())
return model_paths return model_paths
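# --- Editor's note: a self-contained sketch of the linear warm-up schedule
# applied in train_model() above; the default numbers here are illustrative,
# not the project's configuration values.
def _warm_up_lr(step, base_lr=0.01, warm_up_iters=500, warm_up_factor=1.0 / 3.0):
    # Ramps linearly from base_lr * warm_up_factor up to base_lr.
    if step >= warm_up_iters:
        return base_lr
    alpha = (step + 1.0) / warm_up_iters
    return base_lr * (warm_up_factor * (1.0 - alpha) + alpha)

# e.g. _warm_up_lr(0) ~= 0.0033, _warm_up_lr(249) ~= 0.0067, _warm_up_lr(499) == 0.01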
def train_net(coordinator, start_iter=0): def train_net(coordinator, start_iter=0):
sw = SolverWrapper(coordinator) sw = SolverWrapper(coordinator)
sw.solver.iter = start_iter sw.solver.iter = start_iter
logger.info('Solving...') logger.info('Solving...')
model_paths = sw.train_model() model_paths = sw.train_model()
return model_paths return model_paths
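# --- Editor's note: a hedged sketch of the end-to-end call chain; the config
# path is a placeholder. train_model() snapshots weights as
# '<SOLVER.SNAPSHOT_PREFIX>_iter_<step>.pth', the same pattern that
# Coordinator.checkpoint() parses back when a test process waits for a step.
#
#   coordinator = Coordinator('configs/faster_rcnn.yml')
#   model_paths = train_net(coordinator, start_iter=0)
#   latest_path, latest_step = coordinator.checkpoint(wait=False)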
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/factory.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/factory.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os
from lib.datasets.taas import TaaS from lib.datasets.taas import TaaS
# TaaS DataSet # TaaS DataSet
_GLOBAL_DATA_SETS = {'taas': lambda source: TaaS(source)} _GLOBAL_DATA_SETS = {'taas': lambda source: TaaS(source)}
def get_imdb(name): def get_imdb(name):
"""Get an imdb (image database) by name.""" """Get an imdb (image database) by name."""
keys = name.split(':') keys = name.split(':')
if len(keys) >= 2: if len(keys) >= 2:
cls, source = keys[0], ':'.join(keys[1:]) cls, source = keys[0], ':'.join(keys[1:])
if cls not in _GLOBAL_DATA_SETS: if cls not in _GLOBAL_DATA_SETS:
raise KeyError('Unknown DataSet: {}'.format(cls)) raise KeyError('Unknown DataSet: {}'.format(cls))
return _GLOBAL_DATA_SETS[cls](source) return _GLOBAL_DATA_SETS[cls](source)
elif os.path.exists(name): elif os.path.exists(name):
return _GLOBAL_DATA_SETS['taas'](name) return _GLOBAL_DATA_SETS['taas'](name)
else: else:
raise ValueError('Illegal Database: {}'.format(name)) raise ValueError('Illegal Database: {}'.format(name))
def list_imdbs(): def list_imdbs():
"""List all registered imdbs.""" """List all registered imdbs."""
return _GLOBAL_DATA_SETS.keys() return _GLOBAL_DATA_SETS.keys()
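# --- Editor's note: hedged usage examples for get_imdb(); the record paths
# below are placeholders rather than datasets shipped with the repository.
#
#   get_imdb('taas:/data/voc_0712_trainval')   # explicit '<class>:<source>' form
#   get_imdb('/data/coco_2014_minival')        # bare existing path -> 'taas' class
#   list_imdbs()                               # -> dict_keys(['taas'])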
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/imdb.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/imdb.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
import os import os
from dragon.tools.db import LMDB import dragon
from lib.core.config import cfg from lib.core.config import cfg
class imdb(object): class imdb(object):
def __init__(self, name): def __init__(self, name):
self._name = name self._name = name
self._num_classes = 0 self._num_classes = 0
self._classes = [] self._classes = []
@property @property
def name(self): def name(self):
return self._name return self._name
@property @property
def num_classes(self): def num_classes(self):
return len(self._classes) return len(self._classes)
@property @property
def classes(self): def classes(self):
return self._classes return self._classes
@property @property
def cache_path(self): def cache_path(self):
cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache')) cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache'))
if not os.path.exists(cache_path): if not os.path.exists(cache_path):
os.makedirs(cache_path) os.makedirs(cache_path)
return cache_path return cache_path
@property @property
def source(self): def source(self):
excepted_source = os.path.join(self.cache_path, self.name + '_lmdb') excepted_source = os.path.join(self.cache_path, self.name)
if not os.path.exists(excepted_source): if not os.path.exists(excepted_source):
raise RuntimeError('Expected LMDB source from: {}, ' raise RuntimeError(
'but it does not exist.'.format(excepted_source)) 'Expected source from: {}, '
return excepted_source 'but it does not exist.'
.format(excepted_source)
@property )
def num_images(self): return excepted_source
self._db = LMDB()
self._db.open(self.source) @property
num_entries = self._db.num_entries() def num_images(self):
self._db.close() return dragon.io.SeetaRecordDataset(self.source).size
return num_entries
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
def evaluate_detections(self, all_boxes, gt_recs, output_dir): pass
pass
def evaluate_masks(self, all_boxes, all_masks, output_dir):
def evaluate_masks(self, all_boxes, all_masks, output_dir): pass
pass
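# --- Editor's note: a hedged sketch of how the base class resolves its data;
# 'my_dataset' is a placeholder record folder under <cfg.DATA_DIR>/cache.
#
#   db = imdb('my_dataset')
#   db.source       # -> <cfg.DATA_DIR>/cache/my_dataset (raises if missing)
#   db.num_images   # -> dragon.io.SeetaRecordDataset(db.source).size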
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import json
import sys import os
import json import sys
import numpy as np import uuid
import uuid
import cv2 import cv2
try: import numpy as np
import cPickle try:
except: import cPickle
import pickle as cPickle except:
from .imdb import imdb import pickle as cPickle
from .voc_eval import voc_bbox_eval, voc_segm_eval
from lib.core.config import cfg
from lib.core.config import cfg from lib.datasets.imdb import imdb
from lib.utils import boxes as box_utils from lib.datasets.voc_eval import voc_bbox_eval
from lib.pycocotools.mask import encode as encode_masks from lib.datasets.voc_eval import voc_segm_eval
from lib.pycocotools.mask import encode as encode_masks
from lib.utils import boxes as box_utils
class TaaS(imdb):
def __init__(self, source):
imdb.__init__(self, 'taas') class TaaS(imdb):
self._classes = cfg.MODEL.CLASSES def __init__(self, source):
self._source = source imdb.__init__(self, 'taas')
self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) self._classes = cfg.MODEL.CLASSES
self._class_to_cat_id = self._class_to_ind self._source = source
self._salt = str(uuid.uuid4()) self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
self.config = {'cleanup': True, 'use_salt': True} self._class_to_cat_id = self._class_to_ind
self._salt = str(uuid.uuid4())
@property self.config = {'cleanup': True, 'use_salt': True}
def source(self):
excepted_source = self._source @property
if not os.path.exists(excepted_source): def source(self):
raise RuntimeError('Expected LMDB source from: {}, ' excepted_source = self._source
'but it does not exist.'.format(excepted_source)) if not os.path.exists(excepted_source):
return excepted_source raise RuntimeError(
'Expected source from: {}, '
############################################## 'but it does not exist.'
# # .format(excepted_source)
# UTILS # )
# # return excepted_source
##############################################
##############################################
def _get_comp_id(self): # #
return '_' + self._salt if self.config['use_salt'] else '' # UTILS #
# #
@classmethod ##############################################
def _get_prefix(cls, type='bbox'):
if type == 'bbox': def _get_comp_id(self):
return 'detections_' return '_' + self._salt if self.config['use_salt'] else ''
elif type == 'segm':
return 'segmentations_' @classmethod
elif type == 'kpt': def _get_prefix(cls, type='bbox'):
return 'keypoints_' if type == 'bbox':
return '' return 'detections_'
elif type == 'segm':
def _get_voc_results_T(self, results_folder, type='bbox'): return 'segmentations_'
# experiments/model_id/results/detections_taas_<comp_id>_aeroplane.txt elif type == 'kpt':
if type == 'bbox': return 'keypoints_'
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.txt' return ''
elif type == 'segm':
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.pkl' def _get_voc_results_T(self, results_folder, type='bbox'):
else: # experiments/model_id/results/detections_taas_<comp_id>_aeroplane.txt
raise ValueError('Type of results can be either bbox or segm.') if type == 'bbox':
if not os.path.exists(results_folder): filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.txt'
os.makedirs(results_folder) elif type == 'segm':
return os.path.join(results_folder, filename) filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.pkl'
else:
def _get_coco_annotations_T(self, results_folder, type='bbox'): raise ValueError('Type of results can be either bbox or segm.')
# experiments/model_id/annotations/[GT]detections_taas_<comp_id>.json if not os.path.exists(results_folder):
filename = '[GT]_' + self._get_prefix(type) + self._name + '.json' os.makedirs(results_folder)
if not os.path.exists(results_folder): return os.path.join(results_folder, filename)
os.makedirs(results_folder)
return os.path.join(results_folder, filename) def _get_coco_annotations_T(self, results_folder, type='bbox'):
# experiments/model_id/annotations/[GT]detections_taas_<comp_id>.json
def _get_coco_results_T(self, results_folder, type='bbox'): filename = '[GT]_' + self._get_prefix(type) + self._name + '.json'
# experiments/model_id/results/detections_taas_<comp_id>.json if not os.path.exists(results_folder):
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '.json' os.makedirs(results_folder)
if not os.path.exists(results_folder): return os.path.join(results_folder, filename)
os.makedirs(results_folder)
return os.path.join(results_folder, filename) def _get_coco_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>.json
############################################## filename = self._get_prefix(type) + self._name + self._get_comp_id() + '.json'
# # if not os.path.exists(results_folder):
# VOC # os.makedirs(results_folder)
# # return os.path.join(results_folder, filename)
##############################################
##############################################
def _write_xml_bbox_results(self, all_boxes, gt_recs, output_dir): # #
from xml.dom import minidom # VOC #
import xml.etree.ElementTree as ET # #
ix = 0 ##############################################
for image_id, rec in gt_recs.items():
root = ET.Element('annotation') def _write_xml_bbox_results(self, all_boxes, gt_recs, output_dir):
ET.SubElement(root, 'filename').text = str(image_id) from xml.dom import minidom
for cls_ind, cls in enumerate(self.classes): import xml.etree.ElementTree as ET
if cls == '__background__': ix = 0
continue for image_id, rec in gt_recs.items():
detections = all_boxes[cls_ind][ix] root = ET.Element('annotation')
if len(detections) == 0: ET.SubElement(root, 'filename').text = str(image_id)
continue for cls_ind, cls in enumerate(self.classes):
for k in range(detections.shape[0]): if cls == '__background__':
if detections[k, -1] < cfg.VIS_TH: continue
continue detections = all_boxes[cls_ind][ix]
object = ET.SubElement(root, 'object') if len(detections) == 0:
ET.SubElement(object, 'name').text = cls continue
ET.SubElement(object, 'difficult').text = '0' for k in range(detections.shape[0]):
bnd_box = ET.SubElement(object, 'bndbox') if detections[k, -1] < cfg.VIS_TH:
ET.SubElement(bnd_box, 'xmin').text = str(detections[k][0]) continue
ET.SubElement(bnd_box, 'ymin').text = str(detections[k][1]) object = ET.SubElement(root, 'object')
ET.SubElement(bnd_box, 'xmax').text = str(detections[k][2]) ET.SubElement(object, 'name').text = cls
ET.SubElement(bnd_box, 'ymax').text = str(detections[k][3]) ET.SubElement(object, 'difficult').text = '0'
ix += 1 bnd_box = ET.SubElement(object, 'bndbox')
rawText = ET.tostring(root) ET.SubElement(bnd_box, 'xmin').text = str(detections[k][0])
dom = minidom.parseString(rawText) ET.SubElement(bnd_box, 'ymin').text = str(detections[k][1])
with open('{}/{}.xml'.format(output_dir, image_id), 'w') as f: ET.SubElement(bnd_box, 'xmax').text = str(detections[k][2])
dom.writexml(f, "", "\t", "\n", "utf-8") ET.SubElement(bnd_box, 'ymax').text = str(detections[k][3])
ix += 1
def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir): rawText = ET.tostring(root)
for cls_ind, cls in enumerate(self.classes): dom = minidom.parseString(rawText)
if cls == '__background__': with open('{}/{}.xml'.format(output_dir, image_id), 'w') as f:
continue dom.writexml(f, "", "\t", "\n", "utf-8")
print('Writing {} VOC format bbox results'.format(cls))
filename = self._get_voc_results_T(output_dir).format(cls) def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir):
with open(filename, 'wt') as f: for cls_ind, cls in enumerate(self.classes):
ix = 0 if cls == '__background__':
for image_id, rec in gt_recs.items(): continue
dets = all_boxes[cls_ind][ix] print('Writing {} VOC format bbox results'.format(cls))
ix += 1 filename = self._get_voc_results_T(output_dir).format(cls)
if len(dets) == 0: with open(filename, 'wt') as f:
continue ix = 0
for k in range(dets.shape[0]): for image_id, rec in gt_recs.items():
f.write( dets = all_boxes[cls_ind][ix]
'{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n' ix += 1
.format(image_id, dets[k, -1], if len(dets) == 0:
dets[k, 0] + 1, dets[k, 1] + 1, continue
dets[k, 2] + 1, dets[k, 3] + 1)) for k in range(dets.shape[0]):
f.write(
def _write_voc_segm_results(self, all_boxes, all_masks, output_dir): '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'
for cls_inds, cls in enumerate(self.classes): .format(image_id, dets[k, -1],
if cls == '__background__': dets[k, 0] + 1, dets[k, 1] + 1,
continue dets[k, 2] + 1, dets[k, 3] + 1))
print('Writing {} VOC format segm results'.format(cls))
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls) def _write_voc_segm_results(self, all_boxes, all_masks, output_dir):
bbox_filename = segm_filename.replace('segmentations', 'detections') for cls_inds, cls in enumerate(self.classes):
with open(bbox_filename, 'wb') as f: if cls == '__background__':
cPickle.dump(all_boxes[cls_inds], f, cPickle.HIGHEST_PROTOCOL) continue
with open(segm_filename, 'wb') as f: print('Writing {} VOC format segm results'.format(cls))
cPickle.dump(all_masks[cls_inds], f, cPickle.HIGHEST_PROTOCOL) segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
def _do_voc_bbox_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True): with open(bbox_filename, 'wb') as f:
aps = [] cPickle.dump(all_boxes[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No')) with open(segm_filename, 'wb') as f:
for i, cls in enumerate(self._classes): cPickle.dump(all_masks[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
if cls == '__background__':
continue def _do_voc_bbox_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
det_file = self._get_voc_results_T(output_dir).format(cls) aps = []
rec, prec, ap = voc_bbox_eval( print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
det_file, gt_recs, cls, for i, cls in enumerate(self._classes):
IoU=IoU, use_07_metric=use_07_metric, if cls == '__background__':
) continue
if ap > 0: det_file = self._get_voc_results_T(output_dir).format(cls)
aps += [ap] rec, prec, ap = voc_bbox_eval(
print('AP for {} = {:.4f}'.format(cls, ap)) det_file, gt_recs, cls,
print('Mean AP = {:.4f}\n'.format(np.mean(aps))) IoU=IoU, use_07_metric=use_07_metric,
)
def _do_voc_segm_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True): if ap > 0:
aps = [] aps += [ap]
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No')) print('AP for {} = {:.4f}'.format(cls, ap))
for i, cls in enumerate(self.classes): print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
if cls == '__background__':
continue def _do_voc_segm_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls) aps = []
bbox_filename = segm_filename.replace('segmentations', 'detections') print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
ap = voc_segm_eval( for i, cls in enumerate(self.classes):
bbox_filename, segm_filename, gt_recs, cls, if cls == '__background__':
IoU=IoU, use_07_metric=use_07_metric, continue
) segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
if ap > 0: bbox_filename = segm_filename.replace('segmentations', 'detections')
aps += [ap] ap = voc_segm_eval(
print('AP for {} = {:.4f}'.format(cls, ap)) bbox_filename, segm_filename, gt_recs, cls,
print('Mean AP = {:.4f}\n'.format(np.mean(aps))) IoU=IoU, use_07_metric=use_07_metric,
)
############################################## if ap > 0:
# # aps += [ap]
# COCO # print('AP for {} = {:.4f}'.format(cls, ap))
# # print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
##############################################
##############################################
@classmethod # #
def _get_coco_image_id(cls, image_name): # COCO #
image_id = image_name.split('_')[-1].split('.')[0] # #
try: ##############################################
return int(image_id)
except: @classmethod
return image_name def _get_coco_image_id(cls, image_name):
image_id = image_name.split('_')[-1].split('.')[0]
@classmethod try:
def _encode_coco_masks(cls, masks, boxes, im_h, im_w): return int(image_id)
num_pred = len(boxes) except:
assert len(masks) == num_pred return image_name
mask_image = np.zeros((im_h, im_w, num_pred), dtype=np.uint8, order='F')
M = masks[0].shape[0] @classmethod
scale = (M + 2.0) / M def _encode_coco_masks(cls, masks, boxes, im_h, im_w):
ref_boxes = box_utils.expand_boxes(boxes, scale) num_pred = len(boxes)
ref_boxes = ref_boxes.astype(np.int32) assert len(masks) == num_pred
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) mask_image = np.zeros((im_h, im_w, num_pred), dtype=np.uint8, order='F')
for i in range(num_pred): M = masks[0].shape[0]
ref_box = ref_boxes[i, :4] scale = (M + 2.0) / M
mask = masks[i] ref_boxes = box_utils.expand_boxes(boxes, scale)
padded_mask[1:-1, 1:-1] = mask[:, :] ref_boxes = ref_boxes.astype(np.int32)
w = ref_box[2] - ref_box[0] + 1 padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
h = ref_box[3] - ref_box[1] + 1 for i in range(num_pred):
w = np.maximum(w, 1) ref_box = ref_boxes[i, :4]
h = np.maximum(h, 1) mask = masks[i]
mask = cv2.resize(padded_mask, (w, h)) padded_mask[1:-1, 1:-1] = mask[:, :]
mask = np.array(mask > cfg.TEST.BINARY_THRESH, dtype=np.uint8) w = ref_box[2] - ref_box[0] + 1
x1 = max(ref_box[0], 0) h = ref_box[3] - ref_box[1] + 1
y1 = max(ref_box[1], 0) w = np.maximum(w, 1)
x2 = min(ref_box[2] + 1, im_w) h = np.maximum(h, 1)
y2 = min(ref_box[3] + 1, im_h) mask = cv2.resize(padded_mask, (w, h))
mask_image[y1:y2, x1:x2, i] = \ mask = np.array(mask > cfg.TEST.BINARY_THRESH, dtype=np.uint8)
mask[(y1 - ref_box[1]):(y2 - ref_box[1]), x1 = max(ref_box[0], 0)
(x1 - ref_box[0]):(x2 - ref_box[0])] y1 = max(ref_box[1], 0)
return encode_masks(mask_image) x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
def _write_coco_bbox_annotations(self, gt_recs, output_dir): mask_image[y1:y2, x1:x2, i] = \
# Build images mask[(y1 - ref_box[1]):(y2 - ref_box[1]),
dataset = {'images': []} (x1 - ref_box[0]):(x2 - ref_box[0])]
for image_name, rec in gt_recs.items(): return encode_masks(mask_image)
dataset['images'].append({
'file_name': image_name + '.jpg', def _write_coco_bbox_annotations(self, gt_recs, output_dir):
'id': self._get_coco_image_id(image_name), # Build images
'height': rec['height'], 'width': rec['width'], dataset = {'images': []}
}) for image_name, rec in gt_recs.items():
# Build categories dataset['images'].append({
dataset['categories'] = [] 'file_name': image_name + '.jpg',
for cls in self._classes: 'id': self._get_coco_image_id(image_name),
if cls == '__background__': 'height': rec['height'], 'width': rec['width'],
continue })
dataset['categories'].append({ # Build categories
'name': cls, dataset['categories'] = []
'id': self._class_to_ind[cls], for cls in self._classes:
}) if cls == '__background__':
# Build annotations continue
dataset['annotations'] = [] dataset['categories'].append({
ann_id = 0 'name': cls,
for image_name, rec in gt_recs.items(): 'id': self._class_to_ind[cls],
for obj in rec['objects']: })
x, y = obj['bbox'][0], obj['bbox'][1] # Build annotations
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1 dataset['annotations'] = []
dataset['annotations'].append({ ann_id = 0
'id': str(ann_id), for image_name, rec in gt_recs.items():
'bbox': [x, y, w, h], for obj in rec['objects']:
'area': w * h, x, y = obj['bbox'][0], obj['bbox'][1]
'iscrowd': obj['difficult'], w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
'image_id': self._get_coco_image_id(image_name), dataset['annotations'].append({
'category_id': self._class_to_ind[obj['name']], 'id': str(ann_id),
}) 'bbox': [x, y, w, h],
ann_id += 1 'area': w * h,
ann_file = self._get_coco_annotations_T(output_dir, type='bbox') 'iscrowd': obj['difficult'],
with open(ann_file, 'w') as f: 'image_id': self._get_coco_image_id(image_name),
json.dump(dataset, f) 'category_id': self._class_to_ind[obj['name']],
return ann_file })
ann_id += 1
def _write_coco_segm_annotations(self, gt_recs, output_dir): ann_file = self._get_coco_annotations_T(output_dir, type='bbox')
# Build images with open(ann_file, 'w') as f:
dataset = {'images': []} json.dump(dataset, f)
for image_name, rec in gt_recs.items(): return ann_file
dataset['images'].append({
'file_name': image_name + '.jpg', def _write_coco_segm_annotations(self, gt_recs, output_dir):
'id': self._get_coco_image_id(image_name), # Build images
'height': rec['height'], 'width': rec['width'], dataset = {'images': []}
}) for image_name, rec in gt_recs.items():
# Build categories dataset['images'].append({
dataset['categories'] = [] 'file_name': image_name + '.jpg',
for cls in self._classes: 'id': self._get_coco_image_id(image_name),
if cls == '__background__': 'height': rec['height'], 'width': rec['width'],
continue })
dataset['categories'].append({ # Build categories
'name': cls, dataset['categories'] = []
'id': self._class_to_ind[cls], for cls in self._classes:
}) if cls == '__background__':
# Build annotations continue
dataset['annotations'] = [] dataset['categories'].append({
ann_id = 0 'name': cls,
for image_name, rec in gt_recs.items(): 'id': self._class_to_ind[cls],
for obj in rec['objects']: })
x, y = obj['bbox'][0], obj['bbox'][1] # Build annotations
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1 dataset['annotations'] = []
dataset['annotations'].append({ ann_id = 0
'id': str(ann_id), for image_name, rec in gt_recs.items():
'bbox': [x, y, w, h], for obj in rec['objects']:
'area': w * h, x, y = obj['bbox'][0], obj['bbox'][1]
'segmentation': { w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
'size': [rec['height'], rec['width']], dataset['annotations'].append({
'counts': obj['mask'], 'id': str(ann_id),
}, 'bbox': [x, y, w, h],
'iscrowd': obj['difficult'], 'area': w * h,
'image_id': self._get_coco_image_id(image_name), 'segmentation': {
'category_id': self._class_to_ind[obj['name']], 'size': [rec['height'], rec['width']],
}) 'counts': obj['mask'],
ann_id += 1 },
ann_file = self._get_coco_annotations_T(output_dir, type='segm') 'iscrowd': obj['difficult'],
with open(ann_file, 'w') as f: 'image_id': self._get_coco_image_id(image_name),
json.dump(dataset, f) 'category_id': self._class_to_ind[obj['name']],
return ann_file })
ann_id += 1
def _coco_bbox_results_one_category(self, boxes, cat_id, gt_recs): ann_file = self._get_coco_annotations_T(output_dir, type='segm')
ix, results = 0, [] with open(ann_file, 'w') as f:
for image_name, rec in gt_recs.items(): json.dump(dataset, f)
dets = boxes[ix] return ann_file
ix += 1
if isinstance(dets, list) and len(dets) == 0: def _coco_bbox_results_one_category(self, boxes, cat_id, gt_recs):
continue ix, results = 0, []
dets = dets.astype(np.float) for image_name, rec in gt_recs.items():
scores = dets[:, -1] dets = boxes[ix]
xs = dets[:, 0] ix += 1
ys = dets[:, 1] if isinstance(dets, list) and len(dets) == 0:
ws = dets[:, 2] - xs + 1 continue
hs = dets[:, 3] - ys + 1 dets = dets.astype(np.float)
results.extend( scores = dets[:, -1]
[{'image_id': self._get_coco_image_id(image_name), xs = dets[:, 0]
'category_id': cat_id, ys = dets[:, 1]
'bbox': [xs[k], ys[k], ws[k], hs[k]], ws = dets[:, 2] - xs + 1
'score': scores[k], hs = dets[:, 3] - ys + 1
} for k in range(dets.shape[0])] results.extend(
) [{'image_id': self._get_coco_image_id(image_name),
return results 'category_id': cat_id,
'bbox': [xs[k], ys[k], ws[k], hs[k]],
def _coco_segm_results_one_category(self, boxes, masks, cat_id, gt_recs): 'score': scores[k],
def filter_boxes(dets): } for k in range(dets.shape[0])]
boxes = dets[:, :4] )
ws = boxes[:, 2] - boxes[:, 0] return results
hs = boxes[:, 3] - boxes[:, 1]
keep = np.where((ws >= 1) & (hs >= 1))[0] def _coco_segm_results_one_category(self, boxes, masks, cat_id, gt_recs):
return keep def filter_boxes(dets):
results = [] boxes = dets[:, :4]
ix = 0 ws = boxes[:, 2] - boxes[:, 0]
for image_name, rec in gt_recs.items(): hs = boxes[:, 3] - boxes[:, 1]
dets = boxes[ix].astype(np.float) keep = np.where((ws >= 1) & (hs >= 1))[0]
msks = masks[ix] return keep
ix += 1 results = []
keep = filter_boxes(dets) ix = 0
im_h, im_w = rec['height'], rec['width'] for image_name, rec in gt_recs.items():
if len(keep) == 0: dets = boxes[ix].astype(np.float)
continue msks = masks[ix]
scores = dets[:, -1] ix += 1
mask_encode = self._encode_coco_masks( keep = filter_boxes(dets)
msks[keep], dets[keep, :4], im_h, im_w) im_h, im_w = rec['height'], rec['width']
for k in range(dets[keep].shape[0]): if len(keep) == 0:
rle = mask_encode[k] continue
if sys.version_info >= (3, 0): scores = dets[:, -1]
rle['counts'] = rle['counts'].decode() mask_encode = self._encode_coco_masks(
results.append({ msks[keep], dets[keep, :4], im_h, im_w)
'image_id': self._get_coco_image_id(image_name), for k in range(dets[keep].shape[0]):
'category_id': cat_id, rle = mask_encode[k]
'segmentation': rle, if sys.version_info >= (3, 0):
'score': scores[k], rle['counts'] = rle['counts'].decode()
}) results.append({
return results 'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
def _write_coco_bbox_results(self, all_boxes, gt_recs, output_dir): 'segmentation': rle,
filename = self._get_coco_results_T(output_dir) 'score': scores[k],
results = [] })
for cls_ind, cls in enumerate(self.classes): return results
if cls == '__background__':
continue def _write_coco_bbox_results(self, all_boxes, gt_recs, output_dir):
print('Collecting {} results ({:d}/{:d})' filename = self._get_coco_results_T(output_dir)
.format(cls, cls_ind, self.num_classes - 1)) results = []
cat_id = self._class_to_cat_id[cls] for cls_ind, cls in enumerate(self.classes):
results.extend(self._coco_bbox_results_one_category( if cls == '__background__':
all_boxes[cls_ind], cat_id, gt_recs)) continue
print('Writing results json to {}'.format(filename)) print('Collecting {} results ({:d}/{:d})'
with open(filename, 'w') as fid: .format(cls, cls_ind, self.num_classes - 1))
json.dump(results, fid) cat_id = self._class_to_cat_id[cls]
return filename results.extend(self._coco_bbox_results_one_category(
all_boxes[cls_ind], cat_id, gt_recs))
def _write_coco_segm_results(self, all_boxes, all_masks, gt_recs, output_dir): print('Writing results json to {}'.format(filename))
filename = self._get_coco_results_T(output_dir, type='segm') with open(filename, 'w') as fid:
results = [] json.dump(results, fid)
for cls_ind, cls in enumerate(self.classes): return filename
if cls == '__background__':
continue def _write_coco_segm_results(self, all_boxes, all_masks, gt_recs, output_dir):
print('Collecting {} results ({:d}/{:d})' filename = self._get_coco_results_T(output_dir, type='segm')
.format(cls, cls_ind, self.num_classes - 1)) results = []
cat_id = self._class_to_cat_id[cls] for cls_ind, cls in enumerate(self.classes):
results.extend(self._coco_segm_results_one_category( if cls == '__background__':
all_boxes[cls_ind], all_masks[cls_ind], cat_id, gt_recs)) continue
print('Writing results json to {}'.format(filename)) print('Collecting {} results ({:d}/{:d})'
with open(filename, 'w') as fid: .format(cls, cls_ind, self.num_classes - 1))
json.dump(results, fid) cat_id = self._class_to_cat_id[cls]
return filename results.extend(self._coco_segm_results_one_category(
all_boxes[cls_ind], all_masks[cls_ind], cat_id, gt_recs))
def _do_coco_bbox_eval(self, coco, res_file): print('Writing results json to {}'.format(filename))
from lib.pycocotools.cocoeval import COCOeval with open(filename, 'w') as fid:
coco_dt = coco.loadRes(res_file) json.dump(results, fid)
coco_eval = COCOeval(coco, coco_dt, 'bbox') return filename
coco_eval.evaluate()
coco_eval.accumulate() def _do_coco_bbox_eval(self, coco, res_file):
self._print_coco_eval_results(coco_eval) from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
def _do_coco_segm_eval(self, coco, res_file): coco_eval = COCOeval(coco, coco_dt, 'bbox')
from lib.pycocotools.cocoeval import COCOeval coco_eval.evaluate()
coco_dt = coco.loadRes(res_file) coco_eval.accumulate()
coco_eval = COCOeval(coco, coco_dt, 'segm') self._print_coco_eval_results(coco_eval)
coco_eval.evaluate()
coco_eval.accumulate() def _do_coco_segm_eval(self, coco, res_file):
self._print_coco_eval_results(coco_eval) from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
def _print_coco_eval_results(self, coco_eval): coco_eval = COCOeval(coco, coco_dt, 'segm')
IoU_lo_thresh = 0.5 coco_eval.evaluate()
IoU_hi_thresh = 0.95 coco_eval.accumulate()
self._print_coco_eval_results(coco_eval)
def _get_thr_ind(coco_eval, thr):
ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) & def _print_coco_eval_results(self, coco_eval):
(coco_eval.params.iouThrs < thr + 1e-5))[0][0] IoU_lo_thresh = 0.5
iou_thr = coco_eval.params.iouThrs[ind] IoU_hi_thresh = 0.95
assert np.isclose(iou_thr, thr)
return ind def _get_thr_ind(coco_eval, thr):
ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh) (coco_eval.params.iouThrs < thr + 1e-5))[0][0]
ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh) iou_thr = coco_eval.params.iouThrs[ind]
assert np.isclose(iou_thr, thr)
# Precision has dims (iou, recall, cls, area range, max dets) return ind
# Area range index 0: all area ranges
# Max dets index 2: 100 per image ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
precision = \ ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
ap_default = np.mean(precision[precision > -1]) # Precision has dims (iou, recall, cls, area range, max dets)
print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] ' # Area range index 0: all area ranges
'~~~~'.format(IoU_lo_thresh, IoU_hi_thresh)) # Max dets index 2: 100 per image
print('{:.1f}'.format(100 * ap_default)) precision = \
for cls_ind, cls in enumerate(self.classes): coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
if cls == '__background__': ap_default = np.mean(precision[precision > -1])
continue print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
# Minus 1 because of __background__ '~~~~'.format(IoU_lo_thresh, IoU_hi_thresh))
precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2] print('{:.1f}'.format(100 * ap_default))
ap = np.mean(precision[precision > -1]) for cls_ind, cls in enumerate(self.classes):
print('{:.1f}'.format(100 * ap)) if cls == '__background__':
continue
print('~~~~ Summary metrics ~~~~') # Minus 1 because of __background__
coco_eval.summarize() precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2]
ap = np.mean(precision[precision > -1])
############################################## print('{:.1f}'.format(100 * ap))
# #
# EVAL-API # print('~~~~ Summary metrics ~~~~')
# # coco_eval.summarize()
##############################################
##############################################
def evaluate_detections(self, all_boxes, gt_recs, output_dir): # #
protocol = cfg.TEST.PROTOCOL # EVAL-API #
if 'voc' in protocol: # #
self._write_voc_bbox_results(all_boxes, gt_recs, output_dir) ##############################################
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~') def evaluate_detections(self, all_boxes, gt_recs, output_dir):
self._do_voc_bbox_eval( protocol = cfg.TEST.PROTOCOL
gt_recs, output_dir, IoU=0.5, if 'voc' in protocol:
use_07_metric='2007' in protocol) self._write_voc_bbox_results(all_boxes, gt_recs, output_dir)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~') if 'wo' not in protocol:
self._do_voc_bbox_eval( print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
gt_recs, output_dir, IoU=0.7, self._do_voc_bbox_eval(
use_07_metric='2007' in protocol) gt_recs, output_dir, IoU=0.5,
elif 'xml' in protocol: use_07_metric='2007' in protocol)
if cfg.EXP_DIR != '': print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
output_dir = cfg.EXP_DIR self._do_voc_bbox_eval(
self._write_xml_bbox_results(all_boxes, gt_recs, output_dir) gt_recs, output_dir, IoU=0.7,
elif 'coco' in protocol: use_07_metric='2007' in protocol)
from lib.pycocotools.coco import COCO elif 'xml' in protocol:
if os.path.exists(cfg.TEST.JSON_FILE): if cfg.EXP_DIR != '':
coco = COCO(cfg.TEST.JSON_FILE) output_dir = cfg.EXP_DIR
# We should override category id before writing results self._write_xml_bbox_results(all_boxes, gt_recs, output_dir)
cats = coco.loadCats(coco.getCatIds()) elif 'coco' in protocol:
self._class_to_cat_id = dict(zip( from lib.pycocotools.coco import COCO
[c['name'] for c in cats], coco.getCatIds())) if os.path.exists(cfg.TEST.JSON_FILE):
else: coco = COCO(cfg.TEST.JSON_FILE)
coco = None # We should override category id before writing results
res_file = self._write_coco_bbox_results( cats = coco.loadCats(coco.getCatIds())
all_boxes, gt_recs, output_dir) self._class_to_cat_id = dict(zip(
if 'wo' not in protocol: [c['name'] for c in cats], coco.getCatIds()))
if coco is None: else:
ann_file = self._write_coco_bbox_annotations(gt_recs, output_dir) coco = None
coco = COCO(ann_file) res_file = self._write_coco_bbox_results(
self._do_coco_bbox_eval(coco, res_file) all_boxes, gt_recs, output_dir)
if 'wo' not in protocol:
def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir): if coco is None:
protocol = cfg.TEST.PROTOCOL ann_file = self._write_coco_bbox_annotations(gt_recs, output_dir)
if 'voc' in protocol: coco = COCO(ann_file)
self._write_voc_segm_results(all_boxes, all_masks, output_dir) self._do_coco_bbox_eval(coco, res_file)
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~') def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir):
self._do_voc_segm_eval( protocol = cfg.TEST.PROTOCOL
gt_recs, output_dir, IoU=0.5, if 'voc' in protocol:
use_07_metric='2007' in protocol) self._write_voc_segm_results(all_boxes, all_masks, output_dir)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~') if 'wo' not in protocol:
self._do_voc_segm_eval( print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
gt_recs, output_dir, IoU=0.7, self._do_voc_segm_eval(
use_07_metric='2007' in protocol) gt_recs, output_dir, IoU=0.5,
elif 'coco' in protocol: use_07_metric='2007' in protocol)
from lib.pycocotools.coco import COCO print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
if os.path.exists(cfg.TEST.JSON_FILE): self._do_voc_segm_eval(
coco = COCO(cfg.TEST.JSON_FILE) gt_recs, output_dir, IoU=0.7,
# We should override category id before writing results use_07_metric='2007' in protocol)
cats = coco.loadCats(coco.getCatIds()) elif 'coco' in protocol:
self._class_to_cat_id = dict( from lib.pycocotools.coco import COCO
zip([c['name'] for c in cats], coco.getCatIds())) if os.path.exists(cfg.TEST.JSON_FILE):
else: coco = COCO(cfg.TEST.JSON_FILE)
coco = None # We should override category id before writing results
res_file = self._write_coco_segm_results(all_boxes, all_masks, gt_recs, output_dir) cats = coco.loadCats(coco.getCatIds())
if 'wo' not in protocol: self._class_to_cat_id = dict(
if coco is None: zip([c['name'] for c in cats], coco.getCatIds()))
coco = COCO(self._write_coco_segm_annotations(gt_recs, output_dir)) else:
self._do_coco_segm_eval(coco, res_file) coco = None
res_file = self._write_coco_segm_results(all_boxes, all_masks, gt_recs, output_dir)
def competition_mode(self, on): if 'wo' not in protocol:
if on: if coco is None:
self.config['use_salt'] = False coco = COCO(self._write_coco_segm_annotations(gt_recs, output_dir))
self.config['cleanup'] = False self._do_coco_segm_eval(coco, res_file)
else:
self.config['use_salt'] = True def competition_mode(self, on):
self.config['cleanup'] = True if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
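# --- Editor's note: a hedged, self-contained illustration of the box format
# conversion performed in _coco_bbox_results_one_category() above: detections
# arrive as [x1, y1, x2, y2, score] and COCO results use [x, y, width, height].
def _xyxy_to_coco_xywh(x1, y1, x2, y2):
    # +1 because the corner coordinates are treated as inclusive pixels here.
    return [x1, y1, x2 - x1 + 1, y2 - y1 + 1]

# e.g. _xyxy_to_coco_xywh(48., 240., 195., 371.) -> [48.0, 240.0, 148.0, 132.0]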
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import cv2 import cv2
import numpy as np import numpy as np
try: try:
import cPickle import cPickle
except ImportError: except ImportError:
import pickle as cPickle import pickle as cPickle
from lib.core.config import cfg from lib.core.config import cfg
from lib.pycocotools.mask_utils import mask_rle2im from lib.pycocotools.mask_utils import mask_rle2im
from lib.utils.boxes import expand_boxes from lib.utils.boxes import expand_boxes
from lib.utils.mask_transform import mask_overlap from lib.utils.mask import mask_overlap
def voc_ap(rec, prec, use_07_metric=False): def voc_ap(rec, prec, use_07_metric=False):
""" ap = voc_ap(rec, prec, [use_07_metric]) """ ap = voc_ap(rec, prec, [use_07_metric])
Compute VOC AP given precision and recall. Compute VOC AP given precision and recall.
If use_07_metric is true, uses the If use_07_metric is true, uses the
VOC 07 11-point method (default: False). VOC 07 11-point method (default: False).
""" """
if use_07_metric: if use_07_metric:
# 11 point metric # 11 point metric
ap = 0. ap = 0.
for t in np.arange(0., 1.1, 0.1): for t in np.arange(0., 1.1, 0.1):
if np.sum(rec >= t) == 0: if np.sum(rec >= t) == 0:
p = 0 p = 0
else: else:
p = np.max(prec[rec >= t]) p = np.max(prec[rec >= t])
ap = ap + p / 11. ap = ap + p / 11.
else: else:
# correct AP calculation # correct AP calculation
# first append sentinel values at the end # first append sentinel values at the end
mrec = np.concatenate(([0.], rec, [1.])) mrec = np.concatenate(([0.], rec, [1.]))
mpre = np.concatenate(([0.], prec, [0.])) mpre = np.concatenate(([0.], prec, [0.]))
# compute the precision envelope # compute the precision envelope
for i in range(mpre.size - 1, 0, -1): for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points # to calculate area under PR curve, look for points
# where X axis (recall) changes value # where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0] i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec # and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap return ap
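A quick sanity check for voc_ap as defined above, using a toy precision/recall curve (the numbers are made up for illustration):

import numpy as np

rec = np.array([0.5, 0.5, 1.0, 1.0])     # cumulative recall over 4 detections
prec = np.array([1.0, 0.5, 0.667, 0.5])  # corresponding precision values

print(voc_ap(rec, prec, use_07_metric=False))  # interpolated area under PR
print(voc_ap(rec, prec, use_07_metric=True))   # VOC07 11-point average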
def voc_bbox_eval( def voc_bbox_eval(
det_file, det_file,
gt_recs, gt_recs,
cls_name, cls_name,
IoU=0.5, IoU=0.5,
use_07_metric=False, use_07_metric=False,
): ):
class_recs = {} class_recs = {}
n_pos = 0 n_pos = 0
for image_name, rec in gt_recs.items(): for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name] R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R]) bbox = np.array([x['bbox'] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool) difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R) det = [False] * len(R)
n_pos = n_pos + sum(~difficult) n_pos = n_pos + sum(~difficult)
class_recs[image_name] = { class_recs[image_name] = {
'bbox': bbox, 'bbox': bbox,
'difficult': difficult, 'difficult': difficult,
'det': det 'det': det
} }
# Read detections # Read detections
with open(det_file, 'r') as f: with open(det_file, 'r') as f:
lines = f.readlines() lines = f.readlines()
splitlines = [x.strip().split(' ') for x in lines] splitlines = [x.strip().split(' ') for x in lines]
image_ids = [x[0] for x in splitlines] image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines]) confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
# Avoid IndexError if detecting nothing # Avoid IndexError if detecting nothing
if len(BB) == 0: if len(BB) == 0:
return 0, 0, -1 return 0, 0, -1
# Sort by confidence # Sort by confidence
sorted_ind = np.argsort(-confidence) sorted_ind = np.argsort(-confidence)
BB = BB[sorted_ind, :] BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind] image_ids = [image_ids[x] for x in sorted_ind]
# Go down detections and mark TPs and FPs # Go down detections and mark TPs and FPs
nd = len(image_ids) nd = len(image_ids)
tp, fp = np.zeros(nd), np.zeros(nd) tp, fp = np.zeros(nd), np.zeros(nd)
for d in range(nd): for d in range(nd):
R = class_recs[image_ids[d]] R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float) bb = BB[d, :].astype(float)
ovmax, jmax = -np.inf, 0 ovmax, jmax = -np.inf, 0
BBGT = R['bbox'].astype(float) BBGT = R['bbox'].astype(float)
if BBGT.size > 0: if BBGT.size > 0:
# Compute overlaps intersection # Compute overlaps intersection
ixmin = np.maximum(BBGT[:, 0], bb[0]) ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1]) iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2]) ixmax = np.minimum(BBGT[:, 2], bb[2])
iymax = np.minimum(BBGT[:, 3], bb[3]) iymax = np.minimum(BBGT[:, 3], bb[3])
iw = np.maximum(ixmax - ixmin + 1., 0.) iw = np.maximum(ixmax - ixmin + 1., 0.)
ih = np.maximum(iymax - iymin + 1., 0.) ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih inters = iw * ih
# Union # Union
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
(BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 2] - BBGT[:, 0] + 1.) *
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
overlaps = inters / uni overlaps = inters / uni
ovmax = np.max(overlaps) ovmax = np.max(overlaps)
jmax = np.argmax(overlaps) jmax = np.argmax(overlaps)
if ovmax > IoU: if ovmax > IoU:
if not R['difficult'][jmax]: if not R['difficult'][jmax]:
if not R['det'][jmax]: if not R['det'][jmax]:
tp[d] = 1. tp[d] = 1.
R['det'][jmax] = 1 R['det'][jmax] = 1
else: else:
fp[d] = 1. fp[d] = 1.
else: else:
fp[d] = 1. fp[d] = 1.
# compute precision recall # compute precision recall
fp = np.cumsum(fp) fp = np.cumsum(fp)
tp = np.cumsum(tp) tp = np.cumsum(tp)
rec = tp / float(n_pos) rec = tp / float(n_pos)
# avoid divide by zero in case the first detection matches a difficult # avoid divide by zero in case the first detection matches a difficult
# ground truth # ground truth
prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric) ap = voc_ap(rec, prec, use_07_metric)
return rec, prec, ap return rec, prec, ap
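The overlap computation inside the loop above can be isolated into a small vectorised helper; a minimal sketch (not part of the library) of one detection against an array of ground-truth boxes, using the same +1 pixel convention:

import numpy as np

def iou_one_vs_many(bb, BBGT):
    """IoU of one box [x1, y1, x2, y2] against an (N, 4) array of boxes."""
    ixmin = np.maximum(BBGT[:, 0], bb[0])
    iymin = np.maximum(BBGT[:, 1], bb[1])
    ixmax = np.minimum(BBGT[:, 2], bb[2])
    iymax = np.minimum(BBGT[:, 3], bb[3])
    iw = np.maximum(ixmax - ixmin + 1., 0.)
    ih = np.maximum(iymax - iymin + 1., 0.)
    inters = iw * ih
    uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
           (BBGT[:, 2] - BBGT[:, 0] + 1.) *
           (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
    return inters / uni

print(iou_one_vs_many(np.array([0., 0., 9., 9.]),
                      np.array([[0., 0., 9., 9.], [5., 5., 14., 14.]])))
# -> [1.0, 0.1428...]: an identical box, then a partially overlapping one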
def voc_segm_eval( def voc_segm_eval(
det_file, det_file,
seg_file, seg_file,
gt_recs, gt_recs,
cls_name, cls_name,
IoU=0.5, IoU=0.5,
use_07_metric=False, use_07_metric=False,
): ):
# 0. Constants # 0. Constants
M = cfg.MRCNN.RESOLUTION M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH binary_thresh = cfg.TEST.BINARY_THRESH
scale = (M + 2.0) / M scale = (M + 2.0) / M
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
# 1. Get bbox & mask ground truths # 1. Get bbox & mask ground truths
image_names, class_recs, n_pos = [], {}, 0 image_names, class_recs, n_pos = [], {}, 0
for image_name, rec in gt_recs.items(): for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name] R = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R]) bbox = np.array([x['bbox'] for x in R])
mask = np.array([mask_rle2im([x['mask']], rec['height'], rec['width'])[0] for x in R]) mask = np.array([mask_rle2im([x['mask']], rec['height'], rec['width'])[0] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool) difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R) det = [False] * len(R)
n_pos = n_pos + sum(~difficult) n_pos = n_pos + sum(~difficult)
class_recs[image_name] = { class_recs[image_name] = {
'bbox': bbox, 'bbox': bbox,
'mask': mask, 'mask': mask,
'difficult': difficult, 'difficult': difficult,
'det': det 'det': det
} }
image_names.append(image_name) image_names.append(image_name)
# 2. Get predict pickle file for this class # 2. Get predict pickle file for this class
with open(det_file, 'rb') as f: with open(det_file, 'rb') as f:
boxes_pkl = cPickle.load(f) boxes_pkl = cPickle.load(f)
with open(seg_file, 'rb') as f: with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f) masks_pkl = cPickle.load(f)
# 3. Pre-compute number of total instances to allocate memory # 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs) num_images = len(gt_recs)
box_num = 0 box_num = 0
for im_i in range(num_images): for im_i in range(num_images):
box_num += len(boxes_pkl[im_i]) box_num += len(boxes_pkl[im_i])
# avoid IndexError if detecting nothing # avoid IndexError if detecting nothing
if box_num == 0: if box_num == 0:
return 0, 0, -1 return 0, 0, -1
# 4. Re-organize all the predicted boxes # 4. Re-organize all the predicted boxes
new_boxes = np.zeros((box_num, 5)) new_boxes = np.zeros((box_num, 5))
new_masks = np.zeros((box_num, M, M)) new_masks = np.zeros((box_num, M, M))
new_images = [] new_images = []
cnt = 0 cnt = 0
for image_ind in range(num_images): for image_ind in range(num_images):
boxes = boxes_pkl[image_ind] boxes = boxes_pkl[image_ind]
masks = masks_pkl[image_ind] masks = masks_pkl[image_ind]
num_instance = len(boxes) num_instance = len(boxes)
for box_ind in range(num_instance): for box_ind in range(num_instance):
new_boxes[cnt] = boxes[box_ind] new_boxes[cnt] = boxes[box_ind]
new_masks[cnt] = masks[box_ind] new_masks[cnt] = masks[box_ind]
new_images.append(image_names[image_ind]) new_images.append(image_names[image_ind])
cnt += 1 cnt += 1
# 5. Rearrange boxes according to their scores # 5. Rearrange boxes according to their scores
seg_scores = new_boxes[:, -1] seg_scores = new_boxes[:, -1]
keep_inds = np.argsort(-seg_scores) keep_inds = np.argsort(-seg_scores)
new_boxes = new_boxes[keep_inds, :] new_boxes = new_boxes[keep_inds, :]
new_masks = new_masks[keep_inds, :, :] new_masks = new_masks[keep_inds, :, :]
num_pred = new_boxes.shape[0] num_pred = new_boxes.shape[0]
# 6. Calculate t/f positive # 6. Calculate t/f positive
fp = np.zeros((num_pred, 1)) fp = np.zeros((num_pred, 1))
tp = np.zeros((num_pred, 1)) tp = np.zeros((num_pred, 1))
ref_boxes = expand_boxes(new_boxes, scale) ref_boxes = expand_boxes(new_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32) ref_boxes = ref_boxes.astype(np.int32)
for i in range(num_pred): for i in range(num_pred):
image_name = new_images[keep_inds[i]] image_name = new_images[keep_inds[i]]
if image_name not in class_recs: if image_name not in class_recs:
print('Warning: {} does not exist in the ground-truths.'.format(image_name)) print('Warning: {} does not exist in the ground-truths.'.format(image_name))
fp[i] = 1 fp[i] = 1
continue continue
R = class_recs[image_name] R = class_recs[image_name]
im_h = gt_recs[image_name]['height'] im_h = gt_recs[image_name]['height']
im_w = gt_recs[image_name]['width'] im_w = gt_recs[image_name]['width']
# Decode mask # Decode mask
ref_box = ref_boxes[i, :4] ref_box = ref_boxes[i, :4]
mask = new_masks[i] mask = new_masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :] padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1 w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1 h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1) w = np.maximum(w, 1)
h = np.maximum(h, 1) h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h)) mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > binary_thresh, dtype=np.uint8) mask = np.array(mask > binary_thresh, dtype=np.uint8)
x1 = max(ref_box[0], 0) x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0) y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w) x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h) y2 = min(ref_box[3] + 1, im_h)
pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]), pred_mask = mask[(y1 - ref_box[1]): (y2 - ref_box[1]),
(x1 - ref_box[0]): (x2 - ref_box[0])] (x1 - ref_box[0]): (x2 - ref_box[0])]
# Calculate max region overlap # Calculate max region overlap
ovmax, jmax = -1, -1 ovmax, jmax = -1, -1
for j in range(len(R['det'])): for j in range(len(R['det'])):
gt_mask_bound = R['bbox'][j].astype(int) gt_mask_bound = R['bbox'][j].astype(int)
pred_mask_bound = new_boxes[i, :4].astype(int) pred_mask_bound = new_boxes[i, :4].astype(int)
crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1, crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0]:gt_mask_bound[2] + 1] gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask) ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask)
if ov > ovmax: if ov > ovmax:
ovmax = ov ovmax = ov
jmax = j jmax = j
if ovmax > IoU: if ovmax > IoU:
if not R['difficult'][jmax]: if not R['difficult'][jmax]:
if not R['det'][jmax]: if not R['det'][jmax]:
tp[i] = 1. tp[i] = 1.
R['det'][jmax] = 1 R['det'][jmax] = 1
else: else:
fp[i] = 1. fp[i] = 1.
else: else:
fp[i] = 1 fp[i] = 1
# 7. Calculate precision # 7. Calculate precision
fp = np.cumsum(fp) fp = np.cumsum(fp)
tp = np.cumsum(tp) tp = np.cumsum(tp)
rec = tp / float(n_pos) rec = tp / float(n_pos)
# avoid divide by zero in case the first matches a difficult gt # avoid divide by zero in case the first matches a difficult gt
prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps) prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric=use_07_metric) ap = voc_ap(rec, prec, use_07_metric=use_07_metric)
return ap return ap
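The mask pasting above (pad the MxM prediction, resize it to the expanded box, threshold, clip to the image) can be summarised as a standalone sketch; the resolution and threshold below are assumed values rather than reads from cfg:

import cv2
import numpy as np

def decode_mask(mask, ref_box, im_h, im_w, binary_thresh=0.4):
    """Paste an (M, M) soft mask into image coordinates, as done above."""
    M = mask.shape[0]
    padded = np.zeros((M + 2, M + 2), dtype=np.float32)
    padded[1:-1, 1:-1] = mask
    w = max(ref_box[2] - ref_box[0] + 1, 1)
    h = max(ref_box[3] - ref_box[1] + 1, 1)
    resized = cv2.resize(padded, (w, h))
    binary = (resized > binary_thresh).astype(np.uint8)
    x1, y1 = max(ref_box[0], 0), max(ref_box[1], 0)
    x2, y2 = min(ref_box[2] + 1, im_w), min(ref_box[3] + 1, im_h)
    return binary[(y1 - ref_box[1]):(y2 - ref_box[1]),
                  (x1 - ref_box[0]):(x2 - ref_box[0])]

# e.g. a 28x28 prediction pasted into a 100x100 image at box (10, 10, 40, 40)
print(decode_mask(np.random.rand(28, 28), (10, 10, 40, 40), 100, 100).shape)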
...@@ -13,7 +13,7 @@ from __future__ import absolute_import ...@@ -13,7 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer from lib.faster_rcnn.anchor_target_layer import AnchorTargetLayer
from lib.faster_rcnn.layers.data_layer import DataLayer from lib.faster_rcnn.data_layer import DataLayer
from lib.faster_rcnn.layers.proposal_layer import ProposalLayer from lib.faster_rcnn.proposal_layer import ProposalLayer
from lib.faster_rcnn.layers.proposal_target_layer import ProposalTargetLayer from lib.faster_rcnn.proposal_target_layer import ProposalTargetLayer
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import blob_to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
class AnchorTargetLayer(torch.nn.Module): class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets.""" """Assign anchors to ground-truth targets."""
def __init__(self): def __init__(self):
super(AnchorTargetLayer, self).__init__() super(AnchorTargetLayer, self).__init__()
# Load the basic configs # Load the basic configs
# C4 backbone takes the first stride # C4 backbone takes the first stride
self.scales = cfg.RPN.SCALES self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0] self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS self.ratios = cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount # Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors # Generate base anchors
self.base_anchors = generate_anchors( self.base_anchors = generate_anchors(
base_size=self.stride, base_size=self.stride,
ratios=self.ratios, ratios=self.ratios,
scales=np.array(self.scales), scales=np.array(self.scales),
) )
def forward(self, features, gt_boxes, ims_info): def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets. """Produces anchor classification labels and bounding-box regression targets.
Parameters Parameters
---------- ----------
features : sequence of dragon.vm.torch.Tensor features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers. The features of specific conv layers.
gt_boxes : numpy.ndarray gt_boxes : numpy.ndarray
The packed ground-truth boxes. The packed ground-truth boxes.
ims_info : numpy.ndarray ims_info : numpy.ndarray
The information of input images. The information of input images.
""" """
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images: if len(gt_boxes_wide) != num_images:
logger.fatal( logger.fatal(
'Input {} images, got {} slices of gt boxes.' 'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide)) .format(num_images, len(gt_boxes_wide))
) )
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
height, width = features[0].shape[-2:] height, width = features[0].shape[-2:]
shift_x = np.arange(0, width) * self.stride shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to # Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get # cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0] A = self.base_anchors.shape[0]
K = shifts.shape[0] K = shifts.shape[0]
all_anchors = (self.base_anchors.reshape((1, A, 4)) + all_anchors = (self.base_anchors.reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2))) shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4)) all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A) total_anchors = int(K * A)
# label: 1 is positive, 0 is negative, -1 is don't care # label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32) all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32) all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32) all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
all_bbox_outside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32) all_bbox_outside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
for ix in range(num_images): for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label) # GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix] gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix] im_info = ims_info[ix]
if self._allowed_border >= 0: if self._allowed_border >= 0:
# Only keep anchors inside the image # Only keep anchors inside the image
inds_inside = np.where( inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) & (all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) & (all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & (all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] (all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :] anchors = all_anchors[inds_inside, :]
else: else:
inds_inside = np.arange(all_anchors.shape[0]) inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors anchors = all_anchors
num_inside = len(inds_inside) num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care # label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32) labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1) labels.fill(-1)
# Overlaps between the anchors and the gt boxes # Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float),
) )
argmax_overlaps = overlaps.argmax(axis=1) argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps] max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0) gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels first so that positive labels can clobber them # Assign bg labels first so that positive labels can clobber them
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# fg label: for each gt, anchor with highest overlap # fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1 labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU # fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
if cfg.TRAIN.RPN_CLOBBER_POSITIVES: if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels last so that negative labels can clobber positives # Assign bg labels last so that negative labels can clobber positives
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many # Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0] fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg: if len(fg_inds) > num_fg:
disable_inds = npr.choice( disable_inds = npr.choice(
fg_inds, fg_inds,
size=len(fg_inds) - num_fg, size=len(fg_inds) - num_fg,
replace=False, replace=False,
) )
labels[disable_inds] = -1 labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0] fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many # Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0] bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg: if len(bg_inds) > num_bg:
disable_inds = npr.choice( disable_inds = npr.choice(
bg_inds, bg_inds,
size=len(bg_inds) - num_bg, size=len(bg_inds) - num_bg,
replace=False, replace=False,
) )
labels[disable_inds] = -1 labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32) bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform( bbox_targets[fg_inds, :] = bbox_transform(
ex_rois=anchors[fg_inds, :], ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], 0:4], gt_rois=gt_boxes[argmax_overlaps[fg_inds], :4],
) )
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0)) bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
all_labels[ix, inds_inside] = labels # label all_labels[ix, inds_inside] = labels # label
all_bbox_targets[ix, inds_inside] = bbox_targets all_bbox_targets[ix, inds_inside] = bbox_targets
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
labels = all_labels \ labels = all_labels \
.reshape((num_images, height, width, A)) \ .reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \ .transpose(0, 3, 1, 2) \
.reshape((num_images, total_anchors)) .reshape((num_images, total_anchors))
bbox_targets = all_bbox_targets \ bbox_targets = all_bbox_targets \
.reshape((num_images, height, width, A * 4)) \ .reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2)
bbox_inside_weights = all_bbox_inside_weights \ bbox_inside_weights = all_bbox_inside_weights \
.reshape((num_images, height, width, A * 4)) \ .reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2)
bbox_outside_weights = all_bbox_outside_weights \ bbox_outside_weights = all_bbox_outside_weights \
.reshape((num_images, height, width, A * 4)) \ .reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2)
return { return {
'labels': blob_to_tensor(labels), 'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets), 'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights), 'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights), 'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
} }
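The (1, A, 4) + (K, 1, 4) broadcast used above to tile the base anchors over every feature-map position can be checked in isolation; a minimal sketch with made-up base anchors, a 16-pixel stride and a 2x3 feature map:

import numpy as np

base_anchors = np.array([[-8., -8., 8., 8.],
                         [-16., -8., 16., 8.],
                         [-8., -16., 8., 16.]])
stride, height, width = 16, 2, 3

shift_x, shift_y = np.meshgrid(np.arange(width) * stride,
                               np.arange(height) * stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()

A, K = base_anchors.shape[0], shifts.shape[0]
all_anchors = (base_anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
print(all_anchors.reshape((K * A, 4)).shape)  # -> (18, 4), i.e. K * A anchors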
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from lib.core.config import cfg
from lib.utils.blob import im_list_to_blob
class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self.q1_in = self.q2_in = self.q_out = None
self.daemon = True
def get(self, Q_in):
processed_ims, ims_info, all_boxes = [], [], []
for ix in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes = Q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[0:2]) + [im_scale])
# Encode boxes by adding the idx of images
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), dtype=np.float32)
im_boxes[:, 0:gt_boxes.shape[1]] = gt_boxes
im_boxes[:, -1] = ix
all_boxes.append(im_boxes)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, dtype=np.float32),
'gt_boxes': np.concatenate(all_boxes, axis=0),
}
def run(self):
while True:
if self.q1_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.q_out.put(self.get(self.q1_in))
elif self.q2_in.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.q_out.put(self.get(self.q2_in))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import multiprocessing
import numpy
from dragon.tools import db
from lib.core.config import cfg
class DataReader(multiprocessing.Process):
"""Collect encoded str from `LMDB`_.
Partition and shuffle records over distributed nodes.
Parameters
----------
source : str
The path of database.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=2048
The number of chunks to split.
"""
def __init__(self, **kwargs):
"""Create a DataReader."""
super(DataReader, self).__init__()
self._source = kwargs.get('source', '')
self._use_shuffle = kwargs.get('shuffle', False)
self._num_chunks = kwargs.get('num_chunks', 2048)
self._part_idx, self._num_parts = 0, 1
self._cursor, self._chunk_cursor = 0, 0
self._chunk_size, self._perm_size = 0, 0
self._head, self._tail, self._num_entries = 0, 0, 0
self._db, self._zfill, self._perm = None, None, None
self._rng_seed = cfg.RNG_SEED
self.q_out = None
self.daemon = True
def element(self):
"""Get the value of current record.
Returns
-------
str
The encoded str.
"""
return self._db.value()
def redirect(self, target):
"""Redirect to the target position.
Parameters
----------
target : int
The key of the record.
Notes
-----
The redirection reopens the database.
You can drop caches by ``echo 3 > /proc/sys/vm/drop_caches``.
This helps avoid getting stuck when *Database Size* >> *RAM Size*.
"""
self._db.close()
self._db.open(self._source)
self._cursor = target
self._db.set(str(target).zfill(self._zfill))
def reset(self):
"""Reset the cursor and environment."""
if self._num_parts > 1 or self._use_shuffle:
self._chunk_cursor = 0
self._part_idx = (self._part_idx + 1) % self._num_parts
if self._use_shuffle:
self._perm = numpy.random.permutation(self._perm_size)
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._head = self._head * self._chunk_size
if self._head >= self._num_entries: self.next_chunk()
self._tail = self._head + self._chunk_size
self._tail = min(self._num_entries, self._tail)
else:
self._head, self._tail = 0, self._num_entries
self.redirect(self._head)
def next_record(self):
"""Step the cursor of records."""
self._db.next()
self._cursor += 1
def next_chunk(self):
"""Step the cursor of chunks."""
self._chunk_cursor += 1
if self._chunk_cursor >= self._perm_size:
self.reset()
else:
self._head = self._part_idx * self._perm_size + self._perm[self._chunk_cursor]
self._head = self._head * self._chunk_size
if self._head >= self._num_entries:
self.next_chunk()
else:
self._tail = self._head + self._chunk_size
self._tail = min(self._num_entries, self._tail)
self.redirect(self._head)
def run(self):
"""Start the process."""
# Fix seed
numpy.random.seed(self._rng_seed)
# Init db
self._db = db.LMDB()
self._db.open(self._source)
self._zfill = self._db.zfill()
self._num_entries = self._db.num_entries()
epoch_size = self._num_entries // self._num_parts + 1
if self._use_shuffle:
if self._num_chunks <= 0:
# Each chunk has at most 1 record (Record-Wise)
self._chunk_size, self._perm_size = 1, epoch_size
else:
# Search an optimal chunk size (Chunk-Wise)
min_size, max_size = \
1, self._db._total_size * 1.0 \
/ (self._num_chunks * (1 << 20))
while min_size * 2 < max_size: min_size *= 2
self._perm_size = int(math.ceil(
self._db._total_size * 1.1 /
(self._num_parts * min_size << 20)))
self._chunk_size = int(
self._num_entries * 1.0 /
(self._perm_size * self._num_parts) + 1)
limit = (self._num_parts - 0.5) * self._perm_size * self._chunk_size
if self._num_entries <= limit:
# Roll back to Record-Wise shuffle
self._chunk_size, self._perm_size = 1, epoch_size
else:
# One chunk has at most K records
self._chunk_size, self._perm_size = epoch_size, 1
self._perm = numpy.arange(self._perm_size)
# Init env
self.reset()
# Run!
while True:
self.q_out.put(self.element())
self.next_record()
if self._cursor >= self._tail:
if self._num_parts > 1 or self._use_shuffle:
self.next_chunk()
else:
self.reset()
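For context on what the reader above implements, a plain-Python sketch (not the Dragon API) of chunk-wise shuffling: the chunk order is permuted, then each chunk is read sequentially; chunk_size=1 degenerates to record-wise shuffling:

import numpy as np

def iterate_chunks(num_entries, chunk_size, seed=3):
    """Yield record indices in shuffled-chunk order (illustrative only)."""
    num_chunks = (num_entries + chunk_size - 1) // chunk_size
    np.random.seed(seed)
    for chunk in np.random.permutation(num_chunks):
        head = chunk * chunk_size
        tail = min(head + chunk_size, num_entries)
        for cursor in range(head, tail):
            yield cursor

# Larger chunks trade shuffle randomness for sequential reads on huge databases.
print(list(iterate_chunks(10, 4)))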
...@@ -13,55 +13,70 @@ from __future__ import absolute_import ...@@ -13,55 +13,70 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from multiprocessing import Queue import multiprocessing as mp
import time import time
import dragon import dragon
import pprint import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.data.data_reader import DataReader from lib.faster_rcnn.data_transformer import DataTransformer
from lib.faster_rcnn.data.data_transformer import DataTransformer from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data.blob_fetcher import BlobFetcher
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import im_list_to_blob
class DataBatch(object): class DataLayer(torch.nn.Module):
"""DataBatch aims to prefetch data by ``Triple-Buffering``. """Generate a mini-batch of data."""
It takes full advantage of the Process/Thread of Python, def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
def forward(self):
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
which provides a remarkable I/O speed-up for scalable distributed training.
"""
def __init__(self, **kwargs): def __init__(self, **kwargs):
"""Construct a ``DataBatch``. """Construct a ``DataBatch``.
Parameters Parameters
---------- ----------
source : str dataset : lambda
The path of database. The creator of a dataset.
shuffle : bool, optional, default=False shuffle : bool, optional, default=False
Whether to shuffle the data. Whether to shuffle the data.
num_chunks : int, optional, default=2048 num_chunks : int, optional, default=0
The number of chunks to split. The number of chunks to split.
batch_size : int, optional, default=128 batch_size : int, optional, default=2
The size of a mini-batch. The size of a mini-batch.
prefetch : int, optional, default=5 prefetch : int, optional, default=5
The prefetch count. The prefetch count.
""" """
super(DataBatch, self).__init__() super(DataBatch, self).__init__()
# Init mpi # Distributed settings
global_rank, local_rank, group_size = 0, 0, 1 rank, group_size = 0, 1
if dragon.mpi.is_init(): process_group = dragon.distributed.get_default_process_group()
group = dragon.mpi.is_parallel() if process_group is not None and kwargs.get(
if group is not None: # DataParallel 'phase', 'TRAIN') == 'TRAIN':
global_rank = dragon.mpi.rank() group_size = process_group.size
group_size = len(group) rank = dragon.distributed.get_rank(process_group)
for i, node in enumerate(group):
if global_rank == node:
local_rank = i
kwargs['group_size'] = group_size kwargs['group_size'] = group_size
# Configuration # Configuration
...@@ -71,6 +86,7 @@ class DataBatch(object): ...@@ -71,6 +86,7 @@ class DataBatch(object):
self._num_transformers = kwargs.get('num_transformers', -1) self._num_transformers = kwargs.get('num_transformers', -1)
self._max_transformers = kwargs.get('max_transformers', 3) self._max_transformers = kwargs.get('max_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1) self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
# Io-Aware Policy # Io-Aware Policy
if self._num_transformers == -1: if self._num_transformers == -1:
...@@ -81,66 +97,52 @@ class DataBatch(object): ...@@ -81,66 +97,52 @@ class DataBatch(object):
self._num_transformers = min( self._num_transformers = min(
self._num_transformers, self._max_transformers) self._num_transformers, self._max_transformers)
# Init queues # Initialize queues
self.Q1 = Queue(self._prefetch * self._num_readers * self._batch_size) num_batches = self._prefetch * self._num_readers
self.Q21 = Queue(self._prefetch * self._num_readers * self._batch_size) self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q22 = Queue(self._prefetch * self._num_readers * self._batch_size) self.Q21 = mp.Queue(num_batches * self._batch_size)
self.Q3 = Queue(self._prefetch * self._num_readers) self.Q22 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Init readers # Initialize readers
self._readers = [] self._readers = []
for i in range(self._num_readers): for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].q_out = self.Q1
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers part_idx, num_parts = i, self._num_readers
num_parts *= group_size num_parts *= group_size
part_idx += local_rank * self._num_readers part_idx += rank * self._num_readers
self._readers[i]._num_parts = num_parts self._readers.append(dragon.io.DataReader(
self._readers[i]._part_idx = part_idx num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._rng_seed += part_idx self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start() self._readers[i].start()
time.sleep(0.1) time.sleep(0.1)
# Init transformers # Initialize transformers
self._transformers = [] self._transformers = []
for i in range(self._num_transformers): for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs) transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers) transformer._rng_seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1 transformer.q_in = self.Q1
transformer.q1_out = self.Q21 transformer.q1_out, transformer.q2_out = self.Q21, self.Q22
transformer.q2_out = self.Q22
transformer.start() transformer.start()
self._transformers.append(transformer) self._transformers.append(transformer)
time.sleep(0.1) time.sleep(0.1)
# Init blob fetchers # Initialize batch-producer
self._fetchers = [] self.start()
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.q1_in = self.Q21
fetcher.q2_in = self.Q22
fetcher.q_out = self.Q3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
# Avoid echoing on multiple nodes
if local_rank == 0:
self.echo()
# Register cleanup callbacks
def cleanup(): def cleanup():
def terminate(processes): def terminate(processes):
for process in processes: for process in processes:
process.terminate() process.terminate()
process.join() process.join()
terminate(self._fetchers) terminate([self])
logger.info('Terminating BlobFetcher ......') logger.info('Terminate DataBatch.')
terminate(self._transformers) terminate(self._transformers)
logger.info('Terminating DataTransformer ......') logger.info('Terminate DataTransformer.')
terminate(self._readers) terminate(self._readers)
logger.info('Terminating DataReader......') logger.info('Terminate DataReader.')
import atexit import atexit
atexit.register(cleanup) atexit.register(cleanup)
...@@ -156,20 +158,27 @@ class DataBatch(object): ...@@ -156,20 +158,27 @@ class DataBatch(object):
""" """
return self.Q3.get() return self.Q3.get()
def echo(self): def run(self):
"""Print I/O Information. """Start the process to produce batches."""
def produce(q_in):
Returns processed_ims, ims_info, all_boxes = [], [], []
------- for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
None im, im_scale, gt_boxes = q_in.get()
processed_ims.append(im)
""" ims_info.append(list(im.shape[:2]) + [im_scale])
print('---------------------------------------------------------') im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
print('BatchFetcher({} Threads), Using config:'.format( im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, image_index
self._num_readers + self._num_transformers + self._num_fetchers)) all_boxes.append(im_boxes)
params = {'queue_size': self._prefetch, return {
'n_readers': self._num_readers, 'data': im_list_to_blob(processed_ims),
'n_transformers': self._num_transformers, 'ims_info': np.array(ims_info, dtype=np.float32),
'n_fetchers': self._num_fetchers} 'gt_boxes': np.concatenate(all_boxes, axis=0),
pprint.pprint(params) }
print('---------------------------------------------------------')
q1, q2 = self.Q21, self.Q22
while True:
if q1.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q1))
elif q2.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q2))
q1, q2 = q2, q1 # Sample two queues uniformly
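A minimal usage sketch of the new data path (it assumes cfg.TRAIN.DATABASE points at a registered SeetaRecord dataset); forward() returns the blobs assembled by run() above:

data_layer = DataLayer()
blobs = data_layer.forward()
# blobs['data']     : batched image blob built by im_list_to_blob
# blobs['ims_info'] : (N, 3) array of [height, width, im_scale]
# blobs['gt_boxes'] : (M, 6) array of [x1, y1, x2, y2, cls, image_index]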
...@@ -14,22 +14,13 @@ from __future__ import division ...@@ -14,22 +14,13 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import multiprocessing import multiprocessing
import numpy as np
import numpy.random as npr
try: import cv2
import cv2 import numpy as np
except ImportError as e:
print('Failed to import cv2. Error: {0}'.format(str(e)))
try:
import PIL.Image
except ImportError as e:
print('Failed to import PIL. Error: {0}'.format(str(e)))
from lib.core.config import cfg from lib.core.config import cfg
from lib.proto import anno_pb2 as pb
from lib.utils import logger
from lib.utils.blob import prep_im_for_blob from lib.utils.blob import prep_im_for_blob
from lib.utils.boxes import flip_boxes
class DataTransformer(multiprocessing.Process): class DataTransformer(multiprocessing.Process):
...@@ -47,44 +38,45 @@ class DataTransformer(multiprocessing.Process): ...@@ -47,44 +38,45 @@ class DataTransformer(multiprocessing.Process):
def make_roi_dict( def make_roi_dict(
self, self,
ann_datum, example,
im_scale, im_scale,
apply_flip=False, apply_flip=False,
offsets=None, offsets=None,
): ):
annotations = ann_datum.annotation
n_objects = 0 n_objects = 0
if not self._use_diff: if not self._use_diff:
for ann in annotations: for obj in example['object']:
if not ann.difficult: if obj.get('difficult', 0) == 0:
n_objects += 1 n_objects += 1
else: else:
n_objects = len(annotations) n_objects = len(example['object'])
roi_dict = { roi_dict = {
'width': ann_datum.datum.width, 'width': example['width'],
'height': ann_datum.datum.height, 'height': example['height'],
'gt_classes': np.zeros((n_objects,), 'int32'), 'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), 'float32'), 'boxes': np.zeros((n_objects, 4), 'float32'),
} }
# Filter the difficult instances # Filter the difficult instances
rec_idx = 0 object_idx = 0
for ann in annotations: for obj in example['object']:
if not self._use_diff and ann.difficult: if not self._use_diff and \
obj.get('difficult', 0) > 0:
continue continue
roi_dict['boxes'][rec_idx, :] = [ roi_dict['boxes'][object_idx, :] = [
max(0, ann.x1), max(0, obj['xmin']),
max(0, ann.y1), max(0, obj['ymin']),
min(ann.x2, ann_datum.datum.width - 1), min(obj['xmax'], example['width'] - 1),
min(ann.y2, ann_datum.datum.height - 1), min(obj['ymax'], example['height'] - 1),
] ]
roi_dict['gt_classes'][rec_idx] = self._class_to_ind[ann.name] roi_dict['gt_classes'][object_idx] = \
rec_idx += 1 self._class_to_ind[obj['name']]
object_idx += 1
# Flip the boxes if necessary # Flip the boxes if necessary
if apply_flip: if apply_flip:
roi_dict['boxes'] = _flip_boxes( roi_dict['boxes'] = flip_boxes(
roi_dict['boxes'], roi_dict['width']) roi_dict['boxes'], roi_dict['width'])
# Scale the boxes to the detecting scale # Scale the boxes to the detecting scale
...@@ -102,50 +94,34 @@ class DataTransformer(multiprocessing.Process): ...@@ -102,50 +94,34 @@ class DataTransformer(multiprocessing.Process):
return roi_dict return roi_dict
@classmethod @classmethod
def get_image(cls, serialized): def get_image(cls, example):
datum = pb.AnnotatedDatum() img = np.frombuffer(example['content'], np.uint8)
datum.ParseFromString(serialized) return cv2.imdecode(img, -1)
datum = datum.datum
im = np.fromstring(datum.data, np.uint8)
return cv2.imdecode(im, -1) if datum.encoded is True else \
im.reshape((datum.height, datum.width, datum.channels))
@classmethod @classmethod
def get_annotations(cls, serialized): def get_annotations(cls, example):
datum = pb.AnnotatedDatum()
datum.ParseFromString(serialized)
filename = datum.filename
annotations = datum.annotation
objects = [] objects = []
for ix, ann in enumerate(annotations): for ix, obj in enumerate(example['object']):
objects.append({ objects.append({
'name': ann.name, 'name': obj['name'],
'difficult': int(ann.difficult), 'difficult': obj.get('difficult', 0),
'bbox': [ann.x1, ann.y1, ann.x2, ann.y2], 'bbox': [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']],
'mask': ann.mask,
}) })
return filename, objects return example['id'], objects
def get(self, serialized): def get(self, example):
datum = pb.AnnotatedDatum() img = np.frombuffer(example['content'], np.uint8)
datum.ParseFromString(serialized) img = cv2.imdecode(img, -1)
im_datum = datum.datum
im = np.fromstring(im_datum.data, np.uint8)
if im_datum.encoded is True:
im = cv2.imdecode(im, -1)
else:
h, w = im_datum.height, im_datum.width
im = im.reshape((h, w, im_datum.channels))
# Scale # Scale
scale_indices = npr.randint(len(cfg.TRAIN.SCALES)) scale_indices = np.random.randint(len(cfg.TRAIN.SCALES))
target_size = cfg.TRAIN.SCALES[scale_indices] target_size = cfg.TRAIN.SCALES[scale_indices]
im, im_scale, jitter = prep_im_for_blob(im, target_size, cfg.TRAIN.MAX_SIZE) im, im_scale, jitter = prep_im_for_blob(img, target_size, cfg.TRAIN.MAX_SIZE)
# Flip # Flip
apply_flip = False apply_flip = False
if self._use_flipped: if self._use_flipped:
if npr.randint(0, 2) > 0: if np.random.randint(2) > 0:
im = im[:, ::-1, :] im = im[:, ::-1, :]
apply_flip = True apply_flip = True
...@@ -160,8 +136,8 @@ class DataTransformer(multiprocessing.Process): ...@@ -160,8 +136,8 @@ class DataTransformer(multiprocessing.Process):
# To a square (target_size, target_size) # To a square (target_size, target_size)
im, offsets = _get_image_with_target_size([target_size] * 2, im) im, offsets = _get_image_with_target_size([target_size] * 2, im)
# Datum -> RoIDict # Example -> RoIDict
roi_dict = self.make_roi_dict(datum, im_scale, apply_flip, offsets) roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets)
# Post-Process for gt boxes # Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}] # Shape like: [num_objects, {x1, y1, x2, y2, cls}]
...@@ -171,29 +147,16 @@ class DataTransformer(multiprocessing.Process): ...@@ -171,29 +147,16 @@ class DataTransformer(multiprocessing.Process):
return im, im_scale, gt_boxes return im, im_scale, gt_boxes
def run(self): def run(self):
npr.seed(self._rng_seed) np.random.seed(self._rng_seed)
while True: while True:
serialized = self.q_in.get() outputs = self.get(self.q_in.get())
data = self.get(serialized) if len(outputs[2]) < 1:
# Ensure that there is at least 1 ground-truth continue # Ignore the non-object image
if len(data[2]) < 1: aspect_ratio = float(outputs[0].shape[0]) / outputs[0].shape[1]
continue if aspect_ratio > 1.:
aspect_ratio = float(data[0].shape[0]) / data[0].shape[1] self.q1_out.put(outputs)
if aspect_ratio > 1.0:
self.q1_out.put(data)
else: else:
self.q2_out.put(data) self.q2_out.put(outputs)
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes
def _get_image_with_target_size(target_size, img): def _get_image_with_target_size(target_size, img):
......
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
import numpy as np from __future__ import absolute_import
from __future__ import division
# Verify that we compute the same anchors as Shaoqing's matlab implementation: from __future__ import print_function
#
# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat import numpy as np
# >> anchors
# # Verify that we compute the same anchors as Shaoqing's matlab implementation:
# anchors = #
# # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
# -83 -39 100 56 # >> anchors
# -175 -87 192 104 #
# -359 -183 376 200 # anchors =
# -55 -55 72 72 #
# -119 -119 136 136 # -83 -39 100 56
# -247 -247 264 264 # -175 -87 192 104
# -35 -79 52 96 # -359 -183 376 200
# -79 -167 96 184 # -55 -55 72 72
# -167 -343 184 360 # -119 -119 136 136
# -247 -247 264 264
# array([[ -83., -39., 100., 56.], # -35 -79 52 96
# [-175., -87., 192., 104.], # -79 -167 96 184
# [-359., -183., 376., 200.], # -167 -343 184 360
# [ -55., -55., 72., 72.],
# [-119., -119., 136., 136.], # array([[ -83., -39., 100., 56.],
# [-247., -247., 264., 264.], # [-175., -87., 192., 104.],
# [ -35., -79., 52., 96.], # [-359., -183., 376., 200.],
# [ -79., -167., 96., 184.], # [ -55., -55., 72., 72.],
# [-167., -343., 184., 360.]]) # [-119., -119., 136., 136.],
# [-247., -247., 264., 264.],
# [ -35., -79., 52., 96.],
def generate_anchors( # [ -79., -167., 96., 184.],
base_size=16, # [-167., -343., 184., 360.]])
ratios=(0.5, 1, 2),
scales=2**np.arange(3, 6),
): def generate_anchors(
""" base_size=16,
Generate anchor (reference) windows by enumerating aspect ratios X ratios=(0.5, 1, 2),
scales wrt a reference (0, 0, 15, 15) window. scales=2**np.arange(3, 6),
""" ):
base_anchor = np.array([1, 1, base_size, base_size]) - 1 """
ratio_anchors = _ratio_enum(base_anchor, ratios) Generate anchor (reference) windows by enumerating aspect ratios X
anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) scales wrt a reference (0, 0, 15, 15) window.
for i in range(ratio_anchors.shape[0])]) """
return anchors base_anchor = np.array([1, 1, base_size, base_size]) - 1
ratio_anchors = _ratio_enum(base_anchor, ratios)
anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
def generate_anchors_v2( for i in range(ratio_anchors.shape[0])])
stride=16, return anchors
ratios=(0.5, 1, 2),
sizes=(32, 64, 128, 256, 512),
): def generate_anchors_v2(
""" stride=16,
Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors ratios=(0.5, 1, 2),
are centered on stride / 2, have (approximate) sqrt areas of the specified sizes=(32, 64, 128, 256, 512),
sizes, and aspect ratios as given. ):
""" """
return generate_anchors( Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
base_size=stride, are centered on stride / 2, have (approximate) sqrt areas of the specified
ratios=ratios, sizes, and aspect ratios as given.
scales=np.array(sizes, dtype=np.float) / stride, """
) return generate_anchors(
base_size=stride,
ratios=ratios,
def _whctrs(anchor): scales=np.array(sizes, dtype=np.float) / stride,
"""Return width, height, x center, and y center for an anchor (window).""" )
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1) def _whctrs(anchor):
y_ctr = anchor[1] + 0.5 * (h - 1) """Return width, height, x center, and y center for an anchor (window)."""
return w, h, x_ctr, y_ctr w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
def _mkanchors(ws, hs, x_ctr, y_ctr): y_ctr = anchor[1] + 0.5 * (h - 1)
""" return w, h, x_ctr, y_ctr
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
""" def _mkanchors(ws, hs, x_ctr, y_ctr):
ws = ws[:, np.newaxis] """
hs = hs[:, np.newaxis] Given a vector of widths (ws) and heights (hs) around a center
anchors = np.hstack((x_ctr - 0.5 * (ws - 1), (x_ctr, y_ctr), output a set of anchors (windows).
y_ctr - 0.5 * (hs - 1), """
x_ctr + 0.5 * (ws - 1), ws = ws[:, np.newaxis]
y_ctr + 0.5 * (hs - 1))) hs = hs[:, np.newaxis]
return anchors anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1),
def _ratio_enum(anchor, ratios): y_ctr + 0.5 * (hs - 1)))
"""Enumerate a set of anchors for each aspect ratio wrt an anchor.""" return anchors
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios def _ratio_enum(anchor, ratios):
ws = np.round(np.sqrt(size_ratios)) """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
hs = np.round(ws * ratios) w, h, x_ctr, y_ctr = _whctrs(anchor)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr) size = w * h
return anchors size_ratios = size / ratios
ws = np.round(np.sqrt(size_ratios))
hs = np.round(ws * ratios)
def _scale_enum(anchor, scales): anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
"""Enumerate a set of anchors for each scale wrt an anchor.""" return anchors
w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales def _scale_enum(anchor, scales):
anchors = _mkanchors(ws, hs, x_ctr, y_ctr) """Enumerate a set of anchors for each scale wrt an anchor."""
return anchors w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales
if __name__ == '__main__': anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
print(generate_anchors()) return anchors
if __name__ == '__main__':
print(generate_anchors())
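A quick usage sketch of the two entry points above; the sizes are chosen so both calls produce the same 3 ratios x 3 scales grid shown in the header comment:

import numpy as np

print(generate_anchors(base_size=16, ratios=(0.5, 1, 2),
                       scales=2 ** np.arange(3, 6)).shape)  # -> (9, 4)

print(generate_anchors_v2(stride=16, ratios=(0.5, 1, 2),
                          sizes=(128, 256, 512)).shape)     # -> (9, 4)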
# --------------------------------------------------------
# Mask R-CNN @ Detectron
# Copyright (c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data.data_batch import DataBatch
class DataLayer(torch.nn.Module):
def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'source': database.source,
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': 0, # Record-Wise Shuffle
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
def forward(self):
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
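A hedged usage sketch of the layer above; the exact keys in the returned dict beyond 'data' depend on DataBatch and the active config, so treat this as illustrative only:

# Assumes cfg.TRAIN.DATABASE and the other training options have already been
# populated from a config file before the layer is constructed.
layer = DataLayer()
blobs = layer.forward()      # dequeues one prefetched batch from DataBatch
images = blobs['data']       # zero-copied into a dragon.vm.torch tensor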
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# -------------------------------------------------------- # --------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms from lib.nms.nms_wrapper import nms
from lib.utils.blob import blob_to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module): class ProposalLayer(torch.nn.Module):
""" """
Compute proposals by applying estimated bounding-box Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors"). transformations to a set of regular boxes (called "anchors").
""" """
def __init__(self): def __init__(self):
super(ProposalLayer, self).__init__() super(ProposalLayer, self).__init__()
# Load the basic configs # Load the basic configs
self.scales = cfg.RPN.SCALES self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0] self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS self.ratios = cfg.RPN.ASPECT_RATIOS
# Generate base anchors # Generate base anchors
self.base_anchors = generate_anchors( self.base_anchors = generate_anchors(
base_size=self.stride, base_size=self.stride,
ratios=self.ratios, ratios=self.ratios,
scales=np.array(self.scales), scales=np.array(self.scales),
) )
def forward(self, features, cls_prob, bbox_pred, ims_info): def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST' cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources # Get resources
num_images = ims_info.shape[0] num_images = ims_info.shape[0]
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
height, width = cls_prob.shape[-2:] height, width = cls_prob.shape[-2:]
shift_x = np.arange(0, width) * self.stride shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to # Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get # cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0] A = self.base_anchors.shape[0]
K = shifts.shape[0] K = shifts.shape[0]
anchors = \ anchors = \
self.base_anchors.reshape((1, A, 4)) + \ self.base_anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2)) shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = anchors.reshape((K * A, 4)) all_anchors = anchors.reshape((K * A, 4))
# Prepare for the outputs # Prepare for the outputs
batch_rois = [] batch_rois = []
# scores & deltas are (1, A, H, W) format # scores & deltas are (1, A, H, W) format
# Transpose to (1, H, W, A) # Transpose to (1, H, W, A)
batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1)) batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1))
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1)) batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1))
# Extract RoIs separately # Extract RoIs separately
for ix in range(num_images): for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1] scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix].reshape((-1, 4)) deltas = batch_deltas[ix].reshape((-1, 4))
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores): if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
order = np.argsort(-scores.squeeze()) order = np.argsort(-scores.squeeze())
else: else:
# Avoid sorting possibly large arrays; First partition to get top K # Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores) # unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN] inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
order = np.argsort(-scores[inds].squeeze()) order = np.argsort(-scores[inds].squeeze())
order = inds[order] order = inds[order]
deltas = deltas[order] deltas = deltas[order]
anchors = all_anchors[order] anchors = all_anchors[order]
scores = scores[order] scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations # 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas) proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image # 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2]) proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold # 3. remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2]) # (NOTE: convert min_size to input image scale stored in im_info[2])
keep = filter_boxes(proposals, min_size * ims_info[ix, 2]) keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :] proposals = proposals[keep, :]
scores = scores[keep] scores = scores[keep]
# 6. Apply nms (e.g. threshold = 0.7) # 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_topN (e.g. 300) # 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top) # 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh) keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: if post_nms_topN > 0:
keep = keep[:post_nms_topN] keep = keep[:post_nms_topN]
proposals = proposals[keep, :] proposals = proposals[keep, :]
# Output rois blob # Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32) batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix) batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois) batch_rois.append(rpn_rois)
# Merge RoIs into a blob # Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0) rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN': if cfg_key == 'TRAIN':
return rpn_rois return rpn_rois
else: else:
return [blob_to_tensor(rpn_rois)] return [blob_to_tensor(rpn_rois)]
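The broadcast described in the comments above (A base anchors added to K grid shifts) can be checked in isolation. A toy sketch with made-up numbers:

import numpy as np

base_anchors = np.array([[-8., -8., 8., 8.],
                         [-16., -16., 16., 16.]])   # A = 2 base anchors
shifts = np.array([[0., 0., 0., 0.],
                   [16., 0., 16., 0.]])             # K = 2 grid positions
A, K = base_anchors.shape[0], shifts.shape[0]
anchors = base_anchors.reshape((1, A, 4)) + \
    shifts.reshape((1, K, 4)).transpose((1, 0, 2))  # (K, A, 4)
print(anchors.reshape((K * A, 4)))                  # 4 shifted anchors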
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# -------------------------------------------------------- # --------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
class ProposalTargetLayer(torch.nn.Module): class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.""" """Assign object detection proposals to ground-truth targets."""
def __init__(self): def __init__(self):
super(ProposalTargetLayer, self).__init__() super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES self.num_classes = cfg.MODEL.NUM_CLASSES
def forward(self, rpn_rois, gt_boxes): def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label) # GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs # Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets', keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights'] 'bbox_inside_weights', 'bbox_outside_weights']
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys])) batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately # Generate targets separately
for ix in range(num_images): for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix] gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image # Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]] rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois # Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4])))) rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training # Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois( labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes) rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32) bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([ _fmap_batch([
labels, labels,
rois, rois,
bbox_targets, bbox_targets,
bbox_inside_weights, bbox_inside_weights,
bbox_outside_weights], bbox_outside_weights],
batch_outputs, batch_outputs,
keys, keys,
) )
# Merge targets into blobs # Merge targets into blobs
for k, v in batch_outputs.items(): for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0) batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return { return {
'rois': [blob_to_tensor(batch_outputs['rois'])], 'rois': [blob_to_tensor(batch_outputs['rois'])],
'labels': blob_to_tensor(batch_outputs['labels']), 'labels': blob_to_tensor(batch_outputs['labels']),
'bbox_targets': blob_to_tensor(batch_outputs['bbox_targets']), 'bbox_targets': blob_to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': blob_to_tensor(batch_outputs['bbox_inside_weights']), 'bbox_inside_weights': blob_to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': blob_to_tensor(batch_outputs['bbox_outside_weights']), 'bbox_outside_weights': blob_to_tensor(batch_outputs['bbox_outside_weights']),
} }
def _get_bbox_regression_labels(bbox_target_data, num_classes): def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a """Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th) compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets). by the network (i.e. only one class has non-zero targets).
Returns: Returns:
bbox_target (ndarray): N x 4K blob of regression targets bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights bbox_inside_weights (ndarray): N x 4K blob of loss weights
""" """
clss = bbox_target_data[:, 0] clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0] inds = np.where(clss > 0)[0]
for ind in inds: for ind in inds:
cls = clss[ind] cls = clss[ind]
start = 4 * cls start = 4 * cls
end = start + 4 end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:] bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0) bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights return bbox_targets, bbox_inside_weights
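A toy run of the expansion above, assuming three classes with index 0 as background: only the four columns belonging to the RoI's class receive targets and unit weights.

import numpy as np

bbox_target_data = np.array([[2., 0.1, 0.2, 0.3, 0.4],   # RoI assigned to class 2
                             [0., 0.0, 0.0, 0.0, 0.0]],  # background RoI
                            dtype=np.float32)
targets, weights = _get_bbox_regression_labels(bbox_target_data, num_classes=3)
print(targets.shape)     # (2, 12): 4 columns per class
print(targets[0, 8:12])  # [0.1 0.2 0.3 0.4], the only non-zero slot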
def _compute_targets(ex_rois, gt_rois, labels): def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image.""" """Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4 assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4 assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS) targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False) return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois( def _sample_rois(
all_rois, all_rois,
gt_boxes, gt_boxes,
fg_rois_per_image, fg_rois_per_image,
rois_per_image, rois_per_image,
num_classes, num_classes,
): ):
"""Generate a random sample of RoIs.""" """Generate a random sample of RoIs."""
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float), np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float),
) )
gt_assignment = overlaps.argmax(axis=1) gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1) max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4] labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap # Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image # Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs # foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size)) fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement # Sample foreground regions without replacement
if fg_inds.size > 0: if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding # Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired) # against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement # Sample background regions without replacement
if bg_inds.size > 0: if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg) # The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds) keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays: # Select sampled values from various arrays:
labels = labels[keep_inds] labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0 # Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0 labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds] rois = all_rois[keep_inds]
bbox_target_data = _compute_targets( bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels) rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \ bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes) _get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights return labels, rois, bbox_targets, bbox_inside_weights
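The sampling budget above is plain arithmetic on the config. A worked example assuming BATCH_SIZE = 128 and FG_FRACTION = 0.25 (the background share absorbs any unused foreground quota):

import numpy as np

rois_per_image = 128                                      # stands in for cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = int(np.round(0.25 * rois_per_image))  # stands in for cfg.TRAIN.FG_FRACTION
print(fg_rois_per_image, rois_per_image - fg_rois_per_image)  # 32 96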
def _fmap_batch(inputs, outputs, keys): def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys): for i, key in enumerate(keys):
outputs[key].append(inputs[i]) outputs[key].append(inputs[i])
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.nms.nms_wrapper import nms from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image from lib.utils.image import scale_image
from lib.utils.timer import Timer from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image): def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales.""" """Detect a image, with single or multiple scales."""
# Prepare images # Prepare images
ims, ims_scale = scale_image(raw_image) ims, ims_scale = scale_image(raw_image)
# Prepare blobs # Prepare blobs
blobs = {'data': im_list_to_blob(ims)} blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([ blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale] list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32) for im_scale in ims_scale], dtype=np.float32)
    blobs['data'] = torch.from_numpy(blobs['data'])

    # Do Forward
    with torch.no_grad():
        outputs = detector.forward(inputs=blobs)

    # Decode results
    batch_rois = tensor_to_blob(outputs['rois'])
    batch_scores = tensor_to_blob(outputs['cls_prob'])
    batch_deltas = tensor_to_blob(outputs['bbox_pred'])

    batch_boxes = bbox_transform_inv(
        boxes=batch_rois[:, 1:5],
        deltas=batch_deltas,
        weights=cfg.BBOX_REG_WEIGHTS,
    )

    scores_wide, boxes_wide = [], []

    for im_idx in range(len(ims)):
        indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0]
        boxes = batch_boxes[indices]
        boxes /= ims_scale[im_idx]
        clip_tiled_boxes(boxes, raw_image.shape)
        scores_wide.append(batch_scores[indices])
        boxes_wide.append(boxes)

    return (np.vstack(scores_wide), np.vstack(boxes_wide)) \
        if len(scores_wide) > 1 else (scores_wide[0], boxes_wide[0])


def test_net(detector, server):
    # Load settings
    classes = server.classes
    num_images = server.num_images
    num_classes = server.num_classes
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    _t = {'im_detect': Timer(), 'misc': Timer()}

    for i in range(num_images):
        image_id, raw_image = server.get_image()

        _t['im_detect'].tic()
        scores, boxes = im_detect(detector, raw_image)
        _t['im_detect'].toc()

        _t['misc'].tic()
        boxes_this_image = [[]]
        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_detections = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])
            ).astype(np.float32, copy=False)
            if cfg.TEST.USE_SOFT_NMS:
                keep = soft_nms(
                    cls_detections, cfg.TEST.NMS,
                    method=cfg.TEST.SOFT_NMS_METHOD,
                    sigma=cfg.TEST.SOFT_NMS_SIGMA,
                )
            else:
                keep = nms(cls_detections, cfg.TEST.NMS, force_cpu=True)
            cls_detections = cls_detections[keep, :]
            all_boxes[j][i] = cls_detections
            boxes_this_image.append(cls_detections)

        if cfg.VIS or cfg.VIS_ON_FILE:
            vis_one_image(
                raw_image, classes, boxes_this_image,
                thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
                filename=server.get_save_filename(image_id),
            )

        # Limit to max_per_image detections *over all classes*
        if cfg.TEST.DETECTIONS_PER_IM > 0:
            image_scores = []
            for j in range(1, num_classes):
                if len(all_boxes[j][i]) < 1: continue
                image_scores.append(all_boxes[j][i][:, -1])
            if len(image_scores) > 0:
                image_scores = np.hstack(image_scores)
                if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
                    image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
                    for j in range(1, num_classes):
                        keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i + 1, num_images,
                      _t['im_detect'].average_time,
                      _t['misc'].average_time),
              end='')

    print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
    print('Evaluating detections')
    server.evaluate_detections(all_boxes)
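The per-image cap in test_net keeps only the DETECTIONS_PER_IM highest-scoring boxes across all classes. A minimal sketch of that thresholding with toy scores:

import numpy as np

image_scores = np.array([0.9, 0.8, 0.75, 0.6, 0.3], dtype=np.float32)
max_per_image = 3                                     # stands in for cfg.TEST.DETECTIONS_PER_IM
image_thresh = np.sort(image_scores)[-max_per_image]  # 0.75
print(image_scores[image_scores >= image_thresh])     # the three strongest detections survive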
...@@ -13,6 +13,6 @@ from __future__ import absolute_import ...@@ -13,6 +13,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.fpn.layers.anchor_target_layer import AnchorTargetLayer from lib.fpn.anchor_target_layer import AnchorTargetLayer
from lib.fpn.layers.proposal_layer import ProposalLayer from lib.fpn.proposal_layer import ProposalLayer
from lib.fpn.layers.proposal_target_layer import ProposalTargetLayer from lib.fpn.proposal_target_layer import ProposalTargetLayer
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import blob_to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
class AnchorTargetLayer(torch.nn.Module): class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets.""" """Assign anchors to ground-truth targets."""
def __init__(self): def __init__(self):
super(AnchorTargetLayer, self).__init__() super(AnchorTargetLayer, self).__init__()
# Load the basic configs # Load the basic configs
self.scales = cfg.RPN.SCALES self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides): if len(self.scales) != len(self.strides):
logger.fatal( logger.fatal(
'Given {} scales and {} strides.' 'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides)) .format(len(self.scales), len(self.strides))
) )
# Allow boxes to sit over the edge by a small amount # Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors # Generate base anchors
self.base_anchors = [] self.base_anchors = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i] base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable): if not isinstance(scale, collections.Iterable):
scale = [scale] scale = [scale]
self.base_anchors.append( self.base_anchors.append(
generate_anchors( generate_anchors(
base_size=base_size, base_size=base_size,
ratios=self.ratios, ratios=self.ratios,
scales=np.array(scale), scales=np.array(scale),
) )
) )
def forward(self, features, gt_boxes, ims_info): def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.""" """Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images: if len(gt_boxes_wide) != num_images:
logger.fatal( logger.fatal(
'Input {} images, got {} slices of gt boxes.' 'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide)) .format(num_images, len(gt_boxes_wide))
) )
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
all_anchors, total_anchors = [], 0 all_anchors, total_anchors = [], 0
for i in range(len(self.strides)): for i in range(len(self.strides)):
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i] shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i] shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to # Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get # cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0] A = self.base_anchors[i].shape[0]
K = shifts.shape[0] K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) + anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2))) shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4] # [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2)) anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4)) anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors) all_anchors.append(anchors)
total_anchors += anchors.shape[0] total_anchors += anchors.shape[0]
all_anchors = np.vstack(all_anchors) all_anchors = np.vstack(all_anchors)
# label: 1 is positive, 0 is negative, -1 is don't care # label: 1 is positive, 0 is negative, -1 is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32) labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32)
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32) bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32) bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32) bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
for ix in range(num_images): for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, has_mask) # GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes = gt_boxes_wide[ix] gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix] im_info = ims_info[ix]
if self._allowed_border >= 0: if self._allowed_border >= 0:
# Only keep anchors inside the image # Only keep anchors inside the image
inds_inside = np.where( inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) & (all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) & (all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) & (all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0] (all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :] anchors = all_anchors[inds_inside, :]
else: else:
inds_inside = np.arange(all_anchors.shape[0]) inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors anchors = all_anchors
num_inside = len(inds_inside) num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care # label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32) labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1) labels.fill(-1)
# Overlaps between the anchors and the gt boxes # Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float),
) )
argmax_overlaps = overlaps.argmax(axis=1) argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps] max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0) gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, gt_max_overlaps = overlaps[gt_argmax_overlaps,
np.arange(overlaps.shape[1])] np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap # fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1 labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU # fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU # bg label: below threshold IOU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many # Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0] fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg: if len(fg_inds) > num_fg:
disable_inds = npr.choice( disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False) fg_inds, size=(len(fg_inds) - num_fg), replace=False)
labels[disable_inds] = -1 labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0] fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many # Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0] bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg: if len(bg_inds) > num_bg:
disable_inds = npr.choice( disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False) bg_inds, size=(len(bg_inds) - num_bg), replace=False)
labels[disable_inds] = -1 labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32) bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform( bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], anchors[fg_inds, :],
gt_boxes[argmax_overlaps[fg_inds], 0:4], gt_boxes[argmax_overlaps[fg_inds], 0:4],
) )
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0)) bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
labels_wide[ix, inds_inside] = labels # label labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors)) labels = labels_wide.reshape((num_images, total_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1)) bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1)) bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1)) bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return { return {
'labels': blob_to_tensor(labels), 'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets), 'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights), 'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights), 'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
} }
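The labeling rules above reduce to thresholding on each anchor's max overlap (plus forcing every ground-truth box's best anchor to be positive). A toy sketch, assuming RPN_POSITIVE_OVERLAP = 0.7 and RPN_NEGATIVE_OVERLAP = 0.3:

import numpy as np

max_overlaps = np.array([0.75, 0.40, 0.10])  # best IoU of three anchors with any gt box
labels = np.full(3, -1.0)                    # -1: don't care
labels[max_overlaps >= 0.7] = 1              # fg: above the positive threshold
labels[max_overlaps < 0.3] = 0               # bg: below the negative threshold
print(labels)                                # [ 1. -1.  0.]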
# --------------------------------------------------------
# Mask R-CNN @ Detectron
# Copyright (c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms from lib.nms.nms_wrapper import nms
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import blob_to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform_inv from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module): class ProposalLayer(torch.nn.Module):
""" """
Compute proposals by applying estimated bounding-box Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors"). transformations to a set of regular boxes (called "anchors").
""" """
def __init__(self): def __init__(self):
super(ProposalLayer, self).__init__() super(ProposalLayer, self).__init__()
# Load the basic configs # Load the basic configs
self.scales = cfg.RPN.SCALES self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides): if len(self.scales) != len(self.strides):
logger.fatal( logger.fatal(
'Given {} scales and {} strides.' 'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides)) .format(len(self.scales), len(self.strides))
) )
# Generate base anchors # Generate base anchors
self.base_anchors = [] self.base_anchors = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i] base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable): if not isinstance(scale, collections.Iterable):
scale = [scale] scale = [scale]
self.base_anchors.append( self.base_anchors.append(
generate_anchors( generate_anchors(
base_size=base_size, base_size=base_size,
ratios=self.ratios, ratios=self.ratios,
scales=np.array(scale), scales=np.array(scale),
) )
) )
def generate_grid_anchors(self, features): def generate_grid_anchors(self, features):
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
anchors_wide = [] anchors_wide = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i] shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i] shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to # Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get # cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0] A = self.base_anchors[i].shape[0]
K = shifts.shape[0] K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) + anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2))) shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4] # [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2)) anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4)) anchors = anchors.reshape((A * K, 4))
anchors_wide.append(anchors) anchors_wide.append(anchors)
return np.vstack(anchors_wide) return np.vstack(anchors_wide)
def forward(self, features, cls_prob, bbox_pred, ims_info): def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST' cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources # Get resources
num_images = ims_info.shape[0] num_images = ims_info.shape[0]
all_anchors = self.generate_grid_anchors(features) # [n, 4] all_anchors = self.generate_grid_anchors(features) # [n, 4]
if cls_prob.shape[0] != num_images or \ if cls_prob.shape[0] != num_images or \
bbox_pred.shape[0] != num_images: bbox_pred.shape[0] != num_images:
logger.fatal('Incorrect num of images: {}'.format(num_images)) logger.fatal('Incorrect num of images: {}'.format(num_images))
# Prepare for the outputs # Prepare for the outputs
batch_rois = [] batch_rois = []
batch_scores = cls_prob.numpy(True) batch_scores = cls_prob.numpy(True)
batch_deltas = bbox_pred.numpy(True) \ batch_deltas = bbox_pred.numpy(True) \
.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4] .transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
# Extract RoIs separately # Extract RoIs separately
for ix in range(num_images): for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1] scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix] # [n, 4] deltas = batch_deltas[ix] # [n, 4]
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores): if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
order = np.argsort(-scores.squeeze()) order = np.argsort(-scores.squeeze())
else: else:
# Avoid sorting possibly large arrays; First partition to get top K # Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores) # unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN] inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
order = np.argsort(-scores[inds].squeeze()) order = np.argsort(-scores[inds].squeeze())
order = inds[order] order = inds[order]
deltas = deltas[order] deltas = deltas[order]
anchors = all_anchors[order] anchors = all_anchors[order]
scores = scores[order] scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations # 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas) proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image # 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2]) proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold # 3. remove predicted boxes with either height or width < threshold
keep = filter_boxes(proposals, min_size * ims_info[ix, 2]) keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :] proposals = proposals[keep, :]
scores = scores[keep] scores = scores[keep]
# 6. Apply nms (e.g. threshold = 0.7) # 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_topN (e.g. 300) # 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top) # 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh) keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: if post_nms_topN > 0:
keep = keep[:post_nms_topN] keep = keep[:post_nms_topN]
proposals = proposals[keep, :] proposals = proposals[keep, :]
# Output rois blob # Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32) batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix) batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois) batch_rois.append(rpn_rois)
# Merge RoIs into a blob # Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0) rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN': if cfg_key == 'TRAIN':
return rpn_rois return rpn_rois
else: else:
# Distribute rois into K levels # Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1 K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level) fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level)
all_rois = [] all_rois = []
for i in range(K): for i in range(K):
lv_indices = np.where(fpn_levels == (i + min_level))[0] lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0: if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling # Fake a tiny roi to avoid empty roi pooling
all_rois.append(blob_to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32))) all_rois.append(blob_to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else: else:
all_rois.append(blob_to_tensor(rpn_rois[lv_indices])) all_rois.append(blob_to_tensor(rpn_rois[lv_indices]))
return all_rois return all_rois
def _map_rois_to_fpn_levels(rois, k_min, k_max): def _map_rois_to_fpn_levels(rois, k_min, k_max):
""" """
Determine which FPN level each RoI in a set of RoIs Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper. should map to based on the heuristic in the FPN paper.
""" """
if len(rois) == 0: if len(rois) == 0:
return [] return []
ws = rois[:, 3] - rois[:, 1] + 1 ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1 hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs) s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224 s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4 lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6)) target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max) return np.clip(target_levels, k_min, k_max)
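A worked example of the heuristic above, assuming the defaults quoted in the comments (s0 = 224, lvl0 = 4) and a level range of 2..5: halving or doubling the RoI side moves it one pyramid level down or up.

import numpy as np

for side in (112, 224, 448):                       # square RoIs for simplicity
    lvl = np.floor(4 + np.log2(side / 224. + 1e-6))
    print(side, int(np.clip(lvl, 2, 5)))           # 112 -> 3, 224 -> 4, 448 -> 5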
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
class ProposalTargetLayer(torch.nn.Module): class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets. """Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets. Produces proposal classification labels and bounding-box regression targets.
""" """
def __init__(self): def __init__(self):
super(ProposalTargetLayer, self).__init__() super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES self.num_classes = cfg.MODEL.NUM_CLASSES
self.fake_outputs = { self.fake_outputs = {
'rois': np.array([[0, 0, 0, 1, 1]], dtype=np.float32), 'rois': np.array([[0, 0, 0, 1, 1]], dtype=np.float32),
'labels': np.array([-1], dtype=np.float32), 'labels': np.array([-1], dtype=np.float32),
'bbox_targets': np.zeros((1, self.num_classes * 4), dtype=np.float32), 'bbox_targets': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_inside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32), 'bbox_inside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32), 'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
} }
def forward(self, rpn_rois, gt_boxes): def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label) # GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs # Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets', keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights'] 'bbox_inside_weights', 'bbox_outside_weights']
outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys])) outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys])) batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately # Generate targets separately
for ix in range(num_images): for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix] gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image # Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]] rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois # Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4])))) rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training # Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = \ labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes) _sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32) bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([ _fmap_batch([
labels, labels,
rois, rois,
bbox_targets, bbox_targets,
bbox_inside_weights, bbox_inside_weights,
bbox_outside_weights], bbox_outside_weights],
batch_outputs, batch_outputs,
keys, keys,
) )
# Merge targets into blobs # Merge targets into blobs
for k, v in batch_outputs.items(): for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0) batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
# Distribute rois into K levels # Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1 K = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level) fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)] lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)]
_fmap_rois( _fmap_rois(
inputs=[batch_outputs[key] for key in keys], inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs, fake_outputs=self.fake_outputs,
outputs=outputs, outputs=outputs,
keys=keys, keys=keys,
levels=lvs_indices, levels=lvs_indices,
) )
return { return {
'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)], 'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)), 'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])), 'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])), 'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])), 'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])),
} }
def _get_bbox_regression_labels(bbox_target_data, num_classes): def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a """Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th) compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets). by the network (i.e. only one class has non-zero targets).
Returns: Returns:
bbox_target (ndarray): N x 4K blob of regression targets bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights bbox_inside_weights (ndarray): N x 4K blob of loss weights
""" """
clss = bbox_target_data[:, 0] clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0] inds = np.where(clss > 0)[0]
for ind in inds: for ind in inds:
cls = clss[ind] cls = clss[ind]
start = 4 * cls start = 4 * cls
end = start + 4 end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:] bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0) bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights return bbox_targets, bbox_inside_weights
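To make the "4-of-4*K" layout concrete, here is a condensed, self-contained restatement of the expansion with a tiny worked example; the class indices and target values below are made up for illustration.
import numpy as np

def expand_bbox_targets(bbox_target_data, num_classes):
    # bbox_target_data: (N, 5) rows of (class, tx, ty, tw, th)
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros_like(bbox_targets)
    for ind in np.where(clss > 0)[0]:
        start = int(4 * clss[ind])
        bbox_targets[ind, start:start + 4] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:start + 4] = 1.
    return bbox_targets, bbox_inside_weights

# Two RoIs, 3 classes: the class-2 RoI gets its targets in columns 8..11,
# the background RoI (class 0) keeps all-zero targets and weights.
data = np.array([[2., .1, .2, .3, .4],
                 [0., 0., 0., 0., 0.]], dtype=np.float32)
targets, weights = expand_bbox_targets(data, num_classes=3)
print(targets[0, 8:12])   # -> [0.1 0.2 0.3 0.4]
print(weights[1].sum())   # -> 0.0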
def _compute_targets(ex_rois, gt_rois, labels): def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image.""" """Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0] assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4 assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4 assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS) targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False) return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _map_rois_to_fpn_levels(rois, k_min, k_max): def _map_rois_to_fpn_levels(rois, k_min, k_max):
""" """
Determine which FPN level each RoI in a set of RoIs Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper. should map to based on the heuristic in the FPN paper.
""" """
if len(rois) == 0: if len(rois) == 0:
return [] return []
ws = rois[:, 3] - rois[:, 1] + 1 ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1 hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs) s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224 s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4 lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6)) target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max) return np.clip(target_levels, k_min, k_max)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Sample a batch of RoIs comprising foreground and background examples.""" """Sample a batch of RoIs comprising foreground and background examples."""
# overlaps: (rois x gt_boxes) # overlaps: (rois x gt_boxes)
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
gt_assignment = overlaps.argmax(axis=1) gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1) max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4] labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap # Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image # Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs # foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size)) fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement # Sample foreground regions without replacement
if fg_inds.size > 0: if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding # Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired) # against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement # Sample background regions without replacement
if bg_inds.size > 0: if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg) # The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds) keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays: # Select sampled values from various arrays:
labels = labels[keep_inds] labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0 # Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0 labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds] rois = all_rois[keep_inds]
bbox_target_data = _compute_targets( bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels) rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \ bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes) _get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights return labels, rois, bbox_targets, bbox_inside_weights
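The quota arithmetic behind the sampling is easy to lose in the code. A short sketch, using the batch size from the configs shipped with this repository (512) and an assumed FG_FRACTION of 0.25:
import numpy as np

# Quota sketch: FG_FRACTION = 0.25 is an assumption; BATCH_SIZE = 512 matches
# the Faster R-CNN configs in this repository.
rois_per_image = 512
fg_rois_per_image = int(np.round(0.25 * rois_per_image))    # 128 foreground slots

num_fg_found, num_bg_found = 40, 2000                        # hypothetical candidate counts
fg_taken = min(fg_rois_per_image, num_fg_found)              # 40: never more than available
bg_taken = min(rois_per_image - fg_taken, num_bg_found)      # 472: background fills the rest
print(fg_taken, bg_taken)                                    # -> 40 472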
def _fmap_batch(inputs, outputs, keys): def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys): for i, key in enumerate(keys):
outputs[key].append(inputs[i]) outputs[key].append(inputs[i])
def _fmap_rois(inputs, fake_outputs, outputs, keys, levels): def _fmap_rois(inputs, fake_outputs, outputs, keys, levels):
def impl(a, b, indices): def impl(a, b, indices):
return a[indices] if len(indices) > 0 else b return a[indices] if len(indices) > 0 else b
for k in range(len(levels)): for k in range(len(levels)):
inds = levels[k] inds = levels[k]
for i, key in enumerate(keys): for i, key in enumerate(keys):
outputs[key].append(impl(inputs[i], fake_outputs[key], inds)) outputs[key].append(impl(inputs[i], fake_outputs[key], inds))
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
# Import custom modules # Import custom modules
from lib.modeling.base import affine from lib.modeling.base import affine
from lib.modeling.base import bn from lib.modeling.base import bn
from lib.modeling.base import conv1x1 from lib.modeling.base import conv1x1
from lib.modeling.base import conv3x3 from lib.modeling.base import conv3x3
from lib.modeling.fast_rcnn import FastRCNN from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.fpn import FPN from lib.modeling.fpn import FPN
from lib.modeling.retinanet import RetinaNet from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD from lib.modeling.ssd import SSD
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.modeling import affine from lib.modeling import affine
from lib.modeling import conv1x1 from lib.modeling import conv1x1
from lib.modeling import conv3x3 from lib.modeling import conv3x3
class WideResBlock(torch.nn.Module): class WideResBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out, stride=1, downsample=None): def __init__(self, dim_in, dim_out, stride=1, downsample=None):
super(WideResBlock, self).__init__() super(WideResBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride) self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out) self.bn1 = affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out) self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out) self.bn2 = affine(dim_out)
self.downsample = downsample self.downsample = downsample
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
def forward(self, x): def forward(self, x):
residual = x residual = x
out = self.conv1(x) out = self.conv1(x)
out = self.bn1(out) out = self.bn1(out)
out = self.relu(out) out = self.relu(out)
out = self.conv2(out) out = self.conv2(out)
out = self.bn2(out) out = self.bn2(out)
if self.downsample is not None: if self.downsample is not None:
residual = self.downsample(residual) residual = self.downsample(residual)
out += residual out += residual
out = self.relu(out) out = self.relu(out)
return out return out
class InceptionBlock(torch.nn.Module): class InceptionBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out): def __init__(self, dim_in, dim_out):
super(InceptionBlock, self).__init__() super(InceptionBlock, self).__init__()
self.conv1 = conv1x1(dim_in, dim_out) self.conv1 = conv1x1(dim_in, dim_out)
self.bn1 = affine(dim_out) self.bn1 = affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out // 2) self.conv2 = conv3x3(dim_out, dim_out // 2)
self.bn2 = affine(dim_out // 2) self.bn2 = affine(dim_out // 2)
self.conv3a = conv3x3(dim_out // 2, dim_out) self.conv3a = conv3x3(dim_out // 2, dim_out)
self.bn3a = affine(dim_out) self.bn3a = affine(dim_out)
self.conv3b = conv3x3(dim_out, dim_out) self.conv3b = conv3x3(dim_out, dim_out)
self.bn3b = affine(dim_out) self.bn3b = affine(dim_out)
self.conv4 = conv3x3(dim_out * 3, dim_out) self.conv4 = conv3x3(dim_out * 3, dim_out)
self.bn4 = affine(dim_out) self.bn4 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
def forward(self, x): def forward(self, x):
residual = x residual = x
out = self.conv1(x) out = self.conv1(x)
out_1x1 = self.bn1(out) out_1x1 = self.bn1(out)
out_1x1 = self.relu(out_1x1) out_1x1 = self.relu(out_1x1)
out = self.conv2(out_1x1) out = self.conv2(out_1x1)
out = self.bn2(out) out = self.bn2(out)
out = self.relu(out) out = self.relu(out)
out = self.conv3a(out) out = self.conv3a(out)
out_3x3_a = self.bn3a(out) out_3x3_a = self.bn3a(out)
out_3x3_a = self.relu(out_3x3_a) out_3x3_a = self.relu(out_3x3_a)
out = self.conv3b(out_1x1) out = self.conv3b(out_1x1)
out_3x3_b = self.bn3b(out) out_3x3_b = self.bn3b(out)
out_3x3_b = self.relu(out_3x3_b) out_3x3_b = self.relu(out_3x3_b)
out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], dim=1) out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], dim=1)
out = self.conv4(out) out = self.conv4(out)
out = self.bn4(out) out = self.bn4(out)
out += residual out += residual
out = self.relu(out) out = self.relu(out)
return out return out
class AirNet(torch.nn.Module): class AirNet(torch.nn.Module):
def __init__(self, blocks, num_stages): def __init__(self, blocks, num_stages):
super(AirNet, self).__init__() super(AirNet, self).__init__()
self.dim_in, filters = 64, [64, 128, 256, 384] self.dim_in, filters = 64, [64, 128, 256, 384]
self.feature_dims = [None, None] + \ self.feature_dims = [None, None] + \
filters[1:num_stages - 1] filters[1:num_stages - 1]
self.conv1 = torch.nn.Conv2d( self.conv1 = torch.nn.Conv2d(
3, 64, 3, 64,
kernel_size=7, kernel_size=7,
stride=2, stride=2,
padding=3, padding=3,
bias=False, bias=False,
) )
self.bn1 = affine(self.dim_in) self.bn1 = affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d( self.maxpool = torch.nn.MaxPool2d(
kernel_size=2, kernel_size=2,
stride=2, stride=2,
padding=0, padding=0,
ceil_mode=True, ceil_mode=True,
) )
self.layer1 = self.make_blocks(filters[0], blocks[0]) self.layer1 = self.make_blocks(filters[0], blocks[0])
self.layer2 = self.make_blocks(filters[1], blocks[1], 2) self.layer2 = self.make_blocks(filters[1], blocks[1], 2)
if num_stages >= 4: if num_stages >= 4:
self.layer3 = self.make_blocks(filters[2], blocks[2], 2) self.layer3 = self.make_blocks(filters[2], blocks[2], 2)
if num_stages >= 5: if num_stages >= 5:
self.layer4 = self.make_blocks(filters[3], blocks[3], 2) self.layer4 = self.make_blocks(filters[3], blocks[3], 2)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
# The Kaiming Initialization # The Kaiming Initialization
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_( torch.nn.init.kaiming_uniform_(
m.weight, m.weight,
# Fix the gain for [-127, 127] # Fix the gain for [-127, 127]
a=1, a=1,
) # Xavier Initialization ) # Xavier Initialization
def make_blocks(self, dim_out, blocks, stride=1): def make_blocks(self, dim_out, blocks, stride=1):
downsample = torch.nn.Sequential( downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride), conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out), affine(dim_out),
) )
layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)] layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)]
self.dim_in = dim_out self.dim_in = dim_out
for i in range(1, len(blocks)): for i in range(1, len(blocks)):
if blocks[i] == 'r': if blocks[i] == 'r':
layers.append(WideResBlock(dim_out, dim_out)) layers.append(WideResBlock(dim_out, dim_out))
elif blocks[i] == 'i': elif blocks[i] == 'i':
layers.append(InceptionBlock(dim_out, dim_out)) layers.append(InceptionBlock(dim_out, dim_out))
else: else:
raise ValueError('Unknown block flag: ' + blocks[i]) raise ValueError('Unknown block flag: ' + blocks[i])
return torch.nn.Sequential(*layers) return torch.nn.Sequential(*layers)
def forward(self, x): def forward(self, x):
x = self.conv1(x) x = self.conv1(x)
x = self.bn1(x) x = self.bn1(x)
x = self.relu(x) x = self.relu(x)
x = self.maxpool(x) x = self.maxpool(x)
x = self.layer1(x) x = self.layer1(x)
outputs = [None, None, self.layer2(x)] outputs = [None, None, self.layer2(x)]
if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])] if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])]
if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])] if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])]
return outputs return outputs
def airnet(num_stages): def airnet(num_stages):
blocks = ( blocks = (
('r', 'r'), # conv2 ('r', 'r'), # conv2
('r', 'i'), # conv3 ('r', 'i'), # conv3
('r', 'i'), # conv4 ('r', 'i'), # conv4
('r', 'i'), # conv5 ('r', 'i'), # conv5
) )
return AirNet(blocks, num_stages) return AirNet(blocks, num_stages)
def make_airnet_(): return airnet(5) def make_airnet_(): return airnet(5)
def make_airnet_3b(): return airnet(3) def make_airnet_3b(): return airnet(3)
def make_airnet_4b(): return airnet(4) def make_airnet_4b(): return airnet(4)
def make_airnet_5b(): return airnet(5) def make_airnet_5b(): return airnet(5)
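How the stage count translates into exported features follows from AirNet.__init__ above: only conv3 and upward are exposed, earlier slots stay None. A quick check using the filter list hard-coded in the constructor:
filters = [64, 128, 256, 384]
for num_stages in (3, 4, 5):
    feature_dims = [None, None] + filters[1:num_stages - 1]
    print(num_stages, feature_dims)
# 3 [None, None, 128]
# 4 [None, None, 128, 256]
# 5 [None, None, 128, 256, 384]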
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
"""Define some basic structures.""" """Define some basic structures."""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
def affine(dim_in, inplace=True): def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed.""" """AffineBN, weight and bias are fixed."""
return torch.nn.Affine( return torch.nn.Affine(
dim_in, dim_in,
fix_weight=True, fix_weight=True,
fix_bias=True, fix_bias=True,
inplace=inplace, inplace=inplace,
) )
def bn(dim_in, eps=1e-5): def bn(dim_in, eps=1e-5):
"""The BatchNorm.""" """The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps) return torch.nn.BatchNorm2d(dim_in, eps=eps)
def conv1x1(dim_in, dim_out, stride=1, bias=False): def conv1x1(dim_in, dim_out, stride=1, bias=False):
"""1x1 convolution.""" """1x1 convolution."""
return torch.nn.Conv2d( return torch.nn.Conv2d(
dim_in, dim_in,
dim_out, dim_out,
kernel_size=1, kernel_size=1,
stride=stride, stride=stride,
bias=bias, bias=bias,
) )
def conv3x3(dim_in, dim_out, stride=1, bias=False): def conv3x3(dim_in, dim_out, stride=1, bias=False):
"""3x3 convolution with padding.""" """3x3 convolution with padding."""
return torch.nn.Conv2d( return torch.nn.Conv2d(
dim_in, dim_in,
dim_out, dim_out,
kernel_size=3, kernel_size=3,
stride=stride, stride=stride,
padding=1, padding=1,
bias=bias, bias=bias,
) )
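A hedged usage sketch of the helpers above: the frozen affine layer is the usual stand-in for batch normalization when fine-tuning from ImageNet weights, so a typical fragment pairs a convolution with it. The dimensions here are arbitrary.
import dragon.vm.torch as torch

# Hypothetical fragment built from the helpers defined above.
stem = torch.nn.Sequential(
    conv3x3(3, 64, stride=2),   # 3x3 conv, stride 2, no bias
    affine(64),                 # frozen scale/shift in place of BatchNorm
    torch.nn.ReLU(inplace=True),
)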
...@@ -35,11 +35,13 @@ class Detector(torch.nn.Module): ...@@ -35,11 +35,13 @@ class Detector(torch.nn.Module):
``lib.core.config`` for their hyper-parameters. ``lib.core.config`` for their hyper-parameters.
""" """
def __init__(self): def __init__(self):
super(Detector, self).__init__() super(Detector, self).__init__()
model = cfg.MODEL.TYPE model = cfg.MODEL.TYPE
backbone = cfg.MODEL.BACKBONE.lower().split('.') backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:] body, modules = backbone[0], backbone[1:]
self.recorder = None
# + Data Loader # + Data Loader
self.data_layer = importlib.import_module( self.data_layer = importlib.import_module(
...@@ -92,9 +94,14 @@ class Detector(torch.nn.Module): ...@@ -92,9 +94,14 @@ class Detector(torch.nn.Module):
Parameters Parameters
---------- ----------
inputs : dict or None inputs : dict, optional
The inputs. The inputs.
Returns
-------
dict
The outputs.
""" """
# 0. Get the inputs # 0. Get the inputs
if inputs is None: if inputs is None:
...@@ -161,7 +168,6 @@ class Detector(torch.nn.Module): ...@@ -161,7 +168,6 @@ class Detector(torch.nn.Module):
"""Optimize the graph for the inference. """Optimize the graph for the inference.
It usually involves the removing of BN or Affine. It usually involves the removing of BN or Affine.
""" """
################################## ##################################
# Merge Affine into Convolution # # Merge Affine into Convolution #
......
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import importlib import importlib
_STORE = collections.defaultdict(dict) _STORE = collections.defaultdict(dict)
########################################### ###########################################
# # # #
# Body # # Body #
# # # #
########################################### ###########################################
# ResNet # ResNet
for D in [18, 34, 50, 101, 152, 200, 269]: for D in [18, 34, 50, 101, 152, 200, 269]:
_STORE['BODY']['resnet{}'.format(D)] = \ _STORE['BODY']['resnet{}'.format(D)] = \
'lib.modeling.resnet.make_resnet_{}'.format(D) 'lib.modeling.resnet.make_resnet_{}'.format(D)
# VGG # VGG
for D in [16, 19]: for D in [16, 19]:
for T in ['', '_reduced_300', '_reduced_512']: for T in ['', '_reduced_300', '_reduced_512']:
_STORE['BODY']['vgg{}{}'.format(D, T)] = \ _STORE['BODY']['vgg{}{}'.format(D, T)] = \
'lib.modeling.vgg.make_vgg_{}{}'.format(D, T) 'lib.modeling.vgg.make_vgg_{}{}'.format(D, T)
# AirNet # AirNet
for D in ['', '3b', '4b', '5b']: for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \ _STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D) 'lib.modeling.airnet.make_airnet_{}'.format(D)
def get_template_func(name, sets, desc): def get_template_func(name, sets, desc):
name = name.lower() name = name.lower()
if name not in sets: if name not in sets:
raise ValueError( raise ValueError(
'The {} for {} was not registered.\n' 'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format( 'Registered modules: [{}]'.format(
name, desc, ', '.join(sets.keys()))) name, desc, ', '.join(sets.keys())))
module_name = '.'.join(sets[name].split('.')[0:-1]) module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1] func_name = sets[name].split('.')[-1]
try: try:
module = importlib.import_module(module_name) module = importlib.import_module(module_name)
return getattr(module, func_name) return getattr(module, func_name)
except ImportError as e: except ImportError as e:
raise ValueError('Cannot import module from: ' + module_name) raise ValueError('Cannot import module from: ' + module_name)
def get_body_func(name): def get_body_func(name):
return get_template_func( return get_template_func(
name, _STORE['BODY'], 'Body') name, _STORE['BODY'], 'Body')
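Hypothetical usage of the registry: resolve a body constructor by its registered name, then call it. 'resnet50' is one of the keys registered in the loop above, and the returned callable is lib.modeling.resnet.make_resnet_50.
# Resolve and build a backbone by name (assumes the repository is importable
# on PYTHONPATH).
body_fn = get_body_func('resnet50')   # -> lib.modeling.resnet.make_resnet_50
body = body_fn()                      # ResNet-50 trunk; body.feature_dims == [64, 256, 512, 1024, 2048]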
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.ops.modules import RPNDecoder from lib.ops.modules import RPNDecoder
class FastRCNN(torch.nn.Module): class FastRCNN(torch.nn.Module):
"""Generate proposal regions for R-CNN series. """Generate proposal regions for R-CNN series.
The pipeline is as follows: The pipeline is as follows:
... -> RoIs \ /-> cls_score -> cls_loss ... -> RoIs \ /-> cls_score -> cls_loss
-> RoIFeatureXform -> MLP -> RoIFeatureXform -> MLP
... -> Features / \-> bbox_pred -> bbox_loss ... -> Features / \-> bbox_pred -> bbox_loss
""" """
def __init__(self, dim_in=256): def __init__(self, dim_in=256):
super(FastRCNN, self).__init__() super(FastRCNN, self).__init__()
if len(cfg.RPN.STRIDES) > 1: if len(cfg.RPN.STRIDES) > 1:
# RPN with multiple strides(i.e. FPN) # RPN with multiple strides(i.e. FPN)
from lib.fpn import ProposalLayer, ProposalTargetLayer from lib.fpn import ProposalLayer, ProposalTargetLayer
else: else:
# RPN with single stride(i.e. C4) # RPN with single stride(i.e. C4)
from lib.faster_rcnn import ProposalLayer, ProposalTargetLayer from lib.faster_rcnn import ProposalLayer, ProposalTargetLayer
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2) self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = torch.nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM) self.fc6 = torch.nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM) self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES) self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4) self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.rpn_decoder = RPNDecoder() self.rpn_decoder = RPNDecoder()
self.proposal_layer = ProposalLayer() self.proposal_layer = ProposalLayer()
self.proposal_target_layer = ProposalTargetLayer() self.proposal_target_layer = ProposalTargetLayer()
self.softmax = torch.nn.Softmax(dim=1) self.softmax = torch.nn.Softmax(dim=1)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False) self.sigmoid = torch.nn.Sigmoid(inplace=False)
self.roi_func = { self.roi_func = {
'RoIPool': torch.vision.ops.roi_pool, 'RoIPool': torch.vision.ops.roi_pool,
'RoIAlign': torch.vision.ops.roi_align, 'RoIAlign': torch.vision.ops.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD] }[cfg.FRCNN.ROI_XFORM_METHOD]
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1) self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1., reduction='batch_size') self.bbox_loss = torch.nn.SmoothL1Loss(reduction='batch_size')
# Compute spatial scales for multiple strides # Compute spatial scales for multiple strides
roi_levels = [level for level in range( roi_levels = [level for level in range(
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)] cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)]
self.spatial_scales = [1.0 / (2 ** level) for level in roi_levels] self.spatial_scales = [1.0 / (2 ** level) for level in roi_levels]
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
# Careful initialization for Fast R-CNN # Careful initialization for Fast R-CNN
torch.nn.init.normal_(self.cls_score.weight, std=0.01) torch.nn.init.normal_(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001) torch.nn.init.normal_(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters(): for name, p in self.named_parameters():
if 'bias' in name: if 'bias' in name:
torch.nn.init.constant_(p, 0) torch.nn.init.constant_(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale): def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func( return self.roi_func(
feature, rois, feature, rois,
output_size=( output_size=(
cfg.FRCNN.ROI_XFORM_RESOLUTION, cfg.FRCNN.ROI_XFORM_RESOLUTION,
cfg.FRCNN.ROI_XFORM_RESOLUTION, cfg.FRCNN.ROI_XFORM_RESOLUTION,
), ),
spatial_scale=spatial_scale, spatial_scale=spatial_scale,
) )
def forward(self, **kwargs): def forward(self, **kwargs):
# Generate Proposals # Generate Proposals
# Apply the CXX implementation during inference # Apply the CXX implementation during inference
proposal_func = self.proposal_layer \ proposal_func = self.proposal_layer \
if self.training else self.rpn_decoder if self.training else self.rpn_decoder
self.rcnn_data = { self.rcnn_data = {
'rois': proposal_func( 'rois': proposal_func(
kwargs['features'], kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data), self.sigmoid(kwargs['rpn_cls_score'].data),
kwargs['rpn_bbox_pred'], kwargs['rpn_bbox_pred'],
kwargs['ims_info'], kwargs['ims_info'],
) )
} }
# Generate Targets from Proposals # Generate Targets from Proposals
if self.training: if self.training:
self.rcnn_data.update( self.rcnn_data.update(
self.proposal_target_layer( self.proposal_target_layer(
rpn_rois=self.rcnn_data['rois'], rpn_rois=self.rcnn_data['rois'],
gt_boxes=kwargs['gt_boxes'], gt_boxes=kwargs['gt_boxes'],
) )
) )
# Transform RoI Feature # Transform RoI Feature
roi_features = [] roi_features = []
if len(self.rcnn_data['rois']) > 1: if len(self.rcnn_data['rois']) > 1:
for i, spatial_scale in enumerate(self.spatial_scales): for i, spatial_scale in enumerate(self.spatial_scales):
roi_features.append( roi_features.append(
self.RoIFeatureTransform( self.RoIFeatureTransform(
kwargs['features'][i], kwargs['features'][i],
self.rcnn_data['rois'][i], self.rcnn_data['rois'][i],
spatial_scale, spatial_scale,
) )
) )
roi_features = torch.cat(roi_features, dim=0) roi_features = torch.cat(roi_features, dim=0)
else: else:
spatial_scale = 1.0 / cfg.RPN.STRIDES[0] spatial_scale = 1.0 / cfg.RPN.STRIDES[0]
roi_features = \ roi_features = \
self.RoIFeatureTransform( self.RoIFeatureTransform(
kwargs['features'][0], kwargs['features'][0],
self.rcnn_data['rois'][0], self.rcnn_data['rois'][0],
spatial_scale, spatial_scale,
) )
# Apply a simple MLP # Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim) roi_features = roi_features.view(-1, self.roi_head_dim)
rcnn_output = self.relu(self.fc6(roi_features)) rcnn_output = self.relu(self.fc6(roi_features))
rcnn_output = self.relu(self.fc7(rcnn_output)) rcnn_output = self.relu(self.fc7(rcnn_output))
# Compute rcnn logits # Compute rcnn logits
cls_score = self.cls_score(rcnn_output).float() cls_score = self.cls_score(rcnn_output).float()
outputs = collections.OrderedDict({ outputs = collections.OrderedDict([
'bbox_pred': ('bbox_pred', self.bbox_pred(rcnn_output).float()),
self.bbox_pred(rcnn_output).float(), ])
})
if self.training:
if self.training: # Compute rcnn losses
# Compute rcnn losses outputs.update(collections.OrderedDict([
outputs.update(collections.OrderedDict({ ('cls_loss', self.cls_loss(
'cls_loss': self.cls_loss( cls_score, self.rcnn_data['labels'])),
cls_score, ('bbox_loss', self.bbox_loss(
self.rcnn_data['labels'], outputs['bbox_pred'],
), self.rcnn_data['bbox_targets'],
'bbox_loss': self.bbox_loss( self.rcnn_data['bbox_inside_weights'],
outputs['bbox_pred'], self.rcnn_data['bbox_outside_weights'],
self.rcnn_data['bbox_targets'], )),
self.rcnn_data['bbox_inside_weights'], ]))
self.rcnn_data['bbox_outside_weights'], else:
), # Return the rois to decode the refine boxes
})) if len(self.rcnn_data['rois']) > 1:
else: outputs['rois'] = torch.cat(
# Return the rois to decode the refine boxes self.rcnn_data['rois'], dim=0)
if len(self.rcnn_data['rois']) > 1: else:
outputs['rois'] = torch.cat( outputs['rois'] = self.rcnn_data['rois'][0]
self.rcnn_data['rois'], dim=0) # Return the classification prob
else: outputs['cls_prob'] = self.softmax(cls_score)
outputs['rois'] = self.rcnn_data['rois'][0]
# Return the classification prob return outputs
outputs['cls_prob'] = self.softmax(cls_score)
return outputs
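One detail worth spelling out from FastRCNN.__init__: the spatial scales handed to RoIAlign/RoIPool are simply 1 / 2**level for each pyramid level the head reads from. Assuming the common ROI_MIN_LEVEL=2 and ROI_MAX_LEVEL=5:
# Spatial scales for FPN levels 2..5 (the level range is an assumption; the
# formula matches the list comprehension in FastRCNN.__init__).
roi_levels = list(range(2, 5 + 1))
spatial_scales = [1.0 / (2 ** level) for level in roi_levels]
print(spatial_scales)   # -> [0.25, 0.125, 0.0625, 0.03125]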
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1 from lib.modeling import conv1x1
from lib.modeling import conv3x3 from lib.modeling import conv3x3
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
class FPN(torch.nn.Module): class FPN(torch.nn.Module):
"""Feature Pyramid Networks for R-CNN and RetinaNet.""" """Feature Pyramid Networks for R-CNN and RetinaNet."""
def __init__(self, feature_dims): def __init__(self, feature_dims):
super(FPN, self).__init__() super(FPN, self).__init__()
self.C = torch.nn.ModuleList() self.C = torch.nn.ModuleList()
self.P = torch.nn.ModuleList() self.P = torch.nn.ModuleList()
self.apply_func = self.apply_on_rcnn self.apply_func = self.apply_on_rcnn
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1): for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1):
self.C.append(conv1x1(feature_dims[lvl - 1], cfg.FPN.DIM, bias=True)) self.C.append(conv1x1(feature_dims[lvl - 1], cfg.FPN.DIM, bias=True))
self.P.append(conv3x3(cfg.FPN.DIM, cfg.FPN.DIM, bias=True)) self.P.append(conv3x3(cfg.FPN.DIM, cfg.FPN.DIM, bias=True))
if 'retinanet' in cfg.MODEL.TYPE: if 'retinanet' in cfg.MODEL.TYPE:
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1): for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else cfg.FPN.DIM dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else cfg.FPN.DIM
self.P.append(conv3x3(dim_in, cfg.FPN.DIM, stride=2, bias=True)) self.P.append(conv3x3(dim_in, cfg.FPN.DIM, stride=2, bias=True))
self.apply_func = self.apply_on_retinanet self.apply_func = self.apply_on_retinanet
self.relu = torch.nn.ReLU(inplace=False) self.relu = torch.nn.ReLU(inplace=False)
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True) self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True)
self.reset_parameters() self.reset_parameters()
self.feature_dims = [cfg.FPN.DIM] self.feature_dims = [cfg.FPN.DIM]
def reset_parameters(self): def reset_parameters(self):
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_uniform_( torch.nn.init.kaiming_uniform_(
m.weight, m.weight,
a=1, # Fix the gain for [-127, 127] a=1, # Fix the gain for [-127, 127]
) # Xavier Initialization ) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0) torch.nn.init.constant_(m.bias, 0)
def apply_on_rcnn(self, features): def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)] outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Apply MaxPool for higher features # Apply MaxPool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1])) outputs.append(self.maxpool(outputs[-1]))
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.vision.ops.nn_resize( upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:]) fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output) fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input)) outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs return outputs
def apply_on_retinanet(self, features): def apply_on_retinanet(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL- min_lvl](fpn_input)] outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
# Add extra convolutions for higher features # Add extra convolutions for higher features
extra_input = features[-1] extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input)) outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl: if i != max_lvl:
extra_input = self.relu(outputs[-1]) extra_input = self.relu(outputs[-1])
# Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build Pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
upscale_output = torch.vision.ops.nn_resize( upscale_output = torch.vision.ops.nn_resize(
fpn_input, dsize=lateral_output.shape[-2:]) fpn_input, dsize=lateral_output.shape[-2:])
fpn_input = lateral_output.__iadd__(upscale_output) fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input)) outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs return outputs
def forward(self, features): def forward(self, features):
return self.apply_func(features) return self.apply_func(features)
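The heart of both apply_* paths is the top-down merge: resize the coarser map to the lateral map's size with nearest-neighbor interpolation, add it in place, then a 3x3 conv (self.P[...]) smooths the sum. A minimal numpy sketch of one such step, detached from the framework:
import numpy as np

coarse = np.arange(4, dtype=np.float32).reshape(2, 2)    # previous fpn_input (coarser level)
lateral = np.ones((4, 4), dtype=np.float32)              # 1x1-conv'd backbone feature
upscale = coarse.repeat(2, axis=0).repeat(2, axis=1)     # nearest-neighbor x2, like nn_resize
merged = lateral + upscale                               # becomes fpn_input for the next level down
print(merged)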
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py> # <https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import affine from lib.modeling import affine
from lib.modeling import conv1x1 from lib.modeling import conv1x1
from lib.modeling import conv3x3 from lib.modeling import conv3x3
class BasicBlock(torch.nn.Module): class BasicBlock(torch.nn.Module):
def __init__( def __init__(
self, self,
dim_in, dim_in,
dim_out, dim_out,
stride=1, stride=1,
downsample=None, downsample=None,
dropblock=None, dropblock=None,
): ):
super(BasicBlock, self).__init__() super(BasicBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride) self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out) self.bn1 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.conv2 = conv3x3(dim_out, dim_out) self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out) self.bn2 = affine(dim_out)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock self.dropblock = dropblock
def forward(self, x): def forward(self, x):
residual = x residual = x
out = self.conv1(x) out = self.conv1(x)
out = self.bn1(out) out = self.bn1(out)
out = self.relu(out) out = self.relu(out)
if self.dropblock is not None: if self.dropblock is not None:
out = self.dropblock(out) out = self.dropblock(out)
out = self.conv2(out) out = self.conv2(out)
out = self.bn2(out) out = self.bn2(out)
if self.dropblock is not None: if self.dropblock is not None:
residual = self.dropblock(residual) residual = self.dropblock(residual)
if self.downsample is not None: if self.downsample is not None:
residual = self.downsample(residual) residual = self.downsample(residual)
out += residual out += residual
out = self.relu(out) out = self.relu(out)
return out return out
class Bottleneck(torch.nn.Module): class Bottleneck(torch.nn.Module):
# 1x64d => 0.25 (ResNet) # 1x64d => 0.25 (ResNet)
# 32x8d, 64x4d => 1.0 (ResNeXt) # 32x8d, 64x4d => 1.0 (ResNeXt)
contraction = cfg.RESNET.NUM_GROUPS \ contraction = cfg.RESNET.NUM_GROUPS \
* cfg.RESNET.GROUP_WIDTH / 256.0 * cfg.RESNET.GROUP_WIDTH / 256.0
def __init__( def __init__(
self, self,
dim_in, dim_in,
dim_out, dim_out,
stride=1, stride=1,
downsample=None, downsample=None,
dropblock=None, dropblock=None,
): ):
super(Bottleneck, self).__init__() super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction) dim = int(dim_out * self.contraction)
self.conv1 = conv1x1(dim_in, dim) self.conv1 = conv1x1(dim_in, dim)
self.bn1 = affine(dim) self.bn1 = affine(dim)
self.conv2 = conv3x3(dim, dim, stride=stride) self.conv2 = conv3x3(dim, dim, stride=stride)
self.bn2 = affine(dim) self.bn2 = affine(dim)
self.conv3 = conv1x1(dim, dim_out) self.conv3 = conv1x1(dim, dim_out)
self.bn3 = affine(dim_out) self.bn3 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock self.dropblock = dropblock
def forward(self, x): def forward(self, x):
residual = x residual = x
out = self.conv1(x) out = self.conv1(x)
out = self.bn1(out) out = self.bn1(out)
out = self.relu(out) out = self.relu(out)
out = self.conv2(out) out = self.conv2(out)
out = self.bn2(out) out = self.bn2(out)
out = self.relu(out) out = self.relu(out)
if self.dropblock is not None: if self.dropblock is not None:
out = self.dropblock(out) out = self.dropblock(out)
out = self.conv3(out) out = self.conv3(out)
out = self.bn3(out) out = self.bn3(out)
if self.dropblock is not None: if self.dropblock is not None:
residual = self.dropblock(residual) residual = self.dropblock(residual)
if self.downsample is not None: if self.downsample is not None:
residual = self.downsample(residual) residual = self.downsample(residual)
out += residual out += residual
out = self.relu(out) out = self.relu(out)
return out return out
class ResNet(torch.nn.Module): class ResNet(torch.nn.Module):
def __init__(self, block, layers, filters): def __init__(self, block, layers, filters):
super(ResNet, self).__init__() super(ResNet, self).__init__()
self.dim_in, filters = filters[0], filters[1:] self.dim_in, filters = filters[0], filters[1:]
self.feature_dims = [self.dim_in] + filters self.feature_dims = [self.dim_in] + filters
self.conv1 = torch.nn.Conv2d( self.conv1 = torch.nn.Conv2d(
3, 64, 3, 64,
kernel_size=7, kernel_size=7,
stride=2, stride=2,
padding=3, padding=3,
bias=False, bias=False,
) )
self.bn1 = affine(self.dim_in) self.bn1 = affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d( self.maxpool = torch.nn.MaxPool2d(
kernel_size=3, kernel_size=3,
stride=2, stride=2,
padding=0, padding=0,
ceil_mode=True, ceil_mode=True,
) )
self.drop3 = torch.nn.DropBlock2d( self.drop3 = torch.nn.DropBlock2d(
kp=0.9, kp=0.9,
block_size=7, block_size=7,
alpha=0.25, alpha=0.25,
decrement=cfg.DROPBLOCK.DECREMENT decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None ) if cfg.DROPBLOCK.DROP_ON else None
self.drop4 = torch.nn.DropBlock2d( self.drop4 = torch.nn.DropBlock2d(
kp=0.9, kp=0.9,
block_size=7, block_size=7,
alpha=1.00, alpha=1.00,
decrement=cfg.DROPBLOCK.DECREMENT decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None ) if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0]) self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2) self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3) self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3)
self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, self.drop4) self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, self.drop4)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
# The Kaiming Initialization # The Kaiming Initialization
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_normal_( torch.nn.init.kaiming_normal_(
m.weight, m.weight,
nonlinearity='relu', nonlinearity='relu',
) )
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
if isinstance(m, torch.nn.Conv2d): if isinstance(m, torch.nn.Conv2d):
m.weight.requires_grad = False m.weight.requires_grad = False
m._buffers['weight'] = m.weight m._buffers['weight'] = m.weight
del m._parameters['weight'] del m._parameters['weight']
if cfg.MODEL.FREEZE_AT > 0: if cfg.MODEL.FREEZE_AT > 0:
self.conv1.apply(freeze_func) self.conv1.apply(freeze_func)
for i in range(cfg.MODEL.FREEZE_AT, 1, -1): for i in range(cfg.MODEL.FREEZE_AT, 1, -1):
getattr(self, 'layer{}'.format(i - 1)).apply(freeze_func) getattr(self, 'layer{}'.format(i - 1)).apply(freeze_func)
def make_blocks(self, block, dim_out, blocks, stride=1, dropblock=None): def make_blocks(self, block, dim_out, blocks, stride=1, dropblock=None):
downsample = None downsample = None
if stride != 1 or self.dim_in != dim_out: if stride != 1 or self.dim_in != dim_out:
downsample = torch.nn.Sequential( downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride), conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out), affine(dim_out),
) )
layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)] layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)]
self.dim_in = dim_out self.dim_in = dim_out
for i in range(1, blocks): for i in range(1, blocks):
layers.append(block(dim_out, dim_out, dropblock=dropblock)) layers.append(block(dim_out, dim_out, dropblock=dropblock))
return torch.nn.Sequential(*layers) return torch.nn.Sequential(*layers)
def forward(self, x): def forward(self, x):
x = self.conv1(x) x = self.conv1(x)
x = self.bn1(x) x = self.bn1(x)
x = self.relu(x) x = self.relu(x)
x = self.maxpool(x) x = self.maxpool(x)
outputs = [x] outputs = [x]
outputs += [self.layer1(outputs[-1])] outputs += [self.layer1(outputs[-1])]
outputs += [self.layer2(outputs[-1])] outputs += [self.layer2(outputs[-1])]
outputs += [self.layer3(outputs[-1])] outputs += [self.layer3(outputs[-1])]
outputs += [self.layer4(outputs[-1])] outputs += [self.layer4(outputs[-1])]
return outputs return outputs
def resnet(depth): def resnet(depth):
if depth == 18: if depth == 18:
units = [2, 2, 2, 2] units = [2, 2, 2, 2]
elif depth == 34: elif depth == 34:
units = [3, 4, 6, 3] units = [3, 4, 6, 3]
elif depth == 50: elif depth == 50:
units = [3, 4, 6, 3] units = [3, 4, 6, 3]
elif depth == 101: elif depth == 101:
units = [3, 4, 23, 3] units = [3, 4, 23, 3]
elif depth == 152: elif depth == 152:
units = [3, 8, 36, 3] units = [3, 8, 36, 3]
elif depth == 200: elif depth == 200:
units = [3, 24, 36, 3] units = [3, 24, 36, 3]
elif depth == 269: elif depth == 269:
units = [3, 30, 48, 8] units = [3, 30, 48, 8]
else: else:
raise ValueError('Unsupported depth: %d' % depth) raise ValueError('Unsupported depth: %d' % depth)
block = Bottleneck if depth >= 50 else BasicBlock block = Bottleneck if depth >= 50 else BasicBlock
filters = [64, 256, 512, 1024, 2048] \ filters = [64, 256, 512, 1024, 2048] \
if depth >= 50 else [64, 64, 128, 256, 512] if depth >= 50 else [64, 64, 128, 256, 512]
return ResNet(block, units, filters) return ResNet(block, units, filters)
def make_resnet_18(): return resnet(18) def make_resnet_18(): return resnet(18)
def make_resnet_34(): return resnet(34) def make_resnet_34(): return resnet(34)
def make_resnet_50(): return resnet(50) def make_resnet_50(): return resnet(50)
def make_resnet_101(): return resnet(101) def make_resnet_101(): return resnet(101)
def make_resnet_152(): return resnet(152) def make_resnet_152(): return resnet(152)
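# ----------------------------------------------------------------------
# Illustrative sketch (not part of this diff): choosing a backbone by depth.
# resnet(18) / resnet(34) build BasicBlock stages (e.g. units [2, 2, 2, 2]
# for ResNet-18), while resnet(50) and deeper use Bottleneck stages
# (e.g. units [3, 4, 6, 3] for ResNet-50). The forward pass returns a list
# of feature maps: the stem output followed by the four residual stages.
#   backbone = make_resnet_50()     # same as resnet(50)
#   outputs = backbone(images)      # [stem, layer1, layer2, layer3, layer4]
# Here `images` stands for any input tensor of shape (N, 3, H, W); the name
# is assumed for illustration only.
# ----------------------------------------------------------------------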
...@@ -59,8 +59,7 @@ class RetinaNet(torch.nn.Module): ...@@ -59,8 +59,7 @@ class RetinaNet(torch.nn.Module):
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA, gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
) )
self.bbox_loss = torch.nn.SmoothL1Loss( self.bbox_loss = torch.nn.SmoothL1Loss(
beta=1. / 9., reduction='batch_size', beta=.11, reduction='batch_size')
)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -133,26 +132,22 @@ class RetinaNet(torch.nn.Module): ...@@ -133,26 +132,22 @@ class RetinaNet(torch.nn.Module):
gt_boxes=gt_boxes, gt_boxes=gt_boxes,
ims_info=ims_info, ims_info=ims_info,
) )
return collections.OrderedDict({ return collections.OrderedDict([
'cls_loss': ('cls_loss', self.cls_loss(
self.cls_loss( cls_score, self.retinanet_data['labels'])),
cls_score, ('bbox_loss', self.bbox_loss(
self.retinanet_data['labels'], bbox_pred,
), self.retinanet_data['bbox_targets'],
'bbox_loss': self.retinanet_data['bbox_inside_weights'],
self.bbox_loss( self.retinanet_data['bbox_outside_weights'],
bbox_pred, )),
self.retinanet_data['bbox_targets'], ])
self.retinanet_data['bbox_inside_weights'],
self.retinanet_data['bbox_outside_weights'],
)
})
def forward(self, *args, **kwargs): def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features']) cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float() cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict({'bbox_pred': bbox_pred}) outputs = collections.OrderedDict([('bbox_pred', bbox_pred)])
if self.training: if self.training:
outputs.update( outputs.update(
......
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1 from lib.modeling import conv1x1
from lib.modeling import conv3x3 from lib.modeling import conv3x3
class RPN(torch.nn.Module): class RPN(torch.nn.Module):
"""Region Proposal Networks for R-CNN series.""" """Region Proposal Networks for R-CNN series."""
def __init__(self, dim_in=256): def __init__(self, dim_in=256):
super(RPN, self).__init__() super(RPN, self).__init__()
################################## ##################################
# RPN outputs # # RPN outputs #
################################## ##################################
num_anchors = len(cfg.RPN.ASPECT_RATIOS) * ( num_anchors = len(cfg.RPN.ASPECT_RATIOS) * (
len(cfg.RPN.SCALES) if len(cfg.RPN.STRIDES) == 1 else 1) len(cfg.RPN.SCALES) if len(cfg.RPN.STRIDES) == 1 else 1)
self.output = conv3x3(dim_in, dim_in, bias=True) self.output = conv3x3(dim_in, dim_in, bias=True)
self.cls_score = conv1x1(dim_in, num_anchors, bias=True) self.cls_score = conv1x1(dim_in, num_anchors, bias=True)
self.bbox_pred = conv1x1(dim_in, num_anchors * 4, bias=True) self.bbox_pred = conv1x1(dim_in, num_anchors * 4, bias=True)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
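        # Illustrative note (not part of this diff): the anchor count per position.
        # With a single stride (the C4 setting) the RPN enumerates every
        # (aspect ratio, scale) pair, e.g. 3 ratios x 3 scales = 9 anchors, so
        # cls_score has 9 output channels and bbox_pred has 9 * 4 = 36. With
        # multiple strides (the FPN setting) only the aspect ratios are
        # enumerated per level, e.g. 3 anchors, since each level carries a
        # single scale. The "3 ratios x 3 scales" figures are typical defaults,
        # not values read from this diff.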
################################## ##################################
# RPN losses # # RPN losses #
################################## ##################################
if len(cfg.RPN.STRIDES) > 1: if len(cfg.RPN.STRIDES) > 1:
            # RPN with multiple strides (i.e. FPN)             # RPN with multiple strides (i.e. FPN)
from lib.fpn.layers.anchor_target_layer import AnchorTargetLayer from lib.fpn.anchor_target_layer import AnchorTargetLayer
else: else:
            # RPN with single stride (i.e. C4)             # RPN with single stride (i.e. C4)
from lib.faster_rcnn.layers.anchor_target_layer import AnchorTargetLayer from lib.faster_rcnn.anchor_target_layer import AnchorTargetLayer
self.anchor_target_layer = AnchorTargetLayer() self.anchor_target_layer = AnchorTargetLayer()
self.cls_loss = torch.nn.BCEWithLogitsLoss() self.cls_loss = torch.nn.BCEWithLogitsLoss()
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1. / 9.) self.bbox_loss = torch.nn.SmoothL1Loss(
self.reset_parameters() beta=.11, reduction='batch_size')
self.reset_parameters()
def reset_parameters(self):
# Initialization for the RPN def reset_parameters(self):
# Weight ~ Normal(0, 0.01) # Initialization for the RPN
for m in self.modules(): # Weight ~ Normal(0, 0.01)
if isinstance(m, torch.nn.Conv2d): for m in self.modules():
torch.nn.init.normal_(m.weight, std=0.01) if isinstance(m, torch.nn.Conv2d):
torch.nn.init.normal_(m.weight, std=0.01)
def compute_outputs(self, features):
"""Compute the RPN logits. def compute_outputs(self, features):
"""Compute the RPN logits.
Parameters
---------- Parameters
features : sequence of dragon.vm.torch.Tensor ----------
The features of specific conv layers. features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
"""
# Compute rpn logits """
cls_score_wide, bbox_pred_wide = [], [] # Compute rpn logits
for feature in features: cls_score_wide, bbox_pred_wide = [], []
x = self.relu(self.output(feature)) for feature in features:
if len(features) > 1: x = self.relu(self.output(feature))
cls_score = self.cls_score(x).view(0, -1) if len(features) > 1:
bbox_pred = self.bbox_pred(x).view(0, 4, -1) cls_score = self.cls_score(x).view(0, -1)
else: bbox_pred = self.bbox_pred(x).view(0, 4, -1)
cls_score = self.cls_score(x) else:
bbox_pred = self.bbox_pred(x) cls_score = self.cls_score(x)
cls_score_wide.append(cls_score) bbox_pred = self.bbox_pred(x)
bbox_pred_wide.append(bbox_pred) cls_score_wide.append(cls_score)
bbox_pred_wide.append(bbox_pred)
if len(features) > 1:
# Concat them if necessary if len(features) > 1:
return torch.cat(cls_score_wide, dim=1), \ # Concat them if necessary
torch.cat(bbox_pred_wide, dim=2) return torch.cat(cls_score_wide, dim=1), \
else: torch.cat(bbox_pred_wide, dim=2)
return cls_score_wide[0], bbox_pred_wide[0] else:
return cls_score_wide[0], bbox_pred_wide[0]
def compute_losses(
self, def compute_losses(
features, self,
cls_score, features,
bbox_pred, cls_score,
gt_boxes, bbox_pred,
ims_info, gt_boxes,
): ims_info,
"""Compute the RPN classification loss and regression loss. ):
"""Compute the RPN classification loss and regression loss.
Parameters
---------- Parameters
features : sequence of dragon.vm.torch.Tensor ----------
The features of specific conv layers. features : sequence of dragon.vm.torch.Tensor
cls_score : dragon.vm.torch.Tensor The features of specific conv layers.
The (binary) classification logits. cls_score : dragon.vm.torch.Tensor
bbox_pred : dragon.vm.torch.Tensor The (binary) classification logits.
The bbox regression logits. bbox_pred : dragon.vm.torch.Tensor
gt_boxes : numpy.ndarray The bbox regression logits.
The packed ground-truth boxes. gt_boxes : numpy.ndarray
ims_info : numpy.ndarray The packed ground-truth boxes.
The information of input images. ims_info : numpy.ndarray
The information of input images.
"""
self.rpn_data = \ """
self.anchor_target_layer( self.rpn_data = \
features=features, self.anchor_target_layer(
gt_boxes=gt_boxes, features=features,
ims_info=ims_info, gt_boxes=gt_boxes,
) ims_info=ims_info,
return collections.OrderedDict({ )
'rpn_cls_loss': return collections.OrderedDict([
self.cls_loss(cls_score, self.rpn_data['labels']), ('rpn_cls_loss', self.cls_loss(
'rpn_bbox_loss': cls_score, self.rpn_data['labels'])),
self.bbox_loss( ('rpn_bbox_loss', self.bbox_loss(
bbox_pred, bbox_pred,
self.rpn_data['bbox_targets'], self.rpn_data['bbox_targets'],
self.rpn_data['bbox_inside_weights'], self.rpn_data['bbox_inside_weights'],
self.rpn_data['bbox_outside_weights'], self.rpn_data['bbox_outside_weights'],
) )),
}) ])
def forward(self, *args, **kwargs): def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features']) cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float() cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict({ outputs = collections.OrderedDict([
'rpn_cls_score': cls_score, ('rpn_cls_score', cls_score),
'rpn_bbox_pred': bbox_pred, ('rpn_bbox_pred', bbox_pred),
}) ])
if self.training: if self.training:
outputs.update( outputs.update(
self.compute_losses( self.compute_losses(
kwargs['features'], kwargs['features'],
cls_score, cls_score,
bbox_pred, bbox_pred,
kwargs['gt_boxes'], kwargs['gt_boxes'],
kwargs['ims_info'], kwargs['ims_info'],
) )
) )
return outputs return outputs
...@@ -136,32 +136,29 @@ class SSD(torch.nn.Module): ...@@ -136,32 +136,29 @@ class SSD(torch.nn.Module):
gt_boxes=gt_boxes, gt_boxes=gt_boxes,
) )
) )
return collections.OrderedDict({ return collections.OrderedDict([
# A compensating factor of 4.0 is used # A compensating factor of 4.0 is used
# As we normalize both the pos and neg samples # As we normalize both the pos and neg samples
'cls_loss': ('cls_loss', self.cls_loss(
self.cls_loss( cls_score.view(-1, cfg.MODEL.NUM_CLASSES),
cls_score.view(-1, cfg.MODEL.NUM_CLASSES), self.ssd_data['labels']) * 4.),
self.ssd_data['labels'] ('bbox_loss', self.bbox_loss(
) * 4., bbox_pred,
'bbox_loss': self.ssd_data['bbox_targets'],
self.bbox_loss( self.ssd_data['bbox_inside_weights'],
bbox_pred, self.ssd_data['bbox_outside_weights'],
self.ssd_data['bbox_targets'], )),
self.ssd_data['bbox_inside_weights'], ])
self.ssd_data['bbox_outside_weights'],
)
})
def forward(self, *args, **kwargs): def forward(self, *args, **kwargs):
prior_boxes = self.prior_box_layer(kwargs['features']) prior_boxes = self.prior_box_layer(kwargs['features'])
cls_score, bbox_pred = self.compute_outputs(kwargs['features']) cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float() cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict({ outputs = collections.OrderedDict([
'prior_boxes': prior_boxes, ('bbox_pred', bbox_pred),
'bbox_pred': bbox_pred, ('prior_boxes', prior_boxes),
}) ])
if self.training: if self.training:
outputs.update( outputs.update(
......
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/nms_wrapper.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/nms_wrapper.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils import logger from lib.utils import logger
try: try:
from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms
except ImportError as e: except ImportError as e:
print('Failed to import cpu nms. Error: {0}'.format(str(e))) print('Failed to import cpu nms. Error: {0}'.format(str(e)))
try: try:
from lib.nms.gpu_nms import gpu_nms from lib.nms.gpu_nms import gpu_nms
except ImportError as e: except ImportError as e:
print('Failed to import gpu nms. Error: {0}'.format(str(e))) print('Failed to import gpu nms. Error: {0}'.format(str(e)))
def nms(detections, thresh, force_cpu=False): def nms(detections, thresh, force_cpu=False):
"""Perform either CPU or GPU Hard-NMS.""" """Perform either CPU or GPU Hard-NMS."""
if detections.shape[0] == 0: if detections.shape[0] == 0:
return [] return []
if cfg.USE_GPU_NMS and not force_cpu: if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID) return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
else: else:
return cpu_nms(detections, thresh) return cpu_nms(detections, thresh)
def soft_nms( def soft_nms(
detections, detections,
thresh, thresh,
method='linear', method='linear',
sigma=0.5, sigma=0.5,
score_thresh=0.001, score_thresh=0.001,
): ):
"""Perform CPU Soft-NMS.""" """Perform CPU Soft-NMS."""
if detections.shape[0] == 0: if detections.shape[0] == 0:
return [] return []
methods = {'hard': 0, 'linear': 1, 'gaussian': 2} methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
if method not in methods: if method not in methods:
logger.fatal('Unknown soft nms method: {}'.format(method)) logger.fatal('Unknown soft nms method: {}'.format(method))
return cpu_soft_nms( return cpu_soft_nms(
detections, detections,
thresh, thresh,
methods[method], methods[method],
sigma, sigma,
score_thresh, score_thresh,
) )
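# Illustrative usage (sketch, not part of this diff). Each detection row is
# (x1, y1, x2, y2, score); nms returns the indices of the kept rows, and
# soft_nms additionally rescores overlapping boxes before thresholding.
#   import numpy as np
#   dets = np.array([[10., 10., 60., 60., 0.9],
#                    [12., 12., 62., 62., 0.8],
#                    [100., 100., 150., 150., 0.7]], dtype=np.float32)
#   keep = nms(dets, thresh=0.5, force_cpu=True)          # -> [0, 2]
#   keep = soft_nms(dets, thresh=0.5, method='gaussian')  # rescored survivors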
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
syntax = "proto2";
message Datum {
optional int32 channels = 1;
optional int32 height = 2;
optional int32 width = 3;
optional bytes data = 4;
optional int32 label = 5;
repeated float float_data = 6;
optional bool encoded = 7 [default = false];
repeated int32 labels = 8;
}
message Annotation {
optional float x1 = 1;
optional float y1 = 2;
optional float x2 = 3;
optional float y2 = 4;
optional string name = 5;
optional bool difficult = 6 [default = false];
optional string mask = 7;
}
message AnnotatedDatum {
optional Datum datum = 1;
optional string filename = 2;
repeated Annotation annotation = 3;
}
...@@ -13,5 +13,5 @@ from __future__ import absolute_import ...@@ -13,5 +13,5 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.faster_rcnn.layers.data_layer import DataLayer from lib.faster_rcnn.data_layer import DataLayer
from lib.retinanet.layers.anchor_target_layer import AnchorTargetLayer from lib.retinanet.anchor_target_layer import AnchorTargetLayer
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors_v2 from lib.faster_rcnn.generate_anchors import generate_anchors_v2
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import blob_to_tensor from lib.utils.blob import blob_to_tensor
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
from lib.utils.cython_bbox import bbox_overlaps from lib.utils.cython_bbox import bbox_overlaps
class AnchorTargetLayer(torch.nn.Module): class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets.""" """Assign anchors to ground-truth targets."""
def __init__(self): def __init__(self):
super(AnchorTargetLayer, self).__init__() super(AnchorTargetLayer, self).__init__()
# Load the basic configs # Load the basic configs
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
anchor_scale = cfg.RETINANET.ANCHOR_SCALE anchor_scale = cfg.RETINANET.ANCHOR_SCALE
self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)] self.strides = [2. ** lvl for lvl in range(k_min, k_max + 1)]
self.ratios = cfg.RETINANET.ASPECT_RATIOS self.ratios = cfg.RETINANET.ASPECT_RATIOS
# Generate base anchors # Generate base anchors
self.base_anchors = [] self.base_anchors = []
for stride in self.strides: for stride in self.strides:
sizes = [stride * anchor_scale * sizes = [stride * anchor_scale *
(2 ** (octave / float(scales_per_octave))) (2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)] for octave in range(scales_per_octave)]
self.base_anchors.append( self.base_anchors.append(
generate_anchors_v2( generate_anchors_v2(
stride=stride, stride=stride,
ratios=self.ratios, ratios=self.ratios,
sizes=sizes, sizes=sizes,
)) ))
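        # Worked example (illustrative; anchor_scale = 4 and
        # scales_per_octave = 3 are typical RetinaNet defaults, not values
        # read from this diff): the stride-8 level gets
        # sizes = 8 * 4 * 2 ** (o / 3) for o in {0, 1, 2},
        # i.e. roughly [32.0, 40.3, 50.8]; combined with 3 aspect ratios
        # this yields 9 base anchors per pyramid level.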
def forward(self, features, gt_boxes, ims_info): def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.""" """Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images: if len(gt_boxes_wide) != num_images:
logger.fatal( logger.fatal(
'Input {} images, got {} slices of gt boxes.' 'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide)) .format(num_images, len(gt_boxes_wide))
) )
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
all_anchors, total_anchors = [], 0 all_anchors, total_anchors = [], 0
for i in range(len(self.strides)): for i in range(len(self.strides)):
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i] shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i] shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to # Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get # cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0] A = self.base_anchors[i].shape[0]
K = shifts.shape[0] K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) + anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2))) shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4] # [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2)) anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4)) anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors) all_anchors.append(anchors)
total_anchors += anchors.shape[0] total_anchors += anchors.shape[0]
all_anchors = np.concatenate(all_anchors, axis=0) all_anchors = np.concatenate(all_anchors, axis=0)
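        # Shape check (illustrative): for a stride-8 feature map of
        # 100 x 152 cells, K = 15200 shifts; broadcasting (1, A, 4) with
        # (K, 1, 4) for A = 9 base anchors gives (K, A, 4), i.e.
        # 15200 * 9 = 136800 shifted anchors appended for that level.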
# label: 1 is positive, 0 is negative, -1 is don't care # label: 1 is positive, 0 is negative, -1 is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32) labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32)
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32) bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32) bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32) bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
anchors = all_anchors anchors = all_anchors
inds_inside = np.arange(all_anchors.shape[0]) inds_inside = np.arange(all_anchors.shape[0])
num_inside = len(inds_inside) num_inside = len(inds_inside)
for ix in range(num_images): for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label) # GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix] gt_boxes = gt_boxes_wide[ix]
# label: 1 is positive, 0 is negative, -1 is don't care # label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32) labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1) labels.fill(-1)
# Overlaps between the anchors and the gt boxes # Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps( overlaps = bbox_overlaps(
np.ascontiguousarray(anchors, dtype=np.float), np.ascontiguousarray(anchors, dtype=np.float),
np.ascontiguousarray(gt_boxes, dtype=np.float), np.ascontiguousarray(gt_boxes, dtype=np.float),
) )
argmax_overlaps = overlaps.argmax(axis=1) argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps] max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
# fg label: for each gt, anchor with highest overlap # fg label: for each gt, anchor with highest overlap
gt_argmax_overlaps = overlaps.argmax(axis=0) gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
gt_inds = argmax_overlaps[gt_argmax_overlaps] gt_inds = argmax_overlaps[gt_argmax_overlaps]
labels[gt_argmax_overlaps] = gt_boxes[gt_inds, 4] labels[gt_argmax_overlaps] = gt_boxes[gt_inds, 4]
# fg label: above threshold IOU # fg label: above threshold IOU
inds = max_overlaps >= cfg.RETINANET.POSITIVE_OVERLAP inds = max_overlaps >= cfg.RETINANET.POSITIVE_OVERLAP
gt_inds = argmax_overlaps[inds] gt_inds = argmax_overlaps[inds]
labels[inds] = gt_boxes[gt_inds, 4] labels[inds] = gt_boxes[gt_inds, 4]
fg_inds = np.where(labels > 0)[0] fg_inds = np.where(labels > 0)[0]
# bg label: below threshold IOU # bg label: below threshold IOU
labels[max_overlaps < cfg.RETINANET.NEGATIVE_OVERLAP] = 0 labels[max_overlaps < cfg.RETINANET.NEGATIVE_OVERLAP] = 0
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32) bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform( bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4]) anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4])
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[fg_inds, :] = np.array((1., 1., 1., 1.)) bbox_inside_weights[fg_inds, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1) bbox_outside_weights[fg_inds, :] = np.ones((1, 4)) / max(len(fg_inds), 1)
labels_wide[ix, inds_inside] = labels labels_wide[ix, inds_inside] = labels
bbox_targets_wide[ix, inds_inside] = bbox_targets bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors)) labels = labels_wide.reshape((num_images, total_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1)) bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1)) bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1)) bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return { return {
'labels': blob_to_tensor(labels), 'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets), 'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights), 'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights), 'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
} }
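        # Returned blob shapes (illustrative summary, not part of this diff):
        #   labels               -> (num_images, total_anchors)
        #   bbox_targets         -> (num_images, 4, total_anchors)
        #   bbox_inside_weights  -> (num_images, 4, total_anchors)
        #   bbox_outside_weights -> (num_images, 4, total_anchors)
        # The (0, 2, 1) transpose above puts the 4 box coordinates on axis 1
        # so the targets line up with (N, 4, A)-shaped regression logits.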
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.nms.nms_wrapper import nms from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import im_list_to_blob from lib.utils.blob import im_list_to_blob
from lib.utils.blob import tensor_to_blob from lib.utils.blob import tensor_to_blob
from lib.utils.image import scale_image from lib.utils.image import scale_image
from lib.utils.timer import Timer from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image): def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales.""" """Detect a image, with single or multiple scales."""
# Prepare images # Prepare images
ims, ims_scale = scale_image(raw_image) ims, ims_scale = scale_image(raw_image)
# Prepare blobs # Prepare blobs
blobs = {'data': im_list_to_blob(ims)} blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([ blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale] list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32, for im_scale in ims_scale], dtype=np.float32,
) )
blobs['data'] = torch.from_numpy(blobs['data']) blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward # Do Forward
with torch.no_grad(): with torch.no_grad():
outputs = detector.forward(inputs=blobs) outputs = detector.forward(inputs=blobs)
# Unpack results # Unpack results
return tensor_to_blob(outputs['detections'])[:, 1:] return tensor_to_blob(outputs['detections'])[:, 1:]
def ims_detect(detector, raw_images): def ims_detect(detector, raw_images):
"""Detect images, with single or multiple scales.""" """Detect images, with single or multiple scales."""
# Prepare images # Prepare images
ims, ims_scale = scale_image(raw_images[0]) ims, ims_scale = scale_image(raw_images[0])
num_scales = len(ims_scale) num_scales = len(ims_scale)
ims_shape = [im.shape for im in raw_images] ims_shape = [im.shape for im in raw_images]
for item_idx in range(1, len(raw_images)): for item_idx in range(1, len(raw_images)):
ims_ext, ims_scale_ext = scale_image(raw_images[item_idx]) ims_ext, ims_scale_ext = scale_image(raw_images[item_idx])
ims += ims_ext ims += ims_ext
ims_scale += ims_scale_ext ims_scale += ims_scale_ext
# Prepare blobs # Prepare blobs
blobs = {'data': im_list_to_blob(ims)} blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([ blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale] list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale], dtype=np.float32, for im_scale in ims_scale], dtype=np.float32,
) )
blobs['data'] = torch.from_numpy(blobs['data']) blobs['data'] = torch.from_numpy(blobs['data'])
# Do Forward # Do Forward
with torch.no_grad(): with torch.no_grad():
outputs = detector.forward(inputs=blobs) outputs = detector.forward(inputs=blobs)
# Unpack results # Unpack results
results = tensor_to_blob(outputs['detections']) results = tensor_to_blob(outputs['detections'])
detections_wide = [[] for _ in range(len(ims_shape))] detections_wide = [[] for _ in range(len(ims_shape))]
for i in range(len(ims)): for i in range(len(ims)):
indices = np.where(results[:, 0].astype(np.int32) == i)[0] indices = np.where(results[:, 0].astype(np.int32) == i)[0]
detections = results[indices, 1:] detections = results[indices, 1:]
detections_wide[i // num_scales].append(detections) detections_wide[i // num_scales].append(detections)
for i in range(len(ims_shape)): for i in range(len(ims_shape)):
detections_wide[i] = np.vstack(detections_wide[i]) \ detections_wide[i] = np.vstack(detections_wide[i]) \
if len(detections_wide[i]) > 1 else detections_wide[i][0] if len(detections_wide[i]) > 1 else detections_wide[i][0]
return detections_wide return detections_wide
def test_net(net, server): def test_net(net, server):
# Load settings # Load settings
classes = server.classes classes = server.classes
num_images = server.num_images num_images = server.num_images
num_classes = server.num_classes num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()} _t = {'im_detect': Timer(), 'misc': Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH): for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths # Collect raw images and ground-truths
image_ids, raw_images = [], [] image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH): for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images: continue if batch_idx + item_idx >= num_images: continue
image_id, raw_image = server.get_image() image_id, raw_image = server.get_image()
image_ids.append(image_id) image_ids.append(image_id)
raw_images.append(raw_image) raw_images.append(raw_image)
# Run detecting on specific scales # Run detecting on specific scales
_t['im_detect'].tic() _t['im_detect'].tic()
if cfg.TEST.IMS_PER_BATCH > 1: if cfg.TEST.IMS_PER_BATCH > 1:
results = ims_detect(net, raw_images) results = ims_detect(net, raw_images)
else: else:
results = [im_detect(net, raw_images[0])] results = [im_detect(net, raw_images[0])]
_t['im_detect'].toc() _t['im_detect'].toc()
# Post-Processing # Post-Processing
_t['misc'].tic() _t['misc'].tic()
for item_idx, detections in enumerate(results): for item_idx, detections in enumerate(results):
i = batch_idx + item_idx i = batch_idx + item_idx
boxes_this_image = [[]] boxes_this_image = [[]]
# {x1, y1, x2, y2, score, cls} # {x1, y1, x2, y2, score, cls}
detections = np.array(detections) detections = np.array(detections)
for j in range(1, num_classes): for j in range(1, num_classes):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0] cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4] cls_boxes = detections[cls_indices, 0:4]
cls_scores = detections[cls_indices, 4] cls_scores = detections[cls_indices, 4]
cls_detections = np.hstack(( cls_detections = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])) \ cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False) .astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS: if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms( keep = soft_nms(
cls_detections, cls_detections,
cfg.TEST.NMS, cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD, method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA, sigma=cfg.TEST.SOFT_NMS_SIGMA,
) )
else: else:
keep = nms( keep = nms(
cls_detections, cls_detections,
cfg.TEST.NMS, cfg.TEST.NMS,
force_cpu=True, force_cpu=True,
) )
cls_detections = cls_detections[keep, :] cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections) boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE: if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image( vis_one_image(
raw_images[item_idx], raw_images[item_idx],
classes, classes,
boxes_this_image, boxes_this_image,
thresh=cfg.VIS_TH, thresh=cfg.VIS_TH,
box_alpha=1., box_alpha=1.,
show_class=True, show_class=True,
filename=server.get_save_filename(image_ids[item_idx]), filename=server.get_save_filename(image_ids[item_idx]),
) )
# Limit to max_per_image detections *over all classes* # Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0: if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = [] image_scores = []
for j in range(1, num_classes): for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: if len(all_boxes[j][i]) < 1:
continue continue
image_scores.append(all_boxes[j][i][:, -1]) image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: if len(image_scores) > 0:
image_scores = np.hstack(image_scores) image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM: if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM] image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes): for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :] all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc() _t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
.format(batch_idx + cfg.TEST.IMS_PER_BATCH, .format(batch_idx + cfg.TEST.IMS_PER_BATCH,
num_images, _t['im_detect'].average_time, num_images,
_t['misc'].average_time), end='') _t['im_detect'].average_time,
_t['misc'].average_time),
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<') end='')
print('Evaluating detections') print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
server.evaluate_detections(all_boxes)
print('Evaluating detections')
server.evaluate_detections(all_boxes)
...@@ -13,8 +13,8 @@ from __future__ import absolute_import ...@@ -13,8 +13,8 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.ssd.layers.data_layer import DataLayer from lib.ssd.data_layer import DataLayer
from lib.ssd.layers.hard_mining_layer import HardMiningLayer from lib.ssd.hard_mining_layer import HardMiningLayer
from lib.ssd.layers.multibox_layer import MultiBoxMatchLayer from lib.ssd.multibox_layer import MultiBoxMatchLayer
from lib.ssd.layers.multibox_layer import MultiBoxTargetLayer from lib.ssd.multibox_layer import MultiBoxTargetLayer
from lib.ssd.layers.priorbox_layer import PriorBoxLayer from lib.ssd.priorbox_layer import PriorBoxLayer
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from lib.core.config import cfg
class BlobFetcher(multiprocessing.Process):
def __init__(self, **kwargs):
super(BlobFetcher, self).__init__()
self._img_blob_size = (
cfg.TRAIN.IMS_PER_BATCH,
cfg.SSD.RESIZE.HEIGHT,
cfg.SSD.RESIZE.WIDTH, 3,
)
self.q_in = self.q_out = None
self.daemon = True
def get(self):
img_blob, boxes_blob = np.zeros(self._img_blob_size, 'uint8'), []
for i in range(cfg.TRAIN.IMS_PER_BATCH):
img_blob[i], gt_boxes = self.q_in.get()
# Pack the boxes by adding the index of images
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), np.float32)
boxes[:, :gt_boxes.shape[1]] = gt_boxes
boxes[:, -1] = i
boxes_blob.append(boxes)
return {
'data': img_blob,
'gt_boxes': np.concatenate(boxes_blob, 0),
}
def run(self):
while True:
self.q_out.put(self.get())
...@@ -13,54 +13,69 @@ from __future__ import absolute_import ...@@ -13,54 +13,69 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from multiprocessing import Queue import multiprocessing as mp
import time import time
import dragon import dragon
import pprint import dragon.vm.torch as torch
import numpy as np
from lib.faster_rcnn.data.data_reader import DataReader from lib.core.config import cfg
from lib.ssd.data.data_transformer import DataTransformer from lib.datasets.factory import get_imdb
from lib.ssd.data.blob_fetcher import BlobFetcher from lib.ssd.data_transformer import DataTransformer
from lib.utils import logger from lib.utils import logger
class DataBatch(object): class DataLayer(torch.nn.Module):
"""DataBatch aims to prefetch data by ``Triple-Buffering``. """Generate a mini-batch of data."""
    It takes full advantage of the Process/Thread of Python,     def __init__(self):
super(DataLayer, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
})
    which provides remarkable I/O speed-up for scalable distributed training.     def forward(self):
# Get an array blob from the Queue
outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
"""
def __init__(self, **kwargs): def __init__(self, **kwargs):
"""Construct a ``DataBatch``. """Construct a ``DataBatch``.
Parameters Parameters
---------- ----------
source : str dataset : lambda
The path of database. The creator of a dataset.
shuffle : bool, optional, default=False shuffle : bool, optional, default=False
Whether to shuffle the data. Whether to shuffle the data.
num_chunks : int, optional, default=2048 num_chunks : int, optional, default=0
The number of chunks to split. The number of chunks to split.
batch_size : int, optional, default=128 batch_size : int, optional, default=32
The size of a mini-batch. The size of a mini-batch.
prefetch : int, optional, default=5 prefetch : int, optional, default=5
The prefetch count. The prefetch count.
""" """
super(DataBatch, self).__init__() super(DataBatch, self).__init__()
# Init mpi # Distributed settings
global_rank, local_rank, group_size = 0, 0, 1 rank, group_size = 0, 1
if dragon.mpi.is_init(): process_group = dragon.distributed.get_default_process_group()
group = dragon.mpi.is_parallel() if process_group is not None and kwargs.get(
if group is not None: # DataParallel 'phase', 'TRAIN') == 'TRAIN':
global_rank = dragon.mpi.rank() group_size = process_group.size
group_size = len(group) rank = dragon.distributed.get_rank(process_group)
for i, node in enumerate(group):
if global_rank == node:
local_rank = i
kwargs['group_size'] = group_size kwargs['group_size'] = group_size
# Configuration # Configuration
...@@ -77,63 +92,50 @@ class DataBatch(object): ...@@ -77,63 +92,50 @@ class DataBatch(object):
self._num_transformers = min( self._num_transformers = min(
self._num_transformers, self._max_transformers) self._num_transformers, self._max_transformers)
# Init queues # Initialize queues
self.Q1 = Queue(self._prefetch * self._num_readers * self._batch_size) num_batches = self._prefetch * self._num_readers
self.Q2 = Queue(self._prefetch * self._num_readers * self._batch_size) self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q3 = Queue(self._prefetch * self._num_readers) self.Q2 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Init readers # Initialize readers
self._readers = [] self._readers = []
for i in range(self._num_readers): for i in range(self._num_readers):
self._readers.append(DataReader(**kwargs))
self._readers[-1].q_out = self.Q1
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers part_idx, num_parts = i, self._num_readers
num_parts *= group_size num_parts *= group_size
part_idx += local_rank * self._num_readers part_idx += rank * self._num_readers
self._readers[i]._num_parts = num_parts self._readers.append(dragon.io.DataReader(
self._readers[i]._part_idx = part_idx num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._rng_seed += part_idx self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start() self._readers[i].start()
time.sleep(0.1) time.sleep(0.1)
# Init transformers # Initialize transformers
self._transformers = [] self._transformers = []
for i in range(self._num_transformers): for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs) transformer = DataTransformer(**kwargs)
transformer._rng_seed += (i + local_rank * self._num_transformers) transformer._rng_seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1 transformer.q_in, transformer.q_out = self.Q1, self.Q2
transformer.q_out = self.Q2
transformer.start() transformer.start()
self._transformers.append(transformer) self._transformers.append(transformer)
time.sleep(0.1) time.sleep(0.1)
# Init blob fetchers # Initialize batch-producer
self._fetchers = [] self.start()
for i in range(self._num_fetchers):
fetcher = BlobFetcher(**kwargs)
fetcher.q_in = self.Q2
fetcher.q_out = self.Q3
fetcher.start()
self._fetchers.append(fetcher)
time.sleep(0.1)
# Prevent to echo multiple nodes
if local_rank == 0:
self.echo()
# Register cleanup callbacks
def cleanup(): def cleanup():
def terminate(processes): def terminate(processes):
for process in processes: for process in processes:
process.terminate() process.terminate()
process.join() process.join()
terminate(self._fetchers) terminate([self])
logger.info('Terminating BlobFetcher ......') logger.info('Terminate DataBatch.')
terminate(self._transformers) terminate(self._transformers)
logger.info('Terminating DataTransformer ......') logger.info('Terminate DataTransformer.')
terminate(self._readers) terminate(self._readers)
logger.info('Terminating DataReader......') logger.info('Terminate DataReader.')
import atexit import atexit
atexit.register(cleanup) atexit.register(cleanup)
...@@ -149,14 +151,24 @@ class DataBatch(object): ...@@ -149,14 +151,24 @@ class DataBatch(object):
""" """
return self.Q3.get() return self.Q3.get()
def echo(self): def run(self):
"""Print I/O Information.""" """Start the process to produce batches."""
print('---------------------------------------------------------') image_batch_shape = (
print('BatchFetcher({} Threads), Using config:'.format( cfg.TRAIN.IMS_PER_BATCH,
self._num_readers + self._num_transformers + self._num_fetchers)) cfg.SSD.RESIZE.HEIGHT,
params = {'queue_size': self._prefetch, cfg.SSD.RESIZE.WIDTH, 3,
'n_readers': self._num_readers, )
'n_transformers': self._num_transformers,
'n_fetchers': self._num_fetchers} while True:
pprint.pprint(params) boxes_to_pack = []
print('---------------------------------------------------------') image_batch = np.zeros(image_batch_shape, 'uint8')
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
image_batch[image_index], gt_boxes = self.Q2.get()
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
boxes[:, :gt_boxes.shape[1]], boxes[:, -1] = gt_boxes, image_index
boxes_to_pack.append(boxes)
self.Q3.put({
'data': image_batch,
'gt_boxes': np.concatenate(boxes_to_pack),
})
...@@ -13,14 +13,14 @@ from __future__ import absolute_import ...@@ -13,14 +13,14 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import cv2
import multiprocessing import multiprocessing
import cv2
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.proto import anno_pb2 as pb from lib.ssd import transforms
from lib.ssd.data import transforms from lib.utils.boxes import flip_boxes
from lib.utils import logger
class DataTransformer(multiprocessing.Process): class DataTransformer(multiprocessing.Process):
...@@ -41,38 +41,41 @@ class DataTransformer(multiprocessing.Process): ...@@ -41,38 +41,41 @@ class DataTransformer(multiprocessing.Process):
self.q_in = self.q_out = None self.q_in = self.q_out = None
self.daemon = True self.daemon = True
def make_roi_dict(self, ann_datum, flip=False): def make_roi_dict(self, example, flip=False):
annotations = ann_datum.annotation
n_objects = 0 n_objects = 0
if not self._use_diff: if not self._use_diff:
for ann in annotations: for obj in example['object']:
if not ann.difficult: n_objects += 1 if obj.get('difficult', 0) == 0:
else: n_objects = len(annotations) n_objects += 1
else:
n_objects = len(example['object'])
roi_dict = { roi_dict = {
'width': ann_datum.datum.width, 'width': example['width'],
'height': ann_datum.datum.height, 'height': example['height'],
'gt_classes': np.zeros((n_objects,), dtype=np.int32), 'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), dtype=np.float32), 'boxes': np.zeros((n_objects, 4), 'float32'),
'normalized_boxes': np.zeros((n_objects, 4), dtype=np.float32), 'normalized_boxes': np.zeros((n_objects, 4), 'float32'),
} }
rec_idx = 0 # Filter the difficult instances
for ann in annotations: object_idx = 0
if not self._use_diff and ann.difficult: for obj in example['object']:
if not self._use_diff and \
obj.get('difficult', 0) > 0:
continue continue
roi_dict['boxes'][rec_idx, :] = [ roi_dict['boxes'][object_idx, :] = [
max(0, ann.x1), max(0, obj['xmin']),
max(0, ann.y1), max(0, obj['ymin']),
min(ann.x2, ann_datum.datum.width - 1), min(obj['xmax'], example['width'] - 1),
min(ann.y2, ann_datum.datum.height - 1), min(obj['ymax'], example['height'] - 1),
] ]
roi_dict['gt_classes'][rec_idx] = \ roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[ann.name] self._class_to_ind[obj['name']]
rec_idx += 1 object_idx += 1
if flip: if flip:
roi_dict['boxes'] = _flip_boxes( roi_dict['boxes'] = flip_boxes(
roi_dict['boxes'], roi_dict['width']) roi_dict['boxes'], roi_dict['width'])
roi_dict['boxes'][:, 0::2] /= roi_dict['width'] roi_dict['boxes'][:, 0::2] /= roi_dict['width']
...@@ -80,26 +83,19 @@ class DataTransformer(multiprocessing.Process): ...@@ -80,26 +83,19 @@ class DataTransformer(multiprocessing.Process):
return roi_dict return roi_dict
def get(self, serialized): def get(self, example):
ann_datum = pb.AnnotatedDatum() img = np.frombuffer(example['content'], np.uint8)
ann_datum.ParseFromString(serialized) img = cv2.imdecode(img, -1)
img_datum = ann_datum.datum
img = np.fromstring(img_datum.data, np.uint8)
if img_datum.encoded is True:
img = cv2.imdecode(img, -1)
else:
h, w = img_datum.height, img_datum.width
img = img.reshape((h, w, img_datum.channels))
# Flip # Flip
flip = False flip = False
if self._mirror: if self._mirror:
if np.random.randint(0, 2) > 0: if np.random.randint(2) > 0:
img = img[:, ::-1, :] img = img[:, ::-1, :]
flip = True flip = True
# Datum -> RoIDB # Example -> RoIDict
roi_dict = self.make_roi_dict(ann_datum, flip) roi_dict = self.make_roi_dict(example, flip)
# Post-Process for gt boxes # Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}] # Shape like: [num_objects, {x1, y1, x2, y2, cls}]
...@@ -120,19 +116,7 @@ class DataTransformer(multiprocessing.Process): ...@@ -120,19 +116,7 @@ class DataTransformer(multiprocessing.Process):
def run(self): def run(self):
np.random.seed(self._rng_seed) np.random.seed(self._rng_seed)
while True: while True:
serialized = self.q_in.get() outputs = self.get(self.q_in.get())
im, gt_boxes = self.get(serialized) if len(outputs[1]) < 1:
if len(gt_boxes) < 1: continue # Ignore the non-object image
continue self.q_out.put(outputs)
self.q_out.put((im, gt_boxes))
def _flip_boxes(boxes, width):
flip_boxes = boxes.copy()
old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy()
flip_boxes[:, 0] = width - old_x2 - 1
flip_boxes[:, 2] = width - old_x1 - 1
if not (flip_boxes[:, 2] >= flip_boxes[:, 0]).all():
logger.fatal('Encounter invalid coordinates after flipping boxes.')
return flip_boxes
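# Sketch of the SeetaRecord example consumed by DataTransformer.get() above
# (illustrative; field values are made up, the keys follow the accesses in
# make_roi_dict and get):
#   example = {
#       'content': b'...jpeg bytes...',
#       'width': 500, 'height': 375,
#       'object': [
#           {'name': 'dog', 'xmin': 48., 'ymin': 240.,
#            'xmax': 195., 'ymax': 371., 'difficult': 0},
#       ],
#   }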
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
def generate_anchors(min_sizes, max_sizes, ratios): def generate_anchors(min_sizes, max_sizes, ratios):
""" """
Generate anchor (reference) windows by enumerating Generate anchor (reference) windows by enumerating
aspect ratios, min_sizes, max_sizes wrt a reference ctr (x, y, w, h). aspect ratios, min_sizes, max_sizes wrt a reference ctr (x, y, w, h).
""" """
total_anchors = [] total_anchors = []
for idx, min_size in enumerate(min_sizes): for idx, min_size in enumerate(min_sizes):
        # Note that SSD assumes it is a ctr-anchor         # Note that SSD assumes it is a ctr-anchor
base_anchor = np.array([0, 0, min_size, min_size]) base_anchor = np.array([0, 0, min_size, min_size])
anchors = _ratio_enum(base_anchor, ratios) anchors = _ratio_enum(base_anchor, ratios)
if len(max_sizes) > 0: if len(max_sizes) > 0:
max_size = max_sizes[idx] max_size = max_sizes[idx]
_anchors = anchors[0].reshape((1, 4)) _anchors = anchors[0].reshape((1, 4))
_anchors = np.vstack([_anchors, _max_size_enum( _anchors = np.vstack([_anchors, _max_size_enum(
base_anchor, min_size, max_size)]) base_anchor, min_size, max_size)])
anchors = np.vstack([_anchors, anchors[1:]]) anchors = np.vstack([_anchors, anchors[1:]])
total_anchors.append(anchors) total_anchors.append(anchors)
return np.vstack(total_anchors) return np.vstack(total_anchors)
def _whctrs(anchor): def _whctrs(anchor):
"""Return width, height, x center, and y center for an anchor (window).""" """Return width, height, x center, and y center for an anchor (window)."""
w, h = anchor[2], anchor[3] w, h = anchor[2], anchor[3]
x_ctr, y_ctr = anchor[0], anchor[1] x_ctr, y_ctr = anchor[0], anchor[1]
return w, h, x_ctr, y_ctr return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr): def _mkanchors(ws, hs, x_ctr, y_ctr):
""" """
Given a vector of widths (ws) and heights (hs) around a center Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows). (x_ctr, y_ctr), output a set of anchors (windows).
""" """
ws = ws[:, np.newaxis] ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis] hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * ws, anchors = np.hstack((x_ctr - 0.5 * ws,
y_ctr - 0.5 * hs, y_ctr - 0.5 * hs,
x_ctr + 0.5 * ws, x_ctr + 0.5 * ws,
y_ctr + 0.5 * hs)) y_ctr + 0.5 * hs))
return anchors return anchors
def _ratio_enum(anchor, ratios): def _ratio_enum(anchor, ratios):
"""Enumerate a set of anchors for each aspect ratio wrt an anchor.""" """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor) w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h size = w * h
size_ratios = size / ratios size_ratios = size / ratios
hs = np.round(np.sqrt(size_ratios)) hs = np.round(np.sqrt(size_ratios))
ws = np.round(hs * ratios) ws = np.round(hs * ratios)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr) anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors return anchors
def _max_size_enum(base_anchor, min_size, max_size): def _max_size_enum(base_anchor, min_size, max_size):
"""Enumerate a anchor for max_size wrt base_anchor.""" """Enumerate a anchor for max_size wrt base_anchor."""
w, h, x_ctr, y_ctr = _whctrs(base_anchor) w, h, x_ctr, y_ctr = _whctrs(base_anchor)
ws = hs = np.sqrt([min_size * max_size]) ws = hs = np.sqrt([min_size * max_size])
anchors = _mkanchors(ws, hs, x_ctr, y_ctr) anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors return anchors
if __name__ == '__main__': if __name__ == '__main__':
print(generate_anchors(min_sizes=[30], max_sizes=[60], ratios=[1, 0.5, 2])) print(generate_anchors(min_sizes=[30], max_sizes=[60], ratios=[1, 0.5, 2]))
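For the demo arguments above, the enumeration yields one anchor for the first ratio at min_size, one square anchor at sqrt(min_size * max_size), and one anchor for each remaining ratio. A minimal sanity check, assuming the module above is saved locally as generate_anchors.py (its package path is not visible in this diff):

import numpy as np
from generate_anchors import generate_anchors  # import path assumed

anchors = generate_anchors(min_sizes=[30], max_sizes=[60], ratios=[1, 0.5, 2])
# 1 (ratio 1) + 1 (sqrt(30 * 60)) + 2 (ratios 0.5 and 2) = 4 anchors
assert anchors.shape == (4, 4)
# Each row is an (x1, y1, x2, y2) box centered on (0, 0)
print(np.round(anchors, 1))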
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
#     <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import dragon.vm.torch as torch
import numpy as np

from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor


class HardMiningLayer(torch.nn.Module):
    def __init__(self):
        super(HardMiningLayer, self).__init__()

    def forward(self, conf_prob, match_labels, max_overlaps):
        # Confidence of each matched box
        conf_prob_wide = conf_prob.numpy(True)
        # Label of each matched box
        match_labels_wide = match_labels
        # Max overlaps between default boxes and gt boxes
        max_overlaps_wide = max_overlaps
        # label ``-1`` will be ignored
        labels_wide = -np.ones(match_labels_wide.shape, dtype=np.int64)
        for ix in range(match_labels_wide.shape[0]):
            match_labels = match_labels_wide[ix]
            max_overlaps = max_overlaps_wide[ix]
            conf_prob = conf_prob_wide[ix]
            conf_loss = np.zeros(match_labels.shape, dtype=np.float32)
            inds = np.where(match_labels >= 0)[0]
            flt_min = np.finfo(float).eps
            # Softmax cross-entropy
            conf_loss[inds] = -np.log(np.maximum(
                conf_prob[inds, match_labels[inds]], flt_min))
            # Filter negatives
            fg_inds = np.where(match_labels > 0)[0]
            neg_inds = np.where(match_labels == 0)[0]
            neg_overlaps = max_overlaps[neg_inds]
            eligible_neg_inds = np.where(neg_overlaps < cfg.SSD.OHEM.NEG_OVERLAP)[0]
            sel_inds = neg_inds[eligible_neg_inds]
            # Do Mining
            sel_loss = conf_loss[sel_inds]
            num_pos = len(fg_inds)
            num_sel = min(int(num_pos * cfg.SSD.OHEM.NEG_POS_RATIO), len(sel_inds))
            sorted_sel_inds = sel_inds[np.argsort(-sel_loss)]
            bg_inds = sorted_sel_inds[:num_sel]
            labels_wide[ix][fg_inds] = match_labels[fg_inds]  # Keep fg indices
            labels_wide[ix][bg_inds] = 0  # Use hard negatives as bg indices
        # Feed labels to compute cls loss
        return {'labels': blob_to_tensor(labels_wide)}
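The loop above implements standard SSD hard negative mining: every positive box keeps its label, negatives are ranked by their softmax loss, and only the hardest ones (up to NEG_POS_RATIO times the number of positives) are kept as background; everything else stays at -1 and is ignored by the classification loss. A toy numpy illustration of that selection rule (made-up values, a ratio of 1 just to make the effect visible, and the NEG_OVERLAP eligibility filter omitted):

import numpy as np

match_labels = np.array([1, 0, 0, 0, 2, 0])     # 2 positives, 4 negatives
conf_loss = np.array([0.2, 1.5, 0.1, 0.9, 0.3, 0.05])
neg_pos_ratio = 1                               # stand-in for cfg.SSD.OHEM.NEG_POS_RATIO

fg_inds = np.where(match_labels > 0)[0]
neg_inds = np.where(match_labels == 0)[0]
num_sel = min(int(len(fg_inds) * neg_pos_ratio), len(neg_inds))
bg_inds = neg_inds[np.argsort(-conf_loss[neg_inds])][:num_sel]

labels = -np.ones_like(match_labels)            # -1 -> ignored by the loss
labels[fg_inds] = match_labels[fg_inds]
labels[bg_inds] = 0
print(labels)  # [ 1  0 -1  0  2 -1]: the two easiest negatives are dropped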
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
#     <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import dragon.vm.torch as torch

from lib.core.config import cfg
from lib.datasets.factory import get_imdb
from lib.ssd.data.data_batch import DataBatch


class DataLayer(torch.nn.Module):
    def __init__(self):
        super(DataLayer, self).__init__()
        database = get_imdb(cfg.TRAIN.DATABASE)
        self.data_batch = DataBatch(**{
            'source': database.source,
            'classes': database.classes,
            'shuffle': cfg.TRAIN.USE_SHUFFLE,
            'num_chunks': 2048,  # Chunk-Wise Shuffle
            'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
        })

    def forward(self):
        # Get an array blob from the Queue
        outputs = self.data_batch.get()
        # Zero-Copy the array to tensor
        outputs['data'] = torch.from_numpy(outputs['data'])
        return outputs
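A hypothetical smoke test for this layer; the module path and the dataset path below are placeholders, and it assumes cfg.TRAIN.DATABASE resolves to a SeetaRecord dataset registered in lib.datasets.factory:

from lib.core.config import cfg
from lib.ssd.data_layer import DataLayer   # assumed module location

cfg.TRAIN.DATABASE = '/data/my_dataset'    # placeholder SeetaRecord dataset
layer = DataLayer()
outputs = layer()                          # pops one pre-fetched batch from the queue
print(type(outputs['data']))               # dragon.vm.torch tensor wrapping the image blob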
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
#     <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import cv2
import dragon.vm.torch as torch
import numpy as np

from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.utils.blob import tensor_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image


def get_images(ims):
    target_h = cfg.SSD.RESIZE.HEIGHT
    target_w = cfg.SSD.RESIZE.WIDTH
    processed_ims, im_scales = [], []
    for im in ims:
        im_scales.append((float(target_h) / im.shape[0],
                          float(target_w) / im.shape[1]))
        processed_ims.append(cv2.resize(im, (target_w, target_h)))
    ims_blob = np.array(processed_ims, dtype=np.uint8)
    return ims_blob, im_scales


def ims_detect(detector, ims):
    """Detect images at a single scale."""
    # Prepare blobs
    data, im_scales = get_images(ims)
    data = torch.from_numpy(data).cuda(cfg.GPU_ID)
    # Do Forward
    with torch.no_grad():
        outputs = detector.forward(inputs={'data': data})
    # Decode results
    batch_boxes = []
    scores = tensor_to_blob(outputs['cls_prob'])
    prior_boxes = tensor_to_blob(outputs['prior_boxes'])
    box_deltas = tensor_to_blob(outputs['bbox_pred'])
    for i in range(box_deltas.shape[0]):
        boxes = bbox_transform_inv(
            boxes=prior_boxes,
            deltas=box_deltas[i],
            weights=cfg.BBOX_REG_WEIGHTS,
        )
        boxes[:, 0::2] /= im_scales[i][1]
        boxes[:, 1::2] /= im_scales[i][0]
        batch_boxes.append(clip_tiled_boxes(boxes, ims[i].shape))
    return scores, batch_boxes


def test_net(net, server):
    # Load settings
    classes = server.classes
    num_images = server.num_images
    num_classes = server.num_classes
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    _t = {'im_detect': Timer(), 'misc': Timer()}
    for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
        # Collect raw images and ground-truths
        image_ids, raw_images = [], []
        for item_idx in range(cfg.TEST.IMS_PER_BATCH):
            if batch_idx + item_idx >= num_images: continue
            image_id, raw_image = server.get_image()
            image_ids.append(image_id)
            raw_images.append(raw_image)
        _t['im_detect'].tic()
        batch_scores, batch_boxes = ims_detect(net, raw_images)
        _t['im_detect'].toc()
        _t['misc'].tic()
        for item_idx in range(len(batch_scores)):
            i = batch_idx + item_idx
            scores = batch_scores[item_idx]
            boxes = batch_boxes[item_idx]
            boxes_this_image = [[]]
            for j in range(1, num_classes):
                inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds]
                pre_nms_inds = np.argsort(-cls_scores)[:cfg.TEST.NMS_TOP_K]
                cls_scores = cls_scores[pre_nms_inds]
                cls_boxes = cls_boxes[pre_nms_inds]
                cls_detections = np.hstack(
                    (cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                if cfg.TEST.USE_SOFT_NMS:
                    keep = soft_nms(
                        cls_detections,
                        cfg.TEST.NMS,
                        method=cfg.TEST.SOFT_NMS_METHOD,
                        sigma=cfg.TEST.SOFT_NMS_SIGMA,
                    )
                else:
                    keep = nms(
                        cls_detections,
                        cfg.TEST.NMS,
                        force_cpu=True,
                    )
                cls_detections = cls_detections[keep, :]
                all_boxes[j][i] = cls_detections
                boxes_this_image.append(cls_detections)
            if cfg.VIS or cfg.VIS_ON_FILE:
                vis_one_image(
                    raw_images[item_idx],
                    classes,
                    boxes_this_image,
                    thresh=cfg.VIS_TH,
                    box_alpha=1.0,
                    show_class=True,
                    filename=server.get_save_filename(image_ids[item_idx]),
                )
            # Limit to max_per_image detections *over all classes*
            if cfg.TEST.DETECTIONS_PER_IM > 0:
                image_scores = []
                for j in range(1, num_classes):
                    if len(all_boxes[j][i]) < 1:
                        continue
                    image_scores.append(all_boxes[j][i][:, -1])
                if len(image_scores) > 0:
                    image_scores = np.hstack(image_scores)
                    if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
                        image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
                        for j in range(1, num_classes):
                            keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                            all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()
        print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(batch_idx + cfg.TEST.IMS_PER_BATCH,
                      num_images,
                      _t['im_detect'].average_time,
                      _t['misc'].average_time),
              end='')

    print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<')
    print('Evaluating detections')
    server.evaluate_detections(all_boxes)
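The DETECTIONS_PER_IM cap at the end of test_net pools the scores of all classes for one image, takes the score of the K-th best detection as a single threshold, and then re-filters every class with it. A small numpy illustration with made-up scores:

import numpy as np

per_class_scores = {1: np.array([0.9, 0.6]), 2: np.array([0.8, 0.3, 0.7])}
detections_per_im = 3  # stand-in for cfg.TEST.DETECTIONS_PER_IM

image_scores = np.hstack(list(per_class_scores.values()))
if len(image_scores) > detections_per_im:
    image_thresh = np.sort(image_scores)[-detections_per_im]
    per_class_scores = {j: s[s >= image_thresh]
                        for j, s in per_class_scores.items()}
print(per_class_scores)  # class 1 keeps 0.9, class 2 keeps 0.8 and 0.7 -> 3 in total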
@@ -19,7 +19,7 @@ sys.path.append('../../')
import cv2
import numpy as np

-from lib.ssd.data import transforms
+from lib.ssd import transforms

if __name__ == '__main__':
...
@@ -201,6 +201,16 @@ def expand_boxes(boxes, scale):
    return boxes_exp

+def flip_boxes(boxes, width):
+    """Flip the boxes horizontally."""
+    flip_boxes = boxes.copy()
+    old_x1 = boxes[:, 0].copy()
+    old_x2 = boxes[:, 2].copy()
+    flip_boxes[:, 0] = width - old_x2 - 1
+    flip_boxes[:, 2] = width - old_x1 - 1
+    return flip_boxes
+
def filter_boxes(boxes, min_size):
    """Remove all boxes with any side smaller than min size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
...
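A quick check of the new flip_boxes helper; the import path below is inferred from the lib.utils.boxes imports elsewhere in this commit and may differ:

import numpy as np
from lib.utils.boxes import flip_boxes  # assumed location of the hunk above

boxes = np.array([[10., 20., 30., 40.]])  # one (x1, y1, x2, y2) box
print(flip_boxes(boxes, width=100))
# [[69. 20. 89. 40.]] -- x coordinates are mirrored with the inclusive-pixel
# convention: new_x1 = width - old_x2 - 1, new_x2 = width - old_x1 - 1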
@@ -62,22 +62,20 @@ if __name__ == '__main__':
    if checkpoint is not None:
        cfg.TRAIN.WEIGHTS = checkpoint

-    # Setup MPI
-    if cfg.NUM_GPUS != dragon.mpi.size():
-        raise ValueError(
-            'Excepted {} mpi nodes, but got {}.'
-            .format(len(args.gpus), dragon.mpi.size())
-        )
-    GPUs = [i for i in range(cfg.NUM_GPUS)]
-    cfg.GPU_ID = GPUs[dragon.mpi.rank()]
-    dragon.mpi.add_parallel_group([i for i in range(cfg.NUM_GPUS)])
-    dragon.mpi.set_parallel_mode('NCCL' if cfg.USE_NCCL else 'MPI')
-
-    # Setup logger
-    if dragon.mpi.rank() != 0:
-        logger.set_root_logger(False)
+    # Setup the distributed environment
+    world_rank = dragon.distributed.get_rank()
+    world_size = dragon.distributed.get_world_size()
+    if cfg.NUM_GPUS != world_size:
+        raise ValueError(
+            'Excepted staring of {} processes, got {}.'
+            .format(cfg.NUM_GPUS, world_size)
+        )
+    logger.set_root_logger(world_rank == 0)
+
+    # Select the GPU depending on the rank of process
+    cfg.GPU_ID = [i for i in range(cfg.NUM_GPUS)][world_rank]

-    # Fix the random seeds (numpy and dragon) for reproducibility
+    # Fix the random seed for reproducibility
    numpy.random.seed(cfg.RNG_SEED)
    dragon.config.set_random_seed(cfg.RNG_SEED)
...
@@ -89,7 +87,8 @@ if __name__ == '__main__':
    # Ready to train the network
    logger.info('Output will be saved to `{:s}`'
                .format(coordinator.checkpoints_dir()))
-    train_net(coordinator, start_iter)
-
-    # Finalize mpi
-    dragon.mpi.finalize()
+    with dragon.distributed.new_group(
+            ranks=[i for i in range(cfg.NUM_GPUS)],
+            backend='NCCL' if cfg.USE_NCCL else 'MPI',
+            verbose=True).as_default():
+        train_net(coordinator, start_iter)
@@ -82,7 +82,7 @@ if __name__ == '__main__':
    if checkpoint is not None:
        cfg.TRAIN.WEIGHTS = checkpoint

-    # Fix the random seeds (numpy and dragon) for reproducibility
+    # Fix the random seed for reproducibility
    numpy.random.seed(cfg.RNG_SEED)
    dragon.config.set_random_seed(cfg.RNG_SEED)
...