Commit d3ed62db by Ting PAN

Support Mask R-CNN

1 parent 41b3932b
Showing with 4500 additions and 3762 deletions
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.3.0 (20191121)
Dragon Minimum Required (Version 0.3.0.dev20191121)
Changes:
Preview Features:
- New algorithm: Mask R-CNN.
- Add MobileNet (V2 and NAS) as backbones.
- Refactor the testing module; multi-GPU testing is supported.
Bugs fixed:
- Remove rotated boxes, use Mask R-CNN instead.
------------------------------------------------------------------------
SeetaDet 0.2.3 (20191101)
Dragon Minimum Required (Version 0.3.0.dev20191021)
......
...@@ -12,6 +12,10 @@ while the style of codes is PyTorch.
The torch-style codes help us to simplify the hierarchical pipeline of modern detection.
## Requirements
seeta-dragon >= 0.3.0.dev20191121
## Installation
#### 1. Install the required python packages
......
...@@ -5,7 +5,6 @@ rm -r build install *.c *.cpp
# Compile cpp modules
python setup.py build_ext --inplace
g++ -o ../lib/utils/ctypes_rbox.so -shared -fPIC -O2 rbox.cc -std=c++11 -fopenmp
# Compile cuda modules
cd build && cmake .. && make install && cd ..
......
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// Codes are based on:
//
// <https://github.com/facebookresearch/detectron2/blob/master/detectron2
// /layers/csrc/box_iou_rotated/box_iou_rotated_utils.h>
//
// ------------------------------------------------------------
#include <cmath>
#include <algorithm>
#include <omp.h>
template <typename T>
struct RotatedBox {
T x_ctr, y_ctr, w, h, a;
};
template <typename T>
struct Point {
T x, y;
Point(const T& px = 0, const T& py = 0) : x(px), y(py) {}
Point operator+(const Point& p) const {
return Point(x + p.x, y + p.y);
}
Point& operator+=(const Point& p) {
x += p.x;
y += p.y;
return *this;
}
Point operator-(const Point& p) const {
return Point(x - p.x, y - p.y);
}
Point operator*(const T coeff) const {
return Point(x * coeff, y * coeff);
}
};
template <typename T>
T dot_2d(const Point<T>& A, const Point<T>& B) {
return A.x * B.x + A.y * B.y;
}
template <typename T>
T cross_2d(const Point<T>& A, const Point<T>& B) {
return A.x * B.y - B.x * A.y;
}
template <typename T>
void get_rotated_vertices(
const RotatedBox<T>& box,
Point<T> (&pts)[4]) {
// M_PI / 180. == 0.01745329251
double theta = box.a * 0.01745329251;
T cosTheta2 = (T)cos(theta) * 0.5f;
T sinTheta2 = (T)sin(theta) * 0.5f;
// y: top --> down; x: left --> right
pts[0].x = box.x_ctr - sinTheta2 * box.h - cosTheta2 * box.w;
pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w;
pts[1].x = box.x_ctr + sinTheta2 * box.h - cosTheta2 * box.w;
pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w;
pts[2].x = 2 * box.x_ctr - pts[0].x;
pts[2].y = 2 * box.y_ctr - pts[0].y;
pts[3].x = 2 * box.x_ctr - pts[1].x;
pts[3].y = 2 * box.y_ctr - pts[1].y;
}
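For reference, a minimal NumPy sketch (illustrative, not part of this commit) of the same vertex computation; with angle 0 it reduces to the plain axis-aligned corners:

import numpy as np

def rotated_vertices(x_ctr, y_ctr, w, h, a):
    # Mirror of get_rotated_vertices above: four corners of a box rotated by `a` degrees.
    theta = np.deg2rad(a)
    cos2, sin2 = np.cos(theta) * 0.5, np.sin(theta) * 0.5
    p0 = (x_ctr - sin2 * h - cos2 * w, y_ctr + cos2 * h - sin2 * w)
    p1 = (x_ctr + sin2 * h - cos2 * w, y_ctr - cos2 * h - sin2 * w)
    p2 = (2 * x_ctr - p0[0], 2 * y_ctr - p0[1])  # corner opposite to p0
    p3 = (2 * x_ctr - p1[0], 2 * y_ctr - p1[1])  # corner opposite to p1
    return np.array([p0, p1, p2, p3])

print(rotated_vertices(0., 0., 4., 2., 0.))
# -> corners (-2, 1), (-2, -1), (2, -1), (2, 1), i.e. (x_ctr +/- w/2, y_ctr +/- h/2)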
template <typename T>
int get_intersection_points(
const Point<T> (&pts1)[4],
const Point<T> (&pts2)[4],
Point<T> (&intersections)[24]) {
// Line vector
// A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1]
Point<T> vec1[4], vec2[4];
for (int i = 0; i < 4; i++) {
vec1[i] = pts1[(i + 1) % 4] - pts1[i];
vec2[i] = pts2[(i + 1) % 4] - pts2[i];
}
// Line test - test all line combos for intersection
int num = 0; // number of intersections
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
// Solve for 2x2 Ax=b
T det = cross_2d(vec2[j], vec1[i]);
// This takes care of parallel lines
if (fabs(det) <= 1e-14) {
continue;
}
auto vec12 = pts2[j] - pts1[i];
T t1 = cross_2d(vec2[j], vec12) / det;
T t2 = cross_2d(vec1[i], vec12) / det;
if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) {
intersections[num++] = pts1[i] + vec1[i] * t1;
}
}
}
// Check for vertices of rect1 inside rect2
{
const auto& AB = vec2[0];
const auto& DA = vec2[3];
auto ABdotAB = dot_2d(AB, AB);
auto ADdotAD = dot_2d(DA, DA);
for (int i = 0; i < 4; i++) {
// assume ABCD is the rectangle, and P is the point to be judged
// P is inside ABCD iff. P's projection on AB lies within AB
// and P's projection on AD lies within AD
auto AP = pts1[i] - pts2[0];
auto APdotAB = dot_2d<T>(AP, AB);
auto APdotAD = -dot_2d<T>(AP, DA);
if ((APdotAB >= 0) &&
(APdotAD >= 0) &&
(APdotAB <= ABdotAB) &&
(APdotAD <= ADdotAD)) {
intersections[num++] = pts1[i];
}
}
}
// Reverse the check - check for vertices of rect2 inside rect1
{
const auto& AB = vec1[0];
const auto& DA = vec1[3];
auto ABdotAB = dot_2d<T>(AB, AB);
auto ADdotAD = dot_2d<T>(DA, DA);
for (int i = 0; i < 4; i++) {
auto AP = pts2[i] - pts1[0];
auto APdotAB = dot_2d<T>(AP, AB);
auto APdotAD = -dot_2d<T>(AP, DA);
if ((APdotAB >= 0) &&
(APdotAD >= 0) &&
(APdotAB <= ABdotAB) &&
(APdotAD <= ADdotAD)) {
intersections[num++] = pts2[i];
}
}
}
return num;
}
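The parametric solve in the loop above can be read in isolation; a small illustrative Python version (a hypothetical helper, not repo code) of the same cross-product formulation:

import numpy as np

def segment_intersection(p1, p2, q1, q2):
    # Segments are p1 + (p2 - p1) * t1 and q1 + (q2 - q1) * t2; both t's must lie in [0, 1].
    d1, d2 = np.subtract(p2, p1), np.subtract(q2, q1)
    det = np.cross(d2, d1)
    if abs(det) <= 1e-14:  # parallel or degenerate segments
        return None
    v = np.subtract(q1, p1)
    t1 = np.cross(d2, v) / det
    t2 = np.cross(d1, v) / det
    if 0. <= t1 <= 1. and 0. <= t2 <= 1.:
        return np.asarray(p1) + d1 * t1
    return None

print(segment_intersection((0, 0), (2, 0), (1, -1), (1, 1)))  # [1. 0.]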
template <typename T>
int convex_hull_graham(
const Point<T> (&p)[24],
const int& num_in,
Point<T> (&q)[24],
bool shift_to_zero = false) {
// Step 1:
// Find point with minimum y
// if more than 1 points have the same minimum y,
// pick the one with the minimum x.
int t = 0;
for (int i = 1; i < num_in; i++) {
if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) {
t = i;
}
}
auto& start = p[t]; // starting point
// Step 2:
// Subtract starting point from every point (for sorting in the next step)
for (int i = 0; i < num_in; i++) {
q[i] = p[i] - start;
}
// Swap the starting point to position 0
auto tmp = q[0];
q[0] = q[t];
q[t] = tmp;
// Step 3:
// Sort points 1 ~ num_in according to their relative cross-product values
// (essentially sorting according to angles)
// If the angles are the same, sort according to their distance to origin
T dist[24];
for (int i = 0; i < num_in; i++) {
dist[i] = dot_2d(q[i], q[i]);
}
std::sort(
q + 1, q + num_in, [](const Point<T>& A, const Point<T>& B) -> bool {
T temp = cross_2d<T>(A, B);
if (fabs(temp) < 1e-6) {
return dot_2d(A, A) < dot_2d(B, B);
} else {
return temp > 0;
}
});
// Step 4:
// Make sure there are at least 2 points (that don't overlap with each other)
// in the stack
int k; // index of the non-overlapped second point
for (k = 1; k < num_in; k++) {
if (dist[k] > 1e-8) {
break;
}
}
if (k == num_in) {
// We reach the end, which means the convex hull is just one point
q[0] = p[t];
return 1;
}
q[1] = q[k];
int m = 2; // 2 points in the stack
// Step 5:
// Finally we can start the scanning process.
// When a non-convex relationship between the 3 points is found
// (either concave shape or duplicated points),
// we pop the previous point from the stack
// until the 3-point relationship is convex again, or
// until the stack only contains two points
for (int i = k + 1; i < num_in; i++) {
while (m > 1 && cross_2d(q[i] - q[m - 2], q[m - 1] - q[m - 2]) >= 0) {
m--;
}
q[m++] = q[i];
}
// Step 6 (Optional):
// In the general case we need the original coordinates, so we
// need to shift the points back (reverting Step 2)
// But if we're only interested in getting the area/perimeter of the shape
// We can simply return.
if (!shift_to_zero) {
for (int i = 0; i < m; i++) {
q[i] += start;
}
}
return m;
}
template <typename T>
T polygon_area(const Point<T> (&q)[24], const int& m) {
if (m <= 2) {
return 0;
}
T area = 0;
for (int i = 1; i < m - 1; i++) {
area += fabs(cross_2d(q[i] - q[0], q[i + 1] - q[0]));
}
return area / 2.0;
}
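polygon_area computes the area as a triangle fan anchored at q[0]; a tiny sanity check of the same idea (illustration only, not repo code):

import numpy as np

def fan_area(q):
    # Area of a convex polygon with ordered vertices, via the triangle fan from q[0].
    q = np.asarray(q, dtype='float64')
    if len(q) <= 2:
        return 0.
    v = q[1:] - q[0]
    return float(np.abs(np.cross(v[:-1], v[1:])).sum()) / 2.

print(fan_area([(0, 0), (1, 0), (1, 1), (0, 1)]))  # 1.0 for the unit square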
template <typename T>
T rotated_boxes_intersection(
const RotatedBox<T>& box1,
const RotatedBox<T>& box2) {
// There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned
// from get_intersection_points
Point<T> intersectPts[24], orderedPts[24];
Point<T> pts1[4];
Point<T> pts2[4];
get_rotated_vertices(box1, pts1);
get_rotated_vertices(box2, pts2);
int num = get_intersection_points(pts1, pts2, intersectPts);
if (num <= 2) {
return 0.0;
}
// Convex Hull to order the intersection points in clockwise order and find
// the contour area.
int num_convex = convex_hull_graham(intersectPts, num, orderedPts, true);
return polygon_area(orderedPts, num_convex);
}
template <typename T>
T single_box_iou_rotated(
T const* const box1_raw,
T const* const box2_raw) {
// shift both centers to their midpoint to achieve higher precision in the result
RotatedBox<T> box1, box2;
auto center_shift_x = (box1_raw[0] + box2_raw[0]) / 2.0;
auto center_shift_y = (box1_raw[1] + box2_raw[1]) / 2.0;
box1.x_ctr = box1_raw[0] - center_shift_x;
box1.y_ctr = box1_raw[1] - center_shift_y;
box1.w = box1_raw[2];
box1.h = box1_raw[3];
box1.a = box1_raw[4];
box2.x_ctr = box2_raw[0] - center_shift_x;
box2.y_ctr = box2_raw[1] - center_shift_y;
box2.w = box2_raw[2];
box2.h = box2_raw[3];
box2.a = box2_raw[4];
const T area1 = box1.w * box1.h;
const T area2 = box2.w * box2.h;
if (area1 < 1e-14 || area2 < 1e-14) {
return 0.f;
}
const T inter = rotated_boxes_intersection(box1, box2);
const T iou = inter / (area1 + area2 - inter);
return iou;
}
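A worked example of the value this should produce (plain arithmetic, independent of the code above): two axis-aligned 4 x 2 boxes in (x_ctr, y_ctr, w, h, angle) form, with centers one unit apart along x, overlap in a 3 x 2 region.

# box1 = (0, 0, 4, 2, 0), box2 = (1, 0, 4, 2, 0), both with angle 0.
area1 = area2 = 4 * 2                  # 8 each
inter = 3 * 2                          # x-overlap [-1, 2] is 3 wide, y-overlap [-1, 1] is 2 tall
iou = inter / (area1 + area2 - inter)  # 6 / 10
print(iou)                             # 0.6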
extern "C" {
void apply_cpu_nms(
double* dets,
int* indices,
int& n,
double threshold) {
int count = 0;
for(int i = 0; i < n; i++) {
bool keep = true;
auto* box1 = dets + i * 6;
for(int j = 0; j < count; j++) {
auto* box2 = dets + indices[j] * 6;
auto ovr = single_box_iou_rotated(box1, box2);
if (ovr > threshold) {
keep = false;
break;
}
}
if (keep) {
indices[count] = i;
count++;
}
}
n = count;
}
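apply_cpu_nms is the usual greedy NMS, assuming `dets` arrive sorted by descending score; the same logic in a short Python sketch with a pluggable IoU function (illustrative only):

def greedy_nms(dets, iou_fn, threshold):
    # Keep a detection unless it overlaps an already-kept one above `threshold`.
    # `dets` is assumed sorted by descending score; `iou_fn(a, b)` returns the
    # overlap of two boxes (for rotated boxes, the IoU defined above).
    keep = []
    for i, box in enumerate(dets):
        if all(iou_fn(box, dets[j]) <= threshold for j in keep):
            keep.append(i)
    return keep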
void bbox_overlaps(
double* boxes1,
double* boxes2,
int* shape,
double* overlaps) {
int N = shape[0], K = shape[1];
#pragma omp parallel for num_threads(std::min(omp_get_num_procs(), 4))
for (int i = 0; i < N; i++) {
auto* box1 = boxes1 + i * 5;
for (int j = 0; j < K; j++) {
auto* box2 = boxes2 + j * 5;
overlaps[i * K + j] = single_box_iou_rotated(box1, box2);
}
}
}
}
...@@ -41,6 +41,9 @@ __C.TRAIN.WEIGHTS = ''
# Database to train
__C.TRAIN.DATABASE = ''
# The number of workers to transform data
__C.TRAIN.NUM_WORKERS = 3
# Scales to use during training (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,)
...@@ -151,10 +154,10 @@ __C.TEST.SOFT_NMS_SIGMA = 0.5
# The top-k prior boxes before nms.
__C.TEST.NMS_TOP_K = 400
# The threshold for prAttrDicting boxes # The threshold for predicting boxes
__C.TEST.SCORE_THRESH = 0.05
# The threshold for prAttrDicting masks # The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5
# NMS threshold used on RPN proposals
...@@ -192,8 +195,9 @@ __C.MODEL = AttrDict()
# The type of the model
# ('faster_rcnn',
# 'ssd', # 'mask_rcnn',
# 'retinanet',
# 'ssd',
# )
__C.MODEL.TYPE = ''
...@@ -361,14 +365,14 @@ __C.SSD.NUM_CONVS = 0
# Weight for bbox regression loss
__C.SSD.BBOX_REG_WEIGHT = 1.
__C.SSD.MULTIBOX = AttrDict()
# MultiBox configs
__C.SSD.MULTIBOX = AttrDict()
__C.SSD.MULTIBOX.STRIDES = []
__C.SSD.MULTIBOX.MIN_SIZES = []
__C.SSD.MULTIBOX.MAX_SIZES = []
__C.SSD.MULTIBOX.ASPECT_RATIOS = []
__C.SSD.MULTIBOX.ASPECT_ANGLES = []
# OHEM configs
__C.SSD.OHEM = AttrDict()
# The threshold for selecting negative bbox in hard example mining
__C.SSD.OHEM.NEG_OVERLAP = 0.5
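The options above are plain AttrDict entries on the global `cfg`; a hedged usage sketch (the option names are taken from this diff, the values are illustrative):

from lib.core.config import cfg

cfg.MODEL.TYPE = 'mask_rcnn'    # the new pipeline added by this commit
cfg.TRAIN.NUM_WORKERS = 3       # data-transforming workers added by this commit
cfg.TEST.SCORE_THRESH = 0.05    # threshold for predicting boxes
cfg.TEST.BINARY_THRESH = 0.5    # threshold for predicting masks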
......
...@@ -21,46 +21,56 @@ import cv2
import dragon
from lib.core.config import cfg
from lib.datasets.example import Example
from lib.datasets.factory import get_imdb
from lib.faster_rcnn.data_transformer import DataTransformer
class TestServer(object): class _Server(object):
def __init__(self, output_dir):
self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.num_images, self.num_classes, self.classes = \
self.imdb.num_images, self.imdb.num_classes, self.imdb.classes
self.data_reader = dragon.io.DataReader(
dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
self.data_transformer = DataTransformer()
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH * 5)
self.data_reader.start()
self.gt_recs = collections.OrderedDict()
self.output_dir = output_dir
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls): def evaluate_detections(self, all_boxes):
self.data_transformer = transformer_cls() pass
def evaluate_segmentations(self, all_boxes, all_masks):
pass
def get_image(self):
example = self.data_reader.q_out.get() pass
image = self.data_transformer.get_image(example)
image_id, objects = self.data_transformer.get_annotations(example)
self.gt_recs[image_id] = {
'objects': objects,
'width': image.shape[1],
'height': image.shape[0],
}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
class TestServer(_Server):
def __init__(self, output_dir):
super(TestServer, self).__init__(output_dir)
self.imdb = get_imdb(cfg.TEST.DATABASE)
self.imdb.competition_mode(cfg.TEST.COMPETITION_MODE)
self.classes = self.imdb.classes
self.num_images = self.imdb.num_images
self.num_classes = self.imdb.num_classes
self.data_reader = dragon.io.DataReader(
dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH * 5)
self.data_reader.start()
self.gt_recs = collections.OrderedDict()
def get_image(self):
example = Example(self.data_reader.q_out.get())
image, image_id = example.image, example.id
self.gt_recs[image_id] = {
'height': example.height,
'width': example.width,
'objects': example.objects,
}
return image_id, image
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
...@@ -70,7 +80,7 @@ class TestServer(object):
return self.gt_recs
def evaluate_detections(self, all_boxes):
if cfg.TEST.PROTOCOL == 'null': if cfg.TEST.PROTOCOL == 'dump':
self.imdb.dump_detections(all_boxes, self.output_dir)
else:
self.imdb.evaluate_detections(
...@@ -88,56 +98,20 @@ class TestServer(object):
)
class InferServer(object): class InferServer(_Server):
def __init__(self, output_dir):
super(InferServer, self).__init__(output_dir)
self.images_dir = cfg.TEST.DATABASE
self.imdb = get_imdb('taas:/empty')
self.images = os.listdir(self.images_dir)
self.num_images, self.num_classes, self.classes = \ self.classes = cfg.MODEL.CLASSES
len(self.images), cfg.MODEL.NUM_CLASSES, cfg.MODEL.CLASSES self.num_images = len(self.images)
self.data_transformer = DataTransformer() self.num_classes = cfg.MODEL.NUM_CLASSES
self.gt_recs = collections.OrderedDict()
self.output_dir = output_dir
self.image_idx = 0
if cfg.VIS_ON_FILE:
self.vis_dir = os.path.join(self.output_dir, 'vis')
if not os.path.exists(self.vis_dir):
os.makedirs(self.vis_dir)
def set_transformer(self, transformer_cls):
self.data_transformer = transformer_cls()
def get_image(self):
image_name = self.images[self.image_idx]
image_id = image_name.split('.')[0]
image = cv2.imread(os.path.join(self.images_dir, image_name))
self.image_idx = (self.image_idx + 1) % self.num_images
self.gt_recs[image_id] = {'width': image.shape[1], 'height': image.shape[0]}
return image_id, image
def get_save_filename(self, image_id, ext='.jpg'):
return os.path.join(self.vis_dir, image_id + ext) \
if cfg.VIS_ON_FILE else None
def get_records(self):
if len(self.gt_recs) != self.num_images:
raise RuntimeError(
'Loading {} records, while {} required.'
.format(len(self.gt_recs), self.num_images),
)
return self.gt_recs
def evaluate_detections(self, all_boxes):
self.imdb.evaluate_detections(
all_boxes,
self.get_records(),
self.output_dir,
)
def evaluate_segmentations(self, all_boxes, all_masks):
self.imdb.evaluate_segmentations(
all_boxes,
all_masks,
self.get_records(),
self.output_dir,
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import importlib
import multiprocessing
import numpy as np
from lib.core.config import cfg
from lib.utils import time_util
from lib.utils.vis import vis_one_image
def run_test_net(checkpoint, server, devices):
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
devices = devices if devices else [cfg.GPU_ID]
num_workers = len(devices)
test_fn = importlib.import_module(
'lib.%s.test' % cfg.MODEL.TYPE).test_net
_t = time_util.new_timers('im_detect', 'mask_detect', 'misc')
vis_image_dict = {}
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
all_masks = [[[] for _ in range(num_images)] for _ in range(num_classes)]
queues = [
multiprocessing.Queue()
for _ in range(num_workers + 1)
]
workers = [
multiprocessing.Process(
target=test_fn,
kwargs={
'weights': checkpoint,
'num_classes': server.num_classes,
'q_in': queues[i],
'q_out': queues[-1],
'device': devices[i],
}
) for i in range(num_workers)
]
for process in workers:
process.start()
for i in range(num_images):
image_id, raw_image = server.get_image()
queues[i % num_workers].put((i, raw_image))
# Hold the image until the visualization
if cfg.VIS or cfg.VIS_ON_FILE:
vis_image_dict[i] = (image_id, raw_image)
for i in range(num_workers):
queues[i].put((-1, None))
for count in range(num_images):
i, time_diffs, results = queues[-1].get()
# Unpack the returned results
boxes_this_image = results['boxes']
masks_this_image = results.get('masks', None)
# Disable mask collection if the detector returns no masks
if masks_this_image is None:
all_masks = None
# Update time difference
for name, diff in time_diffs.items():
_t[name].add_diff(diff)
# Visualize the results if necessary
if cfg.VIS or cfg.VIS_ON_FILE:
image_id, raw_image = vis_image_dict[i]
vis_one_image(
raw_image,
classes,
boxes_this_image,
masks_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_id),
)
del vis_image_dict[i]
_t['misc'].tic()
# Pack the results in the class-major order
for j in range(1, num_classes):
all_boxes[j][i] = boxes_this_image[j]
if all_masks is not None:
if j < len(masks_this_image):
all_masks[j][i] = masks_this_image[j]
# Limit to max_per_image detections *over all classes*
max_detections = cfg.TEST.DETECTIONS_PER_IM
if max_detections > 0:
scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
scores.append(all_boxes[j][i][:, -1])
if len(scores) > 0:
scores = np.hstack(scores)
if len(scores) > max_detections:
thr = np.sort(scores)[-max_detections]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= thr)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
if all_masks is not None:
all_masks[j][i] = all_masks[j][i][keep]
_t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s|{:.3f}s {:.3f}s'
.format(count + 1, num_images,
_t['im_detect'].average_time,
_t['mask_detect'].average_time,
_t['misc'].average_time),
end='')
print('\n\n>>> Evaluating detections\n')
server.evaluate_detections(all_boxes)
if all_masks is not None:
print('>>> Evaluating segmentations\n')
server.evaluate_segmentations(all_boxes, all_masks)
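A sketch of how the refactored multi-GPU test entry might be driven; the `TestServer` and `run_test_net` signatures come from this diff, but the module paths, checkpoint path, and device list are assumptions:

from lib.core.test import TestServer            # assumed location of the server classes
from lib.core.test_engine import run_test_net   # assumed location of the code above

server = TestServer(output_dir='experiments/mask_rcnn/results')
# One worker process per device; each pulls (index, image) pairs from its queue and
# pushes (index, timings, {'boxes': ..., 'masks': ...}) to the shared output queue.
run_test_net('experiments/mask_rcnn/checkpoints/model_final.pkl', server, devices=[0, 1])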
...@@ -31,9 +31,9 @@ from lib.utils.stats import SmoothedValue
class SolverWrapper(object):
def __init__(self, coordinator):
self.output_dir = coordinator.checkpoints_dir()
self.solver = SGDSolver()
self.detector = self.solver.detector
self.output_dir = coordinator.checkpoints_dir()
# Setup the detector
self.detector.load_weights(cfg.TRAIN.WEIGHTS)
...@@ -89,7 +89,6 @@ class SolverWrapper(object):
display = self.solver.iter % cfg.SOLVER.DISPLAY == 0
stats = self.solver.one_step()
self.add_metrics(stats)
self.send_metrics(stats)
if display:
logger.info(
...@@ -104,6 +103,7 @@ class SolverWrapper(object):
continue
logger.info(' ' * 10 + 'Train net output({}): {}'
.format(k, v.GetMedianValue()))
self.send_metrics(stats)
def train_model(self):
"""Network training loop."""
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import os
import sys
import numpy as np
from lib.core.config import cfg
from lib.pycocotools import mask as mask_tools
from lib.pycocotools.coco import COCO
from lib.pycocotools.cocoeval import COCOeval
from lib.utils import mask as mask_util
class COCOEvaluator(object):
def __init__(self, imdb, ann_file=None):
self.imdb = imdb
if ann_file is not None and \
os.path.exists(ann_file):
self.coco = COCO(ann_file)
cats = self.coco.loadCats(self.coco.getCatIds())
self.class_to_cat_id = dict(
zip([c['name'] for c in cats],
self.coco.getCatIds()))
else:
self.coco = None
self.class_to_cat_id = None
def bbox_results_one_category(self, boxes, cat_id, gt_recs):
ix, results = 0, []
for image_name, rec in gt_recs.items():
dets = boxes[ix]
ix += 1
if isinstance(dets, list) and len(dets) == 0:
continue
dets = dets.astype('float64')
scores = dets[:, -1]
xs = dets[:, 0]
ys = dets[:, 1]
ws = dets[:, 2] - xs + 1
hs = dets[:, 3] - ys + 1
results.extend([{
'image_id': self.get_image_id(image_name),
'category_id': cat_id,
'bbox': [xs[k], ys[k], ws[k], hs[k]],
'score': scores[k],
} for k in range(dets.shape[0])])
return results
def do_bbox_eval(self, res_file):
coco_dt = self.coco.loadRes(res_file)
coco_eval = COCOeval(self.coco, coco_dt, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
self.print_coco_eval_results(coco_eval)
def do_segm_eval(self, res_file):
coco_dt = self.coco.loadRes(res_file)
coco_eval = COCOeval(self.coco, coco_dt, 'segm')
coco_eval.evaluate()
coco_eval.accumulate()
self.print_coco_eval_results(coco_eval)
@staticmethod
def encode_masks(masks, boxes, im_h, im_w):
mask_image = mask_util.project_masks(
masks, boxes, im_h, im_w,
cfg.TEST.BINARY_THRESH)
return mask_tools.encode(mask_image)
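encode_masks first pastes the per-ROI masks back into a full-resolution binary image (project_masks) and then RLE-encodes it; a minimal sketch of the encoding step alone, assuming the bundled pycocotools-style codec that expects a Fortran-ordered uint8 array of shape (H, W, N):

import numpy as np
from lib.pycocotools import mask as mask_tools

# One full-image binary mask, Fortran order as the RLE codec expects.
mask_image = np.zeros((480, 640, 1), dtype=np.uint8, order='F')
mask_image[100:200, 150:300, 0] = 1
rles = mask_tools.encode(mask_image)  # one RLE dict per instance
print(rles[0]['size'])                # [480, 640]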
@staticmethod
def get_prefix(type='bbox'):
if type == 'bbox':
return 'detections'
elif type == 'segm':
return 'segmentations'
elif type == 'kpt':
return 'keypoints'
return ''
@staticmethod
def get_annotations_file(results_folder, type='bbox'):
# experiments/model_id/annotations/[GT]detections.json
filename = '[GT]' + COCOEvaluator.get_prefix(type) + '.json'
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
@staticmethod
def get_image_id(image_name):
image_id = image_name.split('_')[-1].split('.')[0]
try:
return int(image_id)
except ValueError:
return image_name
def get_results_file(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>.json
filename = self.get_prefix(type) + self.imdb.comp_id + '.json'
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def print_coco_eval_results(self, coco_eval, iou_thr=(0.5, 0.95)):
def get_thr_ind(coco_eval, thr):
ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
(coco_eval.params.iouThrs < thr + 1e-5))[0][0]
iou_thr = coco_eval.params.iouThrs[ind]
assert np.isclose(iou_thr, thr)
return ind
ind_lo = get_thr_ind(coco_eval, iou_thr[0])
ind_hi = get_thr_ind(coco_eval, iou_thr[1])
# Precision has dims (iou, recall, cls, area range, max dets)
# Area range index 0: all area ranges
# Max dets index 2: 100 per image
precision_res = coco_eval.eval['precision']
precision = precision_res[ind_lo:(ind_hi + 1), :, :, 0, 2]
ap_default = np.mean(precision[precision > -1])
print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
'~~~~'.format(iou_thr[0], iou_thr[1]))
print('{:.1f}'.format(100 * ap_default))
for cls_ind, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
precision = precision_res[ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2]
ap = np.mean(precision[precision > -1])
print('{:.1f}'.format(100 * ap))
print('~~~~ Summary metrics ~~~~')
coco_eval.summarize()
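The slicing above depends on the layout of `coco_eval.eval['precision']`; as a reading aid, the per-category AP over IoU in [0.5, 0.95] reduces to the following (hypothetical helper, same indexing as the code above):

import numpy as np

# precision has shape (num_iou_thrs, num_recall_pts, num_classes, num_areas, num_max_dets);
# area index 0 means 'all' areas and max-dets index 2 means 100 detections per image.
def category_ap(precision, ind_lo, ind_hi, coco_cat_index):
    p = precision[ind_lo:(ind_hi + 1), :, coco_cat_index, 0, 2]
    return float(np.mean(p[p > -1]))  # cells with value -1 hold no data and are skipped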
def segm_results_one_category(self, boxes, masks, cat_id, gt_recs):
def filter_boxes(dets):
boxes = dets[:, :4]
ws = boxes[:, 2] - boxes[:, 0]
hs = boxes[:, 3] - boxes[:, 1]
keep = np.where((ws >= 1) & (hs >= 1))[0]
return keep
results = []
ix = 0
for image_name, rec in gt_recs.items():
dets = boxes[ix].astype(np.float)
msks = masks[ix]
ix += 1
keep = filter_boxes(dets)
im_h, im_w = rec['height'], rec['width']
if len(keep) == 0:
continue
scores = dets[:, -1]
mask_encode = self.encode_masks(
msks[keep], dets[keep, :4], im_h, im_w)
for k in range(dets[keep].shape[0]):
rle = mask_encode[k]
if sys.version_info >= (3, 0):
rle['counts'] = rle['counts'].decode()
results.append({
'image_id': self.get_image_id(image_name),
'category_id': cat_id,
'segmentation': rle,
'score': scores[k],
})
return results
def write_bbox_annotations(self, gt_recs, output_dir):
# Build images
dataset = {'images': []}
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self.get_image_id(image_name),
'height': rec['height'], 'width': rec['width'],
})
# Build categories
dataset['categories'] = []
for cls in self.imdb.classes:
if cls == '__background__':
continue
dataset['categories'].append({
'name': cls,
'id': self.imdb.class_to_ind[cls],
})
# Build annotations
dataset['annotations'] = []
ann_id = 0
for image_name, rec in gt_recs.items():
for obj in rec['objects']:
x, y = obj['bbox'][0], obj['bbox'][1]
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
dataset['annotations'].append({
'id': str(ann_id),
'bbox': [x, y, w, h],
'area': w * h,
'iscrowd': obj['difficult'],
'image_id': self.get_image_id(image_name),
'category_id': self.imdb.class_to_ind[obj['name']],
})
ann_id += 1
ann_file = self.get_annotations_file(output_dir, 'bbox')
with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file
def write_bbox_results(self, all_boxes, gt_recs, output_dir):
filename = self.get_results_file(output_dir)
results = []
for cls_ind, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.imdb.num_classes - 1))
cat_id = self.class_to_cat_id[cls]
results.extend(self.bbox_results_one_category(
all_boxes[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid:
json.dump(results, fid)
return filename
def write_segm_annotations(self, gt_recs, output_dir):
# Build images
dataset = {'images': []}
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self.get_image_id(image_name),
'height': rec['height'], 'width': rec['width'],
})
# Build categories
dataset['categories'] = []
for cls in self.imdb._classes:
if cls == '__background__':
continue
dataset['categories'].append({
'name': cls,
'id': self.imdb.class_to_ind[cls],
})
# Build annotations
dataset['annotations'] = []
ann_id = 0
for image_name, rec in gt_recs.items():
mask_size = (rec['height'], rec['width'])
for obj in rec['objects']:
x, y = obj['bbox'][0], obj['bbox'][1]
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
mask = obj['mask']
if sys.version_info >= (3, 0):
mask = mask.decode()
dataset['annotations'].append({
'id': str(ann_id),
'bbox': [x, y, w, h],
'area': w * h,
'segmentation': {'size': mask_size, 'counts': mask},
'iscrowd': obj['difficult'],
'image_id': self.get_image_id(image_name),
'category_id': self.imdb.class_to_ind[obj['name']],
})
ann_id += 1
ann_file = self.get_annotations_file(output_dir, 'segm')
with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file
def write_segm_results(self, all_boxes, all_masks, gt_recs, output_dir):
filename = self.get_results_file(output_dir, 'segm')
results = []
for cls_ind, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.imdb.num_classes - 1))
cat_id = self.class_to_cat_id[cls]
results.extend(self.segm_results_one_category(
all_boxes[cls_ind], all_masks[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid:
json.dump(results, fid)
return filename
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
from lib.pycocotools import mask_utils
class Example(object):
"""Wrapper for annotated example."""
def __init__(self, datum):
"""Create a ``Example``.
Parameters
----------
datum : Dict
The data loaded from the dataset
"""
self._datum = datum
@property
def id(self):
"""Return the example id.
Returns
-------
str
The unique id.
"""
return self._datum['id']
@property
def image(self):
"""Return the image data.
Returns
-------
numpy.ndarray
The image.
"""
img = np.frombuffer(self._datum['content'], 'uint8')
return cv2.imdecode(img, 3)
@property
def height(self):
"""Return the image height.
Returns
-------
int
The height of image.
"""
return self._datum['height']
@property
def objects(self):
"""Return the annotated objects.
Returns
-------
Sequence[Dict]
The objects.
"""
objects = []
for ix, obj in enumerate(self._datum['object']):
mask = obj.get('mask', None)
if 'x3' in obj:
poly = np.array([
obj['x1'], obj['y1'],
obj['x2'], obj['y2'],
obj['x3'], obj['y3'],
obj['x4'], obj['y4']
], 'float32')
x, y, w, h = cv2.boundingRect(
poly.reshape((-1, 2)))
bbox = [x, y, x + w, y + h]
mask = mask_utils.poly2bytes(
[poly],
self._datum['height'],
self._datum['width'],
)
elif 'x2' in obj:
bbox = [obj['x1'], obj['y1'], obj['x2'], obj['y2']]
elif 'xmin' in obj:
bbox = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']]
else:
bbox = obj['bbox']
objects.append({
'name': obj['name'],
'bbox': bbox,
'mask': mask,
'difficult': obj.get('difficult', 0),
})
return objects
@property
def width(self):
"""Return the image width.
Returns
-------
int
The width of image.
"""
return self._datum['width']
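When a record stores a quadrilateral ('x1' .. 'y4'), `objects` derives the axis-aligned box with cv2.boundingRect and rasterizes the polygon into an RLE mask via `mask_utils.poly2bytes`; a small sketch of the bounding-box part only (illustrative coordinates):

import cv2
import numpy as np

# A 4-point polygon annotation (x1, y1, ..., x4, y4), as handled in `objects` above.
poly = np.array([10, 10, 50, 12, 52, 40, 12, 38], 'float32')
x, y, w, h = cv2.boundingRect(poly.reshape((-1, 2)))
bbox = [x, y, x + w, y + h]  # axis-aligned [x1, y1, x2, y2] enclosing the quadrilateral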
...@@ -13,84 +13,118 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil import uuid
import dragon
from lib.core.config import cfg
from lib.datasets.coco_evaluator import COCOEvaluator
from lib.datasets.voc_evaluator import VOCEvaluator
class imdb(object):
def __init__(self, name): def __init__(self, source):
self._name = name self._source = source
self._num_classes = 0 self._num_images = 0
self._classes = [] self._classes = cfg.MODEL.CLASSES
self._class_to_ind = self._class_to_cat_id = \
@property dict(zip(self.classes, range(self.num_classes)))
def name(self): self._salt = str(uuid.uuid4())
return self._name self.config = {'cleanup': True, 'use_salt': True}
@property
def num_classes(self): def cache_path(self):
return len(self._classes) cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache'))
if not os.path.exists(cache_path):
os.makedirs(cache_path)
return cache_path
@property
def classes(self):
return self._classes
@property
def cache_path(self): def class_to_ind(self):
cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache')) return self._class_to_ind
if not os.path.exists(cache_path):
os.makedirs(cache_path)
return cache_path
@property
def source(self): def comp_id(self):
excepted_source = os.path.join(self.cache_path, self.name) return '_' + self._salt if self.config['use_salt'] else ''
if not os.path.exists(excepted_source):
raise RuntimeError( @property
'Excepted source from: {}, ' def num_classes(self):
'but it is not existed.' return len(self._classes)
.format(excepted_source)
)
return excepted_source
@property
def num_images(self): def num_images(self):
return dragon.io.SeetaRecordDataset(self.source).size return self._num_images
@property
def source(self):
return self._source
def competition_mode(self, on):
if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
def dump_detections(self, all_boxes, output_dir):
dataset = dragon.io.SeetaRecordDataset(self.source) pass
for file in ('data.data', 'data.index', 'data.meta'):
file = os.path.join(output_dir, file)
if os.path.exists(file):
os.remove(file)
writer = dragon.io.SeetaRecordWriter(output_dir, dataset.protocol)
for i in range(len(dataset)):
example = dataset.get()
example['object'] = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
detections = all_boxes[cls_ind][i]
if len(detections) == 0:
continue
for k in range(detections.shape[0]):
if detections[k, -1] < cfg.VIS_TH:
continue
example['object'].append({
'name': cls,
'xmin': float(detections[k][0]),
'ymin': float(detections[k][1]),
'xmax': float(detections[k][2]),
'ymax': float(detections[k][3]),
'difficult': 0,
})
writer.write(example)
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
pass protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
evaluator = VOCEvaluator(self)
evaluator.write_bbox_results(all_boxes, gt_recs, output_dir)
if '!' not in protocol:
for ovr in (0.5, 0.7):
evaluator.do_bbox_eval(
gt_recs,
output_dir,
iou=ovr,
use_07_metric='2007' in protocol,
)
elif 'coco' in protocol:
ann_file = cfg.TEST.JSON_FILE
evaluator = COCOEvaluator(self, ann_file)
if evaluator.coco is None:
ann_file = evaluator \
.write_bbox_annotations(
gt_recs, output_dir)
evaluator = COCOEvaluator(self, ann_file)
res_file = evaluator.write_bbox_results(
all_boxes, gt_recs, output_dir)
if '!' not in protocol:
evaluator.do_bbox_eval(res_file)
def evaluate_masks(self, all_boxes, all_masks, output_dir): def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir):
pass protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
evaluator = VOCEvaluator(self)
evaluator.write_segm_results(all_boxes, all_masks, output_dir)
if '!' not in protocol:
for ovr in (0.5, 0.7):
evaluator.do_segm_eval(
gt_recs,
output_dir,
iou=ovr,
use_07_metric='2007' in protocol,
)
elif 'coco' in protocol:
ann_file = cfg.TEST.JSON_FILE
evaluator = COCOEvaluator(self, ann_file)
if evaluator.coco is None:
ann_file = evaluator \
.write_segm_annotations(
gt_recs, output_dir)
evaluator = COCOEvaluator(self, ann_file)
res_file = evaluator.write_segm_results(
all_boxes, all_masks, gt_recs, output_dir)
if '!' not in protocol:
evaluator.do_segm_eval(res_file)
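The dispatch above keys off substrings of `cfg.TEST.PROTOCOL`: 'voc' or 'coco' selects the evaluator, '2007' switches on the 11-point VOC metric, a '!' writes result files but skips the evaluation step, and 'dump' (handled in TestServer.evaluate_detections) re-writes a SeetaRecord dataset instead. For instance (values inferred from this diff):

cfg.TEST.PROTOCOL = 'voc2007'  # VOC-style AP with the 2007 11-point metric
cfg.TEST.PROTOCOL = 'coco'     # COCO-style bbox/segm evaluation
cfg.TEST.PROTOCOL = 'coco!'    # write COCO-format results, skip the evaluation
cfg.TEST.PROTOCOL = 'dump'     # dump detections back into a SeetaRecord dataset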
...@@ -17,496 +17,45 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import os
import sys
import uuid
import cv2 import dragon
import numpy as np
try:
import cPickle
except:
import pickle as cPickle
from lib.core.config import cfg
from lib.datasets.imdb import imdb
from lib.datasets.voc_eval import voc_bbox_eval
from lib.datasets.voc_eval import voc_segm_eval
from lib.pycocotools.mask import encode as encode_masks
from lib.utils import boxes as box_utils
class TaaS(imdb):
def __init__(self, source):
imdb.__init__(self, 'taas') imdb.__init__(self, source)
self._classes = cfg.MODEL.CLASSES self._dataset = dragon.io.SeetaRecordDataset
self._source = source self._num_images = self._dataset(self.source).size
self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
self._class_to_cat_id = self._class_to_ind def dump_detections(self, all_boxes, output_dir):
self._salt = str(uuid.uuid4()) dataset = self._dataset(self.source)
self.config = {'cleanup': True, 'use_salt': True} for file in ('data.data', 'data.index', 'data.meta'):
file = os.path.join(output_dir, file)
@property if os.path.exists(file):
def source(self): os.remove(file)
excepted_source = self._source writer = dragon.io.SeetaRecordWriter(output_dir, dataset.protocol)
if not os.path.exists(excepted_source): for i in range(len(dataset)):
raise RuntimeError( example = dataset.get()
'Excepted source from: {}, ' example['object'] = []
'but it is not existed.'
.format(excepted_source)
)
return excepted_source
##############################################
# #
# UTILS #
# #
##############################################
def _get_comp_id(self):
return '_' + self._salt if self.config['use_salt'] else ''
@classmethod
def _get_prefix(cls, type='bbox'):
if type == 'bbox':
return 'detections_'
elif type == 'segm':
return 'segmentations_'
elif type == 'kpt':
return 'keypoints_'
return ''
def _get_voc_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>_aeroplane.txt
if type == 'bbox':
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.txt'
elif type == 'segm':
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '_{:s}.pkl'
else:
raise ValueError('Type of results can be either bbox or segm.')
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _get_coco_annotations_T(self, results_folder, type='bbox'):
# experiments/model_id/annotations/[GT]detections_taas_<comp_id>.json
filename = '[GT]_' + self._get_prefix(type) + self._name + '.json'
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def _get_coco_results_T(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_taas_<comp_id>.json
filename = self._get_prefix(type) + self._name + self._get_comp_id() + '.json'
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
##############################################
# #
# VOC #
# #
##############################################
def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir):
for cls_ind, cls in enumerate(self.classes): for cls_ind, cls in enumerate(self.classes):
if cls == '__background__': if cls == '__background__':
continue continue
print('Writing {} VOC format bbox results'.format(cls)) detections = all_boxes[cls_ind][i]
filename = self._get_voc_results_T(output_dir).format(cls) if len(detections) == 0:
with open(filename, 'wt') as f:
ix = 0
for image_id, rec in gt_recs.items():
dets = all_boxes[cls_ind][ix]
ix += 1
if len(dets) == 0:
continue
for k in range(dets.shape[0]):
content = '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}' \
.format(image_id, dets[k, -1],
dets[k, 0] + 1, dets[k, 1] + 1,
dets[k, 2] + 1, dets[k, 3] + 1)
if dets.shape[1] == 6:
content += ' {:.2f}'.format(dets[k, 4])
f.write(content + '\n')
def _write_voc_segm_results(self, all_boxes, all_masks, output_dir):
for cls_inds, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Writing {} VOC format segm results'.format(cls))
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
with open(bbox_filename, 'wb') as f:
cPickle.dump(all_boxes[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
with open(segm_filename, 'wb') as f:
cPickle.dump(all_masks[cls_inds], f, cPickle.HIGHEST_PROTOCOL)
def _do_voc_bbox_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self._classes):
if cls == '__background__':
continue
det_file = self._get_voc_results_T(output_dir).format(cls)
rec, prec, ap = voc_bbox_eval(
det_file, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
def _do_voc_segm_eval(self, gt_recs, output_dir, IoU=0.5, use_07_metric=True):
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.classes):
if cls == '__background__':
continue continue
segm_filename = self._get_voc_results_T(output_dir, type='segm').format(cls) for k in range(detections.shape[0]):
bbox_filename = segm_filename.replace('segmentations', 'detections') if detections[k, -1] < cfg.VIS_TH:
ap = voc_segm_eval(
bbox_filename, segm_filename, gt_recs, cls,
IoU=IoU, use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
##############################################
# #
# COCO #
# #
##############################################
@classmethod
def _get_coco_image_id(cls, image_name):
image_id = image_name.split('_')[-1].split('.')[0]
try:
return int(image_id)
except:
return image_name
@classmethod
def _encode_coco_masks(cls, masks, boxes, im_h, im_w):
num_pred = len(boxes)
assert len(masks) == num_pred
mask_image = np.zeros((im_h, im_w, num_pred), dtype=np.uint8, order='F')
M = masks[0].shape[0]
scale = (M + 2.0) / M
ref_boxes = box_utils.expand_boxes(boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
for i in range(num_pred):
ref_box = ref_boxes[i, :4]
mask = masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > cfg.TEST.BINARY_THRESH, dtype=np.uint8)
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_w)
y2 = min(ref_box[3] + 1, im_h)
mask_image[y1:y2, x1:x2, i] = \
mask[(y1 - ref_box[1]):(y2 - ref_box[1]),
(x1 - ref_box[0]):(x2 - ref_box[0])]
return encode_masks(mask_image)
def _write_coco_bbox_annotations(self, gt_recs, output_dir):
# Build images
dataset = {'images': []}
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width'],
})
# Build categories
dataset['categories'] = []
for cls in self._classes:
if cls == '__background__':
continue continue
dataset['categories'].append({ example['object'].append({
'name': cls, 'name': cls,
'id': self._class_to_ind[cls], 'xmin': float(detections[k][0]),
}) 'ymin': float(detections[k][1]),
# Build annotations 'xmax': float(detections[k][2]),
dataset['annotations'] = [] 'ymax': float(detections[k][3]),
ann_id = 0 'difficult': 0,
for image_name, rec in gt_recs.items():
for obj in rec['objects']:
x, y = obj['bbox'][0], obj['bbox'][1]
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
dataset['annotations'].append({
'id': str(ann_id),
'bbox': [x, y, w, h],
'area': w * h,
'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']],
})
ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='bbox')
with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file
def _write_coco_segm_annotations(self, gt_recs, output_dir):
# Build images
dataset = {'images': []}
for image_name, rec in gt_recs.items():
dataset['images'].append({
'file_name': image_name + '.jpg',
'id': self._get_coco_image_id(image_name),
'height': rec['height'], 'width': rec['width'],
}) })
# Build categories writer.write(example)
dataset['categories'] = []
for cls in self._classes:
if cls == '__background__':
continue
dataset['categories'].append({
'name': cls,
'id': self._class_to_ind[cls],
})
# Build annotations
dataset['annotations'] = []
ann_id = 0
for image_name, rec in gt_recs.items():
for obj in rec['objects']:
x, y = obj['bbox'][0], obj['bbox'][1]
w, h = obj['bbox'][2] - x + 1, obj['bbox'][3] - y + 1
dataset['annotations'].append({
'id': str(ann_id),
'bbox': [x, y, w, h],
'area': w * h,
'segmentation': {
'size': [rec['height'], rec['width']],
'counts': obj['mask'],
},
'iscrowd': obj['difficult'],
'image_id': self._get_coco_image_id(image_name),
'category_id': self._class_to_ind[obj['name']],
})
ann_id += 1
ann_file = self._get_coco_annotations_T(output_dir, type='segm')
with open(ann_file, 'w') as f:
json.dump(dataset, f)
return ann_file
def _coco_bbox_results_one_category(self, boxes, cat_id, gt_recs):
ix, results = 0, []
for image_name, rec in gt_recs.items():
dets = boxes[ix]
ix += 1
if isinstance(dets, list) and len(dets) == 0:
continue
dets = dets.astype(np.float)
scores = dets[:, -1]
xs = dets[:, 0]
ys = dets[:, 1]
ws = dets[:, 2] - xs + 1
hs = dets[:, 3] - ys + 1
results.extend(
[{'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
'bbox': [xs[k], ys[k], ws[k], hs[k]],
'score': scores[k],
} for k in range(dets.shape[0])]
)
return results
def _coco_segm_results_one_category(self, boxes, masks, cat_id, gt_recs):
def filter_boxes(dets):
boxes = dets[:, :4]
ws = boxes[:, 2] - boxes[:, 0]
hs = boxes[:, 3] - boxes[:, 1]
keep = np.where((ws >= 1) & (hs >= 1))[0]
return keep
results = []
ix = 0
for image_name, rec in gt_recs.items():
dets = boxes[ix].astype(np.float)
msks = masks[ix]
ix += 1
keep = filter_boxes(dets)
im_h, im_w = rec['height'], rec['width']
if len(keep) == 0:
continue
scores = dets[:, -1]
mask_encode = self._encode_coco_masks(
msks[keep], dets[keep, :4], im_h, im_w)
for k in range(dets[keep].shape[0]):
rle = mask_encode[k]
if sys.version_info >= (3, 0):
rle['counts'] = rle['counts'].decode()
results.append({
'image_id': self._get_coco_image_id(image_name),
'category_id': cat_id,
'segmentation': rle,
'score': scores[k],
})
return results
def _write_coco_bbox_results(self, all_boxes, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir)
results = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_bbox_results_one_category(
all_boxes[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid:
json.dump(results, fid)
return filename
def _write_coco_segm_results(self, all_boxes, all_masks, gt_recs, output_dir):
filename = self._get_coco_results_T(output_dir, type='segm')
results = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
print('Collecting {} results ({:d}/{:d})'
.format(cls, cls_ind, self.num_classes - 1))
cat_id = self._class_to_cat_id[cls]
results.extend(self._coco_segm_results_one_category(
all_boxes[cls_ind], all_masks[cls_ind], cat_id, gt_recs))
print('Writing results json to {}'.format(filename))
with open(filename, 'w') as fid:
json.dump(results, fid)
return filename
def _do_coco_bbox_eval(self, coco, res_file):
from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
coco_eval = COCOeval(coco, coco_dt, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
self._print_coco_eval_results(coco_eval)
def _do_coco_segm_eval(self, coco, res_file):
from lib.pycocotools.cocoeval import COCOeval
coco_dt = coco.loadRes(res_file)
coco_eval = COCOeval(coco, coco_dt, 'segm')
coco_eval.evaluate()
coco_eval.accumulate()
self._print_coco_eval_results(coco_eval)
def _print_coco_eval_results(self, coco_eval):
IoU_lo_thresh = 0.5
IoU_hi_thresh = 0.95
def _get_thr_ind(coco_eval, thr):
ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
(coco_eval.params.iouThrs < thr + 1e-5))[0][0]
iou_thr = coco_eval.params.iouThrs[ind]
assert np.isclose(iou_thr, thr)
return ind
ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
# Precision has dims (iou, recall, cls, area range, max dets)
# Area range index 0: all area ranges
# Max dets index 2: 100 per image
precision = \
coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
ap_default = np.mean(precision[precision > -1])
print('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
'~~~~'.format(IoU_lo_thresh, IoU_hi_thresh))
print('{:.1f}'.format(100 * ap_default))
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
# Minus 1 because of __background__
precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2]
ap = np.mean(precision[precision > -1])
print('{:.1f}'.format(100 * ap))
print('~~~~ Summary metrics ~~~~')
coco_eval.summarize()
##############################################
# #
# EVAL-API #
# #
##############################################
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
self._write_voc_bbox_results(all_boxes, gt_recs, output_dir)
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
self._do_voc_bbox_eval(
gt_recs, output_dir, IoU=0.5,
use_07_metric='2007' in protocol)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
self._do_voc_bbox_eval(
gt_recs, output_dir, IoU=0.7,
use_07_metric='2007' in protocol)
elif 'coco' in protocol:
from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE):
coco = COCO(cfg.TEST.JSON_FILE)
# We should override category id before writing results
cats = coco.loadCats(coco.getCatIds())
self._class_to_cat_id = dict(zip(
[c['name'] for c in cats], coco.getCatIds()))
else:
coco = None
res_file = self._write_coco_bbox_results(
all_boxes, gt_recs, output_dir)
if 'wo' not in protocol:
if coco is None:
ann_file = self._write_coco_bbox_annotations(gt_recs, output_dir)
coco = COCO(ann_file)
self._do_coco_bbox_eval(coco, res_file)
def evaluate_segmentations(self, all_boxes, all_masks, gt_recs, output_dir):
protocol = cfg.TEST.PROTOCOL
if 'voc' in protocol:
self._write_voc_segm_results(all_boxes, all_masks, output_dir)
if 'wo' not in protocol:
print('\n~~~~~~ Evaluation IoU@0.5 ~~~~~~')
self._do_voc_segm_eval(
gt_recs, output_dir, IoU=0.5,
use_07_metric='2007' in protocol)
print('~~~~~~ Evaluation IoU@0.7 ~~~~~~')
self._do_voc_segm_eval(
gt_recs, output_dir, IoU=0.7,
use_07_metric='2007' in protocol)
elif 'coco' in protocol:
from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE):
coco = COCO(cfg.TEST.JSON_FILE)
# We should override category id before writing results
cats = coco.loadCats(coco.getCatIds())
self._class_to_cat_id = dict(
zip([c['name'] for c in cats], coco.getCatIds()))
else:
coco = None
res_file = self._write_coco_segm_results(all_boxes, all_masks, gt_recs, output_dir)
if 'wo' not in protocol:
if coco is None:
coco = COCO(self._write_coco_segm_annotations(gt_recs, output_dir))
self._do_coco_segm_eval(coco, res_file)
def competition_mode(self, on):
if on:
self.config['use_salt'] = False
self.config['cleanup'] = False
else:
self.config['use_salt'] = True
self.config['cleanup'] = True
...@@ -20,15 +20,10 @@ from __future__ import print_function
import cv2
import numpy as np
try:
import cPickle
except:
import pickle as cPickle
from lib.core.config import cfg
from lib.pycocotools.mask_utils import mask_rle2im from lib.pycocotools import mask_utils
from lib.utils import rotated_boxes from lib.utils import boxes as box_util
from lib.utils.boxes import expand_boxes from lib.utils.framework import pickle
from lib.utils.mask import mask_overlap
...@@ -66,15 +61,15 @@ def voc_bbox_eval(
det_file,
gt_recs,
cls_name,
IoU=0.5, iou=0.5,
use_07_metric=False,
):
class_recs, n_pos = {}, 0
for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name] objects = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R]) bbox = np.array([x['bbox'] for x in objects])
diff = np.array([x['difficult'] for x in R]).astype(np.bool) diff = np.array([x['difficult'] for x in objects]).astype(np.bool)
det = [False] * len(R) det = [False] * len(objects)
n_pos = n_pos + sum(~diff)
class_recs[image_name] = {'bbox': bbox, 'difficult': diff, 'det': det}
...@@ -100,7 +95,7 @@ def voc_bbox_eval(
nd = len(image_ids)
tp, fp = np.zeros(nd), np.zeros(nd)
def overlaps4(bb, BBGT): def compute_overlaps(bb, BBGT):
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
...@@ -114,9 +109,6 @@ def voc_bbox_eval(
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
return inters / uni
def overlaps5(bb, BBGT):
return rotated_boxes.bbox_overlaps(bb.reshape((1, 5)), BBGT)[0]
for d in range(nd): for d in range(nd):
R = class_recs[image_ids[d]] R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float) bb = BB[d, :].astype(float)
...@@ -124,12 +116,11 @@ def voc_bbox_eval( ...@@ -124,12 +116,11 @@ def voc_bbox_eval(
BBGT = R['bbox'].astype(float) BBGT = R['bbox'].astype(float)
if BBGT.size > 0: if BBGT.size > 0:
overlaps = overlaps4(bb, BBGT) \ overlaps = compute_overlaps(bb, BBGT)
if len(bb) == 4 else overlaps5(bb, BBGT)
ov_max = np.max(overlaps) ov_max = np.max(overlaps)
j_max = np.argmax(overlaps) j_max = np.argmax(overlaps)
if ov_max > IoU: if ov_max > iou:
if not R['difficult'][j_max]: if not R['difficult'][j_max]:
if not R['det'][j_max]: if not R['det'][j_max]:
tp[d] = 1. tp[d] = 1.
...@@ -154,23 +145,29 @@ def voc_segm_eval( ...@@ -154,23 +145,29 @@ def voc_segm_eval(
seg_file, seg_file,
gt_recs, gt_recs,
cls_name, cls_name,
IoU=0.5, iou=0.5,
use_07_metric=False, use_07_metric=False,
): ):
# 0. Constants # 0. Constants
M = cfg.MRCNN.RESOLUTION M = cfg.MRCNN.RESOLUTION
binary_thresh = cfg.TEST.BINARY_THRESH binary_thresh = cfg.TEST.BINARY_THRESH
scale = (M + 2.0) / M scale = (M + 2.) / M
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
# 1. Get bbox & mask ground truths # 1. Get bbox & mask ground truths
image_names, class_recs, n_pos = [], {}, 0 image_names, class_recs, n_pos = [], {}, 0
for image_name, rec in gt_recs.items(): for image_name, rec in gt_recs.items():
R = [obj for obj in rec['objects'] if obj['name'] == cls_name] objects = [obj for obj in rec['objects'] if obj['name'] == cls_name]
bbox = np.array([x['bbox'] for x in R]) bbox = np.array([x['bbox'] for x in objects])
mask = np.array([mask_rle2im([x['mask']], rec['height'], rec['width'])[0] for x in R]) mask = np.array([
difficult = np.array([x['difficult'] for x in R]).astype(np.bool) mask_utils.bytes2img(
det = [False] * len(R) x['mask'],
rec['height'],
rec['width']
) for x in objects]
)
difficult = np.array([x['difficult'] for x in objects]).astype(np.bool)
det = [False] * len(objects)
n_pos = n_pos + sum(~difficult) n_pos = n_pos + sum(~difficult)
class_recs[image_name] = { class_recs[image_name] = {
'bbox': bbox, 'bbox': bbox,
...@@ -182,9 +179,9 @@ def voc_segm_eval( ...@@ -182,9 +179,9 @@ def voc_segm_eval(
# 2. Get predict pickle file for this class # 2. Get predict pickle file for this class
with open(det_file, 'rb') as f: with open(det_file, 'rb') as f:
boxes_pkl = cPickle.load(f) boxes_pkl = pickle.load(f)
with open(seg_file, 'rb') as f: with open(seg_file, 'rb') as f:
masks_pkl = cPickle.load(f) masks_pkl = pickle.load(f)
# 3. Pre-compute number of total instances to allocate memory # 3. Pre-compute number of total instances to allocate memory
num_images = len(gt_recs) num_images = len(gt_recs)
...@@ -222,7 +219,7 @@ def voc_segm_eval( ...@@ -222,7 +219,7 @@ def voc_segm_eval(
fp = np.zeros((num_pred, 1)) fp = np.zeros((num_pred, 1))
tp = np.zeros((num_pred, 1)) tp = np.zeros((num_pred, 1))
ref_boxes = expand_boxes(new_boxes, scale) ref_boxes = box_util.expand_boxes(new_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32) ref_boxes = ref_boxes.astype(np.int32)
for i in range(num_pred): for i in range(num_pred):
...@@ -261,13 +258,19 @@ def voc_segm_eval( ...@@ -261,13 +258,19 @@ def voc_segm_eval(
crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1, crop_mask = R['mask'][j][gt_mask_bound[1]:gt_mask_bound[3] + 1,
gt_mask_bound[0]:gt_mask_bound[2] + 1] gt_mask_bound[0]:gt_mask_bound[2] + 1]
ov = mask_overlap(gt_mask_bound, pred_mask_bound, crop_mask, pred_mask) ov = \
mask_overlap(
gt_mask_bound,
pred_mask_bound,
crop_mask,
pred_mask,
)
if ov > ovmax: if ov > ovmax:
ovmax = ov ovmax = ov
jmax = j jmax = j
if ovmax > IoU: if ovmax > iou:
if not R['difficult'][jmax]: if not R['difficult'][jmax]:
if not R['det'][jmax]: if not R['det'][jmax]:
tp[i] = 1. tp[i] = 1.
...@@ -281,7 +284,7 @@ def voc_segm_eval( ...@@ -281,7 +284,7 @@ def voc_segm_eval(
fp = np.cumsum(fp) fp = np.cumsum(fp)
tp = np.cumsum(tp) tp = np.cumsum(tp)
rec = tp / float(n_pos) rec = tp / float(n_pos)
# avoid divide by zero in case the first matches a difficult gt # Avoid divide by zero in case the first matches a difficult gt
prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps) prec = tp / np.maximum(fp + tp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric=use_07_metric) ap = voc_ap(rec, prec, use_07_metric=use_07_metric)
return ap return ap
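# A toy illustration (not called by the evaluation pipeline) of the
# precision/recall bookkeeping above, assuming four ranked detections
# against three positive instances; voc_ap is the same helper used by
# voc_bbox_eval and voc_segm_eval.
def _ap_sketch():
    tp = np.cumsum([1., 0., 1., 1.])  # true positives, in descending score order
    fp = np.cumsum([0., 1., 0., 0.])  # false positives, same order
    rec = tp / 3.                     # recall over n_pos = 3
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    return voc_ap(rec, prec, use_07_metric=False)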
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
from lib.datasets import voc_eval
from lib.utils.framework import pickle
class VOCEvaluator(object):
def __init__(self, imdb):
self.imdb = imdb
def do_bbox_eval(
self,
gt_recs,
output_dir,
iou=0.5,
use_07_metric=True,
):
aps = []
print('~~~~~~ Evaluation IoU@%s ~~~~~~' % str(iou))
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
det_file = self.get_results_file(output_dir).format(cls)
rec, prec, ap = \
voc_eval.voc_bbox_eval(
det_file,
gt_recs, cls,
iou=iou,
use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
def do_segm_eval(
self,
gt_recs,
output_dir,
iou=0.5,
use_07_metric=True,
):
aps = []
print('~~~~~~ Evaluation IoU@%s ~~~~~~' % str(iou))
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
for i, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
segm_filename = self.get_results_file(output_dir, 'segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
ap = voc_eval.voc_segm_eval(
bbox_filename,
segm_filename,
gt_recs, cls,
iou=iou,
use_07_metric=use_07_metric,
)
if ap > 0:
aps += [ap]
print('AP for {} = {:.4f}'.format(cls, ap))
print('Mean AP = {:.4f}\n'.format(np.mean(aps)))
@staticmethod
def get_prefix(type='bbox'):
if type == 'bbox':
return 'detections'
elif type == 'segm':
return 'segmentations'
elif type == 'kpt':
return 'keypoints'
return ''
def get_results_file(self, results_folder, type='bbox'):
# experiments/model_id/results/detections_<comp_id>_<class_name>.txt
if type == 'bbox':
filename = self.get_prefix(type) + self.imdb.comp_id + '_{:s}.txt'
elif type == 'segm':
filename = self.get_prefix(type) + self.imdb.comp_id + '_{:s}.pkl'
else:
raise ValueError('Type of results can be either bbox or segm.')
if not os.path.exists(results_folder):
os.makedirs(results_folder)
return os.path.join(results_folder, filename)
def write_bbox_results(self, all_boxes, gt_recs, output_dir):
for cls_ind, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
print('Writing {} VOC format bbox results'.format(cls))
filename = self.get_results_file(output_dir).format(cls)
with open(filename, 'wt') as f:
ix = 0
for image_id, rec in gt_recs.items():
dets = all_boxes[cls_ind][ix]
ix += 1
if len(dets) == 0:
continue
for k in range(dets.shape[0]):
content = '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}' \
.format(image_id, dets[k, -1],
dets[k, 0] + 1, dets[k, 1] + 1,
dets[k, 2] + 1, dets[k, 3] + 1)
if dets.shape[1] == 6:
content += ' {:.2f}'.format(dets[k, 4])
f.write(content + '\n')
def write_segm_results(self, all_boxes, all_masks, output_dir):
for cls_inds, cls in enumerate(self.imdb.classes):
if cls == '__background__':
continue
print('Writing {} VOC format segm results'.format(cls))
segm_filename = self.get_results_file(output_dir, 'segm').format(cls)
bbox_filename = segm_filename.replace('segmentations', 'detections')
with open(bbox_filename, 'wb') as f:
pickle.dump(all_boxes[cls_inds], f, pickle.HIGHEST_PROTOCOL)
with open(segm_filename, 'wb') as f:
pickle.dump(all_masks[cls_inds], f, pickle.HIGHEST_PROTOCOL)
...@@ -13,7 +13,11 @@ from __future__ import absolute_import ...@@ -13,7 +13,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.faster_rcnn.anchor_target_layer import AnchorTargetLayer from lib.faster_rcnn.anchor_target import AnchorTarget
from lib.faster_rcnn.data_layer import DataLayer from lib.faster_rcnn.data_loader import DataLoader
from lib.faster_rcnn.proposal_layer import ProposalLayer from lib.faster_rcnn.proposal import Proposal
from lib.faster_rcnn.proposal_target_layer import ProposalTargetLayer from lib.faster_rcnn.proposal_target import ProposalTarget
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.faster_rcnn.utils import map_returns_to_blobs
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.utils import boxes as box_util
from lib.utils.framework import new_tensor
class AnchorTarget(object):
"""Assign ground-truth targets to anchors."""
def __init__(self):
super(AnchorTarget, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
self.num_strides = len(self.strides)
self.allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = []
for i in range(self.num_strides):
self.base_anchors.append(
generate_anchors(
self.strides[i],
self.ratios,
np.array([self.scales[i]])
if self.num_strides > 1
else np.array(self.scales)
)
)
def __call__(self, features, gt_boxes, ims_info):
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
# Generate grid anchors from base
all_anchors = \
generate_grid_anchors(
features,
self.base_anchors,
self.strides,
)
num_anchors = all_anchors.shape[0]
# Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
labels_wide = -np.ones((num_images, num_anchors,), 'float32')
bbox_targets_wide = np.zeros((num_images, num_anchors, 4), 'float32')
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, ...)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self.allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self.allowed_border) &
(all_anchors[:, 1] >= -self.allowed_border) &
(all_anchors[:, 2] < im_info[1] + self.allowed_border) &
(all_anchors[:, 3] < im_info[0] + self.allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside, anchors = np.arange(num_anchors), all_anchors
num_inside = len(inds_inside)
labels = np.empty((num_inside,), 'float32')
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = box_util.bbox_overlaps(anchors, gt_boxes)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps,
np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(fg_inds, len(fg_inds) - num_fg, False)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(bg_inds, len(bg_inds) - num_bg, False)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), 'float32')
bbox_targets[fg_inds, :] = \
box_util.bbox_transform(
anchors[fg_inds, :],
gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), 'float32')
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), 'float32')
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
if self.num_strides > 1:
labels = labels_wide.reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
else:
A = self.base_anchors[0].shape[0]
height, width = features[0].shape[-2:]
labels = labels_wide \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = bbox_inside_weights_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = bbox_outside_weights_wide \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': new_tensor(labels),
'bbox_targets': new_tensor(bbox_targets),
'bbox_inside_weights': new_tensor(bbox_inside_weights),
'bbox_outside_weights': new_tensor(bbox_outside_weights),
}
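# Illustrative sketch (not used by the pipeline) of the labeling rule applied
# above, with 0.7/0.3 standing in for cfg.TRAIN.RPN_POSITIVE_OVERLAP and
# cfg.TRAIN.RPN_NEGATIVE_OVERLAP.
def _label_assignment_sketch():
    overlaps = np.array([[0.80, 0.10],   # anchor 0 vs. gt 0 / gt 1
                         [0.20, 0.25],   # anchor 1
                         [0.55, 0.40]])  # anchor 2
    labels = np.full((3,), -1., 'float32')    # start as "don't care"
    labels[overlaps.argmax(axis=0)] = 1       # best anchor of each gt -> fg
    labels[overlaps.max(axis=1) >= 0.7] = 1   # high-IoU anchors -> fg
    labels[overlaps.max(axis=1) < 0.3] = 0    # low-IoU anchors -> bg
    return labels                             # [1., 0., 1.]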
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
# C4 backbone takes the first stride
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets.
Parameters
----------
features : sequence of dragon.vm.torch.Tensor
The features of specific conv layers.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
height, width = features[0].shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
all_anchors = (self.base_anchors.reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
total_anchors = int(K * A)
# label: 1 is positive, 0 is negative, -1 is don't care
all_labels = -np.ones((num_images, total_anchors,), dtype=np.float32)
all_bbox_targets = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
all_bbox_inside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
all_bbox_outside_weights = np.zeros_like(all_bbox_targets, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(anchors, gt_boxes)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels first so that positive labels can clobber them
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
# Assign bg labels last so that negative labels can clobber positives
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds,
size=len(fg_inds) - num_fg,
replace=False,
)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds,
size=len(bg_inds) - num_bg,
replace=False,
)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
ex_rois=anchors[fg_inds, :],
gt_rois=gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
all_labels[ix, inds_inside] = labels # label
all_bbox_targets[ix, inds_inside] = bbox_targets
all_bbox_inside_weights[ix, inds_inside] = bbox_inside_weights
all_bbox_outside_weights[ix, inds_inside] = bbox_outside_weights
labels = all_labels \
.reshape((num_images, height, width, A)) \
.transpose(0, 3, 1, 2) \
.reshape((num_images, total_anchors))
bbox_targets = all_bbox_targets \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_inside_weights = all_bbox_inside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
bbox_outside_weights = all_bbox_outside_weights \
.reshape((num_images, height, width, A * 4)) \
.transpose(0, 3, 1, 2)
return {
'labels': array2tensor(labels),
'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights),
}
...@@ -27,11 +27,11 @@ from lib.utils import logger ...@@ -27,11 +27,11 @@ from lib.utils import logger
from lib.utils.blob import im_list_to_blob from lib.utils.blob import im_list_to_blob
class DataLayer(torch.nn.Module): class DataLoader(object):
"""Generate a mini-batch of data.""" """Provide mini-batches of data."""
def __init__(self): def __init__(self):
super(DataLayer, self).__init__() super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE) database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{ self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source), 'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
...@@ -39,12 +39,11 @@ class DataLayer(torch.nn.Module): ...@@ -39,12 +39,11 @@ class DataLayer(torch.nn.Module):
'shuffle': cfg.TRAIN.USE_SHUFFLE, 'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS, 'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2, 'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
}) })
def forward(self): def __call__(self):
# Get an array blob from the Queue
outputs = self.data_batch.get() outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data']) outputs['data'] = torch.from_numpy(outputs['data'])
return outputs return outputs
...@@ -59,14 +58,16 @@ class DataBatch(mp.Process): ...@@ -59,14 +58,16 @@ class DataBatch(mp.Process):
---------- ----------
dataset : lambda dataset : lambda
The creator of a dataset. The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False shuffle : bool, optional, default=False
Whether to shuffle the data. Whether to shuffle the data.
num_chunks : int, optional, default=0 num_chunks : int, optional, default=0
The number of chunks to split. The number of chunks to split.
batch_size : int, optional, default=2 batch_size : int, optional, default=2
The size of a mini-batch. The size of a mini-batch.
prefetch : int, optional, default=5 num_transformers : int, optional, default=3
The prefetch count. The number of workers to transform data.
""" """
super(DataBatch, self).__init__() super(DataBatch, self).__init__()
...@@ -83,20 +84,10 @@ class DataBatch(mp.Process): ...@@ -83,20 +84,10 @@ class DataBatch(mp.Process):
self._prefetch = kwargs.get('prefetch', 5) self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2) self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1) self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', -1) self._num_transformers = kwargs.get('num_transformers', 3)
self._max_transformers = kwargs.get('max_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1) self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True self.daemon = True
# Io-Aware Policy
if self._num_transformers == -1:
self._num_transformers = 2
# Add 1 transformer for color augmentation
if cfg.TRAIN.USE_COLOR_JITTER:
self._num_transformers += 1
self._num_transformers = min(
self._num_transformers, self._max_transformers)
# Initialize queues # Initialize queues
num_batches = self._prefetch * self._num_readers num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size) self.Q1 = mp.Queue(num_batches * self._batch_size)
......
...@@ -19,9 +19,9 @@ import cv2 ...@@ -19,9 +19,9 @@ import cv2
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils import rotated_boxes from lib.datasets.example import Example
from lib.utils import boxes as box_util
from lib.utils.blob import prep_im_for_blob from lib.utils.blob import prep_im_for_blob
from lib.utils.boxes import flip_boxes
from lib.utils.image import get_image_with_target_size from lib.utils.image import get_image_with_target_size
...@@ -44,32 +44,32 @@ class DataTransformer(multiprocessing.Process): ...@@ -44,32 +44,32 @@ class DataTransformer(multiprocessing.Process):
apply_flip=False, apply_flip=False,
offsets=None, offsets=None,
): ):
n_objects = 0 objects, n_objects = example.objects, 0
height, width = example.height, example.width
if not self._use_diff: if not self._use_diff:
for obj in example['object']: for obj in objects:
if obj.get('difficult', 0) == 0: if obj.get('difficult', 0) == 0:
n_objects += 1 n_objects += 1
else: else:
n_objects = len(example['object']) n_objects = len(objects)
roi_dict = { roi_dict = {
'width': example['width'],
'height': example['height'],
'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, 4), 'float32'), 'boxes': np.zeros((n_objects, 4), 'float32'),
'gt_classes': np.zeros((n_objects,), 'int32'),
} }
# Filter the difficult instances # Filter the difficult instances
object_idx = 0 object_idx = 0
for obj in example['object']: for obj in objects:
if not self._use_diff and \ if not self._use_diff and \
obj.get('difficult', 0) > 0: obj.get('difficult', 0) > 0:
continue continue
bbox = obj['bbox']
roi_dict['boxes'][object_idx, :] = [ roi_dict['boxes'][object_idx, :] = [
max(0, obj['xmin']), max(0, bbox[0]),
max(0, obj['ymin']), max(0, bbox[1]),
min(obj['xmax'], example['width'] - 1), min(bbox[2], width - 1),
min(obj['ymax'], example['height'] - 1), min(bbox[3], height - 1),
] ]
roi_dict['gt_classes'][object_idx] = \ roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[obj['name']] self._class_to_ind[obj['name']]
...@@ -77,8 +77,11 @@ class DataTransformer(multiprocessing.Process): ...@@ -77,8 +77,11 @@ class DataTransformer(multiprocessing.Process):
# Flip the boxes if necessary # Flip the boxes if necessary
if apply_flip: if apply_flip:
roi_dict['boxes'] = flip_boxes( roi_dict['boxes'] = \
roi_dict['boxes'], roi_dict['width']) box_util.flip_boxes(
roi_dict['boxes'],
width,
)
# Scale the boxes to the detecting scale # Scale the boxes to the detecting scale
roi_dict['boxes'] *= im_scale roi_dict['boxes'] *= im_scale
...@@ -94,61 +97,32 @@ class DataTransformer(multiprocessing.Process): ...@@ -94,61 +97,32 @@ class DataTransformer(multiprocessing.Process):
return roi_dict return roi_dict
@classmethod
def get_image(cls, example):
img = np.frombuffer(example['content'], np.uint8)
return cv2.imdecode(img, -1)
@classmethod
def get_annotations(cls, example):
objects = []
for ix, obj in enumerate(example['object']):
if 'x3' in obj:
bbox = rotated_boxes.vertices2box(
[obj['x1'], obj['y1'],
obj['x2'], obj['y2'],
obj['x3'], obj['y3'],
obj['x4'], obj['y4']]
)
elif 'x2' in obj:
bbox = [obj['x1'], obj['y1'], obj['x2'], obj['y2']]
elif 'xmin' in obj:
bbox = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']]
else:
bbox = obj['bbox']
objects.append({
'name': obj['name'],
'difficult': obj.get('difficult', 0),
'bbox': bbox,
})
return example['id'], objects
def get(self, example): def get(self, example):
img = np.frombuffer(example['content'], np.uint8) example = Example(example)
img = cv2.imdecode(img, 1) img = example.image
# Scale # Scale
scale_indices = np.random.randint(len(cfg.TRAIN.SCALES)) max_size = cfg.TRAIN.MAX_SIZE
target_size = cfg.TRAIN.SCALES[scale_indices] target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))]
im, im_scale, jitter = prep_im_for_blob(img, target_size, cfg.TRAIN.MAX_SIZE) img, im_scale, jitter = prep_im_for_blob(img, target_size, max_size)
# Flip # Flip
apply_flip = False apply_flip = False
if self._use_flipped: if self._use_flipped:
if np.random.randint(2) > 0: if np.random.randint(2) > 0:
im = im[:, ::-1, :] img = img[:, ::-1]
apply_flip = True apply_flip = True
# Random Crop or RandomPad # Random Crop or RandomPad
offsets = None offsets = None
if cfg.TRAIN.MAX_SIZE > 0: if cfg.TRAIN.MAX_SIZE > 0:
if jitter != 1.0: if jitter != 1:
# To a rectangle (scale, max_size) # To a rectangle (scale, max_size)
target_size = (np.array(im.shape[0:2]) / jitter).astype(np.int) target_size = (np.array(img.shape[:2]) / jitter).astype(np.int32)
im, offsets = get_image_with_target_size(target_size, im) img, offsets = get_image_with_target_size(target_size, img)
else: else:
# To a square (target_size, target_size) # To a square (target_size, target_size)
im, offsets = get_image_with_target_size([target_size] * 2, im) img, offsets = get_image_with_target_size([target_size] * 2, img)
# Example -> RoIDict # Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets) roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets)
...@@ -158,7 +132,7 @@ class DataTransformer(multiprocessing.Process): ...@@ -158,7 +132,7 @@ class DataTransformer(multiprocessing.Process):
gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), dtype=np.float32) gt_boxes = np.empty((len(roi_dict['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes'] gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
return im, im_scale, gt_boxes return img, im_scale, gt_boxes
def run(self): def run(self):
# Fix the process-local random seed # Fix the process-local random seed
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.faster_rcnn.utils import generate_grid_anchors
from lib.nms import nms_wrapper
from lib.utils import boxes as box_util
class Proposal(object):
"""Compute proposals by applying transformations anchors."""
def __init__(self):
super(Proposal, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
self.num_strides = len(self.strides)
self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
])
# Generate base anchors
self.base_anchors = []
for i in range(self.num_strides):
self.base_anchors.append(
generate_anchors(
self.strides[i],
self.ratios,
np.array([self.scales[i]])
if self.num_strides > 1
else np.array(self.scales)
)
)
def __call__(self, features, cls_prob, bbox_pred, ims_info):
pre_nms_top_n = cfg.TRAIN.RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg.TRAIN.RPN_POST_NMS_TOP_N
nms_thresh = cfg.TRAIN.RPN_NMS_THRESH
min_size = cfg.TRAIN.RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
all_anchors = \
generate_grid_anchors(
features,
self.base_anchors,
self.strides,
)
# Prepare for the outputs
batch_rois = []
cls_prob = cls_prob.numpy(True)
bbox_pred = bbox_pred.numpy(True)
if self.num_strides > 1:
# (?, 4, A * K) -> (?, A * K, 4)
bbox_pred = bbox_pred.transpose((0, 2, 1))
else:
# (?, A * 4, H, W) -> (?, H, W, A * 4)
cls_prob = cls_prob.transpose((0, 2, 3, 1))
bbox_pred = bbox_pred.transpose((0, 2, 3, 1))
# Extract RoIs separately
for ix in range(num_images):
# [?, N] -> [? * N, 1]
scores = cls_prob[ix].reshape((-1, 1))
if self.num_strides > 1:
deltas = bbox_pred[ix]
else:
deltas = bbox_pred[ix].reshape((-1, 4))
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# Convert anchors into proposals via bbox transformations
proposals = box_util.bbox_transform_inv(anchors, deltas)
# Clip predicted boxes to image
proposals = box_util.clip_tiled_boxes(proposals, ims_info[ix, :2])
# Remove predicted boxes with either height or width < threshold
keep = box_util.filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# Apply nms (e.g. threshold = 0.7)
# Take after_nms_topN (e.g. 300)
# Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Attach RoIs with batch indices
batch_inds = np.empty((proposals.shape[0], 1), 'float32')
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype('float32', copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
return np.concatenate(batch_rois, 0)
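# A minimal sketch of the partial-sort trick used above for the pre-NMS top-k
# selection; the scores are arbitrary.
def _topk_sketch():
    scores = np.array([0.1, 0.9, 0.4, 0.8, 0.3])
    k = 3
    inds = np.argpartition(-scores, k)[:k]    # top-k indices, unordered
    order = inds[np.argsort(-scores[inds])]   # sort only those k entries
    return order                              # array([1, 3, 2])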
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms import nms_wrapper
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""Compute proposals by applying transformations to anchors."""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.stride = cfg.RPN.STRIDES[0]
self.ratios = cfg.RPN.ASPECT_RATIOS
# Generate base anchors
self.base_anchors = generate_anchors(
base_size=self.stride,
ratios=self.ratios,
scales=np.array(self.scales),
)
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_top_n = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
# Generate proposals from shifted anchors
height, width = cls_prob.shape[-2:]
shift_x = np.arange(0, width) * self.stride
shift_y = np.arange(0, height) * self.stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors.shape[0]
K = shifts.shape[0]
anchors = \
self.base_anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = anchors.reshape((K * A, 4))
# Prepare for the outputs
batch_rois = []
# scores & deltas are (1, A, H, W) format
# Transpose to (1, H, W, A)
batch_scores = cls_prob.numpy(True).transpose((0, 2, 3, 1))
batch_deltas = bbox_pred.numpy(True).transpose((0, 2, 3, 1))
# Extract RoIs separately
for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix].reshape((-1, 4))
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2])
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_top_n (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN':
return rpn_rois
else:
return [array2tensor(rpn_rois)]
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_returns_to_blobs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.utils import boxes as box_util
from lib.utils.framework import new_tensor
class ProposalTarget(object):
"""Assign ground-truth targets to proposals."""
def __init__(self):
super(ProposalTarget, self).__init__()
self.num_strides = len(cfg.RPN.STRIDES)
self.num_classes = cfg.MODEL.NUM_CLASSES
self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
('labels', np.array([-1], 'float32')),
('bbox_targets', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_inside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_outside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
])
def __call__(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = self.defaults.keys()
blobs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype('int32') == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, :4]))))
# Sample a batch of RoIs for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
map_returns_to_blobs(
sample_rois(
rois,
gt_boxes,
rois_per_image,
fg_rois_per_image,
self.num_classes,
), blobs, keys,
)
# Stack into continuous blobs
for k, v in blobs.items():
blobs[k] = np.concatenate(blobs[k], 0)
if self.num_strides > 1:
# Distribute RoIs into pyramids
min_lvl = cfg.FPN.ROI_MIN_LEVEL
max_lvl = cfg.FPN.ROI_MAX_LEVEL
k = max_lvl - min_lvl + 1
levels = map_rois_to_levels(blobs['rois'], min_lvl, max_lvl)
outputs = map_blobs_to_outputs(
blobs,
self.defaults,
[np.where(levels == (i + min_lvl))[0] for i in range(k)],
)
return {
'rois': [new_tensor(outputs['rois'][i]) for i in range(k)],
'labels': new_tensor(np.concatenate(outputs['labels'], 0)),
'bbox_targets': new_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': new_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': new_tensor(np.vstack(outputs['bbox_outside_weights'])),
}
else:
# Return RoIs directly for the single-stride case
return {
'rois': [new_tensor(blobs['rois'])],
'labels': new_tensor(blobs['labels']),
'bbox_targets': new_tensor(blobs['bbox_targets']),
'bbox_inside_weights': new_tensor(blobs['bbox_inside_weights']),
'bbox_outside_weights': new_tensor(blobs['bbox_outside_weights']),
}
def get_targets(ex_rois, gt_rois, gt_labels, num_classes):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
# Compute bbox regression targets
fg_inds = np.where(gt_labels > 0)[0]
targets = box_util.bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
bbox_targets = np.zeros((ex_rois.shape[0], 4 * num_classes), 'float32')
inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
for i in fg_inds:
start = int(4 * gt_labels[i])
bbox_targets[i, start:start + 4] = targets[i]
inside_weights[i, start:start + 4] = (1., 1., 1., 1.)
outside_weights = np.array(inside_weights > 0).astype('float32')
return bbox_targets, inside_weights, outside_weights
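# Sketch of the class-aware layout produced by get_targets, assuming
# num_classes=3 and an initialized global cfg (cfg.BBOX_REG_WEIGHTS is read
# inside): a foreground RoI of class 2 stores its 4 targets in columns 8..11.
def _targets_sketch():
    ex_rois = np.array([[0., 0., 10., 10.]], 'float32')
    gt_rois = np.array([[2., 2., 12., 12.]], 'float32')
    labels = np.array([2.])
    targets, iw, ow = get_targets(ex_rois, gt_rois, labels, num_classes=3)
    return targets.shape, np.where(iw[0] == 1)[0]  # (1, 12), [8 9 10 11]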
def sample_rois(
all_rois,
gt_boxes,
num_rois,
num_fg_rois,
num_classes,
):
"""Sample a batch of RoIs comprising foreground and background examples."""
overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
fg_rois_per_this_image = int(min(num_fg_rois, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, fg_rois_per_this_image, False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image
bg_rois_per_this_image = num_rois - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, bg_rois_per_this_image, False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays
rois, labels = all_rois[keep_inds], labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
# Clamp the image indices for the background RoIs to -1
rois[fg_rois_per_this_image:, 0] = -1
# Compute the target from RoIs
outputs = [rois, labels]
outputs += get_targets(
rois[:, 1:5],
gt_boxes[gt_assignment[keep_inds], :4],
labels,
num_classes,
)
return outputs
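# Sketch of the fg/bg quota fed into sample_rois, assuming the common defaults
# cfg.TRAIN.BATCH_SIZE = 128 and cfg.TRAIN.FG_FRACTION = 0.25; fewer foreground
# RoIs are taken when an image has fewer high-overlap proposals.
def _quota_sketch():
    rois_per_image = 128
    fg_quota = int(np.round(0.25 * rois_per_image))  # at most 32 foreground RoIs
    bg_quota = rois_per_image - fg_quota             # remaining 96 are background
    return fg_quota, bg_quota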
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets."""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
batch_outputs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return {
'rois': [array2tensor(batch_outputs['rois'])],
'labels': array2tensor(batch_outputs['labels']),
'bbox_targets': array2tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': array2tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': array2tensor(batch_outputs['bbox_outside_weights']),
}
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _sample_rois(
all_rois,
gt_boxes,
fg_rois_per_image,
rois_per_image,
num_classes,
):
"""Generate a random sample of RoIs."""
overlaps = bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
...@@ -17,14 +17,13 @@ import dragon.vm.torch as torch ...@@ -17,14 +17,13 @@ import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling.detector import new_detector
from lib.nms import nms_wrapper from lib.nms import nms_wrapper
from lib.utils import boxes as box_util
from lib.utils import framework from lib.utils import framework
from lib.utils import time_util from lib.utils import time_util
from lib.utils.blob import im_list_to_blob from lib.utils.blob import im_list_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image from lib.utils.image import scale_image
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image): def im_detect(detector, raw_image):
...@@ -39,69 +38,65 @@ def im_detect(detector, raw_image): ...@@ -39,69 +38,65 @@ def im_detect(detector, raw_image):
], dtype=np.float32) ], dtype=np.float32)
# Do Forward # Do Forward
if not hasattr(detector, 'frozen_graph'): if not hasattr(detector, 'graph'):
inputs = { with framework.new_workspace().as_default():
'data': torch.from_numpy(blobs['data']), data = torch.from_numpy(blobs['data'])
'ims_info': torch.from_numpy(blobs['ims_info']), ims_info = torch.from_numpy(blobs['ims_info'])
}
with torch.no_grad(): with torch.no_grad():
with torch.jit.Recorder(retain_ops=True): with torch.jit.Tracer(retain_ops=True):
inputs = {'data': data, 'ims_info': ims_info}
outputs = detector.forward(inputs) outputs = detector.forward(inputs)
detector.frozen_graph = \ detector.graph = \
framework.FrozenGraph( framework.Graph(inputs, {
{'data': inputs['data'], 'rois': outputs['rois'],
'ims_info': inputs['ims_info']},
{'rois': outputs['rois'],
'cls_prob': outputs['cls_prob'], 'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']}, 'bbox_pred': outputs['bbox_pred']
) })
outputs = detector.frozen_graph(**blobs) outputs = detector.graph(**blobs)
# Decode results # Decode results
batch_rois = outputs['rois'] rois = outputs['rois']
batch_scores = outputs['cls_prob'] scores, boxes, batch_inds = [], [], []
batch_deltas = outputs['bbox_pred'] pred_boxes = \
batch_boxes = bbox_transform_inv( box_util.bbox_transform_inv(
batch_rois[:, 1:5], rois[:, 1:5],
batch_deltas, outputs['bbox_pred'],
cfg.BBOX_REG_WEIGHTS, cfg.BBOX_REG_WEIGHTS,
) )
scores_wide, boxes_wide = [], [] for i in range(len(ims)):
inds = np.where(rois[:, 0].astype(np.int32) == i)[0]
im_boxes = pred_boxes[inds] / ims_scale[i]
scores.append(outputs['cls_prob'][inds])
boxes.append(box_util.clip_tiled_boxes(im_boxes, raw_image.shape))
for im_idx in range(len(ims)): return (
indices = np.where(batch_rois[:, 0].astype(np.int32) == im_idx)[0] np.vstack(scores) if len(ims) > 0 else scores[0],
boxes = batch_boxes[indices] np.vstack(boxes) if len(ims) > 0 else boxes[0],
boxes /= ims_scale[im_idx] )
clip_tiled_boxes(boxes, raw_image.shape)
scores_wide.append(batch_scores[indices])
boxes_wide.append(boxes)
return (np.vstack(scores_wide), np.vstack(boxes_wide)) \
if len(scores_wide) > 1 else (scores_wide[0], boxes_wide[0])
def test_net(weights, num_classes, q_in, q_out, device):
num_classes, cfg.GPU_ID = num_classes, device
detector = new_detector(device, weights)
def test_net(detector, server): _t = time_util.new_timers('im_detect', 'misc')
# Load settings
classes = server.classes
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()} while True:
idx, raw_image = q_in.get()
if raw_image is None:
break
for i in range(num_images): boxes_this_image = [[]]
image_id, raw_image = server.get_image()
with _t['im_detect'].tic_and_toc(): with _t['im_detect'].tic_and_toc():
scores, boxes = im_detect(detector, raw_image) scores, boxes = im_detect(detector, raw_image)
_t['misc'].tic() _t['misc'].tic()
boxes_this_image = [[]]
for j in range(1, num_classes): for j in range(1, num_classes):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0] inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j] cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4] cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
cls_detections = np.hstack( cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis]) (cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False) ).astype(np.float32, copy=False)
...@@ -119,43 +114,16 @@ def test_net(detector, server): ...@@ -119,43 +114,16 @@ def test_net(detector, server):
force_cpu=True, force_cpu=True,
) )
cls_detections = cls_detections[keep, :] cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections) boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(
raw_image,
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_id),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc() _t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' q_out.put((
.format(i + 1, num_images, idx,
_t['im_detect'].average_time, {
_t['misc'].average_time), 'im_detect': _t['im_detect'].average_time,
end='') 'misc': _t['misc'].average_time,
},
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<') {
'boxes': boxes_this_image,
print('Evaluating detections') },
server.evaluate_detections(all_boxes) ))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
from lib.core.config import cfg
def generate_grid_anchors(features, base_anchors, strides):
num_strides = len(strides)
if len(features) != num_strides:
raise ValueError(
'Given %d features for %d strides.'
% (len(features), num_strides)
)
# Generate proposals from shifted anchors
anchors_to_pack = []
for i in range(len(features)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * strides[i]
shift_y = np.arange(0, height) * strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
if num_strides > 1:
# Transpose from (K, A, 4) to (A, K, 4)
# We will pack it with other strides to
# match the data format of (N, C, H, W)
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
anchors_to_pack.append(anchors)
else:
# Original order of Faster R-CNN
return anchors.reshape((K * A, 4))
return np.vstack(anchors_to_pack)
def map_returns_to_blobs(returns, blobs, keys):
"""Map returns of image to blobs."""
for i, key in enumerate(keys):
blobs[key].append(returns[i])
def map_rois_to_levels(rois, k_min, k_max):
"""Map rois to fpn levels."""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
def map_blobs_to_outputs(blobs, defaults, lvl_inds):
"""Map blobs to outputs according to fpn indices."""
outputs = collections.defaultdict(list)
for inds in lvl_inds:
for key, blob in blobs.items():
outputs[key].append(
blob[inds]
if len(inds) > 0
else defaults[key]
)
return outputs
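A worked example of the heuristic in map_rois_to_levels (level = floor(lvl0 + log2(sqrt(w * h) / s0))), written as a self-contained numpy sketch with the documented defaults s0 = 224, lvl0 = 4 and assumed level bounds of 2 and 5:

import numpy as np

def example_levels(rois, k_min=2, k_max=5, s0=224., lvl0=4):
    """Standalone version of the FPN heuristic; rois are (ind, x1, y1, x2, y2)."""
    ws = rois[:, 3] - rois[:, 1] + 1
    hs = rois[:, 4] - rois[:, 2] + 1
    s = np.sqrt(ws * hs)
    levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
    return np.clip(levels, k_min, k_max)

rois = np.array([[0, 0, 0, 111, 111],    # ~112 px box -> level 3
                 [0, 0, 0, 223, 223],    # ~224 px box -> level 4
                 [0, 0, 0, 447, 447]],   # ~448 px box -> level 5
                dtype=np.float32)
print(example_levels(rois))  # [3. 4. 5.]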
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class AnchorTargetLayer(torch.nn.Module):
"""Assign anchors to ground-truth targets."""
def __init__(self):
super(AnchorTargetLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))
)
# Allow boxes to sit over the edge by a small amount
self._allowed_border = cfg.TRAIN.RPN_STRADDLE_THRESH
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
ratios=self.ratios,
scales=np.array(scale),
)
)
def forward(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images:
logger.fatal(
'Input {} images, got {} slices of gt boxes.'
.format(num_images, len(gt_boxes_wide))
)
# Generate proposals from shifted anchors
all_anchors, total_anchors = [], 0
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors)
total_anchors += anchors.shape[0]
all_anchors = np.vstack(all_anchors)
# label: 1 is positive, 0 is negative, -1 is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32)
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32)
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32)
for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label, has_mask)
gt_boxes = gt_boxes_wide[ix]
im_info = ims_info[ix]
if self._allowed_border >= 0:
# Only keep anchors inside the image
inds_inside = np.where(
(all_anchors[:, 0] >= -self._allowed_border) &
(all_anchors[:, 1] >= -self._allowed_border) &
(all_anchors[:, 2] < im_info[1] + self._allowed_border) &
(all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]
anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
anchors = all_anchors
num_inside = len(inds_inside)
# label: 1 is positive, 0 is negative, -1 is don't care
labels = np.empty((num_inside,), dtype=np.float32)
labels.fill(-1)
# Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(anchors, gt_boxes)
argmax_overlaps = overlaps.argmax(axis=1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
gt_argmax_overlaps = overlaps.argmax(axis=0)
gt_max_overlaps = overlaps[gt_argmax_overlaps,
np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
# fg label: for each gt, anchor with highest overlap
labels[gt_argmax_overlaps] = 1
# fg label: above threshold IOU
labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
# bg label: below threshold IOU
labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
# Subsample positive labels if we have too many
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
disable_inds = npr.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False)
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
# Subsample negative labels if we have too many
num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
bg_inds = np.where(labels == 0)[0]
if len(bg_inds) > num_bg:
disable_inds = npr.choice(
bg_inds, size=(len(bg_inds) - num_bg), replace=False)
labels[disable_inds] = -1
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform(
anchors[fg_inds, :],
gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[labels == 1, :] = np.array((1., 1., 1., 1.))
bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_outside_weights[labels == 1, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
bbox_outside_weights[labels == 0, :] = np.ones((1, 4)) / cfg.TRAIN.RPN_BATCHSIZE
labels_wide[ix, inds_inside] = labels # label
bbox_targets_wide[ix, inds_inside] = bbox_targets
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': array2tensor(labels),
'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights),
}
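A toy illustration of the label assignment above, using a standalone IoU helper in place of bbox_overlaps and the usual RPN thresholds of 0.7/0.3 (the configured RPN_POSITIVE_OVERLAP / RPN_NEGATIVE_OVERLAP may differ):

import numpy as np

def iou(boxes_a, boxes_b):
    """Pairwise IoU for (x1, y1, x2, y2) boxes with inclusive pixel coordinates."""
    x1 = np.maximum(boxes_a[:, None, 0], boxes_b[None, :, 0])
    y1 = np.maximum(boxes_a[:, None, 1], boxes_b[None, :, 1])
    x2 = np.minimum(boxes_a[:, None, 2], boxes_b[None, :, 2])
    y2 = np.minimum(boxes_a[:, None, 3], boxes_b[None, :, 3])
    inter = np.clip(x2 - x1 + 1, 0, None) * np.clip(y2 - y1 + 1, 0, None)
    area_a = (boxes_a[:, 2] - boxes_a[:, 0] + 1) * (boxes_a[:, 3] - boxes_a[:, 1] + 1)
    area_b = (boxes_b[:, 2] - boxes_b[:, 0] + 1) * (boxes_b[:, 3] - boxes_b[:, 1] + 1)
    return inter / (area_a[:, None] + area_b[None, :] - inter)

anchors = np.array([[0, 0, 15, 15], [4, 0, 19, 15], [100, 100, 115, 115]], 'float32')
gt_boxes = np.array([[0, 0, 15, 15]], 'float32')
overlaps = iou(anchors, gt_boxes)
labels = -np.ones(len(anchors), 'float32')     # -1: don't care
labels[overlaps.argmax(axis=0)] = 1            # best anchor for each gt
labels[overlaps.max(axis=1) >= 0.7] = 1        # above the positive threshold
labels[overlaps.max(axis=1) < 0.3] = 0         # below the negative threshold
print(labels)  # [ 1. -1.  0.]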
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms import nms_wrapper
from lib.utils import logger
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""Compute proposals by applying transformations anchors."""
def __init__(self):
super(ProposalLayer, self).__init__()
# Load the basic configs
self.scales = cfg.RPN.SCALES
self.strides = cfg.RPN.STRIDES
self.ratios = cfg.RPN.ASPECT_RATIOS
if len(self.scales) != len(self.strides):
logger.fatal(
'Given {} scales and {} strides.'
.format(len(self.scales), len(self.strides))
)
# Generate base anchors
self.base_anchors = []
for i in range(len(self.strides)):
base_size, scale = self.strides[i], self.scales[i]
if not isinstance(scale, collections.Iterable):
scale = [scale]
self.base_anchors.append(
generate_anchors(
base_size=base_size,
ratios=self.ratios,
scales=np.array(scale),
)
)
def generate_grid_anchors(self, features):
# Generate proposals from shifted anchors
anchors_wide = []
for i in range(len(self.strides)):
height, width = features[i].shape[-2:]
shift_x = np.arange(0, width) * self.strides[i]
shift_y = np.arange(0, height) * self.strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
anchors_wide.append(anchors)
return np.vstack(anchors_wide)
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_top_n = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Get resources
num_images = ims_info.shape[0]
all_anchors = self.generate_grid_anchors(features) # [n, 4]
if cls_prob.shape[0] != num_images or \
bbox_pred.shape[0] != num_images:
logger.fatal('Incorrect num of images: {}'.format(num_images))
# Prepare for the outputs
batch_rois = []
batch_scores = cls_prob.numpy(True)
batch_deltas = bbox_pred.numpy(True) \
.transpose((0, 2, 1)) # [?, 4, n] -> [?, n, 4]
# Extract RoIs separately
for ix in range(num_images):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix] # [n, 4]
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays: first partition to get the top K
# unsorted, then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
# 1. Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, deltas)
# 2. Clip predicted boxes to image
proposals = clip_tiled_boxes(proposals, ims_info[ix, :2])
# 3. Remove predicted boxes with either height or width < threshold
keep = filter_boxes(proposals, min_size * ims_info[ix, 2])
proposals = proposals[keep, :]
scores = scores[keep]
# 4. Apply nms (e.g. threshold = 0.7)
# 5. Take post_nms_top_n (e.g. 300)
# 6. Return the top proposals (-> RoIs top)
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Output rois blob
batch_inds = np.empty((proposals.shape[0], 1), dtype=np.float32)
batch_inds.fill(ix)
rpn_rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
batch_rois.append(rpn_rois)
# Merge RoIs into a blob
rpn_rois = np.concatenate(batch_rois, axis=0)
if cfg_key == 'TRAIN':
return rpn_rois
else:
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
k = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level)
all_rois = []
for i in range(k):
lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling
all_rois.append(array2tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else:
all_rois.append(array2tensor(rpn_rois[lv_indices]))
return all_rois
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
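The pre-NMS ranking in ProposalLayer.forward partitions before sorting when pre_nms_top_n is much smaller than the number of anchors; a self-contained check that the partition-then-sort path selects the same indices as a full sort (ties aside):

import numpy as np

scores = np.random.rand(200000)
top_k = 2000

# Full sort: O(n log n) over all scores
order_full = np.argsort(-scores)[:top_k]

# Partition first (O(n)), then sort only the top-k candidates
inds = np.argpartition(-scores, top_k)[:top_k]
order_fast = inds[np.argsort(-scores[inds])]

assert np.array_equal(order_full, order_fast)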
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class ProposalTargetLayer(torch.nn.Module):
"""Assign object detection proposals to ground-truth targets.
Produces proposal classification labels and bounding-box regression targets.
"""
def __init__(self):
super(ProposalTargetLayer, self).__init__()
self.num_classes = cfg.MODEL.NUM_CLASSES
self.fake_outputs = {
'rois': np.array([[0, 0, 0, 1, 1]], dtype=np.float32),
'labels': np.array([-1], dtype=np.float32),
'bbox_targets': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_inside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
'bbox_outside_weights': np.zeros((1, self.num_classes * 4), dtype=np.float32),
}
def forward(self, rpn_rois, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
# (i.e., rpn.proposal_layer.ProposalLayer), or any other source
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images)
# Prepare for the outputs
keys = ['labels', 'rois', 'bbox_targets',
'bbox_inside_weights', 'bbox_outside_weights']
outputs = {key: [] for key in keys}
batch_outputs = {key: [] for key in keys}
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype(np.int32) == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, 0:4]))))
# Sample a batch of rois for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
labels, rois, bbox_targets, bbox_inside_weights = \
_sample_rois(rois, gt_boxes, fg_rois_per_image, rois_per_image, self.num_classes)
bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
_fmap_batch([
labels,
rois,
bbox_targets,
bbox_inside_weights,
bbox_outside_weights],
batch_outputs,
keys,
)
# Merge targets into blobs
for k, v in batch_outputs.items():
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
k = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(k)]
_fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
outputs=outputs,
keys=keys,
levels=lvs_indices,
)
return {
'rois': [array2tensor(outputs['rois'][i]) for i in range(k)],
'labels': array2tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': array2tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': array2tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': array2tensor(np.vstack(outputs['bbox_outside_weights'])),
}
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets (bbox_target_data) are stored in a
compact form N x (class, tx, ty, tw, th)
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets).
Returns:
bbox_target (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, int(start):int(end)] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, int(start):int(end)] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
targets = bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
def _map_rois_to_fpn_levels(rois, k_min, k_max):
"""
Determine which FPN level each RoI in a set of RoIs
should map to based on the heuristic in the FPN paper.
"""
if len(rois) == 0:
return []
ws = rois[:, 3] - rois[:, 1] + 1
hs = rois[:, 4] - rois[:, 2] + 1
s = np.sqrt(ws * hs)
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
target_levels = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
return np.clip(target_levels, k_min, k_max)
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
"""Sample a batch of RoIs comprising foreground and background examples."""
# overlaps: (rois x gt_boxes)
overlaps = bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
rois = all_rois[keep_inds]
bbox_target_data = _compute_targets(
rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
bbox_targets, bbox_inside_weights = \
_get_bbox_regression_labels(bbox_target_data, num_classes)
return labels, rois, bbox_targets, bbox_inside_weights
def _fmap_batch(inputs, outputs, keys):
for i, key in enumerate(keys):
outputs[key].append(inputs[i])
def _fmap_rois(inputs, fake_outputs, outputs, keys, levels):
def impl(a, b, indices):
return a[indices] if len(indices) > 0 else b
for k in range(len(levels)):
inds = levels[k]
for i, key in enumerate(keys):
outputs[key].append(impl(inputs[i], fake_outputs[key], inds))
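_get_bbox_regression_labels above expands compact per-RoI targets into the class-specific 4*K layout expected by the bbox head; a tiny worked example with K = 3 classes:

import numpy as np

# bbox_target_data rows: (class, tx, ty, tw, th)
bbox_target_data = np.array([[2, 0.1, -0.2, 0.3, 0.0],
                             [0, 0.0, 0.0, 0.0, 0.0]], 'float32')
num_classes = 3
targets = np.zeros((2, 4 * num_classes), 'float32')
inside_weights = np.zeros_like(targets)
for ind in np.where(bbox_target_data[:, 0] > 0)[0]:
    start = int(4 * bbox_target_data[ind, 0])
    targets[ind, start:start + 4] = bbox_target_data[ind, 1:]
    inside_weights[ind, start:start + 4] = 1.
# Only columns 8:12 (class 2) of row 0 are non-zero; the background row stays zero.
print(targets)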
...@@ -13,6 +13,7 @@ from __future__ import absolute_import ...@@ -13,6 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.fpn.anchor_target_layer import AnchorTargetLayer from lib.faster_rcnn.anchor_target import AnchorTarget
from lib.fpn.proposal_layer import ProposalLayer from lib.faster_rcnn.proposal import Proposal
from lib.fpn.proposal_target_layer import ProposalTargetLayer from lib.mask_rcnn.data_loader import DataLoader
from lib.mask_rcnn.proposal_target import ProposalTarget
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing as mp
import time
import dragon
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.mask_rcnn.data_transformer import DataTransformer
from lib.datasets.factory import get_imdb
from lib.utils import logger
from lib.utils.blob import im_list_to_blob
from lib.utils.blob import mask_list_to_blob
class DataLoader(object):
"""Provide mini-batches of data."""
def __init__(self):
super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
'classes': database.classes,
'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
})
def __call__(self):
outputs = self.data_batch.get()
outputs['data'] = torch.from_numpy(outputs['data'])
return outputs
class DataBatch(mp.Process):
"""Prefetch the batch of data."""
def __init__(self, **kwargs):
"""Construct a ``DataBatch``.
Parameters
----------
dataset : lambda
The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=0
The number of chunks to split.
batch_size : int, optional, default=2
The size of a mini-batch.
num_transformers : int, optional, default=3
The number of workers to transform data.
"""
super(DataBatch, self).__init__()
# Distributed settings
rank, group_size = 0, 1
process_group = dragon.distributed.get_group()
if process_group is not None and kwargs.get(
'phase', 'TRAIN') == 'TRAIN':
group_size = process_group.size
rank = dragon.distributed.get_rank(process_group)
kwargs['group_size'] = group_size
# Configuration
self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 2)
self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1)
self.daemon = True
# Initialize queues
num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size)
self.Q21 = mp.Queue(num_batches * self._batch_size)
self.Q22 = mp.Queue(num_batches * self._batch_size)
self.Q3 = mp.Queue(num_batches)
# Initialize readers
self._readers = []
for i in range(self._num_readers):
part_idx, num_parts = i, self._num_readers
num_parts *= group_size
part_idx += rank * self._num_readers
self._readers.append(dragon.io.DataReader(
num_parts=num_parts, part_idx=part_idx, **kwargs))
self._readers[i]._seed += part_idx
self._readers[i].q_out = self.Q1
self._readers[i].start()
time.sleep(0.1)
# Initialize transformers
self._transformers = []
for i in range(self._num_transformers):
transformer = DataTransformer(**kwargs)
transformer._seed += (i + rank * self._num_transformers)
transformer.q_in = self.Q1
transformer.q1_out, transformer.q2_out = self.Q21, self.Q22
transformer.start()
self._transformers.append(transformer)
time.sleep(0.1)
# Initialize batch-producer
self.start()
# Register cleanup callbacks
def cleanup():
def terminate(processes):
for process in processes:
process.terminate()
process.join()
terminate([self])
logger.info('Terminate DataBatch.')
terminate(self._transformers)
logger.info('Terminate DataTransformer.')
terminate(self._readers)
logger.info('Terminate DataReader.')
import atexit
atexit.register(cleanup)
def get(self):
"""Get a batch.
Returns
-------
dict
The batch dict.
"""
return self.Q3.get()
def run(self):
"""Start the process to produce batches."""
def produce(q_in):
processed_ims, ims_info = [], []
packed_boxes, packed_masks = [], []
for image_index in range(cfg.TRAIN.IMS_PER_BATCH):
im, im_scale, gt_boxes, gt_masks = q_in.get()
processed_ims.append(im)
ims_info.append(list(im.shape[:2]) + [im_scale])
im_boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
im_boxes[:, :gt_boxes.shape[1]], im_boxes[:, -1] = gt_boxes, image_index
packed_boxes.append(im_boxes)
packed_masks.append(gt_masks)
return {
'data': im_list_to_blob(processed_ims),
'ims_info': np.array(ims_info, 'float32'),
'gt_boxes': np.concatenate(packed_boxes, 0),
'gt_masks': mask_list_to_blob(packed_masks),
}
# Two queues to implement aspect-grouping
# This is necessary to reduce the gpu memory
# from fetching a huge square batch blob
q1, q2 = self.Q21, self.Q22
# Main prefetch loop
while True:
if q1.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q1))
elif q2.qsize() >= cfg.TRAIN.IMS_PER_BATCH:
self.Q3.put(produce(q2))
q1, q2 = q2, q1 # Uniform sampling trick
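The two queues Q21/Q22 implement aspect-grouping: portrait and landscape images are batched separately so the packed blob never pads a tall image against a wide one. A stripped-down sketch of the same idea, with plain lists standing in for the queues:

import numpy as np

def group_by_aspect(images, batch_size=2):
    """Yield batches whose members share an orientation (illustrative only)."""
    portrait, landscape = [], []
    for img in images:
        bucket = portrait if img.shape[0] / img.shape[1] > 1. else landscape
        bucket.append(img)
        for queue in (portrait, landscape):
            if len(queue) >= batch_size:
                yield [queue.pop(0) for _ in range(batch_size)]

images = [np.zeros((600, 800, 3)), np.zeros((800, 600, 3)),
          np.zeros((640, 960, 3)), np.zeros((960, 640, 3))]
for batch in group_by_aspect(images):
    print([im.shape[:2] for im in batch])  # members of a batch share orientation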
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import numpy as np
from lib.core.config import cfg
from lib.datasets.example import Example
from lib.pycocotools import mask_utils
from lib.utils import boxes as box_util
from lib.utils.blob import prep_im_for_blob
from lib.utils.image import get_image_with_target_size
class DataTransformer(multiprocessing.Process):
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._seed = cfg.RNG_SEED
self._use_flipped = cfg.TRAIN.USE_FLIPPED
self._use_diff = cfg.TRAIN.USE_DIFF
self._classes = kwargs.get('classes', ('__background__',))
self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self.q_in = self.q1_out = self.q2_out = None
self.daemon = True
def make_roi_dict(self, example, im_scale, apply_flip=False):
objects, n_objects = example.objects, 0
height, width = example.height, example.width
if not self._use_diff:
for obj in objects:
if obj.get('difficult', 0) == 0:
n_objects += 1
else:
n_objects = len(objects)
roi_dict = {
'boxes': np.zeros((n_objects, 4), 'float32'),
'masks': np.empty((n_objects, height, width), 'uint8'),
'gt_classes': np.zeros((n_objects, 1), 'int32'),
'mask_flags': np.ones((n_objects, 1), 'float32'),
}
# Filter the difficult instances
object_idx = 0
for obj in objects:
if not self._use_diff and \
obj.get('difficult', 0) > 0:
continue
bbox, mask = obj['bbox'], obj['mask']
roi_dict['boxes'][object_idx, :] = [
max(0, bbox[0]),
max(0, bbox[1]),
min(bbox[2], width - 1),
min(bbox[3], height - 1),
]
if mask is not None:
roi_dict['masks'][object_idx] = (
mask_utils.bytes2img(
obj['mask'],
height,
width,
))
else:
roi_dict['mask_flags'][object_idx] = 0.
roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[obj['name']]
object_idx += 1
# Flip the boxes if necessary
if apply_flip:
roi_dict['boxes'] = \
box_util.flip_boxes(
roi_dict['boxes'],
width,
)
# Scale the boxes to the detecting scale
roi_dict['boxes'] *= im_scale
return roi_dict
def get(self, example):
example = Example(example)
img = example.image
# Scale
max_size = cfg.TRAIN.MAX_SIZE
target_size = cfg.TRAIN.SCALES[np.random.randint(len(cfg.TRAIN.SCALES))]
img, im_scale, jitter = prep_im_for_blob(img, target_size, max_size)
# Flip
apply_flip = False
if self._use_flipped:
if np.random.randint(2) > 0:
img = img[:, ::-1]
apply_flip = True
# Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls, flag}]
gt_boxes = \
np.concatenate([
roi_dict['boxes'],
roi_dict['gt_classes'],
roi_dict['mask_flags']
], axis=1)
# Post-Process for gt masks
# Shape like: [num_objects, im_h, im_w]
if gt_boxes.shape[0] > 0:
gt_masks = roi_dict['masks']
if apply_flip:
gt_masks = gt_masks[:, :, ::-1]
else:
gt_masks = None
return img, im_scale, gt_boxes, gt_masks
def run(self):
# Fix the process-local random seed
np.random.seed(self._seed)
# Main prefetch loop
while True:
outputs = self.get(self.q_in.get())
if len(outputs[2]) < 1:
continue # Ignore the non-object image
aspect_ratio = float(outputs[0].shape[0]) / outputs[0].shape[1]
if aspect_ratio > 1.:
self.q1_out.put(outputs)
else:
self.q2_out.put(outputs)
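make_roi_dict flips boxes with box_util.flip_boxes when the image is mirrored; a standalone sketch of that transform, assuming the usual inclusive-coordinate convention x1' = width - 1 - x2:

import numpy as np

def flip_boxes_example(boxes, width):
    """Horizontally mirror (x1, y1, x2, y2) boxes under inclusive pixel coordinates."""
    flipped = boxes.copy()
    flipped[:, 0] = width - 1 - boxes[:, 2]
    flipped[:, 2] = width - 1 - boxes[:, 0]
    return flipped

boxes = np.array([[10., 20., 49., 80.]])
print(flip_boxes_example(boxes, width=100))  # [[50. 20. 89. 80.]]
# The mask blob is mirrored consistently via gt_masks[:, :, ::-1] above.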
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.utils import map_blobs_to_outputs
from lib.faster_rcnn.utils import map_returns_to_blobs
from lib.faster_rcnn.utils import map_rois_to_levels
from lib.utils import boxes as box_util
from lib.utils import mask as mask_util
from lib.utils.framework import new_tensor
class ProposalTarget(object):
"""Assign proposals to ground-truth targets."""
def __init__(self):
super(ProposalTarget, self).__init__()
self.resolution = cfg.MRCNN.RESOLUTION
self.num_classes = cfg.MODEL.NUM_CLASSES
self.defaults = collections.OrderedDict([
('rois', np.array([[-1, 0, 0, 1, 1]], 'float32')),
('labels', np.array([-1], 'float32')),
('bbox_targets', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_inside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('bbox_outside_weights', np.zeros((1, self.num_classes * 4), 'float32')),
('mask_targets', -np.ones((1, self.resolution, self.resolution), 'float32')),
])
def __call__(self, rpn_rois, gt_boxes, gt_masks, ims_info):
num_images = cfg.TRAIN.IMS_PER_BATCH
# Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
all_rois = rpn_rois
# GT boxes (x1, y1, x2, y2, label)
# GT masks (num_objects, im_h, im_w)
gt_boxes_wide, gt_masks_wide = \
mask_util.dismantle_masks(
gt_boxes,
gt_masks,
num_images,
)
# Prepare for the outputs
keys = self.defaults.keys()
blobs = dict(map(lambda a, b: (a, b), keys, [[] for _ in keys]))
# Generate targets separately
for ix in range(num_images):
gt_boxes = gt_boxes_wide[ix]
gt_masks = gt_masks_wide[ix]
# Extract proposals for this image
rois = all_rois[np.where(all_rois[:, 0].astype('int32') == ix)[0]]
# Include ground-truth boxes in the set of candidate rois
inds = np.ones((gt_boxes.shape[0], 1), gt_boxes.dtype) * ix
rois = np.vstack((rois, np.hstack((inds, gt_boxes[:, :4]))))
# Sample a batch of RoIs for training
rois_per_image = cfg.TRAIN.BATCH_SIZE
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
map_returns_to_blobs(
sample_rois(
rois,
gt_boxes,
gt_masks,
rois_per_image,
fg_rois_per_image,
self.num_classes,
ims_info[ix][2],
), blobs, keys,
)
# Stack into continuous blobs
for k, v in blobs.items():
blobs[k] = np.concatenate(blobs[k], 0)
# Distribute rois into pyramids
k_min = cfg.FPN.ROI_MIN_LEVEL
k_max = cfg.FPN.ROI_MAX_LEVEL
k = k_max - k_min + 1
levels = map_rois_to_levels(blobs['rois'], k_min, k_max)
outputs = \
map_blobs_to_outputs(
blobs,
self.defaults,
[np.where(levels == (i + k_min))[0] for i in range(k)],
)
# Select the foreground RoIs only for mask branch
for i in range(k):
inds = np.where(outputs['labels'][i] > 0)[0]
inds = inds if len(inds) > 0 else np.array([0], 'int64')
outputs['mask_rois'].append(outputs['rois'][i][inds])
outputs['mask_targets'][i] = outputs['mask_targets'][i][inds]
outputs['mask_labels'].append(outputs['labels'][i][inds].astype('int64') - 1)
# Use the sparse indices to select logits
# Reduce the overhead on feeding dense class-specific targets
mask_labels = np.concatenate(outputs['mask_labels'], 0)
mask_indices = np.arange(len(mask_labels)) * (self.num_classes - 1)
return {
'rois': [new_tensor(outputs['rois'][i]) for i in range(k)],
'labels': new_tensor(np.concatenate(outputs['labels'], 0)),
'bbox_targets': new_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': new_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': new_tensor(np.vstack(outputs['bbox_outside_weights'])),
'mask_rois': [new_tensor(outputs['mask_rois'][i]) for i in range(k)],
'mask_targets': new_tensor(np.vstack(outputs['mask_targets'])),
'mask_indices': new_tensor(mask_indices + mask_labels),
}
def get_targets(
ex_rois,
gt_rois,
gt_labels,
gt_masks,
mask_flags,
mask_size,
num_classes,
im_scale,
):
"""Compute the bounding-box regression targets."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
# Compute bbox regression targets
fg_inds = np.where(gt_labels > 0)[0]
targets = box_util.bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
bbox_targets = np.zeros((ex_rois.shape[0], 4 * num_classes), 'float32')
inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
for i in fg_inds:
start = int(4 * gt_labels[i])
bbox_targets[i, start:start + 4] = targets[i]
inside_weights[i, start:start + 4] = (1., 1., 1., 1.)
outside_weights = np.array(inside_weights > 0).astype('float32')
# Compute mask classification targets
mask_shape = [mask_size] * 2
ex_rois_ori = np.round(ex_rois / im_scale).astype(int)
gt_rois_ori = np.round(gt_rois / im_scale).astype(int)
mask_targets = -np.ones([len(gt_labels)] + mask_shape, 'float32')
for i in fg_inds:
if mask_flags[i] > 0:
box_mask = \
mask_util.intersect_box_mask(
ex_rois_ori[i],
gt_rois_ori[i],
gt_masks[i],
)
if box_mask is not None:
mask_targets[i] = \
mask_util.resize_mask(
mask=box_mask,
size=mask_shape,
)
return bbox_targets, inside_weights, outside_weights, mask_targets
def sample_rois(
all_rois,
gt_boxes,
gt_masks,
num_rois,
num_fg_rois,
num_classes,
im_scale,
):
"""Sample a batch of RoIs comprising foreground and background examples."""
overlaps = box_util.bbox_overlaps(all_rois[:, 1:5], gt_boxes[:, :4])
gt_assignment = overlaps.argmax(axis=1)
max_overlaps = overlaps.max(axis=1)
labels = gt_boxes[gt_assignment, 4]
# Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
fg_rois_per_this_image = int(min(num_fg_rois, fg_inds.size))
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(fg_inds, fg_rois_per_this_image, False)
# Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
(max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image
bg_rois_per_this_image = num_rois - fg_rois_per_this_image
bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
# Sample background regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(bg_inds, bg_rois_per_this_image, False)
# The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays
rois, labels = all_rois[keep_inds], labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
# Clamp the image indices for the background RoIs to -1
rois[fg_rois_per_this_image:, 0] = -1
# Compute the target from RoIs
outputs = [rois, labels]
outputs += get_targets(
rois[:, 1:5],
gt_boxes[gt_assignment[keep_inds], :4],
labels,
gt_masks[gt_assignment[fg_inds]],
gt_boxes[gt_assignment[fg_inds], 5],
cfg.MRCNN.RESOLUTION,
num_classes,
im_scale,
)
return outputs
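A small numeric sketch of the quota arithmetic in sample_rois, assuming the common defaults TRAIN.BATCH_SIZE = 64 and TRAIN.FG_FRACTION = 0.25 (the configured values may differ):

import numpy as np

rois_per_image, fg_fraction = 64, 0.25
fg_rois_per_image = int(np.round(fg_fraction * rois_per_image))      # 16

num_fg_candidates, num_bg_candidates = 9, 300
fg_take = min(fg_rois_per_image, num_fg_candidates)                  # 9: fewer fg than the quota
bg_take = min(rois_per_image - fg_take, num_bg_candidates)           # 55: bg fills the rest
print(fg_take, bg_take)  # 9 55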
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn import map_rois_to_levels
from lib.faster_rcnn import map_blobs_to_outputs
from lib.modeling.detector import new_detector
from lib.nms import nms_wrapper
from lib.utils import framework
from lib.utils import time_util
from lib.utils import boxes as box_util
from lib.utils.blob import im_list_to_blob
from lib.utils.image import scale_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales."""
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale
], dtype=np.float32)
# Do Forward
if not hasattr(detector, 'graph'):
with framework.new_workspace().as_default():
data = torch.from_numpy(blobs['data'])
ims_info = torch.from_numpy(blobs['ims_info'])
with torch.no_grad():
with torch.jit.Tracer(retain_ops=True):
inputs = {'data': data, 'ims_info': ims_info}
outputs = detector.forward(inputs)
detector.graph = \
framework.Graph(inputs, {
'rois': outputs['rois'],
'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']
})
outputs = detector.graph(**blobs)
# Decode results
rois = outputs['rois']
scores, boxes, batch_inds = [], [], []
pred_boxes = \
box_util.bbox_transform_inv(
rois[:, 1:5],
outputs['bbox_pred'],
cfg.BBOX_REG_WEIGHTS,
)
for i in range(len(ims)):
inds = np.where(rois[:, 0].astype(np.int32) == i)[0]
im_boxes = pred_boxes[inds] / ims_scale[i]
scores.append(outputs['cls_prob'][inds])
boxes.append(box_util.clip_tiled_boxes(im_boxes, raw_image.shape))
batch_inds.append(np.ones((len(inds), 1), 'int32') * i)
return (
np.vstack(scores) if len(ims) > 0 else scores[0],
np.vstack(boxes) if len(ims) > 0 else boxes[0],
np.vstack(batch_inds) if len(ims) > 0 else batch_inds[0],
np.array(ims_scale, 'float64'),
)
def mask_detect(detector, rois):
k_min = cfg.FPN.ROI_MIN_LEVEL
k_max = cfg.FPN.ROI_MAX_LEVEL
k = k_max - k_min + 1
levels = map_rois_to_levels(rois, k_min, k_max)
level_inds = [np.where(levels == (i + k_min))[0] for i in range(k)]
fpn_rois = map_blobs_to_outputs(
{'rois': rois[:, :5]},
{'rois': np.array([[-1, 0, 0, 1, 1]], 'float32')},
level_inds)['rois']
workspace = detector.graph.workspace
placeholders = detector.graph.placeholders
score_fn = detector.rcnn.compute_mask_score
with workspace.as_default():
if 'rois' not in placeholders:
placeholders['rois'] = \
[framework.new_placeholder(cfg.GPU_ID) for _ in range(k)]
placeholders['mask_inds'] = \
framework.new_placeholder(cfg.GPU_ID)
for i, v in enumerate(fpn_rois):
framework.feed_tensor(placeholders['rois'][i], v.astype('float32'))
with torch.no_grad():
mask_score = score_fn(rois=placeholders['rois'])
nc, i = mask_score.shape[1], 0
mask_inds = {}
for inds in level_inds:
for idx in inds:
cls = int(rois[idx, 5])
mask_inds[idx] = (i * nc + cls)
i += 1
if len(inds) == 0:
i += 1
mask_inds = list(map(mask_inds.get, sorted(mask_inds)))
framework.feed_tensor(
placeholders['mask_inds'],
np.array(mask_inds, 'int64'),
)
with torch.no_grad():
mask_pred = mask_score.index_select(
(0, 1), placeholders['mask_inds'])
return detector.rcnn.sigmoid(mask_pred).numpy(True).copy()
def test_net(weights, num_classes, q_in, q_out, device):
num_classes, cfg.GPU_ID = num_classes, device
detector = new_detector(device, weights)
_t = time_util.new_timers('im_detect', 'mask_detect', 'misc')
while True:
idx, raw_image = q_in.get()
if raw_image is None:
break
rois_this_image = []
boxes_this_image = [[]]
masks_this_image = [[]]
with _t['im_detect'].tic_and_toc():
scores, boxes, batch_inds, ims_scale = \
im_detect(detector, raw_image)
_t['misc'].tic()
for j in range(1, num_classes):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
cls_batch_inds = batch_inds[inds]
cls_detections = np.hstack(
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = nms_wrapper.soft_nms(
cls_detections,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms_wrapper.nms(
cls_detections,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
cls_batch_inds = cls_batch_inds[keep]
boxes_this_image.append(cls_detections)
rois_this_image.append(
np.hstack((
cls_batch_inds,
cls_detections[:, :4] * ims_scale[cls_batch_inds],
np.ones((len(keep), 1)) * (j - 1),
)))
mask_rois = np.concatenate(rois_this_image)
_t['misc'].toc()
if len(mask_rois) > 0:
k = 0
_t['mask_detect'].tic()
mask_pred = mask_detect(detector, mask_rois)
for j in range(1, num_classes):
num_pred = len(boxes_this_image[j])
cls_masks = mask_pred[k:k + num_pred]
masks_this_image.append(cls_masks)
k += num_pred
_t['mask_detect'].toc()
q_out.put((
idx,
{
'im_detect': _t['im_detect'].average_time,
'mask_detect': _t['mask_detect'].average_time,
'misc': _t['misc'].average_time,
},
{
'boxes': boxes_this_image,
'masks': masks_this_image,
},
))
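mask_detect gathers one class-specific mask per RoI by flattening the (num_rois, num_classes - 1) leading axes and indexing with i * nc + cls; a numpy sketch of that gather with made-up shapes:

import numpy as np

num_rois, nc, size = 3, 80, 28            # nc = num_classes - 1 (no background)
mask_score = np.random.rand(num_rois, nc, size, size)
classes = np.array([5, 0, 17])            # predicted class index (minus background) per RoI

flat_inds = np.arange(num_rois) * nc + classes
gathered = mask_score.reshape(-1, size, size)[flat_inds]
# Equivalent to picking mask_score[i, classes[i]] for each RoI i
assert np.allclose(gathered[1], mask_score[1, 0])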
...@@ -14,12 +14,9 @@ from __future__ import division ...@@ -14,12 +14,9 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
# Import custom modules # Import custom modules
from lib.modeling.base import affine
from lib.modeling.base import bn
from lib.modeling.base import conv1x1
from lib.modeling.base import conv3x3
from lib.modeling.fast_rcnn import FastRCNN from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.fpn import FPN from lib.modeling.fpn import FPN
from lib.modeling.mask_rcnn import MaskRCNN
from lib.modeling.retinanet import RetinaNet from lib.modeling.retinanet import RetinaNet
from lib.modeling.rpn import RPN from lib.modeling.rpn import RPN
from lib.modeling.ssd import SSD from lib.modeling.ssd import SSD
...@@ -15,20 +15,19 @@ from __future__ import print_function ...@@ -15,20 +15,19 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.modeling import affine from lib.modules import init
from lib.modeling import conv1x1 from lib.modules import nn
from lib.modeling import conv3x3
class WideResBlock(torch.nn.Module): class WideResBlock(nn.Module):
def __init__(self, dim_in, dim_out, stride=1, downsample=None): def __init__(self, dim_in, dim_out, stride=1, downsample=None):
super(WideResBlock, self).__init__() super(WideResBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride) self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out) self.bn1 = nn.Affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out) self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out) self.bn2 = nn.Affine(dim_out)
self.downsample = downsample self.downsample = downsample
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
def forward(self, x): def forward(self, x):
residual = x residual = x
...@@ -48,20 +47,20 @@ class WideResBlock(torch.nn.Module): ...@@ -48,20 +47,20 @@ class WideResBlock(torch.nn.Module):
return out return out
class InceptionBlock(torch.nn.Module): class InceptionBlock(nn.Module):
def __init__(self, dim_in, dim_out): def __init__(self, dim_in, dim_out):
super(InceptionBlock, self).__init__() super(InceptionBlock, self).__init__()
self.conv1 = conv1x1(dim_in, dim_out) self.conv1 = nn.Conv1x1(dim_in, dim_out)
self.bn1 = affine(dim_out) self.bn1 = nn.Affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out // 2) self.conv2 = nn.Conv3x3(dim_out, dim_out // 2)
self.bn2 = affine(dim_out // 2) self.bn2 = nn.Affine(dim_out // 2)
self.conv3a = conv3x3(dim_out // 2, dim_out) self.conv3a = nn.Conv3x3(dim_out // 2, dim_out)
self.bn3a = affine(dim_out) self.bn3a = nn.Affine(dim_out)
self.conv3b = conv3x3(dim_out, dim_out) self.conv3b = nn.Conv3x3(dim_out, dim_out)
self.bn3b = affine(dim_out) self.bn3b = nn.Affine(dim_out)
self.conv4 = conv3x3(dim_out * 3, dim_out) self.conv4 = nn.Conv3x3(dim_out * 3, dim_out)
self.bn4 = affine(dim_out) self.bn4 = nn.Affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
def forward(self, x): def forward(self, x):
residual = x residual = x
...@@ -82,7 +81,7 @@ class InceptionBlock(torch.nn.Module): ...@@ -82,7 +81,7 @@ class InceptionBlock(torch.nn.Module):
out_3x3_b = self.bn3b(out) out_3x3_b = self.bn3b(out)
out_3x3_b = self.relu(out_3x3_b) out_3x3_b = self.relu(out_3x3_b)
out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], dim=1) out = torch.cat([out_1x1, out_3x3_a, out_3x3_b], 1)
out = self.conv4(out) out = self.conv4(out)
out = self.bn4(out) out = self.bn4(out)
...@@ -91,22 +90,22 @@ class InceptionBlock(torch.nn.Module): ...@@ -91,22 +90,22 @@ class InceptionBlock(torch.nn.Module):
return out return out
class AirNet(torch.nn.Module): class AirNet(nn.Module):
def __init__(self, blocks, num_stages): def __init__(self, blocks, num_stages):
super(AirNet, self).__init__() super(AirNet, self).__init__()
self.dim_in, filters = 64, [64, 128, 256, 384] self.dim_in, filters = 64, [64, 128, 256, 384]
self.feature_dims = [None, None] + \ self.feature_dims = [None, None] + \
filters[1:num_stages - 1] filters[1:num_stages - 1]
self.conv1 = torch.nn.Conv2d( self.conv1 = nn.Conv2d(
3, 64, 3, 64,
kernel_size=7, kernel_size=7,
stride=2, stride=2,
padding=3, padding=3,
bias=False, bias=False,
) )
self.bn1 = affine(self.dim_in) self.bn1 = nn.Affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d( self.maxpool = nn.MaxPool2d(
kernel_size=2, kernel_size=2,
stride=2, stride=2,
padding=0, padding=0,
...@@ -121,19 +120,14 @@ class AirNet(torch.nn.Module): ...@@ -121,19 +120,14 @@ class AirNet(torch.nn.Module):
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
# The Kaiming Initialization
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
torch.nn.init.kaiming_uniform_( init.xaiver(m.weight)
m.weight,
# Fix the gain for [-127, 127]
a=1,
) # Xavier Initialization
def make_blocks(self, dim_out, blocks, stride=1): def make_blocks(self, dim_out, blocks, stride=1):
downsample = torch.nn.Sequential( downsample = nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride), nn.Conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out), nn.Affine(dim_out),
) )
layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)] layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)]
self.dim_in = dim_out self.dim_in = dim_out
...@@ -144,7 +138,7 @@ class AirNet(torch.nn.Module): ...@@ -144,7 +138,7 @@ class AirNet(torch.nn.Module):
layers.append(InceptionBlock(dim_out, dim_out)) layers.append(InceptionBlock(dim_out, dim_out))
else: else:
raise ValueError('Unknown block flag: ' + blocks[i]) raise ValueError('Unknown block flag: ' + blocks[i])
return torch.nn.Sequential(*layers) return nn.Sequential(*layers)
def forward(self, x): def forward(self, x):
x = self.conv1(x) x = self.conv1(x)
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define some basic structures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
def affine(dim_in, inplace=True):
"""AffineBN, weight and bias are fixed."""
return torch.nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
def bn(dim_in, eps=1e-5):
"""The BatchNorm."""
return torch.nn.BatchNorm2d(dim_in, eps=eps)
def conv1x1(dim_in, dim_out, stride=1, bias=False):
"""1x1 convolution."""
return torch.nn.Conv2d(
dim_in,
dim_out,
kernel_size=1,
stride=stride,
bias=bias,
)
def conv3x3(dim_in, dim_out, stride=1, bias=False):
"""3x3 convolution with padding."""
return torch.nn.Conv2d(
dim_in,
dim_out,
kernel_size=3,
stride=stride,
padding=1,
bias=bias,
)
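A minimal sketch of how these helpers compose into the frozen-BN conv block used by the backbones; torch.zeros is assumed to behave like its PyTorch counterpart here:

def conv_affine_relu(dim_in, dim_out, stride=1):
    """3x3 conv followed by a frozen affine (BN surrogate) and ReLU."""
    return torch.nn.Sequential(
        conv3x3(dim_in, dim_out, stride=stride),
        affine(dim_out),
        torch.nn.ReLU(inplace=True),
    )

block = conv_affine_relu(64, 128, stride=2)
y = block(torch.zeros(1, 64, 56, 56))  # -> (1, 128, 28, 28)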
...@@ -21,14 +21,16 @@ from lib.core.config import cfg ...@@ -21,14 +21,16 @@ from lib.core.config import cfg
from lib.modeling import FPN from lib.modeling import FPN
from lib.modeling import RPN from lib.modeling import RPN
from lib.modeling import FastRCNN from lib.modeling import FastRCNN
from lib.modeling import MaskRCNN
from lib.modeling import RetinaNet from lib.modeling import RetinaNet
from lib.modeling import SSD from lib.modeling import SSD
from lib.modeling.factory import get_body_func from lib.modeling.factory import get_body_func
from lib.ops.modules import Bootstrap from lib.modules import nn
from lib.utils.logger import is_root from lib.modules import vision
from lib.utils import logger
class Detector(torch.nn.Module): class Detector(nn.Module):
"""Organize the detection pipelines. """Organize the detection pipelines.
A bunch of classic algorithms are integrated, see the A bunch of classic algorithms are integrated, see the
...@@ -42,19 +44,20 @@ class Detector(torch.nn.Module): ...@@ -42,19 +44,20 @@ class Detector(torch.nn.Module):
backbone = cfg.MODEL.BACKBONE.lower().split('.') backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:] body, modules = backbone[0], backbone[1:]
# + Data Loader # + DataLoader
self.data_layer = importlib.import_module( self.data_loader_cls = importlib.import_module(
'lib.{}'.format(model)).DataLayer 'lib.{}'.format(model)).DataLoader
self.bootstrap = Bootstrap() self.bootstrap = vision.Bootstrap()
# + Feature Extractor # + FeatureExtractor
self.body = get_body_func(body)() self.body = get_body_func(body)()
feature_dims = self.body.feature_dims feature_dims = self.body.feature_dims
# + Feature Enhancer # + FeatureEnhancer
if 'fpn' in modules: if 'fpn' in modules:
self.fpn = FPN(feature_dims) self.fpn = FPN(feature_dims)
feature_dims = self.fpn.feature_dims feature_dims = self.fpn.feature_dims
elif 'mbox' in modules: elif 'mbox' in modules:
pass # Placeholder pass # Placeholder
else: else:
...@@ -63,7 +66,10 @@ class Detector(torch.nn.Module): ...@@ -63,7 +66,10 @@ class Detector(torch.nn.Module):
# + Detection Modules # + Detection Modules
if 'rcnn' in model: if 'rcnn' in model:
self.rpn = RPN(feature_dims[0]) self.rpn = RPN(feature_dims[0])
self.fast_rcnn = FastRCNN(feature_dims[0]) if 'faster' in model:
self.rcnn = FastRCNN(feature_dims[0])
elif 'mask' in model:
self.rcnn = MaskRCNN(feature_dims[0])
if 'retinanet' in model: if 'retinanet' in model:
self.retinanet = RetinaNet(feature_dims[0]) self.retinanet = RetinaNet(feature_dims[0])
...@@ -85,7 +91,7 @@ class Detector(torch.nn.Module): ...@@ -85,7 +91,7 @@ class Detector(torch.nn.Module):
self.load_state_dict( self.load_state_dict(
torch.load(weights), torch.load(weights),
strict=False, strict=False,
verbose=is_root(), verbose=logger.is_root(),
) )
def forward(self, inputs=None): def forward(self, inputs=None):
...@@ -107,7 +113,7 @@ class Detector(torch.nn.Module): ...@@ -107,7 +113,7 @@ class Detector(torch.nn.Module):
# 1) Training: <= DataLayer # 1) Training: <= DataLayer
# 2) Inference: <= Given # 2) Inference: <= Given
if not hasattr(self, 'data_loader'): if not hasattr(self, 'data_loader'):
self.data_loader = self.data_layer() self.data_loader = self.data_loader_cls()
inputs = self.data_loader() inputs = self.data_loader()
# 1. Extract features # 1. Extract features
...@@ -126,7 +132,7 @@ class Detector(torch.nn.Module): ...@@ -126,7 +132,7 @@ class Detector(torch.nn.Module):
# 3. Collect detection outputs # 3. Collect detection outputs
outputs = collections.OrderedDict() outputs = collections.OrderedDict()
# 3.1 Feature -> RPN -> Fast R-CNN # 3.1 Feature -> RPN -> R-CNN
if hasattr(self, 'rpn'): if hasattr(self, 'rpn'):
outputs.update( outputs.update(
self.rpn( self.rpn(
...@@ -135,7 +141,7 @@ class Detector(torch.nn.Module): ...@@ -135,7 +141,7 @@ class Detector(torch.nn.Module):
) )
) )
outputs.update( outputs.update(
self.fast_rcnn( self.rcnn(
features=features, features=features,
rpn_cls_score=outputs['rpn_cls_score'], rpn_cls_score=outputs['rpn_cls_score'],
rpn_bbox_pred=outputs['rpn_bbox_pred'], rpn_bbox_pred=outputs['rpn_bbox_pred'],
...@@ -174,8 +180,8 @@ class Detector(torch.nn.Module): ...@@ -174,8 +180,8 @@ class Detector(torch.nn.Module):
################################## ##################################
last_module = None last_module = None
for e in self.modules(): for e in self.modules():
if isinstance(e, torch.nn.Affine) and \ if isinstance(e, nn.Affine) and \
isinstance(last_module, torch.nn.Conv2d): isinstance(last_module, nn.Conv2d):
if last_module.bias is None: if last_module.bias is None:
delattr(last_module, 'bias') delattr(last_module, 'bias')
e.forward = lambda x: x e.forward = lambda x: x
...@@ -188,8 +194,8 @@ class Detector(torch.nn.Module): ...@@ -188,8 +194,8 @@ class Detector(torch.nn.Module):
###################################### ######################################
last_module = None last_module = None
for e in self.modules(): for e in self.modules():
if isinstance(e, torch.nn.BatchNorm2d) and \ if isinstance(e, nn.BatchNorm2d) and \
isinstance(last_module, torch.nn.Conv2d): nn.is_conv2d(last_module):
if last_module.bias is None: if last_module.bias is None:
delattr(last_module, 'bias') delattr(last_module, 'bias')
e.forward = lambda x: x e.forward = lambda x: x
...@@ -204,3 +210,17 @@ class Detector(torch.nn.Module): ...@@ -204,3 +210,17 @@ class Detector(torch.nn.Module):
else: else:
last_module.weight.data.mul_(term) last_module.weight.data.mul_(term)
last_module = e last_module = e
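
The two fusion passes above fold a frozen Affine/BatchNorm layer that directly follows a convolution into that convolution, so the normalization's forward can be replaced by the identity at inference time. The exact computation of `term` is elided by the hunks; the sketch below shows the standard frozen-BN folding it corresponds to, with the attribute names (gamma, beta, running statistics) treated as assumptions.

import numpy as np

def fold_frozen_bn(conv_w, conv_b, gamma, beta, mean, var, eps=1e-5):
    # A minimal sketch, not the project's implementation: fold
    # y = gamma * (conv(x) - mean) / sqrt(var + eps) + beta into the conv itself.
    term = gamma / np.sqrt(var + eps)                # per-output-channel scale
    folded_w = conv_w * term.reshape(-1, 1, 1, 1)    # scale each output channel
    folded_b = (conv_b - mean) * term + beta         # absorb the shift into the bias
    return folded_w, folded_b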
def new_detector(device, weights=None, training=False):
detector = Detector().cuda(device)
if weights is not None:
detector.load_weights(weights)
if not training:
detector.eval()
detector.optimize_for_inference()
# Enable fp16 inference if requested
# Gives a small speed boost when Tensor Cores are available
if cfg.MODEL.PRECISION.lower() == 'float16':
detector.half()
return detector
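
A minimal usage sketch of the factory above; the import path and the checkpoint path are assumptions, and cfg.MODEL.PRECISION is expected to be set by the experiment config.

from lib.modeling.detector import new_detector  # import path is an assumption

# Build an inference-ready detector on GPU 0 from a hypothetical checkpoint;
# eval() and optimize_for_inference() are applied because training=False.
detector = new_detector(device=0, weights='/path/to/model_final.pkl')

# Training-mode construction keeps gradients and skips the inference fusions.
trainer = new_detector(device=0, training=True)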
...@@ -43,14 +43,20 @@ for D in ['', '3b', '4b', '5b']: ...@@ -43,14 +43,20 @@ for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \ _STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D) 'lib.modeling.airnet.make_airnet_{}'.format(D)
# MobileNet
for D in ['a1', 'v2']:
_STORE['BODY']['mobilenet_{}'.format(D)] = \
'lib.modeling.mobilenet.make_mobilenet_{}'.format(D)
def get_template_func(name, sets, desc): def get_template_func(name, sets, desc):
name = name.lower() name = name.lower()
if name not in sets: if name not in sets:
raise ValueError( raise ValueError(
'The {} for {} was not registered.\n' 'The {} for {} was not registered.\n'
'Registered modules: [{}]'.format( 'Registered modules: [{}]'
name, desc, ', '.join(sets.keys()))) .format(name, desc, ', '.join(sets.keys()))
)
module_name = '.'.join(sets[name].split('.')[0:-1]) module_name = '.'.join(sets[name].split('.')[0:-1])
func_name = sets[name].split('.')[-1] func_name = sets[name].split('.')[-1]
try: try:
......
...@@ -14,13 +14,19 @@ from __future__ import division ...@@ -14,13 +14,19 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import functools
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib import faster_rcnn
from lib.core.config import cfg from lib.core.config import cfg
from lib.ops.modules import RPNDecoder from lib.modules import det
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
class FastRCNN(torch.nn.Module): class FastRCNN(nn.Module):
"""Generate proposal regions for R-CNN series. """Generate proposal regions for R-CNN series.
The pipeline is as follows: The pipeline is as follows:
...@@ -32,59 +38,45 @@ class FastRCNN(torch.nn.Module): ...@@ -32,59 +38,45 @@ class FastRCNN(torch.nn.Module):
""" """
def __init__(self, dim_in=256): def __init__(self, dim_in=256):
super(FastRCNN, self).__init__() super(FastRCNN, self).__init__()
if len(cfg.RPN.STRIDES) > 1:
# RPN with multiple strides(i.e. FPN)
from lib.fpn import ProposalLayer, ProposalTargetLayer
else:
# RPN with single stride(i.e. C4)
from lib.faster_rcnn import ProposalLayer, ProposalTargetLayer
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2) self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = torch.nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM) self.fc6 = nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM) self.fc7 = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES) self.cls_score = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4) self.bbox_pred = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.rpn_decoder = RPNDecoder() self.rpn_decoder = det.RPNDecoder()
self.proposal_layer = ProposalLayer() self.proposal = faster_rcnn.Proposal()
self.proposal_target_layer = ProposalTargetLayer() self.proposal_target = faster_rcnn.ProposalTarget()
self.softmax = torch.nn.Softmax(dim=1) self.softmax = nn.Softmax(dim=1)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.sigmoid = torch.nn.Sigmoid(inplace=False) self.sigmoid = nn.Sigmoid()
self.roi_func = { self.box_roi_feature = functools.partial({
'RoIPool': torch.vision.ops.roi_pool, 'RoIPool': vision.roi_pool,
'RoIAlign': torch.vision.ops.roi_align, 'RoIAlign': vision.roi_align
}[cfg.FRCNN.ROI_XFORM_METHOD] }[cfg.FRCNN.ROI_XFORM_METHOD], size=cfg.FRCNN.ROI_XFORM_RESOLUTION)
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1) self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = torch.nn.SmoothL1Loss(reduction='batch_size') self.bbox_loss = nn.SmoothL1Loss()
# Compute spatial scales for multiple strides # Compute spatial scales according to strides
roi_levels = [level for level in range( self.spatial_scales = [
cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)] 1. / (2 ** lvl)
self.spatial_scales = [1.0 / (2 ** level) for level in roi_levels] for lvl in range(
cfg.FPN.ROI_MIN_LEVEL,
cfg.FPN.ROI_MAX_LEVEL + 1
)]
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
# Careful initialization for Fast R-CNN # Careful initialization for Fast R-CNN
torch.nn.init.normal_(self.cls_score.weight, std=0.01) init.normal(self.cls_score.weight, std=0.01)
torch.nn.init.normal_(self.bbox_pred.weight, std=0.001) init.normal(self.bbox_pred.weight, std=0.001)
for name, p in self.named_parameters(): for name, p in self.named_parameters():
if 'bias' in name: if 'bias' in name:
torch.nn.init.constant_(p, 0) init.constant(p, 0)
def RoIFeatureTransform(self, feature, rois, spatial_scale):
return self.roi_func(
feature, rois,
output_size=(
cfg.FRCNN.ROI_XFORM_RESOLUTION,
cfg.FRCNN.ROI_XFORM_RESOLUTION,
),
spatial_scale=spatial_scale,
)
def forward(self, **kwargs): def forward(self, **kwargs):
# Generate Proposals # Generate proposals
# Apply the CXX implementation during inference proposal_func = self.proposal \
proposal_func = self.proposal_layer \
if self.training else self.rpn_decoder if self.training else self.rpn_decoder
self.rcnn_data = { self.data = {
'rois': proposal_func( 'rois': proposal_func(
kwargs['features'], kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data), self.sigmoid(kwargs['rpn_cls_score'].data),
...@@ -93,66 +85,61 @@ class FastRCNN(torch.nn.Module): ...@@ -93,66 +85,61 @@ class FastRCNN(torch.nn.Module):
) )
} }
# Generate Targets from Proposals # Generate targets from proposals
if self.training: if self.training:
self.rcnn_data.update( self.data.update(
self.proposal_target_layer( self.proposal_target(
rpn_rois=self.rcnn_data['rois'], rpn_rois=self.data['rois'],
gt_boxes=kwargs['gt_boxes'], gt_boxes=kwargs['gt_boxes'],
) )
) )
# Transform RoI Feature # Transform RoI features
roi_features = [] if len(self.data['rois']) > 1:
if len(self.rcnn_data['rois']) > 1: roi_features = \
for i, spatial_scale in enumerate(self.spatial_scales): torch.cat([
roi_features.append( self.box_roi_feature(
self.RoIFeatureTransform(
kwargs['features'][i], kwargs['features'][i],
self.rcnn_data['rois'][i], self.data['rois'][i],
spatial_scale, spatial_scale,
) ) for i, spatial_scale in enumerate(self.spatial_scales)
) ], dim=0)
roi_features = torch.cat(roi_features, dim=0)
else: else:
spatial_scale = 1.0 / cfg.RPN.STRIDES[0]
roi_features = \ roi_features = \
self.RoIFeatureTransform( self.box_roi_feature(
kwargs['features'][0], kwargs['features'][0],
self.rcnn_data['rois'][0], self.data['rois'][0],
spatial_scale, 1. / cfg.RPN.STRIDES[0],
) )
# Apply a simple MLP # Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim) roi_features = roi_features.view(-1, self.roi_head_dim)
rcnn_output = self.relu(self.fc6(roi_features)) roi_features = self.relu(self.fc6(roi_features))
rcnn_output = self.relu(self.fc7(rcnn_output)) roi_features = self.relu(self.fc7(roi_features))
# Compute rcnn logits # Compute logits and losses
cls_score = self.cls_score(rcnn_output).float() outputs = collections.OrderedDict()
outputs = collections.OrderedDict([ cls_score = self.cls_score(roi_features).float()
('bbox_pred', self.bbox_pred(rcnn_output).float()), outputs['bbox_pred'] = self.bbox_pred(roi_features).float()
])
if self.training: if self.training:
# Compute rcnn losses # Compute rcnn losses
outputs.update(collections.OrderedDict([ outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss( ('cls_loss', self.cls_loss(
cls_score, self.rcnn_data['labels'])), cls_score, self.data['labels'])),
('bbox_loss', self.bbox_loss( ('bbox_loss', self.bbox_loss(
outputs['bbox_pred'], outputs['bbox_pred'],
self.rcnn_data['bbox_targets'], self.data['bbox_targets'],
self.rcnn_data['bbox_inside_weights'], self.data['bbox_inside_weights'],
self.rcnn_data['bbox_outside_weights'], self.data['bbox_outside_weights'],
)), )),
])) ]))
else: else:
# Return the rois to decode the refine boxes # Return the rois to decode the refine boxes
if len(self.rcnn_data['rois']) > 1: if len(self.data['rois']) > 1:
outputs['rois'] = torch.cat( outputs['rois'] = torch.cat(self.data['rois'], 0)
self.rcnn_data['rois'], dim=0)
else: else:
outputs['rois'] = self.rcnn_data['rois'][0] outputs['rois'] = self.data['rois'][0]
# Return the classification prob # Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score) outputs['cls_prob'] = self.softmax(cls_score)
......
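
Both R-CNN heads map FPN RoI levels to feature spatial scales with `1. / (2 ** lvl)`. A small worked check, assuming the common `ROI_MIN_LEVEL = 2` and `ROI_MAX_LEVEL = 5` (these config values are not shown in this diff):

ROI_MIN_LEVEL, ROI_MAX_LEVEL = 2, 5  # assumed typical FPN RoI levels

spatial_scales = [1. / (2 ** lvl)
                  for lvl in range(ROI_MIN_LEVEL, ROI_MAX_LEVEL + 1)]
print(spatial_scales)  # [0.25, 0.125, 0.0625, 0.03125], i.e. strides 4, 8, 16, 32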
...@@ -16,43 +16,41 @@ from __future__ import print_function ...@@ -16,43 +16,41 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1 from lib.modules import init
from lib.modeling import conv3x3 from lib.modules import nn
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
class FPN(torch.nn.Module): class FPN(nn.Module):
"""Feature Pyramid Networks for R-CNN and RetinaNet.""" """Feature Pyramid Networks for R-CNN and RetinaNet."""
def __init__(self, feature_dims): def __init__(self, feature_dims):
super(FPN, self).__init__() super(FPN, self).__init__()
self.C = torch.nn.ModuleList() dim = cfg.FPN.DIM
self.P = torch.nn.ModuleList() self.C = nn.ModuleList()
self.P = nn.ModuleList()
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1): for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1):
self.C.append(conv1x1(feature_dims[lvl - 1], cfg.FPN.DIM, bias=True)) self.C.append(nn.Conv1x1(feature_dims[lvl - 1], dim, bias=True))
self.P.append(conv3x3(cfg.FPN.DIM, cfg.FPN.DIM, bias=True)) self.P.append(nn.Conv3x3(dim, dim, bias=True))
if 'rcnn' in cfg.MODEL.TYPE: if 'rcnn' in cfg.MODEL.TYPE:
self.apply_func = self.apply_on_rcnn self.apply_func = self.apply_on_rcnn
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True) self.maxpool = nn.MaxPool2d(1, 2, ceil_mode=True)
else: else:
self.apply_func = self.apply_on_generic self.apply_func = self.apply_on_generic
self.relu = torch.nn.ReLU(inplace=False) self.relu = nn.ReLU(inplace=False)
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1): for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else cfg.FPN.DIM dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else dim
self.P.append(conv3x3(dim_in, cfg.FPN.DIM, stride=2, bias=True)) self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True))
self.feature_dims = [dim]
self.reset_parameters() self.reset_parameters()
self.feature_dims = [cfg.FPN.DIM]
def reset_parameters(self): def reset_parameters(self):
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
torch.nn.init.kaiming_uniform_( init.xavier(m.weight)
m.weight, init.constant(m.bias, 0)
a=1, # Fix the gain for [-127, 127]
) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0)
def apply_on_rcnn(self, features): def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import functools
import dragon.vm.torch as torch
from lib import mask_rcnn
from lib.core.config import cfg
from lib.modules import det
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
class MaskRCNN(nn.Module):
def __init__(self, dim_in=256):
"""Generate mask regions for R-CNN series.
The pipeline is as follows:
... -> BoxRoIs \ /-> cls_score -> cls_loss
-> RoIFeatureXform -> MLP
... -> Features / \-> bbox_pred -> bbox_loss
... -> MaskRoIs \
-> RoIFeatureXform -> FCN -> mask_score -> mask_loss
... -> Features /
"""
super(MaskRCNN, self).__init__()
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.fcn = nn.ModuleList([nn.Conv3x3(dim_in, dim_in, bias=True) for _ in range(4)])
self.fcn += [nn.ConvTranspose2d(dim_in, dim_in, 2, 2, 0)]
self.cls_score = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.mask_score = nn.Conv1x1(dim_in, cfg.MODEL.NUM_CLASSES - 1, bias=True)
self.rpn_decoder = det.RPNDecoder()
self.proposal = mask_rcnn.Proposal()
self.proposal_target = mask_rcnn.ProposalTarget()
self.sigmoid = nn.Sigmoid()
self.softmax = nn.Softmax(dim=1)
self.relu = nn.ReLU(True)
self.box_roi_feature = functools.partial({
'RoIPool': vision.roi_pool,
'RoIAlign': vision.roi_align,
}[cfg.FRCNN.ROI_XFORM_METHOD], size=cfg.FRCNN.ROI_XFORM_RESOLUTION)
self.mask_roi_feature = functools.partial({
'RoIPool': vision.roi_pool,
'RoIAlign': vision.roi_align,
}[cfg.MRCNN.ROI_XFORM_METHOD], size=cfg.MRCNN.ROI_XFORM_RESOLUTION)
self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = nn.SmoothL1Loss()
self.mask_loss = nn.BCEWithLogitsLoss()
# Compute spatial scales according to strides
self.spatial_scales = [
1. / (2 ** lvl)
for lvl in range(
cfg.FPN.ROI_MIN_LEVEL,
cfg.FPN.ROI_MAX_LEVEL + 1
)]
self.reset_parameters()
def reset_parameters(self):
# Careful initialization for Fast R-CNN
init.normal(self.cls_score.weight, std=0.01)
init.normal(self.bbox_pred.weight, std=0.001)
# Careful initialization for Mask R-CNN
init.normal(self.mask_score.weight, std=0.001)
for m in self.fcn.modules():
if hasattr(m, 'weight'):
init.kaiming_normal(m.weight)
for name, p in self.named_parameters():
if 'bias' in name:
init.constant(p, 0)
def get_mask_score(self, features, rois):
roi_features = \
torch.cat([
self.mask_roi_feature(
features[i], rois[i], spatial_scale,
) for i, spatial_scale in enumerate(self.spatial_scales)
], dim=0)
for i in range(len(self.fcn)):
roi_features = self.relu(self.fcn[i](roi_features))
return self.mask_score(roi_features).float()
def forward(self, **kwargs):
# Generate proposals
proposal_func = self.proposal \
if self.training else self.rpn_decoder
self.data = {
'rois': proposal_func(
kwargs['features'],
self.sigmoid(kwargs['rpn_cls_score'].data),
kwargs['rpn_bbox_pred'],
kwargs['ims_info'],
)
}
# Generate targets from proposals
if self.training:
self.data.update(
self.proposal_target(
rpn_rois=self.data['rois'],
gt_boxes=kwargs['gt_boxes'],
gt_masks=kwargs['gt_masks'],
ims_info=kwargs['ims_info'],
)
)
# Transform RoI features
roi_features = \
torch.cat([
self.box_roi_feature(
kwargs['features'][i],
self.data['rois'][i],
spatial_scale,
) for i, spatial_scale in enumerate(self.spatial_scales)
], dim=0)
# Apply a simple MLP
roi_features = roi_features.view(-1, self.roi_head_dim)
roi_features = self.relu(self.fc6(roi_features))
roi_features = self.relu(self.fc7(roi_features))
# Compute logits and losses
outputs = collections.OrderedDict()
cls_score = self.cls_score(roi_features).float()
outputs['bbox_pred'] = self.bbox_pred(roi_features).float()
if self.training:
# Compute the loss of bbox branch
outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss(
cls_score, self.data['labels'])),
('bbox_loss', self.bbox_loss(
outputs['bbox_pred'],
self.data['bbox_targets'],
self.data['bbox_inside_weights'],
self.data['bbox_outside_weights'],
)),
]))
# Compute the loss of mask branch
mask_score = self.get_mask_score(
kwargs['features'], self.data['mask_rois'])
mask_score = mask_score.index_select(
(0, 1), self.data['mask_indices'])
outputs['mask_loss'] = self.mask_loss(
mask_score, self.data['mask_targets'])
else:
# Return the RoIs to decode the refined boxes
if len(self.data['rois']) > 1:
outputs['rois'] = torch.cat(self.data['rois'], 0)
else:
outputs['rois'] = self.data['rois'][0]
# Return the classification prob
outputs['cls_prob'] = self.softmax(cls_score)
# Set a callback to decode masks from the refined RoIs
self.compute_mask_score = \
functools.partial(
self.get_mask_score,
features=kwargs['features'],
)
return outputs
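
At inference the mask branch is not evaluated inside forward(); the head instead stores compute_mask_score as a partial bound to the current features, so the test pipeline can score masks only for the final, refined RoIs. A usage sketch with hypothetical names (mask_rcnn, refined_rois):

# After an eval-mode forward() has stored the callback:
mask_logits = mask_rcnn.compute_mask_score(rois=refined_rois)  # one RoI tensor per FPN level
# Shape follows the head above: (num_rois, NUM_CLASSES - 1, 2 * R, 2 * R),
# where R is cfg.MRCNN.ROI_XFORM_RESOLUTION (the ConvTranspose2d doubles it).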
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modules import init
from lib.modules import nn
from lib.modules import vision
def conv_triplet(dim_in, dim_out):
"""1x1 convolution + BN + ReLU."""
return [
nn.Conv2d(dim_in, dim_out, 1, bias=False),
nn.Affine(dim_out),
nn.ReLU(True),
]
def conv_quintet(dim_in, dim_out, ks, stride):
"""KxK convolution + BN + ReLU."""
return [
nn.DepthwiseConv2d(
dim_in, dim_in,
kernel_size=ks,
stride=stride,
padding=ks // 2,
bias=False,
),
nn.Affine(dim_in),
nn.ReLU(True),
nn.Conv1x1(dim_in, dim_out),
nn.Affine(dim_out),
]
class Setting(object):
V2 = (
[2, 3, 4, 3, 3, 1],
[2, 2, 2, 1, 2, 1],
[32, 16, 24, 32, 64, 96, 160, 320, 1280],
)
PROXYLESS_MOBILE = (
[4, 4, 4, 4, 4, 1],
[2, 2, 2, 1, 2, 1],
[32, 16, 32, 40, 80, 96, 192, 320, 1280],
)
PROXYLESS_GPU = (
[4, 4, 4, 4, 4, 1],
[2, 2, 2, 1, 2, 1],
[40, 24, 32, 56, 112, 128, 256, 432, 1280],
)
def Stem(dim_out, stride=1):
return torch.nn.Sequential(
torch.nn.Conv2d(
3, dim_out,
kernel_size=3,
stride=stride,
padding=1,
bias=False,
),
nn.Affine(dim_out),
nn.ReLU(True),
)
class Choice(nn.Module):
def __init__(self, dim_in, dim_out, mb=3, ks=3, stride=1):
super(Choice, self).__init__()
self.mb = mb
dim_hidden = int(round(dim_in * mb))
seq = conv_triplet(dim_in, dim_hidden) if mb != 1 else []
seq += conv_quintet(dim_hidden, dim_out, ks, stride)
self.conv = nn.ModuleList(seq)
self.stride = stride
self.apply_residual = stride == 1 and dim_in == dim_out
def forward(self, x):
residual = x if self.apply_residual else None
for i in range(3):
x = self.conv[i](x)
y = x if self.stride == 2 else None
for i in range(3, len(self.conv)):
x = self.conv[i](x)
if self.apply_residual:
return residual + x, y
else:
return x, y
class NASMobileNet(nn.Module):
def __init__(self, choices, preset=Setting.PROXYLESS_MOBILE):
super(NASMobileNet, self).__init__()
# Pre-defined blocks
def select_block(choice):
return {
0: functools.partial(Choice, mb=3, ks=3),
1: functools.partial(Choice, mb=6, ks=3),
2: functools.partial(Choice, mb=3, ks=5),
3: functools.partial(Choice, mb=6, ks=5),
4: functools.partial(Choice, mb=3, ks=7),
5: functools.partial(Choice, mb=6, ks=7),
6: nn.Identity,
}[choice]
# Hand-craft configurations
repeats, strides, out_channels = preset
names = ['2!', '3!', '4', '4!', '5', '5!']
self.num_layers = len(choices)
assert sum(repeats) == self.num_layers
# + Stem
self.bootstrap = vision.Bootstrap()
self.conv1 = Stem(out_channels[0], stride=2)
self.stage1 = Choice(out_channels[0], out_channels[1], mb=1, ks=3)
dim_in = out_channels[1]
self.feature_dims = [out_channels[-1]]
# + Body
self.layers = []
for name, rep, dim_out, stride in zip(
names, repeats, out_channels[2:], strides):
self.layers.append(select_block(
choices[len(self.layers)]
)(dim_in, dim_out, stride=stride))
if stride == 2:
self.feature_dims.insert(
-1, dim_in * self.layers[-1].mb)
for i in range(rep - 1):
self.layers.append(select_block(
choices[len(self.layers)]
)(dim_out, dim_out, stride=1))
fullname = 'stage%s' % name.split('!')[0]
seq = getattr(self, fullname, [])
seq += self.layers[-rep:]
seq = nn.Sequential(*seq) if '!' in name else seq
setattr(self, fullname, seq)
dim_in = dim_out
self.conv6 = nn.Sequential(*conv_triplet(dim_in, out_channels[-1]))
self.reset_parameters()
def reset_parameters(self):
for m in self.modules():
if nn.is_conv2d(m):
init.kaiming_normal(m.weight, 'fan_out')
if m.bias is not None:
init.constant(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant(m.weight, 1)
elif isinstance(m, nn.Linear):
if m.bias is not None:
init.constant(m.bias, 0)
# Stop the gradients if necessary
def freeze_func(m):
if nn.is_conv2d(m):
m.weight.requires_grad = False
m._buffers['weight'] = m.weight
del m._parameters['weight']
if cfg.MODEL.FREEZE_AT > 0:
self.conv1.apply(freeze_func)
self.stage1.apply(freeze_func)
for i in range(cfg.MODEL.FREEZE_AT, 1, -1):
getattr(self, 'stage{}'.format(i)).apply(freeze_func)
def forward(self, x):
x = self.conv1(x)
x, _ = self.stage1(x)
outputs = []
for layer in self.layers:
x = layer(x)
x, y = x if isinstance(x, tuple) else (x, None)
if y is not None:
outputs.append(y)
outputs.append(self.conv6(x))
return outputs
def make_mobilenet_a1():
return NASMobileNet([
4, 6, 6, 6,
3, 3, 4, 6,
2, 4, 0, 4, 1, 5, 3, 5,
2, 4, 2, 4,
1,
], Setting.PROXYLESS_MOBILE)
def make_mobilenet_v2():
return NASMobileNet([
1, 1,
1, 1, 1,
1, 1, 1, 1, 1, 1, 1,
1, 1, 1,
1,
], Setting.V2)
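
Each architecture is specified by a flat list of per-layer choices; NASMobileNet.__init__ asserts that its length equals the sum of the preset's per-stage repeats. A quick consistency check for the two builders above:

V2_REPEATS = [2, 3, 4, 3, 3, 1]                 # from Setting.V2
PROXYLESS_MOBILE_REPEATS = [4, 4, 4, 4, 4, 1]   # from Setting.PROXYLESS_MOBILE

assert sum(V2_REPEATS) == 16                # 16 choices in make_mobilenet_v2()
assert sum(PROXYLESS_MOBILE_REPEATS) == 21  # 21 choices in make_mobilenet_a1()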
...@@ -20,12 +20,11 @@ from __future__ import print_function ...@@ -20,12 +20,11 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import affine from lib.modules import nn
from lib.modeling import conv1x1 from lib.modules import init
from lib.modeling import conv3x3
class BasicBlock(torch.nn.Module): class BasicBlock(nn.Module):
def __init__( def __init__(
self, self,
dim_in, dim_in,
...@@ -35,11 +34,11 @@ class BasicBlock(torch.nn.Module): ...@@ -35,11 +34,11 @@ class BasicBlock(torch.nn.Module):
dropblock=None, dropblock=None,
): ):
super(BasicBlock, self).__init__() super(BasicBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride) self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = affine(dim_out) self.bn1 = nn.Affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.conv2 = conv3x3(dim_out, dim_out) self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = affine(dim_out) self.bn2 = nn.Affine(dim_out)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock self.dropblock = dropblock
...@@ -83,12 +82,12 @@ class Bottleneck(torch.nn.Module): ...@@ -83,12 +82,12 @@ class Bottleneck(torch.nn.Module):
): ):
super(Bottleneck, self).__init__() super(Bottleneck, self).__init__()
dim = int(dim_out * self.contraction) dim = int(dim_out * self.contraction)
self.conv1 = conv1x1(dim_in, dim) self.conv1 = nn.Conv1x1(dim_in, dim)
self.bn1 = affine(dim) self.bn1 = nn.Affine(dim)
self.conv2 = conv3x3(dim, dim, stride=stride) self.conv2 = nn.Conv3x3(dim, dim, stride=stride)
self.bn2 = affine(dim) self.bn2 = nn.Affine(dim)
self.conv3 = conv1x1(dim, dim_out) self.conv3 = nn.Conv1x1(dim, dim_out)
self.bn3 = affine(dim_out) self.bn3 = nn.Affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock self.dropblock = dropblock
...@@ -133,7 +132,7 @@ class ResNet(torch.nn.Module): ...@@ -133,7 +132,7 @@ class ResNet(torch.nn.Module):
padding=3, padding=3,
bias=False, bias=False,
) )
self.bn1 = affine(self.dim_in) self.bn1 = nn.Affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True) self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d( self.maxpool = torch.nn.MaxPool2d(
kernel_size=3, kernel_size=3,
...@@ -160,13 +159,9 @@ class ResNet(torch.nn.Module): ...@@ -160,13 +159,9 @@ class ResNet(torch.nn.Module):
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
# The Kaiming Initialization
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
torch.nn.init.kaiming_normal_( init.kaiming_normal(m.weight)
m.weight,
nonlinearity='relu',
)
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
...@@ -184,15 +179,15 @@ class ResNet(torch.nn.Module): ...@@ -184,15 +179,15 @@ class ResNet(torch.nn.Module):
def make_blocks(self, block, dim_out, blocks, stride=1, dropblock=None): def make_blocks(self, block, dim_out, blocks, stride=1, dropblock=None):
downsample = None downsample = None
if stride != 1 or self.dim_in != dim_out: if stride != 1 or self.dim_in != dim_out:
downsample = torch.nn.Sequential( downsample = nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride), nn.Conv1x1(self.dim_in, dim_out, stride=stride),
affine(dim_out), nn.Affine(dim_out),
) )
layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)] layers = [block(self.dim_in, dim_out, stride, downsample, dropblock)]
self.dim_in = dim_out self.dim_in = dim_out
for i in range(1, blocks): for i in range(1, blocks):
layers.append(block(dim_out, dim_out, dropblock=dropblock)) layers.append(block(dim_out, dim_out, dropblock=dropblock))
return torch.nn.Sequential(*layers) return nn.Sequential(*layers)
def forward(self, x): def forward(self, x):
x = self.conv1(x) x = self.conv1(x)
......
...@@ -17,13 +17,14 @@ import collections ...@@ -17,13 +17,14 @@ import collections
import math import math
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib import retinanet
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv3x3 from lib.modules import det
from lib.ops.modules import RetinaNetDecoder from lib.modules import init
from lib.retinanet import AnchorTargetLayer from lib.modules import nn
class RetinaNet(torch.nn.Module): class RetinaNet(nn.Module):
def __init__(self, dim_in=256): def __init__(self, dim_in=256):
super(RetinaNet, self).__init__() super(RetinaNet, self).__init__()
...@@ -32,34 +33,30 @@ class RetinaNet(torch.nn.Module): ...@@ -32,34 +33,30 @@ class RetinaNet(torch.nn.Module):
######################################## ########################################
self.cls_conv = torch.nn.ModuleList( self.cls_conv = torch.nn.ModuleList(
conv3x3(dim_in, dim_in, bias=True) nn.Conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS) for _ in range(cfg.RETINANET.NUM_CONVS)
) )
self.bbox_conv = torch.nn.ModuleList( self.bbox_conv = torch.nn.ModuleList(
conv3x3(dim_in, dim_in, bias=True) nn.Conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS) for _ in range(cfg.RETINANET.NUM_CONVS)
) )
# Packed as [C, A] not [A, C] # Packed as [C, A] not [A, C]
self.C = cfg.MODEL.NUM_CLASSES - 1 self.C = cfg.MODEL.NUM_CLASSES - 1
A = len(cfg.RETINANET.ASPECT_RATIOS) * \ A = len(cfg.RETINANET.ASPECT_RATIOS) * \
cfg.RETINANET.SCALES_PER_OCTAVE cfg.RETINANET.SCALES_PER_OCTAVE
self.cls_score = conv3x3(dim_in, self.C * A, bias=True) self.cls_score = nn.Conv3x3(dim_in, self.C * A, bias=True)
self.bbox_pred = conv3x3(dim_in, 4 * A, bias=True) self.bbox_pred = nn.Conv3x3(dim_in, 4 * A, bias=True)
self.cls_prob = torch.nn.Sigmoid(inplace=True) self.cls_prob = nn.Sigmoid(inplace=True)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.decoder = RetinaNetDecoder() self.decoder = det.RetinaNetDecoder()
######################################## ########################################
# RetinaNet losses # # RetinaNet losses #
######################################## ########################################
self.anchor_target_layer = AnchorTargetLayer() self.anchor_target = retinanet.AnchorTarget()
self.cls_loss = torch.nn.SigmoidFocalLoss( self.cls_loss = nn.SigmoidFocalLoss()
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA, self.bbox_loss = nn.SmoothL1Loss(0.1111)
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
)
self.bbox_loss = torch.nn.SmoothL1Loss(
beta=.11, reduction='batch_size')
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -67,8 +64,8 @@ class RetinaNet(torch.nn.Module): ...@@ -67,8 +64,8 @@ class RetinaNet(torch.nn.Module):
# Weight ~ Normal(0, 0.01) # Weight ~ Normal(0, 0.01)
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, torch.nn.Conv2d):
torch.nn.init.normal_(m.weight, std=0.01) init.normal(m.weight, std=0.01)
torch.nn.init.constant_(m.bias, 0) init.constant(m.bias, 0)
# Bias prior initialization for Focal Loss # Bias prior initialization for Focal Loss
# For details, See the official codes: # For details, See the official codes:
...@@ -127,7 +124,7 @@ class RetinaNet(torch.nn.Module): ...@@ -127,7 +124,7 @@ class RetinaNet(torch.nn.Module):
""" """
self.retinanet_data = \ self.retinanet_data = \
self.anchor_target_layer( self.anchor_target(
features=features, features=features,
gt_boxes=gt_boxes, gt_boxes=gt_boxes,
ims_info=ims_info, ims_info=ims_info,
......
...@@ -16,12 +16,13 @@ from __future__ import print_function ...@@ -16,12 +16,13 @@ from __future__ import print_function
import collections import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib import faster_rcnn
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1 from lib.modules import init
from lib.modeling import conv3x3 from lib.modules import nn
class RPN(torch.nn.Module): class RPN(nn.Module):
"""Region Proposal Networks for R-CNN series.""" """Region Proposal Networks for R-CNN series."""
def __init__(self, dim_in=256): def __init__(self, dim_in=256):
...@@ -33,34 +34,26 @@ class RPN(torch.nn.Module): ...@@ -33,34 +34,26 @@ class RPN(torch.nn.Module):
num_anchors = len(cfg.RPN.ASPECT_RATIOS) * ( num_anchors = len(cfg.RPN.ASPECT_RATIOS) * (
len(cfg.RPN.SCALES) if len(cfg.RPN.STRIDES) == 1 else 1) len(cfg.RPN.SCALES) if len(cfg.RPN.STRIDES) == 1 else 1)
self.output = conv3x3(dim_in, dim_in, bias=True) self.output = nn.Conv3x3(dim_in, dim_in, bias=True)
self.cls_score = conv1x1(dim_in, num_anchors, bias=True) self.cls_score = nn.Conv1x1(dim_in, num_anchors, bias=True)
self.bbox_pred = conv1x1(dim_in, num_anchors * 4, bias=True) self.bbox_pred = nn.Conv1x1(dim_in, num_anchors * 4, bias=True)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
################################## ##################################
# RPN losses # # RPN losses #
################################## ##################################
if len(cfg.RPN.STRIDES) > 1: self.anchor_target = faster_rcnn.AnchorTarget()
# RPN with multiple strides(i.e. FPN) self.cls_loss = nn.BCEWithLogitsLoss()
from lib.fpn.anchor_target_layer import AnchorTargetLayer self.bbox_loss = nn.SmoothL1Loss(0.1111)
else:
# RPN with single stride(i.e. C4)
from lib.faster_rcnn.anchor_target_layer import AnchorTargetLayer
self.anchor_target_layer = AnchorTargetLayer()
self.cls_loss = torch.nn.BCEWithLogitsLoss()
self.bbox_loss = torch.nn.SmoothL1Loss(
beta=.11, reduction='batch_size')
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
# Initialization for the RPN # Initialization for the RPN
# Weight ~ Normal(0, 0.01) # Weight ~ Normal(0, 0.01)
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
torch.nn.init.normal_(m.weight, std=0.01) init.normal(m.weight, std=0.01)
def compute_outputs(self, features): def compute_outputs(self, features):
"""Compute the RPN logits. """Compute the RPN logits.
...@@ -116,7 +109,7 @@ class RPN(torch.nn.Module): ...@@ -116,7 +109,7 @@ class RPN(torch.nn.Module):
""" """
self.rpn_data = \ self.rpn_data = \
self.anchor_target_layer( self.anchor_target(
features=features, features=features,
gt_boxes=gt_boxes, gt_boxes=gt_boxes,
ims_info=ims_info, ims_info=ims_info,
......
...@@ -16,15 +16,13 @@ from __future__ import print_function ...@@ -16,15 +16,13 @@ from __future__ import print_function
import collections import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib import ssd
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv3x3 from lib.modules import init
from lib.ssd import HardMiningLayer from lib.modules import nn
from lib.ssd import MultiBoxMatchLayer
from lib.ssd import MultiBoxTargetLayer
from lib.ssd import PriorBoxLayer
class SSD(torch.nn.Module): class SSD(nn.Module):
def __init__(self, feature_dims): def __init__(self, feature_dims):
super(SSD, self).__init__() super(SSD, self).__init__()
...@@ -33,20 +31,19 @@ class SSD(torch.nn.Module): ...@@ -33,20 +31,19 @@ class SSD(torch.nn.Module):
######################################## ########################################
self.cls_conv = torch.nn.ModuleList( self.cls_conv = torch.nn.ModuleList(
conv3x3(feature_dims[0], feature_dims[0], bias=True) nn.Conv3x3(feature_dims[0], feature_dims[0], bias=True)
for _ in range(cfg.SSD.NUM_CONVS) for _ in range(cfg.SSD.NUM_CONVS)
) )
self.bbox_conv = torch.nn.ModuleList( self.bbox_conv = torch.nn.ModuleList(
conv3x3(feature_dims[0], feature_dims[0], bias=True) nn.Conv3x3(feature_dims[0], feature_dims[0], bias=True)
for _ in range(cfg.SSD.NUM_CONVS) for _ in range(cfg.SSD.NUM_CONVS)
) )
self.cls_score = torch.nn.ModuleList() self.cls_score = nn.ModuleList()
self.bbox_pred = torch.nn.ModuleList() self.bbox_pred = nn.ModuleList()
self.softmax = torch.nn.Softmax(dim=2) self.softmax = nn.Softmax(dim=2)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
C = cfg.MODEL.NUM_CLASSES
self.box_dim = len(cfg.BBOX_REG_WEIGHTS) self.box_dim = len(cfg.BBOX_REG_WEIGHTS)
if len(feature_dims) == 1 and \ if len(feature_dims) == 1 and \
len(feature_dims) != len(cfg.SSD.MULTIBOX.STRIDES): len(feature_dims) != len(cfg.SSD.MULTIBOX.STRIDES):
...@@ -54,24 +51,22 @@ class SSD(torch.nn.Module): ...@@ -54,24 +51,22 @@ class SSD(torch.nn.Module):
feature_dims = list(filter(None, feature_dims)) feature_dims = list(filter(None, feature_dims))
for i, dim_in in enumerate(feature_dims): for i, dim_in in enumerate(feature_dims):
A = len(cfg.SSD.MULTIBOX.ASPECT_RATIOS[i]) + 1 nc = cfg.MODEL.NUM_CLASSES
if self.box_dim == 5 and \ na = len(cfg.SSD.MULTIBOX.ASPECT_RATIOS[i]) + 1
len(cfg.SSD.MULTIBOX.ASPECT_ANGLES) > 0: self.cls_score.append(nn.Conv3x3(dim_in, na * nc, bias=True))
A *= len(cfg.SSD.MULTIBOX.ASPECT_ANGLES) self.bbox_pred.append(nn.Conv3x3(dim_in, na * self.box_dim, bias=True))
self.cls_score.append(conv3x3(dim_in, A * C, bias=True))
self.bbox_pred.append(conv3x3(dim_in, A * self.box_dim, bias=True))
self.prior_box_layer = PriorBoxLayer() self.prior_box = ssd.PriorBox()
######################################## ########################################
# SSD losses # # SSD losses #
######################################## ########################################
self.box_match_layer = MultiBoxMatchLayer() self.box_match = ssd.MultiBoxMatch()
self.hard_mining_layer = HardMiningLayer() self.hard_mining = ssd.HardMining()
self.box_target_layer = MultiBoxTargetLayer() self.box_target = ssd.MultiBoxTarget()
self.cls_loss = torch.nn.CrossEntropyLoss(ignore_index=-1) self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = torch.nn.SmoothL1Loss(reduction='batch_size') self.bbox_loss = nn.SmoothL1Loss()
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -79,16 +74,16 @@ class SSD(torch.nn.Module): ...@@ -79,16 +74,16 @@ class SSD(torch.nn.Module):
# Initialization following the RPN # Initialization following the RPN
# Weight ~ Normal(0, 0.01) # Weight ~ Normal(0, 0.01)
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
torch.nn.init.normal_(m.weight, std=0.01) init.normal(m.weight, std=0.01)
torch.nn.init.constant_(m.bias, 0) init.constant(m.bias, 0)
else: else:
# Careful Initialization # Careful Initialization
# Weight ~ Normal(0, 0.001) # Weight ~ Normal(0, 0.001)
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
torch.nn.init.normal_(m.weight, std=0.001) init.normal(m.weight, std=0.001)
torch.nn.init.constant_(m.bias, 0) init.constant(m.bias, 0)
def compute_outputs(self, features): def compute_outputs(self, features):
"""Compute the SSD logits. """Compute the SSD logits.
...@@ -145,24 +140,24 @@ class SSD(torch.nn.Module): ...@@ -145,24 +140,24 @@ class SSD(torch.nn.Module):
""" """
# Collect the SSD training data # Collect the SSD training data
# See the paper(Liu et al. 2016) for details # See the paper(Liu et al. 2016) for details
self.ssd_data = \ self.data = \
self.box_match_layer( self.box_match(
prior_boxes=prior_boxes, prior_boxes,
gt_boxes=gt_boxes, gt_boxes,
) )
self.ssd_data.update( self.data.update(
self.hard_mining_layer( self.hard_mining(
conf_prob=cls_prob, cls_prob,
match_labels=self.ssd_data['match_labels'], self.data['match_labels'],
max_overlaps=self.ssd_data['max_overlaps'], self.data['max_overlaps'],
) )
) )
self.ssd_data.update( self.data.update(
self.box_target_layer( self.box_target(
match_inds=self.ssd_data['match_inds'], self.data['match_inds'],
match_labels=self.ssd_data['match_labels'], self.data['match_labels'],
prior_boxes=prior_boxes, prior_boxes,
gt_boxes=gt_boxes, gt_boxes,
) )
) )
return collections.OrderedDict([ return collections.OrderedDict([
...@@ -170,17 +165,17 @@ class SSD(torch.nn.Module): ...@@ -170,17 +165,17 @@ class SSD(torch.nn.Module):
# As we normalize both the pos and neg samples # As we normalize both the pos and neg samples
('cls_loss', self.cls_loss( ('cls_loss', self.cls_loss(
cls_score.view(-1, cfg.MODEL.NUM_CLASSES), cls_score.view(-1, cfg.MODEL.NUM_CLASSES),
self.ssd_data['labels']) * 4.), self.data['labels']) * 4.),
('bbox_loss', self.bbox_loss( ('bbox_loss', self.bbox_loss(
bbox_pred, bbox_pred,
self.ssd_data['bbox_targets'], self.data['bbox_targets'],
self.ssd_data['bbox_inside_weights'], self.data['bbox_inside_weights'],
self.ssd_data['bbox_outside_weights'], self.data['bbox_outside_weights'],
)), )),
]) ])
def forward(self, *args, **kwargs): def forward(self, *args, **kwargs):
prior_boxes = self.prior_box_layer(kwargs['features']) prior_boxes = self.prior_box(kwargs['features'])
cls_score, bbox_pred = self.compute_outputs(kwargs['features']) cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float() cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
......
...@@ -13,24 +13,22 @@ from __future__ import absolute_import ...@@ -13,24 +13,22 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling import conv1x1 from lib.modules import init
from lib.modeling import conv3x3 from lib.modules import nn
class VGG(torch.nn.Module): class VGG(nn.Module):
def __init__(self, arch, extra_arch=None, reduced=False): def __init__(self, arch, extra_arch=None, reduced=False):
super(VGG, self).__init__() super(VGG, self).__init__()
self.reduced = reduced self.reduced = reduced
self.units, filter_list = arch self.units, filter_list = arch
self.feature_dims = filter_list[:] self.feature_dims = filter_list[:]
self.maxpool = torch.nn.MaxPool2d( self.maxpool = nn.MaxPool2d(
kernel_size=2, stride=2, ceil_mode=True) kernel_size=2, stride=2, ceil_mode=True)
self.s1pool = torch.nn.MaxPool2d( self.s1pool = nn.MaxPool2d(
kernel_size=3, stride=1, padding=1, ceil_mode=True) kernel_size=3, stride=1, padding=1, ceil_mode=True)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
for i in range(len(self.units)): for i in range(len(self.units)):
conv_name = 'conv{}'.format(i + 1) conv_name = 'conv{}'.format(i + 1)
dim_in = 3 if i == 0 else filter_list[i - 1] dim_in = 3 if i == 0 else filter_list[i - 1]
...@@ -38,21 +36,21 @@ class VGG(torch.nn.Module): ...@@ -38,21 +36,21 @@ class VGG(torch.nn.Module):
self.__setattr__( self.__setattr__(
'{}_{}' '{}_{}'
.format(conv_name, j + 1), .format(conv_name, j + 1),
conv3x3(dim_in, filter_list[i], bias=True), nn.Conv3x3(dim_in, filter_list[i], bias=True),
) )
if j == 0: if j == 0:
dim_in = filter_list[i] dim_in = filter_list[i]
if reduced: if reduced:
# L2Norm is redundant from the observation of # L2Norm is redundant from empirical observation
# empirical experiments. We just keep a trainable scale # We just keep a trainable scale
self.conv4_3_norm = torch.nn.Affine(filter_list[3], bias=False) self.conv4_3_norm = nn.Affine(filter_list[3], bias=False)
self.conv4_3_norm.weight.zero_() # Zero-Init self.conv4_3_norm.weight.zero_() # Zero-Init
self.fc6 = torch.nn.Conv2d( self.fc6 = nn.Conv2d(
filter_list[-1], 1024, filter_list[-1], 1024,
kernel_size=3, padding=6, kernel_size=3, padding=6,
stride=1, dilation=6, stride=1, dilation=6,
) )
self.fc7 = conv1x1(1024, 1024, bias=True) self.fc7 = nn.Conv1x1(1024, 1024, bias=True)
self.feature_dims = [filter_list[-2], 1024] self.feature_dims = [filter_list[-2], 1024]
if extra_arch is not None: if extra_arch is not None:
strides, filter_list, kps = extra_arch strides, filter_list, kps = extra_arch
...@@ -63,36 +61,44 @@ class VGG(torch.nn.Module): ...@@ -63,36 +61,44 @@ class VGG(torch.nn.Module):
dim_in = 1024 if i == 0 else filter_list[i - 1] * 2 dim_in = 1024 if i == 0 else filter_list[i - 1] * 2
self.__setattr__( self.__setattr__(
'{}_1'.format(conv_name), '{}_1'.format(conv_name),
conv1x1(dim_in, filter_list[i], bias=True), nn.Conv1x1(
dim_in,
filter_list[i],
bias=True,
),
) )
if strides[i] == 2: if strides[i] == 2:
self.__setattr__( self.__setattr__(
'{}_2'.format(conv_name), '{}_2'.format(conv_name),
conv3x3(filter_list[i], filter_list[i] * 2, 2, bias=True), nn.Conv3x3(
filter_list[i],
filter_list[i] * 2,
stride=2,
bias=True,
),
) )
else: else:
self.__setattr__( self.__setattr__(
'{}_2'.format(conv_name), '{}_2'.format(conv_name),
torch.nn.Conv2d( nn.Conv2d(
filter_list[i], filter_list[i] * 2, filter_list[i],
kernel_size=kps[0], padding=kps[1], stride=kps[2] filter_list[i] * 2,
kernel_size=kps[0],
padding=kps[1],
stride=kps[2]
), ),
) )
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
for m in self.modules(): for m in self.modules():
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
torch.nn.init.kaiming_uniform_( init.xavier(m.weight)
m.weight, init.constant(m.bias, 0)
# Fix the gain for [-127, 127]
a=1,
) # Xavier Initialization
torch.nn.init.constant_(m.bias, 0)
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
m.weight.requires_grad = False m.weight.requires_grad = False
m._buffers['weight'] = m.weight m._buffers['weight'] = m.weight
del m._parameters['weight'] del m._parameters['weight']
......
...@@ -13,8 +13,65 @@ from __future__ import absolute_import ...@@ -13,8 +13,65 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.vm.torch import nn
from dragon.vm.torch.autograd import function from dragon.vm.torch.autograd import function
from lib.ops import functions
from lib.core.config import cfg
class _RetinaNetDecoder(function.Function):
def __init__(self, key, dev, **kwargs):
super(_RetinaNetDecoder, self).__init__(key, dev, **kwargs)
self.args = kwargs
def register_operator(self):
return {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RETINANET',
'strides': self.args['strides'],
'ratios': self.args['ratios'],
'scales': self.args['scales'],
'pre_nms_top_n': self.args['pre_nms_top_n'],
'score_thresh': self.args['score_thresh'],
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, ims_info]
self._unify_devices(inputs[:-1]) # Skip <ims_info>
return self.run(inputs, [self.alloc()], unify_devices=False)
class _RPNDecoder(function.Function):
def __init__(self, key, dev, **kwargs):
super(_RPNDecoder, self).__init__(key, dev, **kwargs)
self.args = kwargs
def register_operator(self):
return {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RCNN',
'strides': self.args['strides'],
'ratios': self.args['ratios'],
'scales': self.args['scales'],
'pre_nms_top_n': self.args['pre_nms_top_n'],
'post_nms_top_n': self.args['post_nms_top_n'],
'nms_thresh': self.args['nms_thresh'],
'min_size': self.args['min_size'],
'min_level': self.args['min_level'],
'max_level': self.args['max_level'],
'canonical_scale': self.args['canonical_scale'],
'canonical_level': self.args['canonical_level'],
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, ims_info]
self._unify_devices(inputs[:-1]) # Skip <ims_info>
outputs = [self.alloc() for _ in range(self.args['K'])]
return self.run(inputs, outputs, unify_devices=False)
def decode_retinanet( def decode_retinanet(
...@@ -29,7 +86,7 @@ def decode_retinanet( ...@@ -29,7 +86,7 @@ def decode_retinanet(
score_thresh, score_thresh,
): ):
return function.get( return function.get(
functions.RetinaNetDecoder, _RetinaNetDecoder,
cls_prob.device, cls_prob.device,
strides=strides, strides=strides,
ratios=ratios, ratios=ratios,
...@@ -58,7 +115,7 @@ def decode_rpn( ...@@ -58,7 +115,7 @@ def decode_rpn(
canonical_level, canonical_level,
): ):
return function.get( return function.get(
functions.RPNDecoder, _RPNDecoder,
cls_prob.device, cls_prob.device,
K=num_outputs, K=num_outputs,
strides=strides, strides=strides,
...@@ -74,3 +131,59 @@ def decode_rpn( ...@@ -74,3 +131,59 @@ def decode_rpn(
canonical_level=canonical_level, canonical_level=canonical_level,
).apply(features, cls_prob, bbox_pred, ims_info) ).apply(features, cls_prob, bbox_pred, ims_info)
class RetinaNetDecoder(nn.Module):
"""Generate pred regions from retinanet."""
def __init__(self):
super(RetinaNetDecoder, self).__init__()
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
self.strides = [int(2. ** lvl) for lvl in range(k_min, k_max + 1)]
self.scales = [cfg.RETINANET.ANCHOR_SCALE *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
def forward(self, features, cls_prob, bbox_pred, ims_info):
return decode_retinanet(
features=features,
cls_prob=cls_prob,
bbox_pred=bbox_pred,
ims_info=ims_info,
strides=self.strides,
ratios=[float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
scales=self.scales,
pre_nms_top_n=cfg.RETINANET.PRE_NMS_TOP_N,
score_thresh=cfg.TEST.SCORE_THRESH,
)
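
The decoder derives per-level strides and per-octave anchor scales directly from the config. A worked example, assuming the common RetinaNet settings ANCHOR_SCALE = 4, SCALES_PER_OCTAVE = 3 and levels 3..7 (values assumed, not shown in this diff):

ANCHOR_SCALE, SCALES_PER_OCTAVE = 4, 3  # assumed config values
k_min, k_max = 3, 7                     # assumed FPN.RPN_MIN_LEVEL / RPN_MAX_LEVEL

strides = [int(2. ** lvl) for lvl in range(k_min, k_max + 1)]
scales = [ANCHOR_SCALE * (2 ** (octave / float(SCALES_PER_OCTAVE)))
          for octave in range(SCALES_PER_OCTAVE)]
print(strides)  # [8, 16, 32, 64, 128]
print(scales)   # [4.0, ~5.04, ~6.35]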
class RPNDecoder(nn.Module):
"""Generate proposal regions from RPN."""
def __init__(self):
super(RPNDecoder, self).__init__()
self.K = (cfg.FPN.ROI_MAX_LEVEL -
cfg.FPN.ROI_MIN_LEVEL + 1) \
if len(cfg.RPN.STRIDES) > 1 else 1
def forward(self, features, cls_prob, bbox_pred, ims_info):
outputs = decode_rpn(
features=features,
cls_prob=cls_prob,
bbox_pred=bbox_pred,
ims_info=ims_info,
num_outputs=self.K,
strides=cfg.RPN.STRIDES,
ratios=[float(e) for e in cfg.RPN.ASPECT_RATIOS],
scales=[float(e) for e in cfg.RPN.SCALES],
pre_nms_top_n=cfg.TEST.RPN_PRE_NMS_TOP_N,
post_nms_top_n=cfg.TEST.RPN_POST_NMS_TOP_N,
nms_thresh=cfg.TEST.RPN_NMS_THRESH,
min_size=cfg.TEST.RPN_MIN_SIZE,
min_level=cfg.FPN.ROI_MIN_LEVEL,
max_level=cfg.FPN.ROI_MAX_LEVEL,
canonical_scale=cfg.FPN.ROI_CANONICAL_SCALE,
canonical_level=cfg.FPN.ROI_CANONICAL_LEVEL,
)
return [outputs] if self.K == 1 else outputs
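
RPNDecoder emits one RoI tensor per FPN RoI level when several RPN strides are configured and otherwise wraps its single output in a list, so callers can always iterate. A small sketch of the K computation (the stride lists are example values):

def num_decoder_outputs(rpn_strides, roi_min_level=2, roi_max_level=5):
    # Mirrors RPNDecoder.__init__: one output per RoI level with FPN, else one.
    return (roi_max_level - roi_min_level + 1) if len(rpn_strides) > 1 else 1

print(num_decoder_outputs([4, 8, 16, 32, 64]))  # 4 -> levels 2..5
print(num_decoder_outputs([16]))                # 1 -> single C4 stride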
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch import nn
def xavier(weight, mode='fan_in'):
"""Xavier uniform initializer."""
nn.init.kaiming_uniform_(
weight,
a=1, # Fix the gain for [-127, 127]
mode=mode,
)
def kaiming_normal(weight, mode='fan_in'):
"""The initializer of kaiming normal distribution."""
nn.init.kaiming_normal_(
weight,
mode=mode,
nonlinearity='relu',
)
# Aliases
constant = nn.init.constant_
normal = nn.init.normal_
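
These helpers are thin wrappers over the dragon torch-style initializers and are what the heads above call in reset_parameters. A minimal usage sketch (shapes are placeholders):

from lib.modules import init, nn

conv = nn.Conv3x3(256, 256, bias=True)
init.normal(conv.weight, std=0.01)   # RPN / RetinaNet style weight init
init.constant(conv.bias, 0)

fc = nn.Linear(1024, 81)             # 81 is a placeholder class count
init.xavier(fc.weight)               # uniform init with the gain fixed to 1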
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define some basic structures."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch import nn
from lib.core.config import cfg
class Affine(object):
"""Affine transformation with weight and bias fixed."""
def __new__(cls, dim_in, bias=True, inplace=True):
return nn.Affine(
dim_in,
fix_weight=True,
fix_bias=True,
inplace=inplace,
)
class Conv1x1(object):
"""1x1 convolution."""
def __new__(cls, dim_in, dim_out, stride=1, bias=False):
return nn.Conv2d(
dim_in,
dim_out,
kernel_size=1,
stride=stride,
bias=bias,
)
class Conv3x3(object):
"""3x3 convolution."""
def __new__(cls, dim_in, dim_out, stride=1, dilation=1, bias=False):
return nn.Conv2d(
dim_in,
dim_out,
kernel_size=3,
stride=stride,
padding=1 * dilation,
bias=bias,
)
class CrossEntropyLoss(object):
"""Cross entropy loss."""
def __new__(cls):
return nn.CrossEntropyLoss(ignore_index=-1)
class Identity(nn.Module):
"""Pass input to the output."""
def __init__(self, *args, **kwargs):
super(Identity, self).__init__()
_, _ = args, kwargs
def forward(self, x):
return x
class SigmoidFocalLoss(object):
"""Sigmoid focal loss."""
def __new__(cls):
return nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
)
class SmoothL1Loss(object):
"""Smoothed l1 loss."""
def __new__(cls, beta=1.):
return nn.SmoothL1Loss(
beta=beta,
reduction='batch_size',
)
def is_conv2d(module):
"""Return a bool indicating the module is a Conv2d."""
return isinstance(module, nn.Conv2d) or \
isinstance(module, nn.DepthwiseConv2d)
AvgPool2d = nn.AvgPool2d
BatchNorm2d = nn.BatchNorm2d
BCEWithLogitsLoss = nn.BCEWithLogitsLoss
Conv2d = nn.Conv2d
ConvTranspose2d = nn.ConvTranspose2d
DepthwiseConv2d = nn.DepthwiseConv2d
Linear = nn.Linear
MaxPool2d = nn.MaxPool2d
Module = nn.Module
ModuleList = nn.ModuleList
Sequential = nn.Sequential
ReLU = nn.ReLU
Sigmoid = nn.Sigmoid
Softmax = nn.Softmax
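
The Conv1x1/Conv3x3/Affine wrappers use __new__ so that calling them returns a plain dragon nn module rather than a subclass, which keeps the isinstance(m, nn.Conv2d) checks in the various reset_parameters methods working. A quick check of that behaviour:

from lib.modules import nn

conv1 = nn.Conv1x1(64, 128)              # kernel_size=1, bias=False by default
conv3 = nn.Conv3x3(64, 128, dilation=2)  # padding follows the dilation (here 2)
assert isinstance(conv1, nn.Conv2d) and isinstance(conv3, nn.Conv2d)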
...@@ -16,7 +16,22 @@ from __future__ import print_function ...@@ -16,7 +16,22 @@ from __future__ import print_function
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.ops import functional as F
def roi_align(input, boxes, spatial_scale, size):
return torch.vision.ops.roi_align(
input, boxes,
output_size=(size, size),
spatial_scale=spatial_scale,
)
def roi_pool(input, boxes, spatial_scale, size):
return torch.vision.ops.roi_pool(
input, boxes,
output_size=(size, size),
spatial_scale=spatial_scale,
)
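
roi_pool and roi_align share one signature so the detection heads can pick the method from the config and bind the output resolution once with functools.partial, as in the FastRCNN/MaskRCNN constructors above. A sketch of that binding with a hypothetical 7x7 resolution:

import functools

from lib.modules import vision

ROI_XFORM_METHOD, ROI_XFORM_RESOLUTION = 'RoIAlign', 7  # hypothetical config values

box_roi_feature = functools.partial({
    'RoIPool': vision.roi_pool,
    'RoIAlign': vision.roi_align,
}[ROI_XFORM_METHOD], size=ROI_XFORM_RESOLUTION)

# box_roi_feature(feature_map, rois, spatial_scale) then yields
# (num_rois, channels, 7, 7) features for that pyramid level.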
class Bootstrap(torch.nn.Module): class Bootstrap(torch.nn.Module):
...@@ -51,60 +66,3 @@ class Bootstrap(torch.nn.Module): ...@@ -51,60 +66,3 @@ class Bootstrap(torch.nn.Module):
return torch.vision.ops.image_data( return torch.vision.ops.image_data(
input, self.dtype, self.mean_values, input, self.dtype, self.mean_values,
) )
class RetinaNetDecoder(torch.nn.Module):
"""Generate proposal regions from retinanet."""
def __init__(self):
super(RetinaNetDecoder, self).__init__()
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
self.strides = [int(2. ** lvl) for lvl in range(k_min, k_max + 1)]
self.scales = [cfg.RETINANET.ANCHOR_SCALE *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
def forward(self, features, cls_prob, bbox_pred, ims_info):
return F.decode_retinanet(
features=features,
cls_prob=cls_prob,
bbox_pred=bbox_pred,
ims_info=ims_info,
strides=self.strides,
ratios=[float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
scales=self.scales,
pre_nms_top_n=cfg.RETINANET.PRE_NMS_TOP_N,
score_thresh=cfg.TEST.SCORE_THRESH,
)
class RPNDecoder(torch.nn.Module):
"""Generate proposal regions from RPN."""
def __init__(self):
super(RPNDecoder, self).__init__()
self.K = (cfg.FPN.ROI_MAX_LEVEL -
cfg.FPN.ROI_MIN_LEVEL + 1) \
if len(cfg.RPN.STRIDES) > 1 else 1
def forward(self, features, cls_prob, bbox_pred, ims_info):
outputs = F.decode_rpn(
features=features,
cls_prob=cls_prob,
bbox_pred=bbox_pred,
ims_info=ims_info,
num_outputs=self.K,
strides=cfg.RPN.STRIDES,
ratios=[float(e) for e in cfg.RPN.ASPECT_RATIOS],
scales=[float(e) for e in cfg.RPN.SCALES],
pre_nms_top_n=cfg.TEST.RPN_PRE_NMS_TOP_N,
post_nms_top_n=cfg.TEST.RPN_POST_NMS_TOP_N,
nms_thresh=cfg.TEST.RPN_NMS_THRESH,
min_size=cfg.TEST.RPN_MIN_SIZE,
min_level=cfg.FPN.ROI_MIN_LEVEL,
max_level=cfg.FPN.ROI_MAX_LEVEL,
canonical_scale=cfg.FPN.ROI_CANONICAL_SCALE,
canonical_level=cfg.FPN.ROI_CANONICAL_LEVEL,
)
return [outputs] if self.K == 1 else outputs
...@@ -18,8 +18,6 @@ from __future__ import division ...@@ -18,8 +18,6 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils import logger
from lib.utils import rotated_boxes
try: try:
from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms from lib.nms.cpu_nms import cpu_nms, cpu_soft_nms
...@@ -36,8 +34,6 @@ def nms(detections, thresh, force_cpu=False): ...@@ -36,8 +34,6 @@ def nms(detections, thresh, force_cpu=False):
"""Perform either CPU or GPU Hard-NMS.""" """Perform either CPU or GPU Hard-NMS."""
if detections.shape[0] == 0: if detections.shape[0] == 0:
return [] return []
if detections.shape[1] == 6:
return rotated_boxes.cpu_nms(detections, thresh)
if cfg.USE_GPU_NMS and not force_cpu: if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID) return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
else: else:
...@@ -56,7 +52,7 @@ def soft_nms( ...@@ -56,7 +52,7 @@ def soft_nms(
return [] return []
methods = {'hard': 0, 'linear': 1, 'gaussian': 2} methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
if method not in methods: if method not in methods:
logger.fatal('Unknown soft nms method: {}'.format(method)) raise ValueError('Unknown soft nms method: {}'.format(method))
return cpu_soft_nms( return cpu_soft_nms(
detections, detections,
thresh, thresh,
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.autograd import function
class RetinaNetDecoder(function.Function):
def __init__(self, key, dev, **kwargs):
super(RetinaNetDecoder, self).__init__(key, dev, **kwargs)
self.args = kwargs
def register_operator(self):
return {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RETINANET',
'strides': self.args['strides'],
'ratios': self.args['ratios'],
'scales': self.args['scales'],
'pre_nms_top_n': self.args['pre_nms_top_n'],
'score_thresh': self.args['score_thresh'],
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, ims_info]
self._unify_devices(inputs[:-1]) # Skip <ims_info>
return self.run(inputs, [self.alloc()], unify_devices=False)
class RPNDecoder(function.Function):
def __init__(self, key, dev, **kwargs):
super(RPNDecoder, self).__init__(key, dev, **kwargs)
self.args = kwargs
def register_operator(self):
return {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RCNN',
'strides': self.args['strides'],
'ratios': self.args['ratios'],
'scales': self.args['scales'],
'pre_nms_top_n': self.args['pre_nms_top_n'],
'post_nms_top_n': self.args['post_nms_top_n'],
'nms_thresh': self.args['nms_thresh'],
'min_size': self.args['min_size'],
'min_level': self.args['min_level'],
'max_level': self.args['max_level'],
'canonical_scale': self.args['canonical_scale'],
'canonical_level': self.args['canonical_level'],
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, ims_info]
self._unify_devices(inputs[:-1]) # Skip <ims_info>
outputs = [self.alloc() for _ in range(self.args['K'])]
return self.run(inputs, outputs, unify_devices=False)
...@@ -14,62 +14,146 @@ from __future__ import division ...@@ -14,62 +14,146 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
from lib.pycocotools.mask import encode as encode_masks, \
decode as decode_masks, frPyObjects from lib.pycocotools import mask as mask_tools
from lib.pycocotools.mask import frPyObjects
def decode_rle(R):
N = len(R['counts']) def poly2rle(poly, height, width):
M = np.zeros( (R['size'][0]*R['size'][1], ), dtype=np.uint8) """Convert polygon(s) into encoded rle.
n = 0
val = 1 The polygon(s) may be stored in the following format:
for pos in range(N):
val = not val 1. Polygon with uncompressed RLE:
for c in range(R['counts'][pos]): {'size': (h, w), 'counts': [1, 2, ...]}
R['counts'][pos]
M[n] = val 2. Polygons with number of coordinates > 4:
n += 1 [[x1, y1, x2, y2, x3, y3, ...], [x1, y1, x2, y2, x3, y3, ...]]
return M.reshape((R['size']), order='F')
3. Polygons with uncompressed RLE:
[{'size': (h, w), 'counts': [1, 2, ...]}]
def mask_poly2im(polys, im_height, im_width):
return frPyObjects(polys, im_height, im_width) COCO uses **2** and **1** to annotate instances and crowd objects.
The output rle(s) will be:
def mask_coco2im(coco_masks, im_height, im_width): {'size': (h, w), 'counts': 'abc...'} or [{'size': (h, w), 'counts': 'abc...'}]
im_masks = []
for i, ann in enumerate(coco_masks): Parameters
if isinstance(ann, list): ----------
m = mask_poly2im(ann, im_height, im_width) poly : Union[List, Dict]
elif isinstance(ann, np.ndarray): The input polygons.
m = ann.astype(np.uint8) height : int
else: The height of image.
raise TypeError('Unknown type of mask: {}'.format(type(ann))) width : int
im_masks.append(m) The width of image.
return im_masks
Returns
-------
def mask_rle2im(rle_masks, im_height, im_width): Union[List, Dict]
coco_masks = [{'counts': rle, 'size': [im_height, im_width]} for rle in rle_masks] The bytes or a sequence of bytes.
coco_masks = decode_masks(coco_masks)
coco_masks = coco_masks.transpose((2, 0, 1)) Notes
return mask_coco2im(coco_masks, im_height, im_width) -----
COCODataset uses **2** and **1** to annotate instances and crowd objects.
def mask_bin2rle(bin_masks): """
rle_masks = [] return frPyObjects(poly, height, width)
for bin_mask in bin_masks:
if bin_mask is None:
rle_mask = '' def poly2bytes(poly, height, width):
else: """Convert polygon(s) into encoded mask bytes.
rle_mask = encode_masks(np.array(np.stack([bin_mask], axis=2), order='F'))[0]['counts']
rle_masks.append(rle_mask) The polygon(s) may be stored in the following format:
return rle_masks
1. Polygon with uncompressed RLE:
{'size': (h, w), 'counts': [1, 2, ...]}
def mask_poly2rle(segmentations, im_height, im_width):
masks = [] 2. Polygons with number of coordinates > 4:
for polys in segmentations: [[x1, y1, x2, y2, x3, y3, ...], [x1, y1, x2, y2, x3, y3, ...]]
mask = mask_poly2im(polys, im_height, im_width)
masks.append(mask[0]['counts']) 3. Polygons with uncompressed RLE:
return masks [{'size': (h, w), 'counts': [1, 2, ...]}]
\ No newline at end of file
If the number of polygons >= 2, we will merge them into a single mask.
Parameters
----------
poly : Union[List, Dict]
The input polygons.
height : int
The height of image.
width : int
The width of image.
Returns
-------
bytes
The mask bytes.
Notes
-----
COCODataset uses **2** and **1** to annotate instances and crowd objects.
"""
rle_objects = poly2rle(poly, height, width)
if isinstance(rle_objects, list):
if len(rle_objects) == 1:
return rle_objects[0]['counts']
rle_objects = mask_tools.merge(rle_objects)
return rle_objects['counts']
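
A small usage sketch of poly2bytes with toy coordinates (the values are illustrative only): two polygons on a 100x100 canvas come back as a single merged RLE string.

triangle = [10., 10., 60., 15., 30., 50.]            # x1, y1, x2, y2, x3, y3
square = [70., 70., 90., 70., 90., 90., 70., 90.]
counts = poly2bytes([triangle, square], height=100, width=100)  # one bytes object
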
def bytes2img(data, height, width):
"""Decode the RLE mask bytes to a 2d image.
Parameters
----------
data : bytes
The encoded bytes.
height : int
The height of image.
width : int
The width of image.
Returns
-------
numpy.ndarray
The mask image.
"""
rle_objects = [{'counts': data, 'size': [height, width]}]
mask_image = mask_tools.decode(rle_objects)
if mask_image.shape[2] != 1:
raise ValueError(
'{} instances are found in data.\n'
'Merge them before compressing.'
.format(mask_image.shape[2])
)
return mask_image[:, :, 0]
def img2bytes(data):
"""Compress a 2d mask image to RLE bytes.
Parameters
----------
data : numpy.ndarray
The image to compress.
Returns
-------
bytes
The encoded bytes.
"""
if len(data.shape) == 3:
raise ValueError(
'{} instances are found in data.\n'
'Merge them before compressing.'
.format(data.shape[2])
)
elif len(data.shape) != 2:
raise ValueError('Excepted a 2d mask.')
rle_objects = mask_tools.encode(
np.array(np.stack([data], 2), order='F'))
return rle_objects[0]['counts']
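
A quick round-trip check for the two helpers above (toy mask; assumes a uint8 array as input, matching what mask_tools.encode expects):

import numpy as np

mask = np.zeros((100, 100), 'uint8')
mask[20:40, 30:60] = 1
counts = img2bytes(mask)                # 2d mask -> RLE bytes
restored = bytes2img(counts, 100, 100)  # RLE bytes -> 2d mask
assert np.array_equal(restored, mask)
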
...@@ -13,5 +13,5 @@ from __future__ import absolute_import ...@@ -13,5 +13,5 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.faster_rcnn.data_layer import DataLayer from lib.faster_rcnn.data_loader import DataLoader
from lib.retinanet.anchor_target_layer import AnchorTargetLayer from lib.retinanet.anchor_target import AnchorTarget
...@@ -13,23 +13,21 @@ from __future__ import absolute_import ...@@ -13,23 +13,21 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors_v2 from lib.faster_rcnn.generate_anchors import generate_anchors_v2
from lib.faster_rcnn import generate_grid_anchors
from lib.utils import boxes as box_util
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import array2tensor from lib.utils.framework import new_tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class AnchorTargetLayer(torch.nn.Module): class AnchorTarget(object):
"""Assign anchors to ground-truth targets.""" """Assign ground-truth targets to anchors."""
def __init__(self): def __init__(self):
super(AnchorTargetLayer, self).__init__() super(AnchorTarget, self).__init__()
# Load the basic configs # Load the basic configs
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
...@@ -49,10 +47,9 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -49,10 +47,9 @@ class AnchorTargetLayer(torch.nn.Module):
sizes=sizes, sizes=sizes,
)) ))
def forward(self, features, gt_boxes, ims_info): def __call__(self, features, gt_boxes, ims_info):
"""Produces anchor classification labels and bounding-box regression targets."""
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
if len(gt_boxes_wide) != num_images: if len(gt_boxes_wide) != num_images:
logger.fatal( logger.fatal(
...@@ -60,39 +57,23 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -60,39 +57,23 @@ class AnchorTargetLayer(torch.nn.Module):
.format(num_images, len(gt_boxes_wide)) .format(num_images, len(gt_boxes_wide))
) )
# Generate proposals from shifted anchors # Generate grid anchors from base
all_anchors, total_anchors = [], 0 all_anchors = \
for i in range(len(self.strides)): generate_grid_anchors(
height, width = features[i].shape[-2:] features,
shift_x = np.arange(0, width) * self.strides[i] self.base_anchors,
shift_y = np.arange(0, height) * self.strides[i] self.strides,
shift_x, shift_y = np.meshgrid(shift_x, shift_y) )
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), num_anchors = all_anchors.shape[0]
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (self.base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# [K, A, 4] -> [A, K, 4]
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
all_anchors.append(anchors)
total_anchors += anchors.shape[0]
all_anchors = np.concatenate(all_anchors, axis=0)
# label: 1 is positive, 0 is negative, -1 is don't care # Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
labels_wide = -np.ones((num_images, total_anchors,), dtype=np.float32) labels_wide = -np.ones((num_images, num_anchors,), 'float32')
bbox_targets_wide = np.zeros((num_images, total_anchors, 4), dtype=np.float32) bbox_targets_wide = np.zeros((num_images, num_anchors, 4), 'float32')
bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32) bbox_inside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, dtype=np.float32) bbox_outside_weights_wide = np.zeros_like(bbox_targets_wide, 'float32')
anchors = all_anchors # Different from R-CNN, all anchors will be used
inds_inside = np.arange(all_anchors.shape[0]) inds_inside, anchors = np.arange(num_anchors), all_anchors
num_inside = len(inds_inside) num_inside = len(inds_inside)
for ix in range(num_images): for ix in range(num_images):
...@@ -104,12 +85,12 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -104,12 +85,12 @@ class AnchorTargetLayer(torch.nn.Module):
labels.fill(-1) labels.fill(-1)
# Overlaps between the anchors and the gt boxes # Overlaps between the anchors and the gt boxes
overlaps = bbox_overlaps(anchors, gt_boxes) overlaps = box_util.bbox_overlaps(anchors, gt_boxes)
argmax_overlaps = overlaps.argmax(axis=1) argmax_overlaps = overlaps.argmax(1)
max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps] max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
# fg label: for each gt, anchor with highest overlap # fg label: for each gt, anchor with highest overlap
gt_argmax_overlaps = overlaps.argmax(axis=0) gt_argmax_overlaps = overlaps.argmax(0)
gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
gt_inds = argmax_overlaps[gt_argmax_overlaps] gt_inds = argmax_overlaps[gt_argmax_overlaps]
...@@ -125,8 +106,11 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -125,8 +106,11 @@ class AnchorTargetLayer(torch.nn.Module):
labels[max_overlaps < cfg.RETINANET.NEGATIVE_OVERLAP] = 0 labels[max_overlaps < cfg.RETINANET.NEGATIVE_OVERLAP] = 0
bbox_targets = np.zeros((num_inside, 4), dtype=np.float32) bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
bbox_targets[fg_inds, :] = bbox_transform( bbox_targets[fg_inds, :] = \
anchors[fg_inds, :], gt_boxes[argmax_overlaps[fg_inds], :4]) box_util.bbox_transform(
anchors[fg_inds, :],
gt_boxes[argmax_overlaps[fg_inds], :4],
)
bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32) bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
bbox_inside_weights[fg_inds, :] = np.array((1., 1., 1., 1.)) bbox_inside_weights[fg_inds, :] = np.array((1., 1., 1., 1.))
...@@ -139,14 +123,14 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -139,14 +123,14 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights bbox_inside_weights_wide[ix, inds_inside] = bbox_inside_weights
bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights bbox_outside_weights_wide[ix, inds_inside] = bbox_outside_weights
labels = labels_wide.reshape((num_images, total_anchors)) labels = labels_wide.reshape((num_images, num_anchors))
bbox_targets = bbox_targets_wide.transpose((0, 2, 1)) bbox_targets = bbox_targets_wide.transpose((0, 2, 1))
bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1)) bbox_inside_weights = bbox_inside_weights_wide.transpose((0, 2, 1))
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1)) bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return { return {
'labels': array2tensor(labels), 'labels': new_tensor(labels),
'bbox_targets': array2tensor(bbox_targets), 'bbox_targets': new_tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights), 'bbox_inside_weights': new_tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights), 'bbox_outside_weights': new_tensor(bbox_outside_weights),
} }
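
The per-image labeling rule applied above can be summarized in a few lines; the thresholds below are illustrative stand-ins for cfg.RETINANET.POSITIVE_OVERLAP and cfg.RETINANET.NEGATIVE_OVERLAP, and the gt-argmax override is omitted:

import numpy as np

def label_by_overlap(max_overlaps, pos_thresh=0.5, neg_thresh=0.4):
    labels = -np.ones_like(max_overlaps)    # -1: ignored by the loss
    labels[max_overlaps >= pos_thresh] = 1  # foreground anchors
    labels[max_overlaps < neg_thresh] = 0   # background anchors
    return labels
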
...@@ -17,12 +17,12 @@ import dragon.vm.torch as torch ...@@ -17,12 +17,12 @@ import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling.detector import new_detector
from lib.nms import nms_wrapper from lib.nms import nms_wrapper
from lib.utils import framework from lib.utils import framework
from lib.utils import time_util from lib.utils import time_util
from lib.utils.blob import im_list_to_blob from lib.utils.blob import im_list_to_blob
from lib.utils.image import scale_image from lib.utils.image import scale_image
from lib.utils.vis import vis_one_image
def ims_detect(detector, raw_images): def ims_detect(detector, raw_images):
...@@ -43,65 +43,67 @@ def ims_detect(detector, raw_images): ...@@ -43,65 +43,67 @@ def ims_detect(detector, raw_images):
], dtype=np.float32) ], dtype=np.float32)
# Do Forward # Do Forward
if not hasattr(detector, 'frozen_graph'): if not hasattr(detector, 'graph'):
inputs = { with framework.new_workspace().as_default():
'data': torch.from_numpy(blobs['data']), data = torch.from_numpy(blobs['data'])
'ims_info': torch.from_numpy(blobs['ims_info']), ims_info = torch.from_numpy(blobs['ims_info'])
}
with torch.no_grad(): with torch.no_grad():
with torch.jit.Recorder(retain_ops=True): with torch.jit.Tracer(retain_ops=True):
inputs = {'data': data, 'ims_info': ims_info}
outputs = detector.forward(inputs) outputs = detector.forward(inputs)
detector.frozen_graph = \ detector.graph = \
framework.FrozenGraph( framework.Graph({
{'data': inputs['data'], 'data': inputs['data'],
'ims_info': inputs['ims_info']}, 'ims_info': inputs['ims_info']
{'detections': outputs['detections']}, }, {'detections': outputs['detections']})
) outputs = detector.graph(**blobs)
outputs = detector.frozen_graph(**blobs)
# Unpack results # Unpack results
results = outputs['detections'] results = outputs['detections']
detections_wide = [[] for _ in range(len(ims_shape))] detections = [[] for _ in range(len(ims_shape))]
for i in range(len(ims)): for i in range(len(ims)):
indices = np.where(results[:, 0].astype(np.int32) == i)[0] inds = np.where(results[:, 0].astype(np.int32) == i)[0]
detections = results[indices, 1:] detections[i // num_scales].append(results[inds, 1:])
detections_wide[i // num_scales].append(detections)
for i in range(len(ims_shape)): for i in range(len(ims_shape)):
detections_wide[i] = np.vstack(detections_wide[i]) \ detections[i] = \
if len(detections_wide[i]) > 1 else detections_wide[i][0] np.vstack(detections[i]) \
if len(detections[i]) > 1 \
return detections_wide else detections[i][0]
return detections
def test_net(detector, server):
# Load settings
classes = server.classes def test_net(weights, num_classes, q_in, q_out, device):
num_images = server.num_images num_classes, cfg.GPU_ID = num_classes, device
num_classes = server.num_classes detector = new_detector(device, weights)
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
must_stop = False
_t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()} _t = time_util.new_timers('im_detect', 'misc')
while True:
if must_stop:
break
indices, raw_images = [], []
for i in range(cfg.TEST.IMS_PER_BATCH):
idx, raw_image = q_in.get()
if raw_image is None:
must_stop = True
break
indices.append(idx)
raw_images.append(raw_image)
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH): if len(raw_images) == 0:
# Collect raw images and ground-truths
image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images:
continue continue
image_id, raw_image = server.get_image()
image_ids.append(image_id)
raw_images.append(raw_image)
# Run detecting on specific scales # Run detecting on specific scales
with _t['im_detect'].tic_and_toc(): with _t['im_detect'].tic_and_toc():
results = ims_detect(detector, raw_images) results = ims_detect(detector, raw_images)
# Post-Processing # Post-Processing
for i, detections in enumerate(results):
_t['misc'].tic() _t['misc'].tic()
for item_idx, detections in enumerate(results):
i = batch_idx + item_idx
boxes_this_image = [[]] boxes_this_image = [[]]
# {x1, y1, x2, y2, score, cls} # {x1, y1, x2, y2, score, cls}
detections = np.array(detections) detections = np.array(detections)
...@@ -126,44 +128,16 @@ def test_net(detector, server): ...@@ -126,44 +128,16 @@ def test_net(detector, server):
force_cpu=True, force_cpu=True,
) )
cls_detections = cls_detections[keep, :] cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections) boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(
raw_images[item_idx],
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_ids[item_idx]),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc() _t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' q_out.put((
.format(batch_idx + cfg.TEST.IMS_PER_BATCH, indices[i],
num_images, {
_t['im_detect'].average_time, 'im_detect': _t['im_detect'].average_time,
_t['misc'].average_time), 'misc': _t['misc'].average_time,
end='') },
{
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<') 'boxes': boxes_this_image,
},
print('Evaluating detections') ))
server.evaluate_detections(all_boxes)
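
With this change test_net runs as a queue-driven worker instead of iterating a test server, so a driver is expected to push (index, image) pairs plus a None sentinel and then collect per-image results. A rough sketch of such a driver, where the weights path, class count, and image list are placeholders, not part of this file:

import multiprocessing as mp
import numpy as np

images = [np.zeros((480, 640, 3), 'uint8')]   # stand-in for real test images
q_in, q_out = mp.Queue(), mp.Queue()
worker = mp.Process(target=test_net, args=('/path/to/weights.pkl', 81, q_in, q_out, 0))
worker.start()
for idx, image in enumerate(images):
    q_in.put((idx, image))
q_in.put((-1, None))                          # stop signal for the worker
results = [q_out.get() for _ in range(len(images))]
worker.join()
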
...@@ -48,29 +48,33 @@ class _LRScheduler(object): ...@@ -48,29 +48,33 @@ class _LRScheduler(object):
raise NotImplementedError raise NotImplementedError
class StepLR(_LRScheduler): class CosineLR(_LRScheduler):
def __init__( def __init__(
self, self,
lr_max, lr_max,
lr_min,
decay_step, decay_step,
decay_gamma, max_steps,
warmup_steps=0, warmup_steps=0,
warmup_factor=0., warmup_factor=0.,
): ):
super(StepLR, self).__init__( super(CosineLR, self).__init__(
lr_max=lr_max, lr_max=lr_max,
lr_min=lr_min,
warmup_steps=warmup_steps, warmup_steps=warmup_steps,
warmup_factor=warmup_factor, warmup_factor=warmup_factor,
) )
self._decay_step = decay_step self._decay_step = decay_step
self._decay_gamma = decay_gamma self._max_steps = max_steps - warmup_steps
def schedule_impl(self): def schedule_impl(self):
step_count = self._step_count - self._last_steps step_count = self._step_count - self._last_steps
if step_count % self._decay_step == 0: if step_count % self._decay_step == 0:
decay_factor = step_count // self._decay_step decay_factor = 0.5 * (1. + math.cos(
self._last_lr = self._lr_max * ( math.pi * step_count / self._max_steps))
self._decay_gamma ** decay_factor) self._last_lr = self._lr_min + (
self._lr_max - self._lr_min
) * decay_factor
return self._last_lr return self._last_lr
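
After warm-up, CosineLR therefore follows lr(t) = lr_min + (lr_max - lr_min) * 0.5 * (1 + cos(pi * t / T)); a standalone check of that curve with illustrative values:

import math

def cosine_lr(step, max_steps, lr_max=0.02, lr_min=0.):
    decay = 0.5 * (1. + math.cos(math.pi * step / max_steps))
    return lr_min + (lr_max - lr_min) * decay

assert abs(cosine_lr(0, 100) - 0.02) < 1e-8    # starts at lr_max
assert abs(cosine_lr(50, 100) - 0.01) < 1e-8   # halfway point
assert abs(cosine_lr(100, 100) - 0.) < 1e-8    # ends at lr_min
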
...@@ -105,18 +109,19 @@ class MultiStepLR(_LRScheduler): ...@@ -105,18 +109,19 @@ class MultiStepLR(_LRScheduler):
return self._last_lr return self._last_lr
class LinearLR(_LRScheduler): class LinearCosineLR(_LRScheduler):
def __init__( def __init__(
self, self,
lr_max, lr_max,
lr_min,
decay_step, decay_step,
max_steps, max_steps,
warmup_steps=0, warmup_steps=0,
warmup_factor=0., warmup_factor=0.,
): ):
super(LinearLR, self).__init__( super(LinearCosineLR, self).__init__(
lr_max=lr_max, lr_max=lr_max,
lr_min=0., lr_min=lr_min,
warmup_steps=warmup_steps, warmup_steps=warmup_steps,
warmup_factor=warmup_factor, warmup_factor=warmup_factor,
) )
...@@ -126,44 +131,63 @@ class LinearLR(_LRScheduler): ...@@ -126,44 +131,63 @@ class LinearLR(_LRScheduler):
def schedule_impl(self): def schedule_impl(self):
step_count = self._step_count - self._last_steps step_count = self._step_count - self._last_steps
if step_count % self._decay_step == 0: if step_count % self._decay_step == 0:
decay_factor = 1. - float(step_count) / self._max_steps linear_decay = 1. - float(step_count) / self._max_steps
self._last_lr = self._lr_max * decay_factor cosine_decay = 0.5 * (1. + math.cos(
math.pi * step_count / self._max_steps))
decay_factor = linear_decay * cosine_decay
self._last_lr = self._lr_min + (
self._lr_max - self._lr_min
) * decay_factor
return self._last_lr return self._last_lr
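
LinearCosineLR multiplies the two decays, so the rate falls faster than either alone; at the halfway point, for example, the factor is 0.5 * 0.5 = 0.25 of (lr_max - lr_min), plus lr_min. A one-line check with the same illustrative values:

import math

step, max_steps, lr_max, lr_min = 50, 100, 0.02, 0.
linear = 1. - float(step) / max_steps
cosine = 0.5 * (1. + math.cos(math.pi * step / max_steps))
assert abs((lr_min + (lr_max - lr_min) * linear * cosine) - 0.005) < 1e-8
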
class CosineLR(_LRScheduler): class StepLR(_LRScheduler):
def __init__( def __init__(
self, self,
lr_max, lr_max,
lr_min,
decay_step, decay_step,
max_steps, decay_gamma,
warmup_steps=0, warmup_steps=0,
warmup_factor=0., warmup_factor=0.,
): ):
super(CosineLR, self).__init__( super(StepLR, self).__init__(
lr_max=lr_max, lr_max=lr_max,
lr_min=lr_min,
warmup_steps=warmup_steps, warmup_steps=warmup_steps,
warmup_factor=warmup_factor, warmup_factor=warmup_factor,
) )
self._decay_step = decay_step self._decay_step = decay_step
self._max_steps = max_steps - warmup_steps self._decay_gamma = decay_gamma
def schedule_impl(self): def schedule_impl(self):
step_count = self._step_count - self._last_steps step_count = self._step_count - self._last_steps
if step_count % self._decay_step == 0: if step_count % self._decay_step == 0:
decay_factor = 0.5 * (1. + math.cos( decay_factor = step_count // self._decay_step
math.pi * step_count / self._max_steps)) self._last_lr = self._lr_max * (
self._last_lr = self._lr_min + ( self._decay_gamma ** decay_factor)
self._lr_max - self._lr_min
) * decay_factor
return self._last_lr return self._last_lr
def get_scheduler(): def get_scheduler():
lr_policy = cfg.SOLVER.LR_POLICY lr_policy = cfg.SOLVER.LR_POLICY
if lr_policy == 'step': if lr_policy == 'cosine_decay':
return CosineLR(
lr_max=cfg.SOLVER.BASE_LR,
lr_min=0.,
decay_step=cfg.SOLVER.DECAY_STEP,
max_steps=cfg.SOLVER.MAX_STEPS,
warmup_steps=cfg.SOLVER.WARM_UP_STEPS,
warmup_factor=cfg.SOLVER.WARM_UP_FACTOR,
)
elif lr_policy == 'linear_cosine_decay':
return LinearCosineLR(
lr_max=cfg.SOLVER.BASE_LR,
lr_min=0.,
decay_step=cfg.SOLVER.DECAY_STEP,
max_steps=cfg.SOLVER.MAX_STEPS,
warmup_steps=cfg.SOLVER.WARM_UP_STEPS,
warmup_factor=cfg.SOLVER.WARM_UP_FACTOR,
)
elif lr_policy == 'step':
return StepLR( return StepLR(
lr_max=cfg.SOLVER.BASE_LR, lr_max=cfg.SOLVER.BASE_LR,
decay_step=cfg.SOLVER.DECAY_STEP, decay_step=cfg.SOLVER.DECAY_STEP,
...@@ -179,15 +203,7 @@ def get_scheduler(): ...@@ -179,15 +203,7 @@ def get_scheduler():
warmup_steps=cfg.SOLVER.WARM_UP_STEPS, warmup_steps=cfg.SOLVER.WARM_UP_STEPS,
warmup_factor=cfg.SOLVER.WARM_UP_FACTOR, warmup_factor=cfg.SOLVER.WARM_UP_FACTOR,
) )
elif lr_policy == 'cosine_decay':
return CosineLR(
lr_max=cfg.SOLVER.BASE_LR,
lr_min=0.,
decay_step=cfg.SOLVER.DECAY_STEP,
max_steps=cfg.SOLVER.MAX_STEPS,
warmup_steps=cfg.SOLVER.WARM_UP_STEPS,
warmup_factor=cfg.SOLVER.WARM_UP_FACTOR,
)
else: else:
raise ValueError('Unknown lr policy: ' + lr_policy) raise ValueError('Unknown lr policy: ' + lr_policy)
...@@ -196,14 +212,16 @@ if __name__ == '__main__': ...@@ -196,14 +212,16 @@ if __name__ == '__main__':
def extract_label(scheduler): def extract_label(scheduler):
class_name = scheduler.__class__.__name__ class_name = scheduler.__class__.__name__
label = class_name + '(' label = class_name + '('
if class_name == 'StepLR': if class_name == 'CosineLR':
label += 'α=' + str(scheduler._decay_step) + ', ' label += 'α=' + str(scheduler._decay_step)
label += 'γ=' + str(scheduler._decay_gamma) elif class_name == 'LinearCosineLR':
label += 'α=' + str(scheduler._decay_step)
elif class_name == 'MultiStepLR': elif class_name == 'MultiStepLR':
label += 'α=' + str(scheduler._decay_steps) + ', ' label += 'α=' + str(scheduler._decay_steps) + ', '
label += 'γ=' + str(scheduler._decay_gamma) label += 'γ=' + str(scheduler._decay_gamma)
elif class_name == 'CosineLR': elif class_name == 'StepLR':
label += 'α=' + str(scheduler._decay_step) label += 'α=' + str(scheduler._decay_step) + ', '
label += 'γ=' + str(scheduler._decay_gamma)
label += ')' label += ')'
return label return label
...@@ -218,7 +236,7 @@ if __name__ == '__main__': ...@@ -218,7 +236,7 @@ if __name__ == '__main__':
StepLR(decay_step=1, decay_gamma=0.97, **shared_args), StepLR(decay_step=1, decay_gamma=0.97, **shared_args),
MultiStepLR(decay_steps=[60, 120, 180], decay_gamma=0.1, **shared_args), MultiStepLR(decay_steps=[60, 120, 180], decay_gamma=0.1, **shared_args),
CosineLR(lr_min=0., decay_step=1, max_steps=max_steps, **shared_args), CosineLR(lr_min=0., decay_step=1, max_steps=max_steps, **shared_args),
LinearLR(decay_step=1, max_steps=max_steps, **shared_args), LinearCosineLR(lr_min=0., decay_step=1, max_steps=max_steps, **shared_args),
] ]
for i in range(max_steps): for i in range(max_steps):
...@@ -240,7 +258,7 @@ if __name__ == '__main__': ...@@ -240,7 +258,7 @@ if __name__ == '__main__':
plt.title('Visualization of different LR Schedulers') plt.title('Visualization of different LR Schedulers')
plt.xlabel('Step') plt.xlabel('Step')
plt.ylabel('Learning Rate') plt.ylabel('Learning Rate')
line = '--' line = '-'
colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k'] colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k']
for i, scheduler in enumerate(schedulers): for i, scheduler in enumerate(schedulers):
plt.plot( plt.plot(
...@@ -251,4 +269,5 @@ if __name__ == '__main__': ...@@ -251,4 +269,5 @@ if __name__ == '__main__':
label=extract_label(scheduler), label=extract_label(scheduler),
) )
plt.legend() plt.legend()
plt.grid(linestyle='--')
plt.show() plt.show()
...@@ -13,8 +13,8 @@ from __future__ import absolute_import ...@@ -13,8 +13,8 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from lib.ssd.data_layer import DataLayer from lib.ssd.data_loader import DataLoader
from lib.ssd.hard_mining_layer import HardMiningLayer from lib.ssd.hard_mining import HardMining
from lib.ssd.multibox_layer import MultiBoxMatchLayer from lib.ssd.multibox import MultiBoxMatch
from lib.ssd.multibox_layer import MultiBoxTargetLayer from lib.ssd.multibox import MultiBoxTarget
from lib.ssd.priorbox_layer import PriorBoxLayer from lib.ssd.priorbox import PriorBox
...@@ -26,11 +26,11 @@ from lib.ssd.data_transformer import DataTransformer ...@@ -26,11 +26,11 @@ from lib.ssd.data_transformer import DataTransformer
from lib.utils import logger from lib.utils import logger
class DataLayer(torch.nn.Module): class DataLoader(object):
"""Generate a mini-batch of data.""" """Provide mini-batches of data."""
def __init__(self): def __init__(self):
super(DataLayer, self).__init__() super(DataLoader, self).__init__()
database = get_imdb(cfg.TRAIN.DATABASE) database = get_imdb(cfg.TRAIN.DATABASE)
self.data_batch = DataBatch(**{ self.data_batch = DataBatch(**{
'dataset': lambda: dragon.io.SeetaRecordDataset(database.source), 'dataset': lambda: dragon.io.SeetaRecordDataset(database.source),
...@@ -38,12 +38,11 @@ class DataLayer(torch.nn.Module): ...@@ -38,12 +38,11 @@ class DataLayer(torch.nn.Module):
'shuffle': cfg.TRAIN.USE_SHUFFLE, 'shuffle': cfg.TRAIN.USE_SHUFFLE,
'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS, 'num_chunks': cfg.TRAIN.NUM_SHUFFLE_CHUNKS,
'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2, 'batch_size': cfg.TRAIN.IMS_PER_BATCH * 2,
'num_transformers': cfg.TRAIN.NUM_WORKERS,
}) })
def forward(self): def __call__(self):
# Get an array blob from the Queue
outputs = self.data_batch.get() outputs = self.data_batch.get()
# Zero-Copy the array to tensor
outputs['data'] = torch.from_numpy(outputs['data']) outputs['data'] = torch.from_numpy(outputs['data'])
return outputs return outputs
...@@ -58,14 +57,16 @@ class DataBatch(mp.Process): ...@@ -58,14 +57,16 @@ class DataBatch(mp.Process):
---------- ----------
dataset : lambda dataset : lambda
The creator of a dataset. The creator of a dataset.
classes : Sequence[str]
The class names.
shuffle : bool, optional, default=False shuffle : bool, optional, default=False
Whether to shuffle the data. Whether to shuffle the data.
num_chunks : int, optional, default=0 num_chunks : int, optional, default=0
The number of chunks to split. The number of chunks to split.
batch_size : int, optional, default=32 batch_size : int, optional, default=2
The size of a mini-batch. The size of a mini-batch.
prefetch : int, optional, default=5 num_transformers : int, optional, default=3
The prefetch count. The number of workers to transform data.
""" """
super(DataBatch, self).__init__() super(DataBatch, self).__init__()
...@@ -82,16 +83,9 @@ class DataBatch(mp.Process): ...@@ -82,16 +83,9 @@ class DataBatch(mp.Process):
self._prefetch = kwargs.get('prefetch', 5) self._prefetch = kwargs.get('prefetch', 5)
self._batch_size = kwargs.get('batch_size', 32) self._batch_size = kwargs.get('batch_size', 32)
self._num_readers = kwargs.get('num_readers', 1) self._num_readers = kwargs.get('num_readers', 1)
self._num_transformers = kwargs.get('num_transformers', -1) self._num_transformers = kwargs.get('num_transformers', 3)
self._max_transformers = kwargs.get('max_transformers', 3)
self._num_fetchers = kwargs.get('num_fetchers', 1) self._num_fetchers = kwargs.get('num_fetchers', 1)
# Io-Aware Policy
if self._num_transformers == -1:
self._num_transformers = 3
self._num_transformers = min(
self._num_transformers, self._max_transformers)
# Initialize queues # Initialize queues
num_batches = self._prefetch * self._num_readers num_batches = self._prefetch * self._num_readers
self.Q1 = mp.Queue(num_batches * self._batch_size) self.Q1 = mp.Queue(num_batches * self._batch_size)
...@@ -162,14 +156,17 @@ class DataBatch(mp.Process): ...@@ -162,14 +156,17 @@ class DataBatch(mp.Process):
# Main prefetch loop # Main prefetch loop
while True: while True:
boxes_to_pack = [] boxes_to_pack = []
image_batch = np.zeros(image_batch_shape, 'uint8') img, gt_boxes = self.Q2.get()
for image_index in range(cfg.TRAIN.IMS_PER_BATCH): ims_blob = np.zeros(image_batch_shape, img.dtype)
image_batch[image_index], gt_boxes = self.Q2.get() for i in range(cfg.TRAIN.IMS_PER_BATCH):
ims_blob[i] = img
boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32') boxes = np.zeros((gt_boxes.shape[0], gt_boxes.shape[1] + 1), 'float32')
boxes[:, :gt_boxes.shape[1]], boxes[:, -1] = gt_boxes, image_index boxes[:, :gt_boxes.shape[1]], boxes[:, -1] = gt_boxes, i
boxes_to_pack.append(boxes) boxes_to_pack.append(boxes)
if i != (cfg.TRAIN.IMS_PER_BATCH - 1):
img, gt_boxes = self.Q2.get()
self.Q3.put({ self.Q3.put({
'data': image_batch, 'data': ims_blob,
'gt_boxes': np.concatenate(boxes_to_pack), 'gt_boxes': np.concatenate(boxes_to_pack),
}) })
...@@ -19,9 +19,9 @@ import cv2 ...@@ -19,9 +19,9 @@ import cv2
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.datasets.example import Example
from lib.ssd import transforms from lib.ssd import transforms
from lib.utils import rotated_boxes from lib.utils import boxes as box_util
from lib.utils.boxes import flip_boxes
class DataTransformer(multiprocessing.Process): class DataTransformer(multiprocessing.Process):
...@@ -33,7 +33,8 @@ class DataTransformer(multiprocessing.Process): ...@@ -33,7 +33,8 @@ class DataTransformer(multiprocessing.Process):
self._classes = kwargs.get('classes', ('__background__',)) self._classes = kwargs.get('classes', ('__background__',))
self._num_classes = len(self._classes) self._num_classes = len(self._classes)
self._class_to_ind = dict(zip(self._classes, range(self._num_classes))) self._class_to_ind = dict(zip(self._classes, range(self._num_classes)))
self._image_aug = transforms.Compose( self.augment_image = \
transforms.Compose(
transforms.Distort(), # Color augmentation transforms.Distort(), # Color augmentation
transforms.Expand(), # Expand and padding transforms.Expand(), # Expand and padding
transforms.Sample(), # Sample a patch randomly transforms.Sample(), # Sample a patch randomly
...@@ -42,93 +43,72 @@ class DataTransformer(multiprocessing.Process): ...@@ -42,93 +43,72 @@ class DataTransformer(multiprocessing.Process):
self.q_in = self.q_out = None self.q_in = self.q_out = None
self.daemon = True self.daemon = True
def make_roi_dict(self, example, flip=False): def make_roi_dict(self, example, apply_flip=False):
n_objects, box_dim = 0, len(cfg.BBOX_REG_WEIGHTS) objects, n_objects = example.objects, 0
height, width = example.height, example.width
if not self._use_diff: if not self._use_diff:
for obj in example['object']: for obj in objects:
if obj.get('difficult', 0) == 0: if obj.get('difficult', 0) == 0:
n_objects += 1 n_objects += 1
else: else:
n_objects = len(example['object']) n_objects = len(objects)
roi_dict = { roi_dict = {
'width': example['width'], 'boxes': np.zeros((n_objects, 4), 'float32'),
'height': example['height'],
'gt_classes': np.zeros((n_objects,), 'int32'), 'gt_classes': np.zeros((n_objects,), 'int32'),
'boxes': np.zeros((n_objects, box_dim), 'float32'),
'normalized_boxes': np.zeros((n_objects, box_dim), 'float32'),
} }
# Filter the difficult instances # Filter the difficult instances
object_idx = 0 object_idx = 0
for obj in example['object']: for obj in objects:
if not self._use_diff and \ if not self._use_diff and \
obj.get('difficult', 0) > 0: obj.get('difficult', 0) > 0:
continue continue
if box_dim == 4: bbox = obj['bbox']
roi_dict['boxes'][object_idx, :] = [ roi_dict['boxes'][object_idx, :] = [
max(0, obj['xmin']), max(0, bbox[0]),
max(0, obj['ymin']), max(0, bbox[1]),
min(obj['xmax'], example['width'] - 1), min(bbox[2], width - 1),
min(obj['ymax'], example['height'] - 1), min(bbox[3], height - 1),
] ]
elif box_dim == 5:
if 'bbox' in obj:
roi_dict['boxes'][object_idx, :] = [
max(0, obj['bbox'][0]),
max(0, obj['bbox'][1]),
min(obj['bbox'][2], example['width'] - 1),
min(obj['bbox'][3], example['height'] - 1),
rotated_boxes.clip_angle(obj['bbox'][4]),
]
else:
roi_dict['boxes'][object_idx, :] = \
rotated_boxes.vertices2box(
[obj['x1'], obj['y1'],
obj['x2'], obj['y2'],
obj['x3'], obj['y3'],
obj['x4'], obj['y4']]
)
else:
raise ValueError('Excepted box4d or box5d.')
roi_dict['gt_classes'][object_idx] = \ roi_dict['gt_classes'][object_idx] = \
self._class_to_ind[obj['name']] self._class_to_ind[obj['name']]
object_idx += 1 object_idx += 1
if flip: if apply_flip:
roi_dict['boxes'] = flip_boxes( roi_dict['boxes'] = \
roi_dict['boxes'], roi_dict['width']) box_util.flip_boxes(
roi_dict['boxes'],
width,
)
roi_dict['boxes'][:, 0] /= roi_dict['width'] # Normalize to unit sizes
roi_dict['boxes'][:, 1] /= roi_dict['height'] roi_dict['boxes'][:, 0::2] /= width
roi_dict['boxes'][:, 2] /= roi_dict['width'] roi_dict['boxes'][:, 1::2] /= height
roi_dict['boxes'][:, 3] /= roi_dict['height']
return roi_dict return roi_dict
def get(self, example): def get(self, example):
img = np.frombuffer(example['content'], np.uint8) example = Example(example)
img = cv2.imdecode(img, 1) img = example.image
# Flip # Flip
flip = False apply_flip = False
if self._mirror: if self._mirror:
if np.random.randint(2) > 0: if np.random.randint(2) > 0:
img = img[:, ::-1, :] img = img[:, ::-1]
flip = True apply_flip = True
# Example -> RoIDict # Example -> RoIDict
roi_dict = self.make_roi_dict(example, flip) roi_dict = self.make_roi_dict(example, apply_flip)
# Post-Process for gt boxes # Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}] # Shape like: [num_objects, {x1, y1, x2, y2, cls}]
box_dim = roi_dict['boxes'].shape[1] gt_boxes = np.empty((roi_dict['gt_classes'].size, 5), 'float32')
gt_boxes = np.empty((roi_dict['gt_classes'].size, box_dim + 1), 'float32') gt_boxes[:, :4], gt_boxes[:, 4] = roi_dict['boxes'], roi_dict['gt_classes']
gt_boxes[:, :box_dim], gt_boxes[:, box_dim] = \
roi_dict['boxes'], roi_dict['gt_classes']
# Distort => Expand => Sample => Resize # Distort => Expand => Sample => Resize
img, gt_boxes = self._image_aug(img, gt_boxes) img, gt_boxes = self.augment_image(img, gt_boxes)
# Restore to the blob scale # Restore to the blob scale
gt_boxes[:, 0] *= cfg.SSD.RESIZE.WIDTH gt_boxes[:, 0] *= cfg.SSD.RESIZE.WIDTH
...@@ -136,6 +116,10 @@ class DataTransformer(multiprocessing.Process): ...@@ -136,6 +116,10 @@ class DataTransformer(multiprocessing.Process):
gt_boxes[:, 2] *= cfg.SSD.RESIZE.WIDTH gt_boxes[:, 2] *= cfg.SSD.RESIZE.WIDTH
gt_boxes[:, 3] *= cfg.SSD.RESIZE.HEIGHT gt_boxes[:, 3] *= cfg.SSD.RESIZE.HEIGHT
# Post-Process for image
if img.dtype == 'uint16':
img = img.astype('float32') / 256.
return img, gt_boxes return img, gt_boxes
def run(self): def run(self):
......
...@@ -16,15 +16,8 @@ from __future__ import print_function ...@@ -16,15 +16,8 @@ from __future__ import print_function
import numpy as np import numpy as np
def generate_anchors(min_sizes, max_sizes, ratios, angles=()): def generate_anchors(min_sizes, max_sizes, ratios):
""" """Generate anchors by enumerating aspect ratios and sizes."""
Generate anchor (reference) windows by enumerating
aspect ratios, min_sizes, max_sizes wrt a reference ctr (x, y, w, h).
"""
if len(angles) > 0:
return generate_rotated_anchors(
min_sizes, max_sizes, ratios, angles)
total_anchors = [] total_anchors = []
for idx, min_size in enumerate(min_sizes): for idx, min_size in enumerate(min_sizes):
...@@ -47,37 +40,6 @@ def generate_anchors(min_sizes, max_sizes, ratios, angles=()): ...@@ -47,37 +40,6 @@ def generate_anchors(min_sizes, max_sizes, ratios, angles=()):
return np.vstack(total_anchors) return np.vstack(total_anchors)
def generate_rotated_anchors(min_sizes, max_sizes, ratios, angles):
"""
Generate anchor (reference) windows by enumerating
aspect ratios, min_sizes, max_sizes wrt a reference ctr (x, y, w, h).
"""
total_anchors = []
for angle in angles:
for idx, min_size in enumerate(min_sizes):
angle_array = np.ones((len(ratios), 1)) * angle
# Note that SSD assume it is a ctr-anchor
base_anchor = np.array([0, 0, min_size, min_size])
anchors = _ratio_enum(base_anchor, ratios, _mkanchors_v2)
if len(max_sizes) > 0:
max_size = max_sizes[idx]
_anchors = anchors[0].reshape((1, 4))
_anchors = np.vstack([
_anchors,
_max_size_enum(
base_anchor,
min_size,
max_size,
_mkanchors_v2,
)])
anchors = np.vstack([_anchors, anchors[1:]])
angle_array = np.vstack((angle_array, angle))
anchors = np.hstack((anchors, angle_array))
total_anchors.append(anchors)
return np.vstack(total_anchors)
def _whctrs(anchor): def _whctrs(anchor):
"""Return width, height, x center, and y center for an anchor (window).""" """Return width, height, x center, and y center for an anchor (window)."""
w, h = anchor[2], anchor[3] w, h = anchor[2], anchor[3]
...@@ -125,4 +87,3 @@ def _max_size_enum(base_anchor, min_size, max_size, make_fn): ...@@ -125,4 +87,3 @@ def _max_size_enum(base_anchor, min_size, max_size, make_fn):
if __name__ == '__main__': if __name__ == '__main__':
print(generate_anchors(min_sizes=[30], max_sizes=[60], ratios=[1])) print(generate_anchors(min_sizes=[30], max_sizes=[60], ratios=[1]))
print(generate_rotated_anchors(min_sizes=[30], max_sizes=[60], ratios=[1], angles=[1]))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from lib.core.config import cfg
from lib.utils.framework import new_tensor
class HardMining(object):
def __call__(self, prob_wide, labels_wide, overlaps_wide):
prob_wide = prob_wide.numpy(True)
neg_ovr = cfg.SSD.OHEM.NEG_OVERLAP
neg_ratio = cfg.SSD.OHEM.NEG_POS_RATIO
# label ``-1`` will be ignored
new_labels_wide = -np.ones(labels_wide.shape, 'int64')
for ix in range(labels_wide.shape[0]):
labels = labels_wide[ix]
overlaps = overlaps_wide[ix]
prob = prob_wide[ix]
loss = np.zeros(labels.shape, 'float32')
inds = np.where(labels >= 0)[0]
loss[inds] = -np.log(
np.maximum(
prob[inds, labels[inds]],
np.finfo(float).eps,
)
)
# Filter negatives
fg_inds = np.where(labels > 0)[0]
neg_inds = np.where(labels == 0)[0]
neg_overlaps = overlaps[neg_inds]
eligible_neg_inds = np.where(neg_overlaps < neg_ovr)[0]
neg_inds = neg_inds[eligible_neg_inds]
# Apply mining on negatives
neg_loss = loss[neg_inds]
num_pos, num_neg = len(fg_inds), len(neg_inds)
num_bg = min(int(num_pos * neg_ratio), num_neg)
bg_inds = neg_inds[np.argsort(-neg_loss)][:num_bg]
new_labels_wide[ix][fg_inds] = labels[fg_inds] # Keep fg indices
new_labels_wide[ix][bg_inds] = 0 # Use hard negatives as bg indices
# Feed labels to compute cls loss
return {'labels': new_tensor(new_labels_wide)}
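
A toy run of the same hard-negative mining rule (NEG_POS_RATIO assumed to be 3, and the overlap filter skipped for brevity): with two positives, only the six highest-loss negatives keep label 0; the rest stay at -1 and are ignored by the loss.

import numpy as np

labels = np.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
loss = np.array([.2, .3, .9, .1, .8, .7, .05, .6, .4, .5])
neg_inds = np.where(labels == 0)[0]
num_bg = min(int((labels > 0).sum() * 3), len(neg_inds))   # 3 negatives per positive
hard_bg = neg_inds[np.argsort(-loss[neg_inds])][:num_bg]
mined = -np.ones_like(labels)
mined[labels > 0] = labels[labels > 0]                      # keep the positives
mined[hard_bg] = 0                                          # keep only the hard negatives
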
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.utils.blob import array2tensor
class HardMiningLayer(torch.nn.Module):
def __init__(self):
super(HardMiningLayer, self).__init__()
def forward(self, conf_prob, match_labels, max_overlaps):
# Confidence of each matched box
conf_prob_wide = conf_prob.numpy(True)
# Label of each matched box
match_labels_wide = match_labels
# Max overlaps between default boxes and gt boxes
max_overlaps_wide = max_overlaps
# label ``-1`` will be ignored
labels_wide = -np.ones(match_labels_wide.shape, dtype=np.int64)
for ix in range(match_labels_wide.shape[0]):
match_labels = match_labels_wide[ix]
max_overlaps = max_overlaps_wide[ix]
conf_prob = conf_prob_wide[ix]
conf_loss = np.zeros(match_labels.shape, dtype=np.float32)
inds = np.where(match_labels >= 0)[0]
flt_min = np.finfo(float).eps
# Softmax cross-entropy
conf_loss[inds] = -np.log(np.maximum(
conf_prob[inds, match_labels[inds]], flt_min))
# Filter negatives
fg_inds = np.where(match_labels > 0)[0]
neg_inds = np.where(match_labels == 0)[0]
neg_overlaps = max_overlaps[neg_inds]
eligible_neg_inds = np.where(neg_overlaps < cfg.SSD.OHEM.NEG_OVERLAP)[0]
sel_inds = neg_inds[eligible_neg_inds]
# Do Mining
sel_loss = conf_loss[sel_inds]
num_pos = len(fg_inds)
num_sel = min(int(num_pos * cfg.SSD.OHEM.NEG_POS_RATIO), len(sel_inds))
sorted_sel_inds = sel_inds[np.argsort(-sel_loss)]
bg_inds = sorted_sel_inds[:num_sel]
labels_wide[ix][fg_inds] = match_labels[fg_inds] # Keep fg indices
labels_wide[ix][bg_inds] = 0 # Use hard negatives as bg indices
# Feed labels to compute cls loss
return {'labels': array2tensor(labels_wide)}
...@@ -14,22 +14,16 @@ from __future__ import division ...@@ -14,22 +14,16 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import array2tensor from lib.utils import boxes as box_util
from lib.utils.boxes import bbox_overlaps from lib.utils.framework import new_tensor
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
class MultiBoxMatchLayer(torch.nn.Module): class MultiBoxMatch(object):
def __init__(self): def __call__(self, prior_boxes, gt_boxes):
super(MultiBoxMatchLayer, self).__init__()
def forward(self, prior_boxes, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
num_priors, box_dim = prior_boxes.shape[:] num_priors, box_dim = prior_boxes.shape[:]
# Do matching between prior boxes and gt boxes # Do matching between prior boxes and gt boxes
...@@ -40,20 +34,20 @@ class MultiBoxMatchLayer(torch.nn.Module): ...@@ -40,20 +34,20 @@ class MultiBoxMatchLayer(torch.nn.Module):
for ix in range(num_images): for ix in range(num_images):
# GT boxes (x1, y1, x2, y2, label) # GT boxes (x1, y1, x2, y2, label)
gt_boxes = gt_boxes_wide[ix] gt_boxes = gt_boxes_wide[ix]
if gt_boxes.shape[0] == 0: num_gt = gt_boxes.shape[0]
if num_gt == 0:
continue continue
# Compute the overlaps between prior boxes and gt boxes # Compute the overlaps between prior boxes and gt boxes
overlaps = bbox_overlaps(prior_boxes, gt_boxes) overlaps = box_util.bbox_overlaps(prior_boxes, gt_boxes)
argmax_overlaps = overlaps.argmax(axis=1) argmax_overlaps = overlaps.argmax(1)
max_overlaps = overlaps[np.arange(num_priors), argmax_overlaps] max_overlaps = overlaps[np.arange(num_priors), argmax_overlaps]
max_overlaps_wide[ix] = max_overlaps max_overlaps_wide[ix] = max_overlaps
# Bipartite matching and assignments # Bipartite matching and assignments
bipartite_inds = overlaps.argmax(axis=0) bipartite_inds = overlaps.argmax(0)
class_assignment = gt_boxes[:, -1] class_assignment = gt_boxes[:, -1]
match_inds_wide[ix][bipartite_inds] = np.arange( match_inds_wide[ix][bipartite_inds] = np.arange(num_gt, dtype='int32')
gt_boxes.shape[0], dtype=np.int32)
match_labels_wide[ix][bipartite_inds] = class_assignment match_labels_wide[ix][bipartite_inds] = class_assignment
# Per prediction matching and assignments # Per prediction matching and assignments
...@@ -72,11 +66,8 @@ class MultiBoxMatchLayer(torch.nn.Module): ...@@ -72,11 +66,8 @@ class MultiBoxMatchLayer(torch.nn.Module):
} }
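
The bipartite stage above guarantees that every gt box claims at least one prior before the per-prediction matching (elided here) fills in the rest; a toy illustration with three priors and two gt boxes:

import numpy as np

overlaps = np.array([[0.10, 0.62],   # prior 0 vs (gt 0, gt 1)
                     [0.55, 0.20],   # prior 1
                     [0.48, 0.71]])  # prior 2
match_inds = -np.ones(3, 'int32')
best_prior_per_gt = overlaps.argmax(0)         # -> [1, 2]
match_inds[best_prior_per_gt] = np.arange(2)   # prior 1 -> gt 0, prior 2 -> gt 1
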
class MultiBoxTargetLayer(torch.nn.Module): class MultiBoxTarget(object):
def __init__(self): def __call__(
super(MultiBoxTargetLayer, self).__init__()
def forward(
self, self,
match_inds, match_inds,
match_labels, match_labels,
...@@ -90,15 +81,15 @@ class MultiBoxTargetLayer(torch.nn.Module): ...@@ -90,15 +81,15 @@ class MultiBoxTargetLayer(torch.nn.Module):
match_labels_wide = match_labels match_labels_wide = match_labels
num_priors, box_dim = prior_boxes.shape[:] num_priors, box_dim = prior_boxes.shape[:]
gt_boxes_wide = dismantle_gt_boxes(gt_boxes, num_images) gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
bbox_targets_wide = np.zeros((num_images, num_priors, box_dim), 'float32') bbox_targets_wide = np.zeros((num_images, num_priors, box_dim), 'float32')
bbox_inside_weights_wide = np.zeros(bbox_targets_wide.shape, 'float32') bbox_inside_weights_wide = np.zeros(bbox_targets_wide.shape, 'float32')
bbox_outside_weights_wide = np.zeros(bbox_targets_wide.shape, 'float32') bbox_outside_weights_wide = np.zeros(bbox_targets_wide.shape, 'float32')
# Number of matched boxes(#positive) # Number of matched boxes(#positive)
# We divide it by num of images, as SmoothLLLoss will divide it also
n_pos = float(max(len(np.where(match_labels_wide > 0)[0]), 1)) n_pos = float(max(len(np.where(match_labels_wide > 0)[0]), 1))
# Multiply by the number of images to compensate for the smooth L1 loss
bbox_reg_weight = cfg.SSD.BBOX_REG_WEIGHT * num_images / n_pos bbox_reg_weight = cfg.SSD.BBOX_REG_WEIGHT * num_images / n_pos
for ix in range(num_images): for ix in range(num_images):
...@@ -106,7 +97,7 @@ class MultiBoxTargetLayer(torch.nn.Module): ...@@ -106,7 +97,7 @@ class MultiBoxTargetLayer(torch.nn.Module):
if gt_boxes.shape[0] == 0: if gt_boxes.shape[0] == 0:
continue continue
# Sample fg-rois(default boxes) & gt-rois(gt boxes) # Select ground-truth
match_inds = match_inds_wide[ix] match_inds = match_inds_wide[ix]
match_labels = match_labels_wide[ix] match_labels = match_labels_wide[ix]
ex_inds = np.where(match_labels > 0)[0] ex_inds = np.where(match_labels > 0)[0]
...@@ -114,14 +105,18 @@ class MultiBoxTargetLayer(torch.nn.Module): ...@@ -114,14 +105,18 @@ class MultiBoxTargetLayer(torch.nn.Module):
gt_assignment = match_inds[ex_inds] gt_assignment = match_inds[ex_inds]
gt_rois = gt_boxes[gt_assignment] gt_rois = gt_boxes[gt_assignment]
# Assign targets & inside weights & outside weights # Assign bbox targets
bbox_targets_wide[ix][ex_inds] = bbox_transform( bbox_targets_wide[ix][ex_inds] = \
ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS) box_util.bbox_transform(
ex_rois,
gt_rois,
cfg.BBOX_REG_WEIGHTS,
)
bbox_inside_weights_wide[ix, :] = 1. bbox_inside_weights_wide[ix, :] = 1.
bbox_outside_weights_wide[ix][ex_inds] = bbox_reg_weight bbox_outside_weights_wide[ix][ex_inds] = bbox_reg_weight
return { return {
'bbox_targets': array2tensor(bbox_targets_wide), 'bbox_targets': new_tensor(bbox_targets_wide),
'bbox_inside_weights': array2tensor(bbox_inside_weights_wide), 'bbox_inside_weights': new_tensor(bbox_inside_weights_wide),
'bbox_outside_weights': array2tensor(bbox_outside_weights_wide), 'bbox_outside_weights': new_tensor(bbox_outside_weights_wide),
} }
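
For reference, box_util.bbox_transform is assumed to compute the standard R-CNN regression targets scaled by cfg.BBOX_REG_WEIGHTS; a self-contained sketch of that parameterization (not a copy of the library code):

import numpy as np

def encode_boxes(ex_rois, gt_rois, weights=(10., 10., 5., 5.)):
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.
    ex_cx, ex_cy = ex_rois[:, 0] + 0.5 * ex_w, ex_rois[:, 1] + 0.5 * ex_h
    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.
    gt_cx, gt_cy = gt_rois[:, 0] + 0.5 * gt_w, gt_rois[:, 1] + 0.5 * gt_h
    wx, wy, ww, wh = weights
    return np.stack([wx * (gt_cx - ex_cx) / ex_w,
                     wy * (gt_cy - ex_cy) / ex_h,
                     ww * np.log(gt_w / ex_w),
                     wh * np.log(gt_h / ex_h)], axis=1)
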
...@@ -14,18 +14,17 @@ from __future__ import division ...@@ -14,18 +14,17 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.ssd.generate_anchors import generate_anchors from lib.ssd.generate_anchors import generate_anchors
from lib.utils import logger from lib.utils import logger
class PriorBoxLayer(torch.nn.Module): class PriorBox(object):
"""Generate default boxes(anchors).""" """Generate default boxes(anchors)."""
def __init__(self): def __init__(self):
super(PriorBoxLayer, self).__init__() super(PriorBox, self).__init__()
min_sizes = cfg.SSD.MULTIBOX.MIN_SIZES min_sizes = cfg.SSD.MULTIBOX.MIN_SIZES
max_sizes = cfg.SSD.MULTIBOX.MAX_SIZES max_sizes = cfg.SSD.MULTIBOX.MAX_SIZES
if len(max_sizes) > 0: if len(max_sizes) > 0:
...@@ -34,7 +33,6 @@ class PriorBoxLayer(torch.nn.Module): ...@@ -34,7 +33,6 @@ class PriorBoxLayer(torch.nn.Module):
len(min_sizes), len(max_sizes))) len(min_sizes), len(max_sizes)))
self.strides = cfg.SSD.MULTIBOX.STRIDES self.strides = cfg.SSD.MULTIBOX.STRIDES
aspect_ratios = cfg.SSD.MULTIBOX.ASPECT_RATIOS aspect_ratios = cfg.SSD.MULTIBOX.ASPECT_RATIOS
aspect_angles = cfg.SSD.MULTIBOX.ASPECT_ANGLES
self.base_anchors = [] self.base_anchors = []
for i in range(len(min_sizes)): for i in range(len(min_sizes)):
self.base_anchors.append( self.base_anchors.append(
...@@ -44,11 +42,10 @@ class PriorBoxLayer(torch.nn.Module): ...@@ -44,11 +42,10 @@ class PriorBoxLayer(torch.nn.Module):
max_sizes[i] if isinstance( max_sizes[i] if isinstance(
max_sizes[i], (list, tuple)) else [max_sizes[i]], max_sizes[i], (list, tuple)) else [max_sizes[i]],
aspect_ratios[i], aspect_ratios[i],
aspect_angles,
) )
) )
def forward(self, features): def __call__(self, features):
all_anchors = [] all_anchors = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
# 1. Generate base grids # 1. Generate base grids
......
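
The elided part of __call__ tiles each set of base anchors over its feature-map grid; a minimal sketch of that tiling, where the half-stride centering is an assumption about the SSD convention rather than the exact kernel behavior:

import numpy as np

def tile_anchors(base_anchors, stride, height, width):
    # base_anchors: (A, 4) boxes centered at the origin; output: (H * W * A, 4)
    shift_x = (np.arange(width) + 0.5) * stride
    shift_y = (np.arange(height) + 0.5) * stride
    sx, sy = np.meshgrid(shift_x, shift_y)
    shifts = np.stack([sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel()], axis=1)
    return (shifts[:, None, :] + base_anchors[None, :, :]).reshape((-1, 4))
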
...@@ -18,12 +18,11 @@ import dragon.vm.torch as torch ...@@ -18,12 +18,11 @@ import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.modeling.detector import new_detector
from lib.nms import nms_wrapper from lib.nms import nms_wrapper
from lib.utils import boxes as box_util
from lib.utils import framework from lib.utils import framework
from lib.utils import time_util from lib.utils import time_util
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_boxes
from lib.utils.vis import vis_one_image
def get_images(ims): def get_images(ims):
...@@ -34,7 +33,10 @@ def get_images(ims): ...@@ -34,7 +33,10 @@ def get_images(ims):
im_scales.append((float(target_h) / im.shape[0], im_scales.append((float(target_h) / im.shape[0],
float(target_w) / im.shape[1])) float(target_w) / im.shape[1]))
processed_ims.append(cv2.resize(im, (target_w, target_h))) processed_ims.append(cv2.resize(im, (target_w, target_h)))
ims_blob = np.array(processed_ims, dtype=np.uint8) if ims[0].dtype == 'uint16':
ims_blob = np.array(processed_ims, dtype='float32') / 256.
else:
ims_blob = np.array(processed_ims, dtype='uint8')
return ims_blob, im_scales return ims_blob, im_scales
...@@ -43,24 +45,23 @@ def ims_detect(detector, ims): ...@@ -43,24 +45,23 @@ def ims_detect(detector, ims):
data, im_scales = get_images(ims) data, im_scales = get_images(ims)
# Do Forward # Do Forward
if not hasattr(detector, 'frozen_graph'): if not hasattr(detector, 'graph'):
image = torch.from_numpy(data) with framework.new_workspace().as_default():
with torch.no_grad(): with torch.no_grad():
with torch.jit.Recorder(retain_ops=True): with torch.jit.Tracer(retain_ops=True):
outputs = detector.forward(inputs={'data': image}) inputs = {'data': torch.from_numpy(data)}
detector.frozen_graph = \ outputs = detector.forward(inputs)
framework.FrozenGraph( detector.graph = \
{'data': image}, framework.Graph(inputs, {
{'cls_prob': outputs['cls_prob'], 'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']}, 'bbox_pred': outputs['bbox_pred']
{'prior_boxes': outputs['prior_boxes']}, }, {'prior_boxes': outputs['prior_boxes']})
) outputs = detector.graph(data=data)
outputs = detector.frozen_graph(data=data)
# Decode results # Decode results
batch_boxes = [] batch_boxes = []
for i in range(len(im_scales)): for i in range(len(im_scales)):
boxes = bbox_transform_inv( boxes = box_util.bbox_transform_inv(
outputs['prior_boxes'], outputs['prior_boxes'],
outputs['bbox_pred'][i], outputs['bbox_pred'][i],
cfg.BBOX_REG_WEIGHTS, cfg.BBOX_REG_WEIGHTS,
...@@ -69,39 +70,40 @@ def ims_detect(detector, ims): ...@@ -69,39 +70,40 @@ def ims_detect(detector, ims):
boxes[:, 1] /= im_scales[i][0] boxes[:, 1] /= im_scales[i][0]
boxes[:, 2] /= im_scales[i][1] boxes[:, 2] /= im_scales[i][1]
boxes[:, 3] /= im_scales[i][0] boxes[:, 3] /= im_scales[i][0]
batch_boxes.append(clip_boxes(boxes, ims[i].shape)) batch_boxes.append(box_util.clip_boxes(boxes, ims[i].shape))
return outputs['cls_prob'], batch_boxes return outputs['cls_prob'], batch_boxes
def test_net(detector, server): def test_net(weights, num_classes, q_in, q_out, device):
# Load settings num_classes, cfg.GPU_ID = num_classes, device
classes = server.classes detector = new_detector(device, weights)
num_images = server.num_images
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()} must_stop = False
_t = time_util.new_timers('im_detect', 'misc')
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH): while True:
# Collect raw images and ground-truths if must_stop:
image_ids, raw_images = [], [] break
indices, raw_images = [], []
for i in range(cfg.TEST.IMS_PER_BATCH):
idx, raw_image = q_in.get()
if raw_image is None:
must_stop = True
break
indices.append(idx)
raw_images.append(raw_image)
for item_idx in range(cfg.TEST.IMS_PER_BATCH): if len(raw_images) == 0:
if batch_idx + item_idx >= num_images:
continue continue
image_id, raw_image = server.get_image()
image_ids.append(image_id)
raw_images.append(raw_image)
with _t['im_detect'].tic_and_toc(): with _t['im_detect'].tic_and_toc():
batch_scores, batch_boxes = ims_detect(detector, raw_images) batch_scores, batch_boxes = \
ims_detect(detector, raw_images)
for i in range(len(batch_scores)):
_t['misc'].tic() _t['misc'].tic()
for item_idx in range(len(batch_scores)): scores, boxes = batch_scores[i], batch_boxes[i]
i = batch_idx + item_idx
scores = batch_scores[item_idx]
boxes = batch_boxes[item_idx]
boxes_this_image = [[]] boxes_this_image = [[]]
for j in range(1, num_classes): for j in range(1, num_classes):
inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0] inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
...@@ -127,44 +129,16 @@ def test_net(detector, server): ...@@ -127,44 +129,16 @@ def test_net(detector, server):
force_cpu=True, force_cpu=True,
) )
cls_detections = cls_detections[keep, :] cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections) boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(
raw_images[item_idx],
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_ids[item_idx]),
)
# Limit to max_per_image detections *over all classes*
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
for j in range(1, num_classes):
keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
all_boxes[j][i] = all_boxes[j][i][keep, :]
_t['misc'].toc() _t['misc'].toc()
print('\rim_detect: {:d}/{:d} {:.3f}s {:.3f}s' q_out.put((
.format(batch_idx + cfg.TEST.IMS_PER_BATCH, indices[i],
num_images, {
_t['im_detect'].average_time, 'im_detect': _t['im_detect'].average_time,
_t['misc'].average_time), 'misc': _t['misc'].average_time,
end='') },
{
print('\n>>>>>>>>>>>>>>>>>>> Evaluating <<<<<<<<<<<<<<<<<<<<') 'boxes': boxes_this_image,
},
print('Evaluating detections') ))
server.evaluate_detections(all_boxes)
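The reworked test_net above runs as a queue-driven worker: it pulls (index, image) pairs from q_in, pushes (index, timings, results) to q_out, and stops when it receives a None image. A hedged sketch of the surrounding wiring is shown below; feed_images and collect_results are illustrative names, not functions from this repository.

def feed_images(q_in, images, num_workers):
    # Enqueue every image with its index, then one stop sentinel per worker.
    for idx, image in enumerate(images):
        q_in.put((idx, image))
    for _ in range(num_workers):
        q_in.put((None, None))

def collect_results(q_out, num_images):
    # Gather per-image detections in whatever order the workers finish.
    results = {}
    while len(results) < num_images:
        idx, timings, outputs = q_out.get()
        results[idx] = outputs['boxes']
    return results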
...@@ -23,9 +23,8 @@ import numpy as np ...@@ -23,9 +23,8 @@ import numpy as np
import numpy.random as npr import numpy.random as npr
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils import boxes as box_util
from lib.utils import logger from lib.utils import logger
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import iou
class Compose(object): class Compose(object):
...@@ -52,7 +51,7 @@ class Distort(object): ...@@ -52,7 +51,7 @@ class Distort(object):
(PIL.ImageEnhance.Contrast, self._contrast_prob), (PIL.ImageEnhance.Contrast, self._contrast_prob),
(PIL.ImageEnhance.Color, self._saturation_prob), (PIL.ImageEnhance.Color, self._saturation_prob),
] ]
npr.shuffle(transforms) np.random.shuffle(transforms)
for transform_fn, prob in transforms: for transform_fn, prob in transforms:
if npr.uniform() < prob: if npr.uniform() < prob:
img = transform_fn(img) img = transform_fn(img)
...@@ -145,7 +144,7 @@ class Sample(object): ...@@ -145,7 +144,7 @@ class Sample(object):
@classmethod @classmethod
def _compute_overlaps(cls, rand_box, gt_boxes): def _compute_overlaps(cls, rand_box, gt_boxes):
return iou(np.expand_dims(rand_box, 0), gt_boxes[:, 0:4]) return box_util.iou(np.expand_dims(rand_box, 0), gt_boxes[:, 0:4])
@classmethod @classmethod
def _generate_sample(cls, sample_param): def _generate_sample(cls, sample_param):
...@@ -217,7 +216,7 @@ class Sample(object): ...@@ -217,7 +216,7 @@ class Sample(object):
new_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h - h_off) new_gt_boxes[:, 1] = (gt_boxes[:, 1] * im_h - h_off)
new_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w - w_off) new_gt_boxes[:, 2] = (gt_boxes[:, 2] * im_w - w_off)
new_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h - h_off) new_gt_boxes[:, 3] = (gt_boxes[:, 3] * im_h - h_off)
new_gt_boxes = clip_tiled_boxes(new_gt_boxes, (crop_h, crop_w)) new_gt_boxes = box_util.clip_boxes(new_gt_boxes, (crop_h, crop_w))
new_gt_boxes[:, 0] = new_gt_boxes[:, 0] / crop_w new_gt_boxes[:, 0] = new_gt_boxes[:, 0] / crop_w
new_gt_boxes[:, 1] = new_gt_boxes[:, 1] / crop_h new_gt_boxes[:, 1] = new_gt_boxes[:, 1] / crop_h
new_gt_boxes[:, 2] = new_gt_boxes[:, 2] / crop_w new_gt_boxes[:, 2] = new_gt_boxes[:, 2] / crop_w
......
...@@ -31,6 +31,7 @@ def im_list_to_blob(ims): ...@@ -31,6 +31,7 @@ def im_list_to_blob(ims):
Assume that images are not mean-subtracted, and are in BGR order. Assume that images are not mean-subtracted, and are in BGR order.
""" """
blob_dtype = 'uint8' if ims[0].dtype == 'uint8' else 'float32'
max_shape = np.array([im.shape for im in ims]).max(axis=0) max_shape = np.array([im.shape for im in ims]).max(axis=0)
if cfg.MODEL.COARSEST_STRIDE > 0: if cfg.MODEL.COARSEST_STRIDE > 0:
...@@ -38,11 +39,13 @@ def im_list_to_blob(ims): ...@@ -38,11 +39,13 @@ def im_list_to_blob(ims):
max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride) max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride)
max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride) max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride)
unify_shape = (len(ims), max_shape[0], max_shape[1], 3) blob_shape = (len(ims), max_shape[0], max_shape[1], 3)
blob = np.empty(unify_shape, dtype=np.uint8) blob = np.empty(blob_shape, blob_dtype)
blob[:] = cfg.PIXEL_MEANS blob[:] = cfg.PIXEL_MEANS
for i, im in enumerate(ims): for i, im in enumerate(ims):
if im.dtype == 'uint16':
im = im.astype(blob_dtype) / 256.
blob[i, :im.shape[0], :im.shape[1], :] = im blob[i, :im.shape[0], :im.shape[1], :] = im
return blob return blob
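The COARSEST_STRIDE handling above rounds the blob's spatial extent up to a multiple of the stride so the backbone can downsample without fractional sizes; a minimal sketch of that rounding, assuming a stride of 32:

import numpy as np

def align_to_stride(height, width, stride=32):
    # Round each spatial dimension up to the next multiple of the stride,
    # mirroring the max_shape adjustment above.
    return (int(np.ceil(height / stride) * stride),
            int(np.ceil(width / stride) * stride))

# e.g. align_to_stride(600, 1000) == (608, 1024)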
...@@ -52,12 +55,16 @@ def mask_list_to_blob(masks): ...@@ -52,12 +55,16 @@ def mask_list_to_blob(masks):
"""Convert a list of masks into a network input.""" """Convert a list of masks into a network input."""
max_shape = np.array([mask.shape[1:] for mask in masks]).max(axis=0) max_shape = np.array([mask.shape[1:] for mask in masks]).max(axis=0)
num_masks = np.array([mask.shape[0] for mask in masks]).sum() num_masks = np.array([mask.shape[0] for mask in masks]).sum()
blob = np.zeros((num_masks, max_shape[0], max_shape[1]), dtype=np.uint8)
pos = 0 blob_shape = ((num_masks, max_shape[0], max_shape[1]))
blob = np.zeros(blob_shape, 'uint8')
count = 0
for mask in masks: for mask in masks:
blob[pos : pos + mask.shape[0], n, h, w = mask.shape
0 : mask.shape[1], 0 : mask.shape[2]] = mask blob[count:count + n, :h, :w] = mask
pos += mask.shape[0] count += n
return blob return blob
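A hedged usage sketch of mask_list_to_blob above: instance masks of different sizes from two images are packed into one zero-padded uint8 blob, with the first axis counting instances across the whole batch.

import numpy as np

masks = [np.ones((2, 28, 20), 'uint8'),   # image 0: two 28x20 masks
         np.ones((1, 14, 28), 'uint8')]   # image 1: one 14x28 mask
blob = mask_list_to_blob(masks)
assert blob.shape == (3, 28, 28)          # zero-padded to the max extent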
...@@ -88,22 +95,3 @@ def prep_im_for_blob(img, target_size, max_size): ...@@ -88,22 +95,3 @@ def prep_im_for_blob(img, target_size, max_size):
im_scale *= jitter im_scale *= jitter
return resize_image(img, im_scale, im_scale), im_scale, jitter return resize_image(img, im_scale, im_scale), im_scale, jitter
def array2tensor(array, enforce_cpu=False):
if isinstance(array, np.ndarray):
# Zero-Copy from numpy
cpu_tensor = torch.from_numpy(array)
else:
cpu_tensor = array
return cpu_tensor if enforce_cpu else \
cpu_tensor.cuda(cfg.GPU_ID)
def tensor2array(tensor, copy=False):
if isinstance(tensor, torch.Tensor):
# Zero-Copy from numpy
array = tensor.numpy(True)
else:
array = tensor
return array.copy() if copy else array
...@@ -20,7 +20,6 @@ from __future__ import print_function ...@@ -20,7 +20,6 @@ from __future__ import print_function
import numpy as np import numpy as np
from lib.utils import cython_bbox from lib.utils import cython_bbox
from lib.utils import rotated_boxes
def intersection(boxes1, boxes2): def intersection(boxes1, boxes2):
...@@ -109,8 +108,6 @@ def ioa2(boxes1, boxes2): ...@@ -109,8 +108,6 @@ def ioa2(boxes1, boxes2):
def bbox_overlaps(boxes1, boxes2): def bbox_overlaps(boxes1, boxes2):
"""Compute the overlaps between two group of boxes.""" """Compute the overlaps between two group of boxes."""
if boxes1.shape[1] == 5:
return rotated_boxes.bbox_overlaps(boxes1, boxes2)
return cython_bbox.bbox_overlaps( return cython_bbox.bbox_overlaps(
np.ascontiguousarray(boxes1, dtype=np.float), np.ascontiguousarray(boxes1, dtype=np.float),
np.ascontiguousarray(boxes2, dtype=np.float), np.ascontiguousarray(boxes2, dtype=np.float),
...@@ -119,10 +116,6 @@ def bbox_overlaps(boxes1, boxes2): ...@@ -119,10 +116,6 @@ def bbox_overlaps(boxes1, boxes2):
def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1.)): def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1.)):
"""Transform the boxes to the regression targets.""" """Transform the boxes to the regression targets."""
if len(weights) == 5:
# Transform the rotated boxes
return rotated_boxes.bbox_transform(ex_rois, gt_rois, weights)
ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1. ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.
ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1. ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.
ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
...@@ -134,20 +127,16 @@ def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1.)): ...@@ -134,20 +127,16 @@ def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1.)):
gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
wx, wy, ww, wh = weights wx, wy, ww, wh = weights
targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths targets = [wx * (gt_ctr_x - ex_ctr_x) / ex_widths]
targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights targets += [wy * (gt_ctr_y - ex_ctr_y) / ex_heights]
targets_dw = ww * np.log(gt_widths / ex_widths) targets += [ww * np.log(gt_widths / ex_widths)]
targets_dh = wh * np.log(gt_heights / ex_heights) targets += [wh * np.log(gt_heights / ex_heights)]
return np.vstack((targets_dx, targets_dy, targets_dw, targets_dh)).transpose() return np.vstack(targets).transpose()
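A worked example of the (dx, dy, dw, dh) encoding above with unit weights: an anchor of size 10x10 centered at (5, 5) regressed onto a ground-truth box of size 20x20 centered at (10, 10).

import numpy as np

ex_rois = np.array([[0., 0., 9., 9.]])    # width = height = 10, center (5, 5)
gt_rois = np.array([[0., 0., 19., 19.]])  # width = height = 20, center (10, 10)
# dx = (10 - 5) / 10 = 0.5, dy = 0.5
# dw = log(20 / 10) ~ 0.693, dh ~ 0.693
# bbox_transform(ex_rois, gt_rois) -> [[0.5, 0.5, 0.693, 0.693]]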
def bbox_transform_inv(boxes, deltas, weights=(1., 1., 1., 1.)): def bbox_transform_inv(boxes, deltas, weights=(1., 1., 1., 1.)):
"""Decode the final boxes according to the deltas.""" """Decode the final boxes according to the deltas."""
if len(weights) == 5:
# Decode the rotated boxes
return rotated_boxes.bbox_transform_inv(boxes, deltas, weights)
if boxes.shape[0] == 0: if boxes.shape[0] == 0:
return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
...@@ -188,8 +177,6 @@ def boxes_area(boxes): ...@@ -188,8 +177,6 @@ def boxes_area(boxes):
def clip_boxes(boxes, im_shape): def clip_boxes(boxes, im_shape):
if boxes.shape[1] == 5:
return rotated_boxes.clip_boxes(boxes, im_shape)
# x1 >= 0 # x1 >= 0
boxes[:, 0] = np.maximum(np.minimum(boxes[:, 0], im_shape[1] - 1), 0) boxes[:, 0] = np.maximum(np.minimum(boxes[:, 0], im_shape[1] - 1), 0)
# y1 >= 0 # y1 >= 0
...@@ -234,8 +221,6 @@ def expand_boxes(boxes, scale): ...@@ -234,8 +221,6 @@ def expand_boxes(boxes, scale):
def flip_boxes(boxes, width): def flip_boxes(boxes, width):
"""Flip the boxes horizontally.""" """Flip the boxes horizontally."""
if boxes.shape[1] == 5:
return rotated_boxes.flip_boxes(boxes, width)
flip_boxes = boxes.copy() flip_boxes = boxes.copy()
old_x1 = boxes[:, 0].copy() old_x1 = boxes[:, 0].copy()
old_x2 = boxes[:, 2].copy() old_x2 = boxes[:, 2].copy()
...@@ -252,10 +237,10 @@ def filter_boxes(boxes, min_size): ...@@ -252,10 +237,10 @@ def filter_boxes(boxes, min_size):
return keep return keep
def dismantle_gt_boxes(gt_boxes, num_images): def dismantle_boxes(gt_boxes, num_images):
"""Dismantle the packed ground-truth boxes.""" """Dismantle the packed ground-truth boxes."""
return [ return [
gt_boxes[ gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == ix)[0] np.where(gt_boxes[:, -1].astype(np.int32) == i)[0]
][:, :-1] for ix in range(num_images) ][:, :-1] for i in range(num_images)
] ]
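A hedged usage sketch of dismantle_boxes above: boxes are packed with the image index in the last column, and the helper splits them back into one per-image array with that column dropped.

import numpy as np

packed = np.array([[0., 0., 10., 10., 0.],   # image 0
                   [5., 5., 20., 20., 1.],   # image 1
                   [1., 1.,  8.,  8., 0.]],  # image 0
                  'float32')
per_image = dismantle_boxes(packed, num_images=2)
assert len(per_image) == 2
assert per_image[0].shape == (2, 4) and per_image[1].shape == (1, 4)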
...@@ -16,9 +16,16 @@ from __future__ import print_function ...@@ -16,9 +16,16 @@ from __future__ import print_function
import collections import collections
import dragon import dragon
import dragon.vm.torch as torch
from dragon.core.framework import tensor_util from dragon.core.framework import tensor_util
from dragon.core.util import six from dragon.core.util import six
import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
def feed_tensor(tensor, array):
tensor_util.set_array(tensor, array)
def get_param_groups(module, bias_lr=1., bias_decay=0.): def get_param_groups(module, bias_lr=1., bias_decay=0.):
...@@ -52,7 +59,7 @@ def get_param_groups(module, bias_lr=1., bias_decay=0.): ...@@ -52,7 +59,7 @@ def get_param_groups(module, bias_lr=1., bias_decay=0.):
} }
] ]
for name, param in module.named_parameters(): for name, param in module.named_parameters():
gi = 1 if 'bias' in name else 0 gi = 0 if 'weight' in name and param.dim() > 1 else 1
param_groups[gi]['params'].append(param) param_groups[gi]['params'].append(param)
if len(param_groups[1]['params']) == 0: if len(param_groups[1]['params']) == 0:
param_groups.pop() # Remove empty group param_groups.pop() # Remove empty group
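The revised grouping rule above sends multi-dimensional 'weight' parameters to the base group and everything else (biases and 1-d parameters such as BatchNorm scales) to the bias group with its own lr multiplier and weight decay; a minimal restatement:

def group_index(name, ndim):
    # 0: regular weights (base lr / decay); 1: biases and 1-d params.
    return 0 if 'weight' in name and ndim > 1 else 1

assert group_index('conv1.weight', 4) == 0
assert group_index('bn1.weight', 1) == 1   # BN gamma falls into the bias group
assert group_index('fc.bias', 1) == 1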
...@@ -68,7 +75,54 @@ def get_workspace(): ...@@ -68,7 +75,54 @@ def get_workspace():
The default workspace. The default workspace.
""" """
return dragon.workspace.get_default() return dragon.get_workspace()
def new_placeholder(device=None):
"""Create a new tensor to feed data.
Parameters
----------
device : int, optional
The device index.
Returns
-------
dragon.vm.torch.Tensor
The placeholder tensor.
"""
value = torch.zeros(1)
if device is not None:
return value.cuda(device)
return value
def new_tensor(data, enforce_cpu=False):
"""Create a new tensor from the data.
Parameters
----------
data : array_like
The data value.
enforce_cpu : bool, optional, default=False
**True** to enforce the cpu storage.
Returns
-------
dragon.vm.torch.Tensor
        The tensor holding the data.
"""
if isinstance(data, np.ndarray):
tensor = torch.from_numpy(data)
elif isinstance(data, torch.Tensor):
tensor = data
else:
tensor = torch.tensor(data)
if not enforce_cpu:
tensor = tensor.cuda(cfg.GPU_ID)
return tensor
def new_workspace(merge_default=True): def new_workspace(merge_default=True):
...@@ -112,10 +166,10 @@ def reset_workspace(workspace=None, merge_default=True): ...@@ -112,10 +166,10 @@ def reset_workspace(workspace=None, merge_default=True):
return new_workspace(merge_default) return new_workspace(merge_default)
class FrozenGraph(object): class Graph(object):
"""Simple sequential graph to accelerate inference. """Simple sequential graph to accelerate inference.
The frozen graph reduces the overhead of python functions Graph reduces the overhead of python functions
under eager execution. Such cost will be at least 15ms under eager execution. Such cost will be at least 15ms
for common backbones, which limits to about 60FPS. for common backbones, which limits to about 60FPS.
...@@ -130,11 +184,20 @@ class FrozenGraph(object): ...@@ -130,11 +184,20 @@ class FrozenGraph(object):
for k, v in input_dict.items(): for k, v in input_dict.items():
input_dict[k] = v.name if hasattr(v, 'name') else v input_dict[k] = v.name if hasattr(v, 'name') else v
return input_dict return input_dict
self.placeholders = {}
self._inputs = canonicalize(inputs) self._inputs = canonicalize(inputs)
self._outputs = canonicalize(outputs) self._outputs = canonicalize(outputs)
self._constants = canonicalize(constants) self._constants = canonicalize(constants)
self._graph = new_workspace() self._workspace = get_workspace()
self._tape = torch.jit.get_default_recorder() self._tracer = torch.jit.get_tracer()
@property
def workspace(self):
return self._workspace
@workspace.setter
def workspace(self, value):
self._workspace = value
def forward(self, **kwargs): def forward(self, **kwargs):
# Assign inputs # Assign inputs
...@@ -142,8 +205,8 @@ class FrozenGraph(object): ...@@ -142,8 +205,8 @@ class FrozenGraph(object):
value = kwargs.get(name, None) value = kwargs.get(name, None)
tensor_util.set_array(tensor, value) tensor_util.set_array(tensor, value)
# Replay the tape # Replay the traced expressions
self._tape.replay() self._tracer.replay()
# Collect outputs # Collect outputs
# 1) Target results # 1) Target results
...@@ -159,7 +222,7 @@ class FrozenGraph(object): ...@@ -159,7 +222,7 @@ class FrozenGraph(object):
return outputs return outputs
def __call__(self, **kwargs): def __call__(self, **kwargs):
with self._graph.as_default(): with self._workspace.as_default():
return self.forward(**kwargs) return self.forward(**kwargs)
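A hedged usage sketch of the Graph helper above, reusing only the calls that appear in this diff (torch.jit.Tracer, framework.Graph, keyword-style replay); the model and the batches are placeholders, and the exact constructor signature outside this snippet is an assumption.

# Trace the model once, then replay the recorded ops on new feeds.
with torch.no_grad():
    with torch.jit.Tracer(retain_ops=True):
        inputs = {'data': torch.from_numpy(first_batch)}
        outputs = model.forward(inputs)
graph = framework.Graph(inputs, {'cls_prob': outputs['cls_prob']}, {})
probs = graph(data=second_batch)['cls_prob']  # replay, no python-op overhead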
......
...@@ -30,7 +30,7 @@ def distort_image(img): ...@@ -30,7 +30,7 @@ def distort_image(img):
] ]
np.random.shuffle(transforms) np.random.shuffle(transforms)
for transform in transforms: for transform in transforms:
if np.random.uniform() < .5: if np.random.uniform() < 0.5:
img = transform(img) img = transform(img)
img = img.enhance(1. + np.random.uniform(-.4, .4)) img = img.enhance(1. + np.random.uniform(-.4, .4))
return np.array(img) return np.array(img)
...@@ -71,12 +71,6 @@ def resize_image(img, fx, fy): ...@@ -71,12 +71,6 @@ def resize_image(img, fx, fy):
) )
# Faster and robust resizing than OpenCV methods
def resize_mask(mask, size):
mask = PIL.Image.fromarray(mask)
return np.array(mask.resize(size, PIL.Image.NEAREST))
def scale_image(img): def scale_image(img):
processed_ims, ims_scales = [], [] processed_ims, ims_scales = [], []
......
...@@ -17,7 +17,24 @@ from __future__ import absolute_import ...@@ -17,7 +17,24 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import cv2
import numpy as np import numpy as np
import PIL.Image
from lib.utils import boxes as box_util
def dismantle_masks(gt_boxes, gt_masks, num_images):
"""Dismantle the packed ground-truth boxes."""
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == i)[0]
][:, :-1] for i in range(num_images)
], [
gt_masks[
np.where(gt_boxes[:, -1].astype(np.int32) == i)[0]
] for i in range(num_images)
]
def intersect_box_mask(ex_box, gt_box, gt_mask): def intersect_box_mask(ex_box, gt_box, gt_mask):
...@@ -66,3 +83,100 @@ def mask_overlap(box1, box2, mask1, mask2): ...@@ -66,3 +83,100 @@ def mask_overlap(box1, box2, mask1, mask2):
if union < 1.: if union < 1.:
return 0. return 0.
return float(inter) / float(union) return float(inter) / float(union)
def project_masks(
masks,
boxes,
height,
width,
thresh=0.5,
data_format='HWC',
data_order='F',
):
"""Project the predicting masks to a image.
Parameters
----------
masks : numpy.ndarray
The masks packed in (C, H, W) format.
boxes : numpy.ndarray
    The predicted bounding boxes.
height : int
    The height of the image.
width : int
    The width of the image.
thresh : float, optional, default=0.5
    The threshold to binarize the floating-point masks.
data_format : {'HWC', 'CHW'}, optional
The data format of output image.
data_order : {'F', 'C'}, optional
The fortran-style or c-style order.
Returns
-------
numpy.ndarray
The output image.
"""
num_pred = boxes.shape[0]
assert masks.shape[0] == num_pred
mask_shape = [height, width]
if data_format == 'HWC':
mask_shape += [num_pred]
elif data_format == 'CHW':
mask_shape = [num_pred] + mask_shape
else:
raise ValueError('Unknown data format', data_format)
mask_image = np.zeros(mask_shape, 'uint8', data_order)
M = masks[0].shape[0]
scale = (M + 2.) / M
ref_boxes = box_util.expand_boxes(boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
padded_mask = np.zeros((M + 2, M + 2), 'float32')
for i in range(num_pred):
ref_box = ref_boxes[i, :4]
mask = masks[i]
padded_mask[1:-1, 1:-1] = mask[:, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > thresh, 'uint8')
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, width)
y2 = min(ref_box[3] + 1, height)
if data_format == 'HWC':
mask_image[y1:y2, x1:x2, i] = \
mask[(y1 - ref_box[1]):(y2 - ref_box[1]),
(x1 - ref_box[0]):(x2 - ref_box[0])]
elif data_format == 'CHW':
mask_image[i, y1:y2, x1:x2] = \
mask[(y1 - ref_box[1]):(y2 - ref_box[1]),
(x1 - ref_box[0]):(x2 - ref_box[0])]
return mask_image
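A hedged usage sketch of project_masks above: three 28x28 soft masks and their boxes are pasted back onto a 480x640 canvas, one channel per detection when data_format='HWC' (the box coordinates are illustrative).

import numpy as np

masks = np.random.rand(3, 28, 28).astype('float32')
boxes = np.array([[10., 20., 100., 120.],
                  [200., 50., 320., 240.],
                  [400., 300., 560., 420.]], 'float32')
canvas = project_masks(masks, boxes, height=480, width=640,
                       thresh=0.5, data_format='HWC')
assert canvas.shape == (480, 640, 3)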
def resize_mask(mask, size):
"""Resize the mask with nearest neighbor method.
PIL implementation while not OpenCV is used,
as we found the former will provide higher mask AP.
Parameters
----------
mask : numpy.ndarray
The 2d mask array.
size : Sequence[int]
The output width and height.
Returns
-------
numpy.ndarray
        The resized mask.
"""
mask = PIL.Image.fromarray(mask)
return np.array(mask.resize(size, PIL.Image.NEAREST))
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import ctypes
import math
import os
import numpy as np
class _CppExtension(object):
dtype_mapping = {
'int32': ctypes.c_int32,
'float64': ctypes.c_double,
}
def __init__(self, library_name):
libc = ctypes.cdll.LoadLibrary(
os.path.join(os.path.split(
os.path.abspath(__file__))[0],
library_name,
)
)
def load_func(name, arg_types):
func = getattr(libc, name)
func.argtypes = self.get_arg_types(*arg_types)
return func
self._apply_cpu_nms = load_func(
'apply_cpu_nms', (
('float64', 1), # dets
('int32', 1), # indices
('int32', 1), # n
('float64', 0), # thresh
)
)
self._bbox_overlaps = load_func(
'bbox_overlaps', (
('float64', 1), # boxes1
('float64', 1), # boxes2
('int32', 1), # n, k
('float64', 1) # overlaps
)
)
@staticmethod
def array2ptr(array):
return array.ctypes.data_as(
_CppExtension.get_ptr(str(array.dtype)))
@staticmethod
def contiguous(array, dtype='float64'):
return np.ascontiguousarray(array.flatten(), dtype)
@staticmethod
def get_arg_types(*args):
arg_types = []
for (dtype, is_pointer) in args:
arg_types.append(
_CppExtension.get_ptr(dtype) if is_pointer
else _CppExtension.dtype_mapping[dtype]
)
return arg_types
@staticmethod
def get_ptr(dtype):
return ctypes.POINTER(_CppExtension.dtype_mapping[dtype])
@staticmethod
def ptr2array(ptr, shape):
return np.ctypeslib.as_array(
shape.from_address(
ctypes.addressof(ptr.contents)
))
def bbox_overlaps(self, boxes1, boxes2):
"""Computer overlaps between boxes and query boxes."""
def canonicalize(boxes):
box_dim = boxes.shape[1]
if box_dim > 5:
boxes = boxes[:, :5]
elif box_dim < 5:
                raise ValueError('Expected box5d.')
return self.contiguous(boxes, 'float64')
n, k = boxes1.shape[0], boxes2.shape[0]
boxes1 = canonicalize(boxes1)
boxes2 = canonicalize(boxes2)
overlaps_shape = (ctypes.c_int32 * 2)()
overlaps_shape[:] = (n, k)
overlaps = np.zeros((n * k,), 'float64')
overlaps_ptr = self.array2ptr(overlaps)
self._bbox_overlaps(
self.array2ptr(boxes1),
self.array2ptr(boxes2),
ctypes.cast(overlaps_shape, self.get_ptr('int32')),
overlaps_ptr,
)
return self.ptr2array(overlaps_ptr, ctypes.c_double * k * n)
def cpu_nms(self, dets, thresh):
"""Apply Hard-NMS."""
if dets.shape[1] != 6:
            raise ValueError('Expected det6d.')
order = dets[:, 5].argsort()[::-1]
sorted_dets = dets[order, :]
num_keep = sorted_dets.shape[0]
num_keep_ins = ctypes.c_int32(num_keep)
indices = np.zeros((num_keep,), np.int32)
indices_ptr = self.array2ptr(indices)
self._apply_cpu_nms(
self.array2ptr(self.contiguous(dets, 'float64')),
indices_ptr,
ctypes.byref(num_keep_ins),
ctypes.c_double(thresh),
)
keep_indices = self.ptr2array(
indices_ptr, (ctypes.c_int32 * num_keep_ins.value))
return list(order[keep_indices])
def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1., 1.)):
"""Transform the boxes to the regression targets."""
ex_ctr_x = ex_rois[:, 0]
ex_ctr_y = ex_rois[:, 1]
ex_widths = ex_rois[:, 2]
ex_heights = ex_rois[:, 3]
ex_angles = ex_rois[:, 4]
gt_ctr_x = gt_rois[:, 0]
gt_ctr_y = gt_rois[:, 1]
gt_widths = gt_rois[:, 2]
gt_heights = gt_rois[:, 3]
gt_angles = gt_rois[:, 4]
wx, wy, ww, wh, wa = weights
targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
targets_dw = ww * np.log(gt_widths / ex_widths)
targets_dh = wh * np.log(gt_heights / ex_heights)
targets_da = wa * np.sin(np.radians(gt_angles - ex_angles))
return np.vstack((
targets_dx,
targets_dy,
targets_dw,
targets_dh,
targets_da,
)).transpose()
def bbox_transform_inv(boxes, deltas, weights=(1., 1., 1., 1., 1.)):
"""Decode the final boxes according to the deltas."""
if boxes.shape[0] == 0:
return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
boxes = boxes.astype(deltas.dtype, copy=False)
ctr_x = boxes[:, 0]
ctr_y = boxes[:, 1]
widths = boxes[:, 2]
heights = boxes[:, 3]
angles = boxes[:, 4:5]
wx, wy, ww, wh, wa = weights
dx = deltas[:, 0::5] / wx
dy = deltas[:, 1::5] / wy
dw = deltas[:, 2::5] / ww
dh = deltas[:, 3::5] / wh
da = deltas[:, 4::5] / wa
pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
pred_w = np.exp(dw) * widths[:, np.newaxis]
pred_h = np.exp(dh) * heights[:, np.newaxis]
da = np.minimum(np.maximum(da, -1), 1)
pred_a = np.rad2deg(np.arcsin(da)) + angles
pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
pred_boxes[:, 0::5] = pred_ctr_x # x_ctr
pred_boxes[:, 1::5] = pred_ctr_y # y_ctr
pred_boxes[:, 2::5] = pred_w # w
pred_boxes[:, 3::5] = pred_h # h
pred_boxes[:, 4::5] = pred_a # angle
return pred_boxes
def box2vertices(values):
x_ctr, y_ctr, w, h, a = values
theta = a * 0.01745329251
cos_theta2 = math.cos(theta) * 0.5
sin_theta2 = math.sin(theta) * 0.5
vertices = [
x_ctr - sin_theta2 * h - cos_theta2 * w,
y_ctr + cos_theta2 * h - sin_theta2 * w,
x_ctr + sin_theta2 * h - cos_theta2 * w,
y_ctr - cos_theta2 * h - sin_theta2 * w,
]
vertices.extend([
2 * x_ctr - vertices[0],
2 * y_ctr - vertices[1],
2 * x_ctr - vertices[2],
2 * y_ctr - vertices[3],
])
return vertices
def vertices2box(vertices):
def sort(vertices):
poly = np.array(vertices).reshape((4, 2))
# lt, rt, rb, lb
edge = [
(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]),
(poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]),
(poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]),
(poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])
]
p_area = np.sum(edge) / 2.
_poly = poly.copy()
if abs(p_area) < 1:
raise ValueError
if p_area > 0:
_poly = _poly[(0, 3, 2, 1), :] # clock wise
anchor = np.array([np.min(poly[:, 0]), np.min(poly[:, 1])])
line0 = np.linalg.norm(anchor - _poly[0])
line1 = np.linalg.norm(anchor - _poly[1])
line2 = np.linalg.norm(anchor - _poly[2])
line3 = np.linalg.norm(anchor - _poly[3])
argmin = np.argmin([line0, line1, line2, line3])
lt = _poly[argmin]
rt = _poly[(argmin + 1) % 4]
rb = _poly[(argmin + 2) % 4]
lb = _poly[(argmin + 3) % 4]
return np.array([lt, rt, rb, lb]).flatten()
values = sort(vertices)
y4my3 = values[7] - values[5]
if y4my3 != 0:
x2mx1 = values[2] - values[0]
theta = math.atan(x2mx1 / y4my3)
cos_theta = math.cos(theta)
sin_theta = math.sin(theta)
h = x2mx1 / sin_theta
x2px1 = values[2] + values[0]
x4px3 = values[6] + values[4]
w = (x4px3 - x2px1) / (2. * cos_theta)
a = theta / 0.01745329251
else:
w = values[2] - values[0]
h = values[5] - values[1]
a = 0.
x_ctr = 0.5 * (values[0] + values[4])
y_ctr = 0.5 * (values[1] + values[5])
return x_ctr, y_ctr, w, h, a
def clip_angle(d):
while d < 0:
d += 360
while d >= 360:
d -= 360
return d
def clip_boxes(boxes, im_shape):
# ctr_x >= 0
boxes[:, 0] = np.maximum(np.minimum(boxes[:, 0], im_shape[1] - 1), 0)
# ctr_y >= 0
boxes[:, 1] = np.maximum(np.minimum(boxes[:, 1], im_shape[0] - 1), 0)
# w < im_shape[1]
boxes[:, 2] = np.maximum(np.minimum(boxes[:, 2], im_shape[1] - 1), 0)
# h < im_shape[0]
boxes[:, 3] = np.maximum(np.minimum(boxes[:, 3], im_shape[0] - 1), 0)
# 0 < a < 360
boxes[:, 4] = np.maximum(np.minimum(boxes[:, 4], 359), 0)
return boxes
def clip_tiled_boxes(boxes, im_shape):
# ctr_x >= 0
boxes[:, 0::5] = np.maximum(np.minimum(boxes[:, 0::5], im_shape[1] - 1), 0)
# ctr_y >= 0
boxes[:, 1::5] = np.maximum(np.minimum(boxes[:, 1::5], im_shape[0] - 1), 0)
# w < im_shape[1]
boxes[:, 2::5] = np.maximum(np.minimum(boxes[:, 2::5], im_shape[1] - 1), 0)
# h < im_shape[0]
boxes[:, 3::5] = np.maximum(np.minimum(boxes[:, 3::5], im_shape[0] - 1), 0)
# 0 < a < 360
boxes[:, 4::5] = np.maximum(np.minimum(boxes[:, 4::5], 359), 0)
return boxes
def flip_boxes(boxes, width):
ca = np.vectorize(clip_angle)
flip_boxes = boxes.copy()
old_cx = boxes[:, 0].copy()
old_a = boxes[:, 4].copy()
flip_boxes[:, 0] = width - old_cx - 1
flip_boxes[:, 4] = ca(180 - old_a)
return flip_boxes
# Aliases
libc = _CppExtension('ctypes_rbox.so')
bbox_overlaps = libc.bbox_overlaps
cpu_nms = libc.cpu_nms
if __name__ == "__main__":
prior_boxes = np.array([[4, 4, 15, 15, 150], [4, 4, 15, 15, 45]], dtype='float64')
gt_boxes = np.array([[4, 4, 15, 15, 45, 1.]], dtype='float64')
ov = bbox_overlaps(prior_boxes, gt_boxes)
indices = cpu_nms(gt_boxes, 0.45)
print(ov)
print(indices)
...@@ -22,11 +22,7 @@ import numpy as np ...@@ -22,11 +22,7 @@ import numpy as np
class SmoothedValue(object): class SmoothedValue(object):
""" """Track a series of values and provide smoothed report."""
Track a series of values and provide access to smoothed values
over a window or the global series average.
"""
def __init__(self, window_size): def __init__(self, window_size):
self.deque = collections.deque(maxlen=window_size) self.deque = collections.deque(maxlen=window_size)
......
...@@ -24,6 +24,7 @@ import time ...@@ -24,6 +24,7 @@ import time
class Timer(object): class Timer(object):
"""A simple timer.""" """A simple timer."""
def __init__(self): def __init__(self):
self.total_time = 0. self.total_time = 0.
self.calls = 0 self.calls = 0
...@@ -31,6 +32,15 @@ class Timer(object): ...@@ -31,6 +32,15 @@ class Timer(object):
self.diff = 0. self.diff = 0.
self.average_time = 0. self.average_time = 0.
def add_diff(self, diff, average=True):
self.total_time += diff
self.calls += 1
self.average_time = self.total_time / self.calls
if average:
return self.average_time
else:
return self.diff
@contextlib.contextmanager @contextlib.contextmanager
def tic_and_toc(self): def tic_and_toc(self):
try: try:
...@@ -78,3 +88,20 @@ def get_progress_info(timer, curr_step, max_steps): ...@@ -78,3 +88,20 @@ def get_progress_info(timer, curr_step, max_steps):
progress = (curr_step + 1.) / max_steps progress = (curr_step + 1.) / max_steps
return '< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >' \ return '< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >' \
.format(progress, timer.average_time, eta) .format(progress, timer.average_time, eta)
def new_timers(*args):
"""Return a dict that contains specified timers.
Parameters
----------
args : str...
The key(s) to create timers.
Returns
-------
Dict[Timer]
The timer dict.
"""
return dict([(k, Timer()) for k in args])
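A short usage sketch of the timer dict added above, combined with the existing tic_and_toc context manager:

timers = new_timers('im_detect', 'misc')
with timers['im_detect'].tic_and_toc():
    pass  # run the detection step here
print('{:.3f}s / image'.format(timers['im_detect'].average_time))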
...@@ -120,21 +120,21 @@ def get_bbox_contours(rotated_box): ...@@ -120,21 +120,21 @@ def get_bbox_contours(rotated_box):
return quad, main_direction return quad, main_direction
def get_mask(boxes, segms, im_shape, mask_thresh=0.4): def get_mask(boxes, segms, im_shape, mask_thresh=0.5):
i, masks = 0, np.zeros(list(im_shape) + [len(boxes)], dtype=np.uint8) i, masks = 0, np.zeros(list(im_shape) + [len(boxes)], 'uint8')
for det, msk in zip(boxes, segms): for det, msk in zip(boxes, segms):
M = msk.shape[0] M = msk.shape[0]
scale = (M + 2.0) / M scale = (M + 2.) / M
ref_box = expand_boxes(np.array([det[0:4]]), scale)[0] ref_box = expand_boxes(np.array([det[:4]]), scale)[0]
ref_box = ref_box.astype(np.int32) ref_box = ref_box.astype(np.int32)
padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) padded_mask = np.zeros((M + 2, M + 2), 'float32')
padded_mask[1:-1, 1:-1] = msk[:, :] padded_mask[1:-1, 1:-1] = msk[:, :]
w = ref_box[2] - ref_box[0] + 1 w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1 h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1) w = np.maximum(w, 1)
h = np.maximum(h, 1) h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h)) mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > mask_thresh, dtype=np.uint8) mask = np.array(mask > mask_thresh, 'uint8')
x1 = max(ref_box[0], 0) x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0) y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, im_shape[1]) x2 = min(ref_box[2] + 1, im_shape[1])
...@@ -157,6 +157,7 @@ def vis_one_image( ...@@ -157,6 +157,7 @@ def vis_one_image(
dpi=100, dpi=100,
box_alpha=0., box_alpha=0.,
show_class=True, show_class=True,
show_rotated=False,
filename=None, filename=None,
): ):
"""Visual debugging of detections.""" """Visual debugging of detections."""
...@@ -199,7 +200,7 @@ def vis_one_image( ...@@ -199,7 +200,7 @@ def vis_one_image(
continue continue
# Show box # Show box
if bbox.size == 4: if bbox.size == 4 and not show_rotated:
ax.add_patch( ax.add_patch(
plt.Rectangle( plt.Rectangle(
(bbox[0], bbox[1]), (bbox[0], bbox[1]),
...@@ -211,28 +212,6 @@ def vis_one_image( ...@@ -211,28 +212,6 @@ def vis_one_image(
alpha=box_alpha, alpha=box_alpha,
) )
) )
elif bbox.size == 5:
quad, md = get_bbox_contours(bbox)
ax.add_patch(
Polygon(
quad,
fill=False,
edgecolor='g',
linewidth=1.,
alpha=box_alpha,
)
)
ax.add_patch(
plt.arrow(
md[0, 0],
md[0, 1],
md[1, 0] - md[0, 0],
md[1, 1] - md[0, 1],
width=2,
color='g',
alpha=box_alpha,
)
)
# Show class # Show class
if show_class: if show_class:
...@@ -258,10 +237,28 @@ def vis_one_image( ...@@ -258,10 +237,28 @@ def vis_one_image(
img[:, :, c] = color_mask[c] img[:, :, c] = color_mask[c]
e = masks[:, :, i] e = masks[:, :, i]
_, contour, hier = cv2.findContours( results = cv2.findContours(
e.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) e.copy(),
cv2.RETR_CCOMP,
cv2.CHAIN_APPROX_NONE,
)
contours = results[0] if len(results) == 2 else results[1]
if show_rotated and len(contours) > 1:
                    contours = [max(contours, key=cv2.contourArea)]
for c in contour: for c in contours:
if show_rotated:
rect = cv2.minAreaRect(c)
ax.add_patch(
Polygon(
cv2.boxPoints(rect),
fill=False,
edgecolor='g',
linewidth=1.,
alpha=box_alpha,
)
)
ax.add_patch(Polygon( ax.add_patch(Polygon(
c.reshape((-1, 2)), c.reshape((-1, 2)),
fill=True, fill=True,
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Make record file for COCO dataset."""
import os
import shutil
from maker import make_record
from maskgen import make_mask, merge_mask
if __name__ == '__main__':
COCO_ROOT = '/data'
# Encode masks to RLE bytes
if not os.path.exists('build'):
os.makedirs('build')
make_mask('train', '2014', COCO_ROOT)
make_mask('valminusminival', '2014', COCO_ROOT)
make_mask('minival', '2014', COCO_ROOT)
merge_mask('trainval35k', '2014', [
'build/coco_2014_train_mask.pkl',
'build/coco_2014_valminusminival_mask.pkl']
)
# coco_2014_trainval35k
make_record(
record_file=os.path.join(COCO_ROOT, 'coco_2014_trainval35k'),
images_path=[os.path.join(COCO_ROOT, 'images/train2014'),
os.path.join(COCO_ROOT, 'images/val2014')],
splits_path=[os.path.join(COCO_ROOT, 'ImageSets'),
os.path.join(COCO_ROOT, 'ImageSets')],
mask_file='build/coco_2014_trainval35k_mask.pkl',
splits=['train', 'valminusminival'],
)
# coco_2014_minival
make_record(
record_file=os.path.join(COCO_ROOT, 'coco_2014_minival'),
images_path=os.path.join(COCO_ROOT, 'images/val2014'),
mask_file='build/coco_2014_minival_mask.pkl',
splits_path=os.path.join(COCO_ROOT, 'ImageSets'),
splits=['minival'],
)
shutil.rmtree('build')
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
import os
import time
import cv2
import dragon
import numpy as np
try:
import cPickle
except:
import pickle as cPickle
def make_example(image_file, mask_objects, im_scale=None):
filename = os.path.split(image_file)[-1]
example = {'id': filename.split('.')[0], 'object': []}
if im_scale:
img = cv2.imread(image_file)
img = cv2.resize(
img, None,
fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR,
)
example['height'], example['width'], example['depth'] = img.shape
_, img = cv2.imencode('.jpg', img, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
example['content'] = img.tostring()
else:
with open(image_file, 'rb') as f:
img_bytes = bytes(f.read())
img = cv2.imdecode(np.frombuffer(img_bytes, 'uint8'), 3)
example['height'], example['width'], example['depth'] = img.shape
example['content'] = img_bytes
for ix, obj in enumerate(mask_objects):
x1, y1, x2, y2 = obj['bbox']
example['object'].append({
'name': obj['name'],
'xmin': x1,
'ymin': y1,
'xmax': x2,
'ymax': y2,
'mask': obj['mask'],
'difficult': obj.get('crowd', 0),
})
return example
def make_record(
record_file,
images_path,
mask_file,
splits_path,
splits,
ext='.jpg',
im_scale=None,
):
if os.path.exists(record_file):
        raise ValueError('The record file already exists.')
os.makedirs(record_file)
if not isinstance(images_path, list):
images_path = [images_path]
if not isinstance(splits_path, list):
splits_path = [splits_path]
assert len(splits) == len(splits_path)
assert len(splits) == len(images_path)
if mask_file is not None:
with open(mask_file, 'rb') as f:
all_masks = cPickle.load(f)
else:
all_masks = {}
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
writer = dragon.io.SeetaRecordWriter(
path=record_file,
protocol={
'id': 'string',
'content': 'bytes',
'height': 'int64',
'width': 'int64',
'depth': 'int64',
'object': [{
'name': 'string',
'xmin': 'float64',
'ymin': 'float64',
'xmax': 'float64',
'ymax': 'float64',
'mask': 'bytes',
'difficult': 'int64',
}]
}
)
count, total_line = 0, 0
start_time = time.time()
for db_idx, split in enumerate(splits):
split_file = os.path.join(splits_path[db_idx], split + '.txt')
assert os.path.exists(split_file)
with open(split_file, 'r') as f:
lines = f.readlines()
total_line += len(lines)
for line in lines:
count += 1
if count % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
count, total_line, now_time - start_time))
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + ext)
mask_objects = all_masks[filename] if filename in all_masks else None
if mask_objects is None:
                raise ValueError('The image ({}) has invalid mask settings.'.format(filename))
            writer.write(make_example(image_file, mask_objects, im_scale))
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time))
writer.close()
end_time = time.time()
data_size = os.path.getsize(record_file + '/data.data') * 1e-6
print('{} images take {:.2f} MB in {:.2f} sec.'
.format(total_line, data_size, end_time - start_time))
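For reference, a hedged sketch of the per-image example dict that make_example above feeds to the writer; field names mirror the protocol dict, and all values are illustrative.

example = {
    'id': 'COCO_train2014_000000119993',
    'content': b'<jpeg bytes>',
    'height': 480, 'width': 640, 'depth': 3,
    'object': [{
        'name': 'person',
        'xmin': 10., 'ymin': 20., 'xmax': 200., 'ymax': 300.,
        'mask': b'<RLE bytes>',
        'difficult': 0,
    }],
}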
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
import os
import sys
import os.path as osp
from collections import OrderedDict
try:
import cPickle
except:
import pickle as cPickle
sys.path.insert(0, '../..')
from lib.pycocotools.coco import COCO
from lib.pycocotools import mask_utils
class imdb(object):
def __init__(self, image_set, year, data_dir):
self._year = year
self._image_set = image_set
self._data_path = osp.join(data_dir)
self.invalid_cnt = 0
self.ignore_cnt = 0
# Load COCO API, classes, class <-> id mappings
self._COCO = COCO(self._get_ann_file())
cats = self._COCO.loadCats(self._COCO.getCatIds())
self._classes = tuple(['__background__'] + [c['name'] for c in cats])
self._class_to_ind = dict(zip(self._classes, range(self.num_classes)))
self._ind_to_class = dict(zip(range(self.num_classes), self._classes))
self._class_to_cat_id = dict(zip([c['name'] for c in cats], self._COCO.getCatIds()))
self._cat_id_to_class_id = dict([(self._class_to_cat_id[cls],
self._class_to_ind[cls])
for cls in self._classes[1:]])
self._data_name = {
# 5k ``val2014`` subset
'minival2014': 'val2014',
# ``val2014`` minus ``minival2014``
'valminusminival2014': 'val2014',
}.get(image_set + year, image_set + year)
self._image_index = self._load_image_set_index()
self._annotations = self._load_annotations()
def _get_ann_file(self):
prefix = 'instances' \
if self._image_set.find('test') == -1 \
else 'image_info'
return osp.join(
self._data_path,
'annotations',
prefix + '_' +
self._image_set +
self._year + '.json'
)
def _load_image_set_index(self):
"""Load image ids."""
image_ids = self._COCO.getImgIds()
return image_ids
def _load_annotations(self):
"""Load annotations."""
annotations = [self._load_coco_annotation(index)
for index in self._image_index]
return annotations
def image_path_from_index(self, index):
"""Construct an image path from the image's "index" identifier."""
# Example image path for index=119993:
# images/train2014/COCO_train2014_000000119993.jpg
file_name = ('COCO_' + self._data_name + '_' +
str(index).zfill(12) + '.jpg')
image_path = osp.join(self._data_path, 'images',
self._data_name, file_name)
assert osp.exists(image_path), \
'Path does not exist: {}'.format(image_path)
return image_path
def image_path_at(self, i):
"""Return the absolute path to image i in the image sequence."""
return self.image_path_from_index(self._image_index[i])
def annotation_at(self, i):
"""Return the absolute path to image i in the image sequence."""
return self._annotations[i]
def _load_coco_annotation(self, index):
"""Loads COCO bounding-box instance annotations."""
im_ann = self._COCO.loadImgs(index)[0]
width, height = im_ann['width'], im_ann['height']
ann_ids = self._COCO.getAnnIds(imgIds=index, iscrowd=None)
objects = self._COCO.loadAnns(ann_ids)
# Sanitize boxes -- some are invalid
valid_objects = []
for obj in objects:
x1 = float(max(0, obj['bbox'][0]))
y1 = float(max(0, obj['bbox'][1]))
x2 = float(min(width - 1, x1 + max(0, obj['bbox'][2] - 1)))
y2 = float(min(height - 1, y1 + max(0, obj['bbox'][3] - 1)))
if isinstance(obj['segmentation'], list):
for p in obj['segmentation']:
if len(p) < 6:
print('Remove Invalid segm.')
# Valid polygons have >= 3 points, so require >= 6 coordinates
poly = [p for p in obj['segmentation'] if len(p) >= 6]
mask_bytes = mask_utils.poly2bytes(poly, height, width)
else:
# Crowd masks
# Some are encoded with height or width
# running out of the image bound
# Do not use them or decoding error is inevitable
mask_bytes = mask_utils.poly2bytes(obj['segmentation'], height, width)
if not isinstance(mask_bytes, bytes):
print(type(mask_bytes))
if obj['area'] > 0 and x2 > x1 and y2 > y1:
obj['clean_bbox'] = [x1, y1, x2, y2]
valid_objects.append({
'bbox': [x1, y1, x2, y2],
'mask': mask_bytes,
'category_id': obj['category_id'],
'class_id': self._cat_id_to_class_id[obj['category_id']],
'crowd': obj['iscrowd'],
})
valid_objects[-1]['name'] = \
self._ind_to_class[valid_objects[-1]['class_id']]
return height, width, valid_objects
@property
def num_images(self):
return len(self._image_index)
@property
def num_classes(self):
return len(self._classes)
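A worked example of the box sanitization in _load_coco_annotation above: a COCO [x, y, w, h] annotation that runs past the image border is converted to a clipped, 0-indexed [x1, y1, x2, y2].

width, height = 640, 480
bbox = [630.0, 470.0, 20.0, 15.0]                        # COCO x, y, w, h
x1 = float(max(0, bbox[0]))                              # 630.0
y1 = float(max(0, bbox[1]))                              # 470.0
x2 = float(min(width - 1, x1 + max(0, bbox[2] - 1)))     # 639.0 (clipped)
y2 = float(min(height - 1, y1 + max(0, bbox[3] - 1)))    # 479.0 (clipped)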
def make_mask(split, year, data_dir):
coco = imdb(split, year, data_dir)
print('Preparing to make split: {}, total {} images'.format(split, coco.num_images))
if not osp.exists(osp.join(coco._data_path, 'ImageSets')):
os.makedirs(osp.join(coco._data_path, 'ImageSets'))
gt_recs = OrderedDict()
for i in range(coco.num_images):
filename = (coco.image_path_at(i).split('/')[-1]).split('.')[0]
h, w, objects = coco.annotation_at(i)
gt_recs[filename] = objects
with open(osp.join('build',
'coco_' + year + '_' + split + '_mask.pkl'), 'wb') as f:
cPickle.dump(gt_recs, f, cPickle.HIGHEST_PROTOCOL)
with open(osp.join(coco._data_path, 'ImageSets', split + '.txt'), 'w') as f:
for i in range(coco.num_images):
filename = (coco.image_path_at(i).split('/')[-1]).split('.')[0]
if i != coco.num_images - 1: filename += '\n'
f.write(filename)
def merge_mask(split, year, mask_files):
gt_recs = OrderedDict()
data_path = os.path.dirname(mask_files[0])
for mask_file in mask_files:
with open(mask_file, 'rb') as f:
recs = cPickle.load(f)
gt_recs.update(recs)
with open(osp.join(data_path, 'coco_' + year + '_' + split + '_mask.pkl'), 'wb') as f:
cPickle.dump(gt_recs, f, cPickle.HIGHEST_PROTOCOL)
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
"""Make record file for Rotated dataset."""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
...@@ -18,23 +20,12 @@ from maker import make_record ...@@ -18,23 +20,12 @@ from maker import make_record
if __name__ == '__main__': if __name__ == '__main__':
voc_root = '/data/VOC' data_root = '/data'
make_record(
record_file=osp.join(voc_root, 'voc_0712_trainval'),
images_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/JPEGImages')],
annotations_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/Annotations')],
imagesets_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
splits=['trainval', 'trainval']
)
make_record( make_record(
record_file=osp.join(voc_root, 'voc_2007_test'), record_file=osp.join(data_root, 'rotated_train'),
images_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'), images_path=[osp.join(data_root, 'JPEGImages')],
annotations_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'), annotations_path=[osp.join(data_root, 'Annotations')],
imagesets_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'), imagesets_path=[osp.join(data_root, 'ImageSets')],
splits=['test'] splits=['train']
) )
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
"""Make record file for VOC dataset."""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
...@@ -18,7 +20,7 @@ from maker import make_record ...@@ -18,7 +20,7 @@ from maker import make_record
if __name__ == '__main__': if __name__ == '__main__':
voc_root = '/data/VOC' voc_root = '/data'
make_record( make_record(
record_file=osp.join(voc_root, 'voc_0712_trainval'), record_file=osp.join(voc_root, 'voc_0712_trainval'),
......
...@@ -23,7 +23,7 @@ import pprint ...@@ -23,7 +23,7 @@ import pprint
from lib.core.config import cfg from lib.core.config import cfg
from lib.core.coordinator import Coordinator from lib.core.coordinator import Coordinator
from lib.modeling.detector import Detector from lib.modeling.detector import new_detector
from lib.utils import logger from lib.utils import logger
...@@ -69,13 +69,7 @@ if __name__ == '__main__': ...@@ -69,13 +69,7 @@ if __name__ == '__main__':
# Ready to export the network # Ready to export the network
logger.info('Exporting model will be saved to `{:s}`' logger.info('Exporting model will be saved to `{:s}`'
.format(coordinator.exports_dir())) .format(coordinator.exports_dir()))
detector = Detector().eval().cuda(cfg.GPU_ID) detector = new_detector(cfg.GPU_ID, checkpoint)
detector.load_weights(checkpoint)
detector.optimize_for_inference()
# Mixed precision training?
if cfg.MODEL.PRECISION.lower() == 'float16':
detector.half() # Powerful FP16 Support
data = torch.zeros(*args.input_shape).byte() data = torch.zeros(*args.input_shape).byte()
ims_info = torch.zeros(args.input_shape[0], 3).float() ims_info = torch.zeros(args.input_shape[0], 3).float()
......
...@@ -13,7 +13,6 @@ from __future__ import absolute_import ...@@ -13,7 +13,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import importlib
import os import os
import sys import sys
sys.path.insert(0, '..') sys.path.insert(0, '..')
...@@ -21,19 +20,23 @@ sys.path.insert(0, '..') ...@@ -21,19 +20,23 @@ sys.path.insert(0, '..')
import argparse import argparse
import pprint import pprint
from lib.core import test_engine
from lib.core.config import cfg from lib.core.config import cfg
from lib.core.coordinator import Coordinator from lib.core.coordinator import Coordinator
from lib.core.test import TestServer from lib.core.test import TestServer
from lib.datasets.factory import get_imdb from lib.datasets.factory import get_imdb
from lib.modeling.detector import Detector
from lib.utils import logger from lib.utils import logger
def parse_args(): def parse_args():
"""Parse input arguments""" """Parse input arguments"""
parser = argparse.ArgumentParser(description='Test a Detection Network') parser = argparse.ArgumentParser(description='Test a Detection Network')
parser.add_argument('--gpus', dest='gpus',
help='index of GPUs to use',
default=None, nargs='+', type=int)
parser.add_argument('--cfg', dest='cfg_file', parser.add_argument('--cfg', dest='cfg_file',
help='optional config file', default=None, type=str) help='optional config file',
default=None, type=str)
parser.add_argument('--exp_dir', dest='exp_dir', parser.add_argument('--exp_dir', dest='exp_dir',
help='experiment dir', help='experiment dir',
default=None, type=str) default=None, type=str)
...@@ -70,30 +73,24 @@ if __name__ == '__main__': ...@@ -70,30 +73,24 @@ if __name__ == '__main__':
logger.info('Called with args:') logger.info('Called with args:')
logger.info(args) logger.info(args)
coordinator = Coordinator(args.cfg_file, exp_dir=args.exp_dir) coordinator = Coordinator(args.cfg_file, args.exp_dir)
logger.info('Using config:\n' + pprint.pformat(cfg)) logger.info('Using config:\n' + pprint.pformat(cfg))
# Load the checkpoint and test engine # Load the checkpoint and test engine
checkpoint, _ = coordinator.checkpoint(global_step=args.iter, wait=args.wait) checkpoint, _ = coordinator.checkpoint(args.iter, wait=args.wait)
if checkpoint is None: if checkpoint is None:
raise RuntimeError('The checkpoint of global step {} does not exist.'.format(args.iter)) raise RuntimeError('The checkpoint of global step {} does not exist.'.format(args.iter))
test_engine = importlib.import_module('lib.{}.test'.format(cfg.MODEL.TYPE))
# Inspect the database # Inspect the database
database = get_imdb(cfg.TEST.DATABASE) database = get_imdb(cfg.TEST.DATABASE)
cfg.TEST.PROTOCOL = 'null' if args.dump else cfg.TEST.PROTOCOL cfg.TEST.PROTOCOL = 'dump' if args.dump else cfg.TEST.PROTOCOL
logger.info('Database({}): {} images will be used to test.' logger.info('Database({}): {} images will be used to test.'
.format(cfg.TEST.DATABASE, database.num_images)) .format(cfg.TEST.DATABASE, database.num_images))
# Ready to test the network # Ready to test the network
output_dir = coordinator.results_dir(checkpoint, args.output_dir) output_dir = coordinator.results_dir(checkpoint, args.output_dir)
logger.info('Results will be saved to `{:s}`'.format(output_dir)) logger.info('Results will be saved to `{:s}`'.format(output_dir))
detector = Detector().eval().cuda(cfg.GPU_ID)
detector.load_weights(checkpoint)
detector.optimize_for_inference()
# Mixed precision training?
if cfg.MODEL.PRECISION.lower() == 'float16':
detector.half() # Powerful FP16 Support
test_engine.test_net(detector, TestServer(output_dir)) # Bind the server and run the test
server = TestServer(coordinator.results_dir(checkpoint))
test_engine.run_test_net(checkpoint, server, args.gpus)