Commit 41b3932b by Ting PAN

Refactor the API of rotated boxes

1 parent c020594c
Showing with 1944 additions and 1296 deletions
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.2.3 (20191101)
Dragon Minimum Required (Version 0.3.0.dev20191021)
Changes:
Preview Features:
- Refactor the API of rotated boxes.
- Simplify the solver by adding LRScheduler.
- Change the ``ITER`` naming to ``STEP``.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.2.2 (20191021)
Dragon Minimum Required (Version 0.3.0.dev20191021)
Changes:
Preview Features:
- Add the dumping of detection results.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.2.1 (20191017)
Dragon Minimum Required (Version 0.3.0.dev20191017)
......
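For reference, the 0.2.3 renamings pair old and new configuration keys as follows. This summary dict is illustrative only (it is not part of the codebase); the mapping is collected from the config diffs below.

# Old config key -> new config key, collected from the diffs in this commit.
RENAMED_CFG_KEYS = {
    'SOLVER.MAX_ITERS': 'SOLVER.MAX_STEPS',
    'SOLVER.STEPS': 'SOLVER.DECAY_STEPS',
    'SOLVER.GAMMA': 'SOLVER.DECAY_GAMMA',
    'SOLVER.WARM_UP_ITERS': 'SOLVER.WARM_UP_STEPS',
    'SOLVER.SNAPSHOT_ITERS': 'SOLVER.SNAPSHOT_EVERY',
    'MODEL.DATA_TYPE': 'MODEL.PRECISION',
    'TRAIN.SCALE_JITTERING': 'TRAIN.USE_SCALE_JITTER',
    'TRAIN.COLOR_JITTERING': 'TRAIN.USE_COLOR_JITTER',
    'TRAIN.SCALE_RANGE': 'TRAIN.SCALE_JITTER_RANGE',
}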
#!/bin/sh

# Delete cache
rm -r build install *.c *.cpp

# Compile cpp modules
python setup.py build_ext --inplace
g++ -o ../lib/utils/ctypes_rbox.so -shared -fPIC -O2 rbox.cc -std=c++11 -fopenmp

# Compile cuda modules
cd build && cmake .. && make install && cd ..

# Copy to the library root
cp -r install/lib ../
...@@ -9,363 +9,355 @@
//
// Codes are based on:
//
//    <https://github.com/facebookresearch/detectron2/blob/master/detectron2
//     /layers/csrc/box_iou_rotated/box_iou_rotated_utils.h>
//
// ------------------------------------------------------------

#include <cmath>
#include <algorithm>

#include <omp.h>
template <typename T>
struct RotatedBox {
  T x_ctr, y_ctr, w, h, a;
};

template <typename T>
struct Point {
  T x, y;
  Point(const T& px = 0, const T& py = 0) : x(px), y(py) {}
  Point operator+(const Point& p) const {
    return Point(x + p.x, y + p.y);
  }
  Point& operator+=(const Point& p) {
    x += p.x;
    y += p.y;
    return *this;
  }
  Point operator-(const Point& p) const {
    return Point(x - p.x, y - p.y);
  }
  Point operator*(const T coeff) const {
    return Point(x * coeff, y * coeff);
  }
};

template <typename T>
T dot_2d(const Point<T>& A, const Point<T>& B) {
  return A.x * B.x + A.y * B.y;
}

template <typename T>
T cross_2d(const Point<T>& A, const Point<T>& B) {
  return A.x * B.y - B.x * A.y;
}

template <typename T>
void get_rotated_vertices(
    const RotatedBox<T>& box,
    Point<T> (&pts)[4]) {
  // M_PI / 180. == 0.01745329251
  double theta = box.a * 0.01745329251;
  T cosTheta2 = (T)cos(theta) * 0.5f;
  T sinTheta2 = (T)sin(theta) * 0.5f;
  // y: top --> down; x: left --> right
  pts[0].x = box.x_ctr - sinTheta2 * box.h - cosTheta2 * box.w;
  pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w;
  pts[1].x = box.x_ctr + sinTheta2 * box.h - cosTheta2 * box.w;
  pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w;
  pts[2].x = 2 * box.x_ctr - pts[0].x;
  pts[2].y = 2 * box.y_ctr - pts[0].y;
  pts[3].x = 2 * box.x_ctr - pts[1].x;
  pts[3].y = 2 * box.y_ctr - pts[1].y;
}

template <typename T>
int get_intersection_points(
    const Point<T> (&pts1)[4],
    const Point<T> (&pts2)[4],
    Point<T> (&intersections)[24]) {
  // Line vector
  // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1]
  Point<T> vec1[4], vec2[4];
  for (int i = 0; i < 4; i++) {
    vec1[i] = pts1[(i + 1) % 4] - pts1[i];
    vec2[i] = pts2[(i + 1) % 4] - pts2[i];
  }
  // Line test - test all line combos for intersection
  int num = 0;  // number of intersections
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 4; j++) {
      // Solve for 2x2 Ax=b
      T det = cross_2d(vec2[j], vec1[i]);
      // This takes care of parallel lines
      if (fabs(det) <= 1e-14) {
        continue;
      }
      auto vec12 = pts2[j] - pts1[i];
      T t1 = cross_2d(vec2[j], vec12) / det;
      T t2 = cross_2d(vec1[i], vec12) / det;
      if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) {
        intersections[num++] = pts1[i] + vec1[i] * t1;
      }
    }
  }
  // Check for vertices of rect1 inside rect2
  {
    const auto& AB = vec2[0];
    const auto& DA = vec2[3];
    auto ABdotAB = dot_2d(AB, AB);
    auto ADdotAD = dot_2d(DA, DA);
    for (int i = 0; i < 4; i++) {
      // assume ABCD is the rectangle, and P is the point to be judged
      // P is inside ABCD iff. P's projection on AB lies within AB
      // and P's projection on AD lies within AD
      auto AP = pts1[i] - pts2[0];
      auto APdotAB = dot_2d<T>(AP, AB);
      auto APdotAD = -dot_2d<T>(AP, DA);
      if ((APdotAB >= 0) &&
          (APdotAD >= 0) &&
          (APdotAB <= ABdotAB) &&
          (APdotAD <= ADdotAD)) {
        intersections[num++] = pts1[i];
      }
    }
  }
  // Reverse the check - check for vertices of rect2 inside rect1
  {
    const auto& AB = vec1[0];
    const auto& DA = vec1[3];
    auto ABdotAB = dot_2d<T>(AB, AB);
    auto ADdotAD = dot_2d<T>(DA, DA);
    for (int i = 0; i < 4; i++) {
      auto AP = pts2[i] - pts1[0];
      auto APdotAB = dot_2d<T>(AP, AB);
      auto APdotAD = -dot_2d<T>(AP, DA);
      if ((APdotAB >= 0) &&
          (APdotAD >= 0) &&
          (APdotAB <= ABdotAB) &&
          (APdotAD <= ADdotAD)) {
        intersections[num++] = pts2[i];
      }
    }
  }
  return num;
}

template <typename T>
int convex_hull_graham(
    const Point<T> (&p)[24],
    const int& num_in,
    Point<T> (&q)[24],
    bool shift_to_zero = false) {
  // Step 1:
  // Find point with minimum y
  // if more than 1 points have the same minimum y,
  // pick the one with the minimum x.
  int t = 0;
  for (int i = 1; i < num_in; i++) {
    if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) {
      t = i;
    }
  }
  auto& start = p[t];  // starting point
  // Step 2:
  // Subtract starting point from every points (for sorting in the next step)
  for (int i = 0; i < num_in; i++) {
    q[i] = p[i] - start;
  }
  // Swap the starting point to position 0
  auto tmp = q[0];
  q[0] = q[t];
  q[t] = tmp;
  // Step 3:
  // Sort point 1 ~ num_in according to their relative cross-product values
  // (essentially sorting according to angles)
  // If the angles are the same, sort according to their distance to origin
  T dist[24];
  for (int i = 0; i < num_in; i++) {
    dist[i] = dot_2d(q[i], q[i]);
  }
  std::sort(
      q + 1, q + num_in, [](const Point<T>& A, const Point<T>& B) -> bool {
        T temp = cross_2d<T>(A, B);
        if (fabs(temp) < 1e-6) {
          return dot_2d(A, A) < dot_2d(B, B);
        } else {
          return temp > 0;
        }
      });
  // Step 4:
  // Make sure there are at least 2 points (that don't overlap with each other)
  // in the stack
  int k;  // index of the non-overlapped second point
  for (k = 1; k < num_in; k++) {
    if (dist[k] > 1e-8) {
      break;
    }
  }
  if (k == num_in) {
    // We reach the end, which means the convex hull is just one point
    q[0] = p[t];
    return 1;
  }
  q[1] = q[k];
  int m = 2;  // 2 points in the stack
  // Step 5:
  // Finally we can start the scanning process.
  // When a non-convex relationship between the 3 points is found
  // (either concave shape or duplicated points),
  // we pop the previous point from the stack
  // until the 3-point relationship is convex again, or
  // until the stack only contains two points
  for (int i = k + 1; i < num_in; i++) {
    while (m > 1 && cross_2d(q[i] - q[m - 2], q[m - 1] - q[m - 2]) >= 0) {
      m--;
    }
    q[m++] = q[i];
  }
  // Step 6 (Optional):
  // In general sense we need the original coordinates, so we
  // need to shift the points back (reverting Step 2)
  // But if we're only interested in getting the area/perimeter of the shape
  // We can simply return.
  if (!shift_to_zero) {
    for (int i = 0; i < m; i++) {
      q[i] += start;
    }
  }
  return m;
}

template <typename T>
T polygon_area(const Point<T> (&q)[24], const int& m) {
  if (m <= 2) {
    return 0;
  }
  T area = 0;
  for (int i = 1; i < m - 1; i++) {
    area += fabs(cross_2d(q[i] - q[0], q[i + 1] - q[0]));
  }
  return area / 2.0;
}

template <typename T>
T rotated_boxes_intersection(
    const RotatedBox<T>& box1,
    const RotatedBox<T>& box2) {
  // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned
  // from rotated_rect_intersection_pts
  Point<T> intersectPts[24], orderedPts[24];
  Point<T> pts1[4];
  Point<T> pts2[4];
  get_rotated_vertices(box1, pts1);
  get_rotated_vertices(box2, pts2);
  int num = get_intersection_points(pts1, pts2, intersectPts);
  if (num <= 2) {
    return 0.0;
  }
  // Convex Hull to order the intersection points in clockwise order and find
  // the contour area.
  int num_convex = convex_hull_graham(intersectPts, num, orderedPts, true);
  return polygon_area(orderedPts, num_convex);
}

template <typename T>
T single_box_iou_rotated(
    T const* const box1_raw,
    T const* const box2_raw) {
  // shift center to the middle point to achieve higher precision in result
  RotatedBox<T> box1, box2;
  auto center_shift_x = (box1_raw[0] + box2_raw[0]) / 2.0;
  auto center_shift_y = (box1_raw[1] + box2_raw[1]) / 2.0;
  box1.x_ctr = box1_raw[0] - center_shift_x;
  box1.y_ctr = box1_raw[1] - center_shift_y;
  box1.w = box1_raw[2];
  box1.h = box1_raw[3];
  box1.a = box1_raw[4];
  box2.x_ctr = box2_raw[0] - center_shift_x;
  box2.y_ctr = box2_raw[1] - center_shift_y;
  box2.w = box2_raw[2];
  box2.h = box2_raw[3];
  box2.a = box2_raw[4];
  const T area1 = box1.w * box1.h;
  const T area2 = box2.w * box2.h;
  if (area1 < 1e-14 || area2 < 1e-14) {
    return 0.f;
  }
  const T inter = rotated_boxes_intersection(box1, box2);
  const T iou = inter / (area1 + area2 - inter);
  return iou;
}

extern "C" {

void apply_cpu_nms(
    double* dets,
    int* indices,
    int& n,
    double threshold) {
  int count = 0;
  for (int i = 0; i < n; i++) {
    bool keep = true;
    auto* box1 = dets + i * 6;
    for (int j = 0; j < count; j++) {
      auto* box2 = dets + indices[j] * 6;
      auto ovr = single_box_iou_rotated(box1, box2);
      if (ovr > threshold) {
        keep = false;
        break;
      }
    }
    if (keep) {
      indices[count] = i;
      count++;
    }
  }
  n = count;
}

void bbox_overlaps(
    double* boxes1,
    double* boxes2,
    int* shape,
    double* overlaps) {
  int N = shape[0], K = shape[1];
  #pragma omp parallel for num_threads(std::min(omp_get_num_procs(), 4))
  for (int i = 0; i < N; i++) {
    auto* box1 = boxes1 + i * 5;
    for (int j = 0; j < K; j++) {
      auto* box2 = boxes2 + j * 5;
      overlaps[i * K + j] = single_box_iou_rotated(box1, box2);
    }
  }
}

}  // extern "C"
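Since the two entry points are exported as plain C symbols, the Python side can drive the compiled ctypes_rbox.so through ctypes. Below is a minimal sketch, assuming the library was built as in the build script above; the wrapper names (rbox_overlaps, rbox_nms) and the library path are illustrative, not the project's actual bindings.

import ctypes
import numpy as np

# Load the shared library produced by the build script (path is an assumption).
librbox = ctypes.CDLL('./lib/utils/ctypes_rbox.so')

def rbox_overlaps(boxes1, boxes2):
    """Pairwise rotated IoU via the exported ``bbox_overlaps`` symbol."""
    # Each row is (cx, cy, w, h, angle), with the angle in degrees.
    boxes1 = np.ascontiguousarray(boxes1, dtype=np.float64)  # (N, 5)
    boxes2 = np.ascontiguousarray(boxes2, dtype=np.float64)  # (K, 5)
    shape = np.array([boxes1.shape[0], boxes2.shape[0]], dtype=np.int32)
    overlaps = np.zeros((shape[0], shape[1]), dtype=np.float64)
    librbox.bbox_overlaps(
        boxes1.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        boxes2.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        shape.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
        overlaps.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
    return overlaps

def rbox_nms(dets, threshold):
    """Greedy NMS via ``apply_cpu_nms``.

    ``dets`` is (N, 6): (cx, cy, w, h, angle, score), assumed to be
    pre-sorted by descending score, as the C++ loop keeps input order.
    """
    dets = np.ascontiguousarray(dets, dtype=np.float64)
    indices = np.zeros((dets.shape[0],), dtype=np.int32)
    n = ctypes.c_int(dets.shape[0])
    librbox.apply_cpu_nms(
        dets.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
        ctypes.byref(n),  # the C++ ``int&`` binds like a pointer in the ABI
        ctypes.c_double(threshold))
    return indices[:n.value]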
...@@ -22,11 +22,9 @@ MODEL:
  NUM_CLASSES: 81
SOLVER:
  BASE_LR: 0.02
  DECAY_STEPS: [60000, 80000]
  MAX_STEPS: 90000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: coco_faster_rcnn
FRCNN:
  ROI_XFORM_METHOD: RoIAlign
......
...@@ -22,11 +22,9 @@ MODEL:
  NUM_CLASSES: 81
SOLVER:
  BASE_LR: 0.02
  DECAY_STEPS: [120000, 160000]
  MAX_STEPS: 180000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: coco_faster_rcnn
FRCNN:
  ROI_XFORM_METHOD: RoIAlign
......
...@@ -13,11 +13,9 @@ MODEL:
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.002
  DECAY_STEPS: [100000, 140000]
  MAX_STEPS: 140000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_faster_rcnn
FRCNN:
  ROI_XFORM_METHOD: RoIAlign
......
...@@ -14,10 +14,9 @@ MODEL:
SOLVER:
  BASE_LR: 0.001
  WEIGHT_DECAY: 0.0005
  DECAY_STEPS: [100000, 140000]
  MAX_STEPS: 140000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_faster_rcnn
RPN:
  STRIDES: [16]
......
...@@ -22,11 +22,9 @@ MODEL:
  NUM_CLASSES: 81
SOLVER:
  BASE_LR: 0.02
  DECAY_STEPS: [30000, 40000]
  MAX_STEPS: 45000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: coco_retinanet_400
FPN:
  RPN_MIN_LEVEL: 3
......
...@@ -22,12 +22,10 @@ MODEL:
  NUM_CLASSES: 81
SOLVER:
  BASE_LR: 0.02
  WARM_UP_STEPS: 2000  # default: 500
  DECAY_STEPS: [120000, 160000]
  MAX_STEPS: 180000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: coco_retinanet_400
FPN:
  RPN_MIN_LEVEL: 3
...@@ -41,9 +39,9 @@ TRAIN:
  IMS_PER_BATCH: 8
  SCALES: [400]
  MAX_SIZE: 666
  USE_SCALE_JITTER: True
  USE_COLOR_JITTER: True
  SCALE_JITTER_RANGE: [0.75, 1.33]
TEST:
  DATABASE: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
......
...@@ -13,11 +13,9 @@ MODEL:
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.02
  DECAY_STEPS: [40000, 50000, 60000]
  MAX_STEPS: 60000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
  RPN_MIN_LEVEL: 3
...@@ -28,9 +26,9 @@ TRAIN:
  IMS_PER_BATCH: 32
  SCALES: [300]
  MAX_SIZE: 500
  SCALE_JITTER_RANGE: [0.5, 2.0]
  USE_SCALE_JITTER: True
  USE_COLOR_JITTER: True
TEST:
  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
......
...@@ -13,12 +13,10 @@ MODEL:
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.01
  DECAY_STEPS: [40000, 50000, 60000]
  WARM_UP_STEPS: 2000
  MAX_STEPS: 60000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
  RPN_MIN_LEVEL: 3
...@@ -29,9 +27,9 @@ TRAIN:
  IMS_PER_BATCH: 32
  SCALES: [300]
  MAX_SIZE: 500
  SCALE_JITTER_RANGE: [0.5, 2.0]
  USE_SCALE_JITTER: True
  USE_COLOR_JITTER: True
TEST:
  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
......
...@@ -13,12 +13,10 @@ MODEL:
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.01
  DECAY_STEPS: [40000, 50000, 60000]
  WARM_UP_STEPS: 2000
  MAX_STEPS: 60000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
  RPN_MIN_LEVEL: 3
...@@ -29,9 +27,9 @@ TRAIN:
  IMS_PER_BATCH: 32
  SCALES: [300]
  MAX_SIZE: 500
  SCALE_JITTER_RANGE: [0.5, 2.0]
  USE_SCALE_JITTER: True
  USE_COLOR_JITTER: True
TEST:
  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
......
...@@ -13,11 +13,9 @@ MODEL:
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.001
  DECAY_STEPS: [80000, 100000, 120000]
  MAX_STEPS: 120000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_ssd_300
SSD:
  RESIZE:
......
...@@ -13,13 +13,12 @@ MODEL:
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.001
  WARM_UP_FACTOR: 0.
  WEIGHT_DECAY: 0.0005
  DECAY_STEPS: [80000, 100000, 120000]
  MAX_STEPS: 120000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_ssd_300
SSD:
  RESIZE:
......
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: ssd
  BACKBONE: resnet50.fpn
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 8
SOLVER:
  BASE_LR: 0.001
  DECAY_STEPS: [80000, 100000, 120000]
  MAX_STEPS: 120000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_ssd_320
SSD:
  NUM_CONVS: 2
  RESIZE:
    HEIGHT: 320
    WIDTH: 320
  MULTIBOX:
    STRIDES: [8, 16, 32, 64, 100, 300]
    MIN_SIZES: [30, 60, 110, 162, 213, 264]
    MAX_SIZES: [60, 110, 162, 213, 264, 315]
    ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5, 3, 0.33], [1, 2, 0.5, 3, 0.33],
                    [1, 2, 0.5, 3, 0.33], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
  WEIGHTS: '/model/R-50.Affine.pth'
  DATABASE: '/data/voc_0712_trainval'
  IMS_PER_BATCH: 32
TEST:
  DATABASE: '/data/voc_2007_test'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 8
  NMS_TOP_K: 400
  NMS: 0.45
  SCORE_THRESH: 0.01
  DETECTIONS_PER_IM: 200
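As a quick check, a file like the one above can be merged into the defaults from lib/core/config.py (shown next). A minimal sketch; the path is hypothetical:

from lib.core.config import cfg, cfg_from_file

# Merge the YAML file into the defaults; unknown keys raise, and typed
# values are coerced by _check_and_coerce_cfg_value_type.
cfg_from_file('configs/voc_ssd_320.yml')  # hypothetical path
assert cfg.SOLVER.MAX_STEPS == 120000
assert cfg.SSD.RESIZE.HEIGHT == 320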
...@@ -20,10 +20,10 @@ from __future__ import print_function

import os.path as osp
import numpy as np

from lib.utils.attrdict import AttrDict

cfg = __C = AttrDict()

###########################################
...@@ -33,7 +33,7 @@ cfg = __C
###########################################

__C.TRAIN = AttrDict()

# Initialize network with weights from this file
__C.TRAIN.WEIGHTS = ''
...@@ -82,17 +82,17 @@ __C.TRAIN.USE_DIFF = True

__C.TRAIN.BBOX_THRESH = 0.5

# If True, randomly scale the image by scale range
__C.TRAIN.USE_SCALE_JITTER = False
__C.TRAIN.SCALE_JITTER_RANGE = [0.75, 1.0]

# If True, randomly distort the image by brightness, contrast, and saturation
__C.TRAIN.USE_COLOR_JITTER = False

# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor satisfies both the positive and negative conditions, set it to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
__C.TRAIN.RPN_FG_FRACTION = 0.5
...@@ -118,7 +118,7 @@ __C.TRAIN.RPN_STRADDLE_THRESH = 0

###########################################

__C.TEST = AttrDict()

# Database to test
__C.TEST.DATABASE = ''
...@@ -151,10 +151,10 @@ __C.TEST.SOFT_NMS_SIGMA = 0.5

# The top-k prior boxes before nms.
__C.TEST.NMS_TOP_K = 400

# The threshold for predicting boxes
__C.TEST.SCORE_THRESH = 0.05

# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5

# NMS threshold used on RPN proposals
...@@ -188,37 +188,32 @@ __C.TEST.DETECTIONS_PER_IM = 100
###########################################

__C.MODEL = AttrDict()

# The type of the model
# ('faster_rcnn',
#  'ssd',
#  'retinanet',
# )
__C.MODEL.TYPE = ''

# The float precision for training and inference
# (FLOAT32, FLOAT16,)
__C.MODEL.PRECISION = 'FLOAT32'

# The backbone
__C.MODEL.BACKBONE = ''

# The number of classes in the dataset
__C.MODEL.NUM_CLASSES = -1

__C.MODEL.CLASSES = ['__background__']

# Freeze the gradient since convolution stage K
# The value of ``K`` is usually set to 2
__C.MODEL.FREEZE_AT = 2

# Setting of focal loss
__C.MODEL.FOCAL_LOSS_ALPHA = 0.25
__C.MODEL.FOCAL_LOSS_GAMMA = 2.0
...@@ -234,7 +229,7 @@ __C.MODEL.COARSEST_STRIDE = 32

###########################################

__C.RPN = AttrDict()

# Strides for multiple rpn heads
__C.RPN.STRIDES = [4, 8, 16, 32, 64]
...@@ -253,7 +248,7 @@ __C.RPN.ASPECT_RATIOS = [0.5, 1, 2]

###########################################

__C.RETINANET = AttrDict()

# Anchor aspect ratios to use
__C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0)
...@@ -291,7 +286,7 @@ __C.RETINANET.NEGATIVE_OVERLAP = 0.4

###########################################

__C.FPN = AttrDict()

# Channel dimension of the FPN feature levels
__C.FPN.DIM = 256
...@@ -317,7 +312,7 @@ __C.FPN.ROI_MIN_LEVEL = 2

###########################################

__C.FRCNN = AttrDict()

# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.FRCNN.ROI_XFORM_METHOD = 'RoIPool'
...@@ -338,7 +333,7 @@ __C.FRCNN.ROI_XFORM_RESOLUTION = 7

###########################################

__C.MRCNN = AttrDict()

# Resolution of mask predictions
__C.MRCNN.RESOLUTION = 28
...@@ -357,10 +352,7 @@ __C.MRCNN.ROI_XFORM_RESOLUTION = 14

###########################################

__C.SSD = AttrDict()

# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
...@@ -369,7 +361,7 @@ __C.SSD.NUM_CONVS = 0

# Weight for bbox regression loss
__C.SSD.BBOX_REG_WEIGHT = 1.

__C.SSD.MULTIBOX = AttrDict()
# MultiBox configs
__C.SSD.MULTIBOX.STRIDES = []
__C.SSD.MULTIBOX.MIN_SIZES = []
...@@ -377,25 +369,25 @@ __C.SSD.MULTIBOX.MAX_SIZES = []
__C.SSD.MULTIBOX.ASPECT_RATIOS = []
__C.SSD.MULTIBOX.ASPECT_ANGLES = []

__C.SSD.OHEM = AttrDict()
# The threshold for selecting negative bbox in hard example mining
__C.SSD.OHEM.NEG_OVERLAP = 0.5
# The ratio used in hard example mining
__C.SSD.OHEM.NEG_POS_RATIO = 3.0

# Distort the image?
__C.SSD.DISTORT = AttrDict()
__C.SSD.DISTORT.BRIGHTNESS_PROB = 0.5
__C.SSD.DISTORT.CONTRAST_PROB = 0.5
__C.SSD.DISTORT.SATURATION_PROB = 0.5

# Expand the image?
__C.SSD.EXPAND = AttrDict()
__C.SSD.EXPAND.PROB = 0.5
__C.SSD.EXPAND.MAX_RATIO = 4.0

# Resize the image?
__C.SSD.RESIZE = AttrDict()
__C.SSD.RESIZE.HEIGHT = 300
__C.SSD.RESIZE.WIDTH = 300
__C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']
...@@ -403,7 +395,7 @@ __C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']

# Samplers
# Format as (min_scale, max_scale,
#            min_aspect_ratio, max_aspect_ratio,
#            min_overlap, max_overlap,
#            max_trials, max_sample)
__C.SSD.SAMPLERS = [
    (1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1, 1),  # Entire image
...@@ -423,7 +415,7 @@ __C.SSD.SAMPLERS = [

###########################################

__C.RESNET = AttrDict()

# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
__C.RESNET.NUM_GROUPS = 1
...@@ -439,7 +431,7 @@ __C.RESNET.GROUP_WIDTH = 64

###########################################

__C.DROPBLOCK = AttrDict()

# Whether to use drop block for more regularization
__C.DROPBLOCK.DROP_ON = False
...@@ -455,59 +447,46 @@ __C.DROPBLOCK.DECREMENT = 1e-6
###########################################

__C.SOLVER = AttrDict()

# The interval to display logs
__C.SOLVER.DISPLAY = 20

# The interval to snapshot a model
__C.SOLVER.SNAPSHOT_EVERY = 5000
# Prefix to yield the path: <prefix>_iters_XYZ.pth
__C.SOLVER.SNAPSHOT_PREFIX = ''

# Optional scaling factor for total loss
# This option is helpful to scale the magnitude
# of gradients during FP16 training
__C.SOLVER.LOSS_SCALING = 1.

# Maximum number of SGD iterations
__C.SOLVER.MAX_STEPS = 40000

# Base learning rate for the specified schedule
__C.SOLVER.BASE_LR = 0.001
# The uniform interval for LRScheduler
__C.SOLVER.DECAY_STEP = 1
# The custom intervals for LRScheduler
__C.SOLVER.DECAY_STEPS = []
# The decay factor for exponential LRScheduler
__C.SOLVER.DECAY_GAMMA = 0.1
# Warm up to ``BASE_LR`` over this number of steps
__C.SOLVER.WARM_UP_STEPS = 500
# Start the warm up from ``BASE_LR`` * ``FACTOR``
__C.SOLVER.WARM_UP_FACTOR = 0.333
# The type of LRScheduler
__C.SOLVER.LR_POLICY = 'steps_with_decay'

# Momentum to use with SGD
__C.SOLVER.MOMENTUM = 0.9

# L2 regularization hyper parameters
__C.SOLVER.WEIGHT_DECAY = 0.0001

# L2 norm factor for clipping gradients
__C.SOLVER.CLIP_NORM = -1.0
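Taken together, these keys describe a multi-step schedule with a linear warm-up. A rough sketch of the resulting learning rate at a given step, assuming the semantics carry over from the pre-refactor solver code shown later in this commit (the function name and defaults here are illustrative):

import bisect

def lr_at_step(step, base_lr=0.001, decay_steps=(60000, 80000),
               decay_gamma=0.1, warm_up_steps=500, warm_up_factor=0.333):
    """Approximate 'steps_with_decay': linear warm-up, then step decay."""
    if step < warm_up_steps:
        # Ramp linearly from base_lr * warm_up_factor up to base_lr
        alpha = (step + 1.0) / warm_up_steps
        return base_lr * (warm_up_factor * (1.0 - alpha) + alpha)
    # Multiply by decay_gamma at each threshold in decay_steps
    return base_lr * decay_gamma ** bisect.bisect_right(list(decay_steps), step)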
###########################################
#                                         #
...@@ -532,9 +511,6 @@ __C.PIXEL_MEANS = [102., 115., 122.]

# These are empirically chosen to approximately lead to unit variance targets
__C.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)

# Prior prob for the positives at the beginning of training.
# This is used to set the bias init for the logits layer
__C.PRIOR_PROB = 0.01
...@@ -581,7 +557,7 @@ def _merge_a_into_b(a, b):
        # the types must match, too
        v = _check_and_coerce_cfg_value_type(v, b[k], k)
        # recursively merge dicts
        if type(v) is AttrDict:
            try:
                _merge_a_into_b(a[k], b[k])
            except:
...@@ -595,7 +571,7 @@ def cfg_from_file(filename):
    """Load a config file and merge it into the default options."""
    import yaml
    with open(filename, 'r') as f:
        yaml_cfg = AttrDict(yaml.load(f))
    global __C
    _merge_a_into_b(yaml_cfg, __C)
...@@ -643,8 +619,8 @@ def _check_and_coerce_cfg_value_type(value_a, value_b, key):
        value_a = list(value_a)
    elif isinstance(value_a, list) and isinstance(value_b, tuple):
        value_a = tuple(value_a)
    elif isinstance(value_a, dict) and isinstance(value_b, AttrDict):
        value_a = AttrDict(value_a)
    else:
        raise ValueError(
            'Type mismatch ({} vs. {}) with values ({} vs. {}) for config '
......
...@@ -23,10 +23,8 @@ from lib.core.config import cfg_from_file

class Coordinator(object):
    """Manage the unique experiments."""

    def __init__(self, cfg_file, exp_dir=None):
        # Override the default configs
        cfg_from_file(cfg_file)
...@@ -44,9 +42,14 @@ class Coordinator(object):
        self.experiment_dir = exp_dir

    def _path_at(self, file, auto_create=True):
        try:
            path = os.path.abspath(os.path.join(self.experiment_dir, file))
            if auto_create and not os.path.exists(path):
                os.makedirs(path)
        except OSError:
            path = os.path.abspath(os.path.join('/tmp', file))
            if auto_create and not os.path.exists(path):
                os.makedirs(path)
        return path

    def checkpoints_dir(self):
...@@ -55,7 +58,9 @@ class Coordinator(object):
    def exports_dir(self):
        return self._path_at('exports')

    def results_dir(self, checkpoint=None, output_dir=None):
        if output_dir is not None:
            return output_dir
        sub_dir = os.path.splitext(os.path.basename(checkpoint))[0] if checkpoint else ''
        return self._path_at(os.path.join('results', sub_dir))
......
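A minimal usage sketch for the class above; the config path, experiment directory, and checkpoint name are hypothetical:

from lib.core.coordinator import Coordinator  # assumed module path

coordinator = Coordinator('configs/voc_ssd_320.yml', exp_dir='experiments/ssd320')
ckpt_dir = coordinator.checkpoints_dir()
# _path_at falls back to /tmp when the experiment directory is not writable.
res_dir = coordinator.results_dir(checkpoint='voc_ssd_320_iter_120000.pth')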
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
#    <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

import dragon.vm.torch as torch

from lib.core.config import cfg
from lib.modeling.detector import Detector
from lib.utils import logger


class Solver(object):
    def __init__(self):
        # Define the generic detector
        self.detector = Detector()
        # Define the optimizer and its arguments
        self.optimizer = None
        self.opt_arguments = {
            'scale_gradient': 1. / (
                cfg.SOLVER.LOSS_SCALING *
                cfg.SOLVER.ITER_SIZE
            ),
            'clip_gradient': float(cfg.SOLVER.CLIP_NORM),
            'weight_decay': cfg.SOLVER.WEIGHT_DECAY,
        }
        # Define the global step
        self.iter = 0
        # Define the decay step
        self._current_step = 0

    def _get_param_groups(self):
        param_groups = [
            {
                'params': [],
                'lr_mult': 1.,
                'decay_mult': 1.,
            },
            # Special treatment for biases (mainly to match historical impl.
            # details):
            # (1) Do not apply weight decay
            # (2) Use a 2x higher learning rate
            {
                'params': [],
                'lr_mult': 2.,
                'decay_mult': 0.,
            }
        ]
        for name, param in self.detector.named_parameters():
            if 'bias' in name:
                param_groups[1]['params'].append(param)
            else:
                param_groups[0]['params'].append(param)
        return param_groups

    def set_learning_rate(self):
        policy = cfg.SOLVER.LR_POLICY
        if policy == 'steps_with_decay':
            if self._current_step < len(cfg.SOLVER.STEPS) \
                    and self.iter >= cfg.SOLVER.STEPS[self._current_step]:
                self._current_step = self._current_step + 1
                logger.info(
                    'MultiStep Status: Iteration {}, step = {}'
                    .format(self.iter, self._current_step)
                )
            new_lr = cfg.SOLVER.BASE_LR * (
                cfg.SOLVER.GAMMA ** self._current_step)
            self.optimizer.param_groups[0]['lr'] = \
                self.optimizer.param_groups[1]['lr'] = new_lr
        else:
            raise ValueError('Unknown lr policy: ' + policy)

    def one_step(self):
        def add_loss(x, y):
            return y if x is None else x + y

        # Forward & Backward & Compute_loss
        iter_size = cfg.SOLVER.ITER_SIZE
        loss_scaling = cfg.SOLVER.LOSS_SCALING
        stats = {'loss': {'total': 0.}, 'iter': self.iter}
        run_time, tic = 0., time.time()
        if iter_size > 1:
            # Dragon is designed for manual gradients accumulating
            # ``zero_grad`` is only required if calling ``accumulate_grad``
            self.optimizer.zero_grad()
        for i in range(iter_size):
            outputs, total_loss = self.detector(), None
            # Sum the partial losses
            for k, v in outputs.items():
                if 'loss' in k:
                    if k not in stats['loss']:
                        stats['loss'][k] = 0.
                    total_loss = add_loss(total_loss, v)
                    stats['loss'][k] += float(v) * loss_scaling
            if loss_scaling != 1.:
                total_loss *= loss_scaling
            stats['loss']['total'] += float(total_loss)
            total_loss.backward()
            if iter_size > 1:
                self.optimizer.accumulate_grad()
        run_time += (time.time() - tic)

        # Apply Update
        self.set_learning_rate()
        tic = time.time()
        self.optimizer.step()
        run_time += (time.time() - tic)
        self.iter += 1

        # Average loss by the iter size
        for k in stats['loss'].keys():
            stats['loss'][k] /= cfg.SOLVER.ITER_SIZE

        # Misc stats
        stats['lr'] = self.base_lr
        stats['time'] = run_time
        return stats

    @property
    def base_lr(self):
        return self.optimizer.param_groups[0]['lr']

    @base_lr.setter
    def base_lr(self, value):
        self.optimizer.param_groups[0]['lr'] = \
            self.optimizer.param_groups[1]['lr'] = value


class SGDSolver(Solver):
    def __init__(self):
        super(SGDSolver, self).__init__()
        self.opt_arguments.update(**{
            'lr': cfg.SOLVER.BASE_LR,
            'momentum': cfg.SOLVER.MOMENTUM,
        })
        self.optimizer = torch.optim.SGD(
            self._get_param_groups(), **self.opt_arguments)


class NesterovSolver(Solver):
    def __init__(self):
        super(NesterovSolver, self).__init__()
        self.opt_arguments.update(**{
            'lr': cfg.SOLVER.BASE_LR,
            'momentum': cfg.SOLVER.MOMENTUM,
            'nesterov': True,
        })
        self.optimizer = torch.optim.SGD(
            self._get_param_groups(), **self.opt_arguments)


class RMSPropSolver(Solver):
    def __init__(self):
        super(RMSPropSolver, self).__init__()
        self.opt_arguments.update(**{
            'lr': cfg.SOLVER.BASE_LR,
            'alpha': 0.9,
            'eps': 1e-5,
        })
        self.optimizer = torch.optim.RMSprop(
            self._get_param_groups(), **self.opt_arguments)


class AdamSolver(Solver):
    def __init__(self):
        super(AdamSolver, self).__init__()
        self.opt_arguments.update(**{
            'lr': cfg.SOLVER.BASE_LR,
            'beta1': 0.9,
            'beta2': 0.999,
            'eps': 1e-5,
        })
        self.optimizer = torch.optim.Adam(
            self._get_param_groups(), **self.opt_arguments)


def get_solver_func(type):
    if type == 'MomentumSGD':
        return SGDSolver
    elif type == 'Nesterov':
        return NesterovSolver
    elif type == 'RMSProp':
        return RMSPropSolver
    elif type == 'Adam':
        return AdamSolver
    else:
        raise ValueError(
            'Unsupported solver type: {}.\n'
            'Expected one of (MomentumSGD, Nesterov, RMSProp, Adam).'
            .format(type)
        )
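For orientation, a minimal driver over this (pre-refactor) solver interface might look like the sketch below; the surrounding setup (config loading, data pipeline) is assumed, and the real training loop (train.py further below) adds warm-up, progress logging, and snapshotting.

from lib.core.config import cfg

solver = get_solver_func('MomentumSGD')()  # builds the detector and optimizer
while solver.iter < cfg.SOLVER.MAX_ITERS:
    stats = solver.one_step()  # one forward/backward pass + SGD update
    if stats['iter'] % cfg.SOLVER.DISPLAY == 0:
        print('iter', stats['iter'], 'lr', stats['lr'],
              'loss', stats['loss']['total'])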
...@@ -34,7 +34,7 @@ class TestServer(object):
        self.data_reader = dragon.io.DataReader(
            dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
        self.data_transformer = DataTransformer()
        self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH * 5)
        self.data_reader.start()
        self.gt_recs = collections.OrderedDict()
        self.output_dir = output_dir
...@@ -70,11 +70,14 @@ class TestServer(object):
        return self.gt_recs

    def evaluate_detections(self, all_boxes):
        if cfg.TEST.PROTOCOL == 'null':
            self.imdb.dump_detections(all_boxes, self.output_dir)
        else:
            self.imdb.evaluate_detections(
                all_boxes,
                self.get_records(),
                self.output_dir,
            )

    def evaluate_segmentations(self, all_boxes, all_masks):
        self.imdb.evaluate_segmentations(
......
...@@ -18,53 +18,48 @@ from __future__ import division ...@@ -18,53 +18,48 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import datetime
import os import os
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.core.solver import get_solver_func from lib.solver.sgd import SGDSolver
from lib.utils import logger from lib.utils import logger
from lib.utils import time_util
from lib.utils.stats import SmoothedValue from lib.utils.stats import SmoothedValue
from lib.utils.timer import Timer
class SolverWrapper(object): class SolverWrapper(object):
def __init__(self, coordinator): def __init__(self, coordinator):
self.solver = SGDSolver()
self.detector = self.solver.detector
self.output_dir = coordinator.checkpoints_dir() self.output_dir = coordinator.checkpoints_dir()
self.solver = get_solver_func('MomentumSGD')()
# Load the pre-trained weights # Setup the detector
init_weights = cfg.TRAIN.WEIGHTS self.detector.load_weights(cfg.TRAIN.WEIGHTS)
if init_weights != '': if cfg.MODEL.PRECISION.lower() == 'float16':
if os.path.exists(init_weights): # Mixed precision training
logger.info('Loading weights from {}.'.format(init_weights)) self.detector.half()
self.solver.detector.load_weights(init_weights) self.detector.cuda(cfg.GPU_ID)
else:
raise ValueError('Invalid path of weights: {}'.format(init_weights))
# Mixed precision training?
if cfg.MODEL.DATA_TYPE.lower() == 'float16':
self.solver.detector.half() # Powerful FP16 Support
self.solver.detector.cuda(cfg.GPU_ID)
# Plan the metrics # Plan the metrics
self.board = None
self.metrics = collections.OrderedDict() self.metrics = collections.OrderedDict()
if cfg.ENABLE_TENSOR_BOARD and logger.is_root(): if cfg.ENABLE_TENSOR_BOARD and logger.is_root():
from dragon.tools.tensorboard import TensorBoard try:
self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs') from dragon.tools.tensorboard import TensorBoard
log_dir = coordinator.experiment_dir + '/logs'
self.board = TensorBoard(log_dir=log_dir)
except ImportError:
pass
def snapshot(self): def snapshot(self):
if not logger.is_root(): filename = cfg.SOLVER.SNAPSHOT_PREFIX + \
return None '_iter_{}.pth'.format(self.solver.iter)
filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}'
.format(self.solver.iter) + '.pth')
filename = os.path.join(self.output_dir, filename) filename = os.path.join(self.output_dir, filename)
torch.save(self.solver.detector.state_dict(), filename) if logger.is_root() and not os.path.exists(filename):
logger.info('Wrote snapshot to: {:s}'.format(filename)) torch.save(self.detector.state_dict(), filename)
return filename logger.info('Wrote snapshot to: {:s}'.format(filename))
def add_metrics(self, stats): def add_metrics(self, stats):
for k, v in stats['loss'].items(): for k, v in stats['loss'].items():
...@@ -73,7 +68,7 @@ class SolverWrapper(object): ...@@ -73,7 +68,7 @@ class SolverWrapper(object):
self.metrics[k].AddValue(v) self.metrics[k].AddValue(v)
def send_metrics(self, stats): def send_metrics(self, stats):
if hasattr(self, 'board'): if self.board is not None:
self.board.scalar_summary('lr', stats['lr'], stats['iter']) self.board.scalar_summary('lr', stats['lr'], stats['iter'])
self.board.scalar_summary('time', stats['time'], stats['iter']) self.board.scalar_summary('time', stats['time'], stats['iter'])
for k, v in self.metrics.items(): for k, v in self.metrics.items():
...@@ -90,10 +85,12 @@ class SolverWrapper(object): ...@@ -90,10 +85,12 @@ class SolverWrapper(object):
stats['iter'], stats['iter'],
) )
def step(self, display=False): def step(self):
display = self.solver.iter % cfg.SOLVER.DISPLAY == 0
stats = self.solver.one_step() stats = self.solver.one_step()
self.add_metrics(stats) self.add_metrics(stats)
self.send_metrics(stats) self.send_metrics(stats)
if display: if display:
logger.info( logger.info(
'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % ( 'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (
...@@ -110,43 +107,28 @@ class SolverWrapper(object): ...@@ -110,43 +107,28 @@ class SolverWrapper(object):
def train_model(self): def train_model(self):
"""Network training loop.""" """Network training loop."""
last_snapshot_iter = -1 timer = time_util.Timer()
timer = Timer() max_steps = cfg.SOLVER.MAX_STEPS
model_paths = []
start_lr = self.solver.base_lr
while self.solver.iter < cfg.SOLVER.MAX_ITERS:
if self.solver.iter < cfg.SOLVER.WARM_UP_ITERS:
alpha = (self.solver.iter + 1.0) / cfg.SOLVER.WARM_UP_ITERS
self.solver.base_lr = \
start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha)
while self.solver.iter < max_steps:
# Apply 1-step SGD update # Apply 1-step SGD update
with timer.tic_and_toc(): with timer.tic_and_toc():
self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0) _, global_step = self.step(), self.solver.iter
if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0: if global_step % (10 * cfg.SOLVER.DISPLAY) == 0:
average_time = timer.average_time
eta_seconds = average_time * (
cfg.SOLVER.MAX_ITERS - self.solver.iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS
logger.info( logger.info(
'< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >' time_util.get_progress_info(
.format(progress, timer.average_time, eta) timer, global_step, max_steps
)
) )
if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0: if global_step % cfg.SOLVER.SNAPSHOT_EVERY == 0:
last_snapshot_iter = self.solver.iter self.snapshot()
model_paths.append(self.snapshot())
if last_snapshot_iter != self.solver.iter:
model_paths.append(self.snapshot())
return model_paths
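The new ``time_util.get_progress_info`` helper replaces the inline ETA arithmetic deleted above; its body is not part of this diff. A minimal sketch of such a helper, assuming the Timer exposes an ``average_time`` attribute in seconds (names here are illustrative, not confirmed by this commit):

    import datetime

    def get_progress_info(timer, step, max_steps):
        """Format a progress line with speed and ETA (hypothetical helper)."""
        eta_seconds = timer.average_time * (max_steps - step)
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        progress = float(step + 1) / max_steps
        return ('< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
                .format(progress, timer.average_time, eta))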
def train_net(coordinator, start_iter=0): def train_net(coordinator, start_iter=0):
sw = SolverWrapper(coordinator) sw = SolverWrapper(coordinator)
sw.solver.iter = start_iter sw.solver.iter = start_iter
logger.info('Solving...') logger.info('Solving...')
model_paths = sw.train_model() sw.train_model()
return model_paths sw.snapshot()
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
# ------------------------------------------------------------ # ------------------------------------------------------------
import os import os
import shutil
import dragon import dragon
from lib.core.config import cfg from lib.core.config import cfg
...@@ -59,6 +60,35 @@ class imdb(object): ...@@ -59,6 +60,35 @@ class imdb(object):
def num_images(self): def num_images(self):
return dragon.io.SeetaRecordDataset(self.source).size return dragon.io.SeetaRecordDataset(self.source).size
def dump_detections(self, all_boxes, output_dir):
dataset = dragon.io.SeetaRecordDataset(self.source)
for file in ('data.data', 'data.index', 'data.meta'):
file = os.path.join(output_dir, file)
if os.path.exists(file):
os.remove(file)
writer = dragon.io.SeetaRecordWriter(output_dir, dataset.protocol)
for i in range(len(dataset)):
example = dataset.get()
example['object'] = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
detections = all_boxes[cls_ind][i]
if len(detections) == 0:
continue
for k in range(detections.shape[0]):
if detections[k, -1] < cfg.VIS_TH:
continue
example['object'].append({
'name': cls,
'xmin': float(detections[k][0]),
'ymin': float(detections[k][1]),
'xmax': float(detections[k][2]),
'ymax': float(detections[k][3]),
'difficult': 0,
})
writer.write(example)
def evaluate_detections(self, all_boxes, gt_recs, output_dir): def evaluate_detections(self, all_boxes, gt_recs, output_dir):
pass pass
......
...@@ -109,36 +109,6 @@ class TaaS(imdb): ...@@ -109,36 +109,6 @@ class TaaS(imdb):
# # # #
############################################## ##############################################
def _write_xml_bbox_results(self, all_boxes, gt_recs, output_dir):
from xml.dom import minidom
import xml.etree.ElementTree as ET
ix = 0
for image_id, rec in gt_recs.items():
root = ET.Element('annotation')
ET.SubElement(root, 'filename').text = str(image_id)
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
detections = all_boxes[cls_ind][ix]
if len(detections) == 0:
continue
for k in range(detections.shape[0]):
if detections[k, -1] < cfg.VIS_TH:
continue
object = ET.SubElement(root, 'object')
ET.SubElement(object, 'name').text = cls
ET.SubElement(object, 'difficult').text = '0'
bnd_box = ET.SubElement(object, 'bndbox')
ET.SubElement(bnd_box, 'xmin').text = str(detections[k][0])
ET.SubElement(bnd_box, 'ymin').text = str(detections[k][1])
ET.SubElement(bnd_box, 'xmax').text = str(detections[k][2])
ET.SubElement(bnd_box, 'ymax').text = str(detections[k][3])
ix += 1
rawText = ET.tostring(root)
dom = minidom.parseString(rawText)
with open('{}/{}.xml'.format(output_dir, image_id), 'w') as f:
dom.writexml(f, "", "\t", "\n", "utf-8")
def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir): def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir):
for cls_ind, cls in enumerate(self.classes): for cls_ind, cls in enumerate(self.classes):
if cls == '__background__': if cls == '__background__':
...@@ -486,10 +456,6 @@ class TaaS(imdb): ...@@ -486,10 +456,6 @@ class TaaS(imdb):
self._do_voc_bbox_eval( self._do_voc_bbox_eval(
gt_recs, output_dir, IoU=0.7, gt_recs, output_dir, IoU=0.7,
use_07_metric='2007' in protocol) use_07_metric='2007' in protocol)
elif 'xml' in protocol:
if cfg.EXP_DIR != '':
output_dir = cfg.EXP_DIR
self._write_xml_bbox_results(all_boxes, gt_recs, output_dir)
elif 'coco' in protocol: elif 'coco' in protocol:
from lib.pycocotools.coco import COCO from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE): if os.path.exists(cfg.TEST.JSON_FILE):
......
...@@ -20,7 +20,7 @@ import dragon.vm.torch as torch ...@@ -20,7 +20,7 @@ import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import blob_to_tensor from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
...@@ -194,8 +194,8 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -194,8 +194,8 @@ class AnchorTargetLayer(torch.nn.Module):
.transpose(0, 3, 1, 2) .transpose(0, 3, 1, 2)
return { return {
'labels': blob_to_tensor(labels), 'labels': array2tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets), 'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights), 'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights), 'bbox_outside_weights': array2tensor(bbox_outside_weights),
} }
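``array2tensor`` (the renamed ``blob_to_tensor``) is not itself shown in this diff. A minimal sketch, assuming it merely wraps a contiguous numpy array into a dragon tensor, as ``torch.from_numpy`` is used elsewhere in this commit:

    import numpy as np
    import dragon.vm.torch as torch

    def array2tensor(array):
        """Convert a numpy array into a tensor (illustrative sketch)."""
        return torch.from_numpy(np.ascontiguousarray(array))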
...@@ -92,7 +92,7 @@ class DataBatch(mp.Process): ...@@ -92,7 +92,7 @@ class DataBatch(mp.Process):
if self._num_transformers == -1: if self._num_transformers == -1:
self._num_transformers = 2 self._num_transformers = 2
# Add 1 transformer for color augmentation # Add 1 transformer for color augmentation
if cfg.TRAIN.COLOR_JITTERING: if cfg.TRAIN.USE_COLOR_JITTER:
self._num_transformers += 1 self._num_transformers += 1
self._num_transformers = min( self._num_transformers = min(
self._num_transformers, self._max_transformers) self._num_transformers, self._max_transformers)
......
...@@ -19,8 +19,10 @@ import cv2 ...@@ -19,8 +19,10 @@ import cv2
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils import rotated_boxes
from lib.utils.blob import prep_im_for_blob from lib.utils.blob import prep_im_for_blob
from lib.utils.boxes import flip_boxes from lib.utils.boxes import flip_boxes
from lib.utils.image import get_image_with_target_size
class DataTransformer(multiprocessing.Process): class DataTransformer(multiprocessing.Process):
...@@ -101,23 +103,29 @@ class DataTransformer(multiprocessing.Process): ...@@ -101,23 +103,29 @@ class DataTransformer(multiprocessing.Process):
def get_annotations(cls, example): def get_annotations(cls, example):
objects = [] objects = []
for ix, obj in enumerate(example['object']): for ix, obj in enumerate(example['object']):
if 'xmin' in obj: if 'x3' in obj:
objects.append({ bbox = rotated_boxes.vertices2box(
'name': obj['name'], [obj['x1'], obj['y1'],
'difficult': obj.get('difficult', 0), obj['x2'], obj['y2'],
'bbox': [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']], obj['x3'], obj['y3'],
}) obj['x4'], obj['y4']]
)
elif 'x2' in obj:
bbox = [obj['x1'], obj['y1'], obj['x2'], obj['y2']]
elif 'xmin' in obj:
bbox = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']]
else: else:
objects.append({ bbox = obj['bbox']
'name': obj['name'], objects.append({
'difficult': obj.get('difficult', 0), 'name': obj['name'],
'bbox': obj['bbox'], 'difficult': obj.get('difficult', 0),
}) 'bbox': bbox,
})
return example['id'], objects return example['id'], objects
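The ``rotated_boxes.vertices2box`` call above reduces four vertices to a five-parameter rotated box; its real implementation lives in ``lib/utils/rotated_boxes`` and is not shown here. A minimal sketch using ``cv2.minAreaRect``, assuming an (x_ctr, y_ctr, w, h, angle) output convention (the library may canonicalize the angle differently):

    import cv2
    import numpy as np

    def vertices2box(vertices):
        """Fit a rotated box to 4 (x, y) vertices (illustrative sketch)."""
        pts = np.array(vertices, dtype=np.float32).reshape(4, 2)
        (x_ctr, y_ctr), (w, h), angle = cv2.minAreaRect(pts)
        return [x_ctr, y_ctr, w, h, angle]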
def get(self, example): def get(self, example):
img = np.frombuffer(example['content'], np.uint8) img = np.frombuffer(example['content'], np.uint8)
img = cv2.imdecode(img, -1) img = cv2.imdecode(img, 1)
# Scale # Scale
scale_indices = np.random.randint(len(cfg.TRAIN.SCALES)) scale_indices = np.random.randint(len(cfg.TRAIN.SCALES))
...@@ -137,10 +145,10 @@ class DataTransformer(multiprocessing.Process): ...@@ -137,10 +145,10 @@ class DataTransformer(multiprocessing.Process):
if jitter != 1.0: if jitter != 1.0:
# To a rectangle (scale, max_size) # To a rectangle (scale, max_size)
target_size = (np.array(im.shape[0:2]) / jitter).astype(np.int) target_size = (np.array(im.shape[0:2]) / jitter).astype(np.int)
im, offsets = _get_image_with_target_size(target_size, im) im, offsets = get_image_with_target_size(target_size, im)
else: else:
# To a square (target_size, target_size) # To a square (target_size, target_size)
im, offsets = _get_image_with_target_size([target_size] * 2, im) im, offsets = get_image_with_target_size([target_size] * 2, im)
# Example -> RoIDict # Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets) roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets)
...@@ -166,29 +174,3 @@ class DataTransformer(multiprocessing.Process): ...@@ -166,29 +174,3 @@ class DataTransformer(multiprocessing.Process):
self.q1_out.put(outputs) self.q1_out.put(outputs)
else: else:
self.q2_out.put(outputs) self.q2_out.put(outputs)
def _get_image_with_target_size(target_size, img):
im_shape = list(img.shape)
height_diff = target_size[0] - im_shape[0]
width_diff = target_size[1] - im_shape[1]
ofs_crop_width = np.random.randint(max(-width_diff, 0) + 1)
ofs_pad_width = np.random.randint(max(width_diff, 0) + 1)
ofs_crop_height = np.random.randint(max(-height_diff, 0) + 1)
ofs_pad_height = np.random.randint(max(height_diff, 0) + 1)
im_shape[:2] = target_size
new_img = np.empty(im_shape, dtype=img.dtype)
new_img[:] = cfg.PIXEL_MEANS
new_img[ofs_pad_height:ofs_pad_height + img.shape[0],
ofs_pad_width:ofs_pad_width + img.shape[1]] = \
img[ofs_crop_height:ofs_crop_height + target_size[0],
ofs_crop_width:ofs_crop_width + target_size[1]]
return new_img, (
ofs_pad_width - ofs_crop_width,
ofs_pad_height - ofs_crop_height,
target_size,
)
...@@ -18,19 +18,15 @@ import numpy as np ...@@ -18,19 +18,15 @@ import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms from lib.nms import nms_wrapper
from lib.utils.blob import blob_to_tensor from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_transform_inv from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module): class ProposalLayer(torch.nn.Module):
""" """Compute proposals by applying transformations to anchors."""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self): def __init__(self):
super(ProposalLayer, self).__init__() super(ProposalLayer, self).__init__()
...@@ -48,8 +44,8 @@ class ProposalLayer(torch.nn.Module): ...@@ -48,8 +44,8 @@ class ProposalLayer(torch.nn.Module):
def forward(self, features, cls_prob, bbox_pred, ims_info): def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST' cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N pre_nms_top_n = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE min_size = cfg[cfg_key].RPN_MIN_SIZE
...@@ -86,14 +82,15 @@ class ProposalLayer(torch.nn.Module): ...@@ -86,14 +82,15 @@ class ProposalLayer(torch.nn.Module):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1] scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix].reshape((-1, 4)) deltas = batch_deltas[ix].reshape((-1, 4))
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores): if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze()) order = np.argsort(-scores.squeeze())
else: else:
# Avoid sorting possibly large arrays; first partition to get top K # Avoid sorting possibly large arrays; first partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores) # unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN] inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze()) order = np.argsort(-scores[inds].squeeze())
order = inds[order] order = inds[order]
deltas = deltas[order] deltas = deltas[order]
anchors = all_anchors[order] anchors = all_anchors[order]
scores = scores[order] scores = scores[order]
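The partition-then-sort trick commented above can be checked in isolation; both paths below select the same top-K indices, but ``argpartition`` does the bulk of the work in linear time:

    import numpy as np

    scores = np.random.rand(200000)
    k = 6000
    top_full = np.argsort(-scores)[:k]            # full O(n log n) sort
    inds = np.argpartition(-scores, k)[:k]        # O(n) partition, unsorted top K
    top_fast = inds[np.argsort(-scores[inds])]    # sort only the K survivors
    assert np.array_equal(top_full, top_fast)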
...@@ -111,11 +108,11 @@ class ProposalLayer(torch.nn.Module): ...@@ -111,11 +108,11 @@ class ProposalLayer(torch.nn.Module):
scores = scores[keep] scores = scores[keep]
# 6. Apply nms (e.g. threshold = 0.7) # 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_topN (e.g. 300) # 7. Take after_nms_top_n (e.g. 300)
# 8. Return the top proposals (-> RoIs top) # 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh) keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: if post_nms_top_n > 0:
keep = keep[:post_nms_topN] keep = keep[:post_nms_top_n]
proposals = proposals[keep, :] proposals = proposals[keep, :]
# Output rois blob # Output rois blob
...@@ -129,4 +126,4 @@ class ProposalLayer(torch.nn.Module): ...@@ -129,4 +126,4 @@ class ProposalLayer(torch.nn.Module):
if cfg_key == 'TRAIN': if cfg_key == 'TRAIN':
return rpn_rois return rpn_rois
else: else:
return [blob_to_tensor(rpn_rois)] return [array2tensor(rpn_rois)]
...@@ -18,7 +18,7 @@ import numpy as np ...@@ -18,7 +18,7 @@ import numpy as np
import numpy.random as npr import numpy.random as npr
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
...@@ -73,11 +73,11 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -73,11 +73,11 @@ class ProposalTargetLayer(torch.nn.Module):
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0) batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return { return {
'rois': [blob_to_tensor(batch_outputs['rois'])], 'rois': [array2tensor(batch_outputs['rois'])],
'labels': blob_to_tensor(batch_outputs['labels']), 'labels': array2tensor(batch_outputs['labels']),
'bbox_targets': blob_to_tensor(batch_outputs['bbox_targets']), 'bbox_targets': array2tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': blob_to_tensor(batch_outputs['bbox_inside_weights']), 'bbox_inside_weights': array2tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': blob_to_tensor(batch_outputs['bbox_outside_weights']), 'bbox_outside_weights': array2tensor(batch_outputs['bbox_outside_weights']),
} }
......
...@@ -17,14 +17,13 @@ import dragon.vm.torch as torch ...@@ -17,14 +17,13 @@ import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.nms.nms_wrapper import nms from lib.nms import nms_wrapper
from lib.nms.nms_wrapper import soft_nms from lib.utils import framework
from lib.utils import time_util
from lib.utils.blob import im_list_to_blob from lib.utils.blob import im_list_to_blob
from lib.utils.boxes import bbox_transform_inv from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image from lib.utils.image import scale_image
from lib.utils.timer import Timer
from lib.utils.graph import FrozenGraph
from lib.utils.vis import vis_one_image from lib.utils.vis import vis_one_image
...@@ -48,13 +47,14 @@ def im_detect(detector, raw_image): ...@@ -48,13 +47,14 @@ def im_detect(detector, raw_image):
with torch.no_grad(): with torch.no_grad():
with torch.jit.Recorder(retain_ops=True): with torch.jit.Recorder(retain_ops=True):
outputs = detector.forward(inputs) outputs = detector.forward(inputs)
detector.frozen_graph = FrozenGraph( detector.frozen_graph = \
{'data': inputs['data'], framework.FrozenGraph(
'ims_info': inputs['ims_info']}, {'data': inputs['data'],
{'rois': outputs['rois'], 'ims_info': inputs['ims_info']},
'cls_prob': outputs['cls_prob'], {'rois': outputs['rois'],
'bbox_pred': outputs['bbox_pred']}, 'cls_prob': outputs['cls_prob'],
) 'bbox_pred': outputs['bbox_pred']},
)
outputs = detector.frozen_graph(**blobs) outputs = detector.frozen_graph(**blobs)
# Decode results # Decode results
...@@ -88,14 +88,13 @@ def test_net(detector, server): ...@@ -88,14 +88,13 @@ def test_net(detector, server):
num_classes = server.num_classes num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()} _t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()}
for i in range(num_images): for i in range(num_images):
image_id, raw_image = server.get_image() image_id, raw_image = server.get_image()
_t['im_detect'].tic() with _t['im_detect'].tic_and_toc():
scores, boxes = im_detect(detector, raw_image) scores, boxes = im_detect(detector, raw_image)
_t['im_detect'].toc()
_t['misc'].tic() _t['misc'].tic()
boxes_this_image = [[]] boxes_this_image = [[]]
...@@ -107,21 +106,30 @@ def test_net(detector, server): ...@@ -107,21 +106,30 @@ def test_net(detector, server):
(cls_boxes, cls_scores[:, np.newaxis]) (cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False) ).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS: if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms( keep = nms_wrapper.soft_nms(
cls_detections, cfg.TEST.NMS, cls_detections,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD, method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA, sigma=cfg.TEST.SOFT_NMS_SIGMA,
) )
else: else:
keep = nms(cls_detections, cfg.TEST.NMS, force_cpu=True) keep = nms_wrapper.nms(
cls_detections,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :] cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections) boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE: if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image( vis_one_image(
raw_image, classes, boxes_this_image, raw_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True, classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_id), filename=server.get_save_filename(image_id),
) )
...@@ -129,7 +137,8 @@ def test_net(detector, server): ...@@ -129,7 +137,8 @@ def test_net(detector, server):
if cfg.TEST.DETECTIONS_PER_IM > 0: if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = [] image_scores = []
for j in range(1, num_classes): for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1]) image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0: if len(image_scores) > 0:
image_scores = np.hstack(image_scores) image_scores = np.hstack(image_scores)
......
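For reference, the score decay behind ``nms_wrapper.soft_nms`` follows the standard Soft-NMS rules (Bodla et al., 2017); the wrapped implementation is not part of this diff, and the method encoding below (0: hard, 1: linear, 2: gaussian) is the conventional one, not confirmed by this commit:

    import numpy as np

    def soft_nms_weight(iou, thresh=0.3, method=1, sigma=0.5):
        """Decay factor applied to the score of an overlapping box."""
        if method == 1:                       # linear
            return 1. - iou if iou > thresh else 1.
        if method == 2:                       # gaussian
            return float(np.exp(-(iou * iou) / sigma))
        return 0. if iou > thresh else 1.     # hard NMS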
...@@ -14,6 +14,7 @@ from __future__ import division ...@@ -14,6 +14,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
...@@ -21,7 +22,7 @@ import numpy.random as npr ...@@ -21,7 +22,7 @@ import numpy.random as npr
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import blob_to_tensor from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
...@@ -180,8 +181,8 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -180,8 +181,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1)) bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return { return {
'labels': blob_to_tensor(labels), 'labels': array2tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets), 'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights), 'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights), 'bbox_outside_weights': array2tensor(bbox_outside_weights),
} }
...@@ -19,20 +19,16 @@ import numpy as np ...@@ -19,20 +19,16 @@ import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms from lib.nms import nms_wrapper
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import blob_to_tensor from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_transform_inv from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module): class ProposalLayer(torch.nn.Module):
""" """Compute proposals by applying transformations anchors."""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
def __init__(self): def __init__(self):
super(ProposalLayer, self).__init__() super(ProposalLayer, self).__init__()
...@@ -86,8 +82,8 @@ class ProposalLayer(torch.nn.Module): ...@@ -86,8 +82,8 @@ class ProposalLayer(torch.nn.Module):
def forward(self, features, cls_prob, bbox_pred, ims_info): def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST' cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N pre_nms_top_n = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE min_size = cfg[cfg_key].RPN_MIN_SIZE
...@@ -110,14 +106,15 @@ class ProposalLayer(torch.nn.Module): ...@@ -110,14 +106,15 @@ class ProposalLayer(torch.nn.Module):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1] scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix] # [n, 4] deltas = batch_deltas[ix] # [n, 4]
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores): if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze()) order = np.argsort(-scores.squeeze())
else: else:
# Avoid sorting possibly large arrays; first partition to get top K # Avoid sorting possibly large arrays; first partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores) # unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN] inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze()) order = np.argsort(-scores[inds].squeeze())
order = inds[order] order = inds[order]
deltas = deltas[order] deltas = deltas[order]
anchors = all_anchors[order] anchors = all_anchors[order]
scores = scores[order] scores = scores[order]
...@@ -136,9 +133,9 @@ class ProposalLayer(torch.nn.Module): ...@@ -136,9 +133,9 @@ class ProposalLayer(torch.nn.Module):
# 6. Apply nms (e.g. threshold = 0.7) # 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_topN (e.g. 300) # 7. Take after_nms_top_n (e.g. 300)
# 8. Return the top proposals (-> RoIs top) # 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh) keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0: if post_nms_top_n > 0:
keep = keep[:post_nms_topN] keep = keep[:post_nms_top_n]
proposals = proposals[keep, :] proposals = proposals[keep, :]
# Output rois blob # Output rois blob
...@@ -156,16 +153,16 @@ class ProposalLayer(torch.nn.Module): ...@@ -156,16 +153,16 @@ class ProposalLayer(torch.nn.Module):
# Distribute rois into K levels # Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1 k = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level) fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level)
all_rois = [] all_rois = []
for i in range(K): for i in range(k):
lv_indices = np.where(fpn_levels == (i + min_level))[0] lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0: if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling # Fake a tiny roi to avoid empty roi pooling
all_rois.append(blob_to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32))) all_rois.append(array2tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else: else:
all_rois.append(blob_to_tensor(rpn_rois[lv_indices])) all_rois.append(array2tensor(rpn_rois[lv_indices]))
return all_rois return all_rois
......
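``_map_rois_to_fpn_levels`` is referenced above but not shown in this diff. A sketch of the usual heuristic from the FPN paper, assuming rois are laid out as (batch_idx, x1, y1, x2, y2) as the fake roi above suggests, and using the paper defaults k0 = 4 and a 224-pixel canonical scale (the actual config may differ):

    import numpy as np

    def _map_rois_to_fpn_levels(rois, k_min, k_max):
        """Assign each RoI to an FPN level by its area (illustrative sketch)."""
        ws = rois[:, 3] - rois[:, 1] + 1
        hs = rois[:, 4] - rois[:, 2] + 1
        levels = np.floor(4 + np.log2(np.sqrt(ws * hs) / 224.))
        return np.clip(levels, k_min, k_max)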
...@@ -13,12 +13,12 @@ from __future__ import absolute_import ...@@ -13,12 +13,12 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np import numpy as np
import numpy.random as npr import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
...@@ -87,9 +87,9 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -87,9 +87,9 @@ class ProposalTargetLayer(torch.nn.Module):
# Distribute rois into K levels # Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1 k = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level) fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)] lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(k)]
_fmap_rois( _fmap_rois(
inputs=[batch_outputs[key] for key in keys], inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs, fake_outputs=self.fake_outputs,
...@@ -99,11 +99,11 @@ class ProposalTargetLayer(torch.nn.Module): ...@@ -99,11 +99,11 @@ class ProposalTargetLayer(torch.nn.Module):
) )
return { return {
'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)], 'rois': [array2tensor(outputs['rois'][i]) for i in range(k)],
'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)), 'labels': array2tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])), 'bbox_targets': array2tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])), 'bbox_inside_weights': array2tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])), 'bbox_outside_weights': array2tensor(np.vstack(outputs['bbox_outside_weights'])),
} }
......
...@@ -29,7 +29,7 @@ from lib.utils.logger import is_root ...@@ -29,7 +29,7 @@ from lib.utils.logger import is_root
class Detector(torch.nn.Module): class Detector(torch.nn.Module):
"""The "Detector" organizes the detection pipelines. """Organize the detection pipelines.
A bunch of classic algorithms are integrated; see the A bunch of classic algorithms are integrated; see the
``lib.core.config`` for their hyper-parameters. ``lib.core.config`` for their hyper-parameters.
...@@ -112,9 +112,10 @@ class Detector(torch.nn.Module): ...@@ -112,9 +112,10 @@ class Detector(torch.nn.Module):
# 1. Extract features # 1. Extract features
# Process the data: # Process the data:
# 1) NHWC => NCHW # 0) CPU => CUDA
# 2) uint8 => float32 or float16 # 1) NHWC => NCHW
# 3) Mean subtraction # 2) uint8 => float32 or float16
# 3) Mean subtraction
image_data = self.bootstrap(inputs['data']) image_data = self.bootstrap(inputs['data'])
features = self.body(image_data) features = self.body(image_data)
......
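The numbered preprocessing steps above can be reproduced on a plain numpy batch; this sketch covers steps 1-3 only (the CPU => CUDA transfer is device-specific) and is not the real ``Bootstrap`` op:

    import numpy as np

    def bootstrap_sketch(data, pixel_means):
        """Mimic the bootstrap on an NHWC uint8 batch (illustrative only)."""
        x = data.transpose(0, 3, 1, 2).astype('float32')   # 1) NHWC => NCHW, 2) uint8 => float32
        means = np.asarray(pixel_means, 'float32').reshape(1, 3, 1, 1)
        return x - means                                   # 3) Mean subtraction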
...@@ -30,17 +30,18 @@ class FPN(torch.nn.Module): ...@@ -30,17 +30,18 @@ class FPN(torch.nn.Module):
super(FPN, self).__init__() super(FPN, self).__init__()
self.C = torch.nn.ModuleList() self.C = torch.nn.ModuleList()
self.P = torch.nn.ModuleList() self.P = torch.nn.ModuleList()
self.apply_func = self.apply_on_rcnn
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1): for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1):
self.C.append(conv1x1(feature_dims[lvl - 1], cfg.FPN.DIM, bias=True)) self.C.append(conv1x1(feature_dims[lvl - 1], cfg.FPN.DIM, bias=True))
self.P.append(conv3x3(cfg.FPN.DIM, cfg.FPN.DIM, bias=True)) self.P.append(conv3x3(cfg.FPN.DIM, cfg.FPN.DIM, bias=True))
if 'retinanet' in cfg.MODEL.TYPE or 'ssd' in cfg.MODEL.TYPE: if 'rcnn' in cfg.MODEL.TYPE:
self.apply_func = self.apply_on_rcnn
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True)
else:
self.apply_func = self.apply_on_generic
self.relu = torch.nn.ReLU(inplace=False)
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1): for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else cfg.FPN.DIM dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else cfg.FPN.DIM
self.P.append(conv3x3(dim_in, cfg.FPN.DIM, stride=2, bias=True)) self.P.append(conv3x3(dim_in, cfg.FPN.DIM, stride=2, bias=True))
self.apply_func = self.apply_on_retinanet
self.relu = torch.nn.ReLU(inplace=False)
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True)
self.reset_parameters() self.reset_parameters()
self.feature_dims = [cfg.FPN.DIM] self.feature_dims = [cfg.FPN.DIM]
...@@ -69,7 +70,7 @@ class FPN(torch.nn.Module): ...@@ -69,7 +70,7 @@ class FPN(torch.nn.Module):
outputs.insert(0, self.P[i - min_lvl](fpn_input)) outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs return outputs
def apply_on_retinanet(self, features): def apply_on_generic(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)] outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
......
...@@ -37,7 +37,7 @@ def nms(detections, thresh, force_cpu=False): ...@@ -37,7 +37,7 @@ def nms(detections, thresh, force_cpu=False):
if detections.shape[0] == 0: if detections.shape[0] == 0:
return [] return []
if detections.shape[1] == 6: if detections.shape[1] == 6:
return rotated_boxes.nms(detections, thresh) return rotated_boxes.cpu_nms(detections, thresh)
if cfg.USE_GPU_NMS and not force_cpu: if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID) return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
else: else:
......
...@@ -17,7 +17,6 @@ import dragon.vm.torch as torch ...@@ -17,7 +17,6 @@ import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.ops import functional as F from lib.ops import functional as F
from lib.utils.blob import blob_to_tensor
class Bootstrap(torch.nn.Module): class Bootstrap(torch.nn.Module):
...@@ -25,7 +24,7 @@ class Bootstrap(torch.nn.Module): ...@@ -25,7 +24,7 @@ class Bootstrap(torch.nn.Module):
def __init__(self): def __init__(self):
super(Bootstrap, self).__init__() super(Bootstrap, self).__init__()
self.dtype = cfg.MODEL.DATA_TYPE.lower() self.dtype = cfg.MODEL.PRECISION.lower()
self.mean_values = cfg.PIXEL_MEANS self.mean_values = cfg.PIXEL_MEANS
self.dummy_buffer = torch.ones(1) self.dummy_buffer = torch.ones(1)
......
...@@ -19,7 +19,7 @@ import numpy as np ...@@ -19,7 +19,7 @@ import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors_v2 from lib.faster_rcnn.generate_anchors import generate_anchors_v2
from lib.utils import logger from lib.utils import logger
from lib.utils.blob import blob_to_tensor from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
...@@ -145,8 +145,8 @@ class AnchorTargetLayer(torch.nn.Module): ...@@ -145,8 +145,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1)) bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return { return {
'labels': blob_to_tensor(labels), 'labels': array2tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets), 'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights), 'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights), 'bbox_outside_weights': array2tensor(bbox_outside_weights),
} }
...@@ -17,44 +17,14 @@ import dragon.vm.torch as torch ...@@ -17,44 +17,14 @@ import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.nms.nms_wrapper import nms from lib.nms import nms_wrapper
from lib.nms.nms_wrapper import soft_nms from lib.utils import framework
from lib.utils import time_util
from lib.utils.blob import im_list_to_blob from lib.utils.blob import im_list_to_blob
from lib.utils.graph import FrozenGraph
from lib.utils.image import scale_image from lib.utils.image import scale_image
from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales."""
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale
], dtype=np.float32)
# Do Forward
if not hasattr(detector, 'frozen_graph'):
inputs = {
'data': torch.from_numpy(blobs['data']),
'ims_info': torch.from_numpy(blobs['ims_info']),
}
with torch.no_grad():
with torch.jit.Recorder(retain_ops=True):
outputs = detector.forward(inputs)
detector.frozen_graph = FrozenGraph(
{'data': inputs['data'],
'ims_info': inputs['ims_info']},
{'detections': outputs['detections']},
)
outputs = detector.frozen_graph(**blobs)
return outputs['detections'][:, 1:]
def ims_detect(detector, raw_images): def ims_detect(detector, raw_images):
"""Detect images, with single or multiple scales.""" """Detect images, with single or multiple scales."""
ims, ims_scale = scale_image(raw_images[0]) ims, ims_scale = scale_image(raw_images[0])
...@@ -81,11 +51,12 @@ def ims_detect(detector, raw_images): ...@@ -81,11 +51,12 @@ def ims_detect(detector, raw_images):
with torch.no_grad(): with torch.no_grad():
with torch.jit.Recorder(retain_ops=True): with torch.jit.Recorder(retain_ops=True):
outputs = detector.forward(inputs) outputs = detector.forward(inputs)
detector.frozen_graph = FrozenGraph( detector.frozen_graph = \
{'data': inputs['data'], framework.FrozenGraph(
'ims_info': inputs['ims_info']}, {'data': inputs['data'],
{'detections': outputs['detections']}, 'ims_info': inputs['ims_info']},
) {'detections': outputs['detections']},
)
outputs = detector.frozen_graph(**blobs) outputs = detector.frozen_graph(**blobs)
# Unpack results # Unpack results
...@@ -111,24 +82,21 @@ def test_net(detector, server): ...@@ -111,24 +82,21 @@ def test_net(detector, server):
num_classes = server.num_classes num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()} _t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH): for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths # Collect raw images and ground-truths
image_ids, raw_images = [], [] image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH): for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images: continue if batch_idx + item_idx >= num_images:
continue
image_id, raw_image = server.get_image() image_id, raw_image = server.get_image()
image_ids.append(image_id) image_ids.append(image_id)
raw_images.append(raw_image) raw_images.append(raw_image)
# Run detection on specific scales # Run detection on specific scales
_t['im_detect'].tic() with _t['im_detect'].tic_and_toc():
if cfg.TEST.IMS_PER_BATCH > 1:
results = ims_detect(detector, raw_images) results = ims_detect(detector, raw_images)
else:
results = [im_detect(detector, raw_images[0])]
_t['im_detect'].toc()
# Post-Processing # Post-Processing
_t['misc'].tic() _t['misc'].tic()
...@@ -139,22 +107,22 @@ def test_net(detector, server): ...@@ -139,22 +107,22 @@ def test_net(detector, server):
detections = np.array(detections) detections = np.array(detections)
for j in range(1, num_classes): for j in range(1, num_classes):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0] cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4] cls_boxes = detections[cls_indices, :4]
cls_scores = detections[cls_indices, 4] cls_scores = detections[cls_indices, 4]
cls_detections = np.hstack(( cls_detections = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])) \ cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False) .astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS: if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms( keep = nms_wrapper.soft_nms(
cls_detections, cls_detections,
cfg.TEST.NMS, thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD, method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA, sigma=cfg.TEST.SOFT_NMS_SIGMA,
) )
else: else:
keep = nms( keep = nms_wrapper.nms(
cls_detections, cls_detections,
cfg.TEST.NMS, thresh=cfg.TEST.NMS,
force_cpu=True, force_cpu=True,
) )
cls_detections = cls_detections[keep, :] cls_detections = cls_detections[keep, :]
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from lib.core.config import cfg
class _LRScheduler(object):
def __init__(
self,
lr_max,
lr_min=0.,
warmup_steps=0,
warmup_factor=0.,
):
self._step_count = 0
self._lr_max, self._lr_min = lr_max, lr_min
self._warmup_steps = warmup_steps
self._warmup_factor = warmup_factor
self._last_lr = self._lr_max
self._last_steps = self._warmup_steps
def step(self):
self._step_count += 1
def get_lr(self):
if self._step_count < self._warmup_steps:
alpha = (self._step_count + 1.) / self._warmup_steps
decay_factor = self._warmup_factor * (1 - alpha) + alpha
self._last_lr = self._lr_max * decay_factor
return self._last_lr
return self.schedule_impl()
def schedule_impl(self):
raise NotImplementedError
class StepLR(_LRScheduler):
def __init__(
self,
lr_max,
decay_step,
decay_gamma,
warmup_steps=0,
warmup_factor=0.,
):
super(StepLR, self).__init__(
lr_max=lr_max,
warmup_steps=warmup_steps,
warmup_factor=warmup_factor,
)
self._decay_step = decay_step
self._decay_gamma = decay_gamma
def schedule_impl(self):
step_count = self._step_count - self._last_steps
if step_count % self._decay_step == 0:
decay_factor = step_count // self._decay_step
self._last_lr = self._lr_max * (
self._decay_gamma ** decay_factor)
return self._last_lr
class MultiStepLR(_LRScheduler):
def __init__(
self,
lr_max,
decay_steps,
decay_gamma,
warmup_steps=0,
warmup_factor=0.,
):
super(MultiStepLR, self).__init__(
lr_max=lr_max,
warmup_steps=warmup_steps,
warmup_factor=warmup_factor,
)
self._decay_steps = decay_steps
self._decay_gamma = decay_gamma
self._stage_count, self._num_stages = 0, len(self._decay_steps)
def schedule_impl(self):
if self._stage_count < self._num_stages:
k = self._decay_steps[self._stage_count]
while self._step_count >= k:
self._stage_count += 1
if self._stage_count >= self._num_stages:
break
k = self._decay_steps[self._stage_count]
self._last_lr = self._lr_max * (
self._decay_gamma ** self._stage_count)
return self._last_lr
class LinearLR(_LRScheduler):
def __init__(
self,
lr_max,
decay_step,
max_steps,
warmup_steps=0,
warmup_factor=0.,
):
super(LinearLR, self).__init__(
lr_max=lr_max,
lr_min=0.,
warmup_steps=warmup_steps,
warmup_factor=warmup_factor,
)
self._decay_step = decay_step
self._max_steps = max_steps - warmup_steps
def schedule_impl(self):
step_count = self._step_count - self._last_steps
if step_count % self._decay_step == 0:
decay_factor = 1. - float(step_count) / self._max_steps
self._last_lr = self._lr_max * decay_factor
return self._last_lr
class CosineLR(_LRScheduler):
def __init__(
self,
lr_max,
lr_min,
decay_step,
max_steps,
warmup_steps=0,
warmup_factor=0.,
):
super(CosineLR, self).__init__(
lr_max=lr_max,
lr_min=lr_min,
warmup_steps=warmup_steps,
warmup_factor=warmup_factor,
)
self._decay_step = decay_step
self._max_steps = max_steps - warmup_steps
def schedule_impl(self):
step_count = self._step_count - self._last_steps
if step_count % self._decay_step == 0:
decay_factor = 0.5 * (1. + math.cos(
math.pi * step_count / self._max_steps))
self._last_lr = self._lr_min + (
self._lr_max - self._lr_min
) * decay_factor
return self._last_lr
def get_scheduler():
lr_policy = cfg.SOLVER.LR_POLICY
if lr_policy == 'step':
return StepLR(
lr_max=cfg.SOLVER.BASE_LR,
decay_step=cfg.SOLVER.DECAY_STEP,
decay_gamma=cfg.SOLVER.DECAY_GAMMA,
warmup_steps=cfg.SOLVER.WARM_UP_STEPS,
warmup_factor=cfg.SOLVER.WARM_UP_FACTOR,
)
elif lr_policy == 'steps_with_decay':
return MultiStepLR(
lr_max=cfg.SOLVER.BASE_LR,
decay_steps=cfg.SOLVER.DECAY_STEPS,
decay_gamma=cfg.SOLVER.DECAY_GAMMA,
warmup_steps=cfg.SOLVER.WARM_UP_STEPS,
warmup_factor=cfg.SOLVER.WARM_UP_FACTOR,
)
elif lr_policy == 'cosine_decay':
return CosineLR(
lr_max=cfg.SOLVER.BASE_LR,
lr_min=0.,
decay_step=cfg.SOLVER.DECAY_STEP,
max_steps=cfg.SOLVER.MAX_STEPS,
warmup_steps=cfg.SOLVER.WARM_UP_STEPS,
warmup_factor=cfg.SOLVER.WARM_UP_FACTOR,
)
else:
raise ValueError('Unknown lr policy: ' + lr_policy)
if __name__ == '__main__':
def extract_label(scheduler):
class_name = scheduler.__class__.__name__
label = class_name + '('
if class_name == 'StepLR':
label += 'α=' + str(scheduler._decay_step) + ', '
label += 'γ=' + str(scheduler._decay_gamma)
elif class_name == 'MultiStepLR':
label += 'α=' + str(scheduler._decay_steps) + ', '
label += 'γ=' + str(scheduler._decay_gamma)
elif class_name == 'CosineLR':
label += 'α=' + str(scheduler._decay_step)
label += ')'
return label
vis = True
max_steps = 240
shared_args = {
'lr_max': 0.4,
'warmup_steps': 5,
'warmup_factor': 0.,
}
schedulers = [
StepLR(decay_step=1, decay_gamma=0.97, **shared_args),
MultiStepLR(decay_steps=[60, 120, 180], decay_gamma=0.1, **shared_args),
CosineLR(lr_min=0., decay_step=1, max_steps=max_steps, **shared_args),
LinearLR(decay_step=1, max_steps=max_steps, **shared_args),
]
for i in range(max_steps):
info = 'Step = %d\n' % i
for scheduler in schedulers:
if i == 0:
scheduler.lr_seq = []
info += ' * {}: {}\n'.format(
extract_label(scheduler),
scheduler.get_lr())
scheduler.lr_seq.append(scheduler.get_lr())
scheduler.step()
if not vis:
print(info)
if vis:
import matplotlib.pyplot as plt
plt.figure(1)
plt.title('Visualization of different LR Schedulers')
plt.xlabel('Step')
plt.ylabel('Learning Rate')
line = '--'
colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k']
for i, scheduler in enumerate(schedulers):
plt.plot(
range(max_steps),
scheduler.lr_seq,
colors[i] + line,
linewidth=1.,
label=extract_label(scheduler),
)
plt.legend()
plt.show()
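For reference, the warmup branch in ``_LRScheduler.get_lr`` interpolates between ``lr_max * warmup_factor`` and ``lr_max``; a worked example with the demo settings above (lr_max=0.4, warmup_steps=5, warmup_factor=0.):

    lr_max, warmup_steps, warmup_factor = 0.4, 5, 0.
    for step in range(warmup_steps):
        alpha = (step + 1.) / warmup_steps
        print(lr_max * (warmup_factor * (1 - alpha) + alpha))
    # yields 0.08, 0.16, 0.24, 0.32, 0.40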
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling.detector import Detector
from lib.solver import lr_scheduler
from lib.utils import framework
from lib.utils import time_util
class SGDSolver(object):
def __init__(self):
# Define the generic detector
self.detector = Detector()
# Define the optimizer and its arguments
self.optimizer = torch.optim.SGD(
framework.get_param_groups(self.detector),
lr=cfg.SOLVER.BASE_LR,
momentum=cfg.SOLVER.MOMENTUM,
weight_decay=cfg.SOLVER.WEIGHT_DECAY,
clip_gradient=float(cfg.SOLVER.CLIP_NORM),
scale_gradient=1. / cfg.SOLVER.LOSS_SCALING,
)
self.lr_scheduler = lr_scheduler.get_scheduler()
def one_step(self):
def add_loss(x, y):
return y if x is None else x + y
stats = {
'iter': self.iter,
'loss': {'total': 0.},
'time': time_util.Timer(),
}
with stats['time'].tic_and_toc():
# Forward pass
outputs = self.detector()
# Backward pass
total_loss = None
loss_scaling = cfg.SOLVER.LOSS_SCALING
for k, v in outputs.items():
if 'loss' in k:
if k not in stats['loss']:
stats['loss'][k] = 0.
total_loss = add_loss(total_loss, v)
stats['loss'][k] += float(v) * loss_scaling
if loss_scaling != 1.:
total_loss *= loss_scaling
stats['loss']['total'] += float(total_loss)
total_loss.backward()
# Apply Update
self.base_lr = self.lr_scheduler.get_lr()
self.optimizer.step()
self.lr_scheduler.step()
# Misc stats
stats['lr'] = self.base_lr
stats['time'] = stats['time'].total_time
return stats
@property
def base_lr(self):
return self.optimizer.param_groups[0]['lr']
@base_lr.setter
def base_lr(self, value):
for group in self.optimizer.param_groups:
group['lr'] = value
@property
def iter(self):
return self.lr_scheduler._step_count
@iter.setter
def iter(self, value):
self.lr_scheduler._step_count = value
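A minimal driving loop for this class, mirroring how ``SolverWrapper`` above consumes it; this sketch assumes ``cfg`` has already been merged from an experiment file:

    solver = SGDSolver()
    solver.detector.load_weights(cfg.TRAIN.WEIGHTS)
    solver.detector.cuda(cfg.GPU_ID)
    while solver.iter < cfg.SOLVER.MAX_STEPS:
        stats = solver.one_step()
        if stats['iter'] % cfg.SOLVER.DISPLAY == 0:
            print('Iteration %d, lr = %.8f, loss = %f'
                  % (stats['iter'], stats['lr'], stats['loss']['total']))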
...@@ -83,7 +83,7 @@ class DataTransformer(multiprocessing.Process): ...@@ -83,7 +83,7 @@ class DataTransformer(multiprocessing.Process):
] ]
else: else:
roi_dict['boxes'][object_idx, :] = \ roi_dict['boxes'][object_idx, :] = \
rotated_boxes.canonicalize( rotated_boxes.vertices2box(
[obj['x1'], obj['y1'], [obj['x1'], obj['y1'],
obj['x2'], obj['y2'], obj['x2'], obj['y2'],
obj['x3'], obj['y3'], obj['x3'], obj['y3'],
...@@ -108,7 +108,7 @@ class DataTransformer(multiprocessing.Process): ...@@ -108,7 +108,7 @@ class DataTransformer(multiprocessing.Process):
def get(self, example): def get(self, example):
img = np.frombuffer(example['content'], np.uint8) img = np.frombuffer(example['content'], np.uint8)
img = cv2.imdecode(img, -1) img = cv2.imdecode(img, 1)
# Flip # Flip
flip = False flip = False
......
...@@ -17,7 +17,7 @@ import dragon.vm.torch as torch ...@@ -17,7 +17,7 @@ import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor from lib.utils.blob import array2tensor
class HardMiningLayer(torch.nn.Module): class HardMiningLayer(torch.nn.Module):
...@@ -63,4 +63,4 @@ class HardMiningLayer(torch.nn.Module): ...@@ -63,4 +63,4 @@ class HardMiningLayer(torch.nn.Module):
labels_wide[ix][bg_inds] = 0 # Use hard negatives as bg indices labels_wide[ix][bg_inds] = 0 # Use hard negatives as bg indices
# Feed labels to compute cls loss # Feed labels to compute cls loss
return {'labels': blob_to_tensor(labels_wide)} return {'labels': array2tensor(labels_wide)}
...@@ -17,7 +17,7 @@ import numpy as np ...@@ -17,7 +17,7 @@ import numpy as np
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes from lib.utils.boxes import dismantle_gt_boxes
...@@ -121,7 +121,7 @@ class MultiBoxTargetLayer(torch.nn.Module): ...@@ -121,7 +121,7 @@ class MultiBoxTargetLayer(torch.nn.Module):
bbox_outside_weights_wide[ix][ex_inds] = bbox_reg_weight bbox_outside_weights_wide[ix][ex_inds] = bbox_reg_weight
return { return {
'bbox_targets': blob_to_tensor(bbox_targets_wide), 'bbox_targets': array2tensor(bbox_targets_wide),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights_wide), 'bbox_inside_weights': array2tensor(bbox_inside_weights_wide),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights_wide), 'bbox_outside_weights': array2tensor(bbox_outside_weights_wide),
} }
...@@ -18,12 +18,11 @@ import dragon.vm.torch as torch ...@@ -18,12 +18,11 @@ import dragon.vm.torch as torch
import numpy as np import numpy as np
from lib.core.config import cfg from lib.core.config import cfg
from lib.nms.nms_wrapper import nms from lib.nms import nms_wrapper
from lib.nms.nms_wrapper import soft_nms from lib.utils import framework
from lib.utils import time_util
from lib.utils.boxes import bbox_transform_inv from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_boxes from lib.utils.boxes import clip_boxes
from lib.utils.timer import Timer
from lib.utils.graph import FrozenGraph
from lib.utils.vis import vis_one_image from lib.utils.vis import vis_one_image
...@@ -49,12 +48,13 @@ def ims_detect(detector, ims): ...@@ -49,12 +48,13 @@ def ims_detect(detector, ims):
with torch.no_grad(): with torch.no_grad():
with torch.jit.Recorder(retain_ops=True): with torch.jit.Recorder(retain_ops=True):
outputs = detector.forward(inputs={'data': image}) outputs = detector.forward(inputs={'data': image})
detector.frozen_graph = FrozenGraph( detector.frozen_graph = \
{'data': image}, framework.FrozenGraph(
{'cls_prob': outputs['cls_prob'], {'data': image},
'bbox_pred': outputs['bbox_pred']}, {'cls_prob': outputs['cls_prob'],
{'prior_boxes': outputs['prior_boxes']}, 'bbox_pred': outputs['bbox_pred']},
) {'prior_boxes': outputs['prior_boxes']},
)
outputs = detector.frozen_graph(data=data) outputs = detector.frozen_graph(data=data)
# Decode results # Decode results
...@@ -81,21 +81,21 @@ def test_net(detector, server): ...@@ -81,21 +81,21 @@ def test_net(detector, server):
num_classes = server.num_classes num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()} _t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH): for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths # Collect raw images and ground-truths
image_ids, raw_images = [], [] image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH): for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images: continue if batch_idx + item_idx >= num_images:
continue
image_id, raw_image = server.get_image() image_id, raw_image = server.get_image()
image_ids.append(image_id) image_ids.append(image_id)
raw_images.append(raw_image) raw_images.append(raw_image)
_t['im_detect'].tic() with _t['im_detect'].tic_and_toc():
batch_scores, batch_boxes = ims_detect(detector, raw_images) batch_scores, batch_boxes = ims_detect(detector, raw_images)
_t['im_detect'].toc()
_t['misc'].tic() _t['misc'].tic()
for item_idx in range(len(batch_scores)): for item_idx in range(len(batch_scores)):
...@@ -114,16 +114,16 @@ def test_net(detector, server): ...@@ -114,16 +114,16 @@ def test_net(detector, server):
(cls_boxes, cls_scores[:, np.newaxis])) \ (cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False) .astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS: if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms( keep = nms_wrapper.soft_nms(
cls_detections, cls_detections,
cfg.TEST.NMS, thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD, method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA, sigma=cfg.TEST.SOFT_NMS_SIGMA,
) )
else: else:
keep = nms( keep = nms_wrapper.nms(
cls_detections, cls_detections,
cfg.TEST.NMS, thresh=cfg.TEST.NMS,
force_cpu=True, force_cpu=True,
) )
cls_detections = cls_detections[keep, :] cls_detections = cls_detections[keep, :]
......
...@@ -47,18 +47,16 @@ class Distort(object): ...@@ -47,18 +47,16 @@ class Distort(object):
def apply(self, img, boxes=None): def apply(self, img, boxes=None):
img = PIL.Image.fromarray(img) img = PIL.Image.fromarray(img)
if npr.uniform() < self._brightness_prob: transforms = [
delta = npr.uniform(-0.3, 0.3) + 1. (PIL.ImageEnhance.Brightness, self._brightness_prob),
img = PIL.ImageEnhance.Brightness(img) (PIL.ImageEnhance.Contrast, self._contrast_prob),
img = img.enhance(delta) (PIL.ImageEnhance.Color, self._saturation_prob),
if npr.uniform() < self._contrast_prob: ]
delta = npr.uniform(-0.3, 0.3) + 1. npr.shuffle(transforms)
img = PIL.ImageEnhance.Contrast(img) for transform_fn, prob in transforms:
img = img.enhance(delta) if npr.uniform() < prob:
if npr.uniform() < self._saturation_prob: img = transform_fn(img)
delta = npr.uniform(-0.3, 0.3) + 1. img = img.enhance(1. + npr.uniform(-.4, .4))
img = PIL.ImageEnhance.Color(img)
img = img.enhance(delta)
return np.array(img), boxes return np.array(img), boxes
......
...@@ -21,7 +21,8 @@ import numpy as np ...@@ -21,7 +21,8 @@ import numpy as np
import dragon.vm.torch as torch import dragon.vm.torch as torch
from lib.core.config import cfg from lib.core.config import cfg
from lib.utils.image import resize_image, distort_image from lib.utils.image import distort_image
from lib.utils.image import resize_image
def im_list_to_blob(ims): def im_list_to_blob(ims):
...@@ -60,17 +61,17 @@ def mask_list_to_blob(masks): ...@@ -60,17 +61,17 @@ def mask_list_to_blob(masks):
return blob return blob
def prep_im_for_blob(im, target_size, max_size): def prep_im_for_blob(img, target_size, max_size):
"""Scale an image for use in a blob.""" """Scale an image for use in a blob."""
im_shape, jitter = im.shape, 1. im_shape, jitter = img.shape, 1.
if cfg.TRAIN.COLOR_JITTERING: if cfg.TRAIN.USE_COLOR_JITTER:
im = distort_image(im) img = distort_image(img)
if max_size > 0: if max_size > 0:
# Scale image along the shortest side # Scale image along the shortest side
im_size_min = np.min(im_shape[0:2]) im_size_min = np.min(im_shape[:2])
im_size_max = np.max(im_shape[0:2]) im_size_max = np.max(im_shape[:2])
im_scale = float(target_size) / float(im_size_min) im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE # Prevent the biggest axis from being more than MAX_SIZE
...@@ -78,31 +79,31 @@ def prep_im_for_blob(im, target_size, max_size): ...@@ -78,31 +79,31 @@ def prep_im_for_blob(im, target_size, max_size):
im_scale = float(max_size) / float(im_size_max) im_scale = float(max_size) / float(im_size_max)
else: else:
# Scale image along the longest side # Scale image along the longest side
im_size_max = np.max(im_shape[0:2]) im_size_max = np.max(im_shape[:2])
im_scale = float(target_size) / float(im_size_max) im_scale = float(target_size) / float(im_size_max)
if cfg.TRAIN.SCALE_JITTERING: if cfg.TRAIN.USE_SCALE_JITTER:
r = cfg.TRAIN.SCALE_RANGE r = cfg.TRAIN.SCALE_JITTER_RANGE
jitter = r[0] + np.random.rand() * (r[1] - r[0]) jitter = r[0] + np.random.rand() * (r[1] - r[0])
im_scale *= jitter im_scale *= jitter
return resize_image(im, im_scale, im_scale), im_scale, jitter return resize_image(img, im_scale, im_scale), im_scale, jitter
def blob_to_tensor(blob, enforce_cpu=False): def array2tensor(array, enforce_cpu=False):
if isinstance(blob, np.ndarray): if isinstance(array, np.ndarray):
# Zero-Copy from numpy # Zero-Copy from numpy
cpu_tensor = torch.from_numpy(blob) cpu_tensor = torch.from_numpy(array)
else: else:
cpu_tensor = blob cpu_tensor = array
return cpu_tensor if enforce_cpu else \ return cpu_tensor if enforce_cpu else \
cpu_tensor.cuda(cfg.GPU_ID) cpu_tensor.cuda(cfg.GPU_ID)
def tensor_to_blob(blob, copy=False): def tensor2array(tensor, copy=False):
if isinstance(blob, torch.Tensor): if isinstance(tensor, torch.Tensor):
# Zero-Copy from numpy # Zero-Copy from numpy
array = blob.numpy(True) array = tensor.numpy(True)
else: else:
array = blob array = tensor
return array.copy() if copy else array return array.copy() if copy else array
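A minimal round-trip sketch of the renamed helpers above (illustrative only; it relies on the zero-copy semantics shown in the code):

    import numpy as np
    blob = np.zeros((2, 3), 'float32')
    tensor = array2tensor(blob, enforce_cpu=True)  # Zero-copy wrap on CPU
    array = tensor2array(tensor)                   # Zero-copy view back
    array_copy = tensor2array(tensor, copy=True)   # Detached copy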
...@@ -16,8 +16,100 @@ from __future__ import print_function ...@@ -16,8 +16,100 @@ from __future__ import print_function
import collections import collections
import dragon import dragon
import dragon.vm.torch as torch
from dragon.core.framework import tensor_util from dragon.core.framework import tensor_util
from dragon.vm.torch.jit.recorder import get_default_recorder from dragon.core.util import six
def get_param_groups(module, bias_lr=1., bias_decay=0.):
"""Separate weight and bias into parameters groups.
Parameters
----------
module : dragon.vm.torch.nn.Module
The module to collect parameters.
bias_lr : float, optional, default=1.
The learning rate multiplier of bias.
bias_decay : float, optional, default=0.
The weight decay multiplier of bias.
Returns
-------
Sequence[ParamGroup]
The parameter groups.
"""
param_groups = [
{
'params': [],
'lr_mult': 1.,
'decay_mult': 1.,
},
{
'params': [],
'lr_mult': bias_lr,
'decay_mult': bias_decay,
}
]
for name, param in module.named_parameters():
gi = 1 if 'bias' in name else 0
param_groups[gi]['params'].append(param)
if len(param_groups[1]['params']) == 0:
param_groups.pop() # Remove empty group
return param_groups
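A sketch of feeding these groups into an optimizer (assuming dragon.vm.torch.optim.SGD honors the lr_mult/decay_mult keys as Dragon solvers do; the module name and hyper-parameters below are illustrative, not from this commit):

    import dragon.vm.torch as torch
    # Hypothetical: double the bias lr and disable its weight decay.
    param_groups = get_param_groups(detector, bias_lr=2., bias_decay=0.)
    optimizer = torch.optim.SGD(
        param_groups, lr=0.02, momentum=0.9, weight_decay=1e-4)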
def get_workspace():
"""Return the current default workspace.
Returns
-------
dragon.Workspace
The default workspace.
"""
return dragon.workspace.get_default()
def new_workspace(merge_default=True):
"""Create a new workspace.
Parameters
----------
merge_default : bool, optional, default=True
**True** to merge tensors from default workspace.
Returns
-------
dragon.Workspace
The new workspace.
"""
workspace = dragon.Workspace()
if merge_default:
workspace.merge_from(get_workspace())
return workspace
def reset_workspace(workspace=None, merge_default=True):
"""Reset a workspace and return a new one.
Parameters
----------
workspace : dragon.Workspace, optional
The workspace to reset.
merge_default : bool, optional, default=True
**True** to merge tensors from default workspace.
Returns
-------
dragon.Workspace
The new workspace.
"""
if workspace is not None:
workspace.Clear() # Block the GIL
return new_workspace(merge_default)
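A lifecycle sketch for these workspace helpers (illustrative only):

    # Run inference in an isolated workspace, then release its buffers.
    ws = new_workspace()          # Inherits tensors from the default workspace
    with ws.as_default():
        pass                      # ... execute a frozen graph here ...
    ws = reset_workspace(ws)      # Clear the old buffers, get a fresh workspace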
class FrozenGraph(object): class FrozenGraph(object):
...@@ -41,9 +133,8 @@ class FrozenGraph(object): ...@@ -41,9 +133,8 @@ class FrozenGraph(object):
self._inputs = canonicalize(inputs) self._inputs = canonicalize(inputs)
self._outputs = canonicalize(outputs) self._outputs = canonicalize(outputs)
self._constants = canonicalize(constants) self._constants = canonicalize(constants)
self._graph = dragon.Workspace() \ self._graph = new_workspace()
.merge_from(dragon.workspace.get_default()) self._tape = torch.jit.get_default_recorder()
self._tape = get_default_recorder()
def forward(self, **kwargs): def forward(self, **kwargs):
# Assign inputs # Assign inputs
...@@ -70,3 +161,7 @@ class FrozenGraph(object): ...@@ -70,3 +161,7 @@ class FrozenGraph(object):
def __call__(self, **kwargs): def __call__(self, **kwargs):
with self._graph.as_default(): with self._graph.as_default():
return self.forward(**kwargs) return self.forward(**kwargs)
# Aliases
pickle = six.moves.pickle
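A usage sketch of the refactored FrozenGraph, mirroring the call site in the test engine above (the tensor names are assumptions taken from that snippet):

    graph = framework.FrozenGraph(
        {'data': image},                          # Inputs
        {'cls_prob': outputs['cls_prob'],
         'bbox_pred': outputs['bbox_pred']},      # Outputs
        {'prior_boxes': outputs['prior_boxes']},  # Constants
    )
    results = graph(data=next_image_blob)         # Replay the recorded ops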
...@@ -21,9 +21,50 @@ import PIL.ImageEnhance ...@@ -21,9 +21,50 @@ import PIL.ImageEnhance
from lib.core.config import cfg from lib.core.config import cfg
def resize_image(im, fx, fy): def distort_image(img):
img = PIL.Image.fromarray(img)
transforms = [
PIL.ImageEnhance.Brightness,
PIL.ImageEnhance.Contrast,
PIL.ImageEnhance.Color,
]
np.random.shuffle(transforms)
for transform in transforms:
if np.random.uniform() < .5:
img = transform(img)
img = img.enhance(1. + np.random.uniform(-.4, .4))
return np.array(img)
def get_image_with_target_size(target_size, img):
im_shape = list(img.shape)
height_diff = target_size[0] - im_shape[0]
width_diff = target_size[1] - im_shape[1]
ofs_crop_width = np.random.randint(max(-width_diff, 0) + 1)
ofs_pad_width = np.random.randint(max(width_diff, 0) + 1)
ofs_crop_height = np.random.randint(max(-height_diff, 0) + 1)
ofs_pad_height = np.random.randint(max(height_diff, 0) + 1)
im_shape[:2] = target_size
new_img = np.empty(im_shape, dtype=img.dtype)
new_img[:] = cfg.PIXEL_MEANS
new_img[ofs_pad_height:ofs_pad_height + img.shape[0],
ofs_pad_width:ofs_pad_width + img.shape[1]] = \
img[ofs_crop_height:ofs_crop_height + target_size[0],
ofs_crop_width:ofs_crop_width + target_size[1]]
return new_img, (
ofs_pad_width - ofs_crop_width,
ofs_pad_height - ofs_crop_height,
target_size,
)
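For example, fitting a 480x640 image into a 512x512 canvas pads 32 rows at a random vertical offset and crops 128 columns at a random horizontal offset (a hypothetical call):

    img = np.zeros((480, 640, 3), 'uint8')
    new_img, (ofs_x, ofs_y, size) = get_image_with_target_size((512, 512), img)
    # new_img.shape == (512, 512, 3); padded pixels hold cfg.PIXEL_MEANS.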
def resize_image(img, fx, fy):
return cv2.resize( return cv2.resize(
im, img,
dsize=None, dsize=None,
fx=fx, fy=fy, fx=fx, fy=fy,
interpolation=cv2.INTER_LINEAR, interpolation=cv2.INTER_LINEAR,
...@@ -36,29 +77,12 @@ def resize_mask(mask, size): ...@@ -36,29 +77,12 @@ def resize_mask(mask, size):
return np.array(mask.resize(size, PIL.Image.NEAREST)) return np.array(mask.resize(size, PIL.Image.NEAREST))
def distort_image(im): def scale_image(img):
im = PIL.Image.fromarray(im)
if np.random.uniform() < 0.5:
delta_brightness = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Brightness(im)
im = im.enhance(delta_brightness)
if np.random.uniform() < 0.5:
delta_contrast = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Contrast(im)
im = im.enhance(delta_contrast)
if np.random.uniform() < 0.5:
delta_saturation = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Color(im)
im = im.enhance(delta_saturation)
return np.array(im)
def scale_image(im):
processed_ims, ims_scales = [], [] processed_ims, ims_scales = [], []
if cfg.TEST.MAX_SIZE > 0: if cfg.TEST.MAX_SIZE > 0:
im_size_min = np.min(im.shape[:2]) im_size_min = np.min(img.shape[:2])
im_size_max = np.max(im.shape[:2]) im_size_max = np.max(img.shape[:2])
for target_size in cfg.TEST.SCALES: for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_min) im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE # Prevent the biggest axis from being more than MAX_SIZE
...@@ -66,7 +90,7 @@ def scale_image(im): ...@@ -66,7 +90,7 @@ def scale_image(im):
im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
processed_ims.append( processed_ims.append(
cv2.resize( cv2.resize(
im, img,
dsize=None, dsize=None,
fx=im_scale, fy=im_scale, fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR, interpolation=cv2.INTER_LINEAR,
...@@ -74,12 +98,12 @@ def scale_image(im): ...@@ -74,12 +98,12 @@ def scale_image(im):
ims_scales.append(im_scale) ims_scales.append(im_scale)
else: else:
# Scale image along the longest side # Scale image along the longest side
im_size_max = np.max(im.shape[0:2]) im_size_max = np.max(img.shape[:2])
for target_size in cfg.TEST.SCALES: for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_max) im_scale = float(target_size) / float(im_size_max)
processed_ims.append( processed_ims.append(
cv2.resize( cv2.resize(
im, img,
dsize=None, dsize=None,
fx=im_scale, fy=im_scale, fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR, interpolation=cv2.INTER_LINEAR,
......
...@@ -13,138 +13,124 @@ from __future__ import absolute_import ...@@ -13,138 +13,124 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from ctypes import * import ctypes
import os.path as osp import math
import os
import numpy as np import numpy as np
class LibRotatedBoxes(object): class _CppExtension(object):
def __init__(self): dtype_mapping = {
self._nms = cdll.LoadLibrary( 'int32': ctypes.c_int32,
osp.join(osp.split( 'float64': ctypes.c_double,
osp.abspath(__file__))[0], }
"ctypes_rbox.so")
).NMS
self._overlaps = cdll.LoadLibrary(
osp.join(osp.split(
osp.abspath(__file__))[0],
"ctypes_rbox.so")
).Overlaps
self._nms.argtypes = (
POINTER(c_double),
POINTER(c_int),
POINTER(c_double),
POINTER(c_int),
c_double,
)
self._overlaps.argtypes = \
(POINTER(c_double),
POINTER(c_double),
POINTER(c_int),
POINTER(c_double)
)
self._nms.restype = None
self._overlaps.restype = None
def nms(self, dets, thresh):
"""CPU Hard-NMS.
Parameters
----------
dets: (N, 6) ndarray of double [cx, cy, w, h, a, scores]
thresh : float
"""
assert dets.shape[1] == 6
order = dets[:, 5].argsort()[::-1]
sorted_dets = dets[order, :]
N = sorted_dets.shape[0]
num_ctypes = c_int(N)
thresh = c_double(thresh)
pred_boxes = sorted_dets[:, 0:-1].flatten()
pred_scores = sorted_dets[:, -1:].flatten()
indices = np.zeros(N)
_boxes = np.ascontiguousarray(pred_boxes, dtype=np.double)
_scores = np.ascontiguousarray(pred_scores, dtype=np.double)
_inds = np.ascontiguousarray(indices, dtype=np.int32)
boxes_ctypes_ptr = _boxes.ctypes.data_as(POINTER(c_double)) def __init__(self, library_name):
scores_ctypes_ptr = _scores.ctypes.data_as(POINTER(c_double)) libc = ctypes.cdll.LoadLibrary(
inds_ctypes_ptr = _inds.ctypes.data_as(POINTER(c_int32)) os.path.join(os.path.split(
os.path.abspath(__file__))[0],
self._nms( library_name,
boxes_ctypes_ptr, )
inds_ctypes_ptr,
scores_ctypes_ptr,
byref(num_ctypes),
thresh,
) )
keep_indices = np.ctypeslib.as_array(
(c_int32 * num_ctypes.value).from_address( def load_func(name, arg_types):
addressof(inds_ctypes_ptr.contents))) func = getattr(libc, name)
return list(order[keep_indices.astype(np.int32)]) func.argtypes = self.get_arg_types(*arg_types)
return func
def overlaps(self, boxes, query_boxes):
"""Computer overlaps between boxes and query boxes. self._apply_cpu_nms = load_func(
'apply_cpu_nms', (
Parameters ('float64', 1), # dets
---------- ('int32', 1), # indices
boxes: (N, 5) ndarray of double [cx, cy, w, h, a] ('int32', 1), # n
query_boxes: (K, 6) ndarray of double [cx, cy, w, h, a, cls] ('float64', 0), # thresh
)
Returns
-------
overlaps: (N, K) ndarray of overlap between boxes and query_boxes
"""
assert boxes.shape[1] == 5
if query_boxes.shape[1] == 6:
query_boxes = query_boxes[:, :-1]
N = boxes.shape[0]
K = query_boxes.shape[0]
num_ctypes = (c_int * 2)()
num_ctypes[0] = N
num_ctypes[1] = K
num_ctypes_ptr = cast(num_ctypes, POINTER(c_int))
_boxes = boxes.flatten()
_query_boxes = query_boxes.flatten()
_areas = np.zeros((N, K), dtype=np.double).flatten()
_boxes = np.ascontiguousarray(_boxes, dtype=np.double)
_query_boxes = np.ascontiguousarray(_query_boxes, dtype=np.double)
_areas = np.ascontiguousarray(_areas, dtype=np.double)
boxes_ctypes_ptr = _boxes.ctypes.data_as(POINTER(c_double))
query_boxes_ctypes_ptr = _query_boxes.ctypes.data_as(POINTER(c_double))
areas_ctypes_ptr = _areas.ctypes.data_as(POINTER(c_double))
self._overlaps(
boxes_ctypes_ptr,
query_boxes_ctypes_ptr,
num_ctypes_ptr,
areas_ctypes_ptr,
) )
area = np.ctypeslib.as_array( self._bbox_overlaps = load_func(
(c_double * K * N).from_address( 'bbox_overlaps', (
addressof(areas_ctypes_ptr.contents) ('float64', 1), # boxes1
('float64', 1), # boxes2
('int32', 1), # n, k
('float64', 1) # overlaps
) )
) )
rarea = np.nan_to_num(area.astype(np.float32))
return rarea
@staticmethod
def array2ptr(array):
return array.ctypes.data_as(
_CppExtension.get_ptr(str(array.dtype)))
@staticmethod
def contiguous(array, dtype='float64'):
return np.ascontiguousarray(array.flatten(), dtype)
@staticmethod
def get_arg_types(*args):
arg_types = []
for (dtype, is_pointer) in args:
arg_types.append(
_CppExtension.get_ptr(dtype) if is_pointer
else _CppExtension.dtype_mapping[dtype]
)
return arg_types
@staticmethod
def get_ptr(dtype):
return ctypes.POINTER(_CppExtension.dtype_mapping[dtype])
@staticmethod
def ptr2array(ptr, shape):
return np.ctypeslib.as_array(
shape.from_address(
ctypes.addressof(ptr.contents)
))
def bbox_overlaps(self, boxes1, boxes2):
"""Computer overlaps between boxes and query boxes."""
def canonicalize(boxes):
box_dim = boxes.shape[1]
if box_dim > 5:
boxes = boxes[:, :5]
elif box_dim < 5:
raise ValueError('Expected box5d.')
return self.contiguous(boxes, 'float64')
n, k = boxes1.shape[0], boxes2.shape[0]
boxes1 = canonicalize(boxes1)
boxes2 = canonicalize(boxes2)
overlaps_shape = (ctypes.c_int32 * 2)()
overlaps_shape[:] = (n, k)
overlaps = np.zeros((n * k,), 'float64')
overlaps_ptr = self.array2ptr(overlaps)
self._bbox_overlaps(
self.array2ptr(boxes1),
self.array2ptr(boxes2),
ctypes.cast(overlaps_shape, self.get_ptr('int32')),
overlaps_ptr,
)
return self.ptr2array(overlaps_ptr, ctypes.c_double * k * n)
libc = LibRotatedBoxes() def cpu_nms(self, dets, thresh):
"""Apply Hard-NMS."""
if dets.shape[1] != 6:
raise ValueError('Expected det6d.')
order = dets[:, 5].argsort()[::-1]
sorted_dets = dets[order, :]
num_keep = sorted_dets.shape[0]
num_keep_ins = ctypes.c_int32(num_keep)
indices = np.zeros((num_keep,), np.int32)
indices_ptr = self.array2ptr(indices)
def bbox_overlaps(boxes1, boxes2): self._apply_cpu_nms(
"""Compute the overlaps between two group of boxes.""" self.array2ptr(self.contiguous(dets, 'float64')),
return libc.overlaps(boxes1, boxes2) indices_ptr,
ctypes.byref(num_keep_ins),
ctypes.c_double(thresh),
)
keep_indices = self.ptr2array(
indices_ptr, (ctypes.c_int32 * num_keep_ins.value))
return list(order[keep_indices])
def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1., 1.)): def bbox_transform(ex_rois, gt_rois, weights=(1., 1., 1., 1., 1.)):
...@@ -214,36 +200,72 @@ def bbox_transform_inv(boxes, deltas, weights=(1., 1., 1., 1., 1.)): ...@@ -214,36 +200,72 @@ def bbox_transform_inv(boxes, deltas, weights=(1., 1., 1., 1., 1.)):
return pred_boxes return pred_boxes
def canonicalize(values): def box2vertices(values):
def poly8_to_poly5(values): x_ctr, y_ctr, w, h, a = values
pt1, pt2 = values[0:2], values[2:4] theta = a * 0.01745329251
pt3, pt4 = values[4:6], values[6:8] cos_theta2 = math.cos(theta) * 0.5
edge1 = np.sqrt((pt1[0] - pt2[0]) * (pt1[0] - pt2[0]) + (pt1[1] - pt2[1]) * (pt1[1] - pt2[1])) sin_theta2 = math.sin(theta) * 0.5
edge2 = np.sqrt((pt2[0] - pt3[0]) * (pt2[0] - pt3[0]) + (pt2[1] - pt3[1]) * (pt2[1] - pt3[1])) vertices = [
angle, width, height = 0, 0, 0 x_ctr - sin_theta2 * h - cos_theta2 * w,
if edge1 > edge2: y_ctr + cos_theta2 * h - sin_theta2 * w,
width = edge1 x_ctr + sin_theta2 * h - cos_theta2 * w,
height = edge2 y_ctr - cos_theta2 * h - sin_theta2 * w,
if pt1[0] - pt2[0] != 0: ]
angle = -np.arctan(float(pt1[1] - pt2[1]) / float(pt1[0] - pt2[0])) / 3.1415926 * 180 vertices.extend([
else: 2 * x_ctr - vertices[0],
angle = 90. 2 * y_ctr - vertices[1],
elif edge2 >= edge1: 2 * x_ctr - vertices[2],
width = edge2 2 * y_ctr - vertices[3],
height = edge1 ])
if pt2[0] - pt3[0] != 0: return vertices
angle = -np.arctan(float(pt2[1] - pt3[1]) / float(pt2[0] - pt3[0])) / 3.1415926 * 180
else:
angle = 90. def vertices2box(vertices):
if angle < -45.: def sort(vertices):
angle = angle + 180. poly = np.array(vertices).reshape((4, 2))
x_ctr = (pt1[0] + pt3[0]) / 2. # lt, rt, rb, lb
y_ctr = (pt1[1] + pt3[1]) / 2. edge = [
return x_ctr, y_ctr, width, height, angle (poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]),
(poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]),
if len(values) == 8: (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]),
return poly8_to_poly5(values) (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])
return values ]
p_area = np.sum(edge) / 2.
_poly = poly.copy()
if abs(p_area) < 1:
raise ValueError('The polygon area is too small.')
if p_area > 0:
_poly = _poly[(0, 3, 2, 1), :] # clockwise
anchor = np.array([np.min(poly[:, 0]), np.min(poly[:, 1])])
line0 = np.linalg.norm(anchor - _poly[0])
line1 = np.linalg.norm(anchor - _poly[1])
line2 = np.linalg.norm(anchor - _poly[2])
line3 = np.linalg.norm(anchor - _poly[3])
argmin = np.argmin([line0, line1, line2, line3])
lt = _poly[argmin]
rt = _poly[(argmin + 1) % 4]
rb = _poly[(argmin + 2) % 4]
lb = _poly[(argmin + 3) % 4]
return np.array([lt, rt, rb, lb]).flatten()
values = sort(vertices)
y4my3 = values[7] - values[5]
if y4my3 != 0:
x2mx1 = values[2] - values[0]
theta = math.atan(x2mx1 / y4my3)
cos_theta = math.cos(theta)
sin_theta = math.sin(theta)
h = x2mx1 / sin_theta
x2px1 = values[2] + values[0]
x4px3 = values[6] + values[4]
w = (x4px3 - x2px1) / (2. * cos_theta)
a = theta / 0.01745329251
else:
w = values[2] - values[0]
h = values[5] - values[1]
a = 0.
x_ctr = 0.5 * (values[0] + values[4])
y_ctr = 0.5 * (values[1] + values[5])
return x_ctr, y_ctr, w, h, a
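A round-trip sanity check of the two converters (0.01745329251 is pi/180; the recovered angle may differ by the usual 90/180-degree convention of rotated boxes):

    vertices = box2vertices([4., 4., 15., 8., 30.])  # [x1, y1, ..., x4, y4]
    x_ctr, y_ctr, w, h, a = vertices2box(vertices)
    # Expect roughly (4., 4., 15., 8., 30.) up to the angle convention.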
def clip_angle(d): def clip_angle(d):
...@@ -292,12 +314,16 @@ def flip_boxes(boxes, width): ...@@ -292,12 +314,16 @@ def flip_boxes(boxes, width):
return flip_boxes return flip_boxes
def nms(dets, thresh): # Aliases
return libc.nms(dets, thresh) libc = _CppExtension('ctypes_rbox.so')
bbox_overlaps = libc.bbox_overlaps
cpu_nms = libc.cpu_nms
if __name__ == "__main__": if __name__ == "__main__":
prior_boxes = np.array([[4, 4, 5, 5, 90], [4, 4, 15, 15, 90]], dtype=np.double) prior_boxes = np.array([[4, 4, 15, 15, 150], [4, 4, 15, 15, 45]], dtype='float64')
gt_boxes = np.array([[4, 4, 15, 15, 90, 1]], dtype=np.double) gt_boxes = np.array([[4, 4, 15, 15, 45, 1.]], dtype='float64')
ov = bbox_overlaps(prior_boxes, gt_boxes) ov = bbox_overlaps(prior_boxes, gt_boxes)
print(ov) indices = cpu_nms(gt_boxes, 0.45)
\ No newline at end of file print(ov)
print(indices)
# ------------------------------------------------------------ # ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd. # Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# #
# Licensed under the BSD 2-Clause License. # Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on: # Codes are based on:
# #
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/timer.py> # <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/timer.py>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import contextlib import contextlib
import time import datetime
import time
class Timer(object):
"""A simple timer.""" class Timer(object):
def __init__(self): """A simple timer."""
self.total_time = 0. def __init__(self):
self.calls = 0 self.total_time = 0.
self.start_time = 0. self.calls = 0
self.diff = 0. self.start_time = 0.
self.average_time = 0. self.diff = 0.
self.average_time = 0.
def tic(self):
# Using time.time instead of time.clock because time.clock @contextlib.contextmanager
# does not normalize for multi-threading def tic_and_toc(self):
self.start_time = time.time() try:
yield self.tic()
def toc(self, average=True): finally:
self.diff = time.time() - self.start_time self.toc()
self.total_time += self.diff
self.calls += 1 def tic(self):
self.average_time = self.total_time / self.calls # Using time.time instead of time.clock because time.clock
if average: # does not normalize for multithreading
return self.average_time self.start_time = time.time()
else:
return self.diff def toc(self, average=True):
self.diff = time.time() - self.start_time
@contextlib.contextmanager self.total_time += self.diff
def tic_and_toc(self): self.calls += 1
try: self.average_time = self.total_time / self.calls
yield self.tic() if average:
finally: return self.average_time
self.toc() else:
return self.diff
def get_progress_info(timer, curr_step, max_steps):
"""Return a info of current progress.
Parameters
----------
timer : Timer
The timer that records the time per step.
curr_step : int
The current step.
max_steps : int
The total number of steps.
Returns
-------
str
The progress info.
"""
average_time = timer.average_time
eta_seconds = average_time * (max_steps - curr_step)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = (curr_step + 1.) / max_steps
return '< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >' \
.format(progress, timer.average_time, eta)
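A minimal sketch of how the timer and the progress string fit together in a training loop (illustrative only):

    timer, max_steps = Timer(), 1000
    for step in range(max_steps):
        with timer.tic_and_toc():
            pass  # ... run one training iteration ...
        if step % 20 == 0:
            print(get_progress_info(timer, step, max_steps))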
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from os import path as osp
from maker import make_record
if __name__ == '__main__':
voc_root = '/data/VOC'
make_record(
record_file=osp.join(voc_root, 'voc_0712_trainval'),
images_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/JPEGImages')],
annotations_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/Annotations')],
imagesets_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
splits=['trainval', 'trainval']
)
make_record(
record_file=osp.join(voc_root, 'voc_2007_test'),
images_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
annotations_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
imagesets_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
splits=['test']
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import cv2
import dragon
import numpy as np
import xml.etree.ElementTree as ET
def make_example(image_file, xml_file):
tree = ET.parse(xml_file)
filename = os.path.split(xml_file)[-1]
objs = tree.findall('object')
example = {'id': filename.split('.')[0], 'object': []}
with open(image_file, 'rb') as f:
img_bytes = bytes(f.read())
img = cv2.imdecode(np.frombuffer(img_bytes, 'uint8'), 1)
example['height'], example['width'], example['depth'] = img.shape
example['content'] = img_bytes
for ix, obj in enumerate(objs):
bbox = obj.find('bndbox')
is_diff = 0
if obj.find('difficult') is not None:
is_diff = int(obj.find('difficult').text) == 1
example['object'].append({
'name': obj.find('name').text.strip(),
'x1': float(bbox.find('x1').text),
'y1': float(bbox.find('y1').text),
'x2': float(bbox.find('x2').text),
'y2': float(bbox.find('y2').text),
'x3': float(bbox.find('x3').text),
'y3': float(bbox.find('y3').text),
'x4': float(bbox.find('x4').text),
'y4': float(bbox.find('y4').text),
'difficult': is_diff,
})
return example
def make_record(
record_file,
images_path,
annotations_path,
imagesets_path,
splits
):
if os.path.exists(record_file):
raise ValueError('The record file already exists.')
os.makedirs(record_file)
if not isinstance(images_path, list):
images_path = [images_path]
if not isinstance(annotations_path, list):
annotations_path = [annotations_path]
if not isinstance(imagesets_path, list):
imagesets_path = [imagesets_path]
assert len(splits) == len(imagesets_path)
assert len(splits) == len(images_path)
assert len(splits) == len(annotations_path)
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
writer = dragon.io.SeetaRecordWriter(
path=record_file,
protocol={
'id': 'string',
'content': 'bytes',
'height': 'int64',
'width': 'int64',
'depth': 'int64',
'object': [{
'name': 'string',
'x1': 'float64',
'y1': 'float64',
'x2': 'float64',
'y2': 'float64',
'x3': 'float64',
'y3': 'float64',
'x4': 'float64',
'y4': 'float64',
'difficult': 'int64',
}]
}
)
count, total_line = 0, 0
start_time = time.time()
for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
assert os.path.exists(split_file)
with open(split_file, 'r') as f:
lines = f.readlines()
total_line += len(lines)
for line in lines:
count += 1
if count % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
count, total_line, now_time - start_time))
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
writer.write(make_example(image_file, xml_file))
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time))
writer.close()
end_time = time.time()
data_size = os.path.getsize(record_file + '/data.data') * 1e-6
print('{} images take {:.2f} MB in {:.2f} sec.'
.format(total_line, data_size, end_time - start_time))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from os import path as osp
from maker import make_record
if __name__ == '__main__':
voc_root = '/data/VOC'
make_record(
record_file=osp.join(voc_root, 'voc_0712_trainval'),
images_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/JPEGImages')],
annotations_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/Annotations')],
imagesets_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
splits=['trainval', 'trainval']
)
make_record(
record_file=osp.join(voc_root, 'voc_2007_test'),
images_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
annotations_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
imagesets_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
splits=['test']
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import cv2
import dragon
import numpy as np
import xml.etree.ElementTree as ET
def make_example(image_file, xml_file):
tree = ET.parse(xml_file)
filename = os.path.split(xml_file)[-1]
objs = tree.findall('object')
example = {'id': filename.split('.')[0], 'object': []}
with open(image_file, 'rb') as f:
img_bytes = bytes(f.read())
img = cv2.imdecode(np.frombuffer(img_bytes, 'uint8'), 1)
example['height'], example['width'], example['depth'] = img.shape
example['content'] = img_bytes
for ix, obj in enumerate(objs):
bbox = obj.find('bndbox')
is_diff = 0
if obj.find('difficult') is not None:
is_diff = int(obj.find('difficult').text) == 1
example['object'].append({
'name': obj.find('name').text.strip(),
'xmin': float(bbox.find('xmin').text),
'ymin': float(bbox.find('ymin').text),
'xmax': float(bbox.find('xmax').text),
'ymax': float(bbox.find('ymax').text),
'difficult': is_diff,
})
return example
def make_record(
record_file,
images_path,
annotations_path,
imagesets_path,
splits
):
if os.path.exists(record_file):
raise ValueError('The record file already exists.')
os.makedirs(record_file)
if not isinstance(images_path, list):
images_path = [images_path]
if not isinstance(annotations_path, list):
annotations_path = [annotations_path]
if not isinstance(imagesets_path, list):
imagesets_path = [imagesets_path]
assert len(splits) == len(imagesets_path)
assert len(splits) == len(images_path)
assert len(splits) == len(annotations_path)
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
writer = dragon.io.SeetaRecordWriter(
path=record_file,
protocol={
'id': 'string',
'content': 'bytes',
'height': 'int64',
'width': 'int64',
'depth': 'int64',
'object': [{
'name': 'string',
'xmin': 'float64',
'ymin': 'float64',
'xmax': 'float64',
'ymax': 'float64',
'difficult': 'int64',
}]
}
)
count, total_line = 0, 0
start_time = time.time()
for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
assert os.path.exists(split_file)
with open(split_file, 'r') as f:
lines = f.readlines()
total_line += len(lines)
for line in lines:
count += 1
if count % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
count, total_line, now_time - start_time))
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
writer.write(make_example(image_file, xml_file))
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time))
writer.close()
end_time = time.time()
data_size = os.path.getsize(record_file + '/data.data') * 1e-6
print('{} images take {:.2f} MB in {:.2f} sec.'
.format(total_line, data_size, end_time - start_time))
...@@ -74,7 +74,7 @@ if __name__ == '__main__': ...@@ -74,7 +74,7 @@ if __name__ == '__main__':
detector.optimize_for_inference() detector.optimize_for_inference()
# Mixed precision training? # Mixed precision training?
if cfg.MODEL.DATA_TYPE.lower() == 'float16': if cfg.MODEL.PRECISION.lower() == 'float16':
detector.half() # Powerful FP16 Support detector.half() # Powerful FP16 Support
data = torch.zeros(*args.input_shape).byte() data = torch.zeros(*args.input_shape).byte()
......
...@@ -37,8 +37,14 @@ def parse_args(): ...@@ -37,8 +37,14 @@ def parse_args():
parser.add_argument('--exp_dir', dest='exp_dir', parser.add_argument('--exp_dir', dest='exp_dir',
help='experiment dir', help='experiment dir',
default=None, type=str) default=None, type=str)
parser.add_argument('--output_dir', dest='output_dir',
help='output dir',
default=None, type=str)
parser.add_argument('--iter', dest='iter', help='global step', parser.add_argument('--iter', dest='iter', help='global step',
default=0, type=int) default=None, type=int)
parser.add_argument('--dump', dest='dump',
help='dump the result back to record?',
action='store_true')
parser.add_argument('--wait', dest='wait', parser.add_argument('--wait', dest='wait',
help='wait the checkpoint?', help='wait the checkpoint?',
action='store_true') action='store_true')
...@@ -75,19 +81,19 @@ if __name__ == '__main__': ...@@ -75,19 +81,19 @@ if __name__ == '__main__':
# Inspect the database # Inspect the database
database = get_imdb(cfg.TEST.DATABASE) database = get_imdb(cfg.TEST.DATABASE)
cfg.TEST.PROTOCOL = 'null' if args.dump else cfg.TEST.PROTOCOL
logger.info('Database({}): {} images will be used to test.' logger.info('Database({}): {} images will be used to test.'
.format(cfg.TEST.DATABASE, database.num_images)) .format(cfg.TEST.DATABASE, database.num_images))
# Ready to test the network # Ready to test the network
logger.info('Results will be saved to `{:s}`' output_dir = coordinator.results_dir(checkpoint, args.output_dir)
.format(coordinator.results_dir(checkpoint))) logger.info('Results will be saved to `{:s}`'.format(output_dir))
detector = Detector().eval().cuda(cfg.GPU_ID) detector = Detector().eval().cuda(cfg.GPU_ID)
detector.load_weights(checkpoint) detector.load_weights(checkpoint)
detector.optimize_for_inference() detector.optimize_for_inference()
# Mixed precision training? # Mixed precision training?
if cfg.MODEL.DATA_TYPE.lower() == 'float16': if cfg.MODEL.PRECISION.lower() == 'float16':
detector.half() # Powerful FP16 Support detector.half() # Powerful FP16 Support
server = TestServer(coordinator.results_dir(checkpoint)) test_engine.test_net(detector, TestServer(output_dir))
test_engine.test_net(detector, server)