Commit 8558d3df by Ting PAN

Adapt to the latest dragon preview version

Summary:
This commit updates the repo to match dragon 0.3.0.dev20200707.
1 parent 4bcab266
Showing with 540 additions and 350 deletions
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.4.2 (20200707)
Dragon Minimum Required (Version 0.3.0.dev20200707)
Changes:
- Adapt to the latest dragon preview version.
Preview Features:
- None
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.4.1 (20200421)
Dragon Minimum Required (Version 0.3.0.dev20200421)
......
......@@ -14,7 +14,7 @@ The torch-style codes help us to simplify the hierarchical pipeline of modern de
## Requirements
seeta-dragon >= 0.3.0.dev20200421
seeta-dragon >= 0.3.0.dev20200707
## Installation
......
......@@ -32,16 +32,17 @@ FRCNN:
TRAIN:
WEIGHTS: '/model/R-101.Affine.pth'
DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
IMS_PER_BATCH: 2
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [800]
MAX_SIZE: 1333
NMS: 0.5
RPN_POST_NMS_TOP_N: 1000
......@@ -32,16 +32,16 @@ FRCNN:
TRAIN:
WEIGHTS: '/model/R-101.Affine.pth'
DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
IMS_PER_BATCH: 2
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [800]
MAX_SIZE: 1333
NMS: 0.5
RPN_POST_NMS_TOP_N: 1000
......@@ -30,7 +30,7 @@ TRAIN:
TEST:
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [600]
MAX_SIZE: 1000
NMS: 0.45
RPN_POST_NMS_TOP_N: 1000
\ No newline at end of file
......@@ -29,16 +29,16 @@ FRCNN:
TRAIN:
WEIGHTS: '/model/VGG16.RCNN.pth'
DATASET: '/data/voc_0712_trainval'
RPN_MIN_SIZE: 16
IMS_PER_BATCH: 2
BATCH_SIZE: 128
SCALES: [600]
MAX_SIZE: 1000
RPN_MIN_SIZE: 16
TEST:
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_MIN_SIZE: 16
RPN_POST_NMS_TOP_N: 300
SCALES: [600]
MAX_SIZE: 1000
RPN_MIN_SIZE: 16
NMS: 0.45
RPN_POST_NMS_TOP_N: 300
\ No newline at end of file
......@@ -32,11 +32,11 @@ FPN:
TRAIN:
WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
USE_COLOR_JITTER: True
IMS_PER_BATCH: 16
SCALES: [416]
RANDOM_SCALES: [0.25, 1.0]
USE_DIFF: False # Do not use crowd objects
USE_COLOR_JITTER: False
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
......
......@@ -23,10 +23,10 @@ FPN:
TRAIN:
WEIGHTS: '/model/AirNet.Affine.pth'
DATASET: '/data/voc_0712_trainval'
USE_COLOR_JITTER: True
IMS_PER_BATCH: 32
SCALES: [320]
RANDOM_SCALES: [0.25, 1.0]
USE_COLOR_JITTER: True
TEST:
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
......@@ -24,10 +24,10 @@ FPN:
TRAIN:
WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/voc_0712_trainval'
USE_COLOR_JITTER: True
IMS_PER_BATCH: 32
SCALES: [320]
RANDOM_SCALES: [0.25, 2.0]
USE_COLOR_JITTER: True
TEST:
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
......@@ -38,6 +38,7 @@ TRAIN:
IMS_PER_BATCH: 32
SCALES: [300]
RANDOM_SCALES: [0.25, 1.00]
USE_COLOR_JITTER: True
TEST:
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
......@@ -3,7 +3,7 @@ VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: ssd
BACKBONE: airnet5b.mbox
BACKBONE: airnet.fpn
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
......@@ -17,19 +17,30 @@ SOLVER:
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_ssd_320
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 8
SSD:
NUM_CONVS: 2
MULTIBOX:
STRIDES: [8, 16, 32]
MIN_SIZES: [30, 90, 150]
MAX_SIZES: [90, 150, 210]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
STRIDES: [8, 16, 32, 64, 100, 300]
MIN_SIZES: [30, 60, 110, 162, 213, 264]
MAX_SIZES: [60, 110, 162, 213, 264, 315]
ASPECT_RATIOS: [
[1, 2, 0.5],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5],
[1, 2, 0.5],
]
TRAIN:
WEIGHTS: '/model/AirNet.Affine.pth'
DATASET: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [320]
RANDOM_SCALES: [0.25, 1.00]
IMS_PER_BATCH: 32
USE_COLOR_JITTER: True
TEST:
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
......@@ -37,9 +37,10 @@ SSD:
TRAIN:
WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [320]
RANDOM_SCALES: [0.25, 1.00]
IMS_PER_BATCH: 32
USE_COLOR_JITTER: True
TEST:
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
---
AccessModifierOffset: -1
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ]
IncludeCategories:
- Regex: '^<.*\.h(pp)?>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
UseTab: Never
...
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h"
#include "nms_op.h"
#include "../utils/detection_utils.h"
namespace dragon {
template <class Context> template <typename T>
template <class Context>
template <typename T>
void NonMaxSuppressionOp<Context>::DoRunWithType() {
int num_selected;
......@@ -16,10 +14,10 @@ void NonMaxSuppressionOp<Context>::DoRunWithType() {
iou_threshold_,
Input(0).template mutable_data<T, Context>(),
Output(0)->template mutable_data<int64_t, CPUContext>(),
num_selected, ctx()
);
num_selected,
ctx());
Output(0)->Reshape({ num_selected });
Output(0)->Reshape({num_selected});
}
template <class Context>
......@@ -27,8 +25,7 @@ void NonMaxSuppressionOp<Context>::RunOnDevice() {
CHECK(Input(0).ndim() == 2 && Input(0).dim(1) == 5)
<< "\nThe dimensions of boxes should be (num_boxes, 5).";
Output(0)->Reshape({ Input(0).dim(0) });
Output(0)->Reshape({Input(0).dim(0)});
DispatchHelper<TensorTypes<float>>::Call(this, Input(0));
}
......
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include <dragon/utils/math_functions.h>
#include "../utils/detection_utils.h"
#include "retinanet_decoder_op.h"
namespace dragon {
template <class Context> template <typename T>
template <class Context>
template <typename T>
void RetinaNetDecoderOp<Context>::DoRunWithType() {
using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox
......@@ -38,22 +38,13 @@ void RetinaNetDecoderOp<Context>::DoRunWithType() {
roi_indices_.resize(num_candidates);
num_candidates = 0;
for (int i = 0; i < roi_indices_.size(); ++i)
if (scores[i] > score_thr_)
roi_indices_[num_candidates++] = i;
if (scores[i] > score_thr_) roi_indices_[num_candidates++] = i;
scores_.resize(num_candidates);
for (int i = 0; i < num_candidates; ++i)
scores_[i] = scores[roi_indices_[i]];
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
num_proposals = std::min(num_candidates, (int)pre_nms_topn_);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true,
scores_.data(),
indices_
);
num_candidates, num_proposals, true, scores_.data(), indices_);
for (int i = 0; i < num_proposals; ++i)
indices_[i] = roi_indices_[indices_[i]];
// Decode the candidates
......@@ -70,22 +61,24 @@ void RetinaNetDecoderOp<Context>::DoRunWithType() {
(int)scales_.size(),
ratios_.data(),
scales_.data(),
anchors_.data()
);
anchors_.data());
utils::detection::GenerateGridAnchors(
num_proposals, C, A,
feat_h, feat_w,
num_proposals,
C,
A,
feat_h,
feat_w,
strides_[i],
base_offset,
anchors_.data(),
indices_.data(),
y
);
y);
base_offset += (A * K);
}
utils::detection::GenerateMCProposals(
num_proposals,
num_boxes, C,
num_boxes,
C,
n,
im_h,
im_w,
......@@ -94,14 +87,13 @@ void RetinaNetDecoderOp<Context>::DoRunWithType() {
scores,
deltas,
indices_.data(),
y
);
y);
total_proposals += num_proposals;
y += (num_proposals * 7);
im_info += Input(-1).dim(1);
}
Output(0)->Reshape({ total_proposals, 7 });
Output(0)->Reshape({total_proposals, 7});
}
template <class Context>
......@@ -109,12 +101,10 @@ void RetinaNetDecoderOp<Context>::RunOnDevice() {
num_images_ = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_
<< " groups info, got "
<< "\nExcepted " << num_images_ << " groups info, got "
<< Input(-1).dim(0) << ".";
Output(0)->Reshape({ num_images_ * pre_nms_topn_, 7 });
Output(0)->Reshape({num_images_ * pre_nms_topn_, 7});
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
}
......@@ -123,8 +113,6 @@ DEPLOY_CPU(RetinaNetDecoder);
DEPLOY_CUDA(RetinaNetDecoder);
#endif
OPERATOR_SCHEMA(RetinaNetDecoder)
.NumInputs(3, INT_MAX)
.NumOutputs(1, INT_MAX);
OPERATOR_SCHEMA(RetinaNetDecoder).NumInputs(3, INT_MAX).NumOutputs(1, INT_MAX);
} // namespace dragon
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include <dragon/utils/math_functions.h>
#include "../utils/detection_utils.h"
#include "rpn_decoder_op.h"
namespace dragon {
template <class Context> template <typename T>
template <class Context>
template <typename T>
void RPNDecoderOp<Context>::DoRunWithType() {
using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox
......@@ -36,66 +36,60 @@ void RPNDecoderOp<Context>::DoRunWithType() {
A = int(ratios_.size() * scales_.size());
// Select the Top-K candidates as proposals
num_candidates = A * K;
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
num_proposals = std::min(num_candidates, (int)pre_nms_topn_);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true, scores, indices_
);
num_candidates, num_proposals, true, scores, indices_);
// Decode the candidates
anchors_.resize((size_t)(A * 4));
proposals_.Reshape({ num_proposals, 5 });
proposals_.Reshape({num_proposals, 5});
utils::detection::GenerateAnchors(
strides_[0],
(int)ratios_.size(),
(int)scales_.size(),
ratios_.data(),
scales_.data(),
anchors_.data()
);
anchors_.data());
utils::detection::GenerateGridAnchors(
num_proposals, A,
feat_h, feat_w,
num_proposals,
A,
feat_h,
feat_w,
strides_[0],
0,
anchors_.data(),
indices_.data(),
proposals_.template mutable_data<BT, BC>()
);
proposals_.template mutable_data<BT, BC>());
utils::detection::GenerateSSProposals(
K, num_proposals,
im_h, im_w,
min_box_h, min_box_w,
K,
num_proposals,
im_h,
im_w,
min_box_h,
min_box_w,
scores,
deltas,
indices_.data(),
proposals_.template mutable_data<BT, BC>()
);
proposals_.template mutable_data<BT, BC>());
// Sort, NMS and Retrieve
utils::detection::SortProposals(
0,
num_proposals - 1,
num_proposals,
proposals_.template mutable_data<BT, BC>()
);
proposals_.template mutable_data<BT, BC>());
utils::detection::ApplyNMS(
num_proposals,
post_nms_topn_,
nms_thr_,
proposals_.template mutable_data<BT, Context>(),
roi_indices_.data(),
num_rois, ctx()
);
num_rois,
ctx());
utils::detection::RetrieveRoIs(
num_rois,
n,
proposals_.template data<BT, BC>(),
roi_indices_.data(),
y
);
y);
} else if (strides_.size() > 1) {
// Case 2: multiple strides
CHECK_EQ(strides_.size(), InputSize() - 3)
......@@ -106,20 +100,13 @@ void RPNDecoderOp<Context>::DoRunWithType() {
<< "and " << scales_.size() << " scales";
// Select the top-k candidates as proposals
num_candidates = Input(-3).dim(1);
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
num_proposals = std::min(num_candidates, (int)pre_nms_topn_);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true, scores, indices_
);
num_candidates, num_proposals, true, scores, indices_);
// Decode the candidates
int base_offset = 0;
proposals_.Reshape({ num_proposals, 5 });
auto* proposals = proposals_
.template mutable_data<BT, BC>();
proposals_.Reshape({num_proposals, 5});
auto* proposals = proposals_.template mutable_data<BT, BC>();
for (int i = 0; i < strides_.size(); i++) {
feat_h = Input(i).dim(2);
feat_w = Input(i).dim(3);
......@@ -132,51 +119,43 @@ void RPNDecoderOp<Context>::DoRunWithType() {
1,
ratios_.data(),
scales_.data(),
anchors_.data()
);
anchors_.data());
utils::detection::GenerateGridAnchors(
num_proposals, A,
feat_h, feat_w,
num_proposals,
A,
feat_h,
feat_w,
strides_[i],
base_offset,
anchors_.data(),
indices_.data(),
proposals
);
proposals);
base_offset += (A * K);
}
utils::detection::GenerateMSProposals(
num_candidates,
num_proposals,
im_h, im_w,
min_box_h, min_box_w,
im_h,
im_w,
min_box_h,
min_box_w,
scores,
deltas,
&indices_[0],
proposals
);
proposals);
// Sort, NMS and Retrieve
utils::detection::SortProposals(
0,
num_proposals - 1,
num_proposals,
proposals
);
0, num_proposals - 1, num_proposals, proposals);
utils::detection::ApplyNMS(
num_proposals,
post_nms_topn_,
nms_thr_,
proposals_.template mutable_data<BT, Context>(),
roi_indices_.data(),
num_rois, ctx()
);
utils::detection::RetrieveRoIs(
num_rois,
n,
proposals,
roi_indices_.data(),
y
);
ctx());
utils::detection::RetrieveRoIs(
num_rois, n, proposals, roi_indices_.data(), y);
} else {
LOG(FATAL) << "Excepted at least one stride for proposals.";
}
......@@ -185,23 +164,22 @@ void RPNDecoderOp<Context>::DoRunWithType() {
im_info += Input(-1).dim(1);
}
Output(0)->Reshape({ total_rois, 5 });
Output(0)->Reshape({total_rois, 5});
// Distribute rois into K bins
if (OutputSize() > 1) {
CHECK_EQ(max_level_ - min_level_ + 1, OutputSize())
<< "\nExcepted " << OutputSize() << " outputs for levels "
"between [" << min_level_ << ", " << max_level_ << "].";
<< "between [" << min_level_ << ", " << max_level_ << "].";
vector<BT*> ys(OutputSize());
vector<vec64_t> bins(OutputSize());
Tensor RoIs; RoIs.ReshapeLike(*Output(0));
Tensor RoIs;
RoIs.ReshapeLike(*Output(0));
auto* rois = RoIs.template mutable_data<BT, BC>();
ctx()->template Copy<BT, BC, BC>(
Output(0)->count(),
rois, Output(0)->template data<BT, BC>()
);
Output(0)->count(), rois, Output(0)->template data<BT, BC>());
utils::detection::CollectRoIs(
total_rois,
......@@ -209,11 +187,11 @@ void RPNDecoderOp<Context>::DoRunWithType() {
max_level_,
canonical_level_,
canonical_scale_,
rois, bins
);
rois,
bins);
for (int i = 0; i < OutputSize(); i++) {
Output(i)->Reshape({ std::max((int)bins[i].size(), 1), 5 });
Output(i)->Reshape({std::max((int)bins[i].size(), 1), 5});
ys[i] = Output(i)->template mutable_data<BT, BC>();
}
......@@ -226,12 +204,11 @@ void RPNDecoderOp<Context>::RunOnDevice() {
num_images_ = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_
<< " groups info, got "
<< "\nExcepted " << num_images_ << " groups info, got "
<< Input(-1).dim(0) << ".";
roi_indices_.resize(post_nms_topn_);
Output(0)->Reshape({ num_images_ * post_nms_topn_, 5 });
Output(0)->Reshape({num_images_ * post_nms_topn_, 5});
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
}
......@@ -241,8 +218,6 @@ DEPLOY_CPU(RPNDecoder);
DEPLOY_CUDA(RPNDecoder);
#endif
OPERATOR_SCHEMA(RPNDecoder)
.NumInputs(3, INT_MAX)
.NumOutputs(1, INT_MAX);
OPERATOR_SCHEMA(RPNDecoder).NumInputs(3, INT_MAX).NumOutputs(1, INT_MAX);
} // namespace dragon
......@@ -15,25 +15,35 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import glob
from distutils.core import setup
from dragon.tools import cpp_extension
from dragon.tools import cpp_extension
if cpp_extension.CUDA_HOME is not None and \
cpp_extension._cuda.is_available():
Extension = cpp_extension.CUDAExtension
else:
Extension = cpp_extension.CppExtension
def find_sources(*dirs):
    """Collect the extension source files matched under ``dirs``.

    Each entry of ``dirs`` is used as a glob prefix, and the search is
    recursive so an entry of ``'**'`` also matches nested directories.
    ``.cc`` files are always collected; ``.cu`` files are collected only
    when ``Extension`` is the CUDA extension class.

    Returns a sorted list of unique paths, so the compile order does not
    depend on the directory listing order of the file system and
    overlapping entries in ``dirs`` cannot list a source twice.
    """
    ext_suffixes = ['.cc']
    if Extension is cpp_extension.CUDAExtension:
        ext_suffixes.append('.cu')
    sources = []
    for path in dirs:
        for ext_suffix in ext_suffixes:
            sources += glob.glob(
                path + '/*' + ext_suffix,
                recursive=True,
            )
    # glob gives no ordering guarantee; sort (and de-duplicate) so that
    # the build sees the same source list on every machine.
    return sorted(set(sources))
ext_modules = [
Extension(
name='install.lib.modules._C',
sources=[
'utils/detection_utils.cc',
'utils/detection_utils.cu',
'operators/nms_op.cc',
'operators/retinanet_decoder_op.cc',
'operators/rpn_decoder_op.cc',
],
sources=find_sources('**'),
),
]
......
#include <dragon/core/context.h>
#include "detection_utils.h"
#include <dragon/core/context.h>
namespace dragon {
......@@ -9,8 +9,7 @@ namespace detection {
template <typename T>
T IoU(const T A[], const T B[]) {
if (A[0] > B[2] || A[1] > B[3] ||
A[2] < B[0] || A[3] < B[1]) return 0;
if (A[0] > B[2] || A[1] > B[3] || A[2] < B[0] || A[3] < B[1]) return 0;
const T x1 = std::max(A[0], B[0]);
const T y1 = std::max(A[1], B[1]);
const T x2 = std::min(A[2], B[2]);
......@@ -23,7 +22,8 @@ T IoU(const T A[], const T B[]) {
return area / (A_area + B_area - area);
}
template <> void ApplyNMS<float, CPUContext>(
template <>
void ApplyNMS<float, CPUContext>(
const int num_boxes,
const int max_keeps,
const float thresh,
......@@ -33,16 +33,17 @@ template <> void ApplyNMS<float, CPUContext>(
CPUContext* ctx) {
int count = 0;
std::vector<char> is_dead(num_boxes);
for (int i = 0; i < num_boxes; ++i) is_dead[i] = 0;
for (int i = 0; i < num_boxes; ++i)
is_dead[i] = 0;
for (int i = 0; i < num_boxes; ++i) {
if (is_dead[i]) continue;
keep_indices[count++] = i;
if (count == max_keeps) break;
for (int j = i + 1; j < num_boxes; ++j)
if (!is_dead[j] && IoU(&boxes[i * 5],
&boxes[j * 5]) > thresh)
if (!is_dead[j] && IoU(&boxes[i * 5], &boxes[j * 5]) > thresh) {
is_dead[j] = 1;
}
}
num_keep = count;
}
......
......@@ -9,16 +9,13 @@ namespace utils {
namespace detection {
#define DIV_UP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define DIV_UP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define NUM_THREADS 64
namespace {
template <typename T>
__device__ bool _CheckIoU(
const T* a,
const T* b,
const float thresh) {
__device__ bool _CheckIoU(const T* a, const T* b, const float thresh) {
const T x1 = max(a[0], b[0]);
const T y1 = max(a[1], b[1]);
const T x2 = min(a[2], b[2]);
......@@ -74,7 +71,8 @@ __global__ void _NonMaxSuppression(
} // namespace
template <> void ApplyNMS<float, CUDAContext>(
template <>
void ApplyNMS<float, CUDAContext>(
const int num_boxes,
const int max_keeps,
const float thresh,
......@@ -87,23 +85,18 @@ template <> void ApplyNMS<float, CUDAContext>(
vector<uint64_t> mask_host(num_boxes * num_blocks);
auto* mask_dev = (uint64_t*)ctx->New(mask_host.size() * sizeof(uint64_t));
_NonMaxSuppression
<<< dim3(num_blocks, num_blocks), NUM_THREADS,
0, ctx->cuda_stream() >>>(
num_blocks,
num_boxes,
thresh,
boxes,
mask_dev
);
_NonMaxSuppression<<<
dim3(num_blocks, num_blocks),
NUM_THREADS,
0,
ctx->cuda_stream()>>>(num_blocks, num_boxes, thresh, boxes, mask_dev);
CUDA_CHECK(cudaMemcpyAsync(
mask_host.data(),
mask_dev,
mask_host.size() * sizeof(uint64_t),
cudaMemcpyDeviceToHost,
ctx->cuda_stream()
));
ctx->cuda_stream()));
ctx->FinishDeviceComputation();
......@@ -117,12 +110,13 @@ template <> void ApplyNMS<float, CUDAContext>(
if (!(dead_bit[nblock] & (1ULL << inblock))) {
keep_indices[num_selected++] = i;
auto* mask_i = &mask_host[0] + i * num_blocks;
for (int j = nblock; j < num_blocks; ++j) dead_bit[j] |= mask_i[j];
for (int j = nblock; j < num_blocks; ++j)
dead_bit[j] |= mask_i[j];
if (num_selected == max_keeps) break;
}
}
num_keep = num_selected;
num_keep = num_selected;
ctx->Delete(mask_dev);
}
......
......@@ -13,8 +13,7 @@
#ifndef SEETADET_CXX_UTILS_DETECTION_UTILS_H_
#define SEETADET_CXX_UTILS_DETECTION_UTILS_H_
#include "dragon/core/context.h"
#include "dragon/core/operator.h"
#include "dragon/core/common.h"
namespace dragon {
......@@ -24,7 +23,9 @@ namespace detection {
#define ROUND(x) ((int)((x) + (T)0.5))
/******************** BBox ********************/
/*!
* Box API
*/
template <typename T>
inline int FilterBoxes(
......@@ -94,7 +95,9 @@ inline void BBoxTransform(
bbox[3] = std::max((T)0, std::min(bbox[3], im_h - 1)) / im_scale_h;
}
/******************** Anchor ********************/
/*!
* Anchor API
*/
template <typename T>
inline void GenerateAnchors(
......@@ -183,7 +186,9 @@ inline void GenerateGridAnchors(
}
}
/******************** Proposal ********************/
/*!
* Proposal API
*/
template <typename T>
void GenerateSSProposals(
......@@ -210,12 +215,16 @@ void GenerateSSProposals(
d_log_w = delta[(a * 4 + 2) * K];
d_log_h = delta[(a * 4 + 3) * K];
proposal[4] = FilterBoxes(
dx, dy,
d_log_w, d_log_h,
im_w, im_h,
min_box_w, min_box_h,
proposal
) * scores[index];
dx,
dy,
d_log_w,
d_log_h,
im_w,
im_h,
min_box_w,
min_box_h,
proposal) *
scores[index];
proposal += 5;
}
}
......@@ -244,12 +253,16 @@ void GenerateMSProposals(
d_log_w = deltas[num_candidates_2x + index];
d_log_h = deltas[num_candidates_3x + index];
proposal[4] = FilterBoxes(
dx, dy,
d_log_w, d_log_h,
im_w, im_h,
min_box_w, min_box_h,
proposal
) * scores[index];
dx,
dy,
d_log_w,
d_log_h,
im_w,
im_h,
min_box_w,
min_box_h,
proposal) *
scores[index];
proposal += 5;
}
}
......@@ -282,12 +295,15 @@ void GenerateMCProposals(
d_log_h = deltas[num_boxes_3x + index];
proposal[0] = im_idx;
BBoxTransform(
dx, dy,
d_log_w, d_log_h,
im_w, im_h,
im_scale_h, im_scale_w,
proposal + 1
);
dx,
dy,
d_log_w,
d_log_h,
im_w,
im_h,
im_scale_h,
im_scale_w,
proposal + 1);
proposal[5] = scores[indices[i]];
proposal[6] = cls + 1;
proposal += 7;
......@@ -295,16 +311,15 @@ void GenerateMCProposals(
}
template <typename T>
inline void SortProposals(
const int start,
const int end,
const int num_top,
T* proposals) {
inline void
SortProposals(const int start, const int end, const int num_top, T* proposals) {
const T pivot_score = proposals[start * 5 + 4];
int left = start + 1, right = end;
while (left <= right) {
while (left <= end && proposals[left * 5 + 4] >= pivot_score) ++left;
while (right > start && proposals[right * 5 + 4] <= pivot_score) --right;
while (left <= end && proposals[left * 5 + 4] >= pivot_score)
++left;
while (right > start && proposals[right * 5 + 4] <= pivot_score)
--right;
if (left <= right) {
for (int i = 0; i < 5; ++i)
std::swap(proposals[left * 5 + i], proposals[right * 5 + i]);
......@@ -348,8 +363,8 @@ inline int roi_level(
T w = roi[3] - roi[1] + 1;
T h = roi[4] - roi[2] + 1;
// Refer to the settings of the paper
int level = canonical_level + std::log2(
std::max(std::sqrt(w * h), (T)1) / (T)canonical_scale);
int level = canonical_level +
std::log2(std::max(std::sqrt(w * h), (T)1) / (T)canonical_scale);
return std::min(max_level, std::max(min_level, level));
}
......@@ -364,8 +379,8 @@ inline void CollectRoIs(
vector<vec64_t>& roi_bins) {
const T* roi = rois;
for (int i = 0; i < num_rois; ++i) {
int bin_idx = roi_level(min_level, max_level,
canonical_level, canonical_scale, roi);
int bin_idx =
roi_level(min_level, max_level, canonical_level, canonical_scale, roi);
bin_idx = std::max(bin_idx - min_level, 0);
roi_bins[bin_idx].push_back(i);
roi += 5;
......@@ -385,14 +400,17 @@ inline void DistributeRoIs(
} else {
for (int j = 0; j < roi_bins[i].size(); ++j) {
const T* roi = rois + roi_bins[i][j] * 5;
for (int k = 0; k < 5; ++k) y[k] = roi[k];
for (int k = 0; k < 5; ++k)
y[k] = roi[k];
y += 5;
}
}
}
}
/******************** NMS ********************/
/*!
* NMS API
*/
template <typename T, class Context>
void ApplyNMS(
......
......@@ -52,12 +52,9 @@ class AnchorTarget(object):
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
# Generate grid anchors from base
all_anchors = \
generate_grid_anchors(
features,
self.base_anchors,
self.strides,
)
grid_shapes = [f.shape[-2:] for f in features]
all_anchors = generate_grid_anchors(
grid_shapes, self.base_anchors, self.strides)
num_anchors = all_anchors.shape[0]
# Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
......
......@@ -58,12 +58,9 @@ class Proposal(object):
# Get resources
num_images = ims_info.shape[0]
all_anchors = \
generate_grid_anchors(
features,
self.base_anchors,
self.strides,
)
grid_shapes = [f.shape[-2:] for f in features]
all_anchors = generate_grid_anchors(
grid_shapes, self.base_anchors, self.strides)
# Prepare for the outputs
batch_rois = []
......
......@@ -19,40 +19,40 @@ import numpy as np
from seetadet.core.config import cfg
def generate_grid_anchors(features, base_anchors, strides):
def generate_grid_anchors(grid_shapes, base_anchors, strides):
num_strides = len(strides)
if len(features) != num_strides:
if len(grid_shapes) != num_strides:
raise ValueError(
'Given %d features for %d strides.'
% (len(features), num_strides)
'Given %d grids for %d strides.'
% (len(grid_shapes), num_strides)
)
# Generate proposals from shifted anchors
anchors_to_pack = []
for i in range(len(features)):
height, width = features[i].shape[-2:]
for i in range(len(grid_shapes)):
height, width = grid_shapes[i]
shift_x = np.arange(0, width) * strides[i]
shift_y = np.arange(0, height) * strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = base_anchors[i].shape[0]
K = shifts.shape[0]
anchors = (base_anchors[i].reshape((1, A, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
# Add a anchors (1, a, 4) to
# cell k shifts (k, 1, 4) to get
# shift anchors (k, a, 4)
# Reshape to (k * a, 4) shifted anchors
a = base_anchors[i].shape[0]
k = shifts.shape[0]
anchors = (base_anchors[i].reshape((1, a, 4)) +
shifts.reshape((1, k, 4)).transpose((1, 0, 2)))
if num_strides > 1:
# Transpose from (K, A, 4) to (A, K, 4)
# We will pack it with other strides to
# match the data format of (N, C, H, W)
anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4))
anchors = anchors.reshape((a * k, 4))
anchors_to_pack.append(anchors)
else:
# Original order of Faster R-CNN
return anchors.reshape((K * A, 4))
return anchors.reshape((k * a, 4))
return np.vstack(anchors_to_pack)
......
......@@ -46,6 +46,9 @@ class AnchorTarget(object):
ratios=self.ratios,
sizes=sizes,
))
# Store the cached grid anchors
self.last_grid_shapes = None
self.last_grid_anchors = None
def __call__(self, features, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
......@@ -58,9 +61,14 @@ class AnchorTarget(object):
)
# Generate grid anchors from base
all_anchors = \
grid_shapes = [f.shape[-2:] for f in features]
if grid_shapes == self.last_grid_shapes:
all_anchors = self.last_grid_anchors
else:
self.last_grid_shapes = grid_shapes
self.last_grid_anchors = all_anchors = \
generate_grid_anchors(
features,
grid_shapes,
self.base_anchors,
self.strides,
)
......
......@@ -15,6 +15,7 @@ from __future__ import print_function
import types
import dragon
import dragon.vm.torch as torch
import numpy as np
......@@ -59,7 +60,7 @@ def ims_detect(detector, raw_images):
# Unpack results
results = outputs['detections']
detections = [[] for _ in range(len((raw_images)))]
detections = [[] for _ in range(len(raw_images))]
for i in range(len(ims)):
inds = np.where(results[:, 0].astype(np.int32) == i)[0]
......@@ -126,6 +127,6 @@ def test_net(weights, num_classes, q_in, q_out, device):
q_out.put((
indices[i],
dict([('im_detect', _t['im_detect'].average_time),
('misc',_t['misc'].average_time)]),
('misc', _t['misc'].average_time)]),
dict([('boxes', boxes_this_image)]),
))
......@@ -45,14 +45,14 @@ class PriorBox(object):
aspect_ratios[i],
)
)
self.grid_anchors = None
# Store the cached grid anchors
self.last_grid_anchors = None
def __call__(self, features):
if self.grid_anchors is not None:
return self.grid_anchors
self.grid_anchors = []
if self.last_grid_anchors is not None:
return self.last_grid_anchors
all_anchors = []
for i in range(len(self.strides)):
# 1. Generate base grids
height, width = features[i].shape[-2:]
......@@ -61,23 +61,23 @@ class PriorBox(object):
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
# 2. Apply anchors on base grids
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
D = self.base_anchors[i].shape[1]
# Add a anchors (1, a, 4) to
# cell k shifts (k, 1, 4) to get
# shift anchors (k, a, 4)
# Reshape to (k * a, 4) shifted anchors
a = self.base_anchors[i].shape[0]
d = self.base_anchors[i].shape[1]
shifts = np.vstack((
shift_x.ravel(),
shift_y.ravel(),
shift_x.ravel(),
shift_y.ravel())
).transpose()
K = shifts.shape[0] # K = map_h * map_w
anchors = (self.base_anchors[i].reshape((1, A, D)) +
shifts.reshape((1, K, D)).transpose((1, 0, 2)))
anchors = anchors.reshape((K * A, D)).astype(np.float32)
self.grid_anchors.append(anchors)
self.grid_anchors = np.concatenate(self.grid_anchors)
k = shifts.shape[0] # k = map_h * map_w
anchors = (self.base_anchors[i].reshape((1, a, d)) +
shifts.reshape((1, k, d)).transpose((1, 0, 2)))
anchors = anchors.reshape((k * a, d)).astype(np.float32)
all_anchors.append(anchors)
return self.grid_anchors
self.last_grid_anchors = np.concatenate(all_anchors)
return self.last_grid_anchors
......@@ -32,11 +32,9 @@ def get_images(ims):
for im in ims:
im_scales.append((float(out_size) / im.shape[0],
float(out_size) / im.shape[1]))
processed_ims.append(
cv2.resize(
processed_ims.append(cv2.resize(
im, (out_size, out_size),
interpolation=cv2.INTER_AREA,
))
interpolation=cv2.INTER_AREA))
if ims[0].dtype == 'uint16':
ims_blob = np.array(processed_ims, dtype='float32') / 256.
else:
......
......@@ -49,7 +49,7 @@ class Distort(object):
]
def apply(self, img, boxes=None):
if self._prob > 0:
self._prob = 0.5 if cfg.TRAIN.USE_COLOR_JITTER else 0
img = PIL.Image.fromarray(img)
for transform_fn, prob in self._transforms:
if npr.uniform() < prob:
......
......@@ -27,8 +27,9 @@ if __name__ == '__main__':
np.random.seed(3)
cfg.TRAIN.SCALES = [300]
cfg.TRAIN.RANDOM_SCALES = [0.25, 1.00]
cfg.TRAIN.USE_COLOR_JITTER = True
augmentor = transforms.Compose(
transformer = transforms.Compose(
transforms.Distort(),
transforms.Expand(),
transforms.Sample(),
......@@ -38,12 +39,12 @@ if __name__ == '__main__':
while True:
img = cv2.imread('cat.jpg')
boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
img, boxes = augmentor(img, boxes)
img, boxes = transformer(img, boxes)
for box in boxes:
x1 = int(box[0] * img.shape[1])
y1 = int(box[1] * img.shape[0])
x2 = int(box[2] * img.shape[1])
y2 = int(box[3] * img.shape[0])
cv2.rectangle(img, (x1, y1), (x2, y2), (188, 119, 64), 2)
cv2.imshow('Sample', img)
cv2.imshow('Transforms - Preview', img)
cv2.waitKey(0)
......@@ -70,7 +70,8 @@ class Pipeline(dali.Pipeline):
# Decode image
image = self.decode(inputs['image'])
# Augment the color space
# Augment the color space if necessary
if cfg.TRAIN.USE_COLOR_JITTER:
image = self.hsv(
self.brightness_contrast(
image,
......
......@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function
import os
from seetadet.datasets import kpl_record
from seetadet.datasets import kpl_dataset
def get_dataset(name):
......@@ -42,5 +42,5 @@ def list_dataset():
_GLOBAL_REGISTERED_DATASET = {
'default': lambda source:
kpl_record.KPLRecordDataset(source),
kpl_dataset.KPLRecordDataset(source),
}
......@@ -149,8 +149,10 @@ class AirNet(nn.Module):
x = self.layer1(x)
outputs = [None, None, self.layer2(x)]
if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])]
if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])]
if hasattr(self, 'layer3'):
outputs += [self.layer3(outputs[-1])]
if hasattr(self, 'layer4'):
outputs += [self.layer4(outputs[-1])]
return outputs
......
......@@ -39,16 +39,17 @@ class Detector(nn.Module):
backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:]
# + DataLoader
# DataLoader
self.data_loader = None
self.data_loader_cls = importlib.import_module(
'seetadet.algo.{}'.format(model)).DataLoader
self.bootstrap = vision.Bootstrap()
# + FeatureExtractor
# FeatureExtractor
self.body = backbones.get(body)()
feature_dims = self.body.feature_dims
# + FeatureEnhancer
# FeatureEnhancer
if 'fpn' in modules:
self.fpn = models.FPN(feature_dims)
feature_dims = self.fpn.feature_dims
......@@ -57,7 +58,7 @@ class Detector(nn.Module):
else:
feature_dims = [feature_dims[-1]]
# + Detection Modules
# Detection Modules
if 'rcnn' in model:
self.rpn = models.RPN(feature_dims[0])
if 'faster' in model:
......@@ -106,7 +107,7 @@ class Detector(nn.Module):
if inputs is None:
# 1) Training: <= DataLayer
# 2) Inference: <= Given
if not hasattr(self, 'data_loader'):
if self.data_loader is None:
self.data_loader = self.data_loader_cls()
inputs = self.data_loader()
......@@ -171,29 +172,34 @@ class Detector(nn.Module):
# Merge Affine into Convolution #
###################################
last_module = None
for e in self.modules():
if isinstance(e, nn.Affine) and \
for module in self.modules():
if isinstance(module, nn.Affine) and \
isinstance(last_module, nn.Conv2d):
if last_module.bias is None:
delattr(last_module, 'bias')
e.forward = lambda x: x
last_module.bias = e.bias
last_module.weight.data.mul_(e.weight.data)
last_module = e
module.forward = lambda x: x
last_module.bias = module.bias
weight = module.weight.data.view(
0, *([1] * (last_module.weight.ndimension() - 1)))
last_module.weight.data.mul_(weight)
last_module = module
######################################
# Merge BatchNorm into Convolution #
######################################
last_module = None
for e in self.modules():
if isinstance(e, nn.BatchNorm2d) and \
for module in self.modules():
if isinstance(module, nn.BatchNorm2d) and \
isinstance(last_module, nn.Conv2d):
if last_module.bias is None:
delattr(last_module, 'bias')
e.forward = lambda x: x
term = torch.sqrt(e.running_var.data + e.eps)
term = e.weight.data / term
last_module.bias = e.bias.data - term * e.running_mean.data
module.forward = lambda x: x
term = torch.sqrt(module.running_var.data + module.eps)
term = module.weight.data / term
last_module.bias = \
module.bias.data - \
term * module.running_mean.data
term = term.view(0, *([1] * (last_module.weight.ndimension() - 1)))
if last_module.weight.dtype == 'float16':
last_module.bias.half_()
weight = last_module.weight.data.float()
......@@ -201,7 +207,7 @@ class Detector(nn.Module):
last_module.weight.copy_(weight)
else:
last_module.weight.data.mul_(term)
last_module = e
last_module = module
def new_detector(device, weights=None, training=False):
......
......@@ -31,7 +31,8 @@ class FPN(nn.Module):
dim = cfg.FPN.DIM
self.C = nn.ModuleList()
self.P = nn.ModuleList()
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1):
self.highest_backbone_lvl = min(cfg.FPN.RPN_MAX_LEVEL, HIGHEST_BACKBONE_LVL)
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, self.highest_backbone_lvl + 1):
self.C.append(nn.Conv1x1(feature_dims[lvl - 1], dim, bias=True))
self.P.append(nn.Conv3x3(dim, dim, bias=True))
if 'rcnn' in cfg.MODEL.TYPE:
......@@ -40,8 +41,8 @@ class FPN(nn.Module):
else:
self.apply_func = self.apply_on_generic
self.relu = nn.ReLU(inplace=False)
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else dim
for lvl in range(self.highest_backbone_lvl + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == self.highest_backbone_lvl + 1 else dim
self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True))
self.feature_dims = [dim]
self.coarsest_stride = cfg.MODEL.COARSEST_STRIDE
......@@ -56,12 +57,12 @@ class FPN(nn.Module):
def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
outputs = [self.P[self.highest_backbone_lvl - min_lvl](fpn_input)]
# Apply max pool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
for i in range(self.highest_backbone_lvl + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1]))
# Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
for i in range(self.highest_backbone_lvl - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample(
......@@ -76,15 +77,15 @@ class FPN(nn.Module):
def apply_on_generic(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
outputs = [self.P[self.highest_backbone_lvl - min_lvl](fpn_input)]
# Add extra convolutions for higher features
extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
for i in range(self.highest_backbone_lvl + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl:
extra_input = self.relu(outputs[-1])
# Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
for i in range(self.highest_backbone_lvl - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample(
......
......@@ -161,7 +161,7 @@ class NASMobileNet(nn.Module):
def reset_parameters(self):
for m in self.modules():
if nn.is_conv2d(m):
if isinstance(m, nn.Conv2d):
init.kaiming_normal(m.weight, 'fan_out')
if m.bias is not None:
init.constant(m.bias, 0)
......@@ -173,7 +173,7 @@ class NASMobileNet(nn.Module):
# Stop the gradients if necessary
def freeze_func(m):
if nn.is_conv2d(m):
if isinstance(m, nn.Conv2d):
m.weight.requires_grad = False
m._buffers['weight'] = m.weight
del m._parameters['weight']
......
......@@ -17,8 +17,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from seetadet.core.config import cfg
from seetadet.core.registry import backbones
from seetadet.modules import nn
......@@ -37,11 +35,12 @@ class BasicBlock(nn.Module):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = nn.FrozenAffine(dim_out)
self.downsample = downsample
self.dropblock = dropblock
self.dropblock1 = nn.DropBlock2d(**dropblock) if dropblock else None
self.dropblock2 = nn.DropBlock2d(**dropblock) if dropblock else None
def forward(self, x):
residual = x
......@@ -50,14 +49,14 @@ class BasicBlock(nn.Module):
out = self.bn1(out)
out = self.relu(out)
if self.dropblock is not None:
out = self.dropblock(out)
if self.dropblock1 is not None:
out = self.dropblock1(out)
out = self.conv2(out)
out = self.bn2(out)
if self.dropblock is not None:
residual = self.dropblock(residual)
if self.dropblock2 is not None:
residual = self.dropblock2(residual)
if self.downsample is not None:
residual = self.downsample(residual)
......@@ -67,7 +66,7 @@ class BasicBlock(nn.Module):
return out
class Bottleneck(torch.nn.Module):
class Bottleneck(nn.Module):
# 1x64d => 0.25 (ResNet)
# 32x8d, 64x4d => 1.0 (ResNeXt)
contraction = cfg.RESNET.NUM_GROUPS \
......@@ -86,12 +85,13 @@ class Bottleneck(torch.nn.Module):
self.conv1 = nn.Conv1x1(dim_in, dim)
self.bn1 = nn.FrozenAffine(dim)
self.conv2 = nn.Conv3x3(dim, dim, stride=stride)
self.drop2 = nn.DropBlock2d(**dropblock) if dropblock else None
self.bn2 = nn.FrozenAffine(dim)
self.conv3 = nn.Conv1x1(dim, dim_out)
self.drop3 = nn.DropBlock2d(**dropblock) if dropblock else None
self.bn3 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.dropblock = dropblock
def forward(self, x):
residual = x
......@@ -101,32 +101,30 @@ class Bottleneck(torch.nn.Module):
out = self.relu(out)
out = self.conv2(out)
if self.drop2 is not None:
out = self.drop2(out)
out = self.bn2(out)
out = self.relu(out)
if self.dropblock is not None:
out = self.dropblock(out)
out = self.conv3(out)
out = self.bn3(out)
if self.dropblock is not None:
residual = self.dropblock(residual)
if self.downsample is not None:
residual = self.downsample(residual)
out += residual
if self.drop3 is not None:
out = self.drop3(out)
out = self.relu(out)
return out
class ResNet(torch.nn.Module):
class ResNet(nn.Module):
def __init__(self, block, layers, filters):
super(ResNet, self).__init__()
self.dim_in, filters = filters[0], filters[1:]
self.feature_dims = [self.dim_in] + filters
self.conv1 = torch.nn.Conv2d(
self.conv1 = nn.Conv2d(
3, 64,
kernel_size=7,
stride=2,
......@@ -134,29 +132,31 @@ class ResNet(torch.nn.Module):
bias=False,
)
self.bn1 = nn.FrozenAffine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d(
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(
kernel_size=3,
stride=2,
padding=0,
ceil_mode=True,
)
self.drop3 = torch.nn.DropBlock2d(
kp=0.9,
block_size=7,
alpha=0.25,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.drop4 = torch.nn.DropBlock2d(
kp=0.9,
block_size=7,
alpha=1.00,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
drop3 = {
'kp': 0.9,
'block_size': 7,
'alpha': 1.00,
'decrement': cfg.DROPBLOCK.DECREMENT,
'inplace': True,
} if cfg.DROPBLOCK.DROP_ON else None
drop4 = {
'kp': 0.9,
'block_size': 7,
'alpha': 1.00,
'decrement': cfg.DROPBLOCK.DECREMENT,
'inplace': True,
} if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3)
self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, self.drop4)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, drop3)
self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, drop4)
self.reset_parameters()
def reset_parameters(self):
......@@ -166,7 +166,7 @@ class ResNet(torch.nn.Module):
# Stop the gradients if necessary
def freeze_func(m):
if isinstance(m, torch.nn.Conv2d):
if isinstance(m, nn.Conv2d):
m.weight.requires_grad = False
m._buffers['weight'] = m.weight
del m._parameters['weight']
......
......@@ -29,7 +29,6 @@ class SSD(nn.Module):
########################################
# SSD outputs #
########################################
self.cls_conv = torch.nn.ModuleList(
nn.Conv3x3(feature_dims[0], feature_dims[0], bias=True)
for _ in range(cfg.SSD.NUM_CONVS)
......
......@@ -36,7 +36,6 @@ class _NonMaxSuppression(Function):
return self.dispatch([dets], [self.alloc()])
class _RetinaNetDecoder(Function):
"""Decode predictions from RetinaNet."""
......
......@@ -33,6 +33,7 @@ def kaiming_normal(weight, mode='fan_in'):
nonlinearity='relu',
)
# Aliases
constant = nn.init.constant_
normal = nn.init.normal_
......@@ -185,6 +185,7 @@ class SigmoidFocalLoss(object):
return nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
negative_index=0, # Background index
)
......@@ -211,6 +212,7 @@ BCEWithLogitsLoss = nn.BCEWithLogitsLoss
Conv2d = nn.Conv2d
ConvTranspose2d = nn.ConvTranspose2d
DepthwiseConv2d = nn.DepthwiseConv2d
DropBlock2d = nn.DropBlock2d
Linear = nn.Linear
MaxPool2d = nn.MaxPool2d
Module = nn.Module
......
......@@ -15,7 +15,7 @@ from __future__ import print_function
import functools
import dragon.vm.torch as torch
from dragon.vm import torch
from seetadet.core.config import cfg
......@@ -41,7 +41,9 @@ class Bootstrap(torch.nn.Module):
def __init__(self):
super(Bootstrap, self).__init__()
self.normalize_func = functools.partial(
self._device = torch.device('cpu')
self._dummy_buffer = torch.ones(1)
self._normalize_func = functools.partial(
torch.channel_normalize,
mean=cfg.PIXEL_MEANS,
std=[1., 1., 1.],
......@@ -49,10 +51,9 @@ class Bootstrap(torch.nn.Module):
dims=(0, 3, 1, 2),
dtype=cfg.MODEL.PRECISION.lower(),
)
self.dummy_buffer = torch.ones(1)
def _apply(self, fn):
fn(self.dummy_buffer)
fn(self._dummy_buffer)
def cpu(self):
self._device = torch.device('cpu')
......@@ -61,12 +62,11 @@ class Bootstrap(torch.nn.Module):
self._device = torch.device('cuda', device)
def device(self):
"""Return the device of this module."""
return self.dummy_buffer.device
return self._dummy_buffer.device
def forward(self, input):
if isinstance(input, torch.Tensor):
if input.size(1) <= 3:
if input.shape[1] <= 3:
return input
cur_device = self.device()
if input._device != cur_device:
......@@ -74,4 +74,4 @@ class Bootstrap(torch.nn.Module):
input = input.cpu()
else:
input = input.cuda(cur_device.index)
return self.normalize_func(input)
return self._normalize_func(input)
......@@ -32,8 +32,8 @@ class SGDSolver(object):
lr=cfg.SOLVER.BASE_LR,
momentum=cfg.SOLVER.MOMENTUM,
weight_decay=cfg.SOLVER.WEIGHT_DECAY,
clip_gradient=float(cfg.SOLVER.CLIP_NORM),
scale_gradient=1. / cfg.SOLVER.LOSS_SCALING,
clip_norm=float(cfg.SOLVER.CLIP_NORM),
scale=1. / cfg.SOLVER.LOSS_SCALING,
)
self.lr_scheduler = lr_scheduler.get_scheduler()
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import operator
from dragon.vm import torch
from seetadet.modules import nn
def dense_conv_flops(m, inputs, output):
    """Forward hook that records params and flops of a dense convolution.

    The results are stored on the module itself as ``m.__params__`` and
    ``m.__flops__`` so that they can be gathered after the forward pass.

    Parameters
    ----------
    m : object
        The convolution module; ``kernel_size`` and ``bias`` are read.
    inputs : Sequence
        The positional inputs of the forward call; only the shape of
        the first one is used.
    output : object
        The output of the forward call; only its shape is used.

    """
    # Seed the products with 1 so that an empty shape does not raise.
    k_dim = functools.reduce(operator.mul, m.kernel_size, 1)
    # NOTE(review): axis 1 is taken as channels and axes 2+ as the
    # spatial extent (NCHW-like layout) -- confirm against the callers.
    out_dim = functools.reduce(operator.mul, output.shape[2:], 1)
    in_c, out_c = inputs[0].shape[1], output.shape[1]
    # Test the bias against None instead of its truth value: the truth
    # value of a tensor is ambiguous (or an error) for multiple elements.
    has_bias = getattr(m, 'bias', None) is not None
    # NOTE(review): grouped convolutions are not accounted for here.
    m.__params__ = (k_dim * in_c + (1 if has_bias else 0)) * out_c
    m.__flops__ = m.__params__ * out_dim
def depthwise_conv_flops(m, inputs, output):
    """Forward hook that records params and flops of a depthwise convolution.

    The results are stored on the module itself as ``m.__params__`` and
    ``m.__flops__`` so that they can be gathered after the forward pass.

    Parameters
    ----------
    m : object
        The convolution module; ``kernel_size`` and ``bias`` are read.
    inputs : Sequence
        The positional inputs of the forward call (unused, required by
        the hook signature).
    output : object
        The output of the forward call; only its shape is used.

    """
    # Seed the products with 1 so that an empty shape does not raise.
    k_dim = functools.reduce(operator.mul, m.kernel_size, 1)
    # NOTE(review): axis 1 is taken as channels and axes 2+ as the
    # spatial extent (NCHW-like layout) -- confirm against the callers.
    out_dim = functools.reduce(operator.mul, output.shape[2:], 1)
    out_c = output.shape[1]
    # Test the bias against None instead of its truth value: the truth
    # value of a tensor is ambiguous (or an error) for multiple elements.
    has_bias = getattr(m, 'bias', None) is not None
    # Each output channel owns a single kernel, hence no input-channel term.
    m.__params__ = (k_dim + (1 if has_bias else 0)) * out_c
    m.__flops__ = m.__params__ * out_dim
def register_flops(module):
    """Register forward hooks that collect flops info.

    Every ``nn.DepthwiseConv2d`` and ``nn.Conv2d`` found in
    ``module.modules()`` gets the matching flops hook. The root module
    is tagged with a ``__flops__`` attribute, which doubles as the
    "already registered" marker.

    Parameters
    ----------
    module : object
        The root module to instrument.

    """
    # Register only once per root module, so that repeated calls
    # (e.g. from several benchmarks) never stack duplicate hooks.
    if hasattr(module, '__flops__'):
        return
    module.__flops__ = 0.
    for m in module.modules():
        # DepthwiseConv2d is tested first so it is never given the dense hook.
        if isinstance(m, nn.DepthwiseConv2d):
            m.register_forward_hook(depthwise_conv_flops)
        elif isinstance(m, nn.Conv2d):
            m.register_forward_hook(dense_conv_flops)
def collect_flops(module, normalizer=1e6):
    """Sum and reset the flops recorded by the last forward.

    Parameters
    ----------
    module : object
        The root module; every entry of ``module.modules()`` is visited.
    normalizer : number, optional, default=1e6
        The value to divide the summed flops by.

    Returns
    -------
    float
        The summed flops divided by ``normalizer``.

    """
    accumulated = 0.
    for child in module.modules():
        # Modules without a recorded counter contribute nothing.
        if not hasattr(child, '__flops__'):
            continue
        accumulated += child.__flops__
        # Reset, so that the next collection starts from zero.
        child.__flops__ = 0.
    return accumulated / normalizer
def benchmark_flops(module, normalizer=1e6):
    """Return the flops by running benchmark once.

    The module is called with no arguments, so it must be able to
    fetch its own inputs.

    Parameters
    ----------
    module : object
        The module to benchmark.
    normalizer : number, optional, default=1e6
        The value to divide the summed flops by.

    Returns
    -------
    float
        The flops of a single forward divided by ``normalizer``.

    """
    register_flops(module)
    # Discard the counters left over from any earlier forward.
    collect_flops(module)
    original_training = module.training
    if original_training:
        module.eval()
    try:
        with torch.no_grad():
            module()
    finally:
        # Restore the training mode even if the forward fails.
        if original_training:
            module.train()
    return collect_flops(module, normalizer)
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!