------------------------------------------------------------------------ ------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.4.2 (20200707)
Dragon Minimum Required (Version 0.3.0.dev20200707)
- Adapt to the latest dragon preview version.
Preview Features:
- None
Bugs fixed:
- None
SeetaDet 0.4.1 (20200421)
Dragon Minimum Required (Version 0.3.0.dev20200421)
...@@ -14,7 +14,7 @@ The torch-style codes help us to simplify the hierarchical pipeline of modern de ...@@ -14,7 +14,7 @@ The torch-style codes help us to simplify the hierarchical pipeline of modern de
## Requirements ## Requirements
seeta-dragon >= 0.3.0.dev20200421 seeta-dragon >= 0.3.0.dev20200707
## Installation ## Installation
...@@ -32,16 +32,17 @@ FRCNN: ...@@ -32,16 +32,17 @@ FRCNN:
WEIGHTS: '/model/R-101.Affine.pth' WEIGHTS: '/model/R-101.Affine.pth'
DATASET: '/data/coco_2014_trainval35k' DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
SCALES: [800] SCALES: [800]
MAX_SIZE: 1333 MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
DATASET: '/data/coco_2014_minival' DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json' JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco' PROTOCOL: 'coco'
SCALES: [800] SCALES: [800]
MAX_SIZE: 1333 MAX_SIZE: 1333
NMS: 0.5 NMS: 0.5
...@@ -32,16 +32,16 @@ FRCNN: ...@@ -32,16 +32,16 @@ FRCNN:
WEIGHTS: '/model/R-101.Affine.pth' WEIGHTS: '/model/R-101.Affine.pth'
DATASET: '/data/coco_2014_trainval35k' DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
SCALES: [800] SCALES: [800]
MAX_SIZE: 1333 MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
DATASET: '/data/coco_2014_minival' DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json' JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco' PROTOCOL: 'coco'
SCALES: [800] SCALES: [800]
MAX_SIZE: 1333 MAX_SIZE: 1333
NMS: 0.5 NMS: 0.5
...@@ -30,7 +30,7 @@ TRAIN: ...@@ -30,7 +30,7 @@ TRAIN:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
SCALES: [600] SCALES: [600]
MAX_SIZE: 1000 MAX_SIZE: 1000
NMS: 0.45 NMS: 0.45
\ No newline at end of file RPN_POST_NMS_TOP_N: 1000
\ No newline at end of file
...@@ -29,16 +29,16 @@ FRCNN: ...@@ -29,16 +29,16 @@ FRCNN:
WEIGHTS: '/model/VGG16.RCNN.pth' WEIGHTS: '/model/VGG16.RCNN.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
SCALES: [600] SCALES: [600]
MAX_SIZE: 1000 MAX_SIZE: 1000
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
SCALES: [600] SCALES: [600]
MAX_SIZE: 1000 MAX_SIZE: 1000
NMS: 0.45 RPN_MIN_SIZE: 16
\ No newline at end of file NMS: 0.45
\ No newline at end of file
...@@ -32,11 +32,11 @@ FPN: ...@@ -32,11 +32,11 @@ FPN:
WEIGHTS: '/model/R-50.Affine.pth' WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/coco_2014_trainval35k' DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
SCALES: [416] SCALES: [416]
RANDOM_SCALES: [0.25, 1.0] RANDOM_SCALES: [0.25, 1.0]
USE_DIFF: False # Do not use crowd objects
DATASET: '/data/coco_2014_minival' DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json' JSON_FILE: '/data/instances_minival2014.json'
...@@ -23,10 +23,10 @@ FPN: ...@@ -23,10 +23,10 @@ FPN:
WEIGHTS: '/model/AirNet.Affine.pth' WEIGHTS: '/model/AirNet.Affine.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
SCALES: [320] SCALES: [320]
RANDOM_SCALES: [0.25, 1.0] RANDOM_SCALES: [0.25, 1.0]
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
...@@ -24,10 +24,10 @@ FPN: ...@@ -24,10 +24,10 @@ FPN:
WEIGHTS: '/model/R-50.Affine.pth' WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
SCALES: [320] SCALES: [320]
RANDOM_SCALES: [0.25, 2.0] RANDOM_SCALES: [0.25, 2.0]
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
...@@ -38,6 +38,7 @@ TRAIN: ...@@ -38,6 +38,7 @@ TRAIN:
SCALES: [300] SCALES: [300]
RANDOM_SCALES: [0.25, 1.00] RANDOM_SCALES: [0.25, 1.00]
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
...@@ -3,7 +3,7 @@ VIS: False ...@@ -3,7 +3,7 @@ VIS: False
TYPE: ssd TYPE: ssd
BACKBONE: airnet5b.mbox BACKBONE: airnet.fpn
CLASSES: ['__background__', CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat', 'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair', 'bottle', 'bus', 'car', 'cat', 'chair',
...@@ -17,19 +17,30 @@ SOLVER: ...@@ -17,19 +17,30 @@ SOLVER:
MAX_STEPS: 120000 MAX_STEPS: 120000
SNAPSHOT_PREFIX: voc_ssd_320 SNAPSHOT_PREFIX: voc_ssd_320
STRIDES: [8, 16, 32] STRIDES: [8, 16, 32, 64, 100, 300]
MIN_SIZES: [30, 90, 150] MIN_SIZES: [30, 60, 110, 162, 213, 264]
MAX_SIZES: [90, 150, 210] MAX_SIZES: [60, 110, 162, 213, 264, 315]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]] ASPECT_RATIOS: [
[1, 2, 0.5],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5],
[1, 2, 0.5],
WEIGHTS: '/model/AirNet.Affine.pth' WEIGHTS: '/model/AirNet.Affine.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
SCALES: [320] SCALES: [320]
RANDOM_SCALES: [0.25, 1.00] RANDOM_SCALES: [0.25, 1.00]
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
...@@ -37,9 +37,10 @@ SSD: ...@@ -37,9 +37,10 @@ SSD:
WEIGHTS: '/model/R-50.Affine.pth' WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
SCALES: [320] SCALES: [320]
RANDOM_SCALES: [0.25, 1.00] RANDOM_SCALES: [0.25, 1.00]
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
AccessModifierOffset: -1
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ]
- Regex: '^<.*\.h(pp)?>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
UseTab: Never
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h"
#include "nms_op.h" #include "nms_op.h"
#include "../utils/detection_utils.h"
namespace dragon { namespace dragon {
template <class Context> template <typename T> template <class Context>
template <typename T>
void NonMaxSuppressionOp<Context>::DoRunWithType() { void NonMaxSuppressionOp<Context>::DoRunWithType() {
int num_selected; int num_selected;
utils::detection::ApplyNMS( utils::detection::ApplyNMS(
Output(0)->count(), Output(0)->count(),
Output(0)->count(), Output(0)->count(),
iou_threshold_, iou_threshold_,
Input(0).template mutable_data<T, Context>(), Input(0).template mutable_data<T, Context>(),
Output(0)->template mutable_data<int64_t, CPUContext>(), Output(0)->template mutable_data<int64_t, CPUContext>(),
num_selected, ctx() num_selected,
); ctx());
Output(0)->Reshape({ num_selected }); Output(0)->Reshape({num_selected});
} }
template <class Context> template <class Context>
void NonMaxSuppressionOp<Context>::RunOnDevice() { void NonMaxSuppressionOp<Context>::RunOnDevice() {
CHECK(Input(0).ndim() == 2 && Input(0).dim(1) == 5) CHECK(Input(0).ndim() == 2 && Input(0).dim(1) == 5)
<< "\nThe dimensions of boxes should be (num_boxes, 5)."; << "\nThe dimensions of boxes should be (num_boxes, 5).";
Output(0)->Reshape({ Input(0).dim(0) });
DispatchHelper<TensorTypes<float>>::Call(this, Input(0)); Output(0)->Reshape({Input(0).dim(0)});
DispatchHelper<TensorTypes<float>>::Call(this, Input(0));
} }
DEPLOY_CPU(NonMaxSuppression); DEPLOY_CPU(NonMaxSuppression);
...@@ -41,4 +38,4 @@ OPERATOR_SCHEMA(NonMaxSuppression).NumInputs(1).NumOutputs(1); ...@@ -41,4 +38,4 @@ OPERATOR_SCHEMA(NonMaxSuppression).NumInputs(1).NumOutputs(1);
NO_GRADIENT(NonMaxSuppression); NO_GRADIENT(NonMaxSuppression);
} // namespace dragon } // namespace dragon
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* You should have received a copy of the BSD 2-Clause License * You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See, * along with the software. If not, See,
* *
* <https://opensource.org/licenses/BSD-2-Clause> * <https://opensource.org/licenses/BSD-2-Clause>
* *
* ------------------------------------------------------------ * ------------------------------------------------------------
*/ */
...@@ -20,20 +20,20 @@ namespace dragon { ...@@ -20,20 +20,20 @@ namespace dragon {
template <class Context> template <class Context>
class NonMaxSuppressionOp final : public Operator<Context> { class NonMaxSuppressionOp final : public Operator<Context> {
public: public:
NonMaxSuppressionOp(const OperatorDef& def, Workspace* ws) NonMaxSuppressionOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
iou_threshold_(OpArg<float>("iou_threshold", 0.5f)) {} iou_threshold_(OpArg<float>("iou_threshold", 0.5f)) {}
void RunOnDevice() override; void RunOnDevice() override;
template <typename T> template <typename T>
void DoRunWithType(); void DoRunWithType();
protected: protected:
float iou_threshold_; float iou_threshold_;
}; };
} // namespace dragon } // namespace dragon
#include <dragon/core/workspace.h> #include <dragon/utils/math_functions.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h" #include "../utils/detection_utils.h"
#include "retinanet_decoder_op.h" #include "retinanet_decoder_op.h"
namespace dragon { namespace dragon {
template <class Context> template <typename T> template <class Context>
template <typename T>
void RetinaNetDecoderOp<Context>::DoRunWithType() { void RetinaNetDecoderOp<Context>::DoRunWithType() {
using BT = float; // DType of BBox using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox using BC = CPUContext; // Context of BBox
int feat_h, feat_w; int feat_h, feat_w;
int C = Input(-3).dim(2), A, K; int C = Input(-3).dim(2), A, K;
int total_proposals = 0; int total_proposals = 0;
int num_candidates, num_boxes, num_proposals; int num_candidates, num_boxes, num_proposals;
auto* batch_scores = Input(-3).template data<T, BC>(); auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>(); auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>(); auto* im_info = Input(-1).template data<BT, BC>();
auto* y = Output(0)->template mutable_data<BT, BC>(); auto* y = Output(0)->template mutable_data<BT, BC>();
for (int n = 0; n < num_images_; ++n) { for (int n = 0; n < num_images_; ++n) {
BT im_h = im_info[0]; BT im_h = im_info[0];
BT im_w = im_info[1]; BT im_w = im_info[1];
BT im_scale_h = im_info[2]; BT im_scale_h = im_info[2];
BT im_scale_w = im_info[2]; BT im_scale_w = im_info[2];
if (Input(-1).dim(1) == 4) im_scale_w = im_info[3]; if (Input(-1).dim(1) == 4) im_scale_w = im_info[3];
auto* scores = batch_scores + n * Input(-3).stride(0); auto* scores = batch_scores + n * Input(-3).stride(0);
auto* deltas = batch_deltas + n * Input(-2).stride(0); auto* deltas = batch_deltas + n * Input(-2).stride(0);
CHECK_EQ(strides_.size(), InputSize() - 3) CHECK_EQ(strides_.size(), InputSize() - 3)
<< "\nGiven " << strides_.size() << " strides " << "\nGiven " << strides_.size() << " strides "
<< "and " << InputSize() - 3 << " features"; << "and " << InputSize() - 3 << " features";
// Select the top-k candidates as proposals // Select the top-k candidates as proposals
num_boxes = Input(-3).dim(1); num_boxes = Input(-3).dim(1);
num_candidates = Input(-3).count(1); num_candidates = Input(-3).count(1);
roi_indices_.resize(num_candidates); roi_indices_.resize(num_candidates);
num_candidates = 0; num_candidates = 0;
for (int i = 0; i < roi_indices_.size(); ++i) for (int i = 0; i < roi_indices_.size(); ++i)
if (scores[i] > score_thr_) if (scores[i] > score_thr_) roi_indices_[num_candidates++] = i;
roi_indices_[num_candidates++] = i; scores_.resize(num_candidates);
scores_.resize(num_candidates); for (int i = 0; i < num_candidates; ++i)
for (int i = 0; i < num_candidates; ++i) scores_[i] = scores[roi_indices_[i]];
scores_[i] = scores[roi_indices_[i]]; num_proposals = std::min(num_candidates, (int)pre_nms_topn_);
num_proposals = std::min( utils::math::ArgPartition(
num_candidates, num_candidates, num_proposals, true,, indices_);
(int)pre_nms_topn_ for (int i = 0; i < num_proposals; ++i)
); indices_[i] = roi_indices_[indices_[i]];
utils::math::ArgPartition( // Decode the candidates
num_candidates, int base_offset = 0;
num_proposals, for (int i = 0; i < strides_.size(); i++) {
true, feat_h = Input(i).dim(2);, feat_w = Input(i).dim(3);
indices_ K = feat_h * feat_w;
); A = int(ratios_.size() * scales_.size());
for (int i = 0; i < num_proposals; ++i) anchors_.resize((size_t)(A * 4));
indices_[i] = roi_indices_[indices_[i]]; utils::detection::GenerateAnchors(
// Decode the candidates strides_[i],
int base_offset = 0; (int)ratios_.size(),
for (int i = 0; i < strides_.size(); i++) { (int)scales_.size(),
feat_h = Input(i).dim(2);,
feat_w = Input(i).dim(3);,
K = feat_h * feat_w;;
A = int(ratios_.size() * scales_.size()); utils::detection::GenerateGridAnchors(
anchors_.resize((size_t)(A * 4)); num_proposals,
utils::detection::GenerateAnchors( C,
strides_[i], A,
(int)ratios_.size(), feat_h,
(int)scales_.size(), feat_w,, strides_[i],, base_offset,,
utils::detection::GenerateGridAnchors( y);
num_proposals, C, A, base_offset += (A * K);
feat_h, feat_w,
base_offset += (A * K);
num_boxes, C,
total_proposals += num_proposals;
y += (num_proposals * 7);
im_info += Input(-1).dim(1);
} }
total_proposals += num_proposals;
y += (num_proposals * 7);
im_info += Input(-1).dim(1);
Output(0)->Reshape({ total_proposals, 7 }); Output(0)->Reshape({total_proposals, 7});
} }
template <class Context> template <class Context>
void RetinaNetDecoderOp<Context>::RunOnDevice() { void RetinaNetDecoderOp<Context>::RunOnDevice() {
num_images_ = Input(0).dim(0); num_images_ = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_
<< " groups info, got "
<< Input(-1).dim(0) << ".";
Output(0)->Reshape({ num_images_ * pre_nms_topn_, 7 }); CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_ << " groups info, got "
<< Input(-1).dim(0) << ".";
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3)); Output(0)->Reshape({num_images_ * pre_nms_topn_, 7});
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
} }
DEPLOY_CPU(RetinaNetDecoder); DEPLOY_CPU(RetinaNetDecoder);
...@@ -123,8 +113,6 @@ DEPLOY_CPU(RetinaNetDecoder); ...@@ -123,8 +113,6 @@ DEPLOY_CPU(RetinaNetDecoder);
DEPLOY_CUDA(RetinaNetDecoder); DEPLOY_CUDA(RetinaNetDecoder);
#endif #endif
OPERATOR_SCHEMA(RetinaNetDecoder) OPERATOR_SCHEMA(RetinaNetDecoder).NumInputs(3, INT_MAX).NumOutputs(1, INT_MAX);
.NumInputs(3, INT_MAX)
.NumOutputs(1, INT_MAX);
} // namespace dragon } // namespace dragon
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* You should have received a copy of the BSD 2-Clause License * You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See, * along with the software. If not, See,
* *
* <https://opensource.org/licenses/BSD-2-Clause> * <https://opensource.org/licenses/BSD-2-Clause>
* *
* ------------------------------------------------------------ * ------------------------------------------------------------
*/ */
...@@ -20,27 +20,27 @@ namespace dragon { ...@@ -20,27 +20,27 @@ namespace dragon {
template <class Context> template <class Context>
class RetinaNetDecoderOp final : public Operator<Context> { class RetinaNetDecoderOp final : public Operator<Context> {
public: public:
RetinaNetDecoderOp(const OperatorDef& def, Workspace* ws) RetinaNetDecoderOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
strides_(OpArgs<int64_t>("strides")), strides_(OpArgs<int64_t>("strides")),
ratios_(OpArgs<float>("ratios")), ratios_(OpArgs<float>("ratios")),
scales_(OpArgs<float>("scales")), scales_(OpArgs<float>("scales")),
pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)), pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
score_thr_(OpArg<float>("score_thresh", 0.05f)) {} score_thr_(OpArg<float>("score_thresh", 0.05f)) {}
void RunOnDevice() override; void RunOnDevice() override;
template <typename T> template <typename T>
void DoRunWithType(); void DoRunWithType();
protected: protected:
float score_thr_; float score_thr_;
vec64_t strides_, indices_, roi_indices_; vec64_t strides_, indices_, roi_indices_;
vector<float> ratios_, scales_, scores_, anchors_; vector<float> ratios_, scales_, scores_, anchors_;
int64_t num_images_, pre_nms_topn_; int64_t num_images_, pre_nms_topn_;
}; };
} // namespace dragon } // namespace dragon
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* You should have received a copy of the BSD 2-Clause License * You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See, * along with the software. If not, See,
* *
* <https://opensource.org/licenses/BSD-2-Clause> * <https://opensource.org/licenses/BSD-2-Clause>
* *
* ------------------------------------------------------------ * ------------------------------------------------------------
*/ */
...@@ -20,36 +20,36 @@ namespace dragon { ...@@ -20,36 +20,36 @@ namespace dragon {
template <class Context> template <class Context>
class RPNDecoderOp final : public Operator<Context> { class RPNDecoderOp final : public Operator<Context> {
public: public:
RPNDecoderOp(const OperatorDef& def, Workspace* ws) RPNDecoderOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
strides_(OpArgs<int64_t>("strides")), strides_(OpArgs<int64_t>("strides")),
ratios_(OpArgs<float>("ratios")), ratios_(OpArgs<float>("ratios")),
scales_(OpArgs<float>("scales")), scales_(OpArgs<float>("scales")),
pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)), pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
post_nms_topn_(OpArg<int64_t>("post_nms_top_n", 300)), post_nms_topn_(OpArg<int64_t>("post_nms_top_n", 300)),
nms_thr_(OpArg<float>("nms_thresh", 0.7f)), nms_thr_(OpArg<float>("nms_thresh", 0.7f)),
min_size_(OpArg<int64_t>("min_size", 16)), min_size_(OpArg<int64_t>("min_size", 16)),
min_level_(OpArg<int64_t>("min_level", 2)), min_level_(OpArg<int64_t>("min_level", 2)),
max_level_(OpArg<int64_t>("max_level", 5)), max_level_(OpArg<int64_t>("max_level", 5)),
canonical_level_(OpArg<int64_t>("canonical_level", 4)), canonical_level_(OpArg<int64_t>("canonical_level", 4)),
canonical_scale_(OpArg<int64_t>("canonical_scale", 224)) {} canonical_scale_(OpArg<int64_t>("canonical_scale", 224)) {}
void RunOnDevice() override; void RunOnDevice() override;
template <typename T> template <typename T>
void DoRunWithType(); void DoRunWithType();
protected: protected:
float nms_thr_; float nms_thr_;
vec64_t strides_, indices_, roi_indices_; vec64_t strides_, indices_, roi_indices_;
vector<float> ratios_, scales_, scores_, anchors_; vector<float> ratios_, scales_, scores_, anchors_;
int64_t min_size_, pre_nms_topn_, post_nms_topn_; int64_t min_size_, pre_nms_topn_, post_nms_topn_;
int64_t num_images_, min_level_, max_level_; int64_t num_images_, min_level_, max_level_;
int64_t canonical_level_, canonical_scale_; int64_t canonical_level_, canonical_scale_;
Tensor proposals_; Tensor proposals_;
}; };
} // namespace dragon } // namespace dragon
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
...@@ -15,25 +15,35 @@ from __future__ import absolute_import ...@@ -15,25 +15,35 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import glob
from distutils.core import setup from distutils.core import setup
from import cpp_extension
from import cpp_extension
if cpp_extension.CUDA_HOME is not None and \ if cpp_extension.CUDA_HOME is not None and \
cpp_extension._cuda.is_available(): cpp_extension._cuda.is_available():
Extension = cpp_extension.CUDAExtension Extension = cpp_extension.CUDAExtension
else: else:
Extension = cpp_extension.CppExtension Extension = cpp_extension.CppExtension
def find_sources(*dirs):
ext_suffixes = ['.cc']
if Extension is cpp_extension.CUDAExtension:
sources = []
for path in dirs:
for ext_suffix in ext_suffixes:
sources += glob.glob(
path + '/*' + ext_suffix,
return sources
ext_modules = [ ext_modules = [
Extension( Extension(
name='install.lib.modules._C', name='install.lib.modules._C',
sources=[ sources=find_sources('**'),
), ),
] ]
#include <dragon/core/context.h>
#include "detection_utils.h" #include "detection_utils.h"
#include <dragon/core/context.h>
namespace dragon { namespace dragon {
...@@ -9,45 +9,46 @@ namespace detection { ...@@ -9,45 +9,46 @@ namespace detection {
template <typename T> template <typename T>
T IoU(const T A[], const T B[]) { T IoU(const T A[], const T B[]) {
if (A[0] > B[2] || A[1] > B[3] || if (A[0] > B[2] || A[1] > B[3] || A[2] < B[0] || A[3] < B[1]) return 0;
A[2] < B[0] || A[3] < B[1]) return 0; const T x1 = std::max(A[0], B[0]);
const T x1 = std::max(A[0], B[0]); const T y1 = std::max(A[1], B[1]);
const T y1 = std::max(A[1], B[1]); const T x2 = std::min(A[2], B[2]);
const T x2 = std::min(A[2], B[2]); const T y2 = std::min(A[3], B[3]);
const T y2 = std::min(A[3], B[3]); const T width = std::max((T)0, x2 - x1 + 1);
const T width = std::max((T)0, x2 - x1 + 1); const T height = std::max((T)0, y2 - y1 + 1);
const T height = std::max((T)0, y2 - y1 + 1); const T area = width * height;
const T area = width * height; const T A_area = (A[2] - A[0] + 1) * (A[3] - A[1] + 1);
const T A_area = (A[2] - A[0] + 1) * (A[3] - A[1] + 1); const T B_area = (B[2] - B[0] + 1) * (B[3] - B[1] + 1);
const T B_area = (B[2] - B[0] + 1) * (B[3] - B[1] + 1); return area / (A_area + B_area - area);
return area / (A_area + B_area - area);
} }
template <> void ApplyNMS<float, CPUContext>( template <>
const int num_boxes, void ApplyNMS<float, CPUContext>(
const int max_keeps, const int num_boxes,
const float thresh, const int max_keeps,
const float* boxes, const float thresh,
int64_t* keep_indices, const float* boxes,
int& num_keep, int64_t* keep_indices,
CPUContext* ctx) { int& num_keep,
int count = 0; CPUContext* ctx) {
std::vector<char> is_dead(num_boxes); int count = 0;
for (int i = 0; i < num_boxes; ++i) is_dead[i] = 0; std::vector<char> is_dead(num_boxes);
for (int i = 0; i < num_boxes; ++i) { for (int i = 0; i < num_boxes; ++i)
if (is_dead[i]) continue; is_dead[i] = 0;
keep_indices[count++] = i; for (int i = 0; i < num_boxes; ++i) {
if (count == max_keeps) break; if (is_dead[i]) continue;
for (int j = i + 1; j < num_boxes; ++j) keep_indices[count++] = i;
if (!is_dead[j] && IoU(&boxes[i * 5], if (count == max_keeps) break;
&boxes[j * 5]) > thresh) for (int j = i + 1; j < num_boxes; ++j)
is_dead[j] = 1; if (!is_dead[j] && IoU(&boxes[i * 5], &boxes[j * 5]) > thresh) {
} is_dead[j] = 1;
num_keep = count; }
num_keep = count;
} }
} // namespace detection } // namespace detection
} // namespace utils } // namespace utils
} // namespace dragon } // namespace dragon
...@@ -9,127 +9,121 @@ namespace utils { ...@@ -9,127 +9,121 @@ namespace utils {
namespace detection { namespace detection {
#define DIV_UP(m,n) ((m) / (n) + ((m) % (n) > 0)) #define DIV_UP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define NUM_THREADS 64 #define NUM_THREADS 64
namespace { namespace {
template <typename T> template <typename T>
__device__ bool _CheckIoU( __device__ bool _CheckIoU(const T* a, const T* b, const float thresh) {
const T* a, const T x1 = max(a[0], b[0]);
const T* b, const T y1 = max(a[1], b[1]);
const float thresh) { const T x2 = min(a[2], b[2]);
const T x1 = max(a[0], b[0]); const T y2 = min(a[3], b[3]);
const T y1 = max(a[1], b[1]); const T width = max(T(0), x2 - x1 + 1);
const T x2 = min(a[2], b[2]); const T height = max(T(0), y2 - y1 + 1);
const T y2 = min(a[3], b[3]); const T inter = width * height;
const T width = max(T(0), x2 - x1 + 1); const T Sa = (a[2] - a[0] + T(1)) * (a[3] - a[1] + T(1));
const T height = max(T(0), y2 - y1 + 1); const T Sb = (b[2] - b[0] + T(1)) * (b[3] - b[1] + T(1));
const T inter = width * height; return inter > thresh * (Sa + Sb - inter);
const T Sa = (a[2] - a[0] + T(1)) * (a[3] - a[1] + T(1));
const T Sb = (b[2] - b[0] + T(1)) * (b[3] - b[1] + T(1));
return inter > thresh * (Sa + Sb - inter);
} }
template <typename T> template <typename T>
__global__ void _NonMaxSuppression( __global__ void _NonMaxSuppression(
const int num_blocks, const int num_blocks,
const int num_boxes, const int num_boxes,
const T thresh, const T thresh,
const T* dev_boxes, const T* dev_boxes,
uint64_t* dev_mask) { uint64_t* dev_mask) {
const int row_start = blockIdx.y; const int row_start = blockIdx.y;
const int col_start = blockIdx.x; const int col_start = blockIdx.x;
if (row_start > col_start) return; if (row_start > col_start) return;
const int row_size = min(num_boxes - row_start * NUM_THREADS, NUM_THREADS); const int row_size = min(num_boxes - row_start * NUM_THREADS, NUM_THREADS);
const int col_size = min(num_boxes - col_start * NUM_THREADS, NUM_THREADS); const int col_size = min(num_boxes - col_start * NUM_THREADS, NUM_THREADS);
__shared__ T block_boxes[NUM_THREADS * 4]; __shared__ T block_boxes[NUM_THREADS * 4];
if (threadIdx.x < col_size) { if (threadIdx.x < col_size) {
const int c1 = threadIdx.x * 4; const int c1 = threadIdx.x * 4;
const int c2 = (col_start * NUM_THREADS + threadIdx.x) * 5; const int c2 = (col_start * NUM_THREADS + threadIdx.x) * 5;
block_boxes[c1] = dev_boxes[c2]; block_boxes[c1] = dev_boxes[c2];
block_boxes[c1 + 1] = dev_boxes[c2 + 1]; block_boxes[c1 + 1] = dev_boxes[c2 + 1];
block_boxes[c1 + 2] = dev_boxes[c2 + 2]; block_boxes[c1 + 2] = dev_boxes[c2 + 2];
block_boxes[c1 + 3] = dev_boxes[c2 + 3]; block_boxes[c1 + 3] = dev_boxes[c2 + 3];
} }
__syncthreads(); __syncthreads();
if (threadIdx.x < row_size) { if (threadIdx.x < row_size) {
const int index = row_start * NUM_THREADS + threadIdx.x; const int index = row_start * NUM_THREADS + threadIdx.x;
const T* dev_box = dev_boxes + index * 5; const T* dev_box = dev_boxes + index * 5;
unsigned long long val = 0; unsigned long long val = 0;
const int start = (row_start == col_start) ? (threadIdx.x + 1) : 0; const int start = (row_start == col_start) ? (threadIdx.x + 1) : 0;
for (int i = start; i < col_size; ++i) { for (int i = start; i < col_size; ++i) {
if (_CheckIoU(dev_box, block_boxes + i * 4, thresh)) { if (_CheckIoU(dev_box, block_boxes + i * 4, thresh)) {
val |= 1ULL << i; val |= 1ULL << i;
} }
dev_mask[index * num_blocks + col_start] = val;
} }
dev_mask[index * num_blocks + col_start] = val;
} }
} // namespace } // namespace
template <> void ApplyNMS<float, CUDAContext>( template <>
const int num_boxes, void ApplyNMS<float, CUDAContext>(
const int max_keeps, const int num_boxes,
const float thresh, const int max_keeps,
const float* boxes, const float thresh,
int64_t* keep_indices, const float* boxes,
int& num_keep, int64_t* keep_indices,
CUDAContext* ctx) { int& num_keep,
const int num_blocks = DIV_UP(num_boxes, NUM_THREADS); CUDAContext* ctx) {
const int num_blocks = DIV_UP(num_boxes, NUM_THREADS);
vector<uint64_t> mask_host(num_boxes * num_blocks);
auto* mask_dev = (uint64_t*)ctx->New(mask_host.size() * sizeof(uint64_t)); vector<uint64_t> mask_host(num_boxes * num_blocks);
auto* mask_dev = (uint64_t*)ctx->New(mask_host.size() * sizeof(uint64_t));
<<< dim3(num_blocks, num_blocks), NUM_THREADS, _NonMaxSuppression<<<
0, ctx->cuda_stream() >>>( dim3(num_blocks, num_blocks),
num_blocks, NUM_THREADS,
num_boxes, 0,
thresh, ctx->cuda_stream()>>>(num_blocks, num_boxes, thresh, boxes, mask_dev);
mask_dev CUDA_CHECK(cudaMemcpyAsync(
CUDA_CHECK(cudaMemcpyAsync( mask_host.size() * sizeof(uint64_t),, cudaMemcpyDeviceToHost,
mask_dev, ctx->cuda_stream()));
mask_host.size() * sizeof(uint64_t),
cudaMemcpyDeviceToHost, ctx->FinishDeviceComputation();
)); vector<uint64_t> dead_bit(num_blocks);
memset(&dead_bit[0], 0, sizeof(uint64_t) * num_blocks);
int num_selected = 0;
vector<uint64_t> dead_bit(num_blocks); for (int i = 0; i < num_boxes; ++i) {
memset(&dead_bit[0], 0, sizeof(uint64_t) * num_blocks); const int nblock = i / NUM_THREADS;
const int inblock = i % NUM_THREADS;
int num_selected = 0; if (!(dead_bit[nblock] & (1ULL << inblock))) {
for (int i = 0; i < num_boxes; ++i) { keep_indices[num_selected++] = i;
const int nblock = i / NUM_THREADS; auto* mask_i = &mask_host[0] + i * num_blocks;
const int inblock = i % NUM_THREADS; for (int j = nblock; j < num_blocks; ++j)
if (!(dead_bit[nblock] & (1ULL << inblock))) { dead_bit[j] |= mask_i[j];
keep_indices[num_selected++] = i; if (num_selected == max_keeps) break;
auto* mask_i = &mask_host[0] + i * num_blocks;
for (int j = nblock; j < num_blocks; ++j) dead_bit[j] |= mask_i[j];
if (num_selected == max_keeps) break;
} }
num_keep = num_selected; }
ctx->Delete(mask_dev); num_keep = num_selected;
} }
} // namespace detection } // namespace detection
} // namespace utils } // namespace utils
} // namespace dragon } // namespace dragon
#endif // USE_CUDA #endif // USE_CUDA
...@@ -52,12 +52,9 @@ class AnchorTarget(object): ...@@ -52,12 +52,9 @@ class AnchorTarget(object):
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images) gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
# Generate grid anchors from base # Generate grid anchors from base
all_anchors = \ grid_shapes = [f.shape[-2:] for f in features]
generate_grid_anchors( all_anchors = generate_grid_anchors(
features, grid_shapes, self.base_anchors, self.strides)
num_anchors = all_anchors.shape[0] num_anchors = all_anchors.shape[0]
# Label: ``1`` is positive, ``0`` is negative, ``-1` is don't care # Label: ``1`` is positive, ``0`` is negative, ``-1` is don't care
...@@ -58,12 +58,9 @@ class Proposal(object): ...@@ -58,12 +58,9 @@ class Proposal(object):
# Get resources # Get resources
num_images = ims_info.shape[0] num_images = ims_info.shape[0]
all_anchors = \ grid_shapes = [f.shape[-2:] for f in features]
generate_grid_anchors( all_anchors = generate_grid_anchors(
features, grid_shapes, self.base_anchors, self.strides)
# Prepare for the outputs # Prepare for the outputs
batch_rois = [] batch_rois = []
...@@ -19,40 +19,40 @@ import numpy as np ...@@ -19,40 +19,40 @@ import numpy as np
from seetadet.core.config import cfg from seetadet.core.config import cfg
def generate_grid_anchors(features, base_anchors, strides): def generate_grid_anchors(grid_shapes, base_anchors, strides):
num_strides = len(strides) num_strides = len(strides)
if len(features) != num_strides: if len(grid_shapes) != num_strides:
raise ValueError( raise ValueError(
'Given %d features for %d strides.' 'Given %d grids for %d strides.'
% (len(features), num_strides) % (len(grid_shapes), num_strides)
) )
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
anchors_to_pack = [] anchors_to_pack = []
for i in range(len(features)): for i in range(len(grid_shapes)):
height, width = features[i].shape[-2:] height, width = grid_shapes[i]
shift_x = np.arange(0, width) * strides[i] shift_x = np.arange(0, width) * strides[i]
shift_y = np.arange(0, height) * strides[i] shift_y = np.arange(0, height) * strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to # Add a anchors (1, a, 4) to
# cell K shifts (K, 1, 4) to get # cell k shifts (k, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (k, a, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (k * a, 4) shifted anchors
A = base_anchors[i].shape[0] a = base_anchors[i].shape[0]
K = shifts.shape[0] k = shifts.shape[0]
anchors = (base_anchors[i].reshape((1, A, 4)) + anchors = (base_anchors[i].reshape((1, a, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2))) shifts.reshape((1, k, 4)).transpose((1, 0, 2)))
if num_strides > 1: if num_strides > 1:
# Transpose from (K, A, 4) to (A, K, 4) # Transpose from (K, A, 4) to (A, K, 4)
# We will pack it with other strides to # We will pack it with other strides to
# match the data format of (N, C, H, W) # match the data format of (N, C, H, W)
anchors = anchors.transpose((1, 0, 2)) anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4)) anchors = anchors.reshape((a * k, 4))
anchors_to_pack.append(anchors) anchors_to_pack.append(anchors)
else: else:
# Original order of Faster R-CNN # Original order of Faster R-CNN
return anchors.reshape((K * A, 4)) return anchors.reshape((k * a, 4))
return np.vstack(anchors_to_pack) return np.vstack(anchors_to_pack)
...@@ -46,6 +46,9 @@ class AnchorTarget(object): ...@@ -46,6 +46,9 @@ class AnchorTarget(object):
ratios=self.ratios, ratios=self.ratios,
sizes=sizes, sizes=sizes,
)) ))
# Store the cached grid anchors
self.last_grid_shapes = None
self.last_grid_anchors = None
def __call__(self, features, gt_boxes): def __call__(self, features, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
...@@ -58,12 +61,17 @@ class AnchorTarget(object): ...@@ -58,12 +61,17 @@ class AnchorTarget(object):
) )
# Generate grid anchors from base # Generate grid anchors from base
all_anchors = \ grid_shapes = [f.shape[-2:] for f in features]
generate_grid_anchors( if grid_shapes == self.last_grid_shapes:
features, all_anchors = self.last_grid_anchors
self.base_anchors, else:
self.strides, self.last_grid_shapes = grid_shapes
) self.last_grid_anchors = all_anchors = \
num_anchors = all_anchors.shape[0] num_anchors = all_anchors.shape[0]
# Label: ``1`` is positive, ``0`` is negative, ``-1` is don't care # Label: ``1`` is positive, ``0`` is negative, ``-1` is don't care
...@@ -15,6 +15,7 @@ from __future__ import print_function ...@@ -15,6 +15,7 @@ from __future__ import print_function
import types import types
import dragon
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
...@@ -59,7 +60,7 @@ def ims_detect(detector, raw_images): ...@@ -59,7 +60,7 @@ def ims_detect(detector, raw_images):
# Unpack results # Unpack results
results = outputs['detections'] results = outputs['detections']
detections = [[] for _ in range(len((raw_images)))] detections = [[] for _ in range(len(raw_images))]
for i in range(len(ims)): for i in range(len(ims)):
inds = np.where(results[:, 0].astype(np.int32) == i)[0] inds = np.where(results[:, 0].astype(np.int32) == i)[0]
...@@ -126,6 +127,6 @@ def test_net(weights, num_classes, q_in, q_out, device): ...@@ -126,6 +127,6 @@ def test_net(weights, num_classes, q_in, q_out, device):
q_out.put(( q_out.put((
indices[i], indices[i],
dict([('im_detect', _t['im_detect'].average_time), dict([('im_detect', _t['im_detect'].average_time),
('misc',_t['misc'].average_time)]), ('misc', _t['misc'].average_time)]),
dict([('boxes', boxes_this_image)]), dict([('boxes', boxes_this_image)]),
)) ))
...@@ -45,14 +45,14 @@ class PriorBox(object): ...@@ -45,14 +45,14 @@ class PriorBox(object):
aspect_ratios[i], aspect_ratios[i],
) )
) )
self.grid_anchors = None # Store the cached grid anchors
self.last_grid_anchors = None
def __call__(self, features): def __call__(self, features):
if self.grid_anchors is not None: if self.last_grid_anchors is not None:
return self.grid_anchors return self.last_grid_anchors
self.grid_anchors = []
all_anchors = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
# 1. Generate base grids # 1. Generate base grids
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
...@@ -61,23 +61,23 @@ class PriorBox(object): ...@@ -61,23 +61,23 @@ class PriorBox(object):
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
# 2. Apply anchors on base grids # 2. Apply anchors on base grids
# Add A anchors (1, A, 4) to # Add a anchors (1, a, 4) to
# cell K shifts (K, 1, 4) to get # cell k shifts (k, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (k, a, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (k * a, 4) shifted anchors
A = self.base_anchors[i].shape[0] a = self.base_anchors[i].shape[0]
D = self.base_anchors[i].shape[1] d = self.base_anchors[i].shape[1]
shifts = np.vstack(( shifts = np.vstack((
shift_x.ravel(), shift_x.ravel(),
shift_y.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_x.ravel(),
shift_y.ravel()) shift_y.ravel())
).transpose() ).transpose()
K = shifts.shape[0] # K = map_h * map_w k = shifts.shape[0] # k = map_h * map_w
anchors = (self.base_anchors[i].reshape((1, A, D)) + anchors = (self.base_anchors[i].reshape((1, a, d)) +
shifts.reshape((1, K, D)).transpose((1, 0, 2))) shifts.reshape((1, k, d)).transpose((1, 0, 2)))
anchors = anchors.reshape((K * A, D)).astype(np.float32) anchors = anchors.reshape((k * a, d)).astype(np.float32)
self.grid_anchors.append(anchors) all_anchors.append(anchors)
self.grid_anchors = np.concatenate(self.grid_anchors)
return self.grid_anchors self.last_grid_anchors = np.concatenate(all_anchors)
return self.last_grid_anchors
...@@ -32,11 +32,9 @@ def get_images(ims): ...@@ -32,11 +32,9 @@ def get_images(ims):
for im in ims: for im in ims:
im_scales.append((float(out_size) / im.shape[0], im_scales.append((float(out_size) / im.shape[0],
float(out_size) / im.shape[1])) float(out_size) / im.shape[1]))
processed_ims.append( processed_ims.append(cv2.resize(
im, (out_size, out_size), im, (out_size, out_size),
interpolation=cv2.INTER_AREA, interpolation=cv2.INTER_AREA))
if ims[0].dtype == 'uint16': if ims[0].dtype == 'uint16':
ims_blob = np.array(processed_ims, dtype='float32') / 256. ims_blob = np.array(processed_ims, dtype='float32') / 256.
else: else:
...@@ -49,12 +49,12 @@ class Distort(object): ...@@ -49,12 +49,12 @@ class Distort(object):
] ]
def apply(self, img, boxes=None): def apply(self, img, boxes=None):
if self._prob > 0: self._prob = 0.5 if cfg.TRAIN.USE_COLOR_JITTER else 0
img = PIL.Image.fromarray(img) img = PIL.Image.fromarray(img)
for transform_fn, prob in self._transforms: for transform_fn, prob in self._transforms:
if npr.uniform() < prob: if npr.uniform() < prob:
img = transform_fn(img) img = transform_fn(img)
img = img.enhance(1. + npr.uniform(-.4, .4)) img = img.enhance(1. + npr.uniform(-.4, .4))
return np.array(img), boxes return np.array(img), boxes
return img, boxes return img, boxes
...@@ -27,8 +27,9 @@ if __name__ == '__main__': ...@@ -27,8 +27,9 @@ if __name__ == '__main__':
np.random.seed(3) np.random.seed(3)
cfg.TRAIN.SCALES = [300] cfg.TRAIN.SCALES = [300]
cfg.TRAIN.RANDOM_SCALES = [0.25, 1.00] cfg.TRAIN.RANDOM_SCALES = [0.25, 1.00]
augmentor = transforms.Compose( transformer = transforms.Compose(
transforms.Distort(), transforms.Distort(),
transforms.Expand(), transforms.Expand(),
transforms.Sample(), transforms.Sample(),
...@@ -38,12 +39,12 @@ if __name__ == '__main__': ...@@ -38,12 +39,12 @@ if __name__ == '__main__':
while True: while True:
img = cv2.imread('cat.jpg') img = cv2.imread('cat.jpg')
boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32) boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
img, boxes = augmentor(img, boxes) img, boxes = transformer(img, boxes)
for box in boxes: for box in boxes:
x1 = int(box[0] * img.shape[1]) x1 = int(box[0] * img.shape[1])
y1 = int(box[1] * img.shape[0]) y1 = int(box[1] * img.shape[0])
x2 = int(box[2] * img.shape[1]) x2 = int(box[2] * img.shape[1])
y2 = int(box[3] * img.shape[0]) y2 = int(box[3] * img.shape[0])
cv2.rectangle(img, (x1, y1), (x2, y2), (188, 119, 64), 2) cv2.rectangle(img, (x1, y1), (x2, y2), (188, 119, 64), 2)
cv2.imshow('Sample', img) cv2.imshow('Transforms - Preview', img)
cv2.waitKey(0) cv2.waitKey(0)
...@@ -70,14 +70,15 @@ class Pipeline(dali.Pipeline): ...@@ -70,14 +70,15 @@ class Pipeline(dali.Pipeline):
# Decode image # Decode image
image = self.decode(inputs['image']) image = self.decode(inputs['image'])
# Augment the color space # Augment the color space if necessary
image = self.hsv( if cfg.TRAIN.USE_COLOR_JITTER:
self.brightness_contrast( image = self.hsv(
image, self.brightness_contrast(
brightness=self.twist_rng(), image,
contrast=self.twist_rng(), brightness=self.twist_rng(),
), saturation=self.twist_rng() contrast=self.twist_rng(),
) ), saturation=self.twist_rng()
# Expand randomly to get smaller objects # Expand randomly to get smaller objects
pr = self.paste_ratio() * self.flip_rng() + 1. pr = self.paste_ratio() * self.flip_rng() + 1.
...@@ -18,7 +18,7 @@ from __future__ import division ...@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os
from seetadet.datasets import kpl_record from seetadet.datasets import kpl_dataset
def get_dataset(name): def get_dataset(name):
...@@ -42,5 +42,5 @@ def list_dataset(): ...@@ -42,5 +42,5 @@ def list_dataset():
'default': lambda source: 'default': lambda source:
kpl_record.KPLRecordDataset(source), kpl_dataset.KPLRecordDataset(source),
} }
...@@ -149,8 +149,10 @@ class AirNet(nn.Module): ...@@ -149,8 +149,10 @@ class AirNet(nn.Module):
x = self.layer1(x) x = self.layer1(x)
outputs = [None, None, self.layer2(x)] outputs = [None, None, self.layer2(x)]
if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])] if hasattr(self, 'layer3'):
if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])] outputs += [self.layer3(outputs[-1])]
if hasattr(self, 'layer4'):
outputs += [self.layer4(outputs[-1])]
return outputs return outputs
...@@ -39,16 +39,17 @@ class Detector(nn.Module): ...@@ -39,16 +39,17 @@ class Detector(nn.Module):
backbone = cfg.MODEL.BACKBONE.lower().split('.') backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:] body, modules = backbone[0], backbone[1:]
# + DataLoader # DataLoader
self.data_loader = None
self.data_loader_cls = importlib.import_module( self.data_loader_cls = importlib.import_module(
'seetadet.algo.{}'.format(model)).DataLoader 'seetadet.algo.{}'.format(model)).DataLoader
self.bootstrap = vision.Bootstrap() self.bootstrap = vision.Bootstrap()
# + FeatureExtractor # FeatureExtractor
self.body = backbones.get(body)() self.body = backbones.get(body)()
feature_dims = self.body.feature_dims feature_dims = self.body.feature_dims
# + FeatureEnhancer # FeatureEnhancer
if 'fpn' in modules: if 'fpn' in modules:
self.fpn = models.FPN(feature_dims) self.fpn = models.FPN(feature_dims)
feature_dims = self.fpn.feature_dims feature_dims = self.fpn.feature_dims
...@@ -57,7 +58,7 @@ class Detector(nn.Module): ...@@ -57,7 +58,7 @@ class Detector(nn.Module):
else: else:
feature_dims = [feature_dims[-1]] feature_dims = [feature_dims[-1]]
# + Detection Modules # Detection Modules
if 'rcnn' in model: if 'rcnn' in model:
self.rpn = models.RPN(feature_dims[0]) self.rpn = models.RPN(feature_dims[0])
if 'faster' in model: if 'faster' in model:
...@@ -106,7 +107,7 @@ class Detector(nn.Module): ...@@ -106,7 +107,7 @@ class Detector(nn.Module):
if inputs is None: if inputs is None:
# 1) Training: <= DataLayer # 1) Training: <= DataLayer
# 2) Inference: <= Given # 2) Inference: <= Given
if not hasattr(self, 'data_loader'): if self.data_loader is None:
self.data_loader = self.data_loader_cls() self.data_loader = self.data_loader_cls()
inputs = self.data_loader() inputs = self.data_loader()
...@@ -171,29 +172,34 @@ class Detector(nn.Module): ...@@ -171,29 +172,34 @@ class Detector(nn.Module):
# Merge Affine into Convolution # # Merge Affine into Convolution #
################################### ###################################
last_module = None last_module = None
for e in self.modules(): for module in self.modules():
if isinstance(e, nn.Affine) and \ if isinstance(module, nn.Affine) and \
isinstance(last_module, nn.Conv2d): isinstance(last_module, nn.Conv2d):
if last_module.bias is None: if last_module.bias is None:
delattr(last_module, 'bias') delattr(last_module, 'bias')
e.forward = lambda x: x module.forward = lambda x: x
last_module.bias = e.bias last_module.bias = module.bias weight =
last_module = e 0, *([1] * (last_module.weight.ndimension() - 1)))
last_module = module
###################################### ######################################
# Merge BatchNorm into Convolution # # Merge BatchNorm into Convolution #
###################################### ######################################
last_module = None last_module = None
for e in self.modules(): for module in self.modules():
if isinstance(e, nn.BatchNorm2d) and \ if isinstance(module, nn.BatchNorm2d) and \
isinstance(last_module, nn.Conv2d): isinstance(last_module, nn.Conv2d):
if last_module.bias is None: if last_module.bias is None:
delattr(last_module, 'bias') delattr(last_module, 'bias')
e.forward = lambda x: x module.forward = lambda x: x
term = torch.sqrt( + e.eps) term = torch.sqrt( + module.eps)
term = / term term = / term
last_module.bias = - term * last_module.bias = \ - \
term *
term = term.view(0, *([1] * (last_module.weight.ndimension() - 1)))
if last_module.weight.dtype == 'float16': if last_module.weight.dtype == 'float16':
last_module.bias.half_() last_module.bias.half_()
weight = weight =
...@@ -201,7 +207,7 @@ class Detector(nn.Module): ...@@ -201,7 +207,7 @@ class Detector(nn.Module):
last_module.weight.copy_(weight) last_module.weight.copy_(weight)
else: else:
last_module = e last_module = module
def new_detector(device, weights=None, training=False): def new_detector(device, weights=None, training=False):
...@@ -31,7 +31,8 @@ class FPN(nn.Module): ...@@ -31,7 +31,8 @@ class FPN(nn.Module):
dim = cfg.FPN.DIM dim = cfg.FPN.DIM
self.C = nn.ModuleList() self.C = nn.ModuleList()
self.P = nn.ModuleList() self.P = nn.ModuleList()
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1): self.highest_backbone_lvl = min(cfg.FPN.RPN_MAX_LEVEL, HIGHEST_BACKBONE_LVL)
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, self.highest_backbone_lvl + 1):
self.C.append(nn.Conv1x1(feature_dims[lvl - 1], dim, bias=True)) self.C.append(nn.Conv1x1(feature_dims[lvl - 1], dim, bias=True))
self.P.append(nn.Conv3x3(dim, dim, bias=True)) self.P.append(nn.Conv3x3(dim, dim, bias=True))
if 'rcnn' in cfg.MODEL.TYPE: if 'rcnn' in cfg.MODEL.TYPE:
...@@ -40,8 +41,8 @@ class FPN(nn.Module): ...@@ -40,8 +41,8 @@ class FPN(nn.Module):
else: else:
self.apply_func = self.apply_on_generic self.apply_func = self.apply_on_generic
self.relu = nn.ReLU(inplace=False) self.relu = nn.ReLU(inplace=False)
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1): for lvl in range(self.highest_backbone_lvl + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else dim dim_in = feature_dims[-1] if lvl == self.highest_backbone_lvl + 1 else dim
self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True)) self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True))
self.feature_dims = [dim] self.feature_dims = [dim]
self.coarsest_stride = cfg.MODEL.COARSEST_STRIDE self.coarsest_stride = cfg.MODEL.COARSEST_STRIDE
...@@ -56,12 +57,12 @@ class FPN(nn.Module): ...@@ -56,12 +57,12 @@ class FPN(nn.Module):
def apply_on_rcnn(self, features): def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)] outputs = [self.P[self.highest_backbone_lvl - min_lvl](fpn_input)]
# Apply max pool for higher features # Apply max pool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(self.highest_backbone_lvl + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1])) outputs.append(self.maxpool(outputs[-1]))
# Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(self.highest_backbone_lvl - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
if self.coarsest_stride > 0: if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample( upscale_output = nn_funcs.upsample(
...@@ -76,15 +77,15 @@ class FPN(nn.Module): ...@@ -76,15 +77,15 @@ class FPN(nn.Module):
def apply_on_generic(self, features): def apply_on_generic(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)] outputs = [self.P[self.highest_backbone_lvl - min_lvl](fpn_input)]
# Add extra convolutions for higher features # Add extra convolutions for higher features
extra_input = features[-1] extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(self.highest_backbone_lvl + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input)) outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl: if i != max_lvl:
extra_input = self.relu(outputs[-1]) extra_input = self.relu(outputs[-1])
# Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(self.highest_backbone_lvl - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
if self.coarsest_stride > 0: if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample( upscale_output = nn_funcs.upsample(
...@@ -161,7 +161,7 @@ class NASMobileNet(nn.Module): ...@@ -161,7 +161,7 @@ class NASMobileNet(nn.Module):
def reset_parameters(self): def reset_parameters(self):
for m in self.modules(): for m in self.modules():
if nn.is_conv2d(m): if isinstance(m, nn.Conv2d):
init.kaiming_normal(m.weight, 'fan_out') init.kaiming_normal(m.weight, 'fan_out')
if m.bias is not None: if m.bias is not None:
init.constant(m.bias, 0) init.constant(m.bias, 0)
...@@ -173,7 +173,7 @@ class NASMobileNet(nn.Module): ...@@ -173,7 +173,7 @@ class NASMobileNet(nn.Module):
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
if nn.is_conv2d(m): if isinstance(m, nn.Conv2d):
m.weight.requires_grad = False m.weight.requires_grad = False
m._buffers['weight'] = m.weight m._buffers['weight'] = m.weight
del m._parameters['weight'] del m._parameters['weight']
...@@ -17,8 +17,6 @@ from __future__ import absolute_import ...@@ -17,8 +17,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch
from seetadet.core.config import cfg from seetadet.core.config import cfg
from seetadet.core.registry import backbones from seetadet.core.registry import backbones
from seetadet.modules import nn from seetadet.modules import nn
...@@ -37,11 +35,12 @@ class BasicBlock(nn.Module): ...@@ -37,11 +35,12 @@ class BasicBlock(nn.Module):
super(BasicBlock, self).__init__() super(BasicBlock, self).__init__()
self.conv1 = nn.Conv3x3(dim_in, dim_out, stride) self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = nn.FrozenAffine(dim_out) self.bn1 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv3x3(dim_out, dim_out) self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = nn.FrozenAffine(dim_out) self.bn2 = nn.FrozenAffine(dim_out)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock self.dropblock1 = nn.DropBlock2d(**dropblock) if dropblock else None
self.dropblock2 = nn.DropBlock2d(**dropblock) if dropblock else None
def forward(self, x): def forward(self, x):
residual = x residual = x
...@@ -50,14 +49,14 @@ class BasicBlock(nn.Module): ...@@ -50,14 +49,14 @@ class BasicBlock(nn.Module):
out = self.bn1(out) out = self.bn1(out)
out = self.relu(out) out = self.relu(out)
if self.dropblock is not None: if self.dropblock1 is not None:
out = self.dropblock(out) out = self.dropblock1(out)
out = self.conv2(out) out = self.conv2(out)
out = self.bn2(out) out = self.bn2(out)
if self.dropblock is not None: if self.dropblock2 is not None:
residual = self.dropblock(residual) residual = self.dropblock2(residual)
if self.downsample is not None: if self.downsample is not None:
residual = self.downsample(residual) residual = self.downsample(residual)
...@@ -67,7 +66,7 @@ class BasicBlock(nn.Module): ...@@ -67,7 +66,7 @@ class BasicBlock(nn.Module):
return out return out
class Bottleneck(torch.nn.Module): class Bottleneck(nn.Module):
# 1x64d => 0.25 (ResNet) # 1x64d => 0.25 (ResNet)
# 32x8d, 64x4d => 1.0 (ResNeXt) # 32x8d, 64x4d => 1.0 (ResNeXt)
contraction = cfg.RESNET.NUM_GROUPS \ contraction = cfg.RESNET.NUM_GROUPS \
...@@ -86,12 +85,13 @@ class Bottleneck(torch.nn.Module): ...@@ -86,12 +85,13 @@ class Bottleneck(torch.nn.Module):
self.conv1 = nn.Conv1x1(dim_in, dim) self.conv1 = nn.Conv1x1(dim_in, dim)
self.bn1 = nn.FrozenAffine(dim) self.bn1 = nn.FrozenAffine(dim)
self.conv2 = nn.Conv3x3(dim, dim, stride=stride) self.conv2 = nn.Conv3x3(dim, dim, stride=stride)
self.drop2 = nn.DropBlock2d(**dropblock) if dropblock else None
self.bn2 = nn.FrozenAffine(dim) self.bn2 = nn.FrozenAffine(dim)
self.conv3 = nn.Conv1x1(dim, dim_out) self.conv3 = nn.Conv1x1(dim, dim_out)
self.drop3 = nn.DropBlock2d(**dropblock) if dropblock else None
self.bn3 = nn.FrozenAffine(dim_out) self.bn3 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock
def forward(self, x): def forward(self, x):
residual = x residual = x
...@@ -101,32 +101,30 @@ class Bottleneck(torch.nn.Module): ...@@ -101,32 +101,30 @@ class Bottleneck(torch.nn.Module):
out = self.relu(out) out = self.relu(out)
out = self.conv2(out) out = self.conv2(out)
if self.drop2 is not None:
out = self.drop2(out)
out = self.bn2(out) out = self.bn2(out)
out = self.relu(out) out = self.relu(out)
if self.dropblock is not None:
out = self.dropblock(out)
out = self.conv3(out) out = self.conv3(out)
out = self.bn3(out) out = self.bn3(out)
if self.dropblock is not None:
residual = self.dropblock(residual)
if self.downsample is not None: if self.downsample is not None:
residual = self.downsample(residual) residual = self.downsample(residual)
out += residual out += residual
if self.drop3 is not None:
out = self.drop3(out)
out = self.relu(out) out = self.relu(out)
return out return out
class ResNet(torch.nn.Module): class ResNet(nn.Module):
def __init__(self, block, layers, filters): def __init__(self, block, layers, filters):
super(ResNet, self).__init__() super(ResNet, self).__init__()
self.dim_in, filters = filters[0], filters[1:] self.dim_in, filters = filters[0], filters[1:]
self.feature_dims = [self.dim_in] + filters self.feature_dims = [self.dim_in] + filters
self.conv1 = torch.nn.Conv2d( self.conv1 = nn.Conv2d(
3, 64, 3, 64,
kernel_size=7, kernel_size=7,
stride=2, stride=2,
...@@ -134,29 +132,31 @@ class ResNet(torch.nn.Module): ...@@ -134,29 +132,31 @@ class ResNet(torch.nn.Module):
bias=False, bias=False,
) )
self.bn1 = nn.FrozenAffine(self.dim_in) self.bn1 = nn.FrozenAffine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d( self.maxpool = nn.MaxPool2d(
kernel_size=3, kernel_size=3,
stride=2, stride=2,
padding=0, padding=0,
ceil_mode=True, ceil_mode=True,
) )
self.drop3 = torch.nn.DropBlock2d( drop3 = {
kp=0.9, 'kp': 0.9,
block_size=7, 'block_size': 7,
alpha=0.25, 'alpha': 1.00,
) if cfg.DROPBLOCK.DROP_ON else None 'inplace': True,
self.drop4 = torch.nn.DropBlock2d( } if cfg.DROPBLOCK.DROP_ON else None
kp=0.9, drop4 = {
block_size=7, 'kp': 0.9,
alpha=1.00, 'block_size': 7,
decrement=cfg.DROPBLOCK.DECREMENT 'alpha': 1.00,
) if cfg.DROPBLOCK.DROP_ON else None 'decrement': cfg.DROPBLOCK.DECREMENT,
'inplace': True,
} if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0]) self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2) self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3) self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, drop3)
self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, self.drop4) self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, drop4)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -166,7 +166,7 @@ class ResNet(torch.nn.Module): ...@@ -166,7 +166,7 @@ class ResNet(torch.nn.Module):
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
m.weight.requires_grad = False m.weight.requires_grad = False
m._buffers['weight'] = m.weight m._buffers['weight'] = m.weight
del m._parameters['weight'] del m._parameters['weight']
...@@ -29,7 +29,6 @@ class SSD(nn.Module): ...@@ -29,7 +29,6 @@ class SSD(nn.Module):
######################################## ########################################
# SSD outputs # # SSD outputs #
######################################## ########################################
self.cls_conv = torch.nn.ModuleList( self.cls_conv = torch.nn.ModuleList(
nn.Conv3x3(feature_dims[0], feature_dims[0], bias=True) nn.Conv3x3(feature_dims[0], feature_dims[0], bias=True)
for _ in range(cfg.SSD.NUM_CONVS) for _ in range(cfg.SSD.NUM_CONVS)
...@@ -36,7 +36,6 @@ class _NonMaxSuppression(Function): ...@@ -36,7 +36,6 @@ class _NonMaxSuppression(Function):
return self.dispatch([dets], [self.alloc()]) return self.dispatch([dets], [self.alloc()])
class _RetinaNetDecoder(Function): class _RetinaNetDecoder(Function):
"""Decode predictions from RetinaNet.""" """Decode predictions from RetinaNet."""
...@@ -33,6 +33,7 @@ def kaiming_normal(weight, mode='fan_in'): ...@@ -33,6 +33,7 @@ def kaiming_normal(weight, mode='fan_in'):
nonlinearity='relu', nonlinearity='relu',
) )
# Aliases # Aliases
constant = nn.init.constant_ constant = nn.init.constant_
normal = nn.init.normal_ normal = nn.init.normal_
...@@ -185,6 +185,7 @@ class SigmoidFocalLoss(object): ...@@ -185,6 +185,7 @@ class SigmoidFocalLoss(object):
return nn.SigmoidFocalLoss( return nn.SigmoidFocalLoss(
negative_index=0, # Background index
) )
...@@ -211,6 +212,7 @@ BCEWithLogitsLoss = nn.BCEWithLogitsLoss ...@@ -211,6 +212,7 @@ BCEWithLogitsLoss = nn.BCEWithLogitsLoss
Conv2d = nn.Conv2d Conv2d = nn.Conv2d
ConvTranspose2d = nn.ConvTranspose2d ConvTranspose2d = nn.ConvTranspose2d
DepthwiseConv2d = nn.DepthwiseConv2d DepthwiseConv2d = nn.DepthwiseConv2d
DropBlock2d = nn.DropBlock2d
Linear = nn.Linear Linear = nn.Linear
MaxPool2d = nn.MaxPool2d MaxPool2d = nn.MaxPool2d
Module = nn.Module Module = nn.Module
...@@ -15,7 +15,7 @@ from __future__ import print_function ...@@ -15,7 +15,7 @@ from __future__ import print_function
import functools import functools
import dragon.vm.torch as torch from dragon.vm import torch
from seetadet.core.config import cfg from seetadet.core.config import cfg
...@@ -41,7 +41,9 @@ class Bootstrap(torch.nn.Module): ...@@ -41,7 +41,9 @@ class Bootstrap(torch.nn.Module):
def __init__(self): def __init__(self):
super(Bootstrap, self).__init__() super(Bootstrap, self).__init__()
self.normalize_func = functools.partial( self._device = torch.device('cpu')
self._dummy_buffer = torch.ones(1)
self._normalize_func = functools.partial(
torch.channel_normalize, torch.channel_normalize,
mean=cfg.PIXEL_MEANS, mean=cfg.PIXEL_MEANS,
std=[1., 1., 1.], std=[1., 1., 1.],
...@@ -49,10 +51,9 @@ class Bootstrap(torch.nn.Module): ...@@ -49,10 +51,9 @@ class Bootstrap(torch.nn.Module):
dims=(0, 3, 1, 2), dims=(0, 3, 1, 2),
dtype=cfg.MODEL.PRECISION.lower(), dtype=cfg.MODEL.PRECISION.lower(),
) )
self.dummy_buffer = torch.ones(1)
def _apply(self, fn): def _apply(self, fn):
fn(self.dummy_buffer) fn(self._dummy_buffer)
def cpu(self): def cpu(self):
self._device = torch.device('cpu') self._device = torch.device('cpu')
...@@ -61,12 +62,11 @@ class Bootstrap(torch.nn.Module): ...@@ -61,12 +62,11 @@ class Bootstrap(torch.nn.Module):
self._device = torch.device('cuda', device) self._device = torch.device('cuda', device)
def device(self): def device(self):
"""Return the device of this module.""" return self._dummy_buffer.device
return self.dummy_buffer.device
def forward(self, input): def forward(self, input):
if isinstance(input, torch.Tensor): if isinstance(input, torch.Tensor):
if input.size(1) <= 3: if input.shape[1] <= 3:
return input return input
cur_device = self.device() cur_device = self.device()
if input._device != cur_device: if input._device != cur_device:
...@@ -74,4 +74,4 @@ class Bootstrap(torch.nn.Module): ...@@ -74,4 +74,4 @@ class Bootstrap(torch.nn.Module):
input = input.cpu() input = input.cpu()
else: else:
input = input.cuda(cur_device.index) input = input.cuda(cur_device.index)
return self.normalize_func(input) return self._normalize_func(input)
...@@ -32,8 +32,8 @@ class SGDSolver(object): ...@@ -32,8 +32,8 @@ class SGDSolver(object):
momentum=cfg.SOLVER.MOMENTUM, momentum=cfg.SOLVER.MOMENTUM,
weight_decay=cfg.SOLVER.WEIGHT_DECAY, weight_decay=cfg.SOLVER.WEIGHT_DECAY,
clip_gradient=float(cfg.SOLVER.CLIP_NORM), clip_norm=float(cfg.SOLVER.CLIP_NORM),
scale_gradient=1. / cfg.SOLVER.LOSS_SCALING, scale=1. / cfg.SOLVER.LOSS_SCALING,
) )
self.lr_scheduler = lr_scheduler.get_scheduler() self.lr_scheduler = lr_scheduler.get_scheduler()
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
# <https://opensource.org/licenses/BSD-2-Clause>
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import operator
from dragon.vm import torch
from seetadet.modules import nn
def dense_conv_flops(m, inputs, output):
    """Hook to compute params and flops for a dense convolution.

    The results are stored on the module itself as ``m.__params__``
    and ``m.__flops__``; nothing is returned.

    Parameters
    ----------
    m : object
        The convolution module; ``kernel_size`` and ``bias`` are read.
    inputs : Sequence
        The forward inputs; only ``inputs[0].shape[1]`` is read.
    output : object
        The forward output; ``shape[1]`` is used as the number of
        output channels and ``shape[2:]`` as the remaining dimensions.

    """
    k_dim = functools.reduce(operator.mul, m.kernel_size)
    out_dim = functools.reduce(operator.mul, output.shape[2:])
    in_c, out_c = inputs[0].shape[1], output.shape[1]
    # Check for the presence of a bias explicitly: the truth value of a
    # multi-element tensor/array is ambiguous and may raise.
    has_bias = m.bias is not None and m.bias is not False
    m.__params__ = (k_dim * in_c + (1 if has_bias else 0)) * out_c
    m.__flops__ = m.__params__ * out_dim
def depthwise_conv_flops(m, inputs, output):
    """Hook to compute params and flops for a depthwise convolution.

    The results are stored on the module itself as ``m.__params__``
    and ``m.__flops__``; nothing is returned.

    Parameters
    ----------
    m : object
        The convolution module; ``kernel_size`` and ``bias`` are read.
    inputs : Sequence
        The forward inputs; unused, kept for the hook signature.
    output : object
        The forward output; ``shape[1]`` is used as the number of
        output channels and ``shape[2:]`` as the remaining dimensions.

    """
    k_dim = functools.reduce(operator.mul, m.kernel_size)
    out_dim = functools.reduce(operator.mul, output.shape[2:])
    out_c = output.shape[1]
    # Check for the presence of a bias explicitly: the truth value of a
    # multi-element tensor/array is ambiguous and may raise.
    has_bias = m.bias is not None and m.bias is not False
    m.__params__ = (k_dim + (1 if has_bias else 0)) * out_c
    m.__flops__ = m.__params__ * out_dim
def register_flops(module):
    """Register hooks to collect flops info.

    Initializes ``module.__flops__`` to zero when it is absent, then
    attaches a flops-counting forward hook to every convolution found
    in ``module.modules()``.

    Parameters
    ----------
    module : object
        The root module whose sub-modules are inspected.

    """
    if not hasattr(module, '__flops__'):
        module.__flops__ = 0.
    for m in module.modules():
        # NOTE(review): the two hook registrations below were restored;
        # the branches had no body. Confirm against the upstream file.
        # The depthwise check comes first, presumably because
        # DepthwiseConv2d would also match the Conv2d test - TODO confirm.
        if isinstance(m, nn.DepthwiseConv2d):
            m.register_forward_hook(depthwise_conv_flops)
        elif isinstance(m, nn.Conv2d):
            m.register_forward_hook(dense_conv_flops)
def collect_flops(module, normalizer=1e6):
    """Sum and reset the flops recorded by the last forward.

    Every sub-module that carries a ``__flops__`` attribute contributes
    its value to the total and has the attribute reset to zero.

    Parameters
    ----------
    module : object
        The root module; ``module.modules()`` is iterated.
    normalizer : number, optional, default=1e6
        The divisor applied to the accumulated total.

    Returns
    -------
    float
        The accumulated flops divided by ``normalizer``.

    """
    accumulated = 0.
    for sub_module in module.modules():
        if not hasattr(sub_module, '__flops__'):
            continue
        accumulated += sub_module.__flops__
        # Reset so that the next collection starts from zero.
        sub_module.__flops__ = 0.
    return accumulated / normalizer
def benchmark_flops(module, normalizer=1e6):
"""Return the flops by running benchmark once."""
# NOTE(review): this copy of the function is incomplete. The right-hand
# side of the assignment below and the bodies of both ``if`` statements
# and of the ``with`` statement are missing; restore them from the
# upstream file before use.
# Presumably the module's training flag is saved here - TODO confirm.
original_training =
# Presumably switches the module out of training mode - TODO confirm.
if original_training:
# The benchmark forward is expected to run inside this no-grad scope;
# the exact call is not visible here.
with torch.no_grad():
# Presumably restores the original training mode - TODO confirm.
if original_training:
# Gather (and normalize) whatever the run recorded.
return collect_flops(module, normalizer)
