Commit 8558d3df by Ting PAN

Adapt to the latest dragon preview version

Summary:
This commit updates the repository to match dragon.0.3.0.dev20200707.
1 parent 4bcab266
Showing with 1252 additions and 1064 deletions
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.4.2 (20200707)
Dragon Minimum Required (Version 0.3.0.dev20200707)
Changes:
- Adapt to the latest dragon preview version.
Preview Features:
- None
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.4.1 (20200421)
Dragon Minimum Required (Version 0.3.0.dev20200421)
......
...@@ -14,7 +14,7 @@ The torch-style codes help us to simplify the hierarchical pipeline of modern de
## Requirements
-seeta-dragon >= 0.3.0.dev20200421
+seeta-dragon >= 0.3.0.dev20200707
## Installation
......
...@@ -32,16 +32,17 @@ FRCNN:
TRAIN:
  WEIGHTS: '/model/R-101.Affine.pth'
  DATASET: '/data/coco_2014_trainval35k'
-  USE_DIFF: False # Do not use crowd objects
  IMS_PER_BATCH: 2
  BATCH_SIZE: 512
  SCALES: [800]
  MAX_SIZE: 1333
+  USE_DIFF: False # Do not use crowd objects
TEST:
  DATASET: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
-  RPN_POST_NMS_TOP_N: 1000
  SCALES: [800]
  MAX_SIZE: 1333
  NMS: 0.5
+  RPN_POST_NMS_TOP_N: 1000
...@@ -32,16 +32,16 @@ FRCNN:
TRAIN:
  WEIGHTS: '/model/R-101.Affine.pth'
  DATASET: '/data/coco_2014_trainval35k'
-  USE_DIFF: False # Do not use crowd objects
  IMS_PER_BATCH: 2
  BATCH_SIZE: 512
  SCALES: [800]
  MAX_SIZE: 1333
+  USE_DIFF: False # Do not use crowd objects
TEST:
  DATASET: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
  PROTOCOL: 'coco'
-  RPN_POST_NMS_TOP_N: 1000
  SCALES: [800]
  MAX_SIZE: 1333
  NMS: 0.5
+  RPN_POST_NMS_TOP_N: 1000
...@@ -30,7 +30,7 @@ TRAIN:
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
-  RPN_POST_NMS_TOP_N: 1000
  SCALES: [600]
  MAX_SIZE: 1000
  NMS: 0.45
+  RPN_POST_NMS_TOP_N: 1000
\ No newline at end of file
...@@ -29,16 +29,16 @@ FRCNN:
TRAIN:
  WEIGHTS: '/model/VGG16.RCNN.pth'
  DATASET: '/data/voc_0712_trainval'
-  RPN_MIN_SIZE: 16
  IMS_PER_BATCH: 2
  BATCH_SIZE: 128
  SCALES: [600]
  MAX_SIZE: 1000
+  RPN_MIN_SIZE: 16
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
-  RPN_MIN_SIZE: 16
-  RPN_POST_NMS_TOP_N: 300
  SCALES: [600]
  MAX_SIZE: 1000
+  RPN_MIN_SIZE: 16
  NMS: 0.45
+  RPN_POST_NMS_TOP_N: 300
\ No newline at end of file
...@@ -32,11 +32,11 @@ FPN:
TRAIN:
  WEIGHTS: '/model/R-50.Affine.pth'
  DATASET: '/data/coco_2014_trainval35k'
-  USE_DIFF: False # Do not use crowd objects
-  USE_COLOR_JITTER: True
  IMS_PER_BATCH: 16
  SCALES: [416]
  RANDOM_SCALES: [0.25, 1.0]
+  USE_DIFF: False # Do not use crowd objects
+  USE_COLOR_JITTER: False
TEST:
  DATASET: '/data/coco_2014_minival'
  JSON_FILE: '/data/instances_minival2014.json'
......
...@@ -23,10 +23,10 @@ FPN:
TRAIN:
  WEIGHTS: '/model/AirNet.Affine.pth'
  DATASET: '/data/voc_0712_trainval'
-  USE_COLOR_JITTER: True
  IMS_PER_BATCH: 32
  SCALES: [320]
  RANDOM_SCALES: [0.25, 1.0]
+  USE_COLOR_JITTER: True
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
...@@ -24,10 +24,10 @@ FPN:
TRAIN:
  WEIGHTS: '/model/R-50.Affine.pth'
  DATASET: '/data/voc_0712_trainval'
-  USE_COLOR_JITTER: True
  IMS_PER_BATCH: 32
  SCALES: [320]
  RANDOM_SCALES: [0.25, 2.0]
+  USE_COLOR_JITTER: True
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
...@@ -38,6 +38,7 @@ TRAIN:
  IMS_PER_BATCH: 32
  SCALES: [300]
  RANDOM_SCALES: [0.25, 1.00]
+  USE_COLOR_JITTER: True
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
...@@ -3,7 +3,7 @@ VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
  TYPE: ssd
-  BACKBONE: airnet5b.mbox
+  BACKBONE: airnet.fpn
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
...@@ -17,19 +17,30 @@ SOLVER:
  MAX_STEPS: 120000
  SNAPSHOT_EVERY: 5000
  SNAPSHOT_PREFIX: voc_ssd_320
+FPN:
+  RPN_MIN_LEVEL: 3
+  RPN_MAX_LEVEL: 8
SSD:
  NUM_CONVS: 2
  MULTIBOX:
-    STRIDES: [8, 16, 32]
-    MIN_SIZES: [30, 90, 150]
-    MAX_SIZES: [90, 150, 210]
-    ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
+    STRIDES: [8, 16, 32, 64, 100, 300]
+    MIN_SIZES: [30, 60, 110, 162, 213, 264]
+    MAX_SIZES: [60, 110, 162, 213, 264, 315]
+    ASPECT_RATIOS: [
+      [1, 2, 0.5],
+      [1, 2, 0.5, 3, 0.33],
+      [1, 2, 0.5, 3, 0.33],
+      [1, 2, 0.5, 3, 0.33],
+      [1, 2, 0.5],
+      [1, 2, 0.5],
+    ]
TRAIN:
  WEIGHTS: '/model/AirNet.Affine.pth'
  DATASET: '/data/voc_0712_trainval'
+  IMS_PER_BATCH: 32
  SCALES: [320]
  RANDOM_SCALES: [0.25, 1.00]
-  IMS_PER_BATCH: 32
+  USE_COLOR_JITTER: True
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
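For orientation (not part of this commit): the MULTIBOX block above pairs each stride with a MIN_SIZES/MAX_SIZES/ASPECT_RATIOS entry, in the usual SSD convention of a min-size square, an extra square of side sqrt(min*max), and ratio-scaled variants per level. The Python sketch below only illustrates that convention with the values from this config; the function name and the exact rule SeetaDet applies are assumptions, not taken from its code.

# Illustrative sketch (not SeetaDet code): per-level SSD anchor sizes from the
# MULTIBOX config above, assuming the common SSD convention.
import math

def multibox_sizes(min_sizes, max_sizes, aspect_ratios):
    levels = []
    for s_min, s_max, ratios in zip(min_sizes, max_sizes, aspect_ratios):
        boxes = [(s_min, s_min), (math.sqrt(s_min * s_max),) * 2]
        for r in ratios:
            if r == 1:
                continue
            boxes.append((s_min * math.sqrt(r), s_min / math.sqrt(r)))
        levels.append(boxes)  # (width, height) pairs for this stride
    return levels

sizes = multibox_sizes(
    [30, 60, 110, 162, 213, 264],
    [60, 110, 162, 213, 264, 315],
    [[1, 2, 0.5], [1, 2, 0.5, 3, 0.33], [1, 2, 0.5, 3, 0.33],
     [1, 2, 0.5, 3, 0.33], [1, 2, 0.5], [1, 2, 0.5]])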
...@@ -37,9 +37,10 @@ SSD:
TRAIN:
  WEIGHTS: '/model/R-50.Affine.pth'
  DATASET: '/data/voc_0712_trainval'
+  IMS_PER_BATCH: 32
  SCALES: [320]
  RANDOM_SCALES: [0.25, 1.00]
-  IMS_PER_BATCH: 32
+  USE_COLOR_JITTER: True
TEST:
  DATASET: '/data/voc_2007_test'
  PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
---
AccessModifierOffset: -1
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ]
IncludeCategories:
- Regex: '^<.*\.h(pp)?>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
UseTab: Never
...
-#include <dragon/core/workspace.h>
-#include <dragon/utils/math_utils.h>
#include "nms_op.h"
#include "../utils/detection_utils.h"

namespace dragon {

template <class Context>
template <typename T>
void NonMaxSuppressionOp<Context>::DoRunWithType() {
  int num_selected;
  utils::detection::ApplyNMS(
      Output(0)->count(),
      Output(0)->count(),
      iou_threshold_,
      Input(0).template mutable_data<T, Context>(),
      Output(0)->template mutable_data<int64_t, CPUContext>(),
      num_selected,
      ctx());
  Output(0)->Reshape({num_selected});
}

template <class Context>
void NonMaxSuppressionOp<Context>::RunOnDevice() {
  CHECK(Input(0).ndim() == 2 && Input(0).dim(1) == 5)
      << "\nThe dimensions of boxes should be (num_boxes, 5).";
  Output(0)->Reshape({Input(0).dim(0)});
  DispatchHelper<TensorTypes<float>>::Call(this, Input(0));
}

DEPLOY_CPU(NonMaxSuppression);
...@@ -41,4 +38,4 @@ OPERATOR_SCHEMA(NonMaxSuppression).NumInputs(1).NumOutputs(1);
NO_GRADIENT(NonMaxSuppression);

} // namespace dragon
...@@ -5,7 +5,7 @@
 * You should have received a copy of the BSD 2-Clause License
 * along with the software. If not, See,
 *
 * <https://opensource.org/licenses/BSD-2-Clause>
 *
 * ------------------------------------------------------------
 */
...@@ -20,20 +20,20 @@ namespace dragon {
template <class Context>
class NonMaxSuppressionOp final : public Operator<Context> {
 public:
  NonMaxSuppressionOp(const OperatorDef& def, Workspace* ws)
      : Operator<Context>(def, ws),
        iou_threshold_(OpArg<float>("iou_threshold", 0.5f)) {}
  USE_OPERATOR_FUNCTIONS;

  void RunOnDevice() override;

  template <typename T>
  void DoRunWithType();

 protected:
  float iou_threshold_;
};

} // namespace dragon

#endif // SEETADET_CXX_OPERATORS_NMS_OP_H_
-#include <dragon/core/workspace.h>
-#include <dragon/utils/math_utils.h>
+#include <dragon/utils/math_functions.h>
#include "../utils/detection_utils.h"
#include "retinanet_decoder_op.h"

namespace dragon {

template <class Context>
template <typename T>
void RetinaNetDecoderOp<Context>::DoRunWithType() {
  using BT = float; // DType of BBox
  using BC = CPUContext; // Context of BBox
  int feat_h, feat_w;
  int C = Input(-3).dim(2), A, K;
  int total_proposals = 0;
  int num_candidates, num_boxes, num_proposals;
  auto* batch_scores = Input(-3).template data<T, BC>();
  auto* batch_deltas = Input(-2).template data<T, BC>();
  auto* im_info = Input(-1).template data<BT, BC>();
  auto* y = Output(0)->template mutable_data<BT, BC>();
  for (int n = 0; n < num_images_; ++n) {
    BT im_h = im_info[0];
    BT im_w = im_info[1];
    BT im_scale_h = im_info[2];
    BT im_scale_w = im_info[2];
    if (Input(-1).dim(1) == 4) im_scale_w = im_info[3];
    auto* scores = batch_scores + n * Input(-3).stride(0);
    auto* deltas = batch_deltas + n * Input(-2).stride(0);
    CHECK_EQ(strides_.size(), InputSize() - 3)
        << "\nGiven " << strides_.size() << " strides "
        << "and " << InputSize() - 3 << " features";
    // Select the top-k candidates as proposals
    num_boxes = Input(-3).dim(1);
    num_candidates = Input(-3).count(1);
    roi_indices_.resize(num_candidates);
    num_candidates = 0;
    for (int i = 0; i < roi_indices_.size(); ++i)
      if (scores[i] > score_thr_) roi_indices_[num_candidates++] = i;
    scores_.resize(num_candidates);
    for (int i = 0; i < num_candidates; ++i)
      scores_[i] = scores[roi_indices_[i]];
    num_proposals = std::min(num_candidates, (int)pre_nms_topn_);
    utils::math::ArgPartition(
        num_candidates, num_proposals, true, scores_.data(), indices_);
    for (int i = 0; i < num_proposals; ++i)
      indices_[i] = roi_indices_[indices_[i]];
    // Decode the candidates
    int base_offset = 0;
    for (int i = 0; i < strides_.size(); i++) {
      feat_h = Input(i).dim(2);
      feat_w = Input(i).dim(3);
      K = feat_h * feat_w;
      A = int(ratios_.size() * scales_.size());
      anchors_.resize((size_t)(A * 4));
      utils::detection::GenerateAnchors(
          strides_[i],
          (int)ratios_.size(),
          (int)scales_.size(),
          ratios_.data(),
          scales_.data(),
          anchors_.data());
      utils::detection::GenerateGridAnchors(
          num_proposals,
          C,
          A,
          feat_h,
          feat_w,
          strides_[i],
          base_offset,
          anchors_.data(),
          indices_.data(),
          y);
      base_offset += (A * K);
    }
    utils::detection::GenerateMCProposals(
        num_proposals,
        num_boxes,
        C,
        n,
        im_h,
        im_w,
        im_scale_h,
        im_scale_w,
        scores,
        deltas,
        indices_.data(),
        y);
    total_proposals += num_proposals;
    y += (num_proposals * 7);
    im_info += Input(-1).dim(1);
  }
  Output(0)->Reshape({total_proposals, 7});
}

template <class Context>
void RetinaNetDecoderOp<Context>::RunOnDevice() {
  num_images_ = Input(0).dim(0);
  CHECK_EQ(Input(-1).dim(0), num_images_)
      << "\nExcepted " << num_images_ << " groups info, got "
      << Input(-1).dim(0) << ".";
  Output(0)->Reshape({num_images_ * pre_nms_topn_, 7});
  DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
}

DEPLOY_CPU(RetinaNetDecoder);
...@@ -123,8 +113,6 @@ DEPLOY_CPU(RetinaNetDecoder);
DEPLOY_CUDA(RetinaNetDecoder);
#endif
-OPERATOR_SCHEMA(RetinaNetDecoder)
-    .NumInputs(3, INT_MAX)
-    .NumOutputs(1, INT_MAX);
+OPERATOR_SCHEMA(RetinaNetDecoder).NumInputs(3, INT_MAX).NumOutputs(1, INT_MAX);

} // namespace dragon
...@@ -5,7 +5,7 @@
 * You should have received a copy of the BSD 2-Clause License
 * along with the software. If not, See,
 *
 * <https://opensource.org/licenses/BSD-2-Clause>
 *
 * ------------------------------------------------------------
 */
...@@ -20,27 +20,27 @@ namespace dragon {
template <class Context>
class RetinaNetDecoderOp final : public Operator<Context> {
 public:
  RetinaNetDecoderOp(const OperatorDef& def, Workspace* ws)
      : Operator<Context>(def, ws),
        strides_(OpArgs<int64_t>("strides")),
        ratios_(OpArgs<float>("ratios")),
        scales_(OpArgs<float>("scales")),
        pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
        score_thr_(OpArg<float>("score_thresh", 0.05f)) {}
  USE_OPERATOR_FUNCTIONS;

  void RunOnDevice() override;

  template <typename T>
  void DoRunWithType();

 protected:
  float score_thr_;
  vec64_t strides_, indices_, roi_indices_;
  vector<float> ratios_, scales_, scores_, anchors_;
  int64_t num_images_, pre_nms_topn_;
};

} // namespace dragon

#endif // SEETADET_CXX_OPERATORS_RETINANET_DECODER_OP_H_
-#include <dragon/core/workspace.h>
-#include <dragon/utils/math_utils.h>
+#include <dragon/utils/math_functions.h>
#include "../utils/detection_utils.h"
#include "rpn_decoder_op.h"

namespace dragon {

template <class Context>
template <typename T>
void RPNDecoderOp<Context>::DoRunWithType() {
  using BT = float; // DType of BBox
  using BC = CPUContext; // Context of BBox
  int feat_h, feat_w, K, A;
  int total_rois = 0, num_rois;
  int num_candidates, num_proposals;
  auto* batch_scores = Input(-3).template data<T, BC>();
  auto* batch_deltas = Input(-2).template data<T, BC>();
  auto* im_info = Input(-1).template data<BT, BC>();
  auto* y = Output(0)->template mutable_data<BT, BC>();
  for (int n = 0; n < num_images_; ++n) {
    const BT im_h = im_info[0];
    const BT im_w = im_info[1];
    const BT scale = im_info[2];
    const BT min_box_h = min_size_ * scale;
    const BT min_box_w = min_size_ * scale;
    auto* scores = batch_scores + n * Input(-3).stride(0);
    auto* deltas = batch_deltas + n * Input(-2).stride(0);
    if (strides_.size() == 1) {
      // Case 1: single stride
      feat_h = Input(0).dim(2);
      feat_w = Input(0).dim(3);
      K = feat_h * feat_w;
      A = int(ratios_.size() * scales_.size());
      // Select the Top-K candidates as proposals
      num_candidates = A * K;
      num_proposals = std::min(num_candidates, (int)pre_nms_topn_);
      utils::math::ArgPartition(
          num_candidates, num_proposals, true, scores, indices_);
      // Decode the candidates
      anchors_.resize((size_t)(A * 4));
      proposals_.Reshape({num_proposals, 5});
      utils::detection::GenerateAnchors(
          strides_[0],
          (int)ratios_.size(),
          (int)scales_.size(),
          ratios_.data(),
          scales_.data(),
          anchors_.data());
      utils::detection::GenerateGridAnchors(
          num_proposals,
          A,
          feat_h,
          feat_w,
          strides_[0],
          0,
          anchors_.data(),
          indices_.data(),
          proposals_.template mutable_data<BT, BC>());
      utils::detection::GenerateSSProposals(
          K,
          num_proposals,
          im_h,
          im_w,
          min_box_h,
          min_box_w,
          scores,
          deltas,
          indices_.data(),
          proposals_.template mutable_data<BT, BC>());
      // Sort, NMS and Retrieve
      utils::detection::SortProposals(
          0,
          num_proposals - 1,
          num_proposals,
          proposals_.template mutable_data<BT, BC>());
      utils::detection::ApplyNMS(
          num_proposals,
          post_nms_topn_,
          nms_thr_,
          proposals_.template mutable_data<BT, Context>(),
          roi_indices_.data(),
          num_rois,
          ctx());
      utils::detection::RetrieveRoIs(
          num_rois,
          n,
          proposals_.template data<BT, BC>(),
          roi_indices_.data(),
          y);
    } else if (strides_.size() > 1) {
      // Case 2: multiple strides
      CHECK_EQ(strides_.size(), InputSize() - 3)
          << "\nGiven " << strides_.size() << " strides "
          << "and " << InputSize() - 3 << " feature inputs";
      CHECK_EQ(strides_.size(), scales_.size())
          << "\nGiven " << strides_.size() << " strides "
          << "and " << scales_.size() << " scales";
      // Select the top-k candidates as proposals
      num_candidates = Input(-3).dim(1);
      num_proposals = std::min(num_candidates, (int)pre_nms_topn_);
      utils::math::ArgPartition(
          num_candidates, num_proposals, true, scores, indices_);
      // Decode the candidates
      int base_offset = 0;
      proposals_.Reshape({num_proposals, 5});
      auto* proposals = proposals_.template mutable_data<BT, BC>();
      for (int i = 0; i < strides_.size(); i++) {
        feat_h = Input(i).dim(2);
        feat_w = Input(i).dim(3);
        K = feat_h * feat_w;
        A = (int)ratios_.size();
        anchors_.resize((size_t)(A * 4));
        utils::detection::GenerateAnchors(
            strides_[i],
            (int)ratios_.size(),
            1,
            ratios_.data(),
            scales_.data(),
            anchors_.data());
        utils::detection::GenerateGridAnchors(
            num_proposals,
            A,
            feat_h,
            feat_w,
            strides_[i],
            base_offset,
            anchors_.data(),
            indices_.data(),
            proposals);
        base_offset += (A * K);
      }
      utils::detection::GenerateMSProposals(
          num_candidates,
          num_proposals,
          im_h,
          im_w,
          min_box_h,
          min_box_w,
          scores,
          deltas,
          &indices_[0],
          proposals);
      // Sort, NMS and Retrieve
      utils::detection::SortProposals(
          0, num_proposals - 1, num_proposals, proposals);
      utils::detection::ApplyNMS(
          num_proposals,
          post_nms_topn_,
          nms_thr_,
          proposals_.template mutable_data<BT, Context>(),
          roi_indices_.data(),
          num_rois,
          ctx());
      utils::detection::RetrieveRoIs(
          num_rois, n, proposals, roi_indices_.data(), y);
    } else {
      LOG(FATAL) << "Excepted at least one stride for proposals.";
    }
    total_rois += num_rois;
    y += (num_rois * 5);
    im_info += Input(-1).dim(1);
  }

  Output(0)->Reshape({total_rois, 5});

  // Distribute rois into K bins
  if (OutputSize() > 1) {
    CHECK_EQ(max_level_ - min_level_ + 1, OutputSize())
        << "\nExcepted " << OutputSize() << " outputs for levels "
        << "between [" << min_level_ << ", " << max_level_ << "].";
    vector<BT*> ys(OutputSize());
    vector<vec64_t> bins(OutputSize());
    Tensor RoIs;
    RoIs.ReshapeLike(*Output(0));

    auto* rois = RoIs.template mutable_data<BT, BC>();
    ctx()->template Copy<BT, BC, BC>(
        Output(0)->count(), rois, Output(0)->template data<BT, BC>());

    utils::detection::CollectRoIs(
        total_rois,
        min_level_,
        max_level_,
        canonical_level_,
        canonical_scale_,
        rois,
        bins);

    for (int i = 0; i < OutputSize(); i++) {
      Output(i)->Reshape({std::max((int)bins[i].size(), 1), 5});
      ys[i] = Output(i)->template mutable_data<BT, BC>();
    }

    utils::detection::DistributeRoIs(bins, rois, ys);
  }
}

template <class Context>
void RPNDecoderOp<Context>::RunOnDevice() {
  num_images_ = Input(0).dim(0);
  CHECK_EQ(Input(-1).dim(0), num_images_)
      << "\nExcepted " << num_images_ << " groups info, got "
      << Input(-1).dim(0) << ".";
  roi_indices_.resize(post_nms_topn_);
  Output(0)->Reshape({num_images_ * post_nms_topn_, 5});
  DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
}

DEPLOY_CPU(RPNDecoder);
...@@ -241,8 +218,6 @@ DEPLOY_CPU(RPNDecoder);
DEPLOY_CUDA(RPNDecoder);
#endif
-OPERATOR_SCHEMA(RPNDecoder)
-    .NumInputs(3, INT_MAX)
-    .NumOutputs(1, INT_MAX);
+OPERATOR_SCHEMA(RPNDecoder).NumInputs(3, INT_MAX).NumOutputs(1, INT_MAX);

} // namespace dragon
...@@ -5,7 +5,7 @@
 * You should have received a copy of the BSD 2-Clause License
 * along with the software. If not, See,
 *
 * <https://opensource.org/licenses/BSD-2-Clause>
 *
 * ------------------------------------------------------------
 */
...@@ -20,36 +20,36 @@ namespace dragon {
template <class Context>
class RPNDecoderOp final : public Operator<Context> {
 public:
  RPNDecoderOp(const OperatorDef& def, Workspace* ws)
      : Operator<Context>(def, ws),
        strides_(OpArgs<int64_t>("strides")),
        ratios_(OpArgs<float>("ratios")),
        scales_(OpArgs<float>("scales")),
        pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
        post_nms_topn_(OpArg<int64_t>("post_nms_top_n", 300)),
        nms_thr_(OpArg<float>("nms_thresh", 0.7f)),
        min_size_(OpArg<int64_t>("min_size", 16)),
        min_level_(OpArg<int64_t>("min_level", 2)),
        max_level_(OpArg<int64_t>("max_level", 5)),
        canonical_level_(OpArg<int64_t>("canonical_level", 4)),
        canonical_scale_(OpArg<int64_t>("canonical_scale", 224)) {}
  USE_OPERATOR_FUNCTIONS;

  void RunOnDevice() override;

  template <typename T>
  void DoRunWithType();

 protected:
  float nms_thr_;
  vec64_t strides_, indices_, roi_indices_;
  vector<float> ratios_, scales_, scores_, anchors_;
  int64_t min_size_, pre_nms_topn_, post_nms_topn_;
  int64_t num_images_, min_level_, max_level_;
  int64_t canonical_level_, canonical_scale_;
  Tensor proposals_;
};

} // namespace dragon

#endif // SEETADET_CXX_OPERATORS_RPN_DECODER_OP_H_
...@@ -5,7 +5,7 @@
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
...@@ -15,25 +15,35 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

+import glob

from distutils.core import setup
from dragon.tools import cpp_extension

if cpp_extension.CUDA_HOME is not None and \
        cpp_extension._cuda.is_available():
    Extension = cpp_extension.CUDAExtension
else:
    Extension = cpp_extension.CppExtension

+def find_sources(*dirs):
+    ext_suffixes = ['.cc']
+    if Extension is cpp_extension.CUDAExtension:
+        ext_suffixes.append('.cu')
+    sources = []
+    for path in dirs:
+        for ext_suffix in ext_suffixes:
+            sources += glob.glob(
+                path + '/*' + ext_suffix,
+                recursive=True,
+            )
+    return sources

ext_modules = [
    Extension(
        name='install.lib.modules._C',
-        sources=[
-            'utils/detection_utils.cc',
-            'utils/detection_utils.cu',
-            'operators/nms_op.cc',
-            'operators/retinanet_decoder_op.cc',
-            'operators/rpn_decoder_op.cc',
-        ],
+        sources=find_sources('**'),
    ),
]
......
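For reference (a sketch, not output from the actual build): the new find_sources('**') call globs compilation units recursively instead of listing them by hand. Given the files this commit touches, it is expected to pick up the explicit source list removed above when run from the same directory; the exact result depends on the working directory at build time.

# Sketch only: mimics the glob pattern used by find_sources('**') above.
import glob

sources = []
for suffix in ('.cc', '.cu'):
    sources += glob.glob('**/*' + suffix, recursive=True)
# Expected to include, among others:
#   operators/nms_op.cc, operators/retinanet_decoder_op.cc,
#   operators/rpn_decoder_op.cc, utils/detection_utils.cc,
#   utils/detection_utils.cu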
-#include <dragon/core/context.h>
#include "detection_utils.h"
+#include <dragon/core/context.h>

namespace dragon {
...@@ -9,45 +9,46 @@ namespace detection {
template <typename T>
T IoU(const T A[], const T B[]) {
  if (A[0] > B[2] || A[1] > B[3] || A[2] < B[0] || A[3] < B[1]) return 0;
  const T x1 = std::max(A[0], B[0]);
  const T y1 = std::max(A[1], B[1]);
  const T x2 = std::min(A[2], B[2]);
  const T y2 = std::min(A[3], B[3]);
  const T width = std::max((T)0, x2 - x1 + 1);
  const T height = std::max((T)0, y2 - y1 + 1);
  const T area = width * height;
  const T A_area = (A[2] - A[0] + 1) * (A[3] - A[1] + 1);
  const T B_area = (B[2] - B[0] + 1) * (B[3] - B[1] + 1);
  return area / (A_area + B_area - area);
}

template <>
void ApplyNMS<float, CPUContext>(
    const int num_boxes,
    const int max_keeps,
    const float thresh,
    const float* boxes,
    int64_t* keep_indices,
    int& num_keep,
    CPUContext* ctx) {
  int count = 0;
  std::vector<char> is_dead(num_boxes);
  for (int i = 0; i < num_boxes; ++i)
    is_dead[i] = 0;
  for (int i = 0; i < num_boxes; ++i) {
    if (is_dead[i]) continue;
    keep_indices[count++] = i;
    if (count == max_keeps) break;
    for (int j = i + 1; j < num_boxes; ++j)
      if (!is_dead[j] && IoU(&boxes[i * 5], &boxes[j * 5]) > thresh) {
        is_dead[j] = 1;
      }
  }
  num_keep = count;
}

} // namespace detection
} // namespace utils
} // namespace dragon
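A standalone check of the IoU convention used above (inclusive pixel coordinates, hence the +1 terms); this Python sketch is illustrative only and not part of the repository.

# Standalone check of the IoU above (inclusive coordinates, hence the +1).
def iou(a, b):
    if a[0] > b[2] or a[1] > b[3] or a[2] < b[0] or a[3] < b[1]:
        return 0.0
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, x2 - x1 + 1) * max(0.0, y2 - y1 + 1)
    area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1)
    area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1)
    return inter / (area_a + area_b - inter)

# Two 10x10 boxes overlapping in a 5x10 strip: IoU = 50 / (100 + 100 - 50) = 1/3.
assert abs(iou([0, 0, 9, 9], [5, 0, 14, 9]) - 1.0 / 3.0) < 1e-6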
...@@ -9,127 +9,121 @@ namespace utils {
namespace detection {

-#define DIV_UP(m,n) ((m) / (n) + ((m) % (n) > 0))
+#define DIV_UP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define NUM_THREADS 64

namespace {

template <typename T>
__device__ bool _CheckIoU(const T* a, const T* b, const float thresh) {
  const T x1 = max(a[0], b[0]);
  const T y1 = max(a[1], b[1]);
  const T x2 = min(a[2], b[2]);
  const T y2 = min(a[3], b[3]);
  const T width = max(T(0), x2 - x1 + 1);
  const T height = max(T(0), y2 - y1 + 1);
  const T inter = width * height;
  const T Sa = (a[2] - a[0] + T(1)) * (a[3] - a[1] + T(1));
  const T Sb = (b[2] - b[0] + T(1)) * (b[3] - b[1] + T(1));
  return inter > thresh * (Sa + Sb - inter);
}

template <typename T>
__global__ void _NonMaxSuppression(
    const int num_blocks,
    const int num_boxes,
    const T thresh,
    const T* dev_boxes,
    uint64_t* dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;
  if (row_start > col_start) return;
  const int row_size = min(num_boxes - row_start * NUM_THREADS, NUM_THREADS);
  const int col_size = min(num_boxes - col_start * NUM_THREADS, NUM_THREADS);
  __shared__ T block_boxes[NUM_THREADS * 4];
  if (threadIdx.x < col_size) {
    const int c1 = threadIdx.x * 4;
    const int c2 = (col_start * NUM_THREADS + threadIdx.x) * 5;
    block_boxes[c1] = dev_boxes[c2];
    block_boxes[c1 + 1] = dev_boxes[c2 + 1];
    block_boxes[c1 + 2] = dev_boxes[c2 + 2];
    block_boxes[c1 + 3] = dev_boxes[c2 + 3];
  }
  __syncthreads();
  if (threadIdx.x < row_size) {
    const int index = row_start * NUM_THREADS + threadIdx.x;
    const T* dev_box = dev_boxes + index * 5;
    unsigned long long val = 0;
    const int start = (row_start == col_start) ? (threadIdx.x + 1) : 0;
    for (int i = start; i < col_size; ++i) {
      if (_CheckIoU(dev_box, block_boxes + i * 4, thresh)) {
        val |= 1ULL << i;
      }
    }
    dev_mask[index * num_blocks + col_start] = val;
  }
}

} // namespace

template <>
void ApplyNMS<float, CUDAContext>(
    const int num_boxes,
    const int max_keeps,
    const float thresh,
    const float* boxes,
    int64_t* keep_indices,
    int& num_keep,
    CUDAContext* ctx) {
  const int num_blocks = DIV_UP(num_boxes, NUM_THREADS);

  vector<uint64_t> mask_host(num_boxes * num_blocks);
  auto* mask_dev = (uint64_t*)ctx->New(mask_host.size() * sizeof(uint64_t));

  _NonMaxSuppression<<<
      dim3(num_blocks, num_blocks),
      NUM_THREADS,
      0,
      ctx->cuda_stream()>>>(num_blocks, num_boxes, thresh, boxes, mask_dev);

  CUDA_CHECK(cudaMemcpyAsync(
      mask_host.data(),
      mask_dev,
      mask_host.size() * sizeof(uint64_t),
      cudaMemcpyDeviceToHost,
      ctx->cuda_stream()));

  ctx->FinishDeviceComputation();

  vector<uint64_t> dead_bit(num_blocks);
  memset(&dead_bit[0], 0, sizeof(uint64_t) * num_blocks);

  int num_selected = 0;
  for (int i = 0; i < num_boxes; ++i) {
    const int nblock = i / NUM_THREADS;
    const int inblock = i % NUM_THREADS;
    if (!(dead_bit[nblock] & (1ULL << inblock))) {
      keep_indices[num_selected++] = i;
      auto* mask_i = &mask_host[0] + i * num_blocks;
      for (int j = nblock; j < num_blocks; ++j)
        dead_bit[j] |= mask_i[j];
      if (num_selected == max_keeps) break;
    }
  }
  num_keep = num_selected;
  ctx->Delete(mask_dev);
}

} // namespace detection
} // namespace utils
} // namespace dragon

#endif // USE_CUDA
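The CUDA kernel above only fills per-box suppression bitmasks; the greedy keep/drop pass still runs on the host. A small Python sketch of that selection logic (illustrative only, with the 64-bit masks abstracted as plain sets of suppressed indices):

# Illustrative sketch of the host-side greedy pass above: each box owns a set
# of later boxes it suppresses; a box is kept only if no previously kept box
# has already marked it dead.
def greedy_select(suppress, num_boxes, max_keeps):
    dead = set()
    keep = []
    for i in range(num_boxes):
        if i in dead:
            continue
        keep.append(i)
        if len(keep) == max_keeps:
            break
        dead |= suppress[i]
    return keep

# Boxes 0 and 1 overlap heavily, box 2 is separate: keep [0, 2].
assert greedy_select({0: {1}, 1: set(), 2: set()}, 3, 10) == [0, 2]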
...@@ -5,7 +5,7 @@
 * You should have received a copy of the BSD 2-Clause License
 * along with the software. If not, See,
 *
 * <https://opensource.org/licenses/BSD-2-Clause>
 *
 * ------------------------------------------------------------
 */
...@@ -13,8 +13,7 @@
#ifndef SEETADET_CXX_UTILS_DETECTION_UTILS_H_
#define SEETADET_CXX_UTILS_DETECTION_UTILS_H_

-#include "dragon/core/context.h"
-#include "dragon/core/operator.h"
+#include "dragon/core/common.h"

namespace dragon {
...@@ -24,390 +23,409 @@ namespace detection {
#define ROUND(x) ((int)((x) + (T)0.5))

-/******************** BBox ********************/
+/*!
+ * Box API
+ */

template <typename T>
inline int FilterBoxes(
    const T dx,
    const T dy,
    const T d_log_w,
    const T d_log_h,
    const T im_w,
    const T im_h,
    const T min_box_w,
    const T min_box_h,
    T* bbox) {
  const T w = bbox[2] - bbox[0] + 1;
  const T h = bbox[3] - bbox[1] + 1;
  const T ctr_x = bbox[0] + (T)0.5 * w;
  const T ctr_y = bbox[1] + (T)0.5 * h;
  const T pred_ctr_x = dx * w + ctr_x;
  const T pred_ctr_y = dy * h + ctr_y;
  const T pred_w = exp(d_log_w) * w;
  const T pred_h = exp(d_log_h) * h;
  bbox[0] = pred_ctr_x - (T)0.5 * pred_w;
  bbox[1] = pred_ctr_y - (T)0.5 * pred_h;
  bbox[2] = pred_ctr_x + (T)0.5 * pred_w;
  bbox[3] = pred_ctr_y + (T)0.5 * pred_h;
  bbox[0] = std::max((T)0, std::min(bbox[0], im_w - 1));
  bbox[1] = std::max((T)0, std::min(bbox[1], im_h - 1));
  bbox[2] = std::max((T)0, std::min(bbox[2], im_w - 1));
  bbox[3] = std::max((T)0, std::min(bbox[3], im_h - 1));
  const T bbox_w = bbox[2] - bbox[0] + 1;
  const T bbox_h = bbox[3] - bbox[1] + 1;
  return (bbox_w >= min_box_w) * (bbox_h >= min_box_h);
}

template <typename T>
inline void BBoxTransform(
    const T dx,
    const T dy,
    const T d_log_w,
    const T d_log_h,
    const T im_w,
    const T im_h,
    const T im_scale_h,
    const T im_scale_w,
    T* bbox) {
  const T w = bbox[2] - bbox[0] + 1;
  const T h = bbox[3] - bbox[1] + 1;
  const T ctr_x = bbox[0] + (T)0.5 * w;
  const T ctr_y = bbox[1] + (T)0.5 * h;
  const T pred_ctr_x = dx * w + ctr_x;
  const T pred_ctr_y = dy * h + ctr_y;
  const T pred_w = exp(d_log_w) * w;
  const T pred_h = exp(d_log_h) * h;
  bbox[0] = pred_ctr_x - (T)0.5 * pred_w;
  bbox[1] = pred_ctr_y - (T)0.5 * pred_h;
  bbox[2] = pred_ctr_x + (T)0.5 * pred_w;
  bbox[3] = pred_ctr_y + (T)0.5 * pred_h;
  bbox[0] = std::max((T)0, std::min(bbox[0], im_w - 1)) / im_scale_w;
  bbox[1] = std::max((T)0, std::min(bbox[1], im_h - 1)) / im_scale_h;
  bbox[2] = std::max((T)0, std::min(bbox[2], im_w - 1)) / im_scale_w;
  bbox[3] = std::max((T)0, std::min(bbox[3], im_h - 1)) / im_scale_h;
}
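A Python rendering of the (dx, dy, d_log_w, d_log_h) decoding shared by FilterBoxes and BBoxTransform above; a sketch for readability only, using the same inclusive-coordinate clipping as the C++ code.

# Sketch of the delta decoding above (box = [x1, y1, x2, y2], clipped to image).
import math

def decode_box(box, dx, dy, d_log_w, d_log_h, im_w, im_h):
    w = box[2] - box[0] + 1
    h = box[3] - box[1] + 1
    ctr_x = box[0] + 0.5 * w
    ctr_y = box[1] + 0.5 * h
    pred_w = math.exp(d_log_w) * w
    pred_h = math.exp(d_log_h) * h
    pred_ctr_x = dx * w + ctr_x
    pred_ctr_y = dy * h + ctr_y
    x1 = min(max(pred_ctr_x - 0.5 * pred_w, 0), im_w - 1)
    y1 = min(max(pred_ctr_y - 0.5 * pred_h, 0), im_h - 1)
    x2 = min(max(pred_ctr_x + 0.5 * pred_w, 0), im_w - 1)
    y2 = min(max(pred_ctr_y + 0.5 * pred_h, 0), im_h - 1)
    return [x1, y1, x2, y2]

# Zero deltas keep the center and size; the +1 width convention means the
# corners are not an exact identity.
assert decode_box([10, 10, 19, 19], 0, 0, 0, 0, 100, 100) == [10.0, 10.0, 20.0, 20.0]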
-/******************** Anchor ********************/
+/*!
+ * Anchor API
+ */

template <typename T>
inline void GenerateAnchors(
    int base_size,
    const int num_ratios,
    const int num_scales,
    const T* ratios,
    const T* scales,
    T* anchors) {
  const T base_area = (T)(base_size * base_size);
  const T center = (T)0.5 * (base_size - (T)1);
  T* offset_anchors = anchors;
  for (int i = 0; i < num_ratios; ++i) {
    const T ratio_w = (T)ROUND(sqrt(base_area / ratios[i]));
    const T ratio_h = (T)ROUND(ratio_w * ratios[i]);
    for (int j = 0; j < num_scales; ++j) {
      const T scale_w = (T)0.5 * (ratio_w * scales[j] - (T)1);
      const T scale_h = (T)0.5 * (ratio_h * scales[j] - (T)1);
      offset_anchors[0] = center - scale_w;
      offset_anchors[1] = center - scale_h;
      offset_anchors[2] = center + scale_w;
      offset_anchors[3] = center + scale_h;
      offset_anchors += 4;
    }
  }
}
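A Python sketch of the anchor layout GenerateAnchors produces: num_ratios * num_scales boxes, all centered on the same point, stored as flat (x1, y1, x2, y2) rows. The base size, ratios, and scales below are illustrative values, not ones fixed by this file.

# Sketch mirroring GenerateAnchors above (ROUND emulated as int(x + 0.5)).
import math

def generate_anchors(base_size, ratios, scales):
    base_area = float(base_size * base_size)
    center = 0.5 * (base_size - 1)
    anchors = []
    for r in ratios:
        ratio_w = int(math.sqrt(base_area / r) + 0.5)
        ratio_h = int(ratio_w * r + 0.5)
        for s in scales:
            half_w = 0.5 * (ratio_w * s - 1)
            half_h = 0.5 * (ratio_h * s - 1)
            anchors.append(
                [center - half_w, center - half_h, center + half_w, center + half_h])
    return anchors

# Example: 3 ratios x 3 scales -> 9 anchors per grid cell.
assert len(generate_anchors(16, [0.5, 1, 2], [8, 16, 32])) == 9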
template <typename T>
inline void GenerateGridAnchors(
    const int num_proposals,
    const int num_anchors,
    const int feat_h,
    const int feat_w,
    const int stride,
    const int base_offset,
    const T* anchors,
    const int64_t* indices,
    T* proposals) {
  T x, y;
  int idx_3d, a, h, w;
  int idx_range = num_anchors * feat_h * feat_w;
  for (int i = 0; i < num_proposals; ++i) {
    idx_3d = (int)indices[i] - base_offset;
    if (idx_3d >= 0 && idx_3d < idx_range) {
      w = idx_3d % feat_w;
      h = (idx_3d / feat_w) % feat_h;
      a = idx_3d / feat_w / feat_h;
      x = (T)w * stride, y = (T)h * stride;
      auto* A = anchors + a * 4;
      auto* P = proposals + i * 5;
      P[0] = x + A[0], P[1] = y + A[1];
      P[2] = x + A[2], P[3] = y + A[3];
    }
  }
}

template <typename T>
inline void GenerateGridAnchors(
    const int num_proposals,
    const int num_classes,
    const int num_anchors,
    const int feat_h,
    const int feat_w,
    const int stride,
    const int base_offset,
    const T* anchors,
    const int64_t* indices,
    T* proposals) {
  T x, y;
  int idx_4d, a, h, w;
  int lr = num_classes * base_offset;
  int rr = num_classes * (num_anchors * feat_h * feat_w);
  for (int i = 0; i < num_proposals; ++i) {
    idx_4d = (int)indices[i] - lr;
    if (idx_4d >= 0 && idx_4d < rr) {
      idx_4d /= num_classes;
      w = idx_4d % feat_w;
      h = (idx_4d / feat_w) % feat_h;
      a = idx_4d / feat_w / feat_h;
      x = (T)w * stride, y = (T)h * stride;
      auto* A = anchors + a * 4;
      auto* P = proposals + i * 7 + 1;
      P[0] = x + A[0], P[1] = y + A[1];
      P[2] = x + A[2], P[3] = y + A[3];
    }
  }
}

-/******************** Proposal ********************/
+/*!
+ * Proposal API
+ */

template <typename T>
void GenerateSSProposals(
    const int K,
    const int num_proposals,
    const float im_h,
    const float im_w,
    const float min_box_h,
    const float min_box_w,
    const T* scores,
    const T* deltas,
    const int64_t* indices,
    T* proposals) {
  int64_t index, a, k;
  const float* delta;
  float* proposal = proposals;
  float dx, dy, d_log_w, d_log_h;
  for (int i = 0; i < num_proposals; ++i) {
    index = indices[i];
    a = index / K, k = index % K;
    delta = deltas + k;
    dx = delta[(a * 4 + 0) * K];
    dy = delta[(a * 4 + 1) * K];
    d_log_w = delta[(a * 4 + 2) * K];
    d_log_h = delta[(a * 4 + 3) * K];
    proposal[4] = FilterBoxes(
                      dx,
                      dy,
                      d_log_w,
                      d_log_h,
                      im_w,
                      im_h,
                      min_box_w,
                      min_box_h,
                      proposal) *
        scores[index];
    proposal += 5;
  }
}

template <typename T>
void GenerateMSProposals(
    const int num_candidates,
    const int num_proposals,
    const float im_h,
    const float im_w,
    const float min_box_h,
    const float min_box_w,
    const T* scores,
    const T* deltas,
    const int64_t* indices,
    T* proposals) {
  int64_t index;
  int64_t num_candidates_2x = 2 * num_candidates;
  int64_t num_candidates_3x = 3 * num_candidates;
  float* proposal = proposals;
  float dx, dy, d_log_w, d_log_h;
  for (int i = 0; i < num_proposals; ++i) {
    index = indices[i];
    dx = deltas[index];
    dy = deltas[num_candidates + index];
    d_log_w = deltas[num_candidates_2x + index];
    d_log_h = deltas[num_candidates_3x + index];
    proposal[4] = FilterBoxes(
                      dx,
                      dy,
                      d_log_w,
                      d_log_h,
                      im_w,
                      im_h,
                      min_box_w,
                      min_box_h,
                      proposal) *
        scores[index];
    proposal += 5;
  }
}

template <typename T>
void GenerateMCProposals(
    const int num_proposals,
    const int num_boxes,
    const int num_classes,
    const int im_idx,
    const float im_h,
    const float im_w,
    const float im_scale_h,
    const float im_scale_w,
    const T* scores,
    const T* deltas,
    const int64_t* indices,
    T* proposals) {
  int64_t index, cls;
  int64_t num_boxes_2x = 2 * num_boxes;
  int64_t num_boxes_3x = 3 * num_boxes;
  float* proposal = proposals;
  float dx, dy, d_log_w, d_log_h;
  for (int i = 0; i < num_proposals; ++i) {
    cls = indices[i] % num_classes;
    index = indices[i] / num_classes;
    dx = deltas[index];
    dy = deltas[num_boxes + index];
    d_log_w = deltas[num_boxes_2x + index];
    d_log_h = deltas[num_boxes_3x + index];
    proposal[0] = im_idx;
    BBoxTransform(
        dx,
d_log_w, d_log_h, dy,
im_w, im_h, d_log_w,
im_scale_h, im_scale_w, d_log_h,
proposal + 1 im_w,
); im_h,
proposal[5] = scores[indices[i]]; im_scale_h,
proposal[6] = cls + 1; im_scale_w,
proposal += 7; proposal + 1);
} proposal[5] = scores[indices[i]];
proposal[6] = cls + 1;
proposal += 7;
}
} }
template <typename T> template <typename T>
inline void SortProposals( inline void
const int start, SortProposals(const int start, const int end, const int num_top, T* proposals) {
const int end, const T pivot_score = proposals[start * 5 + 4];
const int num_top, int left = start + 1, right = end;
T* proposals) { while (left <= right) {
const T pivot_score = proposals[start * 5 + 4]; while (left <= end && proposals[left * 5 + 4] >= pivot_score)
int left = start + 1, right = end; ++left;
while (left <= right) { while (right > start && proposals[right * 5 + 4] <= pivot_score)
while (left <= end && proposals[left * 5 + 4] >= pivot_score) ++left; --right;
while (right > start && proposals[right * 5 + 4] <= pivot_score) --right; if (left <= right) {
if (left <= right) { for (int i = 0; i < 5; ++i)
for (int i = 0; i < 5; ++i) std::swap(proposals[left * 5 + i], proposals[right * 5 + i]);
std::swap(proposals[left * 5 + i], proposals[right * 5 + i]); ++left;
++left; --right;
--right;
}
} }
if (right > start) { }
for (int i = 0; i < 5; ++i) if (right > start) {
std::swap(proposals[start * 5 + i], proposals[right * 5 + i]); for (int i = 0; i < 5; ++i)
} std::swap(proposals[start * 5 + i], proposals[right * 5 + i]);
if (start < right - 1) SortProposals(start, right - 1, num_top, proposals); }
if (right + 1 < num_top && right + 1 < end) if (start < right - 1) SortProposals(start, right - 1, num_top, proposals);
SortProposals(right + 1, end, num_top, proposals); if (right + 1 < num_top && right + 1 < end)
SortProposals(right + 1, end, num_top, proposals);
} }
template <typename T> template <typename T>
inline void RetrieveRoIs( inline void RetrieveRoIs(
const int num_rois, const int num_rois,
const int roi_batch_ind, const int roi_batch_ind,
const T* proposals, const T* proposals,
const int64_t* roi_indices, const int64_t* roi_indices,
T* rois) { T* rois) {
for (int i = 0; i < num_rois; ++i) { for (int i = 0; i < num_rois; ++i) {
const T* proposal = proposals + roi_indices[i] * 5; const T* proposal = proposals + roi_indices[i] * 5;
rois[i * 5 + 0] = (T)roi_batch_ind; rois[i * 5 + 0] = (T)roi_batch_ind;
rois[i * 5 + 1] = proposal[0]; rois[i * 5 + 1] = proposal[0];
rois[i * 5 + 2] = proposal[1]; rois[i * 5 + 2] = proposal[1];
rois[i * 5 + 3] = proposal[2]; rois[i * 5 + 3] = proposal[2];
rois[i * 5 + 4] = proposal[3]; rois[i * 5 + 4] = proposal[3];
} }
} }
template <typename T> template <typename T>
inline int roi_level( inline int roi_level(
const int min_level, const int min_level,
const int max_level, const int max_level,
const int canonical_level, const int canonical_level,
const int canonical_scale, const int canonical_scale,
T* roi) { T* roi) {
T w = roi[3] - roi[1] + 1; T w = roi[3] - roi[1] + 1;
T h = roi[4] - roi[2] + 1; T h = roi[4] - roi[2] + 1;
// Refer to the settings of the paper // Refer to the settings of the paper
int level = canonical_level + std::log2( int level = canonical_level +
std::max(std::sqrt(w * h), (T)1) / (T)canonical_scale); std::log2(std::max(std::sqrt(w * h), (T)1) / (T)canonical_scale);
return std::min(max_level, std::max(min_level, level)); return std::min(max_level, std::max(min_level, level));
} }
template <typename T> template <typename T>
inline void CollectRoIs( inline void CollectRoIs(
const int num_rois, const int num_rois,
const int min_level, const int min_level,
const int max_level, const int max_level,
const int canonical_level, const int canonical_level,
const int canonical_scale, const int canonical_scale,
const T* rois, const T* rois,
vector<vec64_t>& roi_bins) { vector<vec64_t>& roi_bins) {
const T* roi = rois; const T* roi = rois;
for (int i = 0; i < num_rois; ++i) { for (int i = 0; i < num_rois; ++i) {
int bin_idx = roi_level(min_level, max_level, int bin_idx =
canonical_level, canonical_scale, roi); roi_level(min_level, max_level, canonical_level, canonical_scale, roi);
bin_idx = std::max(bin_idx - min_level, 0); bin_idx = std::max(bin_idx - min_level, 0);
roi_bins[bin_idx].push_back(i); roi_bins[bin_idx].push_back(i);
roi += 5; roi += 5;
} }
} }
template <typename T> template <typename T>
inline void DistributeRoIs( inline void DistributeRoIs(
const vector<vec64_t>& roi_bins, const vector<vec64_t>& roi_bins,
const T* rois, const T* rois,
vector<T*> outputs) { vector<T*> outputs) {
for (int i = 0; i < roi_bins.size(); i++) { for (int i = 0; i < roi_bins.size(); i++) {
auto* y = outputs[i]; auto* y = outputs[i];
if (roi_bins[i].size() == 0) { if (roi_bins[i].size() == 0) {
// Fake a tiny roi to avoid empty roi pooling // Fake a tiny roi to avoid empty roi pooling
y[0] = 0, y[1] = 0, y[2] = 0, y[3] = 1, y[4] = 1; y[0] = 0, y[1] = 0, y[2] = 0, y[3] = 1, y[4] = 1;
} else { } else {
for (int j = 0; j < roi_bins[i].size(); ++j) { for (int j = 0; j < roi_bins[i].size(); ++j) {
const T* roi = rois + roi_bins[i][j] * 5; const T* roi = rois + roi_bins[i][j] * 5;
for (int k = 0; k < 5; ++k) y[k] = roi[k]; for (int k = 0; k < 5; ++k)
y += 5; y[k] = roi[k];
} y += 5;
} }
} }
}
} }
/******************** NMS ********************/ /*!
* NMS API
*/
template <typename T, class Context> template <typename T, class Context>
void ApplyNMS( void ApplyNMS(
const int num_boxes, const int num_boxes,
const int max_keeps, const int max_keeps,
const T thresh, const T thresh,
const T* boxes, const T* boxes,
int64_t* keep_indices, int64_t* keep_indices,
int& num_keep, int& num_keep,
Context* ctx); Context* ctx);
} // namespace detection } // namespace detection
} // namespace utils } // namespace utils
} // namespace dragon } // namespace dragon
#endif // SEETADET_CXX_UTILS_DETECTION_UTILS_H_ #endif // SEETADET_CXX_UTILS_DETECTION_UTILS_H_
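For reference, the roi_level helper declared in this header follows the FPN assignment rule: a RoI of width w and height h goes to level canonical_level + log2(max(sqrt(w * h), 1) / canonical_scale), clamped to [min_level, max_level]. Below is a minimal Python sketch of the same rule; the default values 4 and 224 are the ones from the FPN paper and are only illustrative, not read from this header or from the project config.

import numpy as np

def assign_fpn_level(roi, min_level=2, max_level=5,
                     canonical_level=4, canonical_scale=224):
    # roi is [x1, y1, x2, y2]; mirror the truncation of the C++ int cast.
    w = roi[2] - roi[0] + 1
    h = roi[3] - roi[1] + 1
    level = int(canonical_level +
                np.log2(max(np.sqrt(w * h), 1.) / canonical_scale))
    return min(max_level, max(min_level, level))

print(assign_fpn_level([0, 0, 223, 223]))  # 224x224 RoI -> canonical level 4
print(assign_fpn_level([0, 0, 111, 111]))  # 112x112 RoI -> level 3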
...@@ -52,12 +52,9 @@ class AnchorTarget(object): ...@@ -52,12 +52,9 @@ class AnchorTarget(object):
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images) gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
# Generate grid anchors from base # Generate grid anchors from base
all_anchors = \ grid_shapes = [f.shape[-2:] for f in features]
generate_grid_anchors( all_anchors = generate_grid_anchors(
features, grid_shapes, self.base_anchors, self.strides)
self.base_anchors,
self.strides,
)
num_anchors = all_anchors.shape[0] num_anchors = all_anchors.shape[0]
# Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care # Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
......
...@@ -58,12 +58,9 @@ class Proposal(object): ...@@ -58,12 +58,9 @@ class Proposal(object):
# Get resources # Get resources
num_images = ims_info.shape[0] num_images = ims_info.shape[0]
all_anchors = \ grid_shapes = [f.shape[-2:] for f in features]
generate_grid_anchors( all_anchors = generate_grid_anchors(
features, grid_shapes, self.base_anchors, self.strides)
self.base_anchors,
self.strides,
)
# Prepare for the outputs # Prepare for the outputs
batch_rois = [] batch_rois = []
......
...@@ -19,40 +19,40 @@ import numpy as np ...@@ -19,40 +19,40 @@ import numpy as np
from seetadet.core.config import cfg from seetadet.core.config import cfg
def generate_grid_anchors(features, base_anchors, strides): def generate_grid_anchors(grid_shapes, base_anchors, strides):
num_strides = len(strides) num_strides = len(strides)
if len(features) != num_strides: if len(grid_shapes) != num_strides:
raise ValueError( raise ValueError(
'Given %d features for %d strides.' 'Given %d grids for %d strides.'
% (len(features), num_strides) % (len(grid_shapes), num_strides)
) )
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
anchors_to_pack = [] anchors_to_pack = []
for i in range(len(features)): for i in range(len(grid_shapes)):
height, width = features[i].shape[-2:] height, width = grid_shapes[i]
shift_x = np.arange(0, width) * strides[i] shift_x = np.arange(0, width) * strides[i]
shift_y = np.arange(0, height) * strides[i] shift_y = np.arange(0, height) * strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to # Add a anchors (1, a, 4) to
# cell K shifts (K, 1, 4) to get # cell k shifts (k, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (k, a, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (k * a, 4) shifted anchors
A = base_anchors[i].shape[0] a = base_anchors[i].shape[0]
K = shifts.shape[0] k = shifts.shape[0]
anchors = (base_anchors[i].reshape((1, A, 4)) + anchors = (base_anchors[i].reshape((1, a, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2))) shifts.reshape((1, k, 4)).transpose((1, 0, 2)))
if num_strides > 1: if num_strides > 1:
# Transpose from (K, A, 4) to (A, K, 4) # Transpose from (K, A, 4) to (A, K, 4)
# We will pack it with other strides to # We will pack it with other strides to
# match the data format of (N, C, H, W) # match the data format of (N, C, H, W)
anchors = anchors.transpose((1, 0, 2)) anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4)) anchors = anchors.reshape((a * k, 4))
anchors_to_pack.append(anchors) anchors_to_pack.append(anchors)
else: else:
# Original order of Faster R-CNN # Original order of Faster R-CNN
return anchors.reshape((K * A, 4)) return anchors.reshape((k * a, 4))
return np.vstack(anchors_to_pack) return np.vstack(anchors_to_pack)
......
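The reshape-and-add in generate_grid_anchors above is plain NumPy broadcasting: (1, a, 4) base anchors plus (k, 1, 4) per-cell shifts give (k, a, 4) shifted anchors, flattened to (k * a, 4). A self-contained toy example with a 2x3 grid, stride 16 and two made-up base anchors (all values are illustrative):

import numpy as np

stride, height, width = 16, 2, 3                  # toy feature-map size
base_anchors = np.array([[-8., -8., 8., 8.],
                         [-16., -16., 16., 16.]]) # two made-up anchors

shift_x = np.arange(0, width) * stride
shift_y = np.arange(0, height) * stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()   # (k, 4)

a, k = base_anchors.shape[0], shifts.shape[0]
anchors = (base_anchors.reshape((1, a, 4)) +
           shifts.reshape((1, k, 4)).transpose((1, 0, 2)))           # (k, a, 4)
print(anchors.reshape((k * a, 4)).shape)          # (12, 4): 6 cells x 2 anchors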
...@@ -46,6 +46,9 @@ class AnchorTarget(object): ...@@ -46,6 +46,9 @@ class AnchorTarget(object):
ratios=self.ratios, ratios=self.ratios,
sizes=sizes, sizes=sizes,
)) ))
# Store the cached grid anchors
self.last_grid_shapes = None
self.last_grid_anchors = None
def __call__(self, features, gt_boxes): def __call__(self, features, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
...@@ -58,12 +61,17 @@ class AnchorTarget(object): ...@@ -58,12 +61,17 @@ class AnchorTarget(object):
) )
# Generate grid anchors from base # Generate grid anchors from base
all_anchors = \ grid_shapes = [f.shape[-2:] for f in features]
generate_grid_anchors( if grid_shapes == self.last_grid_shapes:
features, all_anchors = self.last_grid_anchors
self.base_anchors, else:
self.strides, self.last_grid_shapes = grid_shapes
) self.last_grid_anchors = all_anchors = \
generate_grid_anchors(
grid_shapes,
self.base_anchors,
self.strides,
)
num_anchors = all_anchors.shape[0] num_anchors = all_anchors.shape[0]
# Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care # Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
......
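The cache added above only regenerates grid anchors when the feature-map shapes change, so once the training scale repeats across iterations the NumPy work is done a single time. A minimal sketch of the same memoization pattern; generate_fn is a stand-in for generate_grid_anchors and is not part of the repository:

class GridAnchorCache(object):
    """Memoize grid anchors keyed on the feature-map shapes (sketch)."""

    def __init__(self, generate_fn):
        self.generate_fn = generate_fn
        self.last_grid_shapes = None
        self.last_grid_anchors = None

    def __call__(self, grid_shapes, base_anchors, strides):
        # Recompute only when the list of (h, w) grid shapes changes.
        if grid_shapes != self.last_grid_shapes:
            self.last_grid_shapes = grid_shapes
            self.last_grid_anchors = self.generate_fn(
                grid_shapes, base_anchors, strides)
        return self.last_grid_anchors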
...@@ -15,6 +15,7 @@ from __future__ import print_function ...@@ -15,6 +15,7 @@ from __future__ import print_function
import types import types
import dragon
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
...@@ -59,7 +60,7 @@ def ims_detect(detector, raw_images): ...@@ -59,7 +60,7 @@ def ims_detect(detector, raw_images):
# Unpack results # Unpack results
results = outputs['detections'] results = outputs['detections']
detections = [[] for _ in range(len((raw_images)))] detections = [[] for _ in range(len(raw_images))]
for i in range(len(ims)): for i in range(len(ims)):
inds = np.where(results[:, 0].astype(np.int32) == i)[0] inds = np.where(results[:, 0].astype(np.int32) == i)[0]
...@@ -126,6 +127,6 @@ def test_net(weights, num_classes, q_in, q_out, device): ...@@ -126,6 +127,6 @@ def test_net(weights, num_classes, q_in, q_out, device):
q_out.put(( q_out.put((
indices[i], indices[i],
dict([('im_detect', _t['im_detect'].average_time), dict([('im_detect', _t['im_detect'].average_time),
('misc',_t['misc'].average_time)]), ('misc', _t['misc'].average_time)]),
dict([('boxes', boxes_this_image)]), dict([('boxes', boxes_this_image)]),
)) ))
...@@ -45,14 +45,14 @@ class PriorBox(object): ...@@ -45,14 +45,14 @@ class PriorBox(object):
aspect_ratios[i], aspect_ratios[i],
) )
) )
self.grid_anchors = None # Store the cached grid anchors
self.last_grid_anchors = None
def __call__(self, features): def __call__(self, features):
if self.grid_anchors is not None: if self.last_grid_anchors is not None:
return self.grid_anchors return self.last_grid_anchors
self.grid_anchors = []
all_anchors = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
# 1. Generate base grids # 1. Generate base grids
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
...@@ -61,23 +61,23 @@ class PriorBox(object): ...@@ -61,23 +61,23 @@ class PriorBox(object):
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
# 2. Apply anchors on base grids # 2. Apply anchors on base grids
# Add A anchors (1, A, 4) to # Add a anchors (1, a, 4) to
# cell K shifts (K, 1, 4) to get # cell k shifts (k, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (k, a, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (k * a, 4) shifted anchors
A = self.base_anchors[i].shape[0] a = self.base_anchors[i].shape[0]
D = self.base_anchors[i].shape[1] d = self.base_anchors[i].shape[1]
shifts = np.vstack(( shifts = np.vstack((
shift_x.ravel(), shift_x.ravel(),
shift_y.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_x.ravel(),
shift_y.ravel()) shift_y.ravel())
).transpose() ).transpose()
K = shifts.shape[0] # K = map_h * map_w k = shifts.shape[0] # k = map_h * map_w
anchors = (self.base_anchors[i].reshape((1, A, D)) + anchors = (self.base_anchors[i].reshape((1, a, d)) +
shifts.reshape((1, K, D)).transpose((1, 0, 2))) shifts.reshape((1, k, d)).transpose((1, 0, 2)))
anchors = anchors.reshape((K * A, D)).astype(np.float32) anchors = anchors.reshape((k * a, d)).astype(np.float32)
self.grid_anchors.append(anchors) all_anchors.append(anchors)
self.grid_anchors = np.concatenate(self.grid_anchors)
return self.grid_anchors self.last_grid_anchors = np.concatenate(all_anchors)
return self.last_grid_anchors
...@@ -32,11 +32,9 @@ def get_images(ims): ...@@ -32,11 +32,9 @@ def get_images(ims):
for im in ims: for im in ims:
im_scales.append((float(out_size) / im.shape[0], im_scales.append((float(out_size) / im.shape[0],
float(out_size) / im.shape[1])) float(out_size) / im.shape[1]))
processed_ims.append( processed_ims.append(cv2.resize(
cv2.resize(
im, (out_size, out_size), im, (out_size, out_size),
interpolation=cv2.INTER_AREA, interpolation=cv2.INTER_AREA))
))
if ims[0].dtype == 'uint16': if ims[0].dtype == 'uint16':
ims_blob = np.array(processed_ims, dtype='float32') / 256. ims_blob = np.array(processed_ims, dtype='float32') / 256.
else: else:
......
...@@ -49,12 +49,12 @@ class Distort(object): ...@@ -49,12 +49,12 @@ class Distort(object):
] ]
def apply(self, img, boxes=None): def apply(self, img, boxes=None):
if self._prob > 0: self._prob = 0.5 if cfg.TRAIN.USE_COLOR_JITTER else 0
img = PIL.Image.fromarray(img) img = PIL.Image.fromarray(img)
for transform_fn, prob in self._transforms: for transform_fn, prob in self._transforms:
if npr.uniform() < prob: if npr.uniform() < prob:
img = transform_fn(img) img = transform_fn(img)
img = img.enhance(1. + npr.uniform(-.4, .4)) img = img.enhance(1. + npr.uniform(-.4, .4))
return np.array(img), boxes return np.array(img), boxes
return img, boxes return img, boxes
......
...@@ -27,8 +27,9 @@ if __name__ == '__main__': ...@@ -27,8 +27,9 @@ if __name__ == '__main__':
np.random.seed(3) np.random.seed(3)
cfg.TRAIN.SCALES = [300] cfg.TRAIN.SCALES = [300]
cfg.TRAIN.RANDOM_SCALES = [0.25, 1.00] cfg.TRAIN.RANDOM_SCALES = [0.25, 1.00]
cfg.TRAIN.USE_COLOR_JITTER = True
augmentor = transforms.Compose( transformer = transforms.Compose(
transforms.Distort(), transforms.Distort(),
transforms.Expand(), transforms.Expand(),
transforms.Sample(), transforms.Sample(),
...@@ -38,12 +39,12 @@ if __name__ == '__main__': ...@@ -38,12 +39,12 @@ if __name__ == '__main__':
while True: while True:
img = cv2.imread('cat.jpg') img = cv2.imread('cat.jpg')
boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32) boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
img, boxes = augmentor(img, boxes) img, boxes = transformer(img, boxes)
for box in boxes: for box in boxes:
x1 = int(box[0] * img.shape[1]) x1 = int(box[0] * img.shape[1])
y1 = int(box[1] * img.shape[0]) y1 = int(box[1] * img.shape[0])
x2 = int(box[2] * img.shape[1]) x2 = int(box[2] * img.shape[1])
y2 = int(box[3] * img.shape[0]) y2 = int(box[3] * img.shape[0])
cv2.rectangle(img, (x1, y1), (x2, y2), (188, 119, 64), 2) cv2.rectangle(img, (x1, y1), (x2, y2), (188, 119, 64), 2)
cv2.imshow('Sample', img) cv2.imshow('Transforms - Preview', img)
cv2.waitKey(0) cv2.waitKey(0)
...@@ -70,14 +70,15 @@ class Pipeline(dali.Pipeline): ...@@ -70,14 +70,15 @@ class Pipeline(dali.Pipeline):
# Decode image # Decode image
image = self.decode(inputs['image']) image = self.decode(inputs['image'])
# Augment the color space # Augment the color space if necessary
image = self.hsv( if cfg.TRAIN.USE_COLOR_JITTER:
self.brightness_contrast( image = self.hsv(
image, self.brightness_contrast(
brightness=self.twist_rng(), image,
contrast=self.twist_rng(), brightness=self.twist_rng(),
), saturation=self.twist_rng() contrast=self.twist_rng(),
) ), saturation=self.twist_rng()
)
# Expand randomly to get smaller objects # Expand randomly to get smaller objects
pr = self.paste_ratio() * self.flip_rng() + 1. pr = self.paste_ratio() * self.flip_rng() + 1.
......
...@@ -18,7 +18,7 @@ from __future__ import division ...@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os
from seetadet.datasets import kpl_record from seetadet.datasets import kpl_dataset
def get_dataset(name): def get_dataset(name):
...@@ -42,5 +42,5 @@ def list_dataset(): ...@@ -42,5 +42,5 @@ def list_dataset():
_GLOBAL_REGISTERED_DATASET = { _GLOBAL_REGISTERED_DATASET = {
'default': lambda source: 'default': lambda source:
kpl_record.KPLRecordDataset(source), kpl_dataset.KPLRecordDataset(source),
} }
...@@ -149,8 +149,10 @@ class AirNet(nn.Module): ...@@ -149,8 +149,10 @@ class AirNet(nn.Module):
x = self.layer1(x) x = self.layer1(x)
outputs = [None, None, self.layer2(x)] outputs = [None, None, self.layer2(x)]
if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])] if hasattr(self, 'layer3'):
if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])] outputs += [self.layer3(outputs[-1])]
if hasattr(self, 'layer4'):
outputs += [self.layer4(outputs[-1])]
return outputs return outputs
......
...@@ -39,16 +39,17 @@ class Detector(nn.Module): ...@@ -39,16 +39,17 @@ class Detector(nn.Module):
backbone = cfg.MODEL.BACKBONE.lower().split('.') backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:] body, modules = backbone[0], backbone[1:]
# + DataLoader # DataLoader
self.data_loader = None
self.data_loader_cls = importlib.import_module( self.data_loader_cls = importlib.import_module(
'seetadet.algo.{}'.format(model)).DataLoader 'seetadet.algo.{}'.format(model)).DataLoader
self.bootstrap = vision.Bootstrap() self.bootstrap = vision.Bootstrap()
# + FeatureExtractor # FeatureExtractor
self.body = backbones.get(body)() self.body = backbones.get(body)()
feature_dims = self.body.feature_dims feature_dims = self.body.feature_dims
# + FeatureEnhancer # FeatureEnhancer
if 'fpn' in modules: if 'fpn' in modules:
self.fpn = models.FPN(feature_dims) self.fpn = models.FPN(feature_dims)
feature_dims = self.fpn.feature_dims feature_dims = self.fpn.feature_dims
...@@ -57,7 +58,7 @@ class Detector(nn.Module): ...@@ -57,7 +58,7 @@ class Detector(nn.Module):
else: else:
feature_dims = [feature_dims[-1]] feature_dims = [feature_dims[-1]]
# + Detection Modules # Detection Modules
if 'rcnn' in model: if 'rcnn' in model:
self.rpn = models.RPN(feature_dims[0]) self.rpn = models.RPN(feature_dims[0])
if 'faster' in model: if 'faster' in model:
...@@ -106,7 +107,7 @@ class Detector(nn.Module): ...@@ -106,7 +107,7 @@ class Detector(nn.Module):
if inputs is None: if inputs is None:
# 1) Training: <= DataLayer # 1) Training: <= DataLayer
# 2) Inference: <= Given # 2) Inference: <= Given
if not hasattr(self, 'data_loader'): if self.data_loader is None:
self.data_loader = self.data_loader_cls() self.data_loader = self.data_loader_cls()
inputs = self.data_loader() inputs = self.data_loader()
...@@ -171,29 +172,34 @@ class Detector(nn.Module): ...@@ -171,29 +172,34 @@ class Detector(nn.Module):
# Merge Affine into Convolution # # Merge Affine into Convolution #
################################### ###################################
last_module = None last_module = None
for e in self.modules(): for module in self.modules():
if isinstance(e, nn.Affine) and \ if isinstance(module, nn.Affine) and \
isinstance(last_module, nn.Conv2d): isinstance(last_module, nn.Conv2d):
if last_module.bias is None: if last_module.bias is None:
delattr(last_module, 'bias') delattr(last_module, 'bias')
e.forward = lambda x: x module.forward = lambda x: x
last_module.bias = e.bias last_module.bias = module.bias
last_module.weight.data.mul_(e.weight.data) weight = module.weight.data.view(
last_module = e 0, *([1] * (last_module.weight.ndimension() - 1)))
last_module.weight.data.mul_(weight)
last_module = module
###################################### ######################################
# Merge BatchNorm into Convolution # # Merge BatchNorm into Convolution #
###################################### ######################################
last_module = None last_module = None
for e in self.modules(): for module in self.modules():
if isinstance(e, nn.BatchNorm2d) and \ if isinstance(module, nn.BatchNorm2d) and \
isinstance(last_module, nn.Conv2d): isinstance(last_module, nn.Conv2d):
if last_module.bias is None: if last_module.bias is None:
delattr(last_module, 'bias') delattr(last_module, 'bias')
e.forward = lambda x: x module.forward = lambda x: x
term = torch.sqrt(e.running_var.data + e.eps) term = torch.sqrt(module.running_var.data + module.eps)
term = e.weight.data / term term = module.weight.data / term
last_module.bias = e.bias.data - term * e.running_mean.data last_module.bias = \
module.bias.data - \
term * module.running_mean.data
term = term.view(0, *([1] * (last_module.weight.ndimension() - 1)))
if last_module.weight.dtype == 'float16': if last_module.weight.dtype == 'float16':
last_module.bias.half_() last_module.bias.half_()
weight = last_module.weight.data.float() weight = last_module.weight.data.float()
...@@ -201,7 +207,7 @@ class Detector(nn.Module): ...@@ -201,7 +207,7 @@ class Detector(nn.Module):
last_module.weight.copy_(weight) last_module.weight.copy_(weight)
else: else:
last_module.weight.data.mul_(term) last_module.weight.data.mul_(term)
last_module = e last_module = module
def new_detector(device, weights=None, training=False): def new_detector(device, weights=None, training=False):
......
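Both merge passes above fold a per-channel Affine or BatchNorm that follows a convolution into that convolution: with scale gamma, shift beta and running statistics mu, var, the fused weight is W * gamma / sqrt(var + eps) and the fused bias is beta - gamma * mu / sqrt(var + eps); the Affine case is the same formula with mu = 0, var = 1 and eps = 0. Below is a NumPy sketch of the BatchNorm case, assuming (out_c, in_c, kh, kw) weights; the toy check convolves a single 3x3 patch so the convolution reduces to a weighted sum:

import numpy as np

def fold_batchnorm(weight, gamma, beta, mean, var, eps=1e-5):
    """Fold y = gamma * (conv(x) - mean) / sqrt(var + eps) + beta into the conv."""
    scale = gamma / np.sqrt(var + eps)                    # (out_c,)
    fused_weight = weight * scale.reshape((-1, 1, 1, 1))  # broadcast over in_c, kh, kw
    fused_bias = beta - scale * mean
    return fused_weight, fused_bias

w = np.random.randn(4, 3, 3, 3)
g, b = np.random.rand(4) + 0.5, np.random.randn(4)
mu, var = np.random.randn(4), np.random.rand(4) + 0.1
fw, fb = fold_batchnorm(w, g, b, mu, var)

x = np.random.randn(3, 3, 3)                              # one 3x3 input patch
y_bn = g * ((w * x).sum(axis=(1, 2, 3)) - mu) / np.sqrt(var + 1e-5) + b
y_fused = (fw * x).sum(axis=(1, 2, 3)) + fb
print(np.allclose(y_bn, y_fused))                         # True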
...@@ -31,7 +31,8 @@ class FPN(nn.Module): ...@@ -31,7 +31,8 @@ class FPN(nn.Module):
dim = cfg.FPN.DIM dim = cfg.FPN.DIM
self.C = nn.ModuleList() self.C = nn.ModuleList()
self.P = nn.ModuleList() self.P = nn.ModuleList()
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1): self.highest_backbone_lvl = min(cfg.FPN.RPN_MAX_LEVEL, HIGHEST_BACKBONE_LVL)
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, self.highest_backbone_lvl + 1):
self.C.append(nn.Conv1x1(feature_dims[lvl - 1], dim, bias=True)) self.C.append(nn.Conv1x1(feature_dims[lvl - 1], dim, bias=True))
self.P.append(nn.Conv3x3(dim, dim, bias=True)) self.P.append(nn.Conv3x3(dim, dim, bias=True))
if 'rcnn' in cfg.MODEL.TYPE: if 'rcnn' in cfg.MODEL.TYPE:
...@@ -40,8 +41,8 @@ class FPN(nn.Module): ...@@ -40,8 +41,8 @@ class FPN(nn.Module):
else: else:
self.apply_func = self.apply_on_generic self.apply_func = self.apply_on_generic
self.relu = nn.ReLU(inplace=False) self.relu = nn.ReLU(inplace=False)
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1): for lvl in range(self.highest_backbone_lvl + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else dim dim_in = feature_dims[-1] if lvl == self.highest_backbone_lvl + 1 else dim
self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True)) self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True))
self.feature_dims = [dim] self.feature_dims = [dim]
self.coarsest_stride = cfg.MODEL.COARSEST_STRIDE self.coarsest_stride = cfg.MODEL.COARSEST_STRIDE
...@@ -56,12 +57,12 @@ class FPN(nn.Module): ...@@ -56,12 +57,12 @@ class FPN(nn.Module):
def apply_on_rcnn(self, features): def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)] outputs = [self.P[self.highest_backbone_lvl - min_lvl](fpn_input)]
# Apply max pool for higher features # Apply max pool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(self.highest_backbone_lvl + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1])) outputs.append(self.maxpool(outputs[-1]))
# Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(self.highest_backbone_lvl - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
if self.coarsest_stride > 0: if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample( upscale_output = nn_funcs.upsample(
...@@ -76,15 +77,15 @@ class FPN(nn.Module): ...@@ -76,15 +77,15 @@ class FPN(nn.Module):
def apply_on_generic(self, features): def apply_on_generic(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)] outputs = [self.P[self.highest_backbone_lvl - min_lvl](fpn_input)]
# Add extra convolutions for higher features # Add extra convolutions for higher features
extra_input = features[-1] extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(self.highest_backbone_lvl + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input)) outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl: if i != max_lvl:
extra_input = self.relu(outputs[-1]) extra_input = self.relu(outputs[-1])
# Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(self.highest_backbone_lvl - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
if self.coarsest_stride > 0: if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample( upscale_output = nn_funcs.upsample(
......
...@@ -161,7 +161,7 @@ class NASMobileNet(nn.Module): ...@@ -161,7 +161,7 @@ class NASMobileNet(nn.Module):
def reset_parameters(self): def reset_parameters(self):
for m in self.modules(): for m in self.modules():
if nn.is_conv2d(m): if isinstance(m, nn.Conv2d):
init.kaiming_normal(m.weight, 'fan_out') init.kaiming_normal(m.weight, 'fan_out')
if m.bias is not None: if m.bias is not None:
init.constant(m.bias, 0) init.constant(m.bias, 0)
...@@ -173,7 +173,7 @@ class NASMobileNet(nn.Module): ...@@ -173,7 +173,7 @@ class NASMobileNet(nn.Module):
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
if nn.is_conv2d(m): if isinstance(m, nn.Conv2d):
m.weight.requires_grad = False m.weight.requires_grad = False
m._buffers['weight'] = m.weight m._buffers['weight'] = m.weight
del m._parameters['weight'] del m._parameters['weight']
......
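freeze_func above (and the matching one in resnet.py further down) stops gradients for early-stage convolutions; the repository additionally moves the weight from _parameters into _buffers so parameters() no longer yields it. A minimal sketch of the simpler requires_grad part, written against standard PyTorch names rather than the dragon.vm.torch wrappers used here; nn.Sequential and the layer sizes are purely illustrative:

import torch
from torch import nn

def freeze_conv_weights(m):
    """Stop gradients for convolution weights (sketch)."""
    if isinstance(m, nn.Conv2d):
        m.weight.requires_grad = False

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 8, 3))
model[0].apply(freeze_conv_weights)          # freeze only the first stage
trainable = [p for p in model.parameters() if p.requires_grad]
print(len(trainable))                        # 3: both biases + the second conv weight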
...@@ -17,8 +17,6 @@ from __future__ import absolute_import ...@@ -17,8 +17,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch
from seetadet.core.config import cfg from seetadet.core.config import cfg
from seetadet.core.registry import backbones from seetadet.core.registry import backbones
from seetadet.modules import nn from seetadet.modules import nn
...@@ -37,11 +35,12 @@ class BasicBlock(nn.Module): ...@@ -37,11 +35,12 @@ class BasicBlock(nn.Module):
super(BasicBlock, self).__init__() super(BasicBlock, self).__init__()
self.conv1 = nn.Conv3x3(dim_in, dim_out, stride) self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = nn.FrozenAffine(dim_out) self.bn1 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv3x3(dim_out, dim_out) self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = nn.FrozenAffine(dim_out) self.bn2 = nn.FrozenAffine(dim_out)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock self.dropblock1 = nn.DropBlock2d(**dropblock) if dropblock else None
self.dropblock2 = nn.DropBlock2d(**dropblock) if dropblock else None
def forward(self, x): def forward(self, x):
residual = x residual = x
...@@ -50,14 +49,14 @@ class BasicBlock(nn.Module): ...@@ -50,14 +49,14 @@ class BasicBlock(nn.Module):
out = self.bn1(out) out = self.bn1(out)
out = self.relu(out) out = self.relu(out)
if self.dropblock is not None: if self.dropblock1 is not None:
out = self.dropblock(out) out = self.dropblock1(out)
out = self.conv2(out) out = self.conv2(out)
out = self.bn2(out) out = self.bn2(out)
if self.dropblock is not None: if self.dropblock2 is not None:
residual = self.dropblock(residual) residual = self.dropblock2(residual)
if self.downsample is not None: if self.downsample is not None:
residual = self.downsample(residual) residual = self.downsample(residual)
...@@ -67,7 +66,7 @@ class BasicBlock(nn.Module): ...@@ -67,7 +66,7 @@ class BasicBlock(nn.Module):
return out return out
class Bottleneck(torch.nn.Module): class Bottleneck(nn.Module):
# 1x64d => 0.25 (ResNet) # 1x64d => 0.25 (ResNet)
# 32x8d, 64x4d => 1.0 (ResNeXt) # 32x8d, 64x4d => 1.0 (ResNeXt)
contraction = cfg.RESNET.NUM_GROUPS \ contraction = cfg.RESNET.NUM_GROUPS \
...@@ -86,12 +85,13 @@ class Bottleneck(torch.nn.Module): ...@@ -86,12 +85,13 @@ class Bottleneck(torch.nn.Module):
self.conv1 = nn.Conv1x1(dim_in, dim) self.conv1 = nn.Conv1x1(dim_in, dim)
self.bn1 = nn.FrozenAffine(dim) self.bn1 = nn.FrozenAffine(dim)
self.conv2 = nn.Conv3x3(dim, dim, stride=stride) self.conv2 = nn.Conv3x3(dim, dim, stride=stride)
self.drop2 = nn.DropBlock2d(**dropblock) if dropblock else None
self.bn2 = nn.FrozenAffine(dim) self.bn2 = nn.FrozenAffine(dim)
self.conv3 = nn.Conv1x1(dim, dim_out) self.conv3 = nn.Conv1x1(dim, dim_out)
self.drop3 = nn.DropBlock2d(**dropblock) if dropblock else None
self.bn3 = nn.FrozenAffine(dim_out) self.bn3 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock
def forward(self, x): def forward(self, x):
residual = x residual = x
...@@ -101,32 +101,30 @@ class Bottleneck(torch.nn.Module): ...@@ -101,32 +101,30 @@ class Bottleneck(torch.nn.Module):
out = self.relu(out) out = self.relu(out)
out = self.conv2(out) out = self.conv2(out)
if self.drop2 is not None:
out = self.drop2(out)
out = self.bn2(out) out = self.bn2(out)
out = self.relu(out) out = self.relu(out)
if self.dropblock is not None:
out = self.dropblock(out)
out = self.conv3(out) out = self.conv3(out)
out = self.bn3(out) out = self.bn3(out)
if self.dropblock is not None:
residual = self.dropblock(residual)
if self.downsample is not None: if self.downsample is not None:
residual = self.downsample(residual) residual = self.downsample(residual)
out += residual out += residual
if self.drop3 is not None:
out = self.drop3(out)
out = self.relu(out) out = self.relu(out)
return out return out
class ResNet(torch.nn.Module): class ResNet(nn.Module):
def __init__(self, block, layers, filters): def __init__(self, block, layers, filters):
super(ResNet, self).__init__() super(ResNet, self).__init__()
self.dim_in, filters = filters[0], filters[1:] self.dim_in, filters = filters[0], filters[1:]
self.feature_dims = [self.dim_in] + filters self.feature_dims = [self.dim_in] + filters
self.conv1 = torch.nn.Conv2d( self.conv1 = nn.Conv2d(
3, 64, 3, 64,
kernel_size=7, kernel_size=7,
stride=2, stride=2,
...@@ -134,29 +132,31 @@ class ResNet(torch.nn.Module): ...@@ -134,29 +132,31 @@ class ResNet(torch.nn.Module):
bias=False, bias=False,
) )
self.bn1 = nn.FrozenAffine(self.dim_in) self.bn1 = nn.FrozenAffine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d( self.maxpool = nn.MaxPool2d(
kernel_size=3, kernel_size=3,
stride=2, stride=2,
padding=0, padding=0,
ceil_mode=True, ceil_mode=True,
) )
self.drop3 = torch.nn.DropBlock2d( drop3 = {
kp=0.9, 'kp': 0.9,
block_size=7, 'block_size': 7,
alpha=0.25, 'alpha': 1.00,
decrement=cfg.DROPBLOCK.DECREMENT 'decrement': cfg.DROPBLOCK.DECREMENT,
) if cfg.DROPBLOCK.DROP_ON else None 'inplace': True,
self.drop4 = torch.nn.DropBlock2d( } if cfg.DROPBLOCK.DROP_ON else None
kp=0.9, drop4 = {
block_size=7, 'kp': 0.9,
alpha=1.00, 'block_size': 7,
decrement=cfg.DROPBLOCK.DECREMENT 'alpha': 1.00,
) if cfg.DROPBLOCK.DROP_ON else None 'decrement': cfg.DROPBLOCK.DECREMENT,
'inplace': True,
} if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0]) self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2) self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3) self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, drop3)
self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, self.drop4) self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, drop4)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -166,7 +166,7 @@ class ResNet(torch.nn.Module): ...@@ -166,7 +166,7 @@ class ResNet(torch.nn.Module):
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
m.weight.requires_grad = False m.weight.requires_grad = False
m._buffers['weight'] = m.weight m._buffers['weight'] = m.weight
del m._parameters['weight'] del m._parameters['weight']
......
...@@ -29,7 +29,6 @@ class SSD(nn.Module): ...@@ -29,7 +29,6 @@ class SSD(nn.Module):
######################################## ########################################
# SSD outputs # # SSD outputs #
######################################## ########################################
self.cls_conv = torch.nn.ModuleList( self.cls_conv = torch.nn.ModuleList(
nn.Conv3x3(feature_dims[0], feature_dims[0], bias=True) nn.Conv3x3(feature_dims[0], feature_dims[0], bias=True)
for _ in range(cfg.SSD.NUM_CONVS) for _ in range(cfg.SSD.NUM_CONVS)
......
...@@ -36,7 +36,6 @@ class _NonMaxSuppression(Function): ...@@ -36,7 +36,6 @@ class _NonMaxSuppression(Function):
return self.dispatch([dets], [self.alloc()]) return self.dispatch([dets], [self.alloc()])
class _RetinaNetDecoder(Function): class _RetinaNetDecoder(Function):
"""Decode predictions from RetinaNet.""" """Decode predictions from RetinaNet."""
......
...@@ -33,6 +33,7 @@ def kaiming_normal(weight, mode='fan_in'): ...@@ -33,6 +33,7 @@ def kaiming_normal(weight, mode='fan_in'):
nonlinearity='relu', nonlinearity='relu',
) )
# Aliases # Aliases
constant = nn.init.constant_ constant = nn.init.constant_
normal = nn.init.normal_ normal = nn.init.normal_
...@@ -185,6 +185,7 @@ class SigmoidFocalLoss(object): ...@@ -185,6 +185,7 @@ class SigmoidFocalLoss(object):
return nn.SigmoidFocalLoss( return nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA, alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA, gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
negative_index=0, # Background index
) )
...@@ -211,6 +212,7 @@ BCEWithLogitsLoss = nn.BCEWithLogitsLoss ...@@ -211,6 +212,7 @@ BCEWithLogitsLoss = nn.BCEWithLogitsLoss
Conv2d = nn.Conv2d Conv2d = nn.Conv2d
ConvTranspose2d = nn.ConvTranspose2d ConvTranspose2d = nn.ConvTranspose2d
DepthwiseConv2d = nn.DepthwiseConv2d DepthwiseConv2d = nn.DepthwiseConv2d
DropBlock2d = nn.DropBlock2d
Linear = nn.Linear Linear = nn.Linear
MaxPool2d = nn.MaxPool2d MaxPool2d = nn.MaxPool2d
Module = nn.Module Module = nn.Module
......
...@@ -15,7 +15,7 @@ from __future__ import print_function ...@@ -15,7 +15,7 @@ from __future__ import print_function
import functools import functools
import dragon.vm.torch as torch from dragon.vm import torch
from seetadet.core.config import cfg from seetadet.core.config import cfg
...@@ -41,7 +41,9 @@ class Bootstrap(torch.nn.Module): ...@@ -41,7 +41,9 @@ class Bootstrap(torch.nn.Module):
def __init__(self): def __init__(self):
super(Bootstrap, self).__init__() super(Bootstrap, self).__init__()
self.normalize_func = functools.partial( self._device = torch.device('cpu')
self._dummy_buffer = torch.ones(1)
self._normalize_func = functools.partial(
torch.channel_normalize, torch.channel_normalize,
mean=cfg.PIXEL_MEANS, mean=cfg.PIXEL_MEANS,
std=[1., 1., 1.], std=[1., 1., 1.],
...@@ -49,10 +51,9 @@ class Bootstrap(torch.nn.Module): ...@@ -49,10 +51,9 @@ class Bootstrap(torch.nn.Module):
dims=(0, 3, 1, 2), dims=(0, 3, 1, 2),
dtype=cfg.MODEL.PRECISION.lower(), dtype=cfg.MODEL.PRECISION.lower(),
) )
self.dummy_buffer = torch.ones(1)
def _apply(self, fn): def _apply(self, fn):
fn(self.dummy_buffer) fn(self._dummy_buffer)
def cpu(self): def cpu(self):
self._device = torch.device('cpu') self._device = torch.device('cpu')
...@@ -61,12 +62,11 @@ class Bootstrap(torch.nn.Module): ...@@ -61,12 +62,11 @@ class Bootstrap(torch.nn.Module):
self._device = torch.device('cuda', device) self._device = torch.device('cuda', device)
def device(self): def device(self):
"""Return the device of this module.""" return self._dummy_buffer.device
return self.dummy_buffer.device
def forward(self, input): def forward(self, input):
if isinstance(input, torch.Tensor): if isinstance(input, torch.Tensor):
if input.size(1) <= 3: if input.shape[1] <= 3:
return input return input
cur_device = self.device() cur_device = self.device()
if input._device != cur_device: if input._device != cur_device:
...@@ -74,4 +74,4 @@ class Bootstrap(torch.nn.Module): ...@@ -74,4 +74,4 @@ class Bootstrap(torch.nn.Module):
input = input.cpu() input = input.cpu()
else: else:
input = input.cuda(cur_device.index) input = input.cuda(cur_device.index)
return self.normalize_func(input) return self._normalize_func(input)
...@@ -32,8 +32,8 @@ class SGDSolver(object): ...@@ -32,8 +32,8 @@ class SGDSolver(object):
lr=cfg.SOLVER.BASE_LR, lr=cfg.SOLVER.BASE_LR,
momentum=cfg.SOLVER.MOMENTUM, momentum=cfg.SOLVER.MOMENTUM,
weight_decay=cfg.SOLVER.WEIGHT_DECAY, weight_decay=cfg.SOLVER.WEIGHT_DECAY,
clip_gradient=float(cfg.SOLVER.CLIP_NORM), clip_norm=float(cfg.SOLVER.CLIP_NORM),
scale_gradient=1. / cfg.SOLVER.LOSS_SCALING, scale=1. / cfg.SOLVER.LOSS_SCALING,
) )
self.lr_scheduler = lr_scheduler.get_scheduler() self.lr_scheduler = lr_scheduler.get_scheduler()
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import operator
from dragon.vm import torch
from seetadet.modules import nn
def dense_conv_flops(m, inputs, output):
"""Hook to compute flops for a dense convolution."""
k_dim = functools.reduce(operator.mul, m.kernel_size)
out_dim = functools.reduce(operator.mul, output.shape[2:])
in_c, out_c = inputs[0].shape[1], output.shape[1]
m.__params__ = (k_dim * in_c + (1 if m.bias else 0)) * out_c
m.__flops__ = m.__params__ * out_dim
def depthwise_conv_flops(m, inputs, output):
"""Hook to compute flops for a depthwise convolution."""
k_dim = functools.reduce(operator.mul, m.kernel_size)
out_dim = functools.reduce(operator.mul, output.shape[2:])
out_c = output.shape[1]
m.__params__ = (k_dim + (1 if m.bias else 0)) * out_c
m.__flops__ = m.__params__ * out_dim
def register_flops(module):
"""Register hooks to collect flops info."""
if not hasattr(module, '__flops__'):
module.__flops__ = 0.
for m in module.modules():
if isinstance(m, nn.DepthwiseConv2d):
m.register_forward_hook(depthwise_conv_flops)
elif isinstance(m, nn.Conv2d):
m.register_forward_hook(dense_conv_flops)
def collect_flops(module, normalizer=1e6):
"""Collect flops from the last forward."""
total_flops = 0.
for m in module.modules():
if hasattr(m, '__flops__'):
total_flops += m.__flops__
m.__flops__ = 0.
return total_flops / normalizer
def benchmark_flops(module, normalizer=1e6):
"""Return the flops by running benchmark once."""
register_flops(module)
collect_flops(module)
original_training = module.training
if original_training:
module.eval()
with torch.no_grad():
module()
if original_training:
module.train()
return collect_flops(module, normalizer)
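As a sanity check on the hook arithmetic above: a dense 3x3 convolution with 64 input channels, 128 output channels, no bias and a 56x56 output map has (3 * 3 * 64) * 128 = 73,728 parameters and 73,728 * 56 * 56, roughly 231.2 million multiply-accumulates, under this counting. A standalone recomputation of the same numbers (plain Python, not using the hooks):

import functools
import operator

kernel_size, in_c, out_c = (3, 3), 64, 128
out_h, out_w = 56, 56

k_dim = functools.reduce(operator.mul, kernel_size)
params = (k_dim * in_c + 0) * out_c      # "+ 1" per output channel if biased
flops = params * out_h * out_w
print(params, flops / 1e6)               # 73728 231.211008

In the module above, benchmark_flops(module) registers the hooks, runs one forward pass under torch.no_grad(), and returns the accumulated total divided by the normalizer.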