Commit 8558d3df by Ting PAN

Adapt to the latest dragon preview version

Summary:
This commit updates the repository to match dragon 0.3.0.dev20200707.
1 parent 4bcab266
Showing with 1252 additions and 1064 deletions
------------------------------------------------------------------------
A list of the most significant changes made to SeetaDet over time.
SeetaDet 0.4.2 (20200707)
Dragon Minimum Required (Version 0.3.0.dev20200707)
Changes:
- Adapt to the latest dragon preview version.
Preview Features:
- None
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.4.1 (20200421)
Dragon Minimum Required (Version 0.3.0.dev20200421)
......
......@@ -14,7 +14,7 @@ The torch-style codes help us to simplify the hierarchical pipeline of modern de
## Requirements
seeta-dragon >= 0.3.0.dev20200421
seeta-dragon >= 0.3.0.dev20200707
## Installation
......
......@@ -32,16 +32,17 @@ FRCNN:
TRAIN:
WEIGHTS: '/model/R-101.Affine.pth'
DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
IMS_PER_BATCH: 2
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [800]
MAX_SIZE: 1333
NMS: 0.5
RPN_POST_NMS_TOP_N: 1000
......@@ -32,16 +32,16 @@ FRCNN:
TRAIN:
WEIGHTS: '/model/R-101.Affine.pth'
DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
IMS_PER_BATCH: 2
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [800]
MAX_SIZE: 1333
NMS: 0.5
RPN_POST_NMS_TOP_N: 1000
......@@ -30,7 +30,7 @@ TRAIN:
TEST:
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [600]
MAX_SIZE: 1000
NMS: 0.45
\ No newline at end of file
NMS: 0.45
RPN_POST_NMS_TOP_N: 1000
\ No newline at end of file
......@@ -29,16 +29,16 @@ FRCNN:
TRAIN:
WEIGHTS: '/model/VGG16.RCNN.pth'
DATASET: '/data/voc_0712_trainval'
RPN_MIN_SIZE: 16
IMS_PER_BATCH: 2
BATCH_SIZE: 128
SCALES: [600]
MAX_SIZE: 1000
RPN_MIN_SIZE: 16
TEST:
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_MIN_SIZE: 16
RPN_POST_NMS_TOP_N: 300
SCALES: [600]
MAX_SIZE: 1000
NMS: 0.45
\ No newline at end of file
RPN_MIN_SIZE: 16
NMS: 0.45
RPN_POST_NMS_TOP_N: 300
\ No newline at end of file
......@@ -32,11 +32,11 @@ FPN:
TRAIN:
WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
USE_COLOR_JITTER: True
IMS_PER_BATCH: 16
SCALES: [416]
RANDOM_SCALES: [0.25, 1.0]
USE_DIFF: False # Do not use crowd objects
USE_COLOR_JITTER: False
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
......
......@@ -23,10 +23,10 @@ FPN:
TRAIN:
WEIGHTS: '/model/AirNet.Affine.pth'
DATASET: '/data/voc_0712_trainval'
USE_COLOR_JITTER: True
IMS_PER_BATCH: 32
SCALES: [320]
RANDOM_SCALES: [0.25, 1.0]
USE_COLOR_JITTER: True
TEST:
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
......@@ -24,10 +24,10 @@ FPN:
TRAIN:
WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/voc_0712_trainval'
USE_COLOR_JITTER: True
IMS_PER_BATCH: 32
SCALES: [320]
RANDOM_SCALES: [0.25, 2.0]
USE_COLOR_JITTER: True
TEST:
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
......@@ -38,6 +38,7 @@ TRAIN:
IMS_PER_BATCH: 32
SCALES: [300]
RANDOM_SCALES: [0.25, 1.00]
USE_COLOR_JITTER: True
TEST:
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
......@@ -3,7 +3,7 @@ VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: ssd
BACKBONE: airnet5b.mbox
BACKBONE: airnet.fpn
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
......@@ -17,19 +17,30 @@ SOLVER:
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_ssd_320
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 8
SSD:
NUM_CONVS: 2
MULTIBOX:
STRIDES: [8, 16, 32]
MIN_SIZES: [30, 90, 150]
MAX_SIZES: [90, 150, 210]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
STRIDES: [8, 16, 32, 64, 100, 300]
MIN_SIZES: [30, 60, 110, 162, 213, 264]
MAX_SIZES: [60, 110, 162, 213, 264, 315]
ASPECT_RATIOS: [
[1, 2, 0.5],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5],
[1, 2, 0.5],
]
TRAIN:
WEIGHTS: '/model/AirNet.Affine.pth'
DATASET: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [320]
RANDOM_SCALES: [0.25, 1.00]
IMS_PER_BATCH: 32
USE_COLOR_JITTER: True
TEST:
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
......@@ -37,9 +37,10 @@ SSD:
TRAIN:
WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [320]
RANDOM_SCALES: [0.25, 1.00]
IMS_PER_BATCH: 32
USE_COLOR_JITTER: True
TEST:
DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
---
AccessModifierOffset: -1
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ]
IncludeCategories:
- Regex: '^<.*\.h(pp)?>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
UseTab: Never
...
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h"
#include "nms_op.h"
#include "../utils/detection_utils.h"
namespace dragon {
template <class Context> template <typename T>
template <class Context>
template <typename T>
void NonMaxSuppressionOp<Context>::DoRunWithType() {
int num_selected;
utils::detection::ApplyNMS(
Output(0)->count(),
Output(0)->count(),
iou_threshold_,
Input(0).template mutable_data<T, Context>(),
Output(0)->template mutable_data<int64_t, CPUContext>(),
num_selected, ctx()
);
Output(0)->Reshape({ num_selected });
int num_selected;
utils::detection::ApplyNMS(
Output(0)->count(),
Output(0)->count(),
iou_threshold_,
Input(0).template mutable_data<T, Context>(),
Output(0)->template mutable_data<int64_t, CPUContext>(),
num_selected,
ctx());
Output(0)->Reshape({num_selected});
}
template <class Context>
void NonMaxSuppressionOp<Context>::RunOnDevice() {
CHECK(Input(0).ndim() == 2 && Input(0).dim(1) == 5)
<< "\nThe dimensions of boxes should be (num_boxes, 5).";
Output(0)->Reshape({ Input(0).dim(0) });
CHECK(Input(0).ndim() == 2 && Input(0).dim(1) == 5)
<< "\nThe dimensions of boxes should be (num_boxes, 5).";
DispatchHelper<TensorTypes<float>>::Call(this, Input(0));
Output(0)->Reshape({Input(0).dim(0)});
DispatchHelper<TensorTypes<float>>::Call(this, Input(0));
}
DEPLOY_CPU(NonMaxSuppression);
......@@ -41,4 +38,4 @@ OPERATOR_SCHEMA(NonMaxSuppression).NumInputs(1).NumOutputs(1);
NO_GRADIENT(NonMaxSuppression);
} // namespace dragon
} // namespace dragon
......@@ -5,7 +5,7 @@
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
......@@ -20,20 +20,20 @@ namespace dragon {
template <class Context>
class NonMaxSuppressionOp final : public Operator<Context> {
public:
NonMaxSuppressionOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
iou_threshold_(OpArg<float>("iou_threshold", 0.5f)) {}
USE_OPERATOR_FUNCTIONS;
NonMaxSuppressionOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
iou_threshold_(OpArg<float>("iou_threshold", 0.5f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
void RunOnDevice() override;
template <typename T>
void DoRunWithType();
template <typename T>
void DoRunWithType();
protected:
float iou_threshold_;
float iou_threshold_;
};
} // namespace dragon
} // namespace dragon
#endif // SEETADET_CXX_OPERATORS_NMS_OP_H_
#endif // SEETADET_CXX_OPERATORS_NMS_OP_H_
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include <dragon/utils/math_functions.h>
#include "../utils/detection_utils.h"
#include "retinanet_decoder_op.h"
namespace dragon {
template <class Context> template <typename T>
template <class Context>
template <typename T>
void RetinaNetDecoderOp<Context>::DoRunWithType() {
using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox
using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox
int feat_h, feat_w;
int C = Input(-3).dim(2), A, K;
int total_proposals = 0;
int num_candidates, num_boxes, num_proposals;
int feat_h, feat_w;
int C = Input(-3).dim(2), A, K;
int total_proposals = 0;
int num_candidates, num_boxes, num_proposals;
auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>();
auto* y = Output(0)->template mutable_data<BT, BC>();
auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>();
auto* y = Output(0)->template mutable_data<BT, BC>();
for (int n = 0; n < num_images_; ++n) {
BT im_h = im_info[0];
BT im_w = im_info[1];
BT im_scale_h = im_info[2];
BT im_scale_w = im_info[2];
if (Input(-1).dim(1) == 4) im_scale_w = im_info[3];
auto* scores = batch_scores + n * Input(-3).stride(0);
auto* deltas = batch_deltas + n * Input(-2).stride(0);
CHECK_EQ(strides_.size(), InputSize() - 3)
<< "\nGiven " << strides_.size() << " strides "
<< "and " << InputSize() - 3 << " features";
// Select the top-k candidates as proposals
num_boxes = Input(-3).dim(1);
num_candidates = Input(-3).count(1);
roi_indices_.resize(num_candidates);
num_candidates = 0;
for (int i = 0; i < roi_indices_.size(); ++i)
if (scores[i] > score_thr_)
roi_indices_[num_candidates++] = i;
scores_.resize(num_candidates);
for (int i = 0; i < num_candidates; ++i)
scores_[i] = scores[roi_indices_[i]];
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true,
scores_.data(),
indices_
);
for (int i = 0; i < num_proposals; ++i)
indices_[i] = roi_indices_[indices_[i]];
// Decode the candidates
int base_offset = 0;
for (int i = 0; i < strides_.size(); i++) {
feat_h = Input(i).dim(2);
feat_w = Input(i).dim(3);
K = feat_h * feat_w;
A = int(ratios_.size() * scales_.size());
anchors_.resize((size_t)(A * 4));
utils::detection::GenerateAnchors(
strides_[i],
(int)ratios_.size(),
(int)scales_.size(),
ratios_.data(),
scales_.data(),
anchors_.data()
);
utils::detection::GenerateGridAnchors(
num_proposals, C, A,
feat_h, feat_w,
strides_[i],
base_offset,
anchors_.data(),
indices_.data(),
y
);
base_offset += (A * K);
}
utils::detection::GenerateMCProposals(
num_proposals,
num_boxes, C,
n,
im_h,
im_w,
im_scale_h,
im_scale_w,
scores,
deltas,
indices_.data(),
y
);
total_proposals += num_proposals;
y += (num_proposals * 7);
im_info += Input(-1).dim(1);
for (int n = 0; n < num_images_; ++n) {
BT im_h = im_info[0];
BT im_w = im_info[1];
BT im_scale_h = im_info[2];
BT im_scale_w = im_info[2];
if (Input(-1).dim(1) == 4) im_scale_w = im_info[3];
auto* scores = batch_scores + n * Input(-3).stride(0);
auto* deltas = batch_deltas + n * Input(-2).stride(0);
CHECK_EQ(strides_.size(), InputSize() - 3)
<< "\nGiven " << strides_.size() << " strides "
<< "and " << InputSize() - 3 << " features";
// Select the top-k candidates as proposals
num_boxes = Input(-3).dim(1);
num_candidates = Input(-3).count(1);
roi_indices_.resize(num_candidates);
num_candidates = 0;
for (int i = 0; i < roi_indices_.size(); ++i)
if (scores[i] > score_thr_) roi_indices_[num_candidates++] = i;
scores_.resize(num_candidates);
for (int i = 0; i < num_candidates; ++i)
scores_[i] = scores[roi_indices_[i]];
num_proposals = std::min(num_candidates, (int)pre_nms_topn_);
utils::math::ArgPartition(
num_candidates, num_proposals, true, scores_.data(), indices_);
for (int i = 0; i < num_proposals; ++i)
indices_[i] = roi_indices_[indices_[i]];
// Decode the candidates
int base_offset = 0;
for (int i = 0; i < strides_.size(); i++) {
feat_h = Input(i).dim(2);
feat_w = Input(i).dim(3);
K = feat_h * feat_w;
A = int(ratios_.size() * scales_.size());
anchors_.resize((size_t)(A * 4));
utils::detection::GenerateAnchors(
strides_[i],
(int)ratios_.size(),
(int)scales_.size(),
ratios_.data(),
scales_.data(),
anchors_.data());
utils::detection::GenerateGridAnchors(
num_proposals,
C,
A,
feat_h,
feat_w,
strides_[i],
base_offset,
anchors_.data(),
indices_.data(),
y);
base_offset += (A * K);
}
utils::detection::GenerateMCProposals(
num_proposals,
num_boxes,
C,
n,
im_h,
im_w,
im_scale_h,
im_scale_w,
scores,
deltas,
indices_.data(),
y);
total_proposals += num_proposals;
y += (num_proposals * 7);
im_info += Input(-1).dim(1);
}
Output(0)->Reshape({ total_proposals, 7 });
Output(0)->Reshape({total_proposals, 7});
}
template <class Context>
void RetinaNetDecoderOp<Context>::RunOnDevice() {
num_images_ = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_
<< " groups info, got "
<< Input(-1).dim(0) << ".";
num_images_ = Input(0).dim(0);
Output(0)->Reshape({ num_images_ * pre_nms_topn_, 7 });
CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_ << " groups info, got "
<< Input(-1).dim(0) << ".";
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
Output(0)->Reshape({num_images_ * pre_nms_topn_, 7});
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
}
DEPLOY_CPU(RetinaNetDecoder);
......@@ -123,8 +113,6 @@ DEPLOY_CPU(RetinaNetDecoder);
DEPLOY_CUDA(RetinaNetDecoder);
#endif
OPERATOR_SCHEMA(RetinaNetDecoder)
.NumInputs(3, INT_MAX)
.NumOutputs(1, INT_MAX);
OPERATOR_SCHEMA(RetinaNetDecoder).NumInputs(3, INT_MAX).NumOutputs(1, INT_MAX);
} // namespace dragon
} // namespace dragon
......@@ -5,7 +5,7 @@
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
......@@ -20,27 +20,27 @@ namespace dragon {
template <class Context>
class RetinaNetDecoderOp final : public Operator<Context> {
public:
RetinaNetDecoderOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
strides_(OpArgs<int64_t>("strides")),
ratios_(OpArgs<float>("ratios")),
scales_(OpArgs<float>("scales")),
pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
score_thr_(OpArg<float>("score_thresh", 0.05f)) {}
USE_OPERATOR_FUNCTIONS;
RetinaNetDecoderOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
strides_(OpArgs<int64_t>("strides")),
ratios_(OpArgs<float>("ratios")),
scales_(OpArgs<float>("scales")),
pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
score_thr_(OpArg<float>("score_thresh", 0.05f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
void RunOnDevice() override;
template <typename T>
void DoRunWithType();
template <typename T>
void DoRunWithType();
protected:
float score_thr_;
vec64_t strides_, indices_, roi_indices_;
vector<float> ratios_, scales_, scores_, anchors_;
int64_t num_images_, pre_nms_topn_;
float score_thr_;
vec64_t strides_, indices_, roi_indices_;
vector<float> ratios_, scales_, scores_, anchors_;
int64_t num_images_, pre_nms_topn_;
};
} // namespace dragon
} // namespace dragon
#endif // SEETADET_CXX_OPERATORS_RETINANET_DECODER_OP_H_
#endif // SEETADET_CXX_OPERATORS_RETINANET_DECODER_OP_H_
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include <dragon/utils/math_functions.h>
#include "../utils/detection_utils.h"
#include "rpn_decoder_op.h"
namespace dragon {
template <class Context> template <typename T>
template <class Context>
template <typename T>
void RPNDecoderOp<Context>::DoRunWithType() {
using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox
int feat_h, feat_w, K, A;
int total_rois = 0, num_rois;
int num_candidates, num_proposals;
auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>();
auto* y = Output(0)->template mutable_data<BT, BC>();
for (int n = 0; n < num_images_; ++n) {
const BT im_h = im_info[0];
const BT im_w = im_info[1];
const BT scale = im_info[2];
const BT min_box_h = min_size_ * scale;
const BT min_box_w = min_size_ * scale;
auto* scores = batch_scores + n * Input(-3).stride(0);
auto* deltas = batch_deltas + n * Input(-2).stride(0);
if (strides_.size() == 1) {
// Case 1: single stride
feat_h = Input(0).dim(2);
feat_w = Input(0).dim(3);
K = feat_h * feat_w;
A = int(ratios_.size() * scales_.size());
// Select the Top-K candidates as proposals
num_candidates = A * K;
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true, scores, indices_
);
// Decode the candidates
anchors_.resize((size_t)(A * 4));
proposals_.Reshape({ num_proposals, 5 });
utils::detection::GenerateAnchors(
strides_[0],
(int)ratios_.size(),
(int)scales_.size(),
ratios_.data(),
scales_.data(),
anchors_.data()
);
utils::detection::GenerateGridAnchors(
num_proposals, A,
feat_h, feat_w,
strides_[0],
0,
anchors_.data(),
indices_.data(),
proposals_.template mutable_data<BT, BC>()
);
utils::detection::GenerateSSProposals(
K, num_proposals,
im_h, im_w,
min_box_h, min_box_w,
scores,
deltas,
indices_.data(),
proposals_.template mutable_data<BT, BC>()
);
// Sort, NMS and Retrieve
utils::detection::SortProposals(
0,
num_proposals - 1,
num_proposals,
proposals_.template mutable_data<BT, BC>()
);
utils::detection::ApplyNMS(
num_proposals,
post_nms_topn_,
nms_thr_,
proposals_.template mutable_data<BT, Context>(),
roi_indices_.data(),
num_rois, ctx()
);
utils::detection::RetrieveRoIs(
num_rois,
n,
proposals_.template data<BT, BC>(),
roi_indices_.data(),
y
);
} else if (strides_.size() > 1) {
// Case 2: multiple strides
CHECK_EQ(strides_.size(), InputSize() - 3)
<< "\nGiven " << strides_.size() << " strides "
<< "and " << InputSize() - 3 << " feature inputs";
CHECK_EQ(strides_.size(), scales_.size())
<< "\nGiven " << strides_.size() << " strides "
<< "and " << scales_.size() << " scales";
// Select the top-k candidates as proposals
num_candidates = Input(-3).dim(1);
num_proposals = std::min(
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition(
num_candidates,
num_proposals,
true, scores, indices_
);
// Decode the candidates
int base_offset = 0;
proposals_.Reshape({ num_proposals, 5 });
auto* proposals = proposals_
.template mutable_data<BT, BC>();
for (int i = 0; i < strides_.size(); i++) {
feat_h = Input(i).dim(2);
feat_w = Input(i).dim(3);
K = feat_h * feat_w;
A = (int)ratios_.size();
anchors_.resize((size_t)(A * 4));
utils::detection::GenerateAnchors(
strides_[i],
(int)ratios_.size(),
1,
ratios_.data(),
scales_.data(),
anchors_.data()
);
utils::detection::GenerateGridAnchors(
num_proposals, A,
feat_h, feat_w,
strides_[i],
base_offset,
anchors_.data(),
indices_.data(),
proposals
);
base_offset += (A * K);
}
utils::detection::GenerateMSProposals(
num_candidates,
num_proposals,
im_h, im_w,
min_box_h, min_box_w,
scores,
deltas,
&indices_[0],
proposals
);
// Sort, NMS and Retrieve
utils::detection::SortProposals(
0,
num_proposals - 1,
num_proposals,
proposals
);
utils::detection::ApplyNMS(
num_proposals,
post_nms_topn_,
nms_thr_,
proposals_.template mutable_data<BT, Context>(),
roi_indices_.data(),
num_rois, ctx()
);
utils::detection::RetrieveRoIs(
num_rois,
n,
proposals,
roi_indices_.data(),
y
);
} else {
LOG(FATAL) << "Excepted at least one stride for proposals.";
}
total_rois += num_rois;
y += (num_rois * 5);
im_info += Input(-1).dim(1);
using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox
int feat_h, feat_w, K, A;
int total_rois = 0, num_rois;
int num_candidates, num_proposals;
auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>();
auto* y = Output(0)->template mutable_data<BT, BC>();
for (int n = 0; n < num_images_; ++n) {
const BT im_h = im_info[0];
const BT im_w = im_info[1];
const BT scale = im_info[2];
const BT min_box_h = min_size_ * scale;
const BT min_box_w = min_size_ * scale;
auto* scores = batch_scores + n * Input(-3).stride(0);
auto* deltas = batch_deltas + n * Input(-2).stride(0);
if (strides_.size() == 1) {
// Case 1: single stride
feat_h = Input(0).dim(2);
feat_w = Input(0).dim(3);
K = feat_h * feat_w;
A = int(ratios_.size() * scales_.size());
// Select the Top-K candidates as proposals
num_candidates = A * K;
num_proposals = std::min(num_candidates, (int)pre_nms_topn_);
utils::math::ArgPartition(
num_candidates, num_proposals, true, scores, indices_);
// Decode the candidates
anchors_.resize((size_t)(A * 4));
proposals_.Reshape({num_proposals, 5});
utils::detection::GenerateAnchors(
strides_[0],
(int)ratios_.size(),
(int)scales_.size(),
ratios_.data(),
scales_.data(),
anchors_.data());
utils::detection::GenerateGridAnchors(
num_proposals,
A,
feat_h,
feat_w,
strides_[0],
0,
anchors_.data(),
indices_.data(),
proposals_.template mutable_data<BT, BC>());
utils::detection::GenerateSSProposals(
K,
num_proposals,
im_h,
im_w,
min_box_h,
min_box_w,
scores,
deltas,
indices_.data(),
proposals_.template mutable_data<BT, BC>());
// Sort, NMS and Retrieve
utils::detection::SortProposals(
0,
num_proposals - 1,
num_proposals,
proposals_.template mutable_data<BT, BC>());
utils::detection::ApplyNMS(
num_proposals,
post_nms_topn_,
nms_thr_,
proposals_.template mutable_data<BT, Context>(),
roi_indices_.data(),
num_rois,
ctx());
utils::detection::RetrieveRoIs(
num_rois,
n,
proposals_.template data<BT, BC>(),
roi_indices_.data(),
y);
} else if (strides_.size() > 1) {
// Case 2: multiple strides
CHECK_EQ(strides_.size(), InputSize() - 3)
<< "\nGiven " << strides_.size() << " strides "
<< "and " << InputSize() - 3 << " feature inputs";
CHECK_EQ(strides_.size(), scales_.size())
<< "\nGiven " << strides_.size() << " strides "
<< "and " << scales_.size() << " scales";
// Select the top-k candidates as proposals
num_candidates = Input(-3).dim(1);
num_proposals = std::min(num_candidates, (int)pre_nms_topn_);
utils::math::ArgPartition(
num_candidates, num_proposals, true, scores, indices_);
// Decode the candidates
int base_offset = 0;
proposals_.Reshape({num_proposals, 5});
auto* proposals = proposals_.template mutable_data<BT, BC>();
for (int i = 0; i < strides_.size(); i++) {
feat_h = Input(i).dim(2);
feat_w = Input(i).dim(3);
K = feat_h * feat_w;
A = (int)ratios_.size();
anchors_.resize((size_t)(A * 4));
utils::detection::GenerateAnchors(
strides_[i],
(int)ratios_.size(),
1,
ratios_.data(),
scales_.data(),
anchors_.data());
utils::detection::GenerateGridAnchors(
num_proposals,
A,
feat_h,
feat_w,
strides_[i],
base_offset,
anchors_.data(),
indices_.data(),
proposals);
base_offset += (A * K);
}
utils::detection::GenerateMSProposals(
num_candidates,
num_proposals,
im_h,
im_w,
min_box_h,
min_box_w,
scores,
deltas,
&indices_[0],
proposals);
// Sort, NMS and Retrieve
utils::detection::SortProposals(
0, num_proposals - 1, num_proposals, proposals);
utils::detection::ApplyNMS(
num_proposals,
post_nms_topn_,
nms_thr_,
proposals_.template mutable_data<BT, Context>(),
roi_indices_.data(),
num_rois,
ctx());
utils::detection::RetrieveRoIs(
num_rois, n, proposals, roi_indices_.data(), y);
} else {
LOG(FATAL) << "Excepted at least one stride for proposals.";
}
Output(0)->Reshape({ total_rois, 5 });
// Distribute rois into K bins
if (OutputSize() > 1) {
CHECK_EQ(max_level_ - min_level_ + 1, OutputSize())
<< "\nExcepted " << OutputSize() << " outputs for levels "
"between [" << min_level_ << ", " << max_level_ << "].";
vector<BT*> ys(OutputSize());
vector<vec64_t> bins(OutputSize());
Tensor RoIs; RoIs.ReshapeLike(*Output(0));
auto* rois = RoIs.template mutable_data<BT, BC>();
ctx()->template Copy<BT, BC, BC>(
Output(0)->count(),
rois, Output(0)->template data<BT, BC>()
);
utils::detection::CollectRoIs(
total_rois,
min_level_,
max_level_,
canonical_level_,
canonical_scale_,
rois, bins
);
for (int i = 0; i < OutputSize(); i++) {
Output(i)->Reshape({ std::max((int)bins[i].size(), 1), 5 });
ys[i] = Output(i)->template mutable_data<BT, BC>();
}
utils::detection::DistributeRoIs(bins, rois, ys);
total_rois += num_rois;
y += (num_rois * 5);
im_info += Input(-1).dim(1);
}
Output(0)->Reshape({total_rois, 5});
// Distribute rois into K bins
if (OutputSize() > 1) {
CHECK_EQ(max_level_ - min_level_ + 1, OutputSize())
<< "\nExcepted " << OutputSize() << " outputs for levels "
<< "between [" << min_level_ << ", " << max_level_ << "].";
vector<BT*> ys(OutputSize());
vector<vec64_t> bins(OutputSize());
Tensor RoIs;
RoIs.ReshapeLike(*Output(0));
auto* rois = RoIs.template mutable_data<BT, BC>();
ctx()->template Copy<BT, BC, BC>(
Output(0)->count(), rois, Output(0)->template data<BT, BC>());
utils::detection::CollectRoIs(
total_rois,
min_level_,
max_level_,
canonical_level_,
canonical_scale_,
rois,
bins);
for (int i = 0; i < OutputSize(); i++) {
Output(i)->Reshape({std::max((int)bins[i].size(), 1), 5});
ys[i] = Output(i)->template mutable_data<BT, BC>();
}
utils::detection::DistributeRoIs(bins, rois, ys);
}
}
template <class Context>
void RPNDecoderOp<Context>::RunOnDevice() {
num_images_ = Input(0).dim(0);
num_images_ = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_
<< " groups info, got "
<< Input(-1).dim(0) << ".";
CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_ << " groups info, got "
<< Input(-1).dim(0) << ".";
roi_indices_.resize(post_nms_topn_);
Output(0)->Reshape({ num_images_ * post_nms_topn_, 5 });
roi_indices_.resize(post_nms_topn_);
Output(0)->Reshape({num_images_ * post_nms_topn_, 5});
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
}
DEPLOY_CPU(RPNDecoder);
......@@ -241,8 +218,6 @@ DEPLOY_CPU(RPNDecoder);
DEPLOY_CUDA(RPNDecoder);
#endif
OPERATOR_SCHEMA(RPNDecoder)
.NumInputs(3, INT_MAX)
.NumOutputs(1, INT_MAX);
OPERATOR_SCHEMA(RPNDecoder).NumInputs(3, INT_MAX).NumOutputs(1, INT_MAX);
} // namespace dragon
} // namespace dragon
......@@ -5,7 +5,7 @@
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
......@@ -20,36 +20,36 @@ namespace dragon {
template <class Context>
class RPNDecoderOp final : public Operator<Context> {
public:
RPNDecoderOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
strides_(OpArgs<int64_t>("strides")),
ratios_(OpArgs<float>("ratios")),
scales_(OpArgs<float>("scales")),
pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
post_nms_topn_(OpArg<int64_t>("post_nms_top_n", 300)),
nms_thr_(OpArg<float>("nms_thresh", 0.7f)),
min_size_(OpArg<int64_t>("min_size", 16)),
min_level_(OpArg<int64_t>("min_level", 2)),
max_level_(OpArg<int64_t>("max_level", 5)),
canonical_level_(OpArg<int64_t>("canonical_level", 4)),
canonical_scale_(OpArg<int64_t>("canonical_scale", 224)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DoRunWithType();
RPNDecoderOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
strides_(OpArgs<int64_t>("strides")),
ratios_(OpArgs<float>("ratios")),
scales_(OpArgs<float>("scales")),
pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
post_nms_topn_(OpArg<int64_t>("post_nms_top_n", 300)),
nms_thr_(OpArg<float>("nms_thresh", 0.7f)),
min_size_(OpArg<int64_t>("min_size", 16)),
min_level_(OpArg<int64_t>("min_level", 2)),
max_level_(OpArg<int64_t>("max_level", 5)),
canonical_level_(OpArg<int64_t>("canonical_level", 4)),
canonical_scale_(OpArg<int64_t>("canonical_scale", 224)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T>
void DoRunWithType();
protected:
float nms_thr_;
vec64_t strides_, indices_, roi_indices_;
vector<float> ratios_, scales_, scores_, anchors_;
int64_t min_size_, pre_nms_topn_, post_nms_topn_;
int64_t num_images_, min_level_, max_level_;
int64_t canonical_level_, canonical_scale_;
Tensor proposals_;
float nms_thr_;
vec64_t strides_, indices_, roi_indices_;
vector<float> ratios_, scales_, scores_, anchors_;
int64_t min_size_, pre_nms_topn_, post_nms_topn_;
int64_t num_images_, min_level_, max_level_;
int64_t canonical_level_, canonical_scale_;
Tensor proposals_;
};
} // namespace dragon
} // namespace dragon
#endif // SEETADET_CXX_OPERATORS_RPN_DECODER_OP_H_
#endif // SEETADET_CXX_OPERATORS_RPN_DECODER_OP_H_
......@@ -5,7 +5,7 @@
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
......@@ -15,25 +15,35 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import glob
from distutils.core import setup
from dragon.tools import cpp_extension
from dragon.tools import cpp_extension
# Choose the extension builder: fall back to the plain C++ builder unless
# a CUDA toolkit location is configured and CUDA reports itself available.
if cpp_extension.CUDA_HOME is None or \
        not cpp_extension._cuda.is_available():
    Extension = cpp_extension.CppExtension
else:
    Extension = cpp_extension.CUDAExtension
def find_sources(*dirs):
    """Return the extension source files found under the given directories.

    For every directory in ``dirs`` the pattern ``<dir>/*<suffix>`` is
    globbed with ``recursive=True``, so a ``'**'`` component matches
    nested directories as well. ``.cc`` files are always collected;
    ``.cu`` files are added only when ``Extension`` is the CUDA builder.

    Parameters
    ----------
    *dirs : str
        Directory patterns to search.

    Returns
    -------
    list of str
        The matched paths, in the order ``glob`` yields them.

    """
    suffixes = ['.cc']
    if Extension is cpp_extension.CUDAExtension:
        suffixes.append('.cu')
    found = []
    for directory in dirs:
        for suffix in suffixes:
            pattern = directory + '/*' + suffix
            found.extend(glob.glob(pattern, recursive=True))
    return found
ext_modules = [
Extension(
name='install.lib.modules._C',
sources=[
'utils/detection_utils.cc',
'utils/detection_utils.cu',
'operators/nms_op.cc',
'operators/retinanet_decoder_op.cc',
'operators/rpn_decoder_op.cc',
],
sources=find_sources('**'),
),
]
......
#include <dragon/core/context.h>
#include "detection_utils.h"
#include <dragon/core/context.h>
namespace dragon {
......@@ -9,45 +9,46 @@ namespace detection {
/*!
 * Return the intersection-over-union of boxes A and B.
 *
 * Each box is read as (x1, y1, x2, y2). Extents are computed with
 * a "+ 1", i.e. the corner coordinates are treated as inclusive.
 */
template <typename T>
T IoU(const T A[], const T B[]) {
  // Disjoint boxes: no need to compute any area.
  if (A[0] > B[2] || A[1] > B[3] || A[2] < B[0] || A[3] < B[1]) return 0;
  const T x1 = std::max(A[0], B[0]);
  const T y1 = std::max(A[1], B[1]);
  const T x2 = std::min(A[2], B[2]);
  const T y2 = std::min(A[3], B[3]);
  const T width = std::max((T)0, x2 - x1 + 1);
  const T height = std::max((T)0, y2 - y1 + 1);
  const T area = width * height;
  const T A_area = (A[2] - A[0] + 1) * (A[3] - A[1] + 1);
  const T B_area = (B[2] - B[0] + 1) * (B[3] - B[1] + 1);
  return area / (A_area + B_area - area);
}
/*!
 * CPU non-maximum suppression.
 *
 * Boxes are visited in index order; a visited box that is still alive
 * is kept and suppresses every later box whose IoU with it exceeds
 * ``thresh``. Boxes are read with a stride of 5 values each.
 * NOTE(review): presumably the caller passes boxes sorted by
 * descending score -- confirm against the callers.
 *
 * \param num_boxes     Number of input boxes.
 * \param max_keeps     Stop as soon as this many boxes are kept.
 * \param thresh        IoU threshold above which a box is suppressed.
 * \param boxes         Input boxes, 5 values per box.
 * \param keep_indices  Output: indices of the kept boxes.
 * \param num_keep      Output: number of kept boxes.
 * \param ctx           Unused by the CPU implementation.
 */
template <>
void ApplyNMS<float, CPUContext>(
    const int num_boxes,
    const int max_keeps,
    const float thresh,
    const float* boxes,
    int64_t* keep_indices,
    int& num_keep,
    CPUContext* ctx) {
  int count = 0;
  // Value-initialized to zero: no box is suppressed yet.
  std::vector<char> is_dead(num_boxes);
  for (int i = 0; i < num_boxes; ++i) {
    if (is_dead[i]) continue;
    keep_indices[count++] = i;
    if (count == max_keeps) break;
    for (int j = i + 1; j < num_boxes; ++j) {
      if (!is_dead[j] && IoU(&boxes[i * 5], &boxes[j * 5]) > thresh) {
        is_dead[j] = 1;
      }
    }
  }
  num_keep = count;
}
} // namespace detection
} // namespace detection
} // namespace utils
} // namespace utils
} // namespace dragon
} // namespace dragon
......@@ -9,127 +9,121 @@ namespace utils {
namespace detection {
#define DIV_UP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define DIV_UP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define NUM_THREADS 64
namespace {
/*!
 * Return whether the IoU of boxes a and b is greater than ``thresh``.
 *
 * Boxes are read as (x1, y1, x2, y2) with inclusive corners. The test
 * is written as ``inter > thresh * union`` so no division is needed.
 */
template <typename T>
__device__ bool _CheckIoU(const T* a, const T* b, const float thresh) {
  const T x1 = max(a[0], b[0]);
  const T y1 = max(a[1], b[1]);
  const T x2 = min(a[2], b[2]);
  const T y2 = min(a[3], b[3]);
  const T width = max(T(0), x2 - x1 + 1);
  const T height = max(T(0), y2 - y1 + 1);
  const T inter = width * height;
  const T Sa = (a[2] - a[0] + T(1)) * (a[3] - a[1] + T(1));
  const T Sb = (b[2] - b[0] + T(1)) * (b[3] - b[1] + T(1));
  return inter > thresh * (Sa + Sb - inter);
}
/*!
 * Compute the pairwise suppression bit masks used by NMS.
 *
 * The boxes are split into tiles of NUM_THREADS boxes. Block
 * (blockIdx.y, blockIdx.x) compares the boxes of row tile blockIdx.y
 * against the boxes of column tile blockIdx.x; tiles below the
 * diagonal are skipped. For row box ``index``, bit ``i`` of
 * ``dev_mask[index * num_blocks + col_start]`` is set when that box
 * overlaps box ``i`` of the column tile by more than ``thresh``.
 *
 * \param num_blocks  Number of tiles per dimension.
 * \param num_boxes   Number of boxes.
 * \param thresh      IoU threshold.
 * \param dev_boxes   Input boxes, 5 values per box.
 * \param dev_mask    Output masks, ``num_blocks`` words per box.
 */
template <typename T>
__global__ void _NonMaxSuppression(
    const int num_blocks,
    const int num_boxes,
    const T thresh,
    const T* dev_boxes,
    uint64_t* dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;
  if (row_start > col_start) return;

  // The last tile in each dimension may be only partially filled.
  const int row_size = min(num_boxes - row_start * NUM_THREADS, NUM_THREADS);
  const int col_size = min(num_boxes - col_start * NUM_THREADS, NUM_THREADS);

  // Stage the 4 coordinates of every column-tile box in shared memory.
  __shared__ T block_boxes[NUM_THREADS * 4];

  if (threadIdx.x < col_size) {
    const int c1 = threadIdx.x * 4;
    const int c2 = (col_start * NUM_THREADS + threadIdx.x) * 5;
    block_boxes[c1] = dev_boxes[c2];
    block_boxes[c1 + 1] = dev_boxes[c2 + 1];
    block_boxes[c1 + 2] = dev_boxes[c2 + 2];
    block_boxes[c1 + 3] = dev_boxes[c2 + 3];
  }

  __syncthreads();

  if (threadIdx.x < row_size) {
    const int index = row_start * NUM_THREADS + threadIdx.x;
    const T* dev_box = dev_boxes + index * 5;
    unsigned long long val = 0;
    // On the diagonal tile only compare against later boxes.
    const int start = (row_start == col_start) ? (threadIdx.x + 1) : 0;
    for (int i = start; i < col_size; ++i) {
      if (_CheckIoU(dev_box, block_boxes + i * 4, thresh)) {
        val |= 1ULL << i;
      }
    }
    dev_mask[index * num_blocks + col_start] = val;
  }
}
} // namespace
/*!
 * CUDA non-maximum suppression.
 *
 * The suppression masks are computed on the device, copied to the
 * host, and then scanned in index order to select the kept boxes.
 *
 * \param num_boxes     Number of input boxes.
 * \param max_keeps     Stop as soon as this many boxes are kept.
 * \param thresh        IoU threshold above which a box is suppressed.
 * \param boxes         Device pointer to the boxes, 5 values per box.
 * \param keep_indices  Host output: indices of the kept boxes.
 * \param num_keep      Output: number of kept boxes.
 * \param ctx           Context providing the stream and the allocator.
 */
template <>
void ApplyNMS<float, CUDAContext>(
    const int num_boxes,
    const int max_keeps,
    const float thresh,
    const float* boxes,
    int64_t* keep_indices,
    int& num_keep,
    CUDAContext* ctx) {
  // Nothing to do for an empty input; this also avoids launching
  // a kernel with an empty grid.
  if (num_boxes <= 0) {
    num_keep = 0;
    return;
  }

  const int num_blocks = DIV_UP(num_boxes, NUM_THREADS);

  vector<uint64_t> mask_host(num_boxes * num_blocks);
  auto* mask_dev = (uint64_t*)ctx->New(mask_host.size() * sizeof(uint64_t));

  _NonMaxSuppression<<<
      dim3(num_blocks, num_blocks),
      NUM_THREADS,
      0,
      ctx->cuda_stream()>>>(num_blocks, num_boxes, thresh, boxes, mask_dev);

  CUDA_CHECK(cudaMemcpyAsync(
      mask_host.data(),
      mask_dev,
      mask_host.size() * sizeof(uint64_t),
      cudaMemcpyDeviceToHost,
      ctx->cuda_stream()));
  // NOTE(review): presumably waits for the stream so that mask_host
  // is valid below -- confirm against the context implementation.
  ctx->FinishDeviceComputation();

  // One bit per box, grouped per tile; value-initialized to zero.
  vector<uint64_t> dead_bit(num_blocks);
  int num_selected = 0;

  for (int i = 0; i < num_boxes; ++i) {
    const int nblock = i / NUM_THREADS;
    const int inblock = i % NUM_THREADS;
    if (!(dead_bit[nblock] & (1ULL << inblock))) {
      keep_indices[num_selected++] = i;
      // Only tiles at or after the current one carry valid masks.
      auto* mask_i = &mask_host[0] + i * num_blocks;
      for (int j = nblock; j < num_blocks; ++j) {
        dead_bit[j] |= mask_i[j];
      }
      if (num_selected == max_keeps) break;
    }
  }

  num_keep = num_selected;
  ctx->Delete(mask_dev);
}
} // namespace detection
} // namespace detection
} // namespace utils
} // namespace utils
} // namespace dragon
} // namespace dragon
#endif // USE_CUDA
#endif // USE_CUDA
......@@ -5,7 +5,7 @@
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
......@@ -13,8 +13,7 @@
#ifndef SEETADET_CXX_UTILS_DETECTION_UTILS_H_
#define SEETADET_CXX_UTILS_DETECTION_UTILS_H_
#include "dragon/core/context.h"
#include "dragon/core/operator.h"
#include "dragon/core/common.h"
namespace dragon {
......@@ -24,390 +23,409 @@ namespace detection {
#define ROUND(x) ((int)((x) + (T)0.5))
/******************** BBox ********************/
/*!
* Box API
*/
/*!
 * Decode a box in place, clip it to the image and test its size.
 *
 * ``bbox`` holds (x1, y1, x2, y2) on input. The center is shifted by
 * (dx, dy) times the box size, the size is scaled by
 * exp(d_log_w), exp(d_log_h), and the corners are clipped to
 * [0, im_w - 1] x [0, im_h - 1].
 *
 * \return 1 if the clipped box is at least min_box_w wide and
 *         min_box_h high, otherwise 0.
 */
template <typename T>
inline int FilterBoxes(
    const T dx,
    const T dy,
    const T d_log_w,
    const T d_log_h,
    const T im_w,
    const T im_h,
    const T min_box_w,
    const T min_box_h,
    T* bbox) {
  const T w = bbox[2] - bbox[0] + 1;
  const T h = bbox[3] - bbox[1] + 1;
  const T ctr_x = bbox[0] + (T)0.5 * w;
  const T ctr_y = bbox[1] + (T)0.5 * h;

  const T pred_ctr_x = dx * w + ctr_x;
  const T pred_ctr_y = dy * h + ctr_y;
  const T pred_w = exp(d_log_w) * w;
  const T pred_h = exp(d_log_h) * h;

  bbox[0] = pred_ctr_x - (T)0.5 * pred_w;
  bbox[1] = pred_ctr_y - (T)0.5 * pred_h;
  bbox[2] = pred_ctr_x + (T)0.5 * pred_w;
  bbox[3] = pred_ctr_y + (T)0.5 * pred_h;

  // Clip the corners to the image.
  bbox[0] = std::max((T)0, std::min(bbox[0], im_w - 1));
  bbox[1] = std::max((T)0, std::min(bbox[1], im_h - 1));
  bbox[2] = std::max((T)0, std::min(bbox[2], im_w - 1));
  bbox[3] = std::max((T)0, std::min(bbox[3], im_h - 1));

  const T bbox_w = bbox[2] - bbox[0] + 1;
  const T bbox_h = bbox[3] - bbox[1] + 1;
  return (bbox_w >= min_box_w) * (bbox_h >= min_box_h);
}
/*!
 * Decode a box in place, clip it to the image and undo the image scale.
 *
 * ``bbox`` holds (x1, y1, x2, y2) on input. After decoding with
 * (dx, dy, d_log_w, d_log_h) the corners are clipped to
 * [0, im_w - 1] x [0, im_h - 1]; x values are then divided by
 * im_scale_w and y values by im_scale_h.
 *
 * Note the parameter order: im_scale_h comes before im_scale_w.
 */
template <typename T>
inline void BBoxTransform(
    const T dx,
    const T dy,
    const T d_log_w,
    const T d_log_h,
    const T im_w,
    const T im_h,
    const T im_scale_h,
    const T im_scale_w,
    T* bbox) {
  const T w = bbox[2] - bbox[0] + 1;
  const T h = bbox[3] - bbox[1] + 1;
  const T ctr_x = bbox[0] + (T)0.5 * w;
  const T ctr_y = bbox[1] + (T)0.5 * h;

  const T pred_ctr_x = dx * w + ctr_x;
  const T pred_ctr_y = dy * h + ctr_y;
  const T pred_w = exp(d_log_w) * w;
  const T pred_h = exp(d_log_h) * h;

  bbox[0] = pred_ctr_x - (T)0.5 * pred_w;
  bbox[1] = pred_ctr_y - (T)0.5 * pred_h;
  bbox[2] = pred_ctr_x + (T)0.5 * pred_w;
  bbox[3] = pred_ctr_y + (T)0.5 * pred_h;

  // Clip to the image, then divide by the per-axis image scale.
  bbox[0] = std::max((T)0, std::min(bbox[0], im_w - 1)) / im_scale_w;
  bbox[1] = std::max((T)0, std::min(bbox[1], im_h - 1)) / im_scale_h;
  bbox[2] = std::max((T)0, std::min(bbox[2], im_w - 1)) / im_scale_w;
  bbox[3] = std::max((T)0, std::min(bbox[3], im_h - 1)) / im_scale_h;
}
/******************** Anchor ********************/
/*!
* Anchor API
*/
/*!
 * Generate the base anchors for every (ratio, scale) pair.
 *
 * Anchors are written ratio-major as (x1, y1, x2, y2), 4 values each,
 * so ``anchors`` must hold num_ratios * num_scales * 4 values. All
 * anchors share the center 0.5 * (base_size - 1).
 *
 * \param base_size   Side length of the reference square.
 * \param num_ratios  Number of entries in ``ratios``.
 * \param num_scales  Number of entries in ``scales``.
 * \param ratios      Aspect ratios (ratio_h is derived as ratio_w * ratio).
 * \param scales      Multipliers applied to the per-ratio size.
 * \param anchors     Output buffer.
 */
template <typename T>
inline void GenerateAnchors(
    int base_size,
    const int num_ratios,
    const int num_scales,
    const T* ratios,
    const T* scales,
    T* anchors) {
  const T base_area = (T)(base_size * base_size);
  const T center = (T)0.5 * (base_size - (T)1);
  T* offset_anchors = anchors;
  for (int i = 0; i < num_ratios; ++i) {
    // Rounded width/height that keep roughly the base area.
    const T ratio_w = (T)ROUND(sqrt(base_area / ratios[i]));
    const T ratio_h = (T)ROUND(ratio_w * ratios[i]);
    for (int j = 0; j < num_scales; ++j) {
      // Half extents of the scaled anchor.
      const T scale_w = (T)0.5 * (ratio_w * scales[j] - (T)1);
      const T scale_h = (T)0.5 * (ratio_h * scales[j] - (T)1);
      offset_anchors[0] = center - scale_w;
      offset_anchors[1] = center - scale_h;
      offset_anchors[2] = center + scale_w;
      offset_anchors[3] = center + scale_h;
      offset_anchors += 4;
    }
  }
}
/*!
 * Place base anchors on the grid cells selected by ``indices``.
 *
 * After subtracting ``base_offset``, an index is decomposed as
 * (anchor, h, w) over a num_anchors x feat_h x feat_w volume. Indices
 * that fall outside this volume are skipped and their proposal is left
 * untouched. Each proposal occupies 5 values; only the first 4
 * (x1, y1, x2, y2) are written.
 */
template <typename T>
inline void GenerateGridAnchors(
    const int num_proposals,
    const int num_anchors,
    const int feat_h,
    const int feat_w,
    const int stride,
    const int base_offset,
    const T* anchors,
    const int64_t* indices,
    T* proposals) {
  T x, y;
  int idx_3d, a, h, w;
  int idx_range = num_anchors * feat_h * feat_w;
  for (int i = 0; i < num_proposals; ++i) {
    idx_3d = (int)indices[i] - base_offset;
    if (idx_3d >= 0 && idx_3d < idx_range) {
      w = idx_3d % feat_w;
      h = (idx_3d / feat_w) % feat_h;
      a = idx_3d / feat_w / feat_h;
      // Top-left corner of the cell in input-image coordinates.
      x = (T)w * stride, y = (T)h * stride;
      auto* A = anchors + a * 4;
      auto* P = proposals + i * 5;
      P[0] = x + A[0], P[1] = y + A[1];
      P[2] = x + A[2], P[3] = y + A[3];
    }
  }
}
/*!
 * Place base anchors on the grid cells selected by per-class indices.
 *
 * After subtracting ``num_classes * base_offset``, an index is divided
 * by ``num_classes`` and decomposed as (anchor, h, w) over a
 * num_anchors x feat_h x feat_w volume. Out-of-range indices are
 * skipped. Each proposal occupies 7 values; the box (x1, y1, x2, y2)
 * is written to slots 1..4 and the other slots are left untouched.
 */
template <typename T>
inline void GenerateGridAnchors(
    const int num_proposals,
    const int num_classes,
    const int num_anchors,
    const int feat_h,
    const int feat_w,
    const int stride,
    const int base_offset,
    const T* anchors,
    const int64_t* indices,
    T* proposals) {
  T x, y;
  int idx_4d, a, h, w;
  // Valid index range of this grid, expressed in per-class indices.
  int lr = num_classes * base_offset;
  int rr = num_classes * (num_anchors * feat_h * feat_w);
  for (int i = 0; i < num_proposals; ++i) {
    idx_4d = (int)indices[i] - lr;
    if (idx_4d >= 0 && idx_4d < rr) {
      // Drop the class component to recover the anchor index.
      idx_4d /= num_classes;
      w = idx_4d % feat_w;
      h = (idx_4d / feat_w) % feat_h;
      a = idx_4d / feat_w / feat_h;
      x = (T)w * stride, y = (T)h * stride;
      auto* A = anchors + a * 4;
      auto* P = proposals + i * 7 + 1;
      P[0] = x + A[0], P[1] = y + A[1];
      P[2] = x + A[2], P[3] = y + A[3];
    }
  }
}
/******************** Proposal ********************/
/*!
* Proposal API
*/
template <typename T>
void GenerateSSProposals(
const int K,
const int num_proposals,
const float im_h,
const float im_w,
const float min_box_h,
const float min_box_w,
const T* scores,
const T* deltas,
const int64_t* indices,
T* proposals) {
int64_t index, a, k;
const float* delta;
float* proposal = proposals;
float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) {
index = indices[i];
a = index / K, k = index % K;
delta = deltas + k;
dx = delta[(a * 4 + 0) * K];
dy = delta[(a * 4 + 1) * K];
d_log_w = delta[(a * 4 + 2) * K];
d_log_h = delta[(a * 4 + 3) * K];
proposal[4] = FilterBoxes(
dx, dy,
d_log_w, d_log_h,
im_w, im_h,
min_box_w, min_box_h,
proposal
) * scores[index];
proposal += 5;
}
const int K,
const int num_proposals,
const float im_h,
const float im_w,
const float min_box_h,
const float min_box_w,
const T* scores,
const T* deltas,
const int64_t* indices,
T* proposals) {
int64_t index, a, k;
const float* delta;
float* proposal = proposals;
float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) {
index = indices[i];
a = index / K, k = index % K;
delta = deltas + k;
dx = delta[(a * 4 + 0) * K];
dy = delta[(a * 4 + 1) * K];
d_log_w = delta[(a * 4 + 2) * K];
d_log_h = delta[(a * 4 + 3) * K];
proposal[4] = FilterBoxes(
dx,
dy,
d_log_w,
d_log_h,
im_w,
im_h,
min_box_w,
min_box_h,
proposal) *
scores[index];
proposal += 5;
}
}
template <typename T>
void GenerateMSProposals(
const int num_candidates,
const int num_proposals,
const float im_h,
const float im_w,
const float min_box_h,
const float min_box_w,
const T* scores,
const T* deltas,
const int64_t* indices,
T* proposals) {
int64_t index;
int64_t num_candidates_2x = 2 * num_candidates;
int64_t num_candidates_3x = 3 * num_candidates;
float* proposal = proposals;
float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) {
index = indices[i];
dx = deltas[index];
dy = deltas[num_candidates + index];
d_log_w = deltas[num_candidates_2x + index];
d_log_h = deltas[num_candidates_3x + index];
proposal[4] = FilterBoxes(
dx, dy,
d_log_w, d_log_h,
im_w, im_h,
min_box_w, min_box_h,
proposal
) * scores[index];
proposal += 5;
}
const int num_candidates,
const int num_proposals,
const float im_h,
const float im_w,
const float min_box_h,
const float min_box_w,
const T* scores,
const T* deltas,
const int64_t* indices,
T* proposals) {
int64_t index;
int64_t num_candidates_2x = 2 * num_candidates;
int64_t num_candidates_3x = 3 * num_candidates;
float* proposal = proposals;
float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) {
index = indices[i];
dx = deltas[index];
dy = deltas[num_candidates + index];
d_log_w = deltas[num_candidates_2x + index];
d_log_h = deltas[num_candidates_3x + index];
proposal[4] = FilterBoxes(
dx,
dy,
d_log_w,
d_log_h,
im_w,
im_h,
min_box_w,
min_box_h,
proposal) *
scores[index];
proposal += 5;
}
}
template <typename T>
void GenerateMCProposals(
const int num_proposals,
const int num_boxes,
const int num_classes,
const int im_idx,
const float im_h,
const float im_w,
const float im_scale_h,
const float im_scale_w,
const T* scores,
const T* deltas,
const int64_t* indices,
T* proposals) {
int64_t index, cls;
int64_t num_boxes_2x = 2 * num_boxes;
int64_t num_boxes_3x = 3 * num_boxes;
float* proposal = proposals;
float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) {
cls = indices[i] % num_classes;
index = indices[i] / num_classes;
dx = deltas[index];
dy = deltas[num_boxes + index];
d_log_w = deltas[num_boxes_2x + index];
d_log_h = deltas[num_boxes_3x + index];
proposal[0] = im_idx;
BBoxTransform(
dx, dy,
d_log_w, d_log_h,
im_w, im_h,
im_scale_h, im_scale_w,
proposal + 1
);
proposal[5] = scores[indices[i]];
proposal[6] = cls + 1;
proposal += 7;
}
const int num_proposals,
const int num_boxes,
const int num_classes,
const int im_idx,
const float im_h,
const float im_w,
const float im_scale_h,
const float im_scale_w,
const T* scores,
const T* deltas,
const int64_t* indices,
T* proposals) {
int64_t index, cls;
int64_t num_boxes_2x = 2 * num_boxes;
int64_t num_boxes_3x = 3 * num_boxes;
float* proposal = proposals;
float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) {
cls = indices[i] % num_classes;
index = indices[i] / num_classes;
dx = deltas[index];
dy = deltas[num_boxes + index];
d_log_w = deltas[num_boxes_2x + index];
d_log_h = deltas[num_boxes_3x + index];
proposal[0] = im_idx;
BBoxTransform(
dx,
dy,
d_log_w,
d_log_h,
im_w,
im_h,
im_scale_h,
im_scale_w,
proposal + 1);
proposal[5] = scores[indices[i]];
proposal[6] = cls + 1;
proposal += 7;
}
}
/*!
 * Partially quick-sort proposals by descending score.
 *
 * Proposals occupy 5 values each with the score in slot 4. Rows
 * ``start..end`` (inclusive) are partitioned around the score of row
 * ``start``. The left part is always sorted recursively; the right
 * part is only visited while it begins before ``num_top``.
 */
template <typename T>
inline void
SortProposals(const int start, const int end, const int num_top, T* proposals) {
  const T pivot_score = proposals[start * 5 + 4];
  int left = start + 1, right = end;
  while (left <= right) {
    while (left <= end && proposals[left * 5 + 4] >= pivot_score) {
      ++left;
    }
    while (right > start && proposals[right * 5 + 4] <= pivot_score) {
      --right;
    }
    if (left <= right) {
      for (int i = 0; i < 5; ++i) {
        std::swap(proposals[left * 5 + i], proposals[right * 5 + i]);
      }
      ++left;
      --right;
    }
  }
  // Move the pivot row to its final position.
  if (right > start) {
    for (int i = 0; i < 5; ++i) {
      std::swap(proposals[start * 5 + i], proposals[right * 5 + i]);
    }
  }
  if (start < right - 1) SortProposals(start, right - 1, num_top, proposals);
  if (right + 1 < num_top && right + 1 < end) {
    SortProposals(right + 1, end, num_top, proposals);
  }
}
/*!
 * Gather the selected proposals into RoIs.
 *
 * Proposals occupy 5 values each, of which the first 4 are copied.
 * Every output RoI occupies 5 values:
 * (roi_batch_ind, proposal[0], proposal[1], proposal[2], proposal[3]).
 */
template <typename T>
inline void RetrieveRoIs(
    const int num_rois,
    const int roi_batch_ind,
    const T* proposals,
    const int64_t* roi_indices,
    T* rois) {
  for (int i = 0; i < num_rois; ++i) {
    const T* proposal = proposals + roi_indices[i] * 5;
    rois[i * 5 + 0] = (T)roi_batch_ind;
    rois[i * 5 + 1] = proposal[0];
    rois[i * 5 + 2] = proposal[1];
    rois[i * 5 + 3] = proposal[2];
    rois[i * 5 + 4] = proposal[3];
  }
}
/*!
 * Return the pyramid level a RoI is assigned to.
 *
 * The RoI is read as (batch_ind, x1, y1, x2, y2). The level is
 * canonical_level + log2(max(sqrt(w * h), 1) / canonical_scale),
 * converted to int and clamped to [min_level, max_level].
 */
template <typename T>
inline int roi_level(
    const int min_level,
    const int max_level,
    const int canonical_level,
    const int canonical_scale,
    T* roi) {
  T w = roi[3] - roi[1] + 1;
  T h = roi[4] - roi[2] + 1;
  // Refer to the settings of the paper.
  // The side length is floored at 1 to keep log2 finite.
  int level = canonical_level +
      std::log2(std::max(std::sqrt(w * h), (T)1) / (T)canonical_scale);
  return std::min(max_level, std::max(min_level, level));
}
/*!
 * Bin RoI indices by pyramid level.
 *
 * RoIs occupy 5 values each. RoI ``i`` is appended to
 * ``roi_bins[level - min_level]``, where ``level`` is computed by
 * roi_level(). ``roi_bins`` must already hold one bin per level.
 */
template <typename T>
inline void CollectRoIs(
    const int num_rois,
    const int min_level,
    const int max_level,
    const int canonical_level,
    const int canonical_scale,
    const T* rois,
    vector<vec64_t>& roi_bins) {
  const T* roi = rois;
  for (int i = 0; i < num_rois; ++i) {
    int bin_idx =
        roi_level(min_level, max_level, canonical_level, canonical_scale, roi);
    // Convert the level into a zero-based bin index.
    bin_idx = std::max(bin_idx - min_level, 0);
    roi_bins[bin_idx].push_back(i);
    roi += 5;
  }
}
/*!
 * Copy the binned RoIs into one output buffer per bin.
 *
 * ``outputs[i]`` receives the RoIs (5 values each) listed in
 * ``roi_bins[i]``, in order. An empty bin receives one placeholder
 * RoI (0, 0, 0, 1, 1), so every output buffer must hold at least
 * 5 values.
 */
template <typename T>
inline void DistributeRoIs(
    const vector<vec64_t>& roi_bins,
    const T* rois,
    vector<T*> outputs) {
  for (size_t i = 0; i < roi_bins.size(); ++i) {
    auto* y = outputs[i];
    if (roi_bins[i].empty()) {
      // Fake a tiny roi to avoid empty roi pooling
      y[0] = 0, y[1] = 0, y[2] = 0, y[3] = 1, y[4] = 1;
    } else {
      for (size_t j = 0; j < roi_bins[i].size(); ++j) {
        const T* roi = rois + roi_bins[i][j] * 5;
        for (int k = 0; k < 5; ++k) {
          y[k] = roi[k];
        }
        y += 5;
      }
    }
  }
}
/******************** NMS ********************/
/*!
* NMS API
*/
/*!
 * Apply non-maximum suppression on the given context.
 *
 * Declaration only; the implementations are provided as explicit
 * specializations per context.
 *
 * \param num_boxes     Number of input boxes.
 * \param max_keeps     Maximum number of boxes to keep.
 * \param thresh        IoU threshold above which a box is suppressed.
 * \param boxes         Input boxes.
 * \param keep_indices  Output: indices of the kept boxes.
 * \param num_keep      Output: number of kept boxes.
 * \param ctx           Execution context.
 */
template <typename T, class Context>
void ApplyNMS(
    const int num_boxes,
    const int max_keeps,
    const T thresh,
    const T* boxes,
    int64_t* keep_indices,
    int& num_keep,
    Context* ctx);
} // namespace detection
} // namespace detection
} // namespace utils
} // namespace utils
} // namespace dragon
} // namespace dragon
#endif // SEETADET_CXX_UTILS_DETECTION_UTILS_H_
#endif // SEETADET_CXX_UTILS_DETECTION_UTILS_H_
......@@ -52,12 +52,9 @@ class AnchorTarget(object):
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
# Generate grid anchors from base
all_anchors = \
generate_grid_anchors(
features,
self.base_anchors,
self.strides,
)
grid_shapes = [f.shape[-2:] for f in features]
all_anchors = generate_grid_anchors(
grid_shapes, self.base_anchors, self.strides)
num_anchors = all_anchors.shape[0]
# Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
......
......@@ -58,12 +58,9 @@ class Proposal(object):
# Get resources
num_images = ims_info.shape[0]
all_anchors = \
generate_grid_anchors(
features,
self.base_anchors,
self.strides,
)
grid_shapes = [f.shape[-2:] for f in features]
all_anchors = generate_grid_anchors(
grid_shapes, self.base_anchors, self.strides)
# Prepare for the outputs
batch_rois = []
......
......@@ -19,40 +19,40 @@ import numpy as np
from seetadet.core.config import cfg
def generate_grid_anchors(grid_shapes, base_anchors, strides):
    """Tile the base anchors over every cell of each grid.

    Args:
        grid_shapes: One ``(height, width)`` pair per stride.
        base_anchors: One ``(a, 4)`` array of base anchors per stride.
        strides: The step between neighbouring cells, per grid.

    Returns:
        An ``(n, 4)`` array of shifted anchors. With a single stride
        the rows are ordered cell-major ``(k, a)``; with several
        strides each grid is ordered anchor-major ``(a, k)`` and the
        grids are stacked in the given order.

    Raises:
        ValueError: If the number of grids and strides differ.
    """
    num_strides = len(strides)
    if len(grid_shapes) != num_strides:
        raise ValueError(
            'Given %d grids for %d strides.'
            % (len(grid_shapes), num_strides)
        )
    # Generate proposals from shifted anchors
    anchors_to_pack = []
    for i, (height, width) in enumerate(grid_shapes):
        shift_x = np.arange(0, width) * strides[i]
        shift_y = np.arange(0, height) * strides[i]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # Add a anchors (1, a, 4) to
        # cell k shifts (k, 1, 4) to get
        # shift anchors (k, a, 4)
        # Reshape to (k * a, 4) shifted anchors
        a = base_anchors[i].shape[0]
        k = shifts.shape[0]
        anchors = (base_anchors[i].reshape((1, a, 4)) +
                   shifts.reshape((1, k, 4)).transpose((1, 0, 2)))
        if num_strides > 1:
            # Transpose from (k, a, 4) to (a, k, 4)
            # We will pack it with other strides to
            # match the data format of (N, C, H, W)
            anchors = anchors.transpose((1, 0, 2))
            anchors = anchors.reshape((a * k, 4))
            anchors_to_pack.append(anchors)
        else:
            # Original order of Faster R-CNN
            return anchors.reshape((k * a, 4))
    return np.vstack(anchors_to_pack)
......
......@@ -46,6 +46,9 @@ class AnchorTarget(object):
ratios=self.ratios,
sizes=sizes,
))
# Store the cached grid anchors
self.last_grid_shapes = None
self.last_grid_anchors = None
def __call__(self, features, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH
......@@ -58,12 +61,17 @@ class AnchorTarget(object):
)
# Generate grid anchors from base
all_anchors = \
generate_grid_anchors(
features,
self.base_anchors,
self.strides,
)
grid_shapes = [f.shape[-2:] for f in features]
if grid_shapes == self.last_grid_shapes:
all_anchors = self.last_grid_anchors
else:
self.last_grid_shapes = grid_shapes
self.last_grid_anchors = all_anchors = \
generate_grid_anchors(
grid_shapes,
self.base_anchors,
self.strides,
)
num_anchors = all_anchors.shape[0]
# Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
......
......@@ -15,6 +15,7 @@ from __future__ import print_function
import types
import dragon
import dragon.vm.torch as torch
import numpy as np
......@@ -59,7 +60,7 @@ def ims_detect(detector, raw_images):
# Unpack results
results = outputs['detections']
detections = [[] for _ in range(len((raw_images)))]
detections = [[] for _ in range(len(raw_images))]
for i in range(len(ims)):
inds = np.where(results[:, 0].astype(np.int32) == i)[0]
......@@ -126,6 +127,6 @@ def test_net(weights, num_classes, q_in, q_out, device):
q_out.put((
indices[i],
dict([('im_detect', _t['im_detect'].average_time),
('misc',_t['misc'].average_time)]),
('misc', _t['misc'].average_time)]),
dict([('boxes', boxes_this_image)]),
))
......@@ -45,14 +45,14 @@ class PriorBox(object):
aspect_ratios[i],
)
)
self.grid_anchors = None
# Store the cached grid anchors
self.last_grid_anchors = None
def __call__(self, features):
if self.grid_anchors is not None:
return self.grid_anchors
self.grid_anchors = []
if self.last_grid_anchors is not None:
return self.last_grid_anchors
all_anchors = []
for i in range(len(self.strides)):
# 1. Generate base grids
height, width = features[i].shape[-2:]
......@@ -61,23 +61,23 @@ class PriorBox(object):
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
# 2. Apply anchors on base grids
# Add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# Reshape to (K * A, 4) shifted anchors
A = self.base_anchors[i].shape[0]
D = self.base_anchors[i].shape[1]
# Add a anchors (1, a, 4) to
# cell k shifts (k, 1, 4) to get
# shift anchors (k, a, 4)
# Reshape to (k * a, 4) shifted anchors
a = self.base_anchors[i].shape[0]
d = self.base_anchors[i].shape[1]
shifts = np.vstack((
shift_x.ravel(),
shift_y.ravel(),
shift_x.ravel(),
shift_y.ravel())
).transpose()
K = shifts.shape[0] # K = map_h * map_w
anchors = (self.base_anchors[i].reshape((1, A, D)) +
shifts.reshape((1, K, D)).transpose((1, 0, 2)))
anchors = anchors.reshape((K * A, D)).astype(np.float32)
self.grid_anchors.append(anchors)
self.grid_anchors = np.concatenate(self.grid_anchors)
k = shifts.shape[0] # k = map_h * map_w
anchors = (self.base_anchors[i].reshape((1, a, d)) +
shifts.reshape((1, k, d)).transpose((1, 0, 2)))
anchors = anchors.reshape((k * a, d)).astype(np.float32)
all_anchors.append(anchors)
return self.grid_anchors
self.last_grid_anchors = np.concatenate(all_anchors)
return self.last_grid_anchors
......@@ -32,11 +32,9 @@ def get_images(ims):
for im in ims:
im_scales.append((float(out_size) / im.shape[0],
float(out_size) / im.shape[1]))
processed_ims.append(
cv2.resize(
processed_ims.append(cv2.resize(
im, (out_size, out_size),
interpolation=cv2.INTER_AREA,
))
interpolation=cv2.INTER_AREA))
if ims[0].dtype == 'uint16':
ims_blob = np.array(processed_ims, dtype='float32') / 256.
else:
......
......@@ -49,12 +49,12 @@ class Distort(object):
]
def apply(self, img, boxes=None):
if self._prob > 0:
img = PIL.Image.fromarray(img)
for transform_fn, prob in self._transforms:
if npr.uniform() < prob:
img = transform_fn(img)
img = img.enhance(1. + npr.uniform(-.4, .4))
self._prob = 0.5 if cfg.TRAIN.USE_COLOR_JITTER else 0
img = PIL.Image.fromarray(img)
for transform_fn, prob in self._transforms:
if npr.uniform() < prob:
img = transform_fn(img)
img = img.enhance(1. + npr.uniform(-.4, .4))
return np.array(img), boxes
return img, boxes
......
......@@ -27,8 +27,9 @@ if __name__ == '__main__':
np.random.seed(3)
cfg.TRAIN.SCALES = [300]
cfg.TRAIN.RANDOM_SCALES = [0.25, 1.00]
cfg.TRAIN.USE_COLOR_JITTER = True
augmentor = transforms.Compose(
transformer = transforms.Compose(
transforms.Distort(),
transforms.Expand(),
transforms.Sample(),
......@@ -38,12 +39,12 @@ if __name__ == '__main__':
while True:
img = cv2.imread('cat.jpg')
boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
img, boxes = augmentor(img, boxes)
img, boxes = transformer(img, boxes)
for box in boxes:
x1 = int(box[0] * img.shape[1])
y1 = int(box[1] * img.shape[0])
x2 = int(box[2] * img.shape[1])
y2 = int(box[3] * img.shape[0])
cv2.rectangle(img, (x1, y1), (x2, y2), (188, 119, 64), 2)
cv2.imshow('Sample', img)
cv2.imshow('Transforms - Preview', img)
cv2.waitKey(0)
......@@ -70,14 +70,15 @@ class Pipeline(dali.Pipeline):
# Decode image
image = self.decode(inputs['image'])
# Augment the color space
image = self.hsv(
self.brightness_contrast(
image,
brightness=self.twist_rng(),
contrast=self.twist_rng(),
), saturation=self.twist_rng()
)
# Augment the color space if necessary
if cfg.TRAIN.USE_COLOR_JITTER:
image = self.hsv(
self.brightness_contrast(
image,
brightness=self.twist_rng(),
contrast=self.twist_rng(),
), saturation=self.twist_rng()
)
# Expand randomly to get smaller objects
pr = self.paste_ratio() * self.flip_rng() + 1.
......
......@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function
import os
from seetadet.datasets import kpl_record
from seetadet.datasets import kpl_dataset
def get_dataset(name):
......@@ -42,5 +42,5 @@ def list_dataset():
_GLOBAL_REGISTERED_DATASET = {
'default': lambda source:
kpl_record.KPLRecordDataset(source),
kpl_dataset.KPLRecordDataset(source),
}
......@@ -149,8 +149,10 @@ class AirNet(nn.Module):
x = self.layer1(x)
outputs = [None, None, self.layer2(x)]
if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])]
if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])]
if hasattr(self, 'layer3'):
outputs += [self.layer3(outputs[-1])]
if hasattr(self, 'layer4'):
outputs += [self.layer4(outputs[-1])]
return outputs
......
......@@ -39,16 +39,17 @@ class Detector(nn.Module):
backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:]
# + DataLoader
# DataLoader
self.data_loader = None
self.data_loader_cls = importlib.import_module(
'seetadet.algo.{}'.format(model)).DataLoader
self.bootstrap = vision.Bootstrap()
# + FeatureExtractor
# FeatureExtractor
self.body = backbones.get(body)()
feature_dims = self.body.feature_dims
# + FeatureEnhancer
# FeatureEnhancer
if 'fpn' in modules:
self.fpn = models.FPN(feature_dims)
feature_dims = self.fpn.feature_dims
......@@ -57,7 +58,7 @@ class Detector(nn.Module):
else:
feature_dims = [feature_dims[-1]]
# + Detection Modules
# Detection Modules
if 'rcnn' in model:
self.rpn = models.RPN(feature_dims[0])
if 'faster' in model:
......@@ -106,7 +107,7 @@ class Detector(nn.Module):
if inputs is None:
# 1) Training: <= DataLayer
# 2) Inference: <= Given
if not hasattr(self, 'data_loader'):
if self.data_loader is None:
self.data_loader = self.data_loader_cls()
inputs = self.data_loader()
......@@ -171,29 +172,34 @@ class Detector(nn.Module):
# Merge Affine into Convolution #
###################################
last_module = None
for e in self.modules():
if isinstance(e, nn.Affine) and \
for module in self.modules():
if isinstance(module, nn.Affine) and \
isinstance(last_module, nn.Conv2d):
if last_module.bias is None:
delattr(last_module, 'bias')
e.forward = lambda x: x
last_module.bias = e.bias
last_module.weight.data.mul_(e.weight.data)
last_module = e
module.forward = lambda x: x
last_module.bias = module.bias
weight = module.weight.data.view(
0, *([1] * (last_module.weight.ndimension() - 1)))
last_module.weight.data.mul_(weight)
last_module = module
######################################
# Merge BatchNorm into Convolution #
######################################
last_module = None
for e in self.modules():
if isinstance(e, nn.BatchNorm2d) and \
for module in self.modules():
if isinstance(module, nn.BatchNorm2d) and \
isinstance(last_module, nn.Conv2d):
if last_module.bias is None:
delattr(last_module, 'bias')
e.forward = lambda x: x
term = torch.sqrt(e.running_var.data + e.eps)
term = e.weight.data / term
last_module.bias = e.bias.data - term * e.running_mean.data
module.forward = lambda x: x
term = torch.sqrt(module.running_var.data + module.eps)
term = module.weight.data / term
last_module.bias = \
module.bias.data - \
term * module.running_mean.data
term = term.view(0, *([1] * (last_module.weight.ndimension() - 1)))
if last_module.weight.dtype == 'float16':
last_module.bias.half_()
weight = last_module.weight.data.float()
......@@ -201,7 +207,7 @@ class Detector(nn.Module):
last_module.weight.copy_(weight)
else:
last_module.weight.data.mul_(term)
last_module = e
last_module = module
def new_detector(device, weights=None, training=False):
......
......@@ -31,7 +31,8 @@ class FPN(nn.Module):
dim = cfg.FPN.DIM
self.C = nn.ModuleList()
self.P = nn.ModuleList()
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1):
self.highest_backbone_lvl = min(cfg.FPN.RPN_MAX_LEVEL, HIGHEST_BACKBONE_LVL)
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, self.highest_backbone_lvl + 1):
self.C.append(nn.Conv1x1(feature_dims[lvl - 1], dim, bias=True))
self.P.append(nn.Conv3x3(dim, dim, bias=True))
if 'rcnn' in cfg.MODEL.TYPE:
......@@ -40,8 +41,8 @@ class FPN(nn.Module):
else:
self.apply_func = self.apply_on_generic
self.relu = nn.ReLU(inplace=False)
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else dim
for lvl in range(self.highest_backbone_lvl + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == self.highest_backbone_lvl + 1 else dim
self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True))
self.feature_dims = [dim]
self.coarsest_stride = cfg.MODEL.COARSEST_STRIDE
......@@ -56,12 +57,12 @@ class FPN(nn.Module):
def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
outputs = [self.P[self.highest_backbone_lvl - min_lvl](fpn_input)]
# Apply max pool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
for i in range(self.highest_backbone_lvl + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1]))
# Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
for i in range(self.highest_backbone_lvl - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample(
......@@ -76,15 +77,15 @@ class FPN(nn.Module):
def apply_on_generic(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
outputs = [self.P[self.highest_backbone_lvl - min_lvl](fpn_input)]
# Add extra convolutions for higher features
extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1):
for i in range(self.highest_backbone_lvl + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl:
extra_input = self.relu(outputs[-1])
# Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1):
for i in range(self.highest_backbone_lvl - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample(
......
......@@ -161,7 +161,7 @@ class NASMobileNet(nn.Module):
def reset_parameters(self):
for m in self.modules():
if nn.is_conv2d(m):
if isinstance(m, nn.Conv2d):
init.kaiming_normal(m.weight, 'fan_out')
if m.bias is not None:
init.constant(m.bias, 0)
......@@ -173,7 +173,7 @@ class NASMobileNet(nn.Module):
# Stop the gradients if necessary
def freeze_func(m):
if nn.is_conv2d(m):
if isinstance(m, nn.Conv2d):
m.weight.requires_grad = False
m._buffers['weight'] = m.weight
del m._parameters['weight']
......
......@@ -17,8 +17,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from seetadet.core.config import cfg
from seetadet.core.registry import backbones
from seetadet.modules import nn
......@@ -37,11 +35,12 @@ class BasicBlock(nn.Module):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = nn.FrozenAffine(dim_out)
self.downsample = downsample
self.dropblock = dropblock
self.dropblock1 = nn.DropBlock2d(**dropblock) if dropblock else None
self.dropblock2 = nn.DropBlock2d(**dropblock) if dropblock else None
def forward(self, x):
residual = x
......@@ -50,14 +49,14 @@ class BasicBlock(nn.Module):
out = self.bn1(out)
out = self.relu(out)
if self.dropblock is not None:
out = self.dropblock(out)
if self.dropblock1 is not None:
out = self.dropblock1(out)
out = self.conv2(out)
out = self.bn2(out)
if self.dropblock is not None:
residual = self.dropblock(residual)
if self.dropblock2 is not None:
residual = self.dropblock2(residual)
if self.downsample is not None:
residual = self.downsample(residual)
......@@ -67,7 +66,7 @@ class BasicBlock(nn.Module):
return out
class Bottleneck(torch.nn.Module):
class Bottleneck(nn.Module):
# 1x64d => 0.25 (ResNet)
# 32x8d, 64x4d => 1.0 (ResNeXt)
contraction = cfg.RESNET.NUM_GROUPS \
......@@ -86,12 +85,13 @@ class Bottleneck(torch.nn.Module):
self.conv1 = nn.Conv1x1(dim_in, dim)
self.bn1 = nn.FrozenAffine(dim)
self.conv2 = nn.Conv3x3(dim, dim, stride=stride)
self.drop2 = nn.DropBlock2d(**dropblock) if dropblock else None
self.bn2 = nn.FrozenAffine(dim)
self.conv3 = nn.Conv1x1(dim, dim_out)
self.drop3 = nn.DropBlock2d(**dropblock) if dropblock else None
self.bn3 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.dropblock = dropblock
def forward(self, x):
residual = x
......@@ -101,32 +101,30 @@ class Bottleneck(torch.nn.Module):
out = self.relu(out)
out = self.conv2(out)
if self.drop2 is not None:
out = self.drop2(out)
out = self.bn2(out)
out = self.relu(out)
if self.dropblock is not None:
out = self.dropblock(out)
out = self.conv3(out)
out = self.bn3(out)
if self.dropblock is not None:
residual = self.dropblock(residual)
if self.downsample is not None:
residual = self.downsample(residual)
out += residual
if self.drop3 is not None:
out = self.drop3(out)
out = self.relu(out)
return out
class ResNet(torch.nn.Module):
class ResNet(nn.Module):
def __init__(self, block, layers, filters):
super(ResNet, self).__init__()
self.dim_in, filters = filters[0], filters[1:]
self.feature_dims = [self.dim_in] + filters
self.conv1 = torch.nn.Conv2d(
self.conv1 = nn.Conv2d(
3, 64,
kernel_size=7,
stride=2,
......@@ -134,29 +132,31 @@ class ResNet(torch.nn.Module):
bias=False,
)
self.bn1 = nn.FrozenAffine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d(
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(
kernel_size=3,
stride=2,
padding=0,
ceil_mode=True,
)
self.drop3 = torch.nn.DropBlock2d(
kp=0.9,
block_size=7,
alpha=0.25,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
self.drop4 = torch.nn.DropBlock2d(
kp=0.9,
block_size=7,
alpha=1.00,
decrement=cfg.DROPBLOCK.DECREMENT
) if cfg.DROPBLOCK.DROP_ON else None
drop3 = {
'kp': 0.9,
'block_size': 7,
'alpha': 1.00,
'decrement': cfg.DROPBLOCK.DECREMENT,
'inplace': True,
} if cfg.DROPBLOCK.DROP_ON else None
drop4 = {
'kp': 0.9,
'block_size': 7,
'alpha': 1.00,
'decrement': cfg.DROPBLOCK.DECREMENT,
'inplace': True,
} if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3)
self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, self.drop4)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, drop3)
self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, drop4)
self.reset_parameters()
def reset_parameters(self):
......@@ -166,7 +166,7 @@ class ResNet(torch.nn.Module):
# Stop the gradients if necessary
def freeze_func(m):
if isinstance(m, torch.nn.Conv2d):
if isinstance(m, nn.Conv2d):
m.weight.requires_grad = False
m._buffers['weight'] = m.weight
del m._parameters['weight']
......
......@@ -29,7 +29,6 @@ class SSD(nn.Module):
########################################
# SSD outputs #
########################################
self.cls_conv = torch.nn.ModuleList(
nn.Conv3x3(feature_dims[0], feature_dims[0], bias=True)
for _ in range(cfg.SSD.NUM_CONVS)
......
......@@ -36,7 +36,6 @@ class _NonMaxSuppression(Function):
return self.dispatch([dets], [self.alloc()])
class _RetinaNetDecoder(Function):
"""Decode predictions from RetinaNet."""
......
......@@ -33,6 +33,7 @@ def kaiming_normal(weight, mode='fan_in'):
nonlinearity='relu',
)
# Aliases
constant = nn.init.constant_
normal = nn.init.normal_
......@@ -185,6 +185,7 @@ class SigmoidFocalLoss(object):
return nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
negative_index=0, # Background index
)
......@@ -211,6 +212,7 @@ BCEWithLogitsLoss = nn.BCEWithLogitsLoss
Conv2d = nn.Conv2d
ConvTranspose2d = nn.ConvTranspose2d
DepthwiseConv2d = nn.DepthwiseConv2d
DropBlock2d = nn.DropBlock2d
Linear = nn.Linear
MaxPool2d = nn.MaxPool2d
Module = nn.Module
......
......@@ -15,7 +15,7 @@ from __future__ import print_function
import functools
import dragon.vm.torch as torch
from dragon.vm import torch
from seetadet.core.config import cfg
......@@ -41,7 +41,9 @@ class Bootstrap(torch.nn.Module):
def __init__(self):
super(Bootstrap, self).__init__()
self.normalize_func = functools.partial(
self._device = torch.device('cpu')
self._dummy_buffer = torch.ones(1)
self._normalize_func = functools.partial(
torch.channel_normalize,
mean=cfg.PIXEL_MEANS,
std=[1., 1., 1.],
......@@ -49,10 +51,9 @@ class Bootstrap(torch.nn.Module):
dims=(0, 3, 1, 2),
dtype=cfg.MODEL.PRECISION.lower(),
)
self.dummy_buffer = torch.ones(1)
def _apply(self, fn):
fn(self.dummy_buffer)
fn(self._dummy_buffer)
def cpu(self):
self._device = torch.device('cpu')
......@@ -61,12 +62,11 @@ class Bootstrap(torch.nn.Module):
self._device = torch.device('cuda', device)
def device(self):
"""Return the device of this module."""
return self.dummy_buffer.device
return self._dummy_buffer.device
def forward(self, input):
if isinstance(input, torch.Tensor):
if input.size(1) <= 3:
if input.shape[1] <= 3:
return input
cur_device = self.device()
if input._device != cur_device:
......@@ -74,4 +74,4 @@ class Bootstrap(torch.nn.Module):
input = input.cpu()
else:
input = input.cuda(cur_device.index)
return self.normalize_func(input)
return self._normalize_func(input)
......@@ -32,8 +32,8 @@ class SGDSolver(object):
lr=cfg.SOLVER.BASE_LR,
momentum=cfg.SOLVER.MOMENTUM,
weight_decay=cfg.SOLVER.WEIGHT_DECAY,
clip_gradient=float(cfg.SOLVER.CLIP_NORM),
scale_gradient=1. / cfg.SOLVER.LOSS_SCALING,
clip_norm=float(cfg.SOLVER.CLIP_NORM),
scale=1. / cfg.SOLVER.LOSS_SCALING,
)
self.lr_scheduler = lr_scheduler.get_scheduler()
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import operator
from dragon.vm import torch
from seetadet.modules import nn
def dense_conv_flops(m, inputs, output):
    """Hook to compute flops for a dense convolution.

    Stores the parameter count in ``m.__params__`` and the number of
    multiply-accumulates of the last forward in ``m.__flops__``.

    Parameters
    ----------
    m : module
        The convolution module; ``kernel_size`` and ``bias`` are read,
        ``groups`` is read if present (defaults to 1).
    inputs : sequence
        The forward inputs; ``inputs[0].shape[1]`` is the input channels.
    output : tensor-like
        The forward output; ``shape[1]`` is the output channels and
        ``shape[2:]`` are the remaining (spatial) dimensions.

    """
    k_dim = functools.reduce(operator.mul, m.kernel_size)
    out_dim = functools.reduce(operator.mul, output.shape[2:])
    in_c, out_c = inputs[0].shape[1], output.shape[1]
    # Each output channel is only connected to ``in_c / groups`` inputs.
    in_c_per_group = in_c // getattr(m, 'groups', 1)
    # Test against ``None`` instead of truthiness: the boolean value
    # of a multi-element tensor is ambiguous (or raises).
    has_bias = getattr(m, 'bias', None) is not None
    m.__params__ = (k_dim * in_c_per_group + (1 if has_bias else 0)) * out_c
    m.__flops__ = m.__params__ * out_dim
def depthwise_conv_flops(m, inputs, output):
    """Hook to compute flops for a depthwise convolution.

    Stores the parameter count in ``m.__params__`` and the number of
    multiply-accumulates of the last forward in ``m.__flops__``.

    Parameters
    ----------
    m : module
        The convolution module; ``kernel_size`` and ``bias`` are read.
    inputs : sequence
        The forward inputs (unused, required by the hook signature).
    output : tensor-like
        The forward output; ``shape[1]`` is the output channels and
        ``shape[2:]`` are the remaining (spatial) dimensions.

    """
    k_dim = functools.reduce(operator.mul, m.kernel_size)
    out_dim = functools.reduce(operator.mul, output.shape[2:])
    out_c = output.shape[1]
    # Test against ``None`` instead of truthiness: the boolean value
    # of a multi-element tensor is ambiguous (or raises).
    has_bias = getattr(m, 'bias', None) is not None
    m.__params__ = (k_dim + (1 if has_bias else 0)) * out_c
    m.__flops__ = m.__params__ * out_dim
def register_flops(module):
    """Attach the flops-counting forward hooks to the convolutions.

    The ``__flops__`` attribute on ``module`` itself marks that the
    hooks are already in place, so a second call is a no-op.

    """
    if hasattr(module, '__flops__'):
        return
    module.__flops__ = 0.
    for layer in module.modules():
        # Depthwise must be tested before the generic convolution.
        if isinstance(layer, nn.DepthwiseConv2d):
            hook = depthwise_conv_flops
        elif isinstance(layer, nn.Conv2d):
            hook = dense_conv_flops
        else:
            continue
        layer.register_forward_hook(hook)
def collect_flops(module, normalizer=1e6):
    """Sum and reset the flops recorded by the last forward.

    Every sub-module carrying a ``__flops__`` counter contributes
    to the total and has its counter set back to zero. The total
    is returned divided by ``normalizer``.

    """
    accumulated = 0.
    for layer in module.modules():
        if not hasattr(layer, '__flops__'):
            continue
        accumulated = accumulated + layer.__flops__
        layer.__flops__ = 0.
    return accumulated / normalizer
def benchmark_flops(module, normalizer=1e6):
    """Return the flops by running benchmark once.

    The module is called once without arguments, in evaluation mode
    and with gradients disabled. The flops collected from that single
    forward are returned divided by ``normalizer``.

    Parameters
    ----------
    module : module
        The module to benchmark; must be callable without arguments.
    normalizer : number, optional, default=1e6
        The divisor applied to the collected flops.

    Returns
    -------
    float
        The normalized flops of one forward.

    """
    register_flops(module)
    # Drop the counters left by any previous forward.
    collect_flops(module)
    original_training = module.training
    if original_training:
        module.eval()
    try:
        with torch.no_grad():
            module()
    finally:
        # Restore the training mode even if the forward fails.
        if original_training:
            module.train()
    return collect_flops(module, normalizer)
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!