Commit 8558d3df by Ting PAN

Adapt to the latest dragon preview version

Summary:
This commit updates the repository to match dragon 0.3.0.dev20200707.
1 parent 4bcab266
Showing with 711 additions and 516 deletions
------------------------------------------------------------------------ ------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet. The list of most significant changes made over time in SeetaDet.
SeetaDet 0.4.2 (20200707)
Dragon Minimum Required (Version 0.3.0.dev20200707)
Changes:
- Adapt to the latest dragon preview version.
Preview Features:
- None
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.4.1 (20200421) SeetaDet 0.4.1 (20200421)
Dragon Minimum Required (Version 0.3.0.dev20200421) Dragon Minimum Required (Version 0.3.0.dev20200421)
......
...@@ -14,7 +14,7 @@ The torch-style codes help us to simplify the hierarchical pipeline of modern de ...@@ -14,7 +14,7 @@ The torch-style codes help us to simplify the hierarchical pipeline of modern de
## Requirements ## Requirements
seeta-dragon >= 0.3.0.dev20200421 seeta-dragon >= 0.3.0.dev20200707
## Installation ## Installation
......
...@@ -32,16 +32,17 @@ FRCNN: ...@@ -32,16 +32,17 @@ FRCNN:
TRAIN: TRAIN:
WEIGHTS: '/model/R-101.Affine.pth' WEIGHTS: '/model/R-101.Affine.pth'
DATASET: '/data/coco_2014_trainval35k' DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
IMS_PER_BATCH: 2 IMS_PER_BATCH: 2
BATCH_SIZE: 512 BATCH_SIZE: 512
SCALES: [800] SCALES: [800]
MAX_SIZE: 1333 MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST: TEST:
DATASET: '/data/coco_2014_minival' DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json' JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco' PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [800] SCALES: [800]
MAX_SIZE: 1333 MAX_SIZE: 1333
NMS: 0.5 NMS: 0.5
RPN_POST_NMS_TOP_N: 1000
...@@ -32,16 +32,16 @@ FRCNN: ...@@ -32,16 +32,16 @@ FRCNN:
TRAIN: TRAIN:
WEIGHTS: '/model/R-101.Affine.pth' WEIGHTS: '/model/R-101.Affine.pth'
DATASET: '/data/coco_2014_trainval35k' DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
IMS_PER_BATCH: 2 IMS_PER_BATCH: 2
BATCH_SIZE: 512 BATCH_SIZE: 512
SCALES: [800] SCALES: [800]
MAX_SIZE: 1333 MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST: TEST:
DATASET: '/data/coco_2014_minival' DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json' JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco' PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [800] SCALES: [800]
MAX_SIZE: 1333 MAX_SIZE: 1333
NMS: 0.5 NMS: 0.5
RPN_POST_NMS_TOP_N: 1000
...@@ -30,7 +30,7 @@ TRAIN: ...@@ -30,7 +30,7 @@ TRAIN:
TEST: TEST:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [600] SCALES: [600]
MAX_SIZE: 1000 MAX_SIZE: 1000
NMS: 0.45 NMS: 0.45
\ No newline at end of file RPN_POST_NMS_TOP_N: 1000
\ No newline at end of file
...@@ -29,16 +29,16 @@ FRCNN: ...@@ -29,16 +29,16 @@ FRCNN:
TRAIN: TRAIN:
WEIGHTS: '/model/VGG16.RCNN.pth' WEIGHTS: '/model/VGG16.RCNN.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
RPN_MIN_SIZE: 16
IMS_PER_BATCH: 2 IMS_PER_BATCH: 2
BATCH_SIZE: 128 BATCH_SIZE: 128
SCALES: [600] SCALES: [600]
MAX_SIZE: 1000 MAX_SIZE: 1000
RPN_MIN_SIZE: 16
TEST: TEST:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_MIN_SIZE: 16
RPN_POST_NMS_TOP_N: 300
SCALES: [600] SCALES: [600]
MAX_SIZE: 1000 MAX_SIZE: 1000
NMS: 0.45 RPN_MIN_SIZE: 16
\ No newline at end of file NMS: 0.45
RPN_POST_NMS_TOP_N: 300
\ No newline at end of file
...@@ -32,11 +32,11 @@ FPN: ...@@ -32,11 +32,11 @@ FPN:
TRAIN: TRAIN:
WEIGHTS: '/model/R-50.Affine.pth' WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/coco_2014_trainval35k' DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
USE_COLOR_JITTER: True
IMS_PER_BATCH: 16 IMS_PER_BATCH: 16
SCALES: [416] SCALES: [416]
RANDOM_SCALES: [0.25, 1.0] RANDOM_SCALES: [0.25, 1.0]
USE_DIFF: False # Do not use crowd objects
USE_COLOR_JITTER: False
TEST: TEST:
DATASET: '/data/coco_2014_minival' DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json' JSON_FILE: '/data/instances_minival2014.json'
......
...@@ -23,10 +23,10 @@ FPN: ...@@ -23,10 +23,10 @@ FPN:
TRAIN: TRAIN:
WEIGHTS: '/model/AirNet.Affine.pth' WEIGHTS: '/model/AirNet.Affine.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
USE_COLOR_JITTER: True
IMS_PER_BATCH: 32 IMS_PER_BATCH: 32
SCALES: [320] SCALES: [320]
RANDOM_SCALES: [0.25, 1.0] RANDOM_SCALES: [0.25, 1.0]
USE_COLOR_JITTER: True
TEST: TEST:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
...@@ -24,10 +24,10 @@ FPN: ...@@ -24,10 +24,10 @@ FPN:
TRAIN: TRAIN:
WEIGHTS: '/model/R-50.Affine.pth' WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
USE_COLOR_JITTER: True
IMS_PER_BATCH: 32 IMS_PER_BATCH: 32
SCALES: [320] SCALES: [320]
RANDOM_SCALES: [0.25, 2.0] RANDOM_SCALES: [0.25, 2.0]
USE_COLOR_JITTER: True
TEST: TEST:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
...@@ -38,6 +38,7 @@ TRAIN: ...@@ -38,6 +38,7 @@ TRAIN:
IMS_PER_BATCH: 32 IMS_PER_BATCH: 32
SCALES: [300] SCALES: [300]
RANDOM_SCALES: [0.25, 1.00] RANDOM_SCALES: [0.25, 1.00]
USE_COLOR_JITTER: True
TEST: TEST:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
...@@ -3,7 +3,7 @@ VIS: False ...@@ -3,7 +3,7 @@ VIS: False
ENABLE_TENSOR_BOARD: False ENABLE_TENSOR_BOARD: False
MODEL: MODEL:
TYPE: ssd TYPE: ssd
BACKBONE: airnet5b.mbox BACKBONE: airnet.fpn
CLASSES: ['__background__', CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat', 'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair', 'bottle', 'bus', 'car', 'cat', 'chair',
...@@ -17,19 +17,30 @@ SOLVER: ...@@ -17,19 +17,30 @@ SOLVER:
MAX_STEPS: 120000 MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000 SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_ssd_320 SNAPSHOT_PREFIX: voc_ssd_320
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 8
SSD: SSD:
NUM_CONVS: 2 NUM_CONVS: 2
MULTIBOX: MULTIBOX:
STRIDES: [8, 16, 32] STRIDES: [8, 16, 32, 64, 100, 300]
MIN_SIZES: [30, 90, 150] MIN_SIZES: [30, 60, 110, 162, 213, 264]
MAX_SIZES: [90, 150, 210] MAX_SIZES: [60, 110, 162, 213, 264, 315]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]] ASPECT_RATIOS: [
[1, 2, 0.5],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5],
[1, 2, 0.5],
]
TRAIN: TRAIN:
WEIGHTS: '/model/AirNet.Affine.pth' WEIGHTS: '/model/AirNet.Affine.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [320] SCALES: [320]
RANDOM_SCALES: [0.25, 1.00] RANDOM_SCALES: [0.25, 1.00]
IMS_PER_BATCH: 32 USE_COLOR_JITTER: True
TEST: TEST:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
...@@ -37,9 +37,10 @@ SSD: ...@@ -37,9 +37,10 @@ SSD:
TRAIN: TRAIN:
WEIGHTS: '/model/R-50.Affine.pth' WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [320] SCALES: [320]
RANDOM_SCALES: [0.25, 1.00] RANDOM_SCALES: [0.25, 1.00]
IMS_PER_BATCH: 32 USE_COLOR_JITTER: True
TEST: TEST:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
---
AccessModifierOffset: -1
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ]
IncludeCategories:
- Regex: '^<.*\.h(pp)?>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
UseTab: Never
...
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h"
#include "nms_op.h" #include "nms_op.h"
#include "../utils/detection_utils.h"
namespace dragon { namespace dragon {
template <class Context> template <typename T> template <class Context>
template <typename T>
void NonMaxSuppressionOp<Context>::DoRunWithType() { void NonMaxSuppressionOp<Context>::DoRunWithType() {
int num_selected; int num_selected;
utils::detection::ApplyNMS( utils::detection::ApplyNMS(
Output(0)->count(), Output(0)->count(),
Output(0)->count(), Output(0)->count(),
iou_threshold_, iou_threshold_,
Input(0).template mutable_data<T, Context>(), Input(0).template mutable_data<T, Context>(),
Output(0)->template mutable_data<int64_t, CPUContext>(), Output(0)->template mutable_data<int64_t, CPUContext>(),
num_selected, ctx() num_selected,
); ctx());
Output(0)->Reshape({ num_selected }); Output(0)->Reshape({num_selected});
} }
template <class Context> template <class Context>
void NonMaxSuppressionOp<Context>::RunOnDevice() { void NonMaxSuppressionOp<Context>::RunOnDevice() {
CHECK(Input(0).ndim() == 2 && Input(0).dim(1) == 5) CHECK(Input(0).ndim() == 2 && Input(0).dim(1) == 5)
<< "\nThe dimensions of boxes should be (num_boxes, 5)."; << "\nThe dimensions of boxes should be (num_boxes, 5).";
Output(0)->Reshape({ Input(0).dim(0) });
DispatchHelper<TensorTypes<float>>::Call(this, Input(0)); Output(0)->Reshape({Input(0).dim(0)});
DispatchHelper<TensorTypes<float>>::Call(this, Input(0));
} }
DEPLOY_CPU(NonMaxSuppression); DEPLOY_CPU(NonMaxSuppression);
...@@ -41,4 +38,4 @@ OPERATOR_SCHEMA(NonMaxSuppression).NumInputs(1).NumOutputs(1); ...@@ -41,4 +38,4 @@ OPERATOR_SCHEMA(NonMaxSuppression).NumInputs(1).NumOutputs(1);
NO_GRADIENT(NonMaxSuppression); NO_GRADIENT(NonMaxSuppression);
} // namespace dragon } // namespace dragon
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* You should have received a copy of the BSD 2-Clause License * You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See, * along with the software. If not, See,
* *
* <https://opensource.org/licenses/BSD-2-Clause> * <https://opensource.org/licenses/BSD-2-Clause>
* *
* ------------------------------------------------------------ * ------------------------------------------------------------
*/ */
...@@ -20,20 +20,20 @@ namespace dragon { ...@@ -20,20 +20,20 @@ namespace dragon {
template <class Context> template <class Context>
class NonMaxSuppressionOp final : public Operator<Context> { class NonMaxSuppressionOp final : public Operator<Context> {
public: public:
NonMaxSuppressionOp(const OperatorDef& def, Workspace* ws) NonMaxSuppressionOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
iou_threshold_(OpArg<float>("iou_threshold", 0.5f)) {} iou_threshold_(OpArg<float>("iou_threshold", 0.5f)) {}
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override; void RunOnDevice() override;
template <typename T> template <typename T>
void DoRunWithType(); void DoRunWithType();
protected: protected:
float iou_threshold_; float iou_threshold_;
}; };
} // namespace dragon } // namespace dragon
#endif // SEETADET_CXX_OPERATORS_NMS_OP_H_ #endif // SEETADET_CXX_OPERATORS_NMS_OP_H_
#include <dragon/core/workspace.h> #include <dragon/utils/math_functions.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h" #include "../utils/detection_utils.h"
#include "retinanet_decoder_op.h" #include "retinanet_decoder_op.h"
namespace dragon { namespace dragon {
template <class Context> template <typename T> template <class Context>
template <typename T>
void RetinaNetDecoderOp<Context>::DoRunWithType() { void RetinaNetDecoderOp<Context>::DoRunWithType() {
using BT = float; // DType of BBox using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox using BC = CPUContext; // Context of BBox
int feat_h, feat_w; int feat_h, feat_w;
int C = Input(-3).dim(2), A, K; int C = Input(-3).dim(2), A, K;
int total_proposals = 0; int total_proposals = 0;
int num_candidates, num_boxes, num_proposals; int num_candidates, num_boxes, num_proposals;
auto* batch_scores = Input(-3).template data<T, BC>(); auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>(); auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>(); auto* im_info = Input(-1).template data<BT, BC>();
auto* y = Output(0)->template mutable_data<BT, BC>(); auto* y = Output(0)->template mutable_data<BT, BC>();
for (int n = 0; n < num_images_; ++n) { for (int n = 0; n < num_images_; ++n) {
BT im_h = im_info[0]; BT im_h = im_info[0];
BT im_w = im_info[1]; BT im_w = im_info[1];
BT im_scale_h = im_info[2]; BT im_scale_h = im_info[2];
BT im_scale_w = im_info[2]; BT im_scale_w = im_info[2];
if (Input(-1).dim(1) == 4) im_scale_w = im_info[3]; if (Input(-1).dim(1) == 4) im_scale_w = im_info[3];
auto* scores = batch_scores + n * Input(-3).stride(0); auto* scores = batch_scores + n * Input(-3).stride(0);
auto* deltas = batch_deltas + n * Input(-2).stride(0); auto* deltas = batch_deltas + n * Input(-2).stride(0);
CHECK_EQ(strides_.size(), InputSize() - 3) CHECK_EQ(strides_.size(), InputSize() - 3)
<< "\nGiven " << strides_.size() << " strides " << "\nGiven " << strides_.size() << " strides "
<< "and " << InputSize() - 3 << " features"; << "and " << InputSize() - 3 << " features";
// Select the top-k candidates as proposals // Select the top-k candidates as proposals
num_boxes = Input(-3).dim(1); num_boxes = Input(-3).dim(1);
num_candidates = Input(-3).count(1); num_candidates = Input(-3).count(1);
roi_indices_.resize(num_candidates); roi_indices_.resize(num_candidates);
num_candidates = 0; num_candidates = 0;
for (int i = 0; i < roi_indices_.size(); ++i) for (int i = 0; i < roi_indices_.size(); ++i)
if (scores[i] > score_thr_) if (scores[i] > score_thr_) roi_indices_[num_candidates++] = i;
roi_indices_[num_candidates++] = i; scores_.resize(num_candidates);
scores_.resize(num_candidates); for (int i = 0; i < num_candidates; ++i)
for (int i = 0; i < num_candidates; ++i) scores_[i] = scores[roi_indices_[i]];
scores_[i] = scores[roi_indices_[i]]; num_proposals = std::min(num_candidates, (int)pre_nms_topn_);
num_proposals = std::min( utils::math::ArgPartition(
num_candidates, num_candidates, num_proposals, true, scores_.data(), indices_);
(int)pre_nms_topn_ for (int i = 0; i < num_proposals; ++i)
); indices_[i] = roi_indices_[indices_[i]];
utils::math::ArgPartition( // Decode the candidates
num_candidates, int base_offset = 0;
num_proposals, for (int i = 0; i < strides_.size(); i++) {
true, feat_h = Input(i).dim(2);
scores_.data(), feat_w = Input(i).dim(3);
indices_ K = feat_h * feat_w;
); A = int(ratios_.size() * scales_.size());
for (int i = 0; i < num_proposals; ++i) anchors_.resize((size_t)(A * 4));
indices_[i] = roi_indices_[indices_[i]]; utils::detection::GenerateAnchors(
// Decode the candidates strides_[i],
int base_offset = 0; (int)ratios_.size(),
for (int i = 0; i < strides_.size(); i++) { (int)scales_.size(),
feat_h = Input(i).dim(2); ratios_.data(),
feat_w = Input(i).dim(3); scales_.data(),
K = feat_h * feat_w; anchors_.data());
A = int(ratios_.size() * scales_.size()); utils::detection::GenerateGridAnchors(
anchors_.resize((size_t)(A * 4)); num_proposals,
utils::detection::GenerateAnchors( C,
strides_[i], A,
(int)ratios_.size(), feat_h,
(int)scales_.size(), feat_w,
ratios_.data(), strides_[i],
scales_.data(), base_offset,
anchors_.data() anchors_.data(),
); indices_.data(),
utils::detection::GenerateGridAnchors( y);
num_proposals, C, A, base_offset += (A * K);
feat_h, feat_w,
strides_[i],
base_offset,
anchors_.data(),
indices_.data(),
y
);
base_offset += (A * K);
}
utils::detection::GenerateMCProposals(
num_proposals,
num_boxes, C,
n,
im_h,
im_w,
im_scale_h,
im_scale_w,
scores,
deltas,
indices_.data(),
y
);
total_proposals += num_proposals;
y += (num_proposals * 7);
im_info += Input(-1).dim(1);
} }
utils::detection::GenerateMCProposals(
num_proposals,
num_boxes,
C,
n,
im_h,
im_w,
im_scale_h,
im_scale_w,
scores,
deltas,
indices_.data(),
y);
total_proposals += num_proposals;
y += (num_proposals * 7);
im_info += Input(-1).dim(1);
}
Output(0)->Reshape({ total_proposals, 7 }); Output(0)->Reshape({total_proposals, 7});
} }
template <class Context> template <class Context>
void RetinaNetDecoderOp<Context>::RunOnDevice() { void RetinaNetDecoderOp<Context>::RunOnDevice() {
num_images_ = Input(0).dim(0); num_images_ = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_
<< " groups info, got "
<< Input(-1).dim(0) << ".";
Output(0)->Reshape({ num_images_ * pre_nms_topn_, 7 }); CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_ << " groups info, got "
<< Input(-1).dim(0) << ".";
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3)); Output(0)->Reshape({num_images_ * pre_nms_topn_, 7});
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
} }
DEPLOY_CPU(RetinaNetDecoder); DEPLOY_CPU(RetinaNetDecoder);
...@@ -123,8 +113,6 @@ DEPLOY_CPU(RetinaNetDecoder); ...@@ -123,8 +113,6 @@ DEPLOY_CPU(RetinaNetDecoder);
DEPLOY_CUDA(RetinaNetDecoder); DEPLOY_CUDA(RetinaNetDecoder);
#endif #endif
OPERATOR_SCHEMA(RetinaNetDecoder) OPERATOR_SCHEMA(RetinaNetDecoder).NumInputs(3, INT_MAX).NumOutputs(1, INT_MAX);
.NumInputs(3, INT_MAX)
.NumOutputs(1, INT_MAX);
} // namespace dragon } // namespace dragon
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* You should have received a copy of the BSD 2-Clause License * You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See, * along with the software. If not, See,
* *
* <https://opensource.org/licenses/BSD-2-Clause> * <https://opensource.org/licenses/BSD-2-Clause>
* *
* ------------------------------------------------------------ * ------------------------------------------------------------
*/ */
...@@ -20,27 +20,27 @@ namespace dragon { ...@@ -20,27 +20,27 @@ namespace dragon {
template <class Context> template <class Context>
class RetinaNetDecoderOp final : public Operator<Context> { class RetinaNetDecoderOp final : public Operator<Context> {
public: public:
RetinaNetDecoderOp(const OperatorDef& def, Workspace* ws) RetinaNetDecoderOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
strides_(OpArgs<int64_t>("strides")), strides_(OpArgs<int64_t>("strides")),
ratios_(OpArgs<float>("ratios")), ratios_(OpArgs<float>("ratios")),
scales_(OpArgs<float>("scales")), scales_(OpArgs<float>("scales")),
pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)), pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
score_thr_(OpArg<float>("score_thresh", 0.05f)) {} score_thr_(OpArg<float>("score_thresh", 0.05f)) {}
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override; void RunOnDevice() override;
template <typename T> template <typename T>
void DoRunWithType(); void DoRunWithType();
protected: protected:
float score_thr_; float score_thr_;
vec64_t strides_, indices_, roi_indices_; vec64_t strides_, indices_, roi_indices_;
vector<float> ratios_, scales_, scores_, anchors_; vector<float> ratios_, scales_, scores_, anchors_;
int64_t num_images_, pre_nms_topn_; int64_t num_images_, pre_nms_topn_;
}; };
} // namespace dragon } // namespace dragon
#endif // SEETADET_CXX_OPERATORS_RETINANET_DECODER_OP_H_ #endif // SEETADET_CXX_OPERATORS_RETINANET_DECODER_OP_H_
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* You should have received a copy of the BSD 2-Clause License * You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See, * along with the software. If not, See,
* *
* <https://opensource.org/licenses/BSD-2-Clause> * <https://opensource.org/licenses/BSD-2-Clause>
* *
* ------------------------------------------------------------ * ------------------------------------------------------------
*/ */
...@@ -20,36 +20,36 @@ namespace dragon { ...@@ -20,36 +20,36 @@ namespace dragon {
template <class Context> template <class Context>
class RPNDecoderOp final : public Operator<Context> { class RPNDecoderOp final : public Operator<Context> {
public: public:
RPNDecoderOp(const OperatorDef& def, Workspace* ws) RPNDecoderOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
strides_(OpArgs<int64_t>("strides")), strides_(OpArgs<int64_t>("strides")),
ratios_(OpArgs<float>("ratios")), ratios_(OpArgs<float>("ratios")),
scales_(OpArgs<float>("scales")), scales_(OpArgs<float>("scales")),
pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)), pre_nms_topn_(OpArg<int64_t>("pre_nms_top_n", 6000)),
post_nms_topn_(OpArg<int64_t>("post_nms_top_n", 300)), post_nms_topn_(OpArg<int64_t>("post_nms_top_n", 300)),
nms_thr_(OpArg<float>("nms_thresh", 0.7f)), nms_thr_(OpArg<float>("nms_thresh", 0.7f)),
min_size_(OpArg<int64_t>("min_size", 16)), min_size_(OpArg<int64_t>("min_size", 16)),
min_level_(OpArg<int64_t>("min_level", 2)), min_level_(OpArg<int64_t>("min_level", 2)),
max_level_(OpArg<int64_t>("max_level", 5)), max_level_(OpArg<int64_t>("max_level", 5)),
canonical_level_(OpArg<int64_t>("canonical_level", 4)), canonical_level_(OpArg<int64_t>("canonical_level", 4)),
canonical_scale_(OpArg<int64_t>("canonical_scale", 224)) {} canonical_scale_(OpArg<int64_t>("canonical_scale", 224)) {}
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override; void RunOnDevice() override;
template <typename T> template <typename T>
void DoRunWithType(); void DoRunWithType();
protected: protected:
float nms_thr_; float nms_thr_;
vec64_t strides_, indices_, roi_indices_; vec64_t strides_, indices_, roi_indices_;
vector<float> ratios_, scales_, scores_, anchors_; vector<float> ratios_, scales_, scores_, anchors_;
int64_t min_size_, pre_nms_topn_, post_nms_topn_; int64_t min_size_, pre_nms_topn_, post_nms_topn_;
int64_t num_images_, min_level_, max_level_; int64_t num_images_, min_level_, max_level_;
int64_t canonical_level_, canonical_scale_; int64_t canonical_level_, canonical_scale_;
Tensor proposals_; Tensor proposals_;
}; };
} // namespace dragon } // namespace dragon
#endif // SEETADET_CXX_OPERATORS_RPN_DECODER_OP_H_ #endif // SEETADET_CXX_OPERATORS_RPN_DECODER_OP_H_
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
# You should have received a copy of the BSD 2-Clause License # You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See, # along with the software. If not, See,
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
...@@ -15,25 +15,35 @@ from __future__ import absolute_import ...@@ -15,25 +15,35 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import glob
from distutils.core import setup from distutils.core import setup
from dragon.tools import cpp_extension
from dragon.tools import cpp_extension
if cpp_extension.CUDA_HOME is not None and \ if cpp_extension.CUDA_HOME is not None and \
cpp_extension._cuda.is_available(): cpp_extension._cuda.is_available():
Extension = cpp_extension.CUDAExtension Extension = cpp_extension.CUDAExtension
else: else:
Extension = cpp_extension.CppExtension Extension = cpp_extension.CppExtension
def find_sources(*dirs):
    """Collect the extension source files matched under the given directories.

    Each entry of ``dirs`` is used as a glob prefix: for every suffix in
    use, the pattern ``<dir>/*<suffix>`` is expanded with ``recursive=True``,
    so a ``'**'`` entry also descends into sub-directories.

    ``.cc`` files are always collected; ``.cu`` files are collected only
    when the selected ``Extension`` is ``cpp_extension.CUDAExtension``.

    Args:
        *dirs: Directory patterns to search.

    Returns:
        A list of matching paths, grouped by directory and then by suffix,
        sorted within each group.
    """
    ext_suffixes = ['.cc']
    if Extension is cpp_extension.CUDAExtension:
        ext_suffixes.append('.cu')
    sources = []
    for path in dirs:
        for ext_suffix in ext_suffixes:
            # glob returns matches in arbitrary (filesystem) order; sort them
            # so the list of sources passed to the build is reproducible.
            sources.extend(sorted(glob.glob(
                path + '/*' + ext_suffix,
                recursive=True,
            )))
    return sources
ext_modules = [ ext_modules = [
Extension( Extension(
name='install.lib.modules._C', name='install.lib.modules._C',
sources=[ sources=find_sources('**'),
'utils/detection_utils.cc',
'utils/detection_utils.cu',
'operators/nms_op.cc',
'operators/retinanet_decoder_op.cc',
'operators/rpn_decoder_op.cc',
],
), ),
] ]
......
#include <dragon/core/context.h>
#include "detection_utils.h" #include "detection_utils.h"
#include <dragon/core/context.h>
namespace dragon { namespace dragon {
...@@ -9,45 +9,46 @@ namespace detection { ...@@ -9,45 +9,46 @@ namespace detection {
template <typename T> template <typename T>
T IoU(const T A[], const T B[]) { T IoU(const T A[], const T B[]) {
if (A[0] > B[2] || A[1] > B[3] || if (A[0] > B[2] || A[1] > B[3] || A[2] < B[0] || A[3] < B[1]) return 0;
A[2] < B[0] || A[3] < B[1]) return 0; const T x1 = std::max(A[0], B[0]);
const T x1 = std::max(A[0], B[0]); const T y1 = std::max(A[1], B[1]);
const T y1 = std::max(A[1], B[1]); const T x2 = std::min(A[2], B[2]);
const T x2 = std::min(A[2], B[2]); const T y2 = std::min(A[3], B[3]);
const T y2 = std::min(A[3], B[3]); const T width = std::max((T)0, x2 - x1 + 1);
const T width = std::max((T)0, x2 - x1 + 1); const T height = std::max((T)0, y2 - y1 + 1);
const T height = std::max((T)0, y2 - y1 + 1); const T area = width * height;
const T area = width * height; const T A_area = (A[2] - A[0] + 1) * (A[3] - A[1] + 1);
const T A_area = (A[2] - A[0] + 1) * (A[3] - A[1] + 1); const T B_area = (B[2] - B[0] + 1) * (B[3] - B[1] + 1);
const T B_area = (B[2] - B[0] + 1) * (B[3] - B[1] + 1); return area / (A_area + B_area - area);
return area / (A_area + B_area - area);
} }
template <> void ApplyNMS<float, CPUContext>( template <>
const int num_boxes, void ApplyNMS<float, CPUContext>(
const int max_keeps, const int num_boxes,
const float thresh, const int max_keeps,
const float* boxes, const float thresh,
int64_t* keep_indices, const float* boxes,
int& num_keep, int64_t* keep_indices,
CPUContext* ctx) { int& num_keep,
int count = 0; CPUContext* ctx) {
std::vector<char> is_dead(num_boxes); int count = 0;
for (int i = 0; i < num_boxes; ++i) is_dead[i] = 0; std::vector<char> is_dead(num_boxes);
for (int i = 0; i < num_boxes; ++i) { for (int i = 0; i < num_boxes; ++i)
if (is_dead[i]) continue; is_dead[i] = 0;
keep_indices[count++] = i; for (int i = 0; i < num_boxes; ++i) {
if (count == max_keeps) break; if (is_dead[i]) continue;
for (int j = i + 1; j < num_boxes; ++j) keep_indices[count++] = i;
if (!is_dead[j] && IoU(&boxes[i * 5], if (count == max_keeps) break;
&boxes[j * 5]) > thresh) for (int j = i + 1; j < num_boxes; ++j)
is_dead[j] = 1; if (!is_dead[j] && IoU(&boxes[i * 5], &boxes[j * 5]) > thresh) {
} is_dead[j] = 1;
num_keep = count; }
}
num_keep = count;
} }
} // namespace detection } // namespace detection
} // namespace utils } // namespace utils
} // namespace dragon } // namespace dragon
...@@ -9,127 +9,121 @@ namespace utils { ...@@ -9,127 +9,121 @@ namespace utils {
namespace detection { namespace detection {
#define DIV_UP(m,n) ((m) / (n) + ((m) % (n) > 0)) #define DIV_UP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define NUM_THREADS 64 #define NUM_THREADS 64
namespace { namespace {
template <typename T> template <typename T>
__device__ bool _CheckIoU( __device__ bool _CheckIoU(const T* a, const T* b, const float thresh) {
const T* a, const T x1 = max(a[0], b[0]);
const T* b, const T y1 = max(a[1], b[1]);
const float thresh) { const T x2 = min(a[2], b[2]);
const T x1 = max(a[0], b[0]); const T y2 = min(a[3], b[3]);
const T y1 = max(a[1], b[1]); const T width = max(T(0), x2 - x1 + 1);
const T x2 = min(a[2], b[2]); const T height = max(T(0), y2 - y1 + 1);
const T y2 = min(a[3], b[3]); const T inter = width * height;
const T width = max(T(0), x2 - x1 + 1); const T Sa = (a[2] - a[0] + T(1)) * (a[3] - a[1] + T(1));
const T height = max(T(0), y2 - y1 + 1); const T Sb = (b[2] - b[0] + T(1)) * (b[3] - b[1] + T(1));
const T inter = width * height; return inter > thresh * (Sa + Sb - inter);
const T Sa = (a[2] - a[0] + T(1)) * (a[3] - a[1] + T(1));
const T Sb = (b[2] - b[0] + T(1)) * (b[3] - b[1] + T(1));
return inter > thresh * (Sa + Sb - inter);
} }
template <typename T> template <typename T>
__global__ void _NonMaxSuppression( __global__ void _NonMaxSuppression(
const int num_blocks, const int num_blocks,
const int num_boxes, const int num_boxes,
const T thresh, const T thresh,
const T* dev_boxes, const T* dev_boxes,
uint64_t* dev_mask) { uint64_t* dev_mask) {
const int row_start = blockIdx.y; const int row_start = blockIdx.y;
const int col_start = blockIdx.x; const int col_start = blockIdx.x;
if (row_start > col_start) return; if (row_start > col_start) return;
const int row_size = min(num_boxes - row_start * NUM_THREADS, NUM_THREADS); const int row_size = min(num_boxes - row_start * NUM_THREADS, NUM_THREADS);
const int col_size = min(num_boxes - col_start * NUM_THREADS, NUM_THREADS); const int col_size = min(num_boxes - col_start * NUM_THREADS, NUM_THREADS);
__shared__ T block_boxes[NUM_THREADS * 4]; __shared__ T block_boxes[NUM_THREADS * 4];
if (threadIdx.x < col_size) { if (threadIdx.x < col_size) {
const int c1 = threadIdx.x * 4; const int c1 = threadIdx.x * 4;
const int c2 = (col_start * NUM_THREADS + threadIdx.x) * 5; const int c2 = (col_start * NUM_THREADS + threadIdx.x) * 5;
block_boxes[c1] = dev_boxes[c2]; block_boxes[c1] = dev_boxes[c2];
block_boxes[c1 + 1] = dev_boxes[c2 + 1]; block_boxes[c1 + 1] = dev_boxes[c2 + 1];
block_boxes[c1 + 2] = dev_boxes[c2 + 2]; block_boxes[c1 + 2] = dev_boxes[c2 + 2];
block_boxes[c1 + 3] = dev_boxes[c2 + 3]; block_boxes[c1 + 3] = dev_boxes[c2 + 3];
} }
__syncthreads(); __syncthreads();
if (threadIdx.x < row_size) { if (threadIdx.x < row_size) {
const int index = row_start * NUM_THREADS + threadIdx.x; const int index = row_start * NUM_THREADS + threadIdx.x;
const T* dev_box = dev_boxes + index * 5; const T* dev_box = dev_boxes + index * 5;
unsigned long long val = 0; unsigned long long val = 0;
const int start = (row_start == col_start) ? (threadIdx.x + 1) : 0; const int start = (row_start == col_start) ? (threadIdx.x + 1) : 0;
for (int i = start; i < col_size; ++i) { for (int i = start; i < col_size; ++i) {
if (_CheckIoU(dev_box, block_boxes + i * 4, thresh)) { if (_CheckIoU(dev_box, block_boxes + i * 4, thresh)) {
val |= 1ULL << i; val |= 1ULL << i;
} }
}
dev_mask[index * num_blocks + col_start] = val;
} }
dev_mask[index * num_blocks + col_start] = val;
}
} }
} // namespace } // namespace
template <> void ApplyNMS<float, CUDAContext>( template <>
const int num_boxes, void ApplyNMS<float, CUDAContext>(
const int max_keeps, const int num_boxes,
const float thresh, const int max_keeps,
const float* boxes, const float thresh,
int64_t* keep_indices, const float* boxes,
int& num_keep, int64_t* keep_indices,
CUDAContext* ctx) { int& num_keep,
const int num_blocks = DIV_UP(num_boxes, NUM_THREADS); CUDAContext* ctx) {
const int num_blocks = DIV_UP(num_boxes, NUM_THREADS);
vector<uint64_t> mask_host(num_boxes * num_blocks);
auto* mask_dev = (uint64_t*)ctx->New(mask_host.size() * sizeof(uint64_t)); vector<uint64_t> mask_host(num_boxes * num_blocks);
auto* mask_dev = (uint64_t*)ctx->New(mask_host.size() * sizeof(uint64_t));
_NonMaxSuppression
<<< dim3(num_blocks, num_blocks), NUM_THREADS, _NonMaxSuppression<<<
0, ctx->cuda_stream() >>>( dim3(num_blocks, num_blocks),
num_blocks, NUM_THREADS,
num_boxes, 0,
thresh, ctx->cuda_stream()>>>(num_blocks, num_boxes, thresh, boxes, mask_dev);
boxes,
mask_dev CUDA_CHECK(cudaMemcpyAsync(
); mask_host.data(),
mask_dev,
CUDA_CHECK(cudaMemcpyAsync( mask_host.size() * sizeof(uint64_t),
mask_host.data(), cudaMemcpyDeviceToHost,
mask_dev, ctx->cuda_stream()));
mask_host.size() * sizeof(uint64_t),
cudaMemcpyDeviceToHost, ctx->FinishDeviceComputation();
ctx->cuda_stream()
)); vector<uint64_t> dead_bit(num_blocks);
memset(&dead_bit[0], 0, sizeof(uint64_t) * num_blocks);
ctx->FinishDeviceComputation();
int num_selected = 0;
vector<uint64_t> dead_bit(num_blocks); for (int i = 0; i < num_boxes; ++i) {
memset(&dead_bit[0], 0, sizeof(uint64_t) * num_blocks); const int nblock = i / NUM_THREADS;
const int inblock = i % NUM_THREADS;
int num_selected = 0; if (!(dead_bit[nblock] & (1ULL << inblock))) {
for (int i = 0; i < num_boxes; ++i) { keep_indices[num_selected++] = i;
const int nblock = i / NUM_THREADS; auto* mask_i = &mask_host[0] + i * num_blocks;
const int inblock = i % NUM_THREADS; for (int j = nblock; j < num_blocks; ++j)
if (!(dead_bit[nblock] & (1ULL << inblock))) { dead_bit[j] |= mask_i[j];
keep_indices[num_selected++] = i; if (num_selected == max_keeps) break;
auto* mask_i = &mask_host[0] + i * num_blocks;
for (int j = nblock; j < num_blocks; ++j) dead_bit[j] |= mask_i[j];
if (num_selected == max_keeps) break;
}
} }
num_keep = num_selected; }
ctx->Delete(mask_dev); num_keep = num_selected;
ctx->Delete(mask_dev);
} }
} // namespace detection } // namespace detection
} // namespace utils } // namespace utils
} // namespace dragon } // namespace dragon
#endif // USE_CUDA #endif // USE_CUDA
...@@ -52,12 +52,9 @@ class AnchorTarget(object): ...@@ -52,12 +52,9 @@ class AnchorTarget(object):
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images) gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
# Generate grid anchors from base # Generate grid anchors from base
all_anchors = \ grid_shapes = [f.shape[-2:] for f in features]
generate_grid_anchors( all_anchors = generate_grid_anchors(
features, grid_shapes, self.base_anchors, self.strides)
self.base_anchors,
self.strides,
)
num_anchors = all_anchors.shape[0] num_anchors = all_anchors.shape[0]
# Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care # Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
......
...@@ -58,12 +58,9 @@ class Proposal(object): ...@@ -58,12 +58,9 @@ class Proposal(object):
# Get resources # Get resources
num_images = ims_info.shape[0] num_images = ims_info.shape[0]
all_anchors = \ grid_shapes = [f.shape[-2:] for f in features]
generate_grid_anchors( all_anchors = generate_grid_anchors(
features, grid_shapes, self.base_anchors, self.strides)
self.base_anchors,
self.strides,
)
# Prepare for the outputs # Prepare for the outputs
batch_rois = [] batch_rois = []
......
...@@ -19,40 +19,40 @@ import numpy as np ...@@ -19,40 +19,40 @@ import numpy as np
from seetadet.core.config import cfg from seetadet.core.config import cfg
def generate_grid_anchors(features, base_anchors, strides): def generate_grid_anchors(grid_shapes, base_anchors, strides):
num_strides = len(strides) num_strides = len(strides)
if len(features) != num_strides: if len(grid_shapes) != num_strides:
raise ValueError( raise ValueError(
'Given %d features for %d strides.' 'Given %d grids for %d strides.'
% (len(features), num_strides) % (len(grid_shapes), num_strides)
) )
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
anchors_to_pack = [] anchors_to_pack = []
for i in range(len(features)): for i in range(len(grid_shapes)):
height, width = features[i].shape[-2:] height, width = grid_shapes[i]
shift_x = np.arange(0, width) * strides[i] shift_x = np.arange(0, width) * strides[i]
shift_y = np.arange(0, height) * strides[i] shift_y = np.arange(0, height) * strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to # Add a anchors (1, a, 4) to
# cell K shifts (K, 1, 4) to get # cell k shifts (k, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (k, a, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (k * a, 4) shifted anchors
A = base_anchors[i].shape[0] a = base_anchors[i].shape[0]
K = shifts.shape[0] k = shifts.shape[0]
anchors = (base_anchors[i].reshape((1, A, 4)) + anchors = (base_anchors[i].reshape((1, a, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2))) shifts.reshape((1, k, 4)).transpose((1, 0, 2)))
if num_strides > 1: if num_strides > 1:
# Transpose from (K, A, 4) to (A, K, 4) # Transpose from (K, A, 4) to (A, K, 4)
# We will pack it with other strides to # We will pack it with other strides to
# match the data format of (N, C, H, W) # match the data format of (N, C, H, W)
anchors = anchors.transpose((1, 0, 2)) anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4)) anchors = anchors.reshape((a * k, 4))
anchors_to_pack.append(anchors) anchors_to_pack.append(anchors)
else: else:
# Original order of Faster R-CNN # Original order of Faster R-CNN
return anchors.reshape((K * A, 4)) return anchors.reshape((k * a, 4))
return np.vstack(anchors_to_pack) return np.vstack(anchors_to_pack)
......
...@@ -46,6 +46,9 @@ class AnchorTarget(object): ...@@ -46,6 +46,9 @@ class AnchorTarget(object):
ratios=self.ratios, ratios=self.ratios,
sizes=sizes, sizes=sizes,
)) ))
# Store the cached grid anchors
self.last_grid_shapes = None
self.last_grid_anchors = None
def __call__(self, features, gt_boxes): def __call__(self, features, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
...@@ -58,12 +61,17 @@ class AnchorTarget(object): ...@@ -58,12 +61,17 @@ class AnchorTarget(object):
) )
# Generate grid anchors from base # Generate grid anchors from base
all_anchors = \ grid_shapes = [f.shape[-2:] for f in features]
generate_grid_anchors( if grid_shapes == self.last_grid_shapes:
features, all_anchors = self.last_grid_anchors
self.base_anchors, else:
self.strides, self.last_grid_shapes = grid_shapes
) self.last_grid_anchors = all_anchors = \
generate_grid_anchors(
grid_shapes,
self.base_anchors,
self.strides,
)
num_anchors = all_anchors.shape[0] num_anchors = all_anchors.shape[0]
# Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care # Label: ``1`` is positive, ``0`` is negative, ``-1`` is don't care
......
...@@ -15,6 +15,7 @@ from __future__ import print_function ...@@ -15,6 +15,7 @@ from __future__ import print_function
import types import types
import dragon
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
...@@ -59,7 +60,7 @@ def ims_detect(detector, raw_images): ...@@ -59,7 +60,7 @@ def ims_detect(detector, raw_images):
# Unpack results # Unpack results
results = outputs['detections'] results = outputs['detections']
detections = [[] for _ in range(len((raw_images)))] detections = [[] for _ in range(len(raw_images))]
for i in range(len(ims)): for i in range(len(ims)):
inds = np.where(results[:, 0].astype(np.int32) == i)[0] inds = np.where(results[:, 0].astype(np.int32) == i)[0]
...@@ -126,6 +127,6 @@ def test_net(weights, num_classes, q_in, q_out, device): ...@@ -126,6 +127,6 @@ def test_net(weights, num_classes, q_in, q_out, device):
q_out.put(( q_out.put((
indices[i], indices[i],
dict([('im_detect', _t['im_detect'].average_time), dict([('im_detect', _t['im_detect'].average_time),
('misc',_t['misc'].average_time)]), ('misc', _t['misc'].average_time)]),
dict([('boxes', boxes_this_image)]), dict([('boxes', boxes_this_image)]),
)) ))
...@@ -45,14 +45,14 @@ class PriorBox(object): ...@@ -45,14 +45,14 @@ class PriorBox(object):
aspect_ratios[i], aspect_ratios[i],
) )
) )
self.grid_anchors = None # Store the cached grid anchors
self.last_grid_anchors = None
def __call__(self, features): def __call__(self, features):
if self.grid_anchors is not None: if self.last_grid_anchors is not None:
return self.grid_anchors return self.last_grid_anchors
self.grid_anchors = []
all_anchors = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
# 1. Generate base grids # 1. Generate base grids
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
...@@ -61,23 +61,23 @@ class PriorBox(object): ...@@ -61,23 +61,23 @@ class PriorBox(object):
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
# 2. Apply anchors on base grids # 2. Apply anchors on base grids
# Add A anchors (1, A, 4) to # Add a anchors (1, a, 4) to
# cell K shifts (K, 1, 4) to get # cell k shifts (k, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (k, a, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (k * a, 4) shifted anchors
A = self.base_anchors[i].shape[0] a = self.base_anchors[i].shape[0]
D = self.base_anchors[i].shape[1] d = self.base_anchors[i].shape[1]
shifts = np.vstack(( shifts = np.vstack((
shift_x.ravel(), shift_x.ravel(),
shift_y.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_x.ravel(),
shift_y.ravel()) shift_y.ravel())
).transpose() ).transpose()
K = shifts.shape[0] # K = map_h * map_w k = shifts.shape[0] # k = map_h * map_w
anchors = (self.base_anchors[i].reshape((1, A, D)) + anchors = (self.base_anchors[i].reshape((1, a, d)) +
shifts.reshape((1, K, D)).transpose((1, 0, 2))) shifts.reshape((1, k, d)).transpose((1, 0, 2)))
anchors = anchors.reshape((K * A, D)).astype(np.float32) anchors = anchors.reshape((k * a, d)).astype(np.float32)
self.grid_anchors.append(anchors) all_anchors.append(anchors)
self.grid_anchors = np.concatenate(self.grid_anchors)
return self.grid_anchors self.last_grid_anchors = np.concatenate(all_anchors)
return self.last_grid_anchors
...@@ -32,11 +32,9 @@ def get_images(ims): ...@@ -32,11 +32,9 @@ def get_images(ims):
for im in ims: for im in ims:
im_scales.append((float(out_size) / im.shape[0], im_scales.append((float(out_size) / im.shape[0],
float(out_size) / im.shape[1])) float(out_size) / im.shape[1]))
processed_ims.append( processed_ims.append(cv2.resize(
cv2.resize(
im, (out_size, out_size), im, (out_size, out_size),
interpolation=cv2.INTER_AREA, interpolation=cv2.INTER_AREA))
))
if ims[0].dtype == 'uint16': if ims[0].dtype == 'uint16':
ims_blob = np.array(processed_ims, dtype='float32') / 256. ims_blob = np.array(processed_ims, dtype='float32') / 256.
else: else:
......
...@@ -49,12 +49,12 @@ class Distort(object): ...@@ -49,12 +49,12 @@ class Distort(object):
] ]
def apply(self, img, boxes=None): def apply(self, img, boxes=None):
if self._prob > 0: self._prob = 0.5 if cfg.TRAIN.USE_COLOR_JITTER else 0
img = PIL.Image.fromarray(img) img = PIL.Image.fromarray(img)
for transform_fn, prob in self._transforms: for transform_fn, prob in self._transforms:
if npr.uniform() < prob: if npr.uniform() < prob:
img = transform_fn(img) img = transform_fn(img)
img = img.enhance(1. + npr.uniform(-.4, .4)) img = img.enhance(1. + npr.uniform(-.4, .4))
return np.array(img), boxes return np.array(img), boxes
return img, boxes return img, boxes
......
...@@ -27,8 +27,9 @@ if __name__ == '__main__': ...@@ -27,8 +27,9 @@ if __name__ == '__main__':
np.random.seed(3) np.random.seed(3)
cfg.TRAIN.SCALES = [300] cfg.TRAIN.SCALES = [300]
cfg.TRAIN.RANDOM_SCALES = [0.25, 1.00] cfg.TRAIN.RANDOM_SCALES = [0.25, 1.00]
cfg.TRAIN.USE_COLOR_JITTER = True
augmentor = transforms.Compose( transformer = transforms.Compose(
transforms.Distort(), transforms.Distort(),
transforms.Expand(), transforms.Expand(),
transforms.Sample(), transforms.Sample(),
...@@ -38,12 +39,12 @@ if __name__ == '__main__': ...@@ -38,12 +39,12 @@ if __name__ == '__main__':
while True: while True:
img = cv2.imread('cat.jpg') img = cv2.imread('cat.jpg')
boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32) boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
img, boxes = augmentor(img, boxes) img, boxes = transformer(img, boxes)
for box in boxes: for box in boxes:
x1 = int(box[0] * img.shape[1]) x1 = int(box[0] * img.shape[1])
y1 = int(box[1] * img.shape[0]) y1 = int(box[1] * img.shape[0])
x2 = int(box[2] * img.shape[1]) x2 = int(box[2] * img.shape[1])
y2 = int(box[3] * img.shape[0]) y2 = int(box[3] * img.shape[0])
cv2.rectangle(img, (x1, y1), (x2, y2), (188, 119, 64), 2) cv2.rectangle(img, (x1, y1), (x2, y2), (188, 119, 64), 2)
cv2.imshow('Sample', img) cv2.imshow('Transforms - Preview', img)
cv2.waitKey(0) cv2.waitKey(0)
...@@ -70,14 +70,15 @@ class Pipeline(dali.Pipeline): ...@@ -70,14 +70,15 @@ class Pipeline(dali.Pipeline):
# Decode image # Decode image
image = self.decode(inputs['image']) image = self.decode(inputs['image'])
# Augment the color space # Augment the color space if necessary
image = self.hsv( if cfg.TRAIN.USE_COLOR_JITTER:
self.brightness_contrast( image = self.hsv(
image, self.brightness_contrast(
brightness=self.twist_rng(), image,
contrast=self.twist_rng(), brightness=self.twist_rng(),
), saturation=self.twist_rng() contrast=self.twist_rng(),
) ), saturation=self.twist_rng()
)
# Expand randomly to get smaller objects # Expand randomly to get smaller objects
pr = self.paste_ratio() * self.flip_rng() + 1. pr = self.paste_ratio() * self.flip_rng() + 1.
......
...@@ -18,7 +18,7 @@ from __future__ import division ...@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os
from seetadet.datasets import kpl_record from seetadet.datasets import kpl_dataset
def get_dataset(name): def get_dataset(name):
...@@ -42,5 +42,5 @@ def list_dataset(): ...@@ -42,5 +42,5 @@ def list_dataset():
_GLOBAL_REGISTERED_DATASET = { _GLOBAL_REGISTERED_DATASET = {
'default': lambda source: 'default': lambda source:
kpl_record.KPLRecordDataset(source), kpl_dataset.KPLRecordDataset(source),
} }
...@@ -149,8 +149,10 @@ class AirNet(nn.Module): ...@@ -149,8 +149,10 @@ class AirNet(nn.Module):
x = self.layer1(x) x = self.layer1(x)
outputs = [None, None, self.layer2(x)] outputs = [None, None, self.layer2(x)]
if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])] if hasattr(self, 'layer3'):
if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])] outputs += [self.layer3(outputs[-1])]
if hasattr(self, 'layer4'):
outputs += [self.layer4(outputs[-1])]
return outputs return outputs
......
...@@ -39,16 +39,17 @@ class Detector(nn.Module): ...@@ -39,16 +39,17 @@ class Detector(nn.Module):
backbone = cfg.MODEL.BACKBONE.lower().split('.') backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:] body, modules = backbone[0], backbone[1:]
# + DataLoader # DataLoader
self.data_loader = None
self.data_loader_cls = importlib.import_module( self.data_loader_cls = importlib.import_module(
'seetadet.algo.{}'.format(model)).DataLoader 'seetadet.algo.{}'.format(model)).DataLoader
self.bootstrap = vision.Bootstrap() self.bootstrap = vision.Bootstrap()
# + FeatureExtractor # FeatureExtractor
self.body = backbones.get(body)() self.body = backbones.get(body)()
feature_dims = self.body.feature_dims feature_dims = self.body.feature_dims
# + FeatureEnhancer # FeatureEnhancer
if 'fpn' in modules: if 'fpn' in modules:
self.fpn = models.FPN(feature_dims) self.fpn = models.FPN(feature_dims)
feature_dims = self.fpn.feature_dims feature_dims = self.fpn.feature_dims
...@@ -57,7 +58,7 @@ class Detector(nn.Module): ...@@ -57,7 +58,7 @@ class Detector(nn.Module):
else: else:
feature_dims = [feature_dims[-1]] feature_dims = [feature_dims[-1]]
# + Detection Modules # Detection Modules
if 'rcnn' in model: if 'rcnn' in model:
self.rpn = models.RPN(feature_dims[0]) self.rpn = models.RPN(feature_dims[0])
if 'faster' in model: if 'faster' in model:
...@@ -106,7 +107,7 @@ class Detector(nn.Module): ...@@ -106,7 +107,7 @@ class Detector(nn.Module):
if inputs is None: if inputs is None:
# 1) Training: <= DataLayer # 1) Training: <= DataLayer
# 2) Inference: <= Given # 2) Inference: <= Given
if not hasattr(self, 'data_loader'): if self.data_loader is None:
self.data_loader = self.data_loader_cls() self.data_loader = self.data_loader_cls()
inputs = self.data_loader() inputs = self.data_loader()
...@@ -171,29 +172,34 @@ class Detector(nn.Module): ...@@ -171,29 +172,34 @@ class Detector(nn.Module):
# Merge Affine into Convolution # # Merge Affine into Convolution #
################################### ###################################
last_module = None last_module = None
for e in self.modules(): for module in self.modules():
if isinstance(e, nn.Affine) and \ if isinstance(module, nn.Affine) and \
isinstance(last_module, nn.Conv2d): isinstance(last_module, nn.Conv2d):
if last_module.bias is None: if last_module.bias is None:
delattr(last_module, 'bias') delattr(last_module, 'bias')
e.forward = lambda x: x module.forward = lambda x: x
last_module.bias = e.bias last_module.bias = module.bias
last_module.weight.data.mul_(e.weight.data) weight = module.weight.data.view(
last_module = e 0, *([1] * (last_module.weight.ndimension() - 1)))
last_module.weight.data.mul_(weight)
last_module = module
###################################### ######################################
# Merge BatchNorm into Convolution # # Merge BatchNorm into Convolution #
###################################### ######################################
last_module = None last_module = None
for e in self.modules(): for module in self.modules():
if isinstance(e, nn.BatchNorm2d) and \ if isinstance(module, nn.BatchNorm2d) and \
isinstance(last_module, nn.Conv2d): isinstance(last_module, nn.Conv2d):
if last_module.bias is None: if last_module.bias is None:
delattr(last_module, 'bias') delattr(last_module, 'bias')
e.forward = lambda x: x module.forward = lambda x: x
term = torch.sqrt(e.running_var.data + e.eps) term = torch.sqrt(module.running_var.data + module.eps)
term = e.weight.data / term term = module.weight.data / term
last_module.bias = e.bias.data - term * e.running_mean.data last_module.bias = \
module.bias.data - \
term * module.running_mean.data
term = term.view(0, *([1] * (last_module.weight.ndimension() - 1)))
if last_module.weight.dtype == 'float16': if last_module.weight.dtype == 'float16':
last_module.bias.half_() last_module.bias.half_()
weight = last_module.weight.data.float() weight = last_module.weight.data.float()
...@@ -201,7 +207,7 @@ class Detector(nn.Module): ...@@ -201,7 +207,7 @@ class Detector(nn.Module):
last_module.weight.copy_(weight) last_module.weight.copy_(weight)
else: else:
last_module.weight.data.mul_(term) last_module.weight.data.mul_(term)
last_module = e last_module = module
def new_detector(device, weights=None, training=False): def new_detector(device, weights=None, training=False):
......
...@@ -31,7 +31,8 @@ class FPN(nn.Module): ...@@ -31,7 +31,8 @@ class FPN(nn.Module):
dim = cfg.FPN.DIM dim = cfg.FPN.DIM
self.C = nn.ModuleList() self.C = nn.ModuleList()
self.P = nn.ModuleList() self.P = nn.ModuleList()
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1): self.highest_backbone_lvl = min(cfg.FPN.RPN_MAX_LEVEL, HIGHEST_BACKBONE_LVL)
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, self.highest_backbone_lvl + 1):
self.C.append(nn.Conv1x1(feature_dims[lvl - 1], dim, bias=True)) self.C.append(nn.Conv1x1(feature_dims[lvl - 1], dim, bias=True))
self.P.append(nn.Conv3x3(dim, dim, bias=True)) self.P.append(nn.Conv3x3(dim, dim, bias=True))
if 'rcnn' in cfg.MODEL.TYPE: if 'rcnn' in cfg.MODEL.TYPE:
...@@ -40,8 +41,8 @@ class FPN(nn.Module): ...@@ -40,8 +41,8 @@ class FPN(nn.Module):
else: else:
self.apply_func = self.apply_on_generic self.apply_func = self.apply_on_generic
self.relu = nn.ReLU(inplace=False) self.relu = nn.ReLU(inplace=False)
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1): for lvl in range(self.highest_backbone_lvl + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else dim dim_in = feature_dims[-1] if lvl == self.highest_backbone_lvl + 1 else dim
self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True)) self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True))
self.feature_dims = [dim] self.feature_dims = [dim]
self.coarsest_stride = cfg.MODEL.COARSEST_STRIDE self.coarsest_stride = cfg.MODEL.COARSEST_STRIDE
...@@ -56,12 +57,12 @@ class FPN(nn.Module): ...@@ -56,12 +57,12 @@ class FPN(nn.Module):
def apply_on_rcnn(self, features): def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)] outputs = [self.P[self.highest_backbone_lvl - min_lvl](fpn_input)]
# Apply max pool for higher features # Apply max pool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(self.highest_backbone_lvl + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1])) outputs.append(self.maxpool(outputs[-1]))
# Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(self.highest_backbone_lvl - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
if self.coarsest_stride > 0: if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample( upscale_output = nn_funcs.upsample(
...@@ -76,15 +77,15 @@ class FPN(nn.Module): ...@@ -76,15 +77,15 @@ class FPN(nn.Module):
def apply_on_generic(self, features): def apply_on_generic(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)] outputs = [self.P[self.highest_backbone_lvl - min_lvl](fpn_input)]
# Add extra convolutions for higher features # Add extra convolutions for higher features
extra_input = features[-1] extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(self.highest_backbone_lvl + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input)) outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl: if i != max_lvl:
extra_input = self.relu(outputs[-1]) extra_input = self.relu(outputs[-1])
# Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(self.highest_backbone_lvl - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
if self.coarsest_stride > 0: if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample( upscale_output = nn_funcs.upsample(
......
...@@ -161,7 +161,7 @@ class NASMobileNet(nn.Module): ...@@ -161,7 +161,7 @@ class NASMobileNet(nn.Module):
def reset_parameters(self): def reset_parameters(self):
for m in self.modules(): for m in self.modules():
if nn.is_conv2d(m): if isinstance(m, nn.Conv2d):
init.kaiming_normal(m.weight, 'fan_out') init.kaiming_normal(m.weight, 'fan_out')
if m.bias is not None: if m.bias is not None:
init.constant(m.bias, 0) init.constant(m.bias, 0)
...@@ -173,7 +173,7 @@ class NASMobileNet(nn.Module): ...@@ -173,7 +173,7 @@ class NASMobileNet(nn.Module):
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
if nn.is_conv2d(m): if isinstance(m, nn.Conv2d):
m.weight.requires_grad = False m.weight.requires_grad = False
m._buffers['weight'] = m.weight m._buffers['weight'] = m.weight
del m._parameters['weight'] del m._parameters['weight']
......
...@@ -17,8 +17,6 @@ from __future__ import absolute_import ...@@ -17,8 +17,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch
from seetadet.core.config import cfg from seetadet.core.config import cfg
from seetadet.core.registry import backbones from seetadet.core.registry import backbones
from seetadet.modules import nn from seetadet.modules import nn
...@@ -37,11 +35,12 @@ class BasicBlock(nn.Module): ...@@ -37,11 +35,12 @@ class BasicBlock(nn.Module):
super(BasicBlock, self).__init__() super(BasicBlock, self).__init__()
self.conv1 = nn.Conv3x3(dim_in, dim_out, stride) self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = nn.FrozenAffine(dim_out) self.bn1 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv3x3(dim_out, dim_out) self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = nn.FrozenAffine(dim_out) self.bn2 = nn.FrozenAffine(dim_out)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock self.dropblock1 = nn.DropBlock2d(**dropblock) if dropblock else None
self.dropblock2 = nn.DropBlock2d(**dropblock) if dropblock else None
def forward(self, x): def forward(self, x):
residual = x residual = x
...@@ -50,14 +49,14 @@ class BasicBlock(nn.Module): ...@@ -50,14 +49,14 @@ class BasicBlock(nn.Module):
out = self.bn1(out) out = self.bn1(out)
out = self.relu(out) out = self.relu(out)
if self.dropblock is not None: if self.dropblock1 is not None:
out = self.dropblock(out) out = self.dropblock1(out)
out = self.conv2(out) out = self.conv2(out)
out = self.bn2(out) out = self.bn2(out)
if self.dropblock is not None: if self.dropblock2 is not None:
residual = self.dropblock(residual) residual = self.dropblock2(residual)
if self.downsample is not None: if self.downsample is not None:
residual = self.downsample(residual) residual = self.downsample(residual)
...@@ -67,7 +66,7 @@ class BasicBlock(nn.Module): ...@@ -67,7 +66,7 @@ class BasicBlock(nn.Module):
return out return out
class Bottleneck(torch.nn.Module): class Bottleneck(nn.Module):
# 1x64d => 0.25 (ResNet) # 1x64d => 0.25 (ResNet)
# 32x8d, 64x4d => 1.0 (ResNeXt) # 32x8d, 64x4d => 1.0 (ResNeXt)
contraction = cfg.RESNET.NUM_GROUPS \ contraction = cfg.RESNET.NUM_GROUPS \
...@@ -86,12 +85,13 @@ class Bottleneck(torch.nn.Module): ...@@ -86,12 +85,13 @@ class Bottleneck(torch.nn.Module):
self.conv1 = nn.Conv1x1(dim_in, dim) self.conv1 = nn.Conv1x1(dim_in, dim)
self.bn1 = nn.FrozenAffine(dim) self.bn1 = nn.FrozenAffine(dim)
self.conv2 = nn.Conv3x3(dim, dim, stride=stride) self.conv2 = nn.Conv3x3(dim, dim, stride=stride)
self.drop2 = nn.DropBlock2d(**dropblock) if dropblock else None
self.bn2 = nn.FrozenAffine(dim) self.bn2 = nn.FrozenAffine(dim)
self.conv3 = nn.Conv1x1(dim, dim_out) self.conv3 = nn.Conv1x1(dim, dim_out)
self.drop3 = nn.DropBlock2d(**dropblock) if dropblock else None
self.bn3 = nn.FrozenAffine(dim_out) self.bn3 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock
def forward(self, x): def forward(self, x):
residual = x residual = x
...@@ -101,32 +101,30 @@ class Bottleneck(torch.nn.Module): ...@@ -101,32 +101,30 @@ class Bottleneck(torch.nn.Module):
out = self.relu(out) out = self.relu(out)
out = self.conv2(out) out = self.conv2(out)
if self.drop2 is not None:
out = self.drop2(out)
out = self.bn2(out) out = self.bn2(out)
out = self.relu(out) out = self.relu(out)
if self.dropblock is not None:
out = self.dropblock(out)
out = self.conv3(out) out = self.conv3(out)
out = self.bn3(out) out = self.bn3(out)
if self.dropblock is not None:
residual = self.dropblock(residual)
if self.downsample is not None: if self.downsample is not None:
residual = self.downsample(residual) residual = self.downsample(residual)
out += residual out += residual
if self.drop3 is not None:
out = self.drop3(out)
out = self.relu(out) out = self.relu(out)
return out return out
class ResNet(torch.nn.Module): class ResNet(nn.Module):
def __init__(self, block, layers, filters): def __init__(self, block, layers, filters):
super(ResNet, self).__init__() super(ResNet, self).__init__()
self.dim_in, filters = filters[0], filters[1:] self.dim_in, filters = filters[0], filters[1:]
self.feature_dims = [self.dim_in] + filters self.feature_dims = [self.dim_in] + filters
self.conv1 = torch.nn.Conv2d( self.conv1 = nn.Conv2d(
3, 64, 3, 64,
kernel_size=7, kernel_size=7,
stride=2, stride=2,
...@@ -134,29 +132,31 @@ class ResNet(torch.nn.Module): ...@@ -134,29 +132,31 @@ class ResNet(torch.nn.Module):
bias=False, bias=False,
) )
self.bn1 = nn.FrozenAffine(self.dim_in) self.bn1 = nn.FrozenAffine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d( self.maxpool = nn.MaxPool2d(
kernel_size=3, kernel_size=3,
stride=2, stride=2,
padding=0, padding=0,
ceil_mode=True, ceil_mode=True,
) )
self.drop3 = torch.nn.DropBlock2d( drop3 = {
kp=0.9, 'kp': 0.9,
block_size=7, 'block_size': 7,
alpha=0.25, 'alpha': 1.00,
decrement=cfg.DROPBLOCK.DECREMENT 'decrement': cfg.DROPBLOCK.DECREMENT,
) if cfg.DROPBLOCK.DROP_ON else None 'inplace': True,
self.drop4 = torch.nn.DropBlock2d( } if cfg.DROPBLOCK.DROP_ON else None
kp=0.9, drop4 = {
block_size=7, 'kp': 0.9,
alpha=1.00, 'block_size': 7,
decrement=cfg.DROPBLOCK.DECREMENT 'alpha': 1.00,
) if cfg.DROPBLOCK.DROP_ON else None 'decrement': cfg.DROPBLOCK.DECREMENT,
'inplace': True,
} if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0]) self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2) self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3) self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, drop3)
self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, self.drop4) self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, drop4)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -166,7 +166,7 @@ class ResNet(torch.nn.Module): ...@@ -166,7 +166,7 @@ class ResNet(torch.nn.Module):
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
m.weight.requires_grad = False m.weight.requires_grad = False
m._buffers['weight'] = m.weight m._buffers['weight'] = m.weight
del m._parameters['weight'] del m._parameters['weight']
......
...@@ -29,7 +29,6 @@ class SSD(nn.Module): ...@@ -29,7 +29,6 @@ class SSD(nn.Module):
######################################## ########################################
# SSD outputs # # SSD outputs #
######################################## ########################################
self.cls_conv = torch.nn.ModuleList( self.cls_conv = torch.nn.ModuleList(
nn.Conv3x3(feature_dims[0], feature_dims[0], bias=True) nn.Conv3x3(feature_dims[0], feature_dims[0], bias=True)
for _ in range(cfg.SSD.NUM_CONVS) for _ in range(cfg.SSD.NUM_CONVS)
......
...@@ -36,7 +36,6 @@ class _NonMaxSuppression(Function): ...@@ -36,7 +36,6 @@ class _NonMaxSuppression(Function):
return self.dispatch([dets], [self.alloc()]) return self.dispatch([dets], [self.alloc()])
class _RetinaNetDecoder(Function): class _RetinaNetDecoder(Function):
"""Decode predictions from RetinaNet.""" """Decode predictions from RetinaNet."""
......
...@@ -33,6 +33,7 @@ def kaiming_normal(weight, mode='fan_in'): ...@@ -33,6 +33,7 @@ def kaiming_normal(weight, mode='fan_in'):
nonlinearity='relu', nonlinearity='relu',
) )
# Aliases # Aliases
constant = nn.init.constant_ constant = nn.init.constant_
normal = nn.init.normal_ normal = nn.init.normal_
...@@ -185,6 +185,7 @@ class SigmoidFocalLoss(object): ...@@ -185,6 +185,7 @@ class SigmoidFocalLoss(object):
return nn.SigmoidFocalLoss( return nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA, alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA, gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
negative_index=0, # Background index
) )
...@@ -211,6 +212,7 @@ BCEWithLogitsLoss = nn.BCEWithLogitsLoss ...@@ -211,6 +212,7 @@ BCEWithLogitsLoss = nn.BCEWithLogitsLoss
Conv2d = nn.Conv2d Conv2d = nn.Conv2d
ConvTranspose2d = nn.ConvTranspose2d ConvTranspose2d = nn.ConvTranspose2d
DepthwiseConv2d = nn.DepthwiseConv2d DepthwiseConv2d = nn.DepthwiseConv2d
DropBlock2d = nn.DropBlock2d
Linear = nn.Linear Linear = nn.Linear
MaxPool2d = nn.MaxPool2d MaxPool2d = nn.MaxPool2d
Module = nn.Module Module = nn.Module
......
...@@ -15,7 +15,7 @@ from __future__ import print_function ...@@ -15,7 +15,7 @@ from __future__ import print_function
import functools import functools
import dragon.vm.torch as torch from dragon.vm import torch
from seetadet.core.config import cfg from seetadet.core.config import cfg
...@@ -41,7 +41,9 @@ class Bootstrap(torch.nn.Module): ...@@ -41,7 +41,9 @@ class Bootstrap(torch.nn.Module):
def __init__(self): def __init__(self):
super(Bootstrap, self).__init__() super(Bootstrap, self).__init__()
self.normalize_func = functools.partial( self._device = torch.device('cpu')
self._dummy_buffer = torch.ones(1)
self._normalize_func = functools.partial(
torch.channel_normalize, torch.channel_normalize,
mean=cfg.PIXEL_MEANS, mean=cfg.PIXEL_MEANS,
std=[1., 1., 1.], std=[1., 1., 1.],
...@@ -49,10 +51,9 @@ class Bootstrap(torch.nn.Module): ...@@ -49,10 +51,9 @@ class Bootstrap(torch.nn.Module):
dims=(0, 3, 1, 2), dims=(0, 3, 1, 2),
dtype=cfg.MODEL.PRECISION.lower(), dtype=cfg.MODEL.PRECISION.lower(),
) )
self.dummy_buffer = torch.ones(1)
def _apply(self, fn): def _apply(self, fn):
fn(self.dummy_buffer) fn(self._dummy_buffer)
def cpu(self): def cpu(self):
self._device = torch.device('cpu') self._device = torch.device('cpu')
...@@ -61,12 +62,11 @@ class Bootstrap(torch.nn.Module): ...@@ -61,12 +62,11 @@ class Bootstrap(torch.nn.Module):
self._device = torch.device('cuda', device) self._device = torch.device('cuda', device)
def device(self): def device(self):
"""Return the device of this module.""" return self._dummy_buffer.device
return self.dummy_buffer.device
def forward(self, input): def forward(self, input):
if isinstance(input, torch.Tensor): if isinstance(input, torch.Tensor):
if input.size(1) <= 3: if input.shape[1] <= 3:
return input return input
cur_device = self.device() cur_device = self.device()
if input._device != cur_device: if input._device != cur_device:
...@@ -74,4 +74,4 @@ class Bootstrap(torch.nn.Module): ...@@ -74,4 +74,4 @@ class Bootstrap(torch.nn.Module):
input = input.cpu() input = input.cpu()
else: else:
input = input.cuda(cur_device.index) input = input.cuda(cur_device.index)
return self.normalize_func(input) return self._normalize_func(input)
...@@ -32,8 +32,8 @@ class SGDSolver(object): ...@@ -32,8 +32,8 @@ class SGDSolver(object):
lr=cfg.SOLVER.BASE_LR, lr=cfg.SOLVER.BASE_LR,
momentum=cfg.SOLVER.MOMENTUM, momentum=cfg.SOLVER.MOMENTUM,
weight_decay=cfg.SOLVER.WEIGHT_DECAY, weight_decay=cfg.SOLVER.WEIGHT_DECAY,
clip_gradient=float(cfg.SOLVER.CLIP_NORM), clip_norm=float(cfg.SOLVER.CLIP_NORM),
scale_gradient=1. / cfg.SOLVER.LOSS_SCALING, scale=1. / cfg.SOLVER.LOSS_SCALING,
) )
self.lr_scheduler = lr_scheduler.get_scheduler() self.lr_scheduler = lr_scheduler.get_scheduler()
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import operator
from dragon.vm import torch
from seetadet.modules import nn
def dense_conv_flops(m, inputs, output):
    """Hook to compute flops for a dense convolution.

    Parameters
    ----------
    m : module
        The convolution module; ``kernel_size`` and ``bias`` are read.
    inputs : Sequence
        The forward inputs; only ``inputs[0].shape[1]`` is used
        (taken as the number of input channels).
    output : tensor-like
        The forward output; ``shape[1]`` is taken as the number of output
        channels and ``shape[2:]`` as the remaining output dimensions.

    The results are stored on ``m`` as ``__params__`` and ``__flops__``.

    """
    k_dim = functools.reduce(operator.mul, m.kernel_size)
    out_dim = functools.reduce(operator.mul, output.shape[2:])
    in_c, out_c = inputs[0].shape[1], output.shape[1]
    # Compare against None instead of relying on the truth value of the
    # bias: a tensor can be falsy (a single zero element) or refuse bool()
    # altogether. A literal False is still treated as "no bias".
    has_bias = m.bias is not None and m.bias is not False
    m.__params__ = (k_dim * in_c + (1 if has_bias else 0)) * out_c
    m.__flops__ = m.__params__ * out_dim
def depthwise_conv_flops(m, inputs, output):
    """Hook to compute flops for a depthwise convolution.

    Parameters
    ----------
    m : module
        The convolution module; ``kernel_size`` and ``bias`` are read.
    inputs : Sequence
        The forward inputs; unused, kept for the hook signature.
    output : tensor-like
        The forward output; ``shape[1]`` is taken as the number of output
        channels and ``shape[2:]`` as the remaining output dimensions.

    The results are stored on ``m`` as ``__params__`` and ``__flops__``.

    """
    k_dim = functools.reduce(operator.mul, m.kernel_size)
    out_dim = functools.reduce(operator.mul, output.shape[2:])
    out_c = output.shape[1]
    # Compare against None instead of relying on the truth value of the
    # bias: a tensor can be falsy (a single zero element) or refuse bool()
    # altogether. A literal False is still treated as "no bias".
    has_bias = m.bias is not None and m.bias is not False
    m.__params__ = (k_dim + (1 if has_bias else 0)) * out_c
    m.__flops__ = m.__params__ * out_dim
def register_flops(module):
    """Attach the flops-counting forward hooks to a module's convolutions.

    Does nothing when ``module`` already carries a ``__flops__`` attribute,
    so the hooks are installed at most once per root module.

    """
    if hasattr(module, '__flops__'):
        return
    module.__flops__ = 0.
    for child in module.modules():
        # DepthwiseConv2d is tested before the plain Conv2d branch.
        if isinstance(child, nn.DepthwiseConv2d):
            hook = depthwise_conv_flops
        elif isinstance(child, nn.Conv2d):
            hook = dense_conv_flops
        else:
            continue
        child.register_forward_hook(hook)
def collect_flops(module, normalizer=1e6):
    """Sum and reset the flops counters recorded on a module tree.

    Every sub-module yielded by ``module.modules()`` that has a
    ``__flops__`` attribute contributes its value and is reset to zero.
    The sum is returned divided by ``normalizer``.

    """
    accumulated = 0.
    for sub_module in module.modules():
        if not hasattr(sub_module, '__flops__'):
            continue
        accumulated += sub_module.__flops__
        # Clear the counter so the next collection starts from zero.
        sub_module.__flops__ = 0.
    return accumulated / normalizer
def benchmark_flops(module, normalizer=1e6):
    """Return the flops by running benchmark once.

    The hooks are registered, stale counters are discarded, and the module
    is called once without arguments under ``torch.no_grad()``. A module in
    training mode is switched to eval for the run and switched back after.

    Parameters
    ----------
    module : module
        The module to benchmark; it is invoked as ``module()``.
    normalizer : number, optional, default=1e6
        The divisor applied to the collected flops.

    Returns
    -------
    number
        The flops collected from the forward, divided by ``normalizer``.

    """
    register_flops(module)
    # Discard whatever the counters hold from an earlier forward.
    collect_flops(module)
    original_training = module.training
    if original_training:
        module.eval()
    try:
        with torch.no_grad():
            module()
    finally:
        # Restore the training mode even if the forward raises, so a
        # failed benchmark does not leave the module stuck in eval mode.
        if original_training:
            module.train()
    return collect_flops(module, normalizer)
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!