Commit 8558d3df by Ting PAN

Adapt to the latest dragon preview version

Summary:
This commit updates the repo to match dragon.0.3.0.dev20200707.
1 parent 4bcab266
Showing with 540 additions and 350 deletions
------------------------------------------------------------------------ ------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet. The list of most significant changes made over time in SeetaDet.
SeetaDet 0.4.2 (20200707)
Dragon Minimum Required (Version 0.3.0.dev20200707)
Changes:
- Adapt to the latest dragon preview version.
Preview Features:
- None
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.4.1 (20200421) SeetaDet 0.4.1 (20200421)
Dragon Minimum Required (Version 0.3.0.dev20200421) Dragon Minimum Required (Version 0.3.0.dev20200421)
......
...@@ -14,7 +14,7 @@ The torch-style codes help us to simplify the hierarchical pipeline of modern de ...@@ -14,7 +14,7 @@ The torch-style codes help us to simplify the hierarchical pipeline of modern de
## Requirements ## Requirements
seeta-dragon >= 0.3.0.dev20200421 seeta-dragon >= 0.3.0.dev20200707
## Installation ## Installation
......
...@@ -32,16 +32,17 @@ FRCNN: ...@@ -32,16 +32,17 @@ FRCNN:
TRAIN: TRAIN:
WEIGHTS: '/model/R-101.Affine.pth' WEIGHTS: '/model/R-101.Affine.pth'
DATASET: '/data/coco_2014_trainval35k' DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
IMS_PER_BATCH: 2 IMS_PER_BATCH: 2
BATCH_SIZE: 512 BATCH_SIZE: 512
SCALES: [800] SCALES: [800]
MAX_SIZE: 1333 MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST: TEST:
DATASET: '/data/coco_2014_minival' DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json' JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco' PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [800] SCALES: [800]
MAX_SIZE: 1333 MAX_SIZE: 1333
NMS: 0.5 NMS: 0.5
RPN_POST_NMS_TOP_N: 1000
...@@ -32,16 +32,16 @@ FRCNN: ...@@ -32,16 +32,16 @@ FRCNN:
TRAIN: TRAIN:
WEIGHTS: '/model/R-101.Affine.pth' WEIGHTS: '/model/R-101.Affine.pth'
DATASET: '/data/coco_2014_trainval35k' DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
IMS_PER_BATCH: 2 IMS_PER_BATCH: 2
BATCH_SIZE: 512 BATCH_SIZE: 512
SCALES: [800] SCALES: [800]
MAX_SIZE: 1333 MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST: TEST:
DATASET: '/data/coco_2014_minival' DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json' JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco' PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [800] SCALES: [800]
MAX_SIZE: 1333 MAX_SIZE: 1333
NMS: 0.5 NMS: 0.5
RPN_POST_NMS_TOP_N: 1000
...@@ -30,7 +30,7 @@ TRAIN: ...@@ -30,7 +30,7 @@ TRAIN:
TEST: TEST:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [600] SCALES: [600]
MAX_SIZE: 1000 MAX_SIZE: 1000
NMS: 0.45 NMS: 0.45
RPN_POST_NMS_TOP_N: 1000
\ No newline at end of file
...@@ -29,16 +29,16 @@ FRCNN: ...@@ -29,16 +29,16 @@ FRCNN:
TRAIN: TRAIN:
WEIGHTS: '/model/VGG16.RCNN.pth' WEIGHTS: '/model/VGG16.RCNN.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
RPN_MIN_SIZE: 16
IMS_PER_BATCH: 2 IMS_PER_BATCH: 2
BATCH_SIZE: 128 BATCH_SIZE: 128
SCALES: [600] SCALES: [600]
MAX_SIZE: 1000 MAX_SIZE: 1000
RPN_MIN_SIZE: 16
TEST: TEST:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_MIN_SIZE: 16
RPN_POST_NMS_TOP_N: 300
SCALES: [600] SCALES: [600]
MAX_SIZE: 1000 MAX_SIZE: 1000
RPN_MIN_SIZE: 16
NMS: 0.45 NMS: 0.45
RPN_POST_NMS_TOP_N: 300
\ No newline at end of file
...@@ -32,11 +32,11 @@ FPN: ...@@ -32,11 +32,11 @@ FPN:
TRAIN: TRAIN:
WEIGHTS: '/model/R-50.Affine.pth' WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/coco_2014_trainval35k' DATASET: '/data/coco_2014_trainval35k'
USE_DIFF: False # Do not use crowd objects
USE_COLOR_JITTER: True
IMS_PER_BATCH: 16 IMS_PER_BATCH: 16
SCALES: [416] SCALES: [416]
RANDOM_SCALES: [0.25, 1.0] RANDOM_SCALES: [0.25, 1.0]
USE_DIFF: False # Do not use crowd objects
USE_COLOR_JITTER: False
TEST: TEST:
DATASET: '/data/coco_2014_minival' DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json' JSON_FILE: '/data/instances_minival2014.json'
......
...@@ -23,10 +23,10 @@ FPN: ...@@ -23,10 +23,10 @@ FPN:
TRAIN: TRAIN:
WEIGHTS: '/model/AirNet.Affine.pth' WEIGHTS: '/model/AirNet.Affine.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
USE_COLOR_JITTER: True
IMS_PER_BATCH: 32 IMS_PER_BATCH: 32
SCALES: [320] SCALES: [320]
RANDOM_SCALES: [0.25, 1.0] RANDOM_SCALES: [0.25, 1.0]
USE_COLOR_JITTER: True
TEST: TEST:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
...@@ -24,10 +24,10 @@ FPN: ...@@ -24,10 +24,10 @@ FPN:
TRAIN: TRAIN:
WEIGHTS: '/model/R-50.Affine.pth' WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
USE_COLOR_JITTER: True
IMS_PER_BATCH: 32 IMS_PER_BATCH: 32
SCALES: [320] SCALES: [320]
RANDOM_SCALES: [0.25, 2.0] RANDOM_SCALES: [0.25, 2.0]
USE_COLOR_JITTER: True
TEST: TEST:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
...@@ -38,6 +38,7 @@ TRAIN: ...@@ -38,6 +38,7 @@ TRAIN:
IMS_PER_BATCH: 32 IMS_PER_BATCH: 32
SCALES: [300] SCALES: [300]
RANDOM_SCALES: [0.25, 1.00] RANDOM_SCALES: [0.25, 1.00]
USE_COLOR_JITTER: True
TEST: TEST:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
...@@ -3,7 +3,7 @@ VIS: False ...@@ -3,7 +3,7 @@ VIS: False
ENABLE_TENSOR_BOARD: False ENABLE_TENSOR_BOARD: False
MODEL: MODEL:
TYPE: ssd TYPE: ssd
BACKBONE: airnet5b.mbox BACKBONE: airnet.fpn
CLASSES: ['__background__', CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat', 'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair', 'bottle', 'bus', 'car', 'cat', 'chair',
...@@ -17,19 +17,30 @@ SOLVER: ...@@ -17,19 +17,30 @@ SOLVER:
MAX_STEPS: 120000 MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000 SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_ssd_320 SNAPSHOT_PREFIX: voc_ssd_320
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 8
SSD: SSD:
NUM_CONVS: 2 NUM_CONVS: 2
MULTIBOX: MULTIBOX:
STRIDES: [8, 16, 32] STRIDES: [8, 16, 32, 64, 100, 300]
MIN_SIZES: [30, 90, 150] MIN_SIZES: [30, 60, 110, 162, 213, 264]
MAX_SIZES: [90, 150, 210] MAX_SIZES: [60, 110, 162, 213, 264, 315]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]] ASPECT_RATIOS: [
[1, 2, 0.5],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33],
[1, 2, 0.5],
[1, 2, 0.5],
]
TRAIN: TRAIN:
WEIGHTS: '/model/AirNet.Affine.pth' WEIGHTS: '/model/AirNet.Affine.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [320] SCALES: [320]
RANDOM_SCALES: [0.25, 1.00] RANDOM_SCALES: [0.25, 1.00]
IMS_PER_BATCH: 32 USE_COLOR_JITTER: True
TEST: TEST:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
...@@ -37,9 +37,10 @@ SSD: ...@@ -37,9 +37,10 @@ SSD:
TRAIN: TRAIN:
WEIGHTS: '/model/R-50.Affine.pth' WEIGHTS: '/model/R-50.Affine.pth'
DATASET: '/data/voc_0712_trainval' DATASET: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
SCALES: [320] SCALES: [320]
RANDOM_SCALES: [0.25, 1.00] RANDOM_SCALES: [0.25, 1.00]
IMS_PER_BATCH: 32 USE_COLOR_JITTER: True
TEST: TEST:
DATASET: '/data/voc_2007_test' DATASET: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco' PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
---
AccessModifierOffset: -1
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ]
IncludeCategories:
- Regex: '^<.*\.h(pp)?>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
UseTab: Never
...
#include <dragon/core/workspace.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h"
#include "nms_op.h" #include "nms_op.h"
#include "../utils/detection_utils.h"
namespace dragon { namespace dragon {
template <class Context> template <typename T> template <class Context>
template <typename T>
void NonMaxSuppressionOp<Context>::DoRunWithType() { void NonMaxSuppressionOp<Context>::DoRunWithType() {
int num_selected; int num_selected;
...@@ -16,10 +14,10 @@ void NonMaxSuppressionOp<Context>::DoRunWithType() { ...@@ -16,10 +14,10 @@ void NonMaxSuppressionOp<Context>::DoRunWithType() {
iou_threshold_, iou_threshold_,
Input(0).template mutable_data<T, Context>(), Input(0).template mutable_data<T, Context>(),
Output(0)->template mutable_data<int64_t, CPUContext>(), Output(0)->template mutable_data<int64_t, CPUContext>(),
num_selected, ctx() num_selected,
); ctx());
Output(0)->Reshape({ num_selected }); Output(0)->Reshape({num_selected});
} }
template <class Context> template <class Context>
...@@ -27,8 +25,7 @@ void NonMaxSuppressionOp<Context>::RunOnDevice() { ...@@ -27,8 +25,7 @@ void NonMaxSuppressionOp<Context>::RunOnDevice() {
CHECK(Input(0).ndim() == 2 && Input(0).dim(1) == 5) CHECK(Input(0).ndim() == 2 && Input(0).dim(1) == 5)
<< "\nThe dimensions of boxes should be (num_boxes, 5)."; << "\nThe dimensions of boxes should be (num_boxes, 5).";
Output(0)->Reshape({ Input(0).dim(0) }); Output(0)->Reshape({Input(0).dim(0)});
DispatchHelper<TensorTypes<float>>::Call(this, Input(0)); DispatchHelper<TensorTypes<float>>::Call(this, Input(0));
} }
......
#include <dragon/core/workspace.h> #include <dragon/utils/math_functions.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h" #include "../utils/detection_utils.h"
#include "retinanet_decoder_op.h" #include "retinanet_decoder_op.h"
namespace dragon { namespace dragon {
template <class Context> template <typename T> template <class Context>
template <typename T>
void RetinaNetDecoderOp<Context>::DoRunWithType() { void RetinaNetDecoderOp<Context>::DoRunWithType() {
using BT = float; // DType of BBox using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox using BC = CPUContext; // Context of BBox
...@@ -38,22 +38,13 @@ void RetinaNetDecoderOp<Context>::DoRunWithType() { ...@@ -38,22 +38,13 @@ void RetinaNetDecoderOp<Context>::DoRunWithType() {
roi_indices_.resize(num_candidates); roi_indices_.resize(num_candidates);
num_candidates = 0; num_candidates = 0;
for (int i = 0; i < roi_indices_.size(); ++i) for (int i = 0; i < roi_indices_.size(); ++i)
if (scores[i] > score_thr_) if (scores[i] > score_thr_) roi_indices_[num_candidates++] = i;
roi_indices_[num_candidates++] = i;
scores_.resize(num_candidates); scores_.resize(num_candidates);
for (int i = 0; i < num_candidates; ++i) for (int i = 0; i < num_candidates; ++i)
scores_[i] = scores[roi_indices_[i]]; scores_[i] = scores[roi_indices_[i]];
num_proposals = std::min( num_proposals = std::min(num_candidates, (int)pre_nms_topn_);
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition( utils::math::ArgPartition(
num_candidates, num_candidates, num_proposals, true, scores_.data(), indices_);
num_proposals,
true,
scores_.data(),
indices_
);
for (int i = 0; i < num_proposals; ++i) for (int i = 0; i < num_proposals; ++i)
indices_[i] = roi_indices_[indices_[i]]; indices_[i] = roi_indices_[indices_[i]];
// Decode the candidates // Decode the candidates
...@@ -70,22 +61,24 @@ void RetinaNetDecoderOp<Context>::DoRunWithType() { ...@@ -70,22 +61,24 @@ void RetinaNetDecoderOp<Context>::DoRunWithType() {
(int)scales_.size(), (int)scales_.size(),
ratios_.data(), ratios_.data(),
scales_.data(), scales_.data(),
anchors_.data() anchors_.data());
);
utils::detection::GenerateGridAnchors( utils::detection::GenerateGridAnchors(
num_proposals, C, A, num_proposals,
feat_h, feat_w, C,
A,
feat_h,
feat_w,
strides_[i], strides_[i],
base_offset, base_offset,
anchors_.data(), anchors_.data(),
indices_.data(), indices_.data(),
y y);
);
base_offset += (A * K); base_offset += (A * K);
} }
utils::detection::GenerateMCProposals( utils::detection::GenerateMCProposals(
num_proposals, num_proposals,
num_boxes, C, num_boxes,
C,
n, n,
im_h, im_h,
im_w, im_w,
...@@ -94,14 +87,13 @@ void RetinaNetDecoderOp<Context>::DoRunWithType() { ...@@ -94,14 +87,13 @@ void RetinaNetDecoderOp<Context>::DoRunWithType() {
scores, scores,
deltas, deltas,
indices_.data(), indices_.data(),
y y);
);
total_proposals += num_proposals; total_proposals += num_proposals;
y += (num_proposals * 7); y += (num_proposals * 7);
im_info += Input(-1).dim(1); im_info += Input(-1).dim(1);
} }
Output(0)->Reshape({ total_proposals, 7 }); Output(0)->Reshape({total_proposals, 7});
} }
template <class Context> template <class Context>
...@@ -109,12 +101,10 @@ void RetinaNetDecoderOp<Context>::RunOnDevice() { ...@@ -109,12 +101,10 @@ void RetinaNetDecoderOp<Context>::RunOnDevice() {
num_images_ = Input(0).dim(0); num_images_ = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images_) CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_ << "\nExcepted " << num_images_ << " groups info, got "
<< " groups info, got "
<< Input(-1).dim(0) << "."; << Input(-1).dim(0) << ".";
Output(0)->Reshape({ num_images_ * pre_nms_topn_, 7 }); Output(0)->Reshape({num_images_ * pre_nms_topn_, 7});
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3)); DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
} }
...@@ -123,8 +113,6 @@ DEPLOY_CPU(RetinaNetDecoder); ...@@ -123,8 +113,6 @@ DEPLOY_CPU(RetinaNetDecoder);
DEPLOY_CUDA(RetinaNetDecoder); DEPLOY_CUDA(RetinaNetDecoder);
#endif #endif
OPERATOR_SCHEMA(RetinaNetDecoder) OPERATOR_SCHEMA(RetinaNetDecoder).NumInputs(3, INT_MAX).NumOutputs(1, INT_MAX);
.NumInputs(3, INT_MAX)
.NumOutputs(1, INT_MAX);
} // namespace dragon } // namespace dragon
#include <dragon/core/workspace.h> #include <dragon/utils/math_functions.h>
#include <dragon/utils/math_utils.h>
#include "../utils/detection_utils.h" #include "../utils/detection_utils.h"
#include "rpn_decoder_op.h" #include "rpn_decoder_op.h"
namespace dragon { namespace dragon {
template <class Context> template <typename T> template <class Context>
template <typename T>
void RPNDecoderOp<Context>::DoRunWithType() { void RPNDecoderOp<Context>::DoRunWithType() {
using BT = float; // DType of BBox using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox using BC = CPUContext; // Context of BBox
...@@ -36,66 +36,60 @@ void RPNDecoderOp<Context>::DoRunWithType() { ...@@ -36,66 +36,60 @@ void RPNDecoderOp<Context>::DoRunWithType() {
A = int(ratios_.size() * scales_.size()); A = int(ratios_.size() * scales_.size());
// Select the Top-K candidates as proposals // Select the Top-K candidates as proposals
num_candidates = A * K; num_candidates = A * K;
num_proposals = std::min( num_proposals = std::min(num_candidates, (int)pre_nms_topn_);
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition( utils::math::ArgPartition(
num_candidates, num_candidates, num_proposals, true, scores, indices_);
num_proposals,
true, scores, indices_
);
// Decode the candidates // Decode the candidates
anchors_.resize((size_t)(A * 4)); anchors_.resize((size_t)(A * 4));
proposals_.Reshape({ num_proposals, 5 }); proposals_.Reshape({num_proposals, 5});
utils::detection::GenerateAnchors( utils::detection::GenerateAnchors(
strides_[0], strides_[0],
(int)ratios_.size(), (int)ratios_.size(),
(int)scales_.size(), (int)scales_.size(),
ratios_.data(), ratios_.data(),
scales_.data(), scales_.data(),
anchors_.data() anchors_.data());
);
utils::detection::GenerateGridAnchors( utils::detection::GenerateGridAnchors(
num_proposals, A, num_proposals,
feat_h, feat_w, A,
feat_h,
feat_w,
strides_[0], strides_[0],
0, 0,
anchors_.data(), anchors_.data(),
indices_.data(), indices_.data(),
proposals_.template mutable_data<BT, BC>() proposals_.template mutable_data<BT, BC>());
);
utils::detection::GenerateSSProposals( utils::detection::GenerateSSProposals(
K, num_proposals, K,
im_h, im_w, num_proposals,
min_box_h, min_box_w, im_h,
im_w,
min_box_h,
min_box_w,
scores, scores,
deltas, deltas,
indices_.data(), indices_.data(),
proposals_.template mutable_data<BT, BC>() proposals_.template mutable_data<BT, BC>());
);
// Sort, NMS and Retrieve // Sort, NMS and Retrieve
utils::detection::SortProposals( utils::detection::SortProposals(
0, 0,
num_proposals - 1, num_proposals - 1,
num_proposals, num_proposals,
proposals_.template mutable_data<BT, BC>() proposals_.template mutable_data<BT, BC>());
);
utils::detection::ApplyNMS( utils::detection::ApplyNMS(
num_proposals, num_proposals,
post_nms_topn_, post_nms_topn_,
nms_thr_, nms_thr_,
proposals_.template mutable_data<BT, Context>(), proposals_.template mutable_data<BT, Context>(),
roi_indices_.data(), roi_indices_.data(),
num_rois, ctx() num_rois,
); ctx());
utils::detection::RetrieveRoIs( utils::detection::RetrieveRoIs(
num_rois, num_rois,
n, n,
proposals_.template data<BT, BC>(), proposals_.template data<BT, BC>(),
roi_indices_.data(), roi_indices_.data(),
y y);
);
} else if (strides_.size() > 1) { } else if (strides_.size() > 1) {
// Case 2: multiple strides // Case 2: multiple strides
CHECK_EQ(strides_.size(), InputSize() - 3) CHECK_EQ(strides_.size(), InputSize() - 3)
...@@ -106,20 +100,13 @@ void RPNDecoderOp<Context>::DoRunWithType() { ...@@ -106,20 +100,13 @@ void RPNDecoderOp<Context>::DoRunWithType() {
<< "and " << scales_.size() << " scales"; << "and " << scales_.size() << " scales";
// Select the top-k candidates as proposals // Select the top-k candidates as proposals
num_candidates = Input(-3).dim(1); num_candidates = Input(-3).dim(1);
num_proposals = std::min( num_proposals = std::min(num_candidates, (int)pre_nms_topn_);
num_candidates,
(int)pre_nms_topn_
);
utils::math::ArgPartition( utils::math::ArgPartition(
num_candidates, num_candidates, num_proposals, true, scores, indices_);
num_proposals,
true, scores, indices_
);
// Decode the candidates // Decode the candidates
int base_offset = 0; int base_offset = 0;
proposals_.Reshape({ num_proposals, 5 }); proposals_.Reshape({num_proposals, 5});
auto* proposals = proposals_ auto* proposals = proposals_.template mutable_data<BT, BC>();
.template mutable_data<BT, BC>();
for (int i = 0; i < strides_.size(); i++) { for (int i = 0; i < strides_.size(); i++) {
feat_h = Input(i).dim(2); feat_h = Input(i).dim(2);
feat_w = Input(i).dim(3); feat_w = Input(i).dim(3);
...@@ -132,51 +119,43 @@ void RPNDecoderOp<Context>::DoRunWithType() { ...@@ -132,51 +119,43 @@ void RPNDecoderOp<Context>::DoRunWithType() {
1, 1,
ratios_.data(), ratios_.data(),
scales_.data(), scales_.data(),
anchors_.data() anchors_.data());
);
utils::detection::GenerateGridAnchors( utils::detection::GenerateGridAnchors(
num_proposals, A, num_proposals,
feat_h, feat_w, A,
feat_h,
feat_w,
strides_[i], strides_[i],
base_offset, base_offset,
anchors_.data(), anchors_.data(),
indices_.data(), indices_.data(),
proposals proposals);
);
base_offset += (A * K); base_offset += (A * K);
} }
utils::detection::GenerateMSProposals( utils::detection::GenerateMSProposals(
num_candidates, num_candidates,
num_proposals, num_proposals,
im_h, im_w, im_h,
min_box_h, min_box_w, im_w,
min_box_h,
min_box_w,
scores, scores,
deltas, deltas,
&indices_[0], &indices_[0],
proposals proposals);
);
// Sort, NMS and Retrieve // Sort, NMS and Retrieve
utils::detection::SortProposals( utils::detection::SortProposals(
0, 0, num_proposals - 1, num_proposals, proposals);
num_proposals - 1,
num_proposals,
proposals
);
utils::detection::ApplyNMS( utils::detection::ApplyNMS(
num_proposals, num_proposals,
post_nms_topn_, post_nms_topn_,
nms_thr_, nms_thr_,
proposals_.template mutable_data<BT, Context>(), proposals_.template mutable_data<BT, Context>(),
roi_indices_.data(), roi_indices_.data(),
num_rois, ctx()
);
utils::detection::RetrieveRoIs(
num_rois, num_rois,
n, ctx());
proposals, utils::detection::RetrieveRoIs(
roi_indices_.data(), num_rois, n, proposals, roi_indices_.data(), y);
y
);
} else { } else {
LOG(FATAL) << "Excepted at least one stride for proposals."; LOG(FATAL) << "Excepted at least one stride for proposals.";
} }
...@@ -185,23 +164,22 @@ void RPNDecoderOp<Context>::DoRunWithType() { ...@@ -185,23 +164,22 @@ void RPNDecoderOp<Context>::DoRunWithType() {
im_info += Input(-1).dim(1); im_info += Input(-1).dim(1);
} }
Output(0)->Reshape({ total_rois, 5 }); Output(0)->Reshape({total_rois, 5});
// Distribute rois into K bins // Distribute rois into K bins
if (OutputSize() > 1) { if (OutputSize() > 1) {
CHECK_EQ(max_level_ - min_level_ + 1, OutputSize()) CHECK_EQ(max_level_ - min_level_ + 1, OutputSize())
<< "\nExcepted " << OutputSize() << " outputs for levels " << "\nExcepted " << OutputSize() << " outputs for levels "
"between [" << min_level_ << ", " << max_level_ << "]."; << "between [" << min_level_ << ", " << max_level_ << "].";
vector<BT*> ys(OutputSize()); vector<BT*> ys(OutputSize());
vector<vec64_t> bins(OutputSize()); vector<vec64_t> bins(OutputSize());
Tensor RoIs; RoIs.ReshapeLike(*Output(0)); Tensor RoIs;
RoIs.ReshapeLike(*Output(0));
auto* rois = RoIs.template mutable_data<BT, BC>(); auto* rois = RoIs.template mutable_data<BT, BC>();
ctx()->template Copy<BT, BC, BC>( ctx()->template Copy<BT, BC, BC>(
Output(0)->count(), Output(0)->count(), rois, Output(0)->template data<BT, BC>());
rois, Output(0)->template data<BT, BC>()
);
utils::detection::CollectRoIs( utils::detection::CollectRoIs(
total_rois, total_rois,
...@@ -209,11 +187,11 @@ void RPNDecoderOp<Context>::DoRunWithType() { ...@@ -209,11 +187,11 @@ void RPNDecoderOp<Context>::DoRunWithType() {
max_level_, max_level_,
canonical_level_, canonical_level_,
canonical_scale_, canonical_scale_,
rois, bins rois,
); bins);
for (int i = 0; i < OutputSize(); i++) { for (int i = 0; i < OutputSize(); i++) {
Output(i)->Reshape({ std::max((int)bins[i].size(), 1), 5 }); Output(i)->Reshape({std::max((int)bins[i].size(), 1), 5});
ys[i] = Output(i)->template mutable_data<BT, BC>(); ys[i] = Output(i)->template mutable_data<BT, BC>();
} }
...@@ -226,12 +204,11 @@ void RPNDecoderOp<Context>::RunOnDevice() { ...@@ -226,12 +204,11 @@ void RPNDecoderOp<Context>::RunOnDevice() {
num_images_ = Input(0).dim(0); num_images_ = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images_) CHECK_EQ(Input(-1).dim(0), num_images_)
<< "\nExcepted " << num_images_ << "\nExcepted " << num_images_ << " groups info, got "
<< " groups info, got "
<< Input(-1).dim(0) << "."; << Input(-1).dim(0) << ".";
roi_indices_.resize(post_nms_topn_); roi_indices_.resize(post_nms_topn_);
Output(0)->Reshape({ num_images_ * post_nms_topn_, 5 }); Output(0)->Reshape({num_images_ * post_nms_topn_, 5});
DispatchHelper<TensorTypes<float>>::Call(this, Input(-3)); DispatchHelper<TensorTypes<float>>::Call(this, Input(-3));
} }
...@@ -241,8 +218,6 @@ DEPLOY_CPU(RPNDecoder); ...@@ -241,8 +218,6 @@ DEPLOY_CPU(RPNDecoder);
DEPLOY_CUDA(RPNDecoder); DEPLOY_CUDA(RPNDecoder);
#endif #endif
OPERATOR_SCHEMA(RPNDecoder) OPERATOR_SCHEMA(RPNDecoder).NumInputs(3, INT_MAX).NumOutputs(1, INT_MAX);
.NumInputs(3, INT_MAX)
.NumOutputs(1, INT_MAX);
} // namespace dragon } // namespace dragon
...@@ -15,25 +15,35 @@ from __future__ import absolute_import ...@@ -15,25 +15,35 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import glob
from distutils.core import setup from distutils.core import setup
from dragon.tools import cpp_extension
from dragon.tools import cpp_extension
if cpp_extension.CUDA_HOME is not None and \ if cpp_extension.CUDA_HOME is not None and \
cpp_extension._cuda.is_available(): cpp_extension._cuda.is_available():
Extension = cpp_extension.CUDAExtension Extension = cpp_extension.CUDAExtension
else: else:
Extension = cpp_extension.CppExtension Extension = cpp_extension.CppExtension
def find_sources(*dirs):
    """Return the extension source files matched under the given directories.

    Each entry of ``dirs`` is combined with ``'/*' + suffix`` and expanded
    via ``glob.glob(..., recursive=True)``, so a ``'**'`` entry is expanded
    recursively by glob. ``.cc`` files are always collected; ``.cu`` files
    are collected only when ``Extension`` is ``cpp_extension.CUDAExtension``.

    Results are grouped per directory, then per suffix, in glob's own order.
    """
    cuda_enabled = Extension is cpp_extension.CUDAExtension
    suffixes = ['.cc', '.cu'] if cuda_enabled else ['.cc']
    found = []
    for directory in dirs:
        for suffix in suffixes:
            # Plain '/' concatenation keeps the pattern identical on all platforms.
            pattern = directory + '/*' + suffix
            found.extend(glob.glob(pattern, recursive=True))
    return found
ext_modules = [ ext_modules = [
Extension( Extension(
name='install.lib.modules._C', name='install.lib.modules._C',
sources=[ sources=find_sources('**'),
'utils/detection_utils.cc',
'utils/detection_utils.cu',
'operators/nms_op.cc',
'operators/retinanet_decoder_op.cc',
'operators/rpn_decoder_op.cc',
],
), ),
] ]
......
#include <dragon/core/context.h>
#include "detection_utils.h" #include "detection_utils.h"
#include <dragon/core/context.h>
namespace dragon { namespace dragon {
...@@ -9,8 +9,7 @@ namespace detection { ...@@ -9,8 +9,7 @@ namespace detection {
template <typename T> template <typename T>
T IoU(const T A[], const T B[]) { T IoU(const T A[], const T B[]) {
if (A[0] > B[2] || A[1] > B[3] || if (A[0] > B[2] || A[1] > B[3] || A[2] < B[0] || A[3] < B[1]) return 0;
A[2] < B[0] || A[3] < B[1]) return 0;
const T x1 = std::max(A[0], B[0]); const T x1 = std::max(A[0], B[0]);
const T y1 = std::max(A[1], B[1]); const T y1 = std::max(A[1], B[1]);
const T x2 = std::min(A[2], B[2]); const T x2 = std::min(A[2], B[2]);
...@@ -23,7 +22,8 @@ T IoU(const T A[], const T B[]) { ...@@ -23,7 +22,8 @@ T IoU(const T A[], const T B[]) {
return area / (A_area + B_area - area); return area / (A_area + B_area - area);
} }
template <> void ApplyNMS<float, CPUContext>( template <>
void ApplyNMS<float, CPUContext>(
const int num_boxes, const int num_boxes,
const int max_keeps, const int max_keeps,
const float thresh, const float thresh,
...@@ -33,16 +33,17 @@ template <> void ApplyNMS<float, CPUContext>( ...@@ -33,16 +33,17 @@ template <> void ApplyNMS<float, CPUContext>(
CPUContext* ctx) { CPUContext* ctx) {
int count = 0; int count = 0;
std::vector<char> is_dead(num_boxes); std::vector<char> is_dead(num_boxes);
for (int i = 0; i < num_boxes; ++i) is_dead[i] = 0; for (int i = 0; i < num_boxes; ++i)
is_dead[i] = 0;
for (int i = 0; i < num_boxes; ++i) { for (int i = 0; i < num_boxes; ++i) {
if (is_dead[i]) continue; if (is_dead[i]) continue;
keep_indices[count++] = i; keep_indices[count++] = i;
if (count == max_keeps) break; if (count == max_keeps) break;
for (int j = i + 1; j < num_boxes; ++j) for (int j = i + 1; j < num_boxes; ++j)
if (!is_dead[j] && IoU(&boxes[i * 5], if (!is_dead[j] && IoU(&boxes[i * 5], &boxes[j * 5]) > thresh) {
&boxes[j * 5]) > thresh)
is_dead[j] = 1; is_dead[j] = 1;
} }
}
num_keep = count; num_keep = count;
} }
......
...@@ -9,16 +9,13 @@ namespace utils { ...@@ -9,16 +9,13 @@ namespace utils {
namespace detection { namespace detection {
#define DIV_UP(m,n) ((m) / (n) + ((m) % (n) > 0)) #define DIV_UP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define NUM_THREADS 64 #define NUM_THREADS 64
namespace { namespace {
template <typename T> template <typename T>
__device__ bool _CheckIoU( __device__ bool _CheckIoU(const T* a, const T* b, const float thresh) {
const T* a,
const T* b,
const float thresh) {
const T x1 = max(a[0], b[0]); const T x1 = max(a[0], b[0]);
const T y1 = max(a[1], b[1]); const T y1 = max(a[1], b[1]);
const T x2 = min(a[2], b[2]); const T x2 = min(a[2], b[2]);
...@@ -74,7 +71,8 @@ __global__ void _NonMaxSuppression( ...@@ -74,7 +71,8 @@ __global__ void _NonMaxSuppression(
} // namespace } // namespace
template <> void ApplyNMS<float, CUDAContext>( template <>
void ApplyNMS<float, CUDAContext>(
const int num_boxes, const int num_boxes,
const int max_keeps, const int max_keeps,
const float thresh, const float thresh,
...@@ -87,23 +85,18 @@ template <> void ApplyNMS<float, CUDAContext>( ...@@ -87,23 +85,18 @@ template <> void ApplyNMS<float, CUDAContext>(
vector<uint64_t> mask_host(num_boxes * num_blocks); vector<uint64_t> mask_host(num_boxes * num_blocks);
auto* mask_dev = (uint64_t*)ctx->New(mask_host.size() * sizeof(uint64_t)); auto* mask_dev = (uint64_t*)ctx->New(mask_host.size() * sizeof(uint64_t));
_NonMaxSuppression _NonMaxSuppression<<<
<<< dim3(num_blocks, num_blocks), NUM_THREADS, dim3(num_blocks, num_blocks),
0, ctx->cuda_stream() >>>( NUM_THREADS,
num_blocks, 0,
num_boxes, ctx->cuda_stream()>>>(num_blocks, num_boxes, thresh, boxes, mask_dev);
thresh,
boxes,
mask_dev
);
CUDA_CHECK(cudaMemcpyAsync( CUDA_CHECK(cudaMemcpyAsync(
mask_host.data(), mask_host.data(),
mask_dev, mask_dev,
mask_host.size() * sizeof(uint64_t), mask_host.size() * sizeof(uint64_t),
cudaMemcpyDeviceToHost, cudaMemcpyDeviceToHost,
ctx->cuda_stream() ctx->cuda_stream()));
));
ctx->FinishDeviceComputation(); ctx->FinishDeviceComputation();
...@@ -117,12 +110,13 @@ template <> void ApplyNMS<float, CUDAContext>( ...@@ -117,12 +110,13 @@ template <> void ApplyNMS<float, CUDAContext>(
if (!(dead_bit[nblock] & (1ULL << inblock))) { if (!(dead_bit[nblock] & (1ULL << inblock))) {
keep_indices[num_selected++] = i; keep_indices[num_selected++] = i;
auto* mask_i = &mask_host[0] + i * num_blocks; auto* mask_i = &mask_host[0] + i * num_blocks;
for (int j = nblock; j < num_blocks; ++j) dead_bit[j] |= mask_i[j]; for (int j = nblock; j < num_blocks; ++j)
dead_bit[j] |= mask_i[j];
if (num_selected == max_keeps) break; if (num_selected == max_keeps) break;
} }
} }
num_keep = num_selected;
num_keep = num_selected;
ctx->Delete(mask_dev); ctx->Delete(mask_dev);
} }
......
...@@ -13,8 +13,7 @@ ...@@ -13,8 +13,7 @@
#ifndef SEETADET_CXX_UTILS_DETECTION_UTILS_H_ #ifndef SEETADET_CXX_UTILS_DETECTION_UTILS_H_
#define SEETADET_CXX_UTILS_DETECTION_UTILS_H_ #define SEETADET_CXX_UTILS_DETECTION_UTILS_H_
#include "dragon/core/context.h" #include "dragon/core/common.h"
#include "dragon/core/operator.h"
namespace dragon { namespace dragon {
...@@ -24,7 +23,9 @@ namespace detection { ...@@ -24,7 +23,9 @@ namespace detection {
#define ROUND(x) ((int)((x) + (T)0.5)) #define ROUND(x) ((int)((x) + (T)0.5))
/******************** BBox ********************/ /*!
* Box API
*/
template <typename T> template <typename T>
inline int FilterBoxes( inline int FilterBoxes(
...@@ -94,7 +95,9 @@ inline void BBoxTransform( ...@@ -94,7 +95,9 @@ inline void BBoxTransform(
bbox[3] = std::max((T)0, std::min(bbox[3], im_h - 1)) / im_scale_h; bbox[3] = std::max((T)0, std::min(bbox[3], im_h - 1)) / im_scale_h;
} }
/******************** Anchor ********************/ /*!
* Anchor API
*/
template <typename T> template <typename T>
inline void GenerateAnchors( inline void GenerateAnchors(
...@@ -183,7 +186,9 @@ inline void GenerateGridAnchors( ...@@ -183,7 +186,9 @@ inline void GenerateGridAnchors(
} }
} }
/******************** Proposal ********************/ /*!
* Proposal API
*/
template <typename T> template <typename T>
void GenerateSSProposals( void GenerateSSProposals(
...@@ -210,12 +215,16 @@ void GenerateSSProposals( ...@@ -210,12 +215,16 @@ void GenerateSSProposals(
d_log_w = delta[(a * 4 + 2) * K]; d_log_w = delta[(a * 4 + 2) * K];
d_log_h = delta[(a * 4 + 3) * K]; d_log_h = delta[(a * 4 + 3) * K];
proposal[4] = FilterBoxes( proposal[4] = FilterBoxes(
dx, dy, dx,
d_log_w, d_log_h, dy,
im_w, im_h, d_log_w,
min_box_w, min_box_h, d_log_h,
proposal im_w,
) * scores[index]; im_h,
min_box_w,
min_box_h,
proposal) *
scores[index];
proposal += 5; proposal += 5;
} }
} }
...@@ -244,12 +253,16 @@ void GenerateMSProposals( ...@@ -244,12 +253,16 @@ void GenerateMSProposals(
d_log_w = deltas[num_candidates_2x + index]; d_log_w = deltas[num_candidates_2x + index];
d_log_h = deltas[num_candidates_3x + index]; d_log_h = deltas[num_candidates_3x + index];
proposal[4] = FilterBoxes( proposal[4] = FilterBoxes(
dx, dy, dx,
d_log_w, d_log_h, dy,
im_w, im_h, d_log_w,
min_box_w, min_box_h, d_log_h,
proposal im_w,
) * scores[index]; im_h,
min_box_w,
min_box_h,
proposal) *
scores[index];
proposal += 5; proposal += 5;
} }
} }
...@@ -282,12 +295,15 @@ void GenerateMCProposals( ...@@ -282,12 +295,15 @@ void GenerateMCProposals(
d_log_h = deltas[num_boxes_3x + index]; d_log_h = deltas[num_boxes_3x + index];
proposal[0] = im_idx; proposal[0] = im_idx;
BBoxTransform( BBoxTransform(
dx, dy, dx,
d_log_w, d_log_h, dy,
im_w, im_h, d_log_w,
im_scale_h, im_scale_w, d_log_h,
proposal + 1 im_w,
); im_h,
im_scale_h,
im_scale_w,
proposal + 1);
proposal[5] = scores[indices[i]]; proposal[5] = scores[indices[i]];
proposal[6] = cls + 1; proposal[6] = cls + 1;
proposal += 7; proposal += 7;
...@@ -295,16 +311,15 @@ void GenerateMCProposals( ...@@ -295,16 +311,15 @@ void GenerateMCProposals(
} }
template <typename T> template <typename T>
inline void SortProposals( inline void
const int start, SortProposals(const int start, const int end, const int num_top, T* proposals) {
const int end,
const int num_top,
T* proposals) {
const T pivot_score = proposals[start * 5 + 4]; const T pivot_score = proposals[start * 5 + 4];
int left = start + 1, right = end; int left = start + 1, right = end;
while (left <= right) { while (left <= right) {
while (left <= end && proposals[left * 5 + 4] >= pivot_score) ++left; while (left <= end && proposals[left * 5 + 4] >= pivot_score)
while (right > start && proposals[right * 5 + 4] <= pivot_score) --right; ++left;
while (right > start && proposals[right * 5 + 4] <= pivot_score)
--right;
if (left <= right) { if (left <= right) {
for (int i = 0; i < 5; ++i) for (int i = 0; i < 5; ++i)
std::swap(proposals[left * 5 + i], proposals[right * 5 + i]); std::swap(proposals[left * 5 + i], proposals[right * 5 + i]);
...@@ -348,8 +363,8 @@ inline int roi_level( ...@@ -348,8 +363,8 @@ inline int roi_level(
T w = roi[3] - roi[1] + 1; T w = roi[3] - roi[1] + 1;
T h = roi[4] - roi[2] + 1; T h = roi[4] - roi[2] + 1;
// Refer the settings of paper // Refer the settings of paper
int level = canonical_level + std::log2( int level = canonical_level +
std::max(std::sqrt(w * h), (T)1) / (T)canonical_scale); std::log2(std::max(std::sqrt(w * h), (T)1) / (T)canonical_scale);
return std::min(max_level, std::max(min_level, level)); return std::min(max_level, std::max(min_level, level));
} }
...@@ -364,8 +379,8 @@ inline void CollectRoIs( ...@@ -364,8 +379,8 @@ inline void CollectRoIs(
vector<vec64_t>& roi_bins) { vector<vec64_t>& roi_bins) {
const T* roi = rois; const T* roi = rois;
for (int i = 0; i < num_rois; ++i) { for (int i = 0; i < num_rois; ++i) {
int bin_idx = roi_level(min_level, max_level, int bin_idx =
canonical_level, canonical_scale, roi); roi_level(min_level, max_level, canonical_level, canonical_scale, roi);
bin_idx = std::max(bin_idx - min_level, 0); bin_idx = std::max(bin_idx - min_level, 0);
roi_bins[bin_idx].push_back(i); roi_bins[bin_idx].push_back(i);
roi += 5; roi += 5;
...@@ -385,14 +400,17 @@ inline void DistributeRoIs( ...@@ -385,14 +400,17 @@ inline void DistributeRoIs(
} else { } else {
for (int j = 0; j < roi_bins[i].size(); ++j) { for (int j = 0; j < roi_bins[i].size(); ++j) {
const T* roi = rois + roi_bins[i][j] * 5; const T* roi = rois + roi_bins[i][j] * 5;
for (int k = 0; k < 5; ++k) y[k] = roi[k]; for (int k = 0; k < 5; ++k)
y[k] = roi[k];
y += 5; y += 5;
} }
} }
} }
} }
/******************** NMS ********************/ /*!
* NMS API
*/
template <typename T, class Context> template <typename T, class Context>
void ApplyNMS( void ApplyNMS(
......
...@@ -52,12 +52,9 @@ class AnchorTarget(object): ...@@ -52,12 +52,9 @@ class AnchorTarget(object):
gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images) gt_boxes_wide = box_util.dismantle_boxes(gt_boxes, num_images)
# Generate grid anchors from base # Generate grid anchors from base
all_anchors = \ grid_shapes = [f.shape[-2:] for f in features]
generate_grid_anchors( all_anchors = generate_grid_anchors(
features, grid_shapes, self.base_anchors, self.strides)
self.base_anchors,
self.strides,
)
num_anchors = all_anchors.shape[0] num_anchors = all_anchors.shape[0]
# Label: ``1`` is positive, ``0`` is negative, ``-1` is don't care # Label: ``1`` is positive, ``0`` is negative, ``-1` is don't care
......
...@@ -58,12 +58,9 @@ class Proposal(object): ...@@ -58,12 +58,9 @@ class Proposal(object):
# Get resources # Get resources
num_images = ims_info.shape[0] num_images = ims_info.shape[0]
all_anchors = \ grid_shapes = [f.shape[-2:] for f in features]
generate_grid_anchors( all_anchors = generate_grid_anchors(
features, grid_shapes, self.base_anchors, self.strides)
self.base_anchors,
self.strides,
)
# Prepare for the outputs # Prepare for the outputs
batch_rois = [] batch_rois = []
......
...@@ -19,40 +19,40 @@ import numpy as np ...@@ -19,40 +19,40 @@ import numpy as np
from seetadet.core.config import cfg from seetadet.core.config import cfg
def generate_grid_anchors(features, base_anchors, strides): def generate_grid_anchors(grid_shapes, base_anchors, strides):
num_strides = len(strides) num_strides = len(strides)
if len(features) != num_strides: if len(grid_shapes) != num_strides:
raise ValueError( raise ValueError(
'Given %d features for %d strides.' 'Given %d grids for %d strides.'
% (len(features), num_strides) % (len(grid_shapes), num_strides)
) )
# Generate proposals from shifted anchors # Generate proposals from shifted anchors
anchors_to_pack = [] anchors_to_pack = []
for i in range(len(features)): for i in range(len(grid_shapes)):
height, width = features[i].shape[-2:] height, width = grid_shapes[i]
shift_x = np.arange(0, width) * strides[i] shift_x = np.arange(0, width) * strides[i]
shift_y = np.arange(0, height) * strides[i] shift_y = np.arange(0, height) * strides[i]
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose() shift_x.ravel(), shift_y.ravel())).transpose()
# Add A anchors (1, A, 4) to # Add a anchors (1, a, 4) to
# cell K shifts (K, 1, 4) to get # cell k shifts (k, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (k, a, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (k * a, 4) shifted anchors
A = base_anchors[i].shape[0] a = base_anchors[i].shape[0]
K = shifts.shape[0] k = shifts.shape[0]
anchors = (base_anchors[i].reshape((1, A, 4)) + anchors = (base_anchors[i].reshape((1, a, 4)) +
shifts.reshape((1, K, 4)).transpose((1, 0, 2))) shifts.reshape((1, k, 4)).transpose((1, 0, 2)))
if num_strides > 1: if num_strides > 1:
# Transpose from (K, A, 4) to (A, K, 4) # Transpose from (K, A, 4) to (A, K, 4)
# We will pack it with other strides to # We will pack it with other strides to
# match the data format of (N, C, H, W) # match the data format of (N, C, H, W)
anchors = anchors.transpose((1, 0, 2)) anchors = anchors.transpose((1, 0, 2))
anchors = anchors.reshape((A * K, 4)) anchors = anchors.reshape((a * k, 4))
anchors_to_pack.append(anchors) anchors_to_pack.append(anchors)
else: else:
# Original order of Faster R-CNN # Original order of Faster R-CNN
return anchors.reshape((K * A, 4)) return anchors.reshape((k * a, 4))
return np.vstack(anchors_to_pack) return np.vstack(anchors_to_pack)
......
...@@ -46,6 +46,9 @@ class AnchorTarget(object): ...@@ -46,6 +46,9 @@ class AnchorTarget(object):
ratios=self.ratios, ratios=self.ratios,
sizes=sizes, sizes=sizes,
)) ))
# Store the cached grid anchors
self.last_grid_shapes = None
self.last_grid_anchors = None
def __call__(self, features, gt_boxes): def __call__(self, features, gt_boxes):
num_images = cfg.TRAIN.IMS_PER_BATCH num_images = cfg.TRAIN.IMS_PER_BATCH
...@@ -58,9 +61,14 @@ class AnchorTarget(object): ...@@ -58,9 +61,14 @@ class AnchorTarget(object):
) )
# Generate grid anchors from base # Generate grid anchors from base
all_anchors = \ grid_shapes = [f.shape[-2:] for f in features]
if grid_shapes == self.last_grid_shapes:
all_anchors = self.last_grid_anchors
else:
self.last_grid_shapes = grid_shapes
self.last_grid_anchors = all_anchors = \
generate_grid_anchors( generate_grid_anchors(
features, grid_shapes,
self.base_anchors, self.base_anchors,
self.strides, self.strides,
) )
......
...@@ -15,6 +15,7 @@ from __future__ import print_function ...@@ -15,6 +15,7 @@ from __future__ import print_function
import types import types
import dragon
import dragon.vm.torch as torch import dragon.vm.torch as torch
import numpy as np import numpy as np
...@@ -59,7 +60,7 @@ def ims_detect(detector, raw_images): ...@@ -59,7 +60,7 @@ def ims_detect(detector, raw_images):
# Unpack results # Unpack results
results = outputs['detections'] results = outputs['detections']
detections = [[] for _ in range(len((raw_images)))] detections = [[] for _ in range(len(raw_images))]
for i in range(len(ims)): for i in range(len(ims)):
inds = np.where(results[:, 0].astype(np.int32) == i)[0] inds = np.where(results[:, 0].astype(np.int32) == i)[0]
...@@ -126,6 +127,6 @@ def test_net(weights, num_classes, q_in, q_out, device): ...@@ -126,6 +127,6 @@ def test_net(weights, num_classes, q_in, q_out, device):
q_out.put(( q_out.put((
indices[i], indices[i],
dict([('im_detect', _t['im_detect'].average_time), dict([('im_detect', _t['im_detect'].average_time),
('misc',_t['misc'].average_time)]), ('misc', _t['misc'].average_time)]),
dict([('boxes', boxes_this_image)]), dict([('boxes', boxes_this_image)]),
)) ))
...@@ -45,14 +45,14 @@ class PriorBox(object): ...@@ -45,14 +45,14 @@ class PriorBox(object):
aspect_ratios[i], aspect_ratios[i],
) )
) )
self.grid_anchors = None # Store the cached grid anchors
self.last_grid_anchors = None
def __call__(self, features): def __call__(self, features):
if self.grid_anchors is not None: if self.last_grid_anchors is not None:
return self.grid_anchors return self.last_grid_anchors
self.grid_anchors = []
all_anchors = []
for i in range(len(self.strides)): for i in range(len(self.strides)):
# 1. Generate base grids # 1. Generate base grids
height, width = features[i].shape[-2:] height, width = features[i].shape[-2:]
...@@ -61,23 +61,23 @@ class PriorBox(object): ...@@ -61,23 +61,23 @@ class PriorBox(object):
shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x, shift_y = np.meshgrid(shift_x, shift_y)
# 2. Apply anchors on base grids # 2. Apply anchors on base grids
# Add A anchors (1, A, 4) to # Add a anchors (1, a, 4) to
# cell K shifts (K, 1, 4) to get # cell k shifts (k, 1, 4) to get
# shift anchors (K, A, 4) # shift anchors (k, a, 4)
# Reshape to (K * A, 4) shifted anchors # Reshape to (k * a, 4) shifted anchors
A = self.base_anchors[i].shape[0] a = self.base_anchors[i].shape[0]
D = self.base_anchors[i].shape[1] d = self.base_anchors[i].shape[1]
shifts = np.vstack(( shifts = np.vstack((
shift_x.ravel(), shift_x.ravel(),
shift_y.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_x.ravel(),
shift_y.ravel()) shift_y.ravel())
).transpose() ).transpose()
K = shifts.shape[0] # K = map_h * map_w k = shifts.shape[0] # k = map_h * map_w
anchors = (self.base_anchors[i].reshape((1, A, D)) + anchors = (self.base_anchors[i].reshape((1, a, d)) +
shifts.reshape((1, K, D)).transpose((1, 0, 2))) shifts.reshape((1, k, d)).transpose((1, 0, 2)))
anchors = anchors.reshape((K * A, D)).astype(np.float32) anchors = anchors.reshape((k * a, d)).astype(np.float32)
self.grid_anchors.append(anchors) all_anchors.append(anchors)
self.grid_anchors = np.concatenate(self.grid_anchors)
return self.grid_anchors self.last_grid_anchors = np.concatenate(all_anchors)
return self.last_grid_anchors
...@@ -32,11 +32,9 @@ def get_images(ims): ...@@ -32,11 +32,9 @@ def get_images(ims):
for im in ims: for im in ims:
im_scales.append((float(out_size) / im.shape[0], im_scales.append((float(out_size) / im.shape[0],
float(out_size) / im.shape[1])) float(out_size) / im.shape[1]))
processed_ims.append( processed_ims.append(cv2.resize(
cv2.resize(
im, (out_size, out_size), im, (out_size, out_size),
interpolation=cv2.INTER_AREA, interpolation=cv2.INTER_AREA))
))
if ims[0].dtype == 'uint16': if ims[0].dtype == 'uint16':
ims_blob = np.array(processed_ims, dtype='float32') / 256. ims_blob = np.array(processed_ims, dtype='float32') / 256.
else: else:
......
...@@ -49,7 +49,7 @@ class Distort(object): ...@@ -49,7 +49,7 @@ class Distort(object):
] ]
def apply(self, img, boxes=None): def apply(self, img, boxes=None):
if self._prob > 0: self._prob = 0.5 if cfg.TRAIN.USE_COLOR_JITTER else 0
img = PIL.Image.fromarray(img) img = PIL.Image.fromarray(img)
for transform_fn, prob in self._transforms: for transform_fn, prob in self._transforms:
if npr.uniform() < prob: if npr.uniform() < prob:
......
...@@ -27,8 +27,9 @@ if __name__ == '__main__': ...@@ -27,8 +27,9 @@ if __name__ == '__main__':
np.random.seed(3) np.random.seed(3)
cfg.TRAIN.SCALES = [300] cfg.TRAIN.SCALES = [300]
cfg.TRAIN.RANDOM_SCALES = [0.25, 1.00] cfg.TRAIN.RANDOM_SCALES = [0.25, 1.00]
cfg.TRAIN.USE_COLOR_JITTER = True
augmentor = transforms.Compose( transformer = transforms.Compose(
transforms.Distort(), transforms.Distort(),
transforms.Expand(), transforms.Expand(),
transforms.Sample(), transforms.Sample(),
...@@ -38,12 +39,12 @@ if __name__ == '__main__': ...@@ -38,12 +39,12 @@ if __name__ == '__main__':
while True: while True:
img = cv2.imread('cat.jpg') img = cv2.imread('cat.jpg')
boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32) boxes = np.array([[0.33, 0.04, 0.71, 0.98]], dtype=np.float32)
img, boxes = augmentor(img, boxes) img, boxes = transformer(img, boxes)
for box in boxes: for box in boxes:
x1 = int(box[0] * img.shape[1]) x1 = int(box[0] * img.shape[1])
y1 = int(box[1] * img.shape[0]) y1 = int(box[1] * img.shape[0])
x2 = int(box[2] * img.shape[1]) x2 = int(box[2] * img.shape[1])
y2 = int(box[3] * img.shape[0]) y2 = int(box[3] * img.shape[0])
cv2.rectangle(img, (x1, y1), (x2, y2), (188, 119, 64), 2) cv2.rectangle(img, (x1, y1), (x2, y2), (188, 119, 64), 2)
cv2.imshow('Sample', img) cv2.imshow('Transforms - Preview', img)
cv2.waitKey(0) cv2.waitKey(0)
...@@ -70,7 +70,8 @@ class Pipeline(dali.Pipeline): ...@@ -70,7 +70,8 @@ class Pipeline(dali.Pipeline):
# Decode image # Decode image
image = self.decode(inputs['image']) image = self.decode(inputs['image'])
# Augment the color space # Augment the color space if necessary
if cfg.TRAIN.USE_COLOR_JITTER:
image = self.hsv( image = self.hsv(
self.brightness_contrast( self.brightness_contrast(
image, image,
......
...@@ -18,7 +18,7 @@ from __future__ import division ...@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os
from seetadet.datasets import kpl_record from seetadet.datasets import kpl_dataset
def get_dataset(name): def get_dataset(name):
...@@ -42,5 +42,5 @@ def list_dataset(): ...@@ -42,5 +42,5 @@ def list_dataset():
_GLOBAL_REGISTERED_DATASET = { _GLOBAL_REGISTERED_DATASET = {
'default': lambda source: 'default': lambda source:
kpl_record.KPLRecordDataset(source), kpl_dataset.KPLRecordDataset(source),
} }
...@@ -149,8 +149,10 @@ class AirNet(nn.Module): ...@@ -149,8 +149,10 @@ class AirNet(nn.Module):
x = self.layer1(x) x = self.layer1(x)
outputs = [None, None, self.layer2(x)] outputs = [None, None, self.layer2(x)]
if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])] if hasattr(self, 'layer3'):
if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])] outputs += [self.layer3(outputs[-1])]
if hasattr(self, 'layer4'):
outputs += [self.layer4(outputs[-1])]
return outputs return outputs
......
...@@ -39,16 +39,17 @@ class Detector(nn.Module): ...@@ -39,16 +39,17 @@ class Detector(nn.Module):
backbone = cfg.MODEL.BACKBONE.lower().split('.') backbone = cfg.MODEL.BACKBONE.lower().split('.')
body, modules = backbone[0], backbone[1:] body, modules = backbone[0], backbone[1:]
# + DataLoader # DataLoader
self.data_loader = None
self.data_loader_cls = importlib.import_module( self.data_loader_cls = importlib.import_module(
'seetadet.algo.{}'.format(model)).DataLoader 'seetadet.algo.{}'.format(model)).DataLoader
self.bootstrap = vision.Bootstrap() self.bootstrap = vision.Bootstrap()
# + FeatureExtractor # FeatureExtractor
self.body = backbones.get(body)() self.body = backbones.get(body)()
feature_dims = self.body.feature_dims feature_dims = self.body.feature_dims
# + FeatureEnhancer # FeatureEnhancer
if 'fpn' in modules: if 'fpn' in modules:
self.fpn = models.FPN(feature_dims) self.fpn = models.FPN(feature_dims)
feature_dims = self.fpn.feature_dims feature_dims = self.fpn.feature_dims
...@@ -57,7 +58,7 @@ class Detector(nn.Module): ...@@ -57,7 +58,7 @@ class Detector(nn.Module):
else: else:
feature_dims = [feature_dims[-1]] feature_dims = [feature_dims[-1]]
# + Detection Modules # Detection Modules
if 'rcnn' in model: if 'rcnn' in model:
self.rpn = models.RPN(feature_dims[0]) self.rpn = models.RPN(feature_dims[0])
if 'faster' in model: if 'faster' in model:
...@@ -106,7 +107,7 @@ class Detector(nn.Module): ...@@ -106,7 +107,7 @@ class Detector(nn.Module):
if inputs is None: if inputs is None:
# 1) Training: <= DataLayer # 1) Training: <= DataLayer
# 2) Inference: <= Given # 2) Inference: <= Given
if not hasattr(self, 'data_loader'): if self.data_loader is None:
self.data_loader = self.data_loader_cls() self.data_loader = self.data_loader_cls()
inputs = self.data_loader() inputs = self.data_loader()
...@@ -171,29 +172,34 @@ class Detector(nn.Module): ...@@ -171,29 +172,34 @@ class Detector(nn.Module):
# Merge Affine into Convolution # # Merge Affine into Convolution #
################################### ###################################
last_module = None last_module = None
for e in self.modules(): for module in self.modules():
if isinstance(e, nn.Affine) and \ if isinstance(module, nn.Affine) and \
isinstance(last_module, nn.Conv2d): isinstance(last_module, nn.Conv2d):
if last_module.bias is None: if last_module.bias is None:
delattr(last_module, 'bias') delattr(last_module, 'bias')
e.forward = lambda x: x module.forward = lambda x: x
last_module.bias = e.bias last_module.bias = module.bias
last_module.weight.data.mul_(e.weight.data) weight = module.weight.data.view(
last_module = e 0, *([1] * (last_module.weight.ndimension() - 1)))
last_module.weight.data.mul_(weight)
last_module = module
###################################### ######################################
# Merge BatchNorm into Convolution # # Merge BatchNorm into Convolution #
###################################### ######################################
last_module = None last_module = None
for e in self.modules(): for module in self.modules():
if isinstance(e, nn.BatchNorm2d) and \ if isinstance(module, nn.BatchNorm2d) and \
isinstance(last_module, nn.Conv2d): isinstance(last_module, nn.Conv2d):
if last_module.bias is None: if last_module.bias is None:
delattr(last_module, 'bias') delattr(last_module, 'bias')
e.forward = lambda x: x module.forward = lambda x: x
term = torch.sqrt(e.running_var.data + e.eps) term = torch.sqrt(module.running_var.data + module.eps)
term = e.weight.data / term term = module.weight.data / term
last_module.bias = e.bias.data - term * e.running_mean.data last_module.bias = \
module.bias.data - \
term * module.running_mean.data
term = term.view(0, *([1] * (last_module.weight.ndimension() - 1)))
if last_module.weight.dtype == 'float16': if last_module.weight.dtype == 'float16':
last_module.bias.half_() last_module.bias.half_()
weight = last_module.weight.data.float() weight = last_module.weight.data.float()
...@@ -201,7 +207,7 @@ class Detector(nn.Module): ...@@ -201,7 +207,7 @@ class Detector(nn.Module):
last_module.weight.copy_(weight) last_module.weight.copy_(weight)
else: else:
last_module.weight.data.mul_(term) last_module.weight.data.mul_(term)
last_module = e last_module = module
def new_detector(device, weights=None, training=False): def new_detector(device, weights=None, training=False):
......
...@@ -31,7 +31,8 @@ class FPN(nn.Module): ...@@ -31,7 +31,8 @@ class FPN(nn.Module):
dim = cfg.FPN.DIM dim = cfg.FPN.DIM
self.C = nn.ModuleList() self.C = nn.ModuleList()
self.P = nn.ModuleList() self.P = nn.ModuleList()
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1): self.highest_backbone_lvl = min(cfg.FPN.RPN_MAX_LEVEL, HIGHEST_BACKBONE_LVL)
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, self.highest_backbone_lvl + 1):
self.C.append(nn.Conv1x1(feature_dims[lvl - 1], dim, bias=True)) self.C.append(nn.Conv1x1(feature_dims[lvl - 1], dim, bias=True))
self.P.append(nn.Conv3x3(dim, dim, bias=True)) self.P.append(nn.Conv3x3(dim, dim, bias=True))
if 'rcnn' in cfg.MODEL.TYPE: if 'rcnn' in cfg.MODEL.TYPE:
...@@ -40,8 +41,8 @@ class FPN(nn.Module): ...@@ -40,8 +41,8 @@ class FPN(nn.Module):
else: else:
self.apply_func = self.apply_on_generic self.apply_func = self.apply_on_generic
self.relu = nn.ReLU(inplace=False) self.relu = nn.ReLU(inplace=False)
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1): for lvl in range(self.highest_backbone_lvl + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else dim dim_in = feature_dims[-1] if lvl == self.highest_backbone_lvl + 1 else dim
self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True)) self.P.append(nn.Conv3x3(dim_in, dim, stride=2, bias=True))
self.feature_dims = [dim] self.feature_dims = [dim]
self.coarsest_stride = cfg.MODEL.COARSEST_STRIDE self.coarsest_stride = cfg.MODEL.COARSEST_STRIDE
...@@ -56,12 +57,12 @@ class FPN(nn.Module): ...@@ -56,12 +57,12 @@ class FPN(nn.Module):
def apply_on_rcnn(self, features): def apply_on_rcnn(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)] outputs = [self.P[self.highest_backbone_lvl - min_lvl](fpn_input)]
# Apply max pool for higher features # Apply max pool for higher features
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(self.highest_backbone_lvl + 1, max_lvl + 1):
outputs.append(self.maxpool(outputs[-1])) outputs.append(self.maxpool(outputs[-1]))
# Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(self.highest_backbone_lvl - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
if self.coarsest_stride > 0: if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample( upscale_output = nn_funcs.upsample(
...@@ -76,15 +77,15 @@ class FPN(nn.Module): ...@@ -76,15 +77,15 @@ class FPN(nn.Module):
def apply_on_generic(self, features): def apply_on_generic(self, features):
fpn_input = self.C[-1](features[-1]) fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)] outputs = [self.P[self.highest_backbone_lvl - min_lvl](fpn_input)]
# Add extra convolutions for higher features # Add extra convolutions for higher features
extra_input = features[-1] extra_input = features[-1]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_lvl + 1): for i in range(self.highest_backbone_lvl + 1, max_lvl + 1):
outputs.append(self.P[i - min_lvl](extra_input)) outputs.append(self.P[i - min_lvl](extra_input))
if i != max_lvl: if i != max_lvl:
extra_input = self.relu(outputs[-1]) extra_input = self.relu(outputs[-1])
# Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL] # Build pyramids between [MIN_LEVEL, HIGHEST_LEVEL]
for i in range(HIGHEST_BACKBONE_LVL - 1, min_lvl - 1, -1): for i in range(self.highest_backbone_lvl - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1]) lateral_output = self.C[i - min_lvl](features[i - 1])
if self.coarsest_stride > 0: if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample( upscale_output = nn_funcs.upsample(
......
...@@ -161,7 +161,7 @@ class NASMobileNet(nn.Module): ...@@ -161,7 +161,7 @@ class NASMobileNet(nn.Module):
def reset_parameters(self): def reset_parameters(self):
for m in self.modules(): for m in self.modules():
if nn.is_conv2d(m): if isinstance(m, nn.Conv2d):
init.kaiming_normal(m.weight, 'fan_out') init.kaiming_normal(m.weight, 'fan_out')
if m.bias is not None: if m.bias is not None:
init.constant(m.bias, 0) init.constant(m.bias, 0)
...@@ -173,7 +173,7 @@ class NASMobileNet(nn.Module): ...@@ -173,7 +173,7 @@ class NASMobileNet(nn.Module):
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
if nn.is_conv2d(m): if isinstance(m, nn.Conv2d):
m.weight.requires_grad = False m.weight.requires_grad = False
m._buffers['weight'] = m.weight m._buffers['weight'] = m.weight
del m._parameters['weight'] del m._parameters['weight']
......
...@@ -17,8 +17,6 @@ from __future__ import absolute_import ...@@ -17,8 +17,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import dragon.vm.torch as torch
from seetadet.core.config import cfg from seetadet.core.config import cfg
from seetadet.core.registry import backbones from seetadet.core.registry import backbones
from seetadet.modules import nn from seetadet.modules import nn
...@@ -37,11 +35,12 @@ class BasicBlock(nn.Module): ...@@ -37,11 +35,12 @@ class BasicBlock(nn.Module):
super(BasicBlock, self).__init__() super(BasicBlock, self).__init__()
self.conv1 = nn.Conv3x3(dim_in, dim_out, stride) self.conv1 = nn.Conv3x3(dim_in, dim_out, stride)
self.bn1 = nn.FrozenAffine(dim_out) self.bn1 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv3x3(dim_out, dim_out) self.conv2 = nn.Conv3x3(dim_out, dim_out)
self.bn2 = nn.FrozenAffine(dim_out) self.bn2 = nn.FrozenAffine(dim_out)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock self.dropblock1 = nn.DropBlock2d(**dropblock) if dropblock else None
self.dropblock2 = nn.DropBlock2d(**dropblock) if dropblock else None
def forward(self, x): def forward(self, x):
residual = x residual = x
...@@ -50,14 +49,14 @@ class BasicBlock(nn.Module): ...@@ -50,14 +49,14 @@ class BasicBlock(nn.Module):
out = self.bn1(out) out = self.bn1(out)
out = self.relu(out) out = self.relu(out)
if self.dropblock is not None: if self.dropblock1 is not None:
out = self.dropblock(out) out = self.dropblock1(out)
out = self.conv2(out) out = self.conv2(out)
out = self.bn2(out) out = self.bn2(out)
if self.dropblock is not None: if self.dropblock2 is not None:
residual = self.dropblock(residual) residual = self.dropblock2(residual)
if self.downsample is not None: if self.downsample is not None:
residual = self.downsample(residual) residual = self.downsample(residual)
...@@ -67,7 +66,7 @@ class BasicBlock(nn.Module): ...@@ -67,7 +66,7 @@ class BasicBlock(nn.Module):
return out return out
class Bottleneck(torch.nn.Module): class Bottleneck(nn.Module):
# 1x64d => 0.25 (ResNet) # 1x64d => 0.25 (ResNet)
# 32x8d, 64x4d => 1.0 (ResNeXt) # 32x8d, 64x4d => 1.0 (ResNeXt)
contraction = cfg.RESNET.NUM_GROUPS \ contraction = cfg.RESNET.NUM_GROUPS \
...@@ -86,12 +85,13 @@ class Bottleneck(torch.nn.Module): ...@@ -86,12 +85,13 @@ class Bottleneck(torch.nn.Module):
self.conv1 = nn.Conv1x1(dim_in, dim) self.conv1 = nn.Conv1x1(dim_in, dim)
self.bn1 = nn.FrozenAffine(dim) self.bn1 = nn.FrozenAffine(dim)
self.conv2 = nn.Conv3x3(dim, dim, stride=stride) self.conv2 = nn.Conv3x3(dim, dim, stride=stride)
self.drop2 = nn.DropBlock2d(**dropblock) if dropblock else None
self.bn2 = nn.FrozenAffine(dim) self.bn2 = nn.FrozenAffine(dim)
self.conv3 = nn.Conv1x1(dim, dim_out) self.conv3 = nn.Conv1x1(dim, dim_out)
self.drop3 = nn.DropBlock2d(**dropblock) if dropblock else None
self.bn3 = nn.FrozenAffine(dim_out) self.bn3 = nn.FrozenAffine(dim_out)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.downsample = downsample self.downsample = downsample
self.dropblock = dropblock
def forward(self, x): def forward(self, x):
residual = x residual = x
...@@ -101,32 +101,30 @@ class Bottleneck(torch.nn.Module): ...@@ -101,32 +101,30 @@ class Bottleneck(torch.nn.Module):
out = self.relu(out) out = self.relu(out)
out = self.conv2(out) out = self.conv2(out)
if self.drop2 is not None:
out = self.drop2(out)
out = self.bn2(out) out = self.bn2(out)
out = self.relu(out) out = self.relu(out)
if self.dropblock is not None:
out = self.dropblock(out)
out = self.conv3(out) out = self.conv3(out)
out = self.bn3(out) out = self.bn3(out)
if self.dropblock is not None:
residual = self.dropblock(residual)
if self.downsample is not None: if self.downsample is not None:
residual = self.downsample(residual) residual = self.downsample(residual)
out += residual out += residual
if self.drop3 is not None:
out = self.drop3(out)
out = self.relu(out) out = self.relu(out)
return out return out
class ResNet(torch.nn.Module): class ResNet(nn.Module):
def __init__(self, block, layers, filters): def __init__(self, block, layers, filters):
super(ResNet, self).__init__() super(ResNet, self).__init__()
self.dim_in, filters = filters[0], filters[1:] self.dim_in, filters = filters[0], filters[1:]
self.feature_dims = [self.dim_in] + filters self.feature_dims = [self.dim_in] + filters
self.conv1 = torch.nn.Conv2d( self.conv1 = nn.Conv2d(
3, 64, 3, 64,
kernel_size=7, kernel_size=7,
stride=2, stride=2,
...@@ -134,29 +132,31 @@ class ResNet(torch.nn.Module): ...@@ -134,29 +132,31 @@ class ResNet(torch.nn.Module):
bias=False, bias=False,
) )
self.bn1 = nn.FrozenAffine(self.dim_in) self.bn1 = nn.FrozenAffine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True) self.relu = nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d( self.maxpool = nn.MaxPool2d(
kernel_size=3, kernel_size=3,
stride=2, stride=2,
padding=0, padding=0,
ceil_mode=True, ceil_mode=True,
) )
self.drop3 = torch.nn.DropBlock2d( drop3 = {
kp=0.9, 'kp': 0.9,
block_size=7, 'block_size': 7,
alpha=0.25, 'alpha': 1.00,
decrement=cfg.DROPBLOCK.DECREMENT 'decrement': cfg.DROPBLOCK.DECREMENT,
) if cfg.DROPBLOCK.DROP_ON else None 'inplace': True,
self.drop4 = torch.nn.DropBlock2d( } if cfg.DROPBLOCK.DROP_ON else None
kp=0.9, drop4 = {
block_size=7, 'kp': 0.9,
alpha=1.00, 'block_size': 7,
decrement=cfg.DROPBLOCK.DECREMENT 'alpha': 1.00,
) if cfg.DROPBLOCK.DROP_ON else None 'decrement': cfg.DROPBLOCK.DECREMENT,
'inplace': True,
} if cfg.DROPBLOCK.DROP_ON else None
self.layer1 = self.make_blocks(block, filters[0], layers[0]) self.layer1 = self.make_blocks(block, filters[0], layers[0])
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2) self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, self.drop3) self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, drop3)
self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, self.drop4) self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, drop4)
self.reset_parameters() self.reset_parameters()
def reset_parameters(self): def reset_parameters(self):
...@@ -166,7 +166,7 @@ class ResNet(torch.nn.Module): ...@@ -166,7 +166,7 @@ class ResNet(torch.nn.Module):
# Stop the gradients if necessary # Stop the gradients if necessary
def freeze_func(m): def freeze_func(m):
if isinstance(m, torch.nn.Conv2d): if isinstance(m, nn.Conv2d):
m.weight.requires_grad = False m.weight.requires_grad = False
m._buffers['weight'] = m.weight m._buffers['weight'] = m.weight
del m._parameters['weight'] del m._parameters['weight']
......
...@@ -29,7 +29,6 @@ class SSD(nn.Module): ...@@ -29,7 +29,6 @@ class SSD(nn.Module):
######################################## ########################################
# SSD outputs # # SSD outputs #
######################################## ########################################
self.cls_conv = torch.nn.ModuleList( self.cls_conv = torch.nn.ModuleList(
nn.Conv3x3(feature_dims[0], feature_dims[0], bias=True) nn.Conv3x3(feature_dims[0], feature_dims[0], bias=True)
for _ in range(cfg.SSD.NUM_CONVS) for _ in range(cfg.SSD.NUM_CONVS)
......
...@@ -36,7 +36,6 @@ class _NonMaxSuppression(Function): ...@@ -36,7 +36,6 @@ class _NonMaxSuppression(Function):
return self.dispatch([dets], [self.alloc()]) return self.dispatch([dets], [self.alloc()])
class _RetinaNetDecoder(Function): class _RetinaNetDecoder(Function):
"""Decode predictions from RetinaNet.""" """Decode predictions from RetinaNet."""
......
...@@ -33,6 +33,7 @@ def kaiming_normal(weight, mode='fan_in'): ...@@ -33,6 +33,7 @@ def kaiming_normal(weight, mode='fan_in'):
nonlinearity='relu', nonlinearity='relu',
) )
# Aliases # Aliases
constant = nn.init.constant_ constant = nn.init.constant_
normal = nn.init.normal_ normal = nn.init.normal_
...@@ -185,6 +185,7 @@ class SigmoidFocalLoss(object): ...@@ -185,6 +185,7 @@ class SigmoidFocalLoss(object):
return nn.SigmoidFocalLoss( return nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA, alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA, gamma=cfg.MODEL.FOCAL_LOSS_GAMMA,
negative_index=0, # Background index
) )
...@@ -211,6 +212,7 @@ BCEWithLogitsLoss = nn.BCEWithLogitsLoss ...@@ -211,6 +212,7 @@ BCEWithLogitsLoss = nn.BCEWithLogitsLoss
Conv2d = nn.Conv2d Conv2d = nn.Conv2d
ConvTranspose2d = nn.ConvTranspose2d ConvTranspose2d = nn.ConvTranspose2d
DepthwiseConv2d = nn.DepthwiseConv2d DepthwiseConv2d = nn.DepthwiseConv2d
DropBlock2d = nn.DropBlock2d
Linear = nn.Linear Linear = nn.Linear
MaxPool2d = nn.MaxPool2d MaxPool2d = nn.MaxPool2d
Module = nn.Module Module = nn.Module
......
...@@ -15,7 +15,7 @@ from __future__ import print_function ...@@ -15,7 +15,7 @@ from __future__ import print_function
import functools import functools
import dragon.vm.torch as torch from dragon.vm import torch
from seetadet.core.config import cfg from seetadet.core.config import cfg
...@@ -41,7 +41,9 @@ class Bootstrap(torch.nn.Module): ...@@ -41,7 +41,9 @@ class Bootstrap(torch.nn.Module):
def __init__(self): def __init__(self):
super(Bootstrap, self).__init__() super(Bootstrap, self).__init__()
self.normalize_func = functools.partial( self._device = torch.device('cpu')
self._dummy_buffer = torch.ones(1)
self._normalize_func = functools.partial(
torch.channel_normalize, torch.channel_normalize,
mean=cfg.PIXEL_MEANS, mean=cfg.PIXEL_MEANS,
std=[1., 1., 1.], std=[1., 1., 1.],
...@@ -49,10 +51,9 @@ class Bootstrap(torch.nn.Module): ...@@ -49,10 +51,9 @@ class Bootstrap(torch.nn.Module):
dims=(0, 3, 1, 2), dims=(0, 3, 1, 2),
dtype=cfg.MODEL.PRECISION.lower(), dtype=cfg.MODEL.PRECISION.lower(),
) )
self.dummy_buffer = torch.ones(1)
def _apply(self, fn): def _apply(self, fn):
fn(self.dummy_buffer) fn(self._dummy_buffer)
def cpu(self): def cpu(self):
self._device = torch.device('cpu') self._device = torch.device('cpu')
...@@ -61,12 +62,11 @@ class Bootstrap(torch.nn.Module): ...@@ -61,12 +62,11 @@ class Bootstrap(torch.nn.Module):
self._device = torch.device('cuda', device) self._device = torch.device('cuda', device)
def device(self): def device(self):
"""Return the device of this module.""" return self._dummy_buffer.device
return self.dummy_buffer.device
def forward(self, input): def forward(self, input):
if isinstance(input, torch.Tensor): if isinstance(input, torch.Tensor):
if input.size(1) <= 3: if input.shape[1] <= 3:
return input return input
cur_device = self.device() cur_device = self.device()
if input._device != cur_device: if input._device != cur_device:
...@@ -74,4 +74,4 @@ class Bootstrap(torch.nn.Module): ...@@ -74,4 +74,4 @@ class Bootstrap(torch.nn.Module):
input = input.cpu() input = input.cpu()
else: else:
input = input.cuda(cur_device.index) input = input.cuda(cur_device.index)
return self.normalize_func(input) return self._normalize_func(input)
...@@ -32,8 +32,8 @@ class SGDSolver(object): ...@@ -32,8 +32,8 @@ class SGDSolver(object):
lr=cfg.SOLVER.BASE_LR, lr=cfg.SOLVER.BASE_LR,
momentum=cfg.SOLVER.MOMENTUM, momentum=cfg.SOLVER.MOMENTUM,
weight_decay=cfg.SOLVER.WEIGHT_DECAY, weight_decay=cfg.SOLVER.WEIGHT_DECAY,
clip_gradient=float(cfg.SOLVER.CLIP_NORM), clip_norm=float(cfg.SOLVER.CLIP_NORM),
scale_gradient=1. / cfg.SOLVER.LOSS_SCALING, scale=1. / cfg.SOLVER.LOSS_SCALING,
) )
self.lr_scheduler = lr_scheduler.get_scheduler() self.lr_scheduler = lr_scheduler.get_scheduler()
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import operator
from dragon.vm import torch
from seetadet.modules import nn
def dense_conv_flops(m, inputs, output):
    """Hook to compute flops for a dense convolution.

    Written with the forward-hook signature ``(module, inputs, output)``.
    The counts are stored on the module as ``m.__params__`` and
    ``m.__flops__``; nothing is returned.

    Parameters
    ----------
    m : object
        The convolution module. ``kernel_size`` and ``bias`` are read,
        and ``groups`` is honored when the attribute exists.
    inputs : Sequence
        The forward inputs. Only ``inputs[0].shape[1]`` is used,
        read as the number of input channels.
    output : object
        The forward output. ``shape[1]`` is read as the number of
        output channels and ``shape[2:]`` as the output extent.

    """
    # The initial value ``1`` keeps the product defined for an empty sequence.
    k_dim = functools.reduce(operator.mul, m.kernel_size, 1)
    out_dim = functools.reduce(operator.mul, output.shape[2:], 1)
    in_c, out_c = inputs[0].shape[1], output.shape[1]
    # Each output channel only sees ``in_c / groups`` input channels.
    in_c_per_group = in_c // getattr(m, 'groups', 1)
    # Test the bias for presence: the truth value of a tensor is either
    # ambiguous or depends on the stored values, not on whether it exists.
    bias_dim = 0 if m.bias is None else 1
    m.__params__ = (k_dim * in_c_per_group + bias_dim) * out_c
    m.__flops__ = m.__params__ * out_dim
def depthwise_conv_flops(m, inputs, output):
    """Hook to compute flops for a depthwise convolution.

    Written with the forward-hook signature ``(module, inputs, output)``.
    The counts are stored on the module as ``m.__params__`` and
    ``m.__flops__``; nothing is returned.

    Parameters
    ----------
    m : object
        The convolution module. ``kernel_size`` and ``bias`` are read.
    inputs : Sequence
        The forward inputs. Unused, kept for the hook signature.
    output : object
        The forward output. ``shape[1]`` is read as the number of
        output channels and ``shape[2:]`` as the output extent.

    """
    # The initial value ``1`` keeps the product defined for an empty sequence.
    k_dim = functools.reduce(operator.mul, m.kernel_size, 1)
    out_dim = functools.reduce(operator.mul, output.shape[2:], 1)
    out_c = output.shape[1]
    # Test the bias for presence: the truth value of a tensor is either
    # ambiguous or depends on the stored values, not on whether it exists.
    bias_dim = 0 if m.bias is None else 1
    # One filter per channel, hence no input-channel factor.
    m.__params__ = (k_dim + bias_dim) * out_c
    m.__flops__ = m.__params__ * out_dim
def register_flops(module):
    """Register hooks to collect flops info.

    Every ``nn.DepthwiseConv2d`` found in ``module.modules()`` receives
    ``depthwise_conv_flops`` as a forward hook and every other
    ``nn.Conv2d`` receives ``dense_conv_flops``. A ``__flops__``
    attribute on ``module`` marks it as registered, so calling this
    again on the same module does nothing.

    """
    if hasattr(module, '__flops__'):
        return
    module.__flops__ = 0.
    for layer in module.modules():
        # The depthwise check goes first so that it takes precedence
        # over the generic convolution check.
        if isinstance(layer, nn.DepthwiseConv2d):
            hook = depthwise_conv_flops
        elif isinstance(layer, nn.Conv2d):
            hook = dense_conv_flops
        else:
            continue
        layer.register_forward_hook(hook)
def collect_flops(module, normalizer=1e6):
    """Sum and clear the flops recorded on a module tree.

    Every member of ``module.modules()`` that carries a ``__flops__``
    attribute contributes its value and is then reset to zero, so an
    immediate second call returns zero.

    Parameters
    ----------
    module : object
        The root module providing ``modules()``.
    normalizer : float, optional, default=1e6
        The divisor applied to the summed flops.

    Returns
    -------
    float
        The summed flops divided by ``normalizer``.

    """
    tracked = [m for m in module.modules() if hasattr(m, '__flops__')]
    accumulated = 0.
    for m in tracked:
        accumulated += m.__flops__
        m.__flops__ = 0.
    return accumulated / normalizer
def benchmark_flops(module, normalizer=1e6):
    """Return the flops by running benchmark once.

    The module is called with no arguments, in evaluation mode and
    with gradients disabled. A module that was in training mode is
    switched back afterwards, even if the forward raises.

    Parameters
    ----------
    module : object
        The module to benchmark.
    normalizer : float, optional, default=1e6
        The divisor applied to the summed flops.

    Returns
    -------
    float
        The flops recorded during the forward, divided by ``normalizer``.

    """
    register_flops(module)
    # Discard the counts left over from any earlier forward.
    collect_flops(module)
    was_training = module.training
    if was_training:
        module.eval()
    try:
        with torch.no_grad():
            module()
    finally:
        # Do not leave the caller's module stuck in evaluation mode
        # when the forward fails.
        if was_training:
            module.train()
    return collect_flops(module, normalizer)
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!