SeetaResearch / Dragon
Commit 1d03e8e2 authored Jan 19, 2019 by Ting PAN
Optimize GatherOp
1 parent c5def39b
Showing 41 changed files with 335 additions and 1225 deletions
Docs/api/python/_static/css/dragon.css
Docs/api/python/contents/config.rst
Docs/api/python/contents/core/tensor.rst
Docs/api/python/contents/core/tensor_utils.rst
Docs/api/python/contents/memonger.rst
Docs/api/python/contents/tools/db.rst
Docs/api/python/contents/tools/im2db.rst
Docs/api/python/contents/tools/summary_writer.rst
Docs/api/python/contents/tools/tensorboard.rst
Docs/api/python/contents/updaters.rst
Docs/api/python/contents/vm/caffe/layer.rst
Docs/api/python/contents/vm/caffe/misc.rst
Docs/api/python/contents/vm/caffe/net.rst
Docs/api/python/contents/vm/caffe/solver.rst
Docs/api/python/contents/vm/theano/compile.rst
Dragon/include/operators/ndarray/gather_op.h
Dragon/include/utils/op_kernel.h
Dragon/modules/cxx/dragon.cc
Dragon/modules/python/py_onnx.h
Dragon/python/dragon/config.py
Dragon/python/dragon/core/helper.py
Dragon/python/dragon/operators/ndarray.py
Dragon/python/dragon/operators/vision.py
Dragon/python/dragon/utils/vision/blob_fetcher.py
Dragon/python/dragon/utils/vision/data_batch.py
Dragon/python/dragon/vm/caffe/model_libs.py
Dragon/python/dragon/vm/caffe/net_spec.py
Dragon/python/dragon/vm/theano/compile/function.py
Dragon/python/dragon/vm/torch/ops/modules/axis.py
Dragon/src/contrib/rcnn/bbox_utils.h
Dragon/src/contrib/rcnn/proposal_op.cc
Dragon/src/kernels/ndarray/gather_op_kernel.cc
Dragon/src/kernels/ndarray/gather_op_kernel.cu
Dragon/src/contrib/onnx/onnx_attibute.cc → Dragon/src/onnx/onnx_attibute.cc
Dragon/src/contrib/onnx/onnx_backend.cc → Dragon/src/onnx/onnx_backend.cc
Dragon/src/contrib/onnx/onnx_backend.h → Dragon/src/onnx/onnx_backend.h
Dragon/src/contrib/onnx/onnx_importer.cc → Dragon/src/onnx/onnx_importer.cc
Dragon/src/contrib/onnx/onnx_initializer.cc → Dragon/src/onnx/onnx_initializer.cc
Dragon/src/operators/arithmetic/maximum_op.cc
Dragon/src/operators/ndarray/gather_op.cc
Dragon/src/operators/vision/drop_block2d_op.cc
Docs/api/python/_static/css/dragon.css
...
@@ -283,14 +283,16 @@ code.docutils.literal:hover {
 dt {
     font-weight: 700;
-    background: #e7f2fa;
+    background: #f7f7f7;
+    border-bottom: solid #0079b2;
-    border-radius: 1px;
+    border-radius: 8px;
     margin-bottom: 20px;
     padding: 8px;
     width: 75%;
 }
 dt:target, .highlighted {
-    background-color: #e7f2fa;
+    background-color: #f7f7f7;
+    border-bottom: 3px solid #c7254e;
 }
...
@@ -299,7 +301,7 @@ dt:target:before {
     content: '';
     display: block;
     height: 65px;
-    margin: -20px 0 0;
+    margin: -20px -8px 8px;
 }
 dl.method dt {
...
Docs/api/python/contents/config.rst
...
@@ -5,8 +5,8 @@
 .. toctree::
     :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ========================== =============================================================================
 List Brief
...
Docs/api/python/contents/core/tensor.rst
...
@@ -5,8 +5,8 @@
 .. toctree::
     :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ============================== =============================================================================
 List Brief
...
Docs/api/python/contents/core/tensor_utils.rst
...
@@ -5,8 +5,8 @@
 .. toctree::
     :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ============================== =============================================================================
 List Brief
...
Docs/api/python/contents/memonger.rst
...
@@ -5,8 +5,8 @@
 .. toctree::
     :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List Brief
...
Docs/api/python/contents/tools/db.rst
...
@@ -5,8 +5,8 @@
 .. toctree::
     :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List Brief
...
Docs/api/python/contents/tools/im2db.rst
...
@@ -5,8 +5,8 @@
 .. toctree::
     :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List Brief
...
Docs/api/python/contents/tools/summary_writer.rst
...
@@ -5,8 +5,8 @@
 .. toctree::
     :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List Brief
...
Docs/api/python/contents/tools/tensorboard.rst
...
@@ -5,8 +5,8 @@
 .. toctree::
     :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List Brief
...
Docs/api/python/contents/updaters.rst
...
@@ -5,8 +5,8 @@
 .. toctree::
     :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List Brief
...
Docs/api/python/contents/vm/caffe/layer.rst
...
@@ -112,8 +112,8 @@ List Brief
 ================================= =============================================================================

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List Brief
...
Docs/api/python/contents/vm/caffe/misc.rst
...
@@ -5,8 +5,8 @@
 .. toctree::
     :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ========================= ============================================================================
 List Brief
...
Docs/api/python/contents/vm/caffe/net.rst
...
@@ -5,8 +5,8 @@
 .. toctree::
     :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ========================= =============================================================================
 List Brief
...
Docs/api/python/contents/vm/caffe/solver.rst
...
@@ -5,8 +5,8 @@
 .. toctree::
     :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List Brief
...
Docs/api/python/contents/vm/theano/compile.rst
...
@@ -6,8 +6,8 @@
     :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ============================== =======================================================================
 List Brief
...
Dragon/include/operators/ndarray/gather_op.h
...
@@ -39,15 +39,15 @@ class GatherGradientOp final : public Operator<Context> {
     GatherGradientOp(const OperatorDef& def, Workspace* ws)
         : Operator<Context>(def, ws),
           axis(OperatorBase::Arg<int64_t>("axis", 0)),
-          acc_grad(OperatorBase::Arg<bool>("acc_gradient", false)) {}
+          zero_grad(OperatorBase::Arg<bool>("zero_grad", true)) {}
     USE_OPERATOR_FUNCTIONS;

     void RunOnDevice() override;
     template <typename T> void RunWithType();

  protected:
+    bool zero_grad;
     int64_t axis, outer_dim, inner_dim, x_slice_dim, y_slice_dim;
-    bool acc_grad;
 };

}  // namespace dragon
...
Dragon/include/utils/op_kernel.h
...
@@ -601,32 +601,23 @@ void ArgMin(

/*! ndarray.gather */

-template <typename T, class Context>
-void CanonicalAxis(
-    const int count,
-    const int dim,
-    T* y,
-    Context* ctx);
-
 template <typename T, class Context>
 void Gather(
-    const int count,
     const int outer_dim,
     const int inner_dim,
     const int x_slice_dim,
     const int y_slice_dim,
-    const int* indices,
+    const int64_t* indices,
     const T* x,
     T* y,
     Context* ctx);

 template <typename T, class Context>
 void GatherGrad(
-    const int count,
     const int outer_dim,
     const int inner_dim,
     const int x_slice_dim,
     const int y_slice_dim,
-    const int* indices,
+    const int64_t* indices,
     const T* dy,
     T* dx,
     Context* ctx);
...
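For orientation, the four dimension arguments in these declarations describe the input flattened around the gather axis: outer_dim is the product of the dimensions before the axis, x_slice_dim is the size of the gathered axis, y_slice_dim is the number of indices, and inner_dim is the product of the dimensions after the axis. A small NumPy sketch of that decomposition (illustrative only, not part of this commit):

import numpy as np

x = np.random.rand(2, 3, 4, 5).astype('float32')
axis = 2
indices = np.array([3, 0], dtype=np.int64)     # int64, matching the new signature

outer_dim = int(np.prod(x.shape[:axis]))       # 2 * 3 = 6
x_slice_dim = x.shape[axis]                    # 4
y_slice_dim = indices.size                     # 2
inner_dim = int(np.prod(x.shape[axis + 1:]))   # 5

y = np.take(x, indices, axis=axis)
assert y.shape == (2, 3, 2, 5)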
Dragon/modules/cxx/dragon.cc
...
@@ -3,7 +3,7 @@
 #include "core/common.h"
 #include "utils/proto_utils.h"
 #include "utils/caffemodel.h"
-#include "contrib/onnx/onnx_backend.h"
+#include "onnx/onnx_backend.h"
 #include "dragon.h"
...
Dragon/modules/python/py_onnx.h
...
@@ -11,7 +11,7 @@
 #ifndef DRAGON_PYTHON_PY_ONNX_H_
 #define DRAGON_PYTHON_PY_ONNX_H_
-#include "contrib/onnx/onnx_backend.h"
+#include "onnx/onnx_backend.h"
 #include "py_dragon.h"
...
Dragon/python/dragon/config.py
...
@@ -270,7 +270,7 @@ def ExportMetaGraph(prefix=''):
     These text files will be saved as the following format:

-    ``prefix/Graph_xxx.metatxt``
+    *prefix/Graph.metatxt*

     Note that an empty prefix will leads to invalid exporting.
...
@@ -293,12 +293,12 @@ def SetLoggingLevel(level):
     Parameters
     ----------
-    level : str
-        The level, ``DEBUG``, ``INFO``, ``WARNING``, ``ERROR`` or ``FATAL``.
+    level : {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'FATAL'}, required
+        The logging level.

     Notes
     -----
-    The default level is ``INFO``.
+    The default level is *INFO*.

     """
     C.SetLogLevelCC(level)
...
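A minimal usage sketch of the function documented above, assuming the module is importable as dragon.config (it is defined in Dragon/python/dragon/config.py):

from dragon import config

config.SetLoggingLevel('DEBUG')   # one of DEBUG, INFO, WARNING, ERROR, FATAL
config.SetLoggingLevel('INFO')    # INFO is the default level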
Dragon/python/dragon/core/helper.py
...
@@ -391,9 +391,12 @@ class OperatorHelper(object):
     @classmethod
     def _apply_Gather(cls, arguments, inputs, outputs):
         outputs[0].dtype = inputs[0].dtype
+        axis = arguments['axis']
         try:
-            outputs[0].shape = inputs[0].shape[:]
-            outputs[0].shape[arguments['axis']] = None
+            outputs[0].shape = \
+                inputs[0].shape[:axis] + \
+                inputs[1].shape[:] + \
+                inputs[0].shape[axis + 1:]
         except:
             pass
         return outputs
...
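The new _apply_Gather splices the indices shape into the input shape at the gather axis, i.e. out.shape == x.shape[:axis] + indices.shape + x.shape[axis + 1:]. A quick NumPy check of that rule (illustration only):

import numpy as np

x = np.zeros((4, 5, 6), dtype='float32')
indices = np.array([0, 2, 3], dtype=np.int64)   # shape (3,)
out = np.take(x, indices, axis=1)
assert out.shape == (4, 3, 6)                   # (4,) + (3,) + (6,)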
Dragon/python/dragon/operators/ndarray.py
...
@@ -17,10 +17,10 @@ from . import *
 @OpSchema.Inputs(1)
-def Gather(inputs, indices, axis=0, acc_gradient=False, **kwargs):
+def Gather(inputs, indices, axis=0, zero_grad=True, **kwargs):
     """Gather the input according to the indices along the given axis.

-    **Type Constraints**: (*int32*, *float32*)
+    **Type Constraints**: (*bool*, *int8*, *uint8*, *int32*, *int64*, *float16*, *float32*, *float64*)

     Parameters
     ----------
...
@@ -30,7 +30,7 @@ def Gather(inputs, indices, axis=0, acc_gradient=False, **kwargs):
         The indices to form output tensor.
     axis : int, optional
         The start axis, can be negative.
-    acc_gradient : bool, optional
+    zero_grad : bool, optional
         Whether to accumulate the gradients.

     Returns
...
@@ -40,24 +40,10 @@ def Gather(inputs, indices, axis=0, acc_gradient=False, **kwargs):
     """
     arguments = ParseArgs(locals())
-    arguments['inputs'], arguments['indices'] = \
-        [arguments['inputs'], Tensor.Convert(indices, dtype='int32')], None
-    output = Tensor.CreateOperator('Gather', **arguments)
-    try:
-        output.shape = inputs.shape[:]
-        if not isinstance(indices, Tensor):
-            if not isinstance(indices, (list, tuple)):
-                indices = [indices]
-            output.shape[axis] = len(indices)
-        else:
-            output.shape[axis] = None
-    except:
-        pass
-    return output
+    arguments['inputs'], arguments['indices'] = \
+        [arguments['inputs'], Tensor.Convert(indices, dtype='int64')], None
+    return Tensor.CreateOperator('Gather', **arguments)

 @OpSchema.Inputs(1)
...
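With this change the Python wrapper no longer guesses the output shape itself; shape inference is delegated to OperatorHelper._apply_Gather (see helper.py above), and indices are always converted to an int64 tensor. A hypothetical call sketch; the dragon.ops alias and the Tensor constructor arguments are assumptions for illustration, not verified against this revision:

import dragon.ops as ops
from dragon.core.tensor import Tensor

x = Tensor(shape=[8, 32], dtype='float32')    # assumed constructor form
y = ops.Gather(x, indices=[0, 2, 5], axis=0)  # zero_grad defaults to True
# expected inferred shape: [3, 32]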
Dragon/python/dragon/operators/vision.py
...
@@ -283,9 +283,7 @@ def Pool2d(
 @OpSchema.Inputs(2)
 def ROIPool(inputs, pool_h, pool_w, spatial_scale=1.0, **kwargs):
-    """Max RoI Pooling. `[Girshick, 2015] <https://arxiv.org/abs/1504.08083>`_.
-
-    The first dimension of input must be ``1``.
+    """Max RoIPooling. `[Girshick, 2015] <https://arxiv.org/abs/1504.08083>`_.

     **Type Constraints**: (*float16*, *float32*)
...
@@ -311,9 +309,7 @@ def ROIPool(inputs, pool_h, pool_w, spatial_scale=1.0, **kwargs):
 @OpSchema.Inputs(2)
 def ROIAlign(inputs, pool_h=0, pool_w=0, spatial_scale=1.0, sampling_ratio=2, **kwargs):
-    """AVG ROIAlign. `[He et.al, 2017] <https://arxiv.org/abs/1703.06870>`_.
-
-    The first dimension of input must be ``1``.
+    """AVG RoIAlign. `[He et.al, 2017] <https://arxiv.org/abs/1703.06870>`_.

     **Type Constraints**: (*float16*, *float32*)
...
Dragon/python/dragon/utils/vision/blob_fetcher.py
...
@@ -20,7 +20,7 @@ from multiprocessing import Process
 class BlobFetcher(Process):
     """BlobFetcher is deployed to queue blobs from `DataTransformer`_.

-    It is supported to form ``NHWC`` image blobs and ``1D`` label blobs.
+    It is supported to form *NHWC* image blobs and *1d* label blobs.

     """
     def __init__(self, **kwargs):
...
Dragon/python/dragon/utils/vision/data_batch.py
...
@@ -26,7 +26,7 @@ from .blob_fetcher import BlobFetcher
 class DataBatch(object):
-    """DataBatch aims to prefetch data by ``Triple-Buffering``.
+    """DataBatch aims to prefetch data by *Triple-Buffering*.

     It takes full advantages of the Process/Thread of Python,
     which provides remarkable I/O speed up for scalable distributed training.
...
Dragon/python/dragon/vm/caffe/model_libs.py
deleted 100644 → 0
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/weiliu89/caffe/blob/ssd/python/caffe/model_libs.py>
#
# ------------------------------------------------------------
import
os
from
dragon.vm.caffe
import
layers
as
L
from
dragon.vm.caffe
import
params
as
P
from
dragon.vm.caffe.proto
import
caffe_pb2
def
check_if_exist
(
path
):
return
os
.
path
.
exists
(
path
)
def
make_if_not_exist
(
path
):
if
not
os
.
path
.
exists
(
path
):
os
.
makedirs
(
path
)
def
UnpackVariable
(
var
,
num
):
if
type
(
var
)
is
list
and
len
(
var
)
==
num
:
return
var
else
:
ret
=
[]
if
type
(
var
)
is
list
:
assert
len
(
var
)
==
1
for
i
in
range
(
0
,
num
):
ret
.
append
(
var
[
0
])
else
:
for
i
in
range
(
0
,
num
):
ret
.
append
(
var
)
return
ret
def
ConvBNLayer
(
net
,
from_layer
,
out_layer
,
use_bn
,
use_relu
,
num_output
,
kernel_size
,
pad
,
stride
,
dilation
=
1
,
use_scale
=
True
,
lr_mult
=
1
,
conv_prefix
=
''
,
conv_postfix
=
''
,
bn_prefix
=
''
,
bn_postfix
=
'_bn'
,
scale_prefix
=
''
,
scale_postfix
=
'_scale'
,
bias_prefix
=
''
,
bias_postfix
=
'_bias'
,
**
bn_params
):
if
use_bn
:
# parameters for convolution layer with batchnorm.
kwargs
=
{
'param'
:
[
dict
(
lr_mult
=
lr_mult
,
decay_mult
=
1
)],
'weight_filler'
:
dict
(
type
=
'gaussian'
,
std
=
0.01
),
'bias_term'
:
False
,
}
eps
=
bn_params
.
get
(
'eps'
,
1e-3
)
moving_average_fraction
=
bn_params
.
get
(
'moving_average_fraction'
,
0.9
)
use_global_stats
=
bn_params
.
get
(
'use_global_stats'
,
False
)
# parameters for batchnorm layer.
bn_kwargs
=
{
'param'
:
[
dict
(
lr_mult
=
0
,
decay_mult
=
0
),
dict
(
lr_mult
=
0
,
decay_mult
=
0
),
dict
(
lr_mult
=
0
,
decay_mult
=
0
)],
}
bn_lr_mult
=
lr_mult
if
use_global_stats
:
# only specify if use_global_stats is explicitly provided;
# otherwise, use_global_stats_ = this->phase_ == TEST;
bn_kwargs
=
{
'param'
:
[
dict
(
lr_mult
=
0
,
decay_mult
=
0
),
dict
(
lr_mult
=
0
,
decay_mult
=
0
),
dict
(
lr_mult
=
0
,
decay_mult
=
0
)],
'eps'
:
eps
,
'use_global_stats'
:
use_global_stats
,
}
# not updating scale/bias parameters
bn_lr_mult
=
0
# parameters for scale bias layer after batchnorm.
if
use_scale
:
sb_kwargs
=
{
'bias_term'
:
True
}
else
:
kwargs
=
{
'param'
:
[
dict
(
lr_mult
=
lr_mult
,
decay_mult
=
1
),
dict
(
lr_mult
=
2
*
lr_mult
,
decay_mult
=
0
)],
'weight_filler'
:
dict
(
type
=
'xavier'
),
'bias_filler'
:
dict
(
type
=
'constant'
,
value
=
0
)
}
conv_name
=
'{}{}{}'
.
format
(
conv_prefix
,
out_layer
,
conv_postfix
)
[
kernel_h
,
kernel_w
]
=
UnpackVariable
(
kernel_size
,
2
)
[
pad_h
,
pad_w
]
=
UnpackVariable
(
pad
,
2
)
[
stride_h
,
stride_w
]
=
UnpackVariable
(
stride
,
2
)
if
kernel_h
==
kernel_w
:
net
[
conv_name
]
=
L
.
Convolution
(
net
[
from_layer
],
num_output
=
num_output
,
kernel_size
=
kernel_h
,
pad
=
pad_h
,
stride
=
stride_h
,
**
kwargs
)
else
:
net
[
conv_name
]
=
L
.
Convolution
(
net
[
from_layer
],
num_output
=
num_output
,
kernel_h
=
kernel_h
,
kernel_w
=
kernel_w
,
pad_h
=
pad_h
,
pad_w
=
pad_w
,
stride_h
=
stride_h
,
stride_w
=
stride_w
,
**
kwargs
)
if
dilation
>
1
:
net
.
update
(
conv_name
,
{
'dilation'
:
dilation
})
if
use_bn
:
bn_name
=
'{}{}{}'
.
format
(
bn_prefix
,
out_layer
,
bn_postfix
)
net
[
bn_name
]
=
L
.
BatchNorm
(
net
[
conv_name
],
in_place
=
True
,
**
bn_kwargs
)
if
use_scale
:
sb_name
=
'{}{}{}'
.
format
(
scale_prefix
,
out_layer
,
scale_postfix
)
net
[
sb_name
]
=
L
.
Scale
(
net
[
bn_name
],
in_place
=
True
,
**
sb_kwargs
)
else
:
bias_name
=
'{}{}{}'
.
format
(
bias_prefix
,
out_layer
,
bias_postfix
)
net
[
bias_name
]
=
L
.
Bias
(
net
[
bn_name
],
in_place
=
True
,
**
bias_kwargs
)
if
use_relu
:
relu_name
=
'{}_relu'
.
format
(
conv_name
)
net
[
relu_name
]
=
L
.
ReLU
(
net
[
conv_name
],
in_place
=
True
)
def
ResBody
(
net
,
from_layer
,
block_name
,
out2a
,
out2b
,
out2c
,
stride
,
use_branch1
,
dilation
=
1
,
**
bn_param
):
# ResBody(net, 'pool1', '2a', 64, 64, 256, 1, True)
conv_prefix
=
'res{}_'
.
format
(
block_name
)
conv_postfix
=
''
bn_prefix
=
'bn{}_'
.
format
(
block_name
)
bn_postfix
=
''
scale_prefix
=
'scale{}_'
.
format
(
block_name
)
scale_postfix
=
''
use_scale
=
True
if
use_branch1
:
branch_name
=
'branch1'
ConvBNLayer
(
net
,
from_layer
,
branch_name
,
use_bn
=
True
,
use_relu
=
False
,
num_output
=
out2c
,
kernel_size
=
1
,
pad
=
0
,
stride
=
stride
,
use_scale
=
use_scale
,
conv_prefix
=
conv_prefix
,
conv_postfix
=
conv_postfix
,
bn_prefix
=
bn_prefix
,
bn_postfix
=
bn_postfix
,
scale_prefix
=
scale_prefix
,
scale_postfix
=
scale_postfix
,
**
bn_param
)
branch1
=
'{}{}'
.
format
(
conv_prefix
,
branch_name
)
else
:
branch1
=
from_layer
branch_name
=
'branch2a'
ConvBNLayer
(
net
,
from_layer
,
branch_name
,
use_bn
=
True
,
use_relu
=
True
,
num_output
=
out2a
,
kernel_size
=
1
,
pad
=
0
,
stride
=
stride
,
use_scale
=
use_scale
,
conv_prefix
=
conv_prefix
,
conv_postfix
=
conv_postfix
,
bn_prefix
=
bn_prefix
,
bn_postfix
=
bn_postfix
,
scale_prefix
=
scale_prefix
,
scale_postfix
=
scale_postfix
,
**
bn_param
)
out_name
=
'{}{}'
.
format
(
conv_prefix
,
branch_name
)
branch_name
=
'branch2b'
if
dilation
==
1
:
ConvBNLayer
(
net
,
out_name
,
branch_name
,
use_bn
=
True
,
use_relu
=
True
,
num_output
=
out2b
,
kernel_size
=
3
,
pad
=
1
,
stride
=
1
,
use_scale
=
use_scale
,
conv_prefix
=
conv_prefix
,
conv_postfix
=
conv_postfix
,
bn_prefix
=
bn_prefix
,
bn_postfix
=
bn_postfix
,
scale_prefix
=
scale_prefix
,
scale_postfix
=
scale_postfix
,
**
bn_param
)
else
:
pad
=
int
((
3
+
(
dilation
-
1
)
*
2
)
-
1
)
/
2
ConvBNLayer
(
net
,
out_name
,
branch_name
,
use_bn
=
True
,
use_relu
=
True
,
num_output
=
out2b
,
kernel_size
=
3
,
pad
=
pad
,
stride
=
1
,
use_scale
=
use_scale
,
dilation
=
dilation
,
conv_prefix
=
conv_prefix
,
conv_postfix
=
conv_postfix
,
bn_prefix
=
bn_prefix
,
bn_postfix
=
bn_postfix
,
scale_prefix
=
scale_prefix
,
scale_postfix
=
scale_postfix
,
**
bn_param
)
out_name
=
'{}{}'
.
format
(
conv_prefix
,
branch_name
)
branch_name
=
'branch2c'
ConvBNLayer
(
net
,
out_name
,
branch_name
,
use_bn
=
True
,
use_relu
=
False
,
num_output
=
out2c
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
,
use_scale
=
use_scale
,
conv_prefix
=
conv_prefix
,
conv_postfix
=
conv_postfix
,
bn_prefix
=
bn_prefix
,
bn_postfix
=
bn_postfix
,
scale_prefix
=
scale_prefix
,
scale_postfix
=
scale_postfix
,
**
bn_param
)
branch2
=
'{}{}'
.
format
(
conv_prefix
,
branch_name
)
res_name
=
'res{}'
.
format
(
block_name
)
net
[
res_name
]
=
L
.
Eltwise
(
net
[
branch1
],
net
[
branch2
])
relu_name
=
'{}_relu'
.
format
(
res_name
)
net
[
relu_name
]
=
L
.
ReLU
(
net
[
res_name
],
in_place
=
True
)
def
InceptionTower
(
net
,
from_layer
,
tower_name
,
layer_params
,
**
bn_param
):
use_scale
=
False
for
param
in
layer_params
:
tower_layer
=
'{}/{}'
.
format
(
tower_name
,
param
[
'name'
])
del
param
[
'name'
]
if
'pool'
in
tower_layer
:
net
[
tower_layer
]
=
L
.
Pooling
(
net
[
from_layer
],
**
param
)
else
:
param
.
update
(
bn_param
)
ConvBNLayer
(
net
,
from_layer
,
tower_layer
,
use_bn
=
True
,
use_relu
=
True
,
use_scale
=
use_scale
,
**
param
)
from_layer
=
tower_layer
return
net
[
from_layer
]
def
CreateAnnotatedDataLayer
(
source
,
batch_size
=
32
,
backend
=
P
.
Data
.
LMDB
,
output_label
=
True
,
train
=
True
,
label_map_file
=
''
,
anno_type
=
None
,
transform_param
=
{},
batch_sampler
=
[{}]):
if
train
:
kwargs
=
{
'include'
:
dict
(
phase
=
caffe_pb2
.
Phase
.
Value
(
'TRAIN'
)),
'transform_param'
:
transform_param
,
}
else
:
kwargs
=
{
'include'
:
dict
(
phase
=
caffe_pb2
.
Phase
.
Value
(
'TEST'
)),
'transform_param'
:
transform_param
,
}
ntop
=
1
if
output_label
:
ntop
=
2
annotated_data_param
=
{
'label_map_file'
:
label_map_file
,
'batch_sampler'
:
batch_sampler
,
}
if
anno_type
is
not
None
:
annotated_data_param
.
update
({
'anno_type'
:
anno_type
})
return
L
.
AnnotatedData
(
name
=
"data"
,
annotated_data_param
=
annotated_data_param
,
data_param
=
dict
(
batch_size
=
batch_size
,
backend
=
backend
,
source
=
source
),
ntop
=
ntop
,
**
kwargs
)
def
VGGNetBody
(
net
,
from_layer
,
need_fc
=
True
,
fully_conv
=
False
,
reduced
=
False
,
dilated
=
False
,
nopool
=
False
,
dropout
=
True
,
freeze_layers
=
[],
dilate_pool4
=
False
):
kwargs
=
{
'param'
:
[
dict
(
lr_mult
=
1
,
decay_mult
=
1
),
dict
(
lr_mult
=
2
,
decay_mult
=
0
)],
'weight_filler'
:
dict
(
type
=
'xavier'
),
'bias_filler'
:
dict
(
type
=
'constant'
,
value
=
0
)}
assert
from_layer
in
net
.
keys
()
net
.
conv1_1
=
L
.
Convolution
(
net
[
from_layer
],
num_output
=
64
,
pad
=
1
,
kernel_size
=
3
,
**
kwargs
)
net
.
relu1_1
=
L
.
ReLU
(
net
.
conv1_1
,
in_place
=
True
)
net
.
conv1_2
=
L
.
Convolution
(
net
.
relu1_1
,
num_output
=
64
,
pad
=
1
,
kernel_size
=
3
,
**
kwargs
)
net
.
relu1_2
=
L
.
ReLU
(
net
.
conv1_2
,
in_place
=
True
)
if
nopool
:
name
=
'conv1_3'
net
[
name
]
=
L
.
Convolution
(
net
.
relu1_2
,
num_output
=
64
,
pad
=
1
,
kernel_size
=
3
,
stride
=
2
,
**
kwargs
)
else
:
name
=
'pool1'
net
.
pool1
=
L
.
Pooling
(
net
.
relu1_2
,
pool
=
P
.
Pooling
.
MAX
,
kernel_size
=
2
,
stride
=
2
)
net
.
conv2_1
=
L
.
Convolution
(
net
[
name
],
num_output
=
128
,
pad
=
1
,
kernel_size
=
3
,
**
kwargs
)
net
.
relu2_1
=
L
.
ReLU
(
net
.
conv2_1
,
in_place
=
True
)
net
.
conv2_2
=
L
.
Convolution
(
net
.
relu2_1
,
num_output
=
128
,
pad
=
1
,
kernel_size
=
3
,
**
kwargs
)
net
.
relu2_2
=
L
.
ReLU
(
net
.
conv2_2
,
in_place
=
True
)
if
nopool
:
name
=
'conv2_3'
net
[
name
]
=
L
.
Convolution
(
net
.
relu2_2
,
num_output
=
128
,
pad
=
1
,
kernel_size
=
3
,
stride
=
2
,
**
kwargs
)
else
:
name
=
'pool2'
net
[
name
]
=
L
.
Pooling
(
net
.
relu2_2
,
pool
=
P
.
Pooling
.
MAX
,
kernel_size
=
2
,
stride
=
2
)
net
.
conv3_1
=
L
.
Convolution
(
net
[
name
],
num_output
=
256
,
pad
=
1
,
kernel_size
=
3
,
**
kwargs
)
net
.
relu3_1
=
L
.
ReLU
(
net
.
conv3_1
,
in_place
=
True
)
net
.
conv3_2
=
L
.
Convolution
(
net
.
relu3_1
,
num_output
=
256
,
pad
=
1
,
kernel_size
=
3
,
**
kwargs
)
net
.
relu3_2
=
L
.
ReLU
(
net
.
conv3_2
,
in_place
=
True
)
net
.
conv3_3
=
L
.
Convolution
(
net
.
relu3_2
,
num_output
=
256
,
pad
=
1
,
kernel_size
=
3
,
**
kwargs
)
net
.
relu3_3
=
L
.
ReLU
(
net
.
conv3_3
,
in_place
=
True
)
if
nopool
:
name
=
'conv3_4'
net
[
name
]
=
L
.
Convolution
(
net
.
relu3_3
,
num_output
=
256
,
pad
=
1
,
kernel_size
=
3
,
stride
=
2
,
**
kwargs
)
else
:
name
=
'pool3'
net
[
name
]
=
L
.
Pooling
(
net
.
relu3_3
,
pool
=
P
.
Pooling
.
MAX
,
kernel_size
=
2
,
stride
=
2
)
net
.
conv4_1
=
L
.
Convolution
(
net
[
name
],
num_output
=
512
,
pad
=
1
,
kernel_size
=
3
,
**
kwargs
)
net
.
relu4_1
=
L
.
ReLU
(
net
.
conv4_1
,
in_place
=
True
)
net
.
conv4_2
=
L
.
Convolution
(
net
.
relu4_1
,
num_output
=
512
,
pad
=
1
,
kernel_size
=
3
,
**
kwargs
)
net
.
relu4_2
=
L
.
ReLU
(
net
.
conv4_2
,
in_place
=
True
)
net
.
conv4_3
=
L
.
Convolution
(
net
.
relu4_2
,
num_output
=
512
,
pad
=
1
,
kernel_size
=
3
,
**
kwargs
)
net
.
relu4_3
=
L
.
ReLU
(
net
.
conv4_3
,
in_place
=
True
)
if
nopool
:
name
=
'conv4_4'
net
[
name
]
=
L
.
Convolution
(
net
.
relu4_3
,
num_output
=
512
,
pad
=
1
,
kernel_size
=
3
,
stride
=
2
,
**
kwargs
)
else
:
name
=
'pool4'
if
dilate_pool4
:
net
[
name
]
=
L
.
Pooling
(
net
.
relu4_3
,
pool
=
P
.
Pooling
.
MAX
,
kernel_size
=
3
,
stride
=
1
,
pad
=
1
)
dilation
=
2
else
:
net
[
name
]
=
L
.
Pooling
(
net
.
relu4_3
,
pool
=
P
.
Pooling
.
MAX
,
kernel_size
=
2
,
stride
=
2
)
dilation
=
1
kernel_size
=
3
pad
=
int
(
int
((
kernel_size
+
(
dilation
-
1
)
*
(
kernel_size
-
1
))
-
1
)
/
2
)
net
.
conv5_1
=
L
.
Convolution
(
net
[
name
],
num_output
=
512
,
pad
=
pad
,
kernel_size
=
kernel_size
,
dilation
=
dilation
,
**
kwargs
)
net
.
relu5_1
=
L
.
ReLU
(
net
.
conv5_1
,
in_place
=
True
)
net
.
conv5_2
=
L
.
Convolution
(
net
.
relu5_1
,
num_output
=
512
,
pad
=
pad
,
kernel_size
=
kernel_size
,
dilation
=
dilation
,
**
kwargs
)
net
.
relu5_2
=
L
.
ReLU
(
net
.
conv5_2
,
in_place
=
True
)
net
.
conv5_3
=
L
.
Convolution
(
net
.
relu5_2
,
num_output
=
512
,
pad
=
pad
,
kernel_size
=
kernel_size
,
dilation
=
dilation
,
**
kwargs
)
net
.
relu5_3
=
L
.
ReLU
(
net
.
conv5_3
,
in_place
=
True
)
if
need_fc
:
if
dilated
:
if
nopool
:
name
=
'conv5_4'
net
[
name
]
=
L
.
Convolution
(
net
.
relu5_3
,
num_output
=
512
,
pad
=
1
,
kernel_size
=
3
,
stride
=
1
,
**
kwargs
)
else
:
name
=
'pool5'
net
[
name
]
=
L
.
Pooling
(
net
.
relu5_3
,
pool
=
P
.
Pooling
.
MAX
,
pad
=
1
,
kernel_size
=
3
,
stride
=
1
)
else
:
if
nopool
:
name
=
'conv5_4'
net
[
name
]
=
L
.
Convolution
(
net
.
relu5_3
,
num_output
=
512
,
pad
=
1
,
kernel_size
=
3
,
stride
=
2
,
**
kwargs
)
else
:
name
=
'pool5'
net
[
name
]
=
L
.
Pooling
(
net
.
relu5_3
,
pool
=
P
.
Pooling
.
MAX
,
kernel_size
=
2
,
stride
=
2
)
if
fully_conv
:
if
dilated
:
if
reduced
:
dilation
=
dilation
*
6
kernel_size
=
3
num_output
=
1024
else
:
dilation
=
dilation
*
2
kernel_size
=
7
num_output
=
4096
else
:
if
reduced
:
dilation
=
dilation
*
3
kernel_size
=
3
num_output
=
1024
else
:
kernel_size
=
7
num_output
=
4096
pad
=
int
(
int
((
kernel_size
+
(
dilation
-
1
)
*
(
kernel_size
-
1
))
-
1
)
/
2
)
net
.
fc6
=
L
.
Convolution
(
net
[
name
],
num_output
=
num_output
,
pad
=
pad
,
kernel_size
=
kernel_size
,
dilation
=
dilation
,
**
kwargs
)
net
.
relu6
=
L
.
ReLU
(
net
.
fc6
,
in_place
=
True
)
if
dropout
:
net
.
drop6
=
L
.
Dropout
(
net
.
relu6
,
dropout_ratio
=
0.5
,
in_place
=
True
)
if
reduced
:
net
.
fc7
=
L
.
Convolution
(
net
.
relu6
,
num_output
=
1024
,
kernel_size
=
1
,
**
kwargs
)
else
:
net
.
fc7
=
L
.
Convolution
(
net
.
relu6
,
num_output
=
4096
,
kernel_size
=
1
,
**
kwargs
)
net
.
relu7
=
L
.
ReLU
(
net
.
fc7
,
in_place
=
True
)
if
dropout
:
net
.
drop7
=
L
.
Dropout
(
net
.
relu7
,
dropout_ratio
=
0.5
,
in_place
=
True
)
else
:
net
.
fc6
=
L
.
InnerProduct
(
net
.
pool5
,
num_output
=
4096
)
net
.
relu6
=
L
.
ReLU
(
net
.
fc6
,
in_place
=
True
)
if
dropout
:
net
.
drop6
=
L
.
Dropout
(
net
.
relu6
,
dropout_ratio
=
0.5
,
in_place
=
True
)
net
.
fc7
=
L
.
InnerProduct
(
net
.
relu6
,
num_output
=
4096
)
net
.
relu7
=
L
.
ReLU
(
net
.
fc7
,
in_place
=
True
)
if
dropout
:
net
.
drop7
=
L
.
Dropout
(
net
.
relu7
,
dropout_ratio
=
0.5
,
in_place
=
True
)
# Update freeze layers.
kwargs
[
'param'
]
=
[
dict
(
lr_mult
=
0
,
decay_mult
=
0
),
dict
(
lr_mult
=
0
,
decay_mult
=
0
)]
layers
=
net
.
keys
()
for
freeze_layer
in
freeze_layers
:
if
freeze_layer
in
layers
:
net
.
update
(
freeze_layer
,
kwargs
)
return
net
def
ResNet101Body
(
net
,
from_layer
,
use_pool5
=
True
,
use_dilation_conv5
=
False
,
**
bn_param
):
conv_prefix
=
''
conv_postfix
=
''
bn_prefix
=
'bn_'
bn_postfix
=
''
scale_prefix
=
'scale_'
scale_postfix
=
''
ConvBNLayer
(
net
,
from_layer
,
'conv1'
,
use_bn
=
True
,
use_relu
=
True
,
num_output
=
64
,
kernel_size
=
7
,
pad
=
3
,
stride
=
2
,
conv_prefix
=
conv_prefix
,
conv_postfix
=
conv_postfix
,
bn_prefix
=
bn_prefix
,
bn_postfix
=
bn_postfix
,
scale_prefix
=
scale_prefix
,
scale_postfix
=
scale_postfix
,
**
bn_param
)
net
.
pool1
=
L
.
Pooling
(
net
.
conv1
,
pool
=
P
.
Pooling
.
MAX
,
kernel_size
=
3
,
stride
=
2
)
ResBody
(
net
,
'pool1'
,
'2a'
,
out2a
=
64
,
out2b
=
64
,
out2c
=
256
,
stride
=
1
,
use_branch1
=
True
,
**
bn_param
)
ResBody
(
net
,
'res2a'
,
'2b'
,
out2a
=
64
,
out2b
=
64
,
out2c
=
256
,
stride
=
1
,
use_branch1
=
False
,
**
bn_param
)
ResBody
(
net
,
'res2b'
,
'2c'
,
out2a
=
64
,
out2b
=
64
,
out2c
=
256
,
stride
=
1
,
use_branch1
=
False
,
**
bn_param
)
ResBody
(
net
,
'res2c'
,
'3a'
,
out2a
=
128
,
out2b
=
128
,
out2c
=
512
,
stride
=
2
,
use_branch1
=
True
,
**
bn_param
)
from_layer
=
'res3a'
for
i
in
range
(
1
,
4
):
block_name
=
'3b{}'
.
format
(
i
)
ResBody
(
net
,
from_layer
,
block_name
,
out2a
=
128
,
out2b
=
128
,
out2c
=
512
,
stride
=
1
,
use_branch1
=
False
,
**
bn_param
)
from_layer
=
'res{}'
.
format
(
block_name
)
ResBody
(
net
,
from_layer
,
'4a'
,
out2a
=
256
,
out2b
=
256
,
out2c
=
1024
,
stride
=
2
,
use_branch1
=
True
,
**
bn_param
)
from_layer
=
'res4a'
for
i
in
range
(
1
,
23
):
block_name
=
'4b{}'
.
format
(
i
)
ResBody
(
net
,
from_layer
,
block_name
,
out2a
=
256
,
out2b
=
256
,
out2c
=
1024
,
stride
=
1
,
use_branch1
=
False
,
**
bn_param
)
from_layer
=
'res{}'
.
format
(
block_name
)
stride
=
2
dilation
=
1
if
use_dilation_conv5
:
stride
=
1
dilation
=
2
ResBody
(
net
,
from_layer
,
'5a'
,
out2a
=
512
,
out2b
=
512
,
out2c
=
2048
,
stride
=
stride
,
use_branch1
=
True
,
dilation
=
dilation
,
**
bn_param
)
ResBody
(
net
,
'res5a'
,
'5b'
,
out2a
=
512
,
out2b
=
512
,
out2c
=
2048
,
stride
=
1
,
use_branch1
=
False
,
dilation
=
dilation
,
**
bn_param
)
ResBody
(
net
,
'res5b'
,
'5c'
,
out2a
=
512
,
out2b
=
512
,
out2c
=
2048
,
stride
=
1
,
use_branch1
=
False
,
dilation
=
dilation
,
**
bn_param
)
if
use_pool5
:
net
.
pool5
=
L
.
Pooling
(
net
.
res5c
,
pool
=
P
.
Pooling
.
AVE
,
global_pooling
=
True
)
return
net
def
ResNet152Body
(
net
,
from_layer
,
use_pool5
=
True
,
use_dilation_conv5
=
False
,
**
bn_param
):
conv_prefix
=
''
conv_postfix
=
''
bn_prefix
=
'bn_'
bn_postfix
=
''
scale_prefix
=
'scale_'
scale_postfix
=
''
ConvBNLayer
(
net
,
from_layer
,
'conv1'
,
use_bn
=
True
,
use_relu
=
True
,
num_output
=
64
,
kernel_size
=
7
,
pad
=
3
,
stride
=
2
,
conv_prefix
=
conv_prefix
,
conv_postfix
=
conv_postfix
,
bn_prefix
=
bn_prefix
,
bn_postfix
=
bn_postfix
,
scale_prefix
=
scale_prefix
,
scale_postfix
=
scale_postfix
,
**
bn_param
)
net
.
pool1
=
L
.
Pooling
(
net
.
conv1
,
pool
=
P
.
Pooling
.
MAX
,
kernel_size
=
3
,
stride
=
2
)
ResBody
(
net
,
'pool1'
,
'2a'
,
out2a
=
64
,
out2b
=
64
,
out2c
=
256
,
stride
=
1
,
use_branch1
=
True
,
**
bn_param
)
ResBody
(
net
,
'res2a'
,
'2b'
,
out2a
=
64
,
out2b
=
64
,
out2c
=
256
,
stride
=
1
,
use_branch1
=
False
,
**
bn_param
)
ResBody
(
net
,
'res2b'
,
'2c'
,
out2a
=
64
,
out2b
=
64
,
out2c
=
256
,
stride
=
1
,
use_branch1
=
False
,
**
bn_param
)
ResBody
(
net
,
'res2c'
,
'3a'
,
out2a
=
128
,
out2b
=
128
,
out2c
=
512
,
stride
=
2
,
use_branch1
=
True
,
**
bn_param
)
from_layer
=
'res3a'
for
i
in
range
(
1
,
8
):
block_name
=
'3b{}'
.
format
(
i
)
ResBody
(
net
,
from_layer
,
block_name
,
out2a
=
128
,
out2b
=
128
,
out2c
=
512
,
stride
=
1
,
use_branch1
=
False
,
**
bn_param
)
from_layer
=
'res{}'
.
format
(
block_name
)
ResBody
(
net
,
from_layer
,
'4a'
,
out2a
=
256
,
out2b
=
256
,
out2c
=
1024
,
stride
=
2
,
use_branch1
=
True
,
**
bn_param
)
from_layer
=
'res4a'
for
i
in
range
(
1
,
36
):
block_name
=
'4b{}'
.
format
(
i
)
ResBody
(
net
,
from_layer
,
block_name
,
out2a
=
256
,
out2b
=
256
,
out2c
=
1024
,
stride
=
1
,
use_branch1
=
False
,
**
bn_param
)
from_layer
=
'res{}'
.
format
(
block_name
)
stride
=
2
dilation
=
1
if
use_dilation_conv5
:
stride
=
1
dilation
=
2
ResBody
(
net
,
from_layer
,
'5a'
,
out2a
=
512
,
out2b
=
512
,
out2c
=
2048
,
stride
=
stride
,
use_branch1
=
True
,
dilation
=
dilation
,
**
bn_param
)
ResBody
(
net
,
'res5a'
,
'5b'
,
out2a
=
512
,
out2b
=
512
,
out2c
=
2048
,
stride
=
1
,
use_branch1
=
False
,
dilation
=
dilation
,
**
bn_param
)
ResBody
(
net
,
'res5b'
,
'5c'
,
out2a
=
512
,
out2b
=
512
,
out2c
=
2048
,
stride
=
1
,
use_branch1
=
False
,
dilation
=
dilation
,
**
bn_param
)
if
use_pool5
:
net
.
pool5
=
L
.
Pooling
(
net
.
res5c
,
pool
=
P
.
Pooling
.
AVE
,
global_pooling
=
True
)
return
net
def
InceptionV3Body
(
net
,
from_layer
,
output_pred
=
False
,
**
bn_param
):
# scale is fixed to 1, thus we ignore it.
use_scale
=
False
out_layer
=
'conv'
ConvBNLayer
(
net
,
from_layer
,
out_layer
,
use_bn
=
True
,
use_relu
=
True
,
num_output
=
32
,
kernel_size
=
3
,
pad
=
0
,
stride
=
2
,
use_scale
=
use_scale
,
**
bn_param
)
from_layer
=
out_layer
out_layer
=
'conv_1'
ConvBNLayer
(
net
,
from_layer
,
out_layer
,
use_bn
=
True
,
use_relu
=
True
,
num_output
=
32
,
kernel_size
=
3
,
pad
=
0
,
stride
=
1
,
use_scale
=
use_scale
,
**
bn_param
)
from_layer
=
out_layer
out_layer
=
'conv_2'
ConvBNLayer
(
net
,
from_layer
,
out_layer
,
use_bn
=
True
,
use_relu
=
True
,
num_output
=
64
,
kernel_size
=
3
,
pad
=
1
,
stride
=
1
,
use_scale
=
use_scale
,
**
bn_param
)
from_layer
=
out_layer
out_layer
=
'pool'
net
[
out_layer
]
=
L
.
Pooling
(
net
[
from_layer
],
pool
=
P
.
Pooling
.
MAX
,
kernel_size
=
3
,
stride
=
2
,
pad
=
0
)
from_layer
=
out_layer
out_layer
=
'conv_3'
ConvBNLayer
(
net
,
from_layer
,
out_layer
,
use_bn
=
True
,
use_relu
=
True
,
num_output
=
80
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
,
use_scale
=
use_scale
,
**
bn_param
)
from_layer
=
out_layer
out_layer
=
'conv_4'
ConvBNLayer
(
net
,
from_layer
,
out_layer
,
use_bn
=
True
,
use_relu
=
True
,
num_output
=
192
,
kernel_size
=
3
,
pad
=
0
,
stride
=
1
,
use_scale
=
use_scale
,
**
bn_param
)
from_layer
=
out_layer
out_layer
=
'pool_1'
net
[
out_layer
]
=
L
.
Pooling
(
net
[
from_layer
],
pool
=
P
.
Pooling
.
MAX
,
kernel_size
=
3
,
stride
=
2
,
pad
=
0
)
from_layer
=
out_layer
# inceptions with 1x1, 3x3, 5x5 convolutions
for
inception_id
in
range
(
0
,
3
):
if
inception_id
==
0
:
out_layer
=
'mixed'
tower_2_conv_num_output
=
32
else
:
out_layer
=
'mixed_{}'
.
format
(
inception_id
)
tower_2_conv_num_output
=
64
towers
=
[]
tower_name
=
'{}'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
64
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
],
**
bn_param
)
towers
.
append
(
tower
)
tower_name
=
'{}/tower'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
48
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
dict
(
name
=
'conv_1'
,
num_output
=
64
,
kernel_size
=
5
,
pad
=
2
,
stride
=
1
),
],
**
bn_param
)
towers
.
append
(
tower
)
tower_name
=
'{}/tower_1'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
64
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
dict
(
name
=
'conv_1'
,
num_output
=
96
,
kernel_size
=
3
,
pad
=
1
,
stride
=
1
),
dict
(
name
=
'conv_2'
,
num_output
=
96
,
kernel_size
=
3
,
pad
=
1
,
stride
=
1
),
],
**
bn_param
)
towers
.
append
(
tower
)
tower_name
=
'{}/tower_2'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'pool'
,
pool
=
P
.
Pooling
.
AVE
,
kernel_size
=
3
,
pad
=
1
,
stride
=
1
),
dict
(
name
=
'conv'
,
num_output
=
tower_2_conv_num_output
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
],
**
bn_param
)
towers
.
append
(
tower
)
out_layer
=
'{}/join'
.
format
(
out_layer
)
net
[
out_layer
]
=
L
.
Concat
(
*
towers
,
axis
=
1
)
from_layer
=
out_layer
# inceptions with 1x1, 3x3(in sequence) convolutions
out_layer
=
'mixed_3'
towers
=
[]
tower_name
=
'{}'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
384
,
kernel_size
=
3
,
pad
=
0
,
stride
=
2
),
],
**
bn_param
)
towers
.
append
(
tower
)
tower_name
=
'{}/tower'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
64
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
dict
(
name
=
'conv_1'
,
num_output
=
96
,
kernel_size
=
3
,
pad
=
1
,
stride
=
1
),
dict
(
name
=
'conv_2'
,
num_output
=
96
,
kernel_size
=
3
,
pad
=
0
,
stride
=
2
),
],
**
bn_param
)
towers
.
append
(
tower
)
tower_name
=
'{}'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'pool'
,
pool
=
P
.
Pooling
.
MAX
,
kernel_size
=
3
,
pad
=
0
,
stride
=
2
),
],
**
bn_param
)
towers
.
append
(
tower
)
out_layer
=
'{}/join'
.
format
(
out_layer
)
net
[
out_layer
]
=
L
.
Concat
(
*
towers
,
axis
=
1
)
from_layer
=
out_layer
# inceptions with 1x1, 7x1, 1x7 convolutions
for
inception_id
in
range
(
4
,
8
):
if
inception_id
==
4
:
num_output
=
128
elif
inception_id
==
5
or
inception_id
==
6
:
num_output
=
160
elif
inception_id
==
7
:
num_output
=
192
out_layer
=
'mixed_{}'
.
format
(
inception_id
)
towers
=
[]
tower_name
=
'{}'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
192
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
],
**
bn_param
)
towers
.
append
(
tower
)
tower_name
=
'{}/tower'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
num_output
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
dict
(
name
=
'conv_1'
,
num_output
=
num_output
,
kernel_size
=
[
1
,
7
],
pad
=
[
0
,
3
],
stride
=
[
1
,
1
]),
dict
(
name
=
'conv_2'
,
num_output
=
192
,
kernel_size
=
[
7
,
1
],
pad
=
[
3
,
0
],
stride
=
[
1
,
1
]),
],
**
bn_param
)
towers
.
append
(
tower
)
tower_name
=
'{}/tower_1'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
num_output
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
dict
(
name
=
'conv_1'
,
num_output
=
num_output
,
kernel_size
=
[
7
,
1
],
pad
=
[
3
,
0
],
stride
=
[
1
,
1
]),
dict
(
name
=
'conv_2'
,
num_output
=
num_output
,
kernel_size
=
[
1
,
7
],
pad
=
[
0
,
3
],
stride
=
[
1
,
1
]),
dict
(
name
=
'conv_3'
,
num_output
=
num_output
,
kernel_size
=
[
7
,
1
],
pad
=
[
3
,
0
],
stride
=
[
1
,
1
]),
dict
(
name
=
'conv_4'
,
num_output
=
192
,
kernel_size
=
[
1
,
7
],
pad
=
[
0
,
3
],
stride
=
[
1
,
1
]),
],
**
bn_param
)
towers
.
append
(
tower
)
tower_name
=
'{}/tower_2'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'pool'
,
pool
=
P
.
Pooling
.
AVE
,
kernel_size
=
3
,
pad
=
1
,
stride
=
1
),
dict
(
name
=
'conv'
,
num_output
=
192
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
],
**
bn_param
)
towers
.
append
(
tower
)
out_layer
=
'{}/join'
.
format
(
out_layer
)
net
[
out_layer
]
=
L
.
Concat
(
*
towers
,
axis
=
1
)
from_layer
=
out_layer
# inceptions with 1x1, 3x3, 1x7, 7x1 filters
out_layer
=
'mixed_8'
towers
=
[]
tower_name
=
'{}/tower'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
192
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
dict
(
name
=
'conv_1'
,
num_output
=
320
,
kernel_size
=
3
,
pad
=
0
,
stride
=
2
),
],
**
bn_param
)
towers
.
append
(
tower
)
tower_name
=
'{}/tower_1'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
192
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
dict
(
name
=
'conv_1'
,
num_output
=
192
,
kernel_size
=
[
1
,
7
],
pad
=
[
0
,
3
],
stride
=
[
1
,
1
]),
dict
(
name
=
'conv_2'
,
num_output
=
192
,
kernel_size
=
[
7
,
1
],
pad
=
[
3
,
0
],
stride
=
[
1
,
1
]),
dict
(
name
=
'conv_3'
,
num_output
=
192
,
kernel_size
=
3
,
pad
=
0
,
stride
=
2
),
],
**
bn_param
)
towers
.
append
(
tower
)
tower_name
=
'{}'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'pool'
,
pool
=
P
.
Pooling
.
MAX
,
kernel_size
=
3
,
pad
=
0
,
stride
=
2
),
],
**
bn_param
)
towers
.
append
(
tower
)
out_layer
=
'{}/join'
.
format
(
out_layer
)
net
[
out_layer
]
=
L
.
Concat
(
*
towers
,
axis
=
1
)
from_layer
=
out_layer
for
inception_id
in
range
(
9
,
11
):
num_output
=
384
num_output2
=
448
if
inception_id
==
9
:
pool
=
P
.
Pooling
.
AVE
else
:
pool
=
P
.
Pooling
.
MAX
out_layer
=
'mixed_{}'
.
format
(
inception_id
)
towers
=
[]
tower_name
=
'{}'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
320
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
],
**
bn_param
)
towers
.
append
(
tower
)
tower_name
=
'{}/tower'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
num_output
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
],
**
bn_param
)
subtowers
=
[]
subtower_name
=
'{}/mixed'
.
format
(
tower_name
)
subtower
=
InceptionTower
(
net
,
'{}/conv'
.
format
(
tower_name
),
subtower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
num_output
,
kernel_size
=
[
1
,
3
],
pad
=
[
0
,
1
],
stride
=
[
1
,
1
]),
],
**
bn_param
)
subtowers
.
append
(
subtower
)
subtower
=
InceptionTower
(
net
,
'{}/conv'
.
format
(
tower_name
),
subtower_name
,
[
dict
(
name
=
'conv_1'
,
num_output
=
num_output
,
kernel_size
=
[
3
,
1
],
pad
=
[
1
,
0
],
stride
=
[
1
,
1
]),
],
**
bn_param
)
subtowers
.
append
(
subtower
)
net
[
subtower_name
]
=
L
.
Concat
(
*
subtowers
,
axis
=
1
)
towers
.
append
(
net
[
subtower_name
])
tower_name
=
'{}/tower_1'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
num_output2
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
dict
(
name
=
'conv_1'
,
num_output
=
num_output
,
kernel_size
=
3
,
pad
=
1
,
stride
=
1
),
],
**
bn_param
)
subtowers
=
[]
subtower_name
=
'{}/mixed'
.
format
(
tower_name
)
subtower
=
InceptionTower
(
net
,
'{}/conv_1'
.
format
(
tower_name
),
subtower_name
,
[
dict
(
name
=
'conv'
,
num_output
=
num_output
,
kernel_size
=
[
1
,
3
],
pad
=
[
0
,
1
],
stride
=
[
1
,
1
]),
],
**
bn_param
)
subtowers
.
append
(
subtower
)
subtower
=
InceptionTower
(
net
,
'{}/conv_1'
.
format
(
tower_name
),
subtower_name
,
[
dict
(
name
=
'conv_1'
,
num_output
=
num_output
,
kernel_size
=
[
3
,
1
],
pad
=
[
1
,
0
],
stride
=
[
1
,
1
]),
],
**
bn_param
)
subtowers
.
append
(
subtower
)
net
[
subtower_name
]
=
L
.
Concat
(
*
subtowers
,
axis
=
1
)
towers
.
append
(
net
[
subtower_name
])
tower_name
=
'{}/tower_2'
.
format
(
out_layer
)
tower
=
InceptionTower
(
net
,
from_layer
,
tower_name
,
[
dict
(
name
=
'pool'
,
pool
=
pool
,
kernel_size
=
3
,
pad
=
1
,
stride
=
1
),
dict
(
name
=
'conv'
,
num_output
=
192
,
kernel_size
=
1
,
pad
=
0
,
stride
=
1
),
],
**
bn_param
)
towers
.
append
(
tower
)
out_layer
=
'{}/join'
.
format
(
out_layer
)
net
[
out_layer
]
=
L
.
Concat
(
*
towers
,
axis
=
1
)
from_layer
=
out_layer
if
output_pred
:
net
.
pool_3
=
L
.
Pooling
(
net
[
from_layer
],
pool
=
P
.
Pooling
.
AVE
,
kernel_size
=
8
,
pad
=
0
,
stride
=
1
)
net
.
softmax
=
L
.
InnerProduct
(
net
.
pool_3
,
num_output
=
1008
)
net
.
softmax_prob
=
L
.
Softmax
(
net
.
softmax
)
return
net
def
CreateMultiBoxHead
(
net
,
data_layer
=
"data"
,
num_classes
=
[],
from_layers
=
[],
use_objectness
=
False
,
use_iou
=
False
,
normalizations
=
[],
use_batchnorm
=
True
,
lr_mult
=
1
,
use_scale
=
True
,
min_sizes
=
[],
max_sizes
=
[],
prior_variance
=
[
0.1
],
aspect_ratios
=
[],
steps
=
[],
img_height
=
0
,
img_width
=
0
,
share_location
=
True
,
flip
=
True
,
clip
=
True
,
offset
=
0.5
,
inter_layer_depth
=
[],
kernel_size
=
1
,
pad
=
0
,
conf_postfix
=
''
,
loc_postfix
=
''
,
**
bn_param
):
assert
num_classes
,
"must provide num_classes"
assert
num_classes
>
0
,
"num_classes must be positive number"
if
normalizations
:
assert
len
(
from_layers
)
==
len
(
normalizations
),
"from_layers and normalizations should have same length"
assert
len
(
from_layers
)
==
len
(
min_sizes
),
"from_layers and min_sizes should have same length"
if
max_sizes
:
assert
len
(
from_layers
)
==
len
(
max_sizes
),
"from_layers and max_sizes should have same length"
if
aspect_ratios
:
assert
len
(
from_layers
)
==
len
(
aspect_ratios
),
"from_layers and aspect_ratios should have same length"
if
steps
:
assert
len
(
from_layers
)
==
len
(
steps
),
"from_layers and steps should have same length"
net_layers
=
net
.
keys
()
assert
data_layer
in
net_layers
,
"data_layer is not in net's layers"
if
inter_layer_depth
:
assert
len
(
from_layers
)
==
len
(
inter_layer_depth
),
"from_layers and inter_layer_depth should have same length"
num
=
len
(
from_layers
)
priorbox_layers
=
[]
loc_layers
=
[]
conf_layers
=
[]
iou_layers
=
[]
objectness_layers
=
[]
for
i
in
range
(
0
,
num
):
from_layer
=
from_layers
[
i
]
# Get the normalize value.
if
normalizations
:
if
normalizations
[
i
]
!=
-
1
:
norm_name
=
"{}_norm"
.
format
(
from_layer
)
net
[
norm_name
]
=
L
.
Normalize
(
net
[
from_layer
],
scale_filler
=
dict
(
type
=
"constant"
,
value
=
normalizations
[
i
]),
across_spatial
=
False
,
channel_shared
=
False
)
from_layer
=
norm_name
# Add intermediate layers.
if
inter_layer_depth
:
if
inter_layer_depth
[
i
]
>
0
:
inter_name
=
"{}_inter"
.
format
(
from_layer
)
ConvBNLayer
(
net
,
from_layer
,
inter_name
,
use_bn
=
use_batchnorm
,
use_relu
=
True
,
lr_mult
=
lr_mult
,
num_output
=
inter_layer_depth
[
i
],
kernel_size
=
3
,
pad
=
1
,
stride
=
1
,
**
bn_param
)
from_layer
=
inter_name
# Estimate number of priors per location given provided parameters.
min_size
=
min_sizes
[
i
]
if
type
(
min_size
)
is
not
list
:
min_size
=
[
min_size
]
aspect_ratio
=
[]
if
len
(
aspect_ratios
)
>
i
:
aspect_ratio
=
aspect_ratios
[
i
]
if
type
(
aspect_ratio
)
is
not
list
:
aspect_ratio
=
[
aspect_ratio
]
max_size
=
[]
if
len
(
max_sizes
)
>
i
:
max_size
=
max_sizes
[
i
]
if
type
(
max_size
)
is
not
list
:
max_size
=
[
max_size
]
if
max_size
:
assert
len
(
max_size
)
==
len
(
min_size
),
"max_size and min_size should have same length."
if
max_size
:
num_priors_per_location
=
(
2
+
len
(
aspect_ratio
))
*
len
(
min_size
)
else
:
num_priors_per_location
=
(
1
+
len
(
aspect_ratio
))
*
len
(
min_size
)
if
flip
:
num_priors_per_location
+=
len
(
aspect_ratio
)
*
len
(
min_size
)
step
=
[]
if
len
(
steps
)
>
i
:
step
=
steps
[
i
]
# Create location prediction layer.
name
=
"{}_mbox_loc{}"
.
format
(
from_layer
,
loc_postfix
)
num_loc_output
=
num_priors_per_location
*
4
;
if
not
share_location
:
num_loc_output
*=
num_classes
ConvBNLayer
(
net
,
from_layer
,
name
,
use_bn
=
use_batchnorm
,
use_relu
=
False
,
lr_mult
=
lr_mult
,
num_output
=
num_loc_output
,
kernel_size
=
kernel_size
,
pad
=
pad
,
stride
=
1
,
**
bn_param
)
permute_name
=
"{}_perm"
.
format
(
name
)
net
[
permute_name
]
=
L
.
Permute
(
net
[
name
],
order
=
[
0
,
2
,
3
,
1
])
flatten_name
=
"{}_flat"
.
format
(
name
)
net
[
flatten_name
]
=
L
.
Flatten
(
net
[
permute_name
],
axis
=
1
)
loc_layers
.
append
(
net
[
flatten_name
])
# Create confidence prediction layer.
name
=
"{}_mbox_conf{}"
.
format
(
from_layer
,
conf_postfix
)
num_conf_output
=
num_priors_per_location
*
num_classes
;
ConvBNLayer
(
net
,
from_layer
,
name
,
use_bn
=
use_batchnorm
,
use_relu
=
False
,
lr_mult
=
lr_mult
,
num_output
=
num_conf_output
,
kernel_size
=
kernel_size
,
pad
=
pad
,
stride
=
1
,
**
bn_param
)
permute_name
=
"{}_perm"
.
format
(
name
)
net
[
permute_name
]
=
L
.
Permute
(
net
[
name
],
order
=
[
0
,
2
,
3
,
1
])
flatten_name
=
"{}_flat"
.
format
(
name
)
net
[
flatten_name
]
=
L
.
Flatten
(
net
[
permute_name
],
axis
=
1
)
conf_layers
.
append
(
net
[
flatten_name
])
# Create iou prediction layer.
if
use_iou
:
name
=
"{}_mbox_iou{}"
.
format
(
from_layer
,
conf_postfix
)
num_iou_output
=
num_priors_per_location
ConvBNLayer
(
net
,
from_layer
,
name
,
use_bn
=
use_batchnorm
,
use_relu
=
False
,
lr_mult
=
lr_mult
,
num_output
=
num_iou_output
,
kernel_size
=
kernel_size
,
pad
=
pad
,
stride
=
1
,
**
bn_param
)
permute_name
=
"{}_perm"
.
format
(
name
)
net
[
permute_name
]
=
L
.
Permute
(
net
[
name
],
order
=
[
0
,
2
,
3
,
1
])
flatten_name
=
"{}_flat"
.
format
(
name
)
net
[
flatten_name
]
=
L
.
Flatten
(
net
[
permute_name
],
axis
=
1
)
iou_layers
.
append
(
net
[
flatten_name
])
# Create prior generation layer.
name
=
"{}_mbox_priorbox"
.
format
(
from_layer
)
priorbox_param
=
{
'min_size'
:
min_size
,
'clip'
:
clip
,
'offset'
:
offset
}
if
max_size
:
priorbox_param
.
update
({
'max_size'
:
max_size
})
if
aspect_ratio
:
priorbox_param
.
update
({
'aspect_ratio'
:
aspect_ratio
,
'flip'
:
flip
})
if
step
:
priorbox_param
.
update
({
'step'
:
step
})
if
img_height
!=
0
and
img_width
!=
0
:
if
img_height
==
img_width
:
priorbox_param
.
update
({
'img_size'
:
img_height
})
else
:
priorbox_param
.
update
({
'img_h'
:
img_height
,
'img_w'
:
img_width
})
net
[
name
]
=
L
.
Python
(
net
[
from_layer
],
net
[
'im_info'
],
module
=
'layers.prior_box_layer'
,
layer
=
'PriorBoxLayer'
,
param_str
=
str
(
priorbox_param
))
priorbox_layers
.
append
(
net
[
name
])
# Create objectness prediction layer.
if
use_objectness
:
name
=
"{}_mbox_objectness"
.
format
(
from_layer
)
num_obj_output
=
num_priors_per_location
*
2
;
ConvBNLayer
(
net
,
from_layer
,
name
,
use_bn
=
use_batchnorm
,
use_relu
=
False
,
lr_mult
=
lr_mult
,
num_output
=
num_obj_output
,
kernel_size
=
kernel_size
,
pad
=
pad
,
stride
=
1
,
**
bn_param
)
permute_name
=
"{}_perm"
.
format
(
name
)
net
[
permute_name
]
=
L
.
Permute
(
net
[
name
],
order
=
[
0
,
2
,
3
,
1
])
flatten_name
=
"{}_flat"
.
format
(
name
)
net
[
flatten_name
]
=
L
.
Flatten
(
net
[
permute_name
],
axis
=
1
)
objectness_layers
.
append
(
net
[
flatten_name
])
# Concatenate priorbox, loc, and conf layers.
mbox_layers
=
[]
name
=
"mbox_loc"
net
[
name
]
=
L
.
Concat
(
*
loc_layers
,
axis
=
1
)
net
[
'mbox_loc_reshape'
]
=
L
.
Reshape
(
net
[
name
],
shape
=
{
'dim'
:
[
0
,
-
1
,
4
]})
mbox_layers
.
append
(
net
[
'mbox_loc_reshape'
])
name
=
"mbox_conf"
net
[
name
]
=
L
.
Concat
(
*
conf_layers
,
axis
=
1
)
net
[
'mbox_conf_reshape'
]
=
L
.
Reshape
(
net
[
name
],
shape
=
{
'dim'
:
[
0
,
-
1
,
num_classes
]})
mbox_layers
.
append
(
net
[
'mbox_conf_reshape'
])
if
use_iou
:
name
=
"mbox_iou"
net
[
name
]
=
L
.
Concat
(
*
iou_layers
,
axis
=
1
)
net
[
'mbox_iou_reshape'
]
=
L
.
Reshape
(
net
[
name
],
shape
=
{
'dim'
:
[
0
,
-
1
]})
mbox_layers
.
append
(
net
[
'mbox_iou_reshape'
])
name
=
"mbox_priorbox"
net
[
name
]
=
L
.
Concat
(
*
priorbox_layers
,
axis
=
0
)
mbox_layers
.
append
(
net
[
name
])
if
use_objectness
:
name
=
"mbox_objectness"
net
[
name
]
=
L
.
Concat
(
*
objectness_layers
,
axis
=
1
)
mbox_layers
.
append
(
net
[
name
])
return
mbox_layers
Dragon/python/dragon/vm/caffe/net_spec.py
Dragon/python/dragon/vm/theano/compile/function.py
...
@@ -354,15 +354,14 @@ class Function(object):
         # Store for future development
         self.meta_graph = meta_graph
-        self.graph_name = meta_graph.name

         # Call c api to create graph
-        ws.CreateGraph(meta_graph)
+        self.graph_name = ws.CreateGraph(meta_graph)

         # Bind a lambda callback to run this graph
         callback_inputs = self.inputs if explicit_inputs else []
         self.callback = lambda *args, **kwargs: \
-            ws.RunGraph(meta_graph.name, (callback_inputs, args), self.outputs, **kwargs)
+            ws.RunGraph(self.graph_name, (callback_inputs, args), self.outputs, **kwargs)

         # Self return
         return self
...
@@ -386,7 +385,7 @@ def function(inputs=None, outputs=None, givens=None, updater=None):
     ----------
     inputs : sequence of Tensor, optional
         The inputs to feed.
-    inputs : sequence of Tensor, optional
+    outputs : sequence of Tensor, optional
         The outputs to fetch.
     givens : dict of Tensor, optional
         The substitutions to use.
...
Dragon/python/dragon/vm/torch/ops/modules/axis.py
...
@@ -60,6 +60,7 @@ class Gather(BaseModule):
             'n_inputs': 2, 'n_outputs': 1,
             'arguments': {
                 'axis': self.axis,
+                'zero_grad': True,
             }
         }
...
Dragon/src/contrib/rcnn/bbox_utils.h
...
@@ -188,15 +188,15 @@ inline void RetrieveRoIs(
 template <typename T>
 inline int roi_level(
-    const int min_level,         // e.g. 2
-    const int max_level,         // e.g. 5
-    const int canonical_level,   // e.g. 4
-    const int canonical_scale,   // e.g. 224
+    const int min_level,
+    const int max_level,
+    const int canonical_level,
+    const int canonical_scale,
     T* roi) {
     T w = roi[3] - roi[1] + 1;
     T h = roi[4] - roi[2] + 1;
     // Refer the settings of paper
-    int level = canonical_level + (int)std::log(
+    int level = canonical_level + std::log2(
         std::max(std::sqrt(w * h), (T)1) / (T)canonical_scale);
     return std::min(max_level, std::max(min_level, level));
 }
...
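The corrected helper now follows the FPN-style level assignment, level = canonical_level + log2(sqrt(w * h) / canonical_scale), clamped to [min_level, max_level]; the old code used the natural logarithm instead of log2. A small Python rendering of the same arithmetic (for illustration only):

import math

def roi_level(min_level, max_level, canonical_level, canonical_scale, w, h):
    # Mirrors the C++ helper above; the float result is truncated to int.
    level = int(canonical_level +
                math.log2(max(math.sqrt(w * h), 1.0) / canonical_scale))
    return min(max_level, max(min_level, level))

print(roi_level(2, 5, 4, 224, 112, 112))  # small RoI -> 3
print(roi_level(2, 5, 4, 224, 448, 448))  # large RoI -> 5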
Dragon/src/contrib/rcnn/proposal_op.cc
...
@@ -80,7 +80,7 @@ void ProposalOp<Context>::RunWithType(
         anchors_.Reshape({ A, 4 });
         rcnn::GenerateAnchors<BT>(strides[i],
-            (int)ratios.size(), 1, &ratios[0], &scales[0],
+            (int)ratios.size(), 1, &ratios[0], &scales[i],
             anchors_.template mutable_data<BT, CPUContext>());
         rcnn::GenerateGridAnchors<BT>(
...
Dragon/src/kernels/ndarray/gather_op_kernel.cc

...

@@ -6,134 +6,93 @@ namespace dragon {

namespace kernel {

/*! CanonicalAxis <T = int32, Device = CPU> */

template <> void CanonicalAxis<int, CPUContext>(
    const int count,
    const int dim,
    int* y,
    CPUContext* ctx) {
#ifdef WITH_OMP
    #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
    for (int i = 0; i < count; ++i)
        if (y[i] < 0) y[i] += dim;
}

/*! Gather <T = ?, Device = CPU> */

template <typename T>
void _Gather(
    const int count,
    const int outer_dim,
    const int inner_dim,
    const int x_slice_dim,
    const int y_slice_dim,
    const int* indices,
    const int64_t* indices,
    const T* x,
    T* y,
    CPUContext* ctx) {
    int64_t x_offset, y_offset, x_idx_offset, y_idx_offset;
    for (int i = 0; i < y_slice_dim; ++i) {
        y_idx_offset = i;
        x_idx_offset = indices[y_idx_offset];
    int64_t x_offset, select_idx;
    for (int n = 0; n < outer_dim; ++n) {
        x_offset = (n * x_slice_dim + x_idx_offset) * inner_dim;
        y_offset = (n * y_slice_dim + y_idx_offset) * inner_dim;
        for (int i = 0; i < y_slice_dim; ++i) {
            select_idx = indices[i];
            select_idx = select_idx >= 0 ?
                select_idx : select_idx + x_slice_dim;
            x_offset = (n * x_slice_dim + select_idx) * inner_dim;
            ctx->Copy<T, CPUContext, CPUContext>(
                inner_dim, y + y_offset, x + x_offset);
                inner_dim, y, x + x_offset);
            y += inner_dim;
        }
    }
}

/*! Gather <T = float32, Device = CPU> */

template <> void Gather<float, CPUContext>(
    const int count,
    const int outer_dim,
    const int inner_dim,
    const int x_slice_dim,
    const int y_slice_dim,
    const int* indices,
    const float* x,
    float* y,
    CPUContext* ctx) {
    _Gather<float>(count, outer_dim, inner_dim,
        x_slice_dim, y_slice_dim, indices, x, y, ctx);
}

/*! Gather <T = int32, Device = CPU> */

template <> void Gather<int, CPUContext>(
    const int count,
    const int outer_dim,
    const int inner_dim,
    const int x_slice_dim,
    const int y_slice_dim,
    const int* indices,
    const int* x,
    int* y,
    CPUContext* ctx) {
    _Gather<int>(count, outer_dim, inner_dim,
        x_slice_dim, y_slice_dim, indices, x, y, ctx);
}

/*! GatherGrad <T = ?, Device = CPU> */

template <typename T>
void _GatherGrad(
    const int count,
    const int outer_dim,
    const int inner_dim,
    const int x_slice_dim,
    const int y_slice_dim,
    const int* indices,
    const int64_t* indices,
    const T* dy,
    T* dx,
    CPUContext* ctx) {
    int64_t x_offset, y_offset, x_idx_offset, y_idx_offset;
    for (int i = 0; i < y_slice_dim; ++i) {
        y_idx_offset = i;
        x_idx_offset = indices[y_idx_offset];
    int64_t x_offset, select_idx;
    for (int n = 0; n < outer_dim; ++n) {
        x_offset = (n * x_slice_dim + x_idx_offset) * inner_dim;
        y_offset = (n * y_slice_dim + y_idx_offset) * inner_dim;
        for (int i = 0; i < y_slice_dim; ++i) {
            select_idx = indices[i];
            select_idx = select_idx >= 0 ?
                select_idx : select_idx + x_slice_dim;
            x_offset = (n * x_slice_dim + select_idx) * inner_dim;
            math::Add<T, CPUContext>(inner_dim,
                dy + y_offset, dx + x_offset, dx + x_offset, ctx);
                dy, dx + x_offset, dx + x_offset, ctx);
            dy += inner_dim;
        }
    }
}

/*! GatherGrad <T = float32, Device = CPU> */

template <> void GatherGrad<float, CPUContext>(
    const int count,
    const int outer_dim,
    const int inner_dim,
    const int x_slice_dim,
    const int y_slice_dim,
    const int* indices,
    const float* dy,
    float* dx,
    CPUContext* ctx) {
    _GatherGrad<float>(count, outer_dim, inner_dim,
        x_slice_dim, y_slice_dim, indices, dy, dx, ctx);
}

/*! GatherGrad <T = int32, Device = CPU> */

/*! Kernel Launchers */

#define DEFINE_GATHER_KERNEL_LAUNCHER(name, T) \
    template <> void name<T, CPUContext>( \
        const int outer_dim, \
        const int inner_dim, \
        const int x_slice_dim, \
        const int y_slice_dim, \
        const int64_t* indices, \
        const T* x, \
        T* y, \
        CPUContext* ctx) { \
        _##name<T> \
            (outer_dim, inner_dim, x_slice_dim, \
                y_slice_dim, indices, x, y, ctx); \
    }

template <> void GatherGrad<int, CPUContext>(
    const int count,
    const int outer_dim,
    const int inner_dim,
    const int x_slice_dim,
    const int y_slice_dim,
    const int* indices,
    const int* dy,
    int* dx,
    CPUContext* ctx) {
    _GatherGrad<int>(count, outer_dim, inner_dim,
        x_slice_dim, y_slice_dim, indices, dy, dx, ctx);
}

DEFINE_GATHER_KERNEL_LAUNCHER(Gather, bool);
DEFINE_GATHER_KERNEL_LAUNCHER(Gather, int8_t);
DEFINE_GATHER_KERNEL_LAUNCHER(Gather, uint8_t);
DEFINE_GATHER_KERNEL_LAUNCHER(Gather, int);
DEFINE_GATHER_KERNEL_LAUNCHER(Gather, int64_t);
DEFINE_GATHER_KERNEL_LAUNCHER(Gather, float16);
DEFINE_GATHER_KERNEL_LAUNCHER(Gather, float);
DEFINE_GATHER_KERNEL_LAUNCHER(Gather, double);

DEFINE_GATHER_KERNEL_LAUNCHER(GatherGrad, int8_t);
DEFINE_GATHER_KERNEL_LAUNCHER(GatherGrad, uint8_t);
DEFINE_GATHER_KERNEL_LAUNCHER(GatherGrad, int);
DEFINE_GATHER_KERNEL_LAUNCHER(GatherGrad, int64_t);
DEFINE_GATHER_KERNEL_LAUNCHER(GatherGrad, float16);
DEFINE_GATHER_KERNEL_LAUNCHER(GatherGrad, float);
DEFINE_GATHER_KERNEL_LAUNCHER(GatherGrad, double);

#undef DEFINE_GATHER_KERNEL_LAUNCHER

}  // namespace kernel
...
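For reference, the (outer_dim, inner_dim, x_slice_dim, y_slice_dim) decomposition treats the input as a 3-D view [outer_dim, x_slice_dim, inner_dim] and gathers y_slice_dim slices along the middle axis; gathering rows of an [N, D] matrix corresponds to outer_dim = 1, x_slice_dim = N, inner_dim = D. A standalone sketch of the same indexing (plain C++, independent of Dragon's CPUContext):

    #include <cstdint>
    #include <cstring>

    // Gather along the "slice" axis of an [outer, x_slice, inner] view.
    // Negative indices count back from x_slice, as in the kernel above.
    template <typename T>
    void GatherCPU(int outer_dim, int inner_dim,
                   int x_slice_dim, int y_slice_dim,
                   const int64_t* indices, const T* x, T* y) {
        for (int n = 0; n < outer_dim; ++n) {
            for (int i = 0; i < y_slice_dim; ++i) {
                int64_t idx = indices[i];
                if (idx < 0) idx += x_slice_dim;
                const T* src = x + (n * x_slice_dim + idx) * inner_dim;
                std::memcpy(y, src, sizeof(T) * inner_dim);
                y += inner_dim;  // output slices are written contiguously
            }
        }
    }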
Dragon/src/kernels/ndarray/gather_op_kernel.cu
...
...
@@ -2,160 +2,176 @@
#include "core/context_cuda.h"
#include "utils/op_kernel.h"
#include "utils/cub_device.h"
namespace dragon {
namespace kernel {
/*! CanonicalAxis <T = int32, Device = CUDA> */
template <typename T>
__global__ void _CanonicalAxis(
const int count,
const int dim,
T* y) {
CUDA_1D_KERNEL_LOOP(idx, count) {
if (y[idx] < 0) y[idx] += dim;
}
}
template <> void CanonicalAxis<int, CUDAContext>(
const int count,
const int dim,
int* y,
CUDAContext* ctx) {
_CanonicalAxis<int>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >
(count, dim, y);
}
/*! Gather <T = ?, Device = CUDA> */
template <typename T>
__global__ void _Gather(
const int count,
const int outer_dim,
const int nthreads,
const int inner_dim,
const int x_slice_dim,
const int y_slice_dim,
    const int* indices,
    const int64_t* indices,
const T* x,
T* y) {
CUDA_1D_KERNEL_LOOP(idx, count) {
const int outer_idx = idx / inner_dim / y_slice_dim;
const int slice_idx = idx % inner_dim;
const int y_idx_offset = (idx / inner_dim) % y_slice_dim;
const int x_idx_offset = indices[y_idx_offset];
const int x_idx = (outer_idx * x_slice_dim + x_idx_offset)
* inner_dim + slice_idx;
y[idx] = x[x_idx];
CUDA_1D_KERNEL_LOOP(y_idx, nthreads) {
const int outer_idx = y_idx / inner_dim / y_slice_dim;
const int inner_idx = y_idx % inner_dim;
#if __CUDA_ARCH__ >= 350
int select_idx = __ldg(indices +
((y_idx / inner_dim) % y_slice_dim));
#else
int select_idx = indices[
(y_idx / inner_dim) % y_slice_dim];
#endif
select_idx = select_idx >= 0 ?
select_idx : select_idx + x_slice_dim;
const int x_idx = (outer_idx * x_slice_dim + select_idx)
* inner_dim + inner_idx;
y[y_idx] = x[x_idx];
}
}
/*! Gather <T = float32, Device = CUDA> */
template <> void Gather<float, CUDAContext>(
const int count,
const int outer_dim,
const int inner_dim,
const int x_slice_dim,
const int y_slice_dim,
const int* indices,
const float* x,
float* y,
CUDAContext* ctx) {
_Gather<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >
(count, outer_dim, inner_dim,
x_slice_dim, y_slice_dim,
indices, x, y);
}
/*! Gather <T = int32, Device = CUDA> */
template <> void Gather<int, CUDAContext>(
const int count,
const int outer_dim,
const int inner_dim,
const int x_slice_dim,
const int y_slice_dim,
const int* indices,
const int* x,
int* y,
CUDAContext* ctx) {
_Gather<int>
<< <CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >
(count, outer_dim, inner_dim,
x_slice_dim, y_slice_dim,
indices, x, y);
}
/*! GatherGrad <T = ?, Device = CUDA> */
template <typename T>
__global__ void _GatherGrad(
const int count,
const int outer_dim,
const int nthreads,
const int inner_dim,
const int x_slice_dim,
const int y_slice_dim,
    const int* indices,
    const int64_t* indices,
const T* dy,
T* dx) {
CUDA_1D_KERNEL_LOOP(idx, count) {
const int outer_idx = idx / inner_dim / y_slice_dim;
const int slice_idx = idx % inner_dim;
const int y_idx_offset = (idx / inner_dim) % y_slice_dim;
const int x_idx_offset = indices[y_idx_offset];
const int x_idx = (outer_idx * x_slice_dim + x_idx_offset)
* inner_dim + slice_idx;
atomicAdd(dx + x_idx, dy[idx]);
CUDA_1D_KERNEL_LOOP(i, nthreads) {
const int outer_idx = i / inner_dim;
const int inner_idx = i % inner_dim;
for (int j = 0; j < y_slice_dim; ++j) {
#if __CUDA_ARCH__ >= 350
int select_idx = __ldg(indices + j);
#else
int select_idx = indices[j];
#endif
select_idx = select_idx >= 0 ?
select_idx : select_idx + x_slice_dim;
const int x_idx = (outer_idx * x_slice_dim + select_idx)
* inner_dim + inner_idx;
const int y_idx = (outer_idx * y_slice_dim + j)
* inner_dim + inner_idx;
dx[x_idx] += dy[y_idx];
}
}
}
/*! GatherGrad <T = float32, Device = CUDA> */
/*! GatherGrad <T = float16, Device = CUDA> */
template <> void GatherGrad<float, CUDAContext>(
const int count,
const int outer_dim,
template <> __global__ void _GatherGrad<half>(
const int nthreads,
const int inner_dim,
const int x_slice_dim,
const int y_slice_dim,
const int* indices,
const float* dy,
float* dx,
CUDAContext* ctx) {
_GatherGrad<float>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
0, ctx->cuda_stream() >> >
(count, outer_dim, inner_dim,
x_slice_dim, y_slice_dim,
indices, dy, dx);
const int64_t* indices,
const half* dy,
half* dx) {
CUDA_1D_KERNEL_LOOP(i, nthreads) {
#if __CUDA_ARCH__ >= 530
const int outer_idx = i / inner_dim;
const int inner_idx = i % inner_dim;
for (int j = 0; j < y_slice_dim; ++j) {
int select_idx = __ldg(indices + j);
select_idx = select_idx >= 0 ?
select_idx : select_idx + x_slice_dim;
const int x_idx = (outer_idx * x_slice_dim + select_idx)
* inner_dim + inner_idx;
const int y_idx = (outer_idx * y_slice_dim + j)
* inner_dim + inner_idx;
dx[x_idx] = __hadd(dx[x_idx], dy[y_idx]);
}
#endif
}
}
/*! GatherGrad <T = int32, Device = CUDA> */
/*! Kernel Launchers */
#define DEFINE_GATHER_KERNEL_LAUNCHER(T) \
template <> void Gather<T, CUDAContext>( \
const int outer_dim, \
const int inner_dim, \
const int x_slice_dim, \
const int y_slice_dim, \
const int64_t* indices, \
const T* x, \
T* y, \
CUDAContext* ctx) { \
auto nthreads = outer_dim * y_slice_dim * inner_dim; \
_Gather<T> \
<< < CUDA_BLOCKS(nthreads), CUDA_THREADS, \
0, ctx->cuda_stream() >> > \
(nthreads, inner_dim, x_slice_dim, \
y_slice_dim, indices, x, y); \
}
template <> void GatherGrad<int, CUDAContext>(
const int count,
#define DEFINE_GATHER_GRAD_KERNEL_LAUNCHER(T) \
template <> void GatherGrad<T, CUDAContext>( \
const int outer_dim, \
const int inner_dim, \
const int x_slice_dim, \
const int y_slice_dim, \
const int64_t* indices, \
const T* dy, \
T* dx, \
CUDAContext* ctx) { \
auto nthreads = outer_dim * inner_dim; \
_GatherGrad<T> \
<< < CUDA_BLOCKS(nthreads), CUDA_THREADS, \
0, ctx->cuda_stream() >> > \
(nthreads, inner_dim, x_slice_dim, \
y_slice_dim, indices, dy, dx); \
}
DEFINE_GATHER_KERNEL_LAUNCHER(bool);
DEFINE_GATHER_KERNEL_LAUNCHER(int8_t);
DEFINE_GATHER_KERNEL_LAUNCHER(uint8_t);
DEFINE_GATHER_KERNEL_LAUNCHER(int);
DEFINE_GATHER_KERNEL_LAUNCHER(int64_t);
DEFINE_GATHER_KERNEL_LAUNCHER(float16);
DEFINE_GATHER_KERNEL_LAUNCHER(float);
DEFINE_GATHER_KERNEL_LAUNCHER(double);
DEFINE_GATHER_GRAD_KERNEL_LAUNCHER(int8_t);
DEFINE_GATHER_GRAD_KERNEL_LAUNCHER(uint8_t);
DEFINE_GATHER_GRAD_KERNEL_LAUNCHER(int);
DEFINE_GATHER_GRAD_KERNEL_LAUNCHER(int64_t);
DEFINE_GATHER_GRAD_KERNEL_LAUNCHER(float);
DEFINE_GATHER_GRAD_KERNEL_LAUNCHER(double);
template <> void GatherGrad<float16, CUDAContext>(
const int outer_dim,
const int inner_dim,
const int x_slice_dim,
const int y_slice_dim,
    const int* indices,
    const int* dy,
    int* dx,
    const int64_t* indices,
    const float16* dy,
    float16* dx,
CUDAContext* ctx) {
_GatherGrad<int>
<< < CUDA_BLOCKS(count), CUDA_THREADS,
auto nthreads = outer_dim * inner_dim;
_GatherGrad<half>
<< < CUDA_BLOCKS(nthreads), CUDA_THREADS,
0, ctx->cuda_stream() >> >
(count, outer_dim, inner_dim,
x_slice_dim, y_slice_dim,
indices, dy, dx);
(nthreads, inner_dim, x_slice_dim,
y_slice_dim, indices,
reinterpret_cast<const half*>(dy),
reinterpret_cast<half*>(dx));
}
#undef DEFINE_GATHER_KERNEL_LAUNCHER
#undef DEFINE_GATHER_GRAD_KERNEL_LAUNCHER
} // namespace kernel
} // namespace dragon
...
...
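Note the shift in the backward kernel's parallelization: the previous version launched one thread per output element and relied on atomicAdd into dx, while the new version launches one thread per (outer, inner) position and loops over the gathered indices serially, so repeated indices accumulate without atomics (which is presumably also what enables the half-precision specialization built on __hadd). A serial C++ rendering of that per-thread loop, for reference only:

    #include <cstdint>

    // Mirrors the new _GatherGrad mapping: the outer loop plays the role of
    // one CUDA thread per (outer_idx, inner_idx); dx is assumed pre-zeroed
    // (or holding gradients to accumulate into).
    template <typename T>
    void GatherGradReference(int outer_dim, int inner_dim,
                             int x_slice_dim, int y_slice_dim,
                             const int64_t* indices, const T* dy, T* dx) {
        const int nthreads = outer_dim * inner_dim;
        for (int i = 0; i < nthreads; ++i) {
            const int outer_idx = i / inner_dim;
            const int inner_idx = i % inner_dim;
            for (int j = 0; j < y_slice_dim; ++j) {
                int64_t idx = indices[j];
                if (idx < 0) idx += x_slice_dim;
                const int64_t x_i = (outer_idx * x_slice_dim + idx) * inner_dim + inner_idx;
                const int64_t y_i = (outer_idx * y_slice_dim + j) * inner_dim + inner_idx;
                dx[x_i] += dy[y_i];  // no race: each "thread" owns its inner_idx column
            }
        }
    }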
Dragon/src/contrib/onnx/onnx_attibute.cc → Dragon/src/onnx/onnx_attibute.cc

#include "contrib/onnx/onnx_backend.h"
#include "onnx/onnx_backend.h"

namespace dragon {
...
Dragon/src/contrib/onnx/onnx_backend.cc → Dragon/src/onnx/onnx_backend.cc

#include "core/operator_schema.h"
#include "utils/proto_utils.h"
#include "contrib/onnx/onnx_backend.h"
#include "onnx/onnx_backend.h"

namespace dragon {
...
Dragon/src/contrib/onnx/onnx_backend.h → Dragon/src/onnx/onnx_backend.h

/*!
 * Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
 *
 * Licensed under the BSD 2-Clause License.
 * You should have received a copy of the BSD 2-Clause License
 * along with the software. If not, See,
 *
 *      <https://opensource.org/licenses/BSD-2-Clause>
 *
 * Codes are based on:
 *
 *      <https://github.com/pytorch/pytorch/blob/master/caffe2/onnx/backend.h>
 *
 * ------------------------------------------------------------
 */

#ifndef DRAGON_CONTRIB_ONNX_ONNX_BACKEND_H_
#define DRAGON_CONTRIB_ONNX_ONNX_BACKEND_H_
#ifndef DRAGON_ONNX_ONNX_BACKEND_H_
#define DRAGON_ONNX_ONNX_BACKEND_H_

#include "core/common.h"
#include "proto/onnx.pb.h"
...

@@ -228,4 +228,4 @@ class ONNXBackend {
}  // namespace dragon

#endif  // DRAGON_CONTRIB_ONNX_ONNX_BACKEND_H_
\ No newline at end of file
#endif  // DRAGON_ONNX_ONNX_BACKEND_H_
\ No newline at end of file
Dragon/src/contrib/onnx/onnx_importer.cc → Dragon/src/onnx/onnx_importer.cc

#include "utils/map_utils.h"
#include "contrib/onnx/onnx_backend.h"
#include "onnx/onnx_backend.h"

namespace dragon {
...
Dragon/src/contrib/onnx/onnx_initializer.cc → Dragon/src/onnx/onnx_initializer.cc

#include "contrib/onnx/onnx_backend.h"
#include "onnx/onnx_backend.h"

namespace dragon {
...
Dragon/src/operators/arithmetic/maximum_op.cc
Dragon/src/operators/ndarray/gather_op.cc
...
...
@@ -13,12 +13,10 @@ namespace dragon {
template <class Context> template <typename T>
void GatherOp<Context>::RunWithType() {
    auto* Xdata = Input(0).template data<T, Context>();
    auto* indices = Input(1).template mutable_data<int, Context>();
    auto* indices = Input(1).template mutable_data<int64_t, Context>();
    auto* Ydata = Output(0)->template mutable_data<T, Context>();
    kernel::CanonicalAxis(Input(1).count(), x_slice_dim, indices, ctx());
    kernel::Gather(Output(0)->count(),
    kernel::Gather(
        outer_dim, inner_dim, x_slice_dim, y_slice_dim,
        indices, Xdata, Ydata, ctx());
...
@@ -28,22 +26,38 @@ template <class Context>
void GatherOp<Context>::RunOnDevice() {
    DETERMINE_RUNTIME_ARGUMENTS(Input(0));
    output_dims = Input(0).dims();
    x_slice_dim = Input(0).dim(axis);
    output_dims[axis] = y_slice_dim = Input(1).count();
    y_slice_dim = Input(1).count();
    outer_dim = Input(0).count(0, axis);
    inner_dim = Input(0).count(axis + 1);
    CHECK_GT(y_slice_dim, 0) << "\nLength of indices must > 0.";
    const auto& s1 = Input(0).dims().begin();
    const auto& e1 = s1 + axis, s3 = e1 + 1;
    const auto& e3 = Input(0).dims().end();
    const auto& s2 = Input(1).dims().begin();
    const auto& e2 = Input(1).dims().end();
    output_dims.assign(s1, e1);
    output_dims.insert(output_dims.end(), s2, e2);
    output_dims.insert(output_dims.end(), s3, e3);
    Output(0)->Reshape(output_dims);
    CHECK(Input(1).template IsType<int>())
        << "\nThe type of indices should be int32.";
    CHECK(Input(1).template IsType<int64_t>())
        << "\nThe type of indices should be int64.";
    if (XIsType(Input(0), float)) RunWithType<float>();
    if (XIsType(Input(0), bool)) RunWithType<bool>();
    else if (XIsType(Input(0), int8_t)) RunWithType<int8_t>();
    else if (XIsType(Input(0), uint8_t)) RunWithType<uint8_t>();
    else if (XIsType(Input(0), int)) RunWithType<int>();
    else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "int32" });
    else if (XIsType(Input(0), int64_t)) RunWithType<int64_t>();
    else if (XIsType(Input(0), float16)) RunWithType<float16>();
    else if (XIsType(Input(0), float)) RunWithType<float>();
    else if (XIsType(Input(0), double)) RunWithType<double>();
    else LOG(FATAL) << DTypeHelper(Input(0), {
        "bool", "int8", "uint8", "int32", "int64",
        "float16", "float32", "float64",
    });
}

DEPLOY_CPU(Gather);
...
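The rewritten RunOnDevice builds the output shape as X.dims[:axis] + indices.dims + X.dims[axis+1:] (instead of overwriting dims[axis] with indices.count()), so multi-dimensional index tensors keep their shape in the output. A small sketch of that shape rule (an illustrative helper, not Dragon API):

    #include <cstdint>
    #include <vector>

    // Output shape of Gather(X, indices, axis):
    // X.dims[:axis] ++ indices.dims ++ X.dims[axis+1:]
    std::vector<int64_t> GatherOutputDims(
        const std::vector<int64_t>& x_dims,
        const std::vector<int64_t>& idx_dims,
        int axis) {
        std::vector<int64_t> out(x_dims.begin(), x_dims.begin() + axis);
        out.insert(out.end(), idx_dims.begin(), idx_dims.end());
        out.insert(out.end(), x_dims.begin() + axis + 1, x_dims.end());
        return out;
    }

    // e.g. X: [2, 5, 3], indices: [4, 6], axis = 1  ->  [2, 4, 6, 3]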
@@ -54,18 +68,17 @@ OPERATOR_SCHEMA(Gather).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T>
void GatherGradientOp<Context>::RunWithType() {
    auto* indices = Input(1).template data<int, Context>();
    auto* indices = Input(1).template data<int64_t, Context>();
    auto* dYdata = Input(-1).template data<T, Context>();
    auto* dXdata = Output(0)->template mutable_data<T, Context>();
    T* dXdata = nullptr;
    if (!acc_grad) {
        dXdata = Output(0)->template mutable_data<T, Context>();
        math::Set(Output(0)->count(), cast::to<T>(0.f), dXdata, ctx());
    } else {
        dXdata = Output(0)->template mutable_data<T, Context>();
    // Zero the gradients Optionally
    if (zero_grad) {
        math::Set(Output(0)->count(), cast::to<T>(0.f), dXdata, ctx());
    }
    kernel::GatherGrad(Input(-1).count(),
    kernel::GatherGrad(
        outer_dim, inner_dim, x_slice_dim, y_slice_dim,
        indices, dYdata, dXdata, ctx());
...
@@ -82,12 +95,20 @@ void GatherGradientOp<Context>::RunOnDevice() {
    Output(0)->ReshapeLike(Input(0));
    CHECK(Input(1).template IsType<int>())
        << "\nThe type of indices should be int32.";
    CHECK(Input(1).template IsType<int64_t>())
        << "\nThe type of indices should be int64.";
    if (XIsType(Input(0), float)) RunWithType<float>();
    if (XIsType(Input(0), int8_t)) RunWithType<int8_t>();
    else if (XIsType(Input(0), uint8_t)) RunWithType<uint8_t>();
    else if (XIsType(Input(0), int)) RunWithType<int>();
    else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "int32" });
    else if (XIsType(Input(0), int64_t)) RunWithType<int64_t>();
    else if (XIsType(Input(0), float16)) RunWithType<float16>();
    else if (XIsType(Input(0), float)) RunWithType<float>();
    else if (XIsType(Input(0), double)) RunWithType<double>();
    else LOG(FATAL) << DTypeHelper(Input(0), {
        "int8", "uint8", "int32", "int64",
        "float16", "float32", "float64",
    });
}

DEPLOY_CPU(GatherGradient);
...
Dragon/src/operators/vision/drop_block2d_op.cc
...
...
@@ -15,6 +15,27 @@ void DropBlock2dOp<Context>::RunWithType() {
            Output(0)->count(), Ydata, Xdata);
        }
    }
    else if (phase() == "TRAIN") {
        if (data_format == "NCHW") {
            n = Input(0).dim(0), c = Input(0).dim(1);
            h = Input(0).dim(2), w = Input(0).dim(3);
        } else if (data_format == "NHWC") {
            n = Input(0).dim(0), c = Input(0).dim(-1);
            h = Input(0).dim(1), w = Input(0).dim(2);
        }
        seed_h = h - block_size + 1;
        seed_w = w - block_size + 1;
        CHECK(seed_h > 0 && seed_w > 0)
            << "\nExcepted block_size <= feat_size.";
        if (decrement > 0 && apply_prob > keep_prob()) {
            apply_prob -= decrement;
        } else { apply_prob = keep_prob(); }
        gamma = (1.f - apply_prob) / (block_size * block_size);
        gamma *= (alpha * (h * w) / (seed_h * seed_w));
        auto* mask = ws()->CreateTensor(
            mount_name("drop_block/mask"))->ReshapeLike(Input(0));
        auto* norm = ws()->CreateTensor(mount_name(
...
@@ -58,29 +79,8 @@ void DropBlock2dOp<Context>::RunWithType() {
template <class Context>
void DropBlock2dOp<Context>::RunOnDevice() {
    if (data_format == "NCHW") {
        n = Input(0).dim(0), c = Input(0).dim(1);
        h = Input(0).dim(2), w = Input(0).dim(3);
    } else if (data_format == "NHWC") {
        n = Input(0).dim(0), c = Input(0).dim(-1);
        h = Input(0).dim(1), w = Input(0).dim(2);
    }
    seed_h = h - block_size + 1;
    seed_w = w - block_size + 1;
    CHECK(seed_h > 0 && seed_w > 0)
        << "\nExcepted block_size <= feat_size.";
    Output(0)->ReshapeLike(Input(0));
    if (decrement > 0 && apply_prob > keep_prob()) {
        apply_prob -= decrement;
    } else { apply_prob = keep_prob(); }
    gamma = (1.f - apply_prob) / (block_size * block_size);
    gamma *= (alpha * (h * w) / (seed_h * seed_w));
    if (XIsType(Input(0), float)) RunWithType<float>();
    else if (XIsType(Input(0), float16)) RunWithType<float16>();
    else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
...
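The gamma computed here follows the DropBlock paper: the per-seed drop probability is scaled so that, once each seed grows into a block_size x block_size square, the expected fraction of dropped activations matches (1 - keep_prob). A quick numeric check of that formula (alpha is a Dragon-specific multiplier; the values below are assumptions for illustration only):

    #include <cstdio>

    int main() {
        const float keep_prob = 0.9f, alpha = 1.0f;
        const int block_size = 7, h = 56, w = 56;
        const int seed_h = h - block_size + 1;   // 50
        const int seed_w = w - block_size + 1;   // 50
        float gamma = (1.f - keep_prob) / (block_size * block_size);
        gamma *= alpha * (h * w) / float(seed_h * seed_w);
        std::printf("gamma = %f\n", gamma);      // ~0.00256 per seed position
        return 0;
    }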