SeetaResearch / Dragon
Commit 77179032, authored Jan 08, 2018 by Ting PAN
Refactor Shape Module
1 parent 2d1b7752
Showing 28 changed files with 971 additions and 361 deletions
Dragon/include/operators/misc/initialize_op.h
Dragon/include/operators/ndarray/arange_op.h
Dragon/include/operators/ndarray/tile_op.h
Dragon/include/operators/norm/batch_norm_op.h
Dragon/include/operators/vision/bilinear_resize_op.h
Dragon/include/operators/vision/conv_op_base.h
Dragon/include/operators/vision/nn_resize_op.h
Dragon/include/utils/op_kernel.h
Dragon/python/dragon/core/tensor.py
Dragon/python/dragon/docs/contents/core/tensor.rst
Dragon/python/dragon/operators/initializer.py
Dragon/python/dragon/operators/ndarray.py
Dragon/python/dragon/operators/vision.py
Dragon/python/setup.py
Dragon/src/operators/misc/initialize_op.cc
Dragon/src/operators/ndarray/arange_op.cc
Dragon/src/operators/ndarray/at_op.cc
Dragon/src/operators/ndarray/random_pick_op.cc
Dragon/src/operators/ndarray/shape_op.cc
Dragon/src/operators/ndarray/tile_op.cc
Dragon/src/operators/norm/cudnn_batch_norm_op.cc
Dragon/src/operators/norm/fused_batch_norm.cc
Dragon/src/operators/vision/bilinear_resize_op.cc
Dragon/src/operators/vision/conv_op_base.cc
Dragon/src/operators/vision/nn_resize_op.cc
Dragon/src/utils/math_functions.cc
Dragon/src/utils/op_kernel.cc
Dragon/src/utils/op_kernel.cu
Dragon/include/operators/misc/initialize_op.h

@@ -17,16 +17,16 @@ class InitializeOp: public Operator<Context> {
 public:
     InitializeOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
-          static_shape(OperatorBase::GetRepeatedArg<int>("static_shape")),
-          dynamic_shape(OperatorBase::GetSingleArg<string>("dynamic_shape", "")) {}
+          dims_desc(OperatorBase::GetRepeatedArg<string>("dims")),
+          shape_desc(OperatorBase::GetSingleArg<string>("shape", "")) {}

     void RunOnDevice() override;
     template <typename T> void RunWithType();

 protected:
+    vector<string> dims_desc;
+    string shape_desc;
     TensorFiller filler;
-    vector<int> static_shape;
-    string dynamic_shape;
 };

 template <class Context>
...
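This first hunk sets the pattern for the whole commit: `static_*` integer arguments (with a `dynamic_*` tensor-name fallback) are replaced by string descriptors, i.e. the names of int32 scalar tensors that the operator dereferences from the workspace at run time. A minimal sketch of that contract in plain Python; the `Workspace` class and tensor names here are illustrative stand-ins, not Dragon's API:

import numpy as np

class Workspace(object):
    """Toy stand-in for Dragon's workspace: a name -> ndarray store."""
    def __init__(self):
        self._tensors = {}
    def feed(self, name, value):
        self._tensors[name] = np.asarray(value)
    def fetch(self, name):
        return self._tensors[name]

def resolve_shape(ws, dims_desc, shape_desc):
    """Mirror InitializeOp::RunOnDevice: prefer per-dim descriptors,
    otherwise read the whole shape from a single int32 tensor."""
    if not shape_desc:
        dims = []
        for desc in dims_desc:
            dim = ws.fetch(desc)
            assert dim.size == 1 and dim.dtype == np.int32
            dims.append(int(dim[0]))
        return dims
    shape = ws.fetch(shape_desc)
    assert shape.dtype == np.int32
    return [int(d) for d in shape]

ws = Workspace()
ws.feed('dim_0', np.array([2], dtype=np.int32))
ws.feed('dim_1', np.array([3], dtype=np.int32))
print(resolve_shape(ws, ['dim_0', 'dim_1'], ''))   # [2, 3]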
Dragon/include/operators/ndarray/arange_op.h

@@ -16,24 +16,19 @@ class ArangeOp final : public Operator<Context> {
 public:
     ArangeOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
-          start(OperatorBase::GetSingleArg<int>("static_start", 0)),
-          stop(OperatorBase::GetSingleArg<int>("static_stop", -1)),
-          step(OperatorBase::GetSingleArg<int>("static_step", 1)),
-          dtype(OperatorBase::GetSingleArg<string>("dtype", "FLOAT32")) {
-        dynamic_start_ = OperatorBase::GetSingleArg<string>("dynamic_start", "");
-        dynamic_stop_ = OperatorBase::GetSingleArg<string>("dynamic_stop", "");
-        dynamic_step_ = OperatorBase::GetSingleArg<string>("dynamic_step", "");
-    }
+          start_desc(OperatorBase::GetSingleArg<string>("start", "")),
+          stop_desc(OperatorBase::GetSingleArg<string>("stop", "")),
+          step_desc(OperatorBase::GetSingleArg<string>("step", "")),
+          dtype(OperatorBase::GetSingleArg<string>("dtype", "FLOAT32")) {}

+    void Reshape();
     void RunOnDevice() override;
     template <typename T> void RunWithType();

 protected:
+    string start_desc, stop_desc, step_desc, dtype;
     TIndex start, stop, step, count;
-    Tensor* dynamic_start, *dynamic_stop, *dynamic_step;
-    string dynamic_start_, dynamic_stop_, dynamic_step_;
-    string dtype;
 };

}    // namespace dragon
...
Dragon/include/operators/ndarray/tile_op.h

@@ -16,19 +16,13 @@ class TileOp : public Operator<Context> {
 public:
     TileOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
-          multiples(OperatorBase::GetRepeatedArg<int>("multiples")) {
-        for (int i = 0; i < multiples.size(); i++)
-            if (multiples[i] > 1) process_axes.push_back({ multiples[i], i });
-        std::sort(process_axes.begin(), process_axes.end());
-    }
+          multiples_desc(OperatorBase::GetRepeatedArg<string>("multiples")) {}

     void RunOnDevice() override;
     template <typename T> void TileRunWithType();

 protected:
-    vector<int> multiples;
-    vector< pair<int, int> > process_axes;
+    vector<string> multiples_desc;
     TIndex axis, multiple, outer_dim, ex_inner_dim;
     Tensor* dest, *source;
 };

@@ -38,12 +32,7 @@ class TileGradientOp : public Operator<Context> {
 public:
     TileGradientOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
-          multiples(OperatorBase::GetRepeatedArg<int>("multiples")) {
-        for (int i = 0; i < multiples.size(); i++)
-            if (multiples[i] > 1) process_axes.push_back({ multiples[i], i });
-        std::sort(process_axes.begin(), process_axes.end());
-        std::reverse(process_axes.begin(), process_axes.end());
+          multiples_desc(OperatorBase::GetRepeatedArg<string>("multiples")) {
         DISABLE_SHARE_GRADIENT;
     }

@@ -51,8 +40,7 @@ class TileGradientOp : public Operator<Context> {
     template <typename T> void TileRunWithType();

 protected:
-    vector<int> multiples;
-    vector< pair<int, int> > process_axes;
+    vector<string> multiples_desc;
     TIndex axis, multiple, outer_dim, ex_inner_dim;
     Tensor* dest, *source;
 };
...
Dragon/include/operators/norm/batch_norm_op.h

@@ -81,13 +81,17 @@ class FusedBatchNormOp : public Operator<Context> {
           eps(OperatorBase::GetSingleArg<float>("eps", float(1e-3))),
           use_stats(OperatorBase::GetSingleArg<int>("use_stats", -1)) {}

-    void Setup() { NOT_IMPLEMENTED; }
-    void RunOnDevice() override { NOT_IMPLEMENTED; }
-    template <typename T> void RunWithType() { NOT_IMPLEMENTED; }
+    void Setup();
+    void RunOnDevice() override;
+    template <typename T> void TrainingRunWithType();
+    template <typename T> void InferenceRunWithType();

 protected:
     float momentum, eps;
+    Tensor num_by_chans;
+    Tensor* multiplier, *num_multiplier, *spatial_multiplier;
+    Tensor* mean, *var, *stddev, *x_norm;
     TIndex axis, N, C, S, NC, NS;
     string data_format;
     int use_stats;

@@ -103,15 +107,19 @@ class FusedBatchNormGradientOp : public Operator<Context> {
           eps(OperatorBase::GetSingleArg<float>("eps", float(1e-3))),
           use_stats(OperatorBase::GetSingleArg<int>("use_stats", -1)) {}

-    void Setup() { NOT_IMPLEMENTED; }
-    void RunOnDevice() override { NOT_IMPLEMENTED; }
-    template <typename T> void RunWithType() { NOT_IMPLEMENTED; }
+    void Setup();
+    void ShareGradient() override;
+    void RunOnDevice() override;
+    template <typename T> void TrainingRunWithType();
+    template <typename T> void InferenceRunWithType();

 protected:
     float eps;
+    Tensor num_by_chans;
+    Tensor* multiplier, *num_multiplier, *spatial_multiplier;
+    Tensor* mean, *var, *stddev, *x_norm;
     TIndex axis, N, C, S, NC, NS;
     string data_format;
     int use_stats;
...
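FusedBatchNorm reduces over the batch (N) and spatial (S) axes per channel (C); the implementation in fused_batch_norm.cc below realizes those reductions as Gemv calls against all-ones "multiplier" vectors. A numpy sketch of the equivalence, assuming NCHW layout; variable names mirror the operator's bookkeeping:

import numpy as np

N, C, S = 2, 3, 4                      # batch, channels, spatial size (H*W)
x = np.random.randn(N, C, S).astype(np.float32)

# The per-channel statistics the kernels compute, in plain numpy:
mean = x.mean(axis=(0, 2))                                   # E[X] over N and S
var = ((x - mean[None, :, None]) ** 2).mean(axis=(0, 2))     # VAR(X) = E[(X - E[X])^2]

# The ones-vector Gemv formulation used by the kernels, spelled out:
ones_S = np.ones(S, dtype=np.float32)
ones_N = np.ones(N, dtype=np.float32)
nc = x.reshape(N * C, S) @ ones_S / (N * S)    # Gemv(NoTrans, NC, S, 1/NS, ...)
mean_gemv = nc.reshape(N, C).T @ ones_N        # Gemv(Trans, N, C, 1.0, ...)
assert np.allclose(mean, mean_gemv, atol=1e-5)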
Dragon/include/operators/vision/bilinear_resize_op.h

@@ -16,8 +16,7 @@ class BilinearResizeOp : public Operator<Context> {
 public:
     BilinearResizeOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
-          static_dsize(OperatorBase::GetRepeatedArg<int>("static_dsize")),
-          dynamic_dsize(OperatorBase::GetRepeatedArg<string>("dynamic_dsize")),
+          dsize_desc(OperatorBase::GetRepeatedArg<string>("dsize")),
           fy(OperatorBase::GetSingleArg<float>("fy", -1.0)),
           fx(OperatorBase::GetSingleArg<float>("fx", -1.0)),
           data_format(OperatorBase::GetSingleArg<string>("data_format", "NCHW")) {

@@ -29,8 +28,7 @@ class BilinearResizeOp : public Operator<Context> {
     template <typename T> void RunWithType();

 protected:
-    vector<int> static_dsize;
-    vector<string> dynamic_dsize;
+    vector<string> dsize_desc;
     float fy, fx;
     string data_format;
     TIndex n, c, h, w, out_h, out_w, spatial_axis;
...
Dragon/include/operators/vision/conv_op_base.h

@@ -22,8 +22,7 @@ class ConvOpBase : public Operator<Context> {
           padding(OperatorBase::GetSingleArg<string>("padding", "VALID")),
           num_output(OperatorBase::GetSingleArg<int>("num_output", 1)),
           group(OperatorBase::GetSingleArg<int>("group", 1)),
-          static_dsize(OperatorBase::GetRepeatedArg<int>("static_dsize")),
-          dynamic_dsize(OperatorBase::GetRepeatedArg<string>("dynamic_dsize")) {
+          output_dims_desc(OperatorBase::GetRepeatedArg<string>("output_shape")) {
         if (data_format == "NCHW") spatial_axis = 2;
         else if (data_format == "NHWC") spatial_axis = 1;
         else LOG(FATAL) << "Unknown data format: " << data_format;

@@ -42,8 +41,7 @@ class ConvOpBase : public Operator<Context> {
     TIndex conv_in_channels, conv_out_channels;
     TIndex conv_out_spatial_dim, kernel_dim;
     TIndex col_offset, output_offset, weight_offset, x_offset, y_offset;
-    vector<int> static_dsize;
-    vector<string> dynamic_dsize;
+    vector<string> output_dims_desc;
     bool is_1x1;
     void Setup();
...
Dragon/include/operators/vision/nn_resize_op.h

@@ -16,8 +16,7 @@ class NNResizeOp : public Operator<Context> {
 public:
     NNResizeOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
-          static_dsize(OperatorBase::GetRepeatedArg<int>("static_dsize")),
-          dynamic_dsize(OperatorBase::GetRepeatedArg<string>("dynamic_dsize")),
+          dsize_desc(OperatorBase::GetRepeatedArg<string>("dsize")),
           fy(OperatorBase::GetSingleArg<float>("fy", -1.0)),
           fx(OperatorBase::GetSingleArg<float>("fx", -1.0)),
           data_format(OperatorBase::GetSingleArg<string>("data_format", "NCHW")) {

@@ -30,8 +29,7 @@ class NNResizeOp : public Operator<Context> {
     template <typename T> void RunWithType();

 protected:
-    vector<int> static_dsize;
-    vector<string> dynamic_dsize;
+    vector<string> dsize_desc;
     float fy, fx;
     string data_format;
     TIndex n, c, h, w, out_h, out_w, spatial_axis;
...
Dragon/include/utils/op_kernel.h

@@ -323,10 +323,10 @@ void At(const int count,
             const int inner_dim,
             const int x_slice_dim,
             const int y_slice_dim,
-            const T* indices,
+            const int* indices,
             const T* x,
             T* y,
-            Context* context);
+            Context* ctx);

 template <typename T, class Context>
 void AtGrad(const int count,

@@ -334,10 +334,9 @@ void AtGrad(const int count,
             const int inner_dim,
             const int x_slice_dim,
             const int y_slice_dim,
-            const T* indices,
+            const int* indices,
             const T* dy,
-            T* dx,
-            Context* context);
+            T* dx);

/******************** ndarray.concat ********************/
...
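For reference, the gather these signatures describe, restated with numpy (the semantics only, not the device code): x is viewed as (outer_dim, x_slice_dim, inner_dim), and slices along the middle axis are selected by int32 indices.

import numpy as np

outer_dim, x_slice_dim, inner_dim = 2, 5, 3
x = np.arange(outer_dim * x_slice_dim * inner_dim, dtype=np.float32)
x = x.reshape(outer_dim, x_slice_dim, inner_dim)
indices = np.array([4, 0, 2], dtype=np.int32)     # y_slice_dim == 3
y = np.take(x, indices, axis=1)                   # what kernel::At produces
assert y.shape == (outer_dim, len(indices), inner_dim)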
Dragon/python/dragon/core/tensor.py

@@ -410,7 +410,7 @@ class Tensor(object):
         """
         def wrapper_indices(indices):
             tensor = Tensor(GetTensorName())
-            ws.FeedTensor(tensor, np.array(indices, dtype=np.float32))
+            ws.FeedTensor(tensor, np.array(indices, dtype=np.int32))
             return tensor

         if not isinstance(item, tuple):

@@ -422,8 +422,7 @@ class Tensor(object):
                 output.shape[0] = 1
                 return output
             else:
-                # ND Crop
-                item = (item, )
+                raise TypeError('Unsupported type of indices: {}'.format(type(item)))
         starts = []
         ends = []
         output_dims = []

@@ -853,6 +852,21 @@ class Tensor(object):
         """
         raise NotImplementedError('Implemented in <vm.tensorflow.framework.tensor_shape>')

+    def eval(self, feed_dict=None):
+        """Run and return the computing results of this tensor.
+
+        Parameters
+        ----------
+        feed_dict : dict
+            The values to feed.
+
+        Returns
+        -------
+        numpy.ndarray
+            The values of this tensor in the backend.
+
+        """
+        raise NotImplementedError('Implemented in <vm.theano.compile.function>')

     ############################################
     #                                          #
     #                   MISC                   #

@@ -970,6 +984,39 @@ class Tensor(object):
         elif nout == 1: return outputs[0]
         else: return None

+    @classmethod
+    def Convert(cls, value, dtype='float32'):
+        """Convert the given value to a tensor.
+
+        Parameters
+        ----------
+        value : numerical type
+            The value to convert.
+        dtype : str
+            The data type of the tensor.
+
+        Returns
+        -------
+        Tensor
+            The tensor converted with given value.
+
+        """
+        if isinstance(value, Tensor):
+            return value
+        else:
+            if isinstance(value, (list, tuple)):
+                np_value = np.array(value, dtype=dtype)
+            elif isinstance(value, np.ndarray):
+                np_value = value.astype(dtype=dtype)
+            else:
+                try:
+                    np_value = np.array(value, dtype=dtype)
+                except:
+                    raise TypeError('{} value can not be converted to tensor.'.format(type(value)))
+            tensor = Tensor(shape=list(np_value.shape), dtype=dtype)
+            tensor.set_value(np_value)
+            return tensor

     def Fill(self, type, **kwargs):
         """Fill self with the specific type of filler.
...
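`Convert` is the piece the operator wrappers below lean on: plain Python scalars and lists become constant tensors whose names can serve as shape descriptors, while existing Tensors pass through untouched. A hypothetical usage, assuming a working Dragon build:

from dragon.core.tensor import Tensor

# A plain int becomes a constant int32 Tensor whose value is fed to the
# backend; an existing Tensor is returned as-is.
dim = Tensor.Convert(64, dtype='int32')
assert isinstance(dim, Tensor)
assert Tensor.Convert(dim, dtype='int32') is dim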
Dragon/python/dragon/docs/contents/core/tensor.rst

@@ -13,12 +13,14 @@ List Brief
 ==============================    =============================================================================
 `Tensor.name`_                    Return or Set the name.
 `Tensor.shape`_                   Return or Set the shape.
+`Tensor.get_shape`_               Return the shape.
 `Tensor.dtype`_                   Return or Set the data type.
 `Tensor.set_value`_               Feed the values to C++ backend.
 `Tensor.get_value`_               Fetch the values from C++ backend.
 `Tensor.copy`_                    Return a Tensor with same content.
 `Tensor.reshape`_                 Reshape the dimensions of input.
 `Tensor.dimshuffle`_              Shuffle the dimensions.
+`Tensor.eval`_                    Run and return the computing results of this tensor.
 `Tensor.CreateOperator`_          Construct a new Tensor with specific operator descriptor.
 `Tensor.Fill`_                    Fill self with the specific type of filler.
 `Tensor.PrintExpressions`_        Return the stringified internal expressions.

@@ -102,12 +104,14 @@ API Reference
 .. _Tensor.name: #dragon.core.tensor.Tensor.name
 .. _Tensor.shape: #dragon.core.tensor.Tensor.shape
+.. _Tensor.get_shape: #dragon.core.tensor.Tensor.get_shape
 .. _Tensor.dtype: #dragon.core.tensor.Tensor.dtype
 .. _Tensor.set_value: #dragon.core.tensor.Tensor.set_value
 .. _Tensor.get_value: #dragon.core.tensor.Tensor.get_value
 .. _Tensor.copy: #dragon.core.tensor.Tensor.copy
 .. _Tensor.reshape: #dragon.core.tensor.Tensor.reshape
 .. _Tensor.dimshuffle: #dragon.core.tensor.Tensor.dimshuffle
+.. _Tensor.eval: #dragon.core.tensor.Tensor.eval
 .. _Tensor.CreateOperator: #dragon.core.tensor.Tensor.CreateOperator
 .. _Tensor.Fill: #dragon.core.tensor.Tensor.Fill
 .. _Tensor.PrintExpressions: #dragon.core.tensor.Tensor.PrintExpressions
...
Dragon/python/dragon/operators/initializer.py

@@ -6,34 +6,48 @@
 from . import *

+def _wrap_input_shape(arguments, shape):
+    if isinstance(shape, Tensor):
+        arguments['extra_inputs'] = shape
+        arguments['shape'] = shape.name
+    elif isinstance(shape, (list, tuple)):
+        arguments['extra_inputs'] = [Tensor.Convert(dim, dtype='int32') for dim in shape]
+        arguments['dims'] = [dim.name for dim in arguments['extra_inputs']]
+        arguments['shape'] = None
+    else:
+        raise TypeError('Unsupported type of shape: {}'.format(type(shape)))
+    return arguments

+def _wrap_output_shape(output, shape):
+    if not isinstance(shape, Tensor):
+        if any(isinstance(dim, Tensor) for dim in shape): return output
+        output.shape = [dim for dim in shape]
+    return output

 def Fill(shape, value=0, **kwargs):
     """Return a Tensor with specific value filled.

     Parameters
     ----------
     shape : list, tuple or Tensor
-        The shape of the new tensor.
+        The output shape.
     value : basic numerical type
-        The value of the new tensor.
+        The value to fill.

     Returns
     -------
     Tensor
-        The value-filled Tensor.
+        The constant-filled tensor.

     """
     arguments = ParseArguments(locals())
     arguments['value'] = float(value)
-    if not isinstance(shape, Tensor):
-        arguments['static_shape'] = shape
-    else:
-        arguments['dynamic_shape'] = shape.name
-        arguments['extra_inputs'] = shape
-    del arguments['shape']
+    arguments = _wrap_input_shape(arguments, shape)
     output = Tensor.CreateOperator([], nout=1, op_type='Fill', **arguments)
-    output.shape = arguments['static_shape'] if 'static_shape' in arguments else None
-    return output
+    return _wrap_output_shape(output, shape)

 def RandomUniform(shape, low=-1.0, high=1.0, **kwargs):

@@ -57,16 +71,9 @@ def RandomUniform(shape, low=-1.0, high=1.0, **kwargs):
     arguments = ParseArguments(locals())
     arguments['low'] = float(low)
     arguments['high'] = float(high)
-    if not isinstance(shape, Tensor):
-        arguments['static_shape'] = shape
-    else:
-        arguments['dynamic_shape'] = shape.name
-        arguments['extra_inputs'] = shape
-    del arguments['shape']
+    arguments = _wrap_input_shape(arguments, shape)
     output = Tensor.CreateOperator([], nout=1, op_type='RandomUniform', **arguments)
-    output.shape = arguments['static_shape'] if 'static_shape' in arguments else None
-    return output
+    return _wrap_output_shape(output, shape)

 def RandomNormal(shape, mean=0.0, std=1.0, **kwargs):

@@ -90,16 +97,9 @@ def RandomNormal(shape, mean=0.0, std=1.0, **kwargs):
     arguments = ParseArguments(locals())
     arguments['mean'] = float(mean)
     arguments['std'] = float(std)
-    if not isinstance(shape, Tensor):
-        arguments['static_shape'] = shape
-    else:
-        arguments['dynamic_shape'] = shape.name
-        arguments['extra_inputs'] = shape
-    del arguments['shape']
+    arguments = _wrap_input_shape(arguments, shape)
     output = Tensor.CreateOperator([], nout=1, op_type='RandomNormal', **arguments)
-    output.shape = arguments['static_shape'] if 'static_shape' in arguments else None
-    return output
+    return _wrap_output_shape(output, shape)

 def TruncatedNormal(shape, mean=0.0, std=1.0, **kwargs):

@@ -127,16 +127,9 @@ def TruncatedNormal(shape, mean=0.0, std=1.0, **kwargs):
     arguments['std'] = float(std)
     arguments['low'] = float(mean - 2.0 * std)
     arguments['high'] = float(mean + 2.0 * std)
-    if not isinstance(shape, Tensor):
-        arguments['static_shape'] = shape
-    else:
-        arguments['dynamic_shape'] = shape.name
-        arguments['extra_inputs'] = shape
-    del arguments['shape']
+    arguments = _wrap_input_shape(arguments, shape)
     output = Tensor.CreateOperator([], nout=1, op_type='TruncatedNormal', **arguments)
-    output.shape = arguments['static_shape'] if 'static_shape' in arguments else None
-    return output
+    return _wrap_output_shape(output, shape)

 def GlorotUniform(shape, scale=3.0, mode='FAN_IN', **kwargs):

@@ -162,16 +155,9 @@ def GlorotUniform(shape, scale=3.0, mode='FAN_IN', **kwargs):
     arguments = ParseArguments(locals())
     arguments['scale'] = float(scale)
     arguments['mode'] = mode.lower()
-    if not isinstance(shape, Tensor):
-        arguments['static_shape'] = shape
-    else:
-        arguments['dynamic_shape'] = shape.name
-        arguments['extra_inputs'] = shape
-    del arguments['shape']
+    arguments = _wrap_input_shape(arguments, shape)
     output = Tensor.CreateOperator([], nout=1, op_type='GlorotUniform', **arguments)
-    output.shape = arguments['static_shape'] if 'static_shape' in arguments else None
-    return output
+    return _wrap_output_shape(output, shape)

 def GlorotNormal(shape, scale=2.0, mode='FAN_IN', **kwargs):

@@ -197,13 +183,6 @@ def GlorotNormal(shape, scale=2.0, mode='FAN_IN', **kwargs):
     arguments = ParseArguments(locals())
     arguments['scale'] = float(scale)
     arguments['mode'] = mode.lower()
-    if not isinstance(shape, Tensor):
-        arguments['static_shape'] = shape
-    else:
-        arguments['dynamic_shape'] = shape.name
-        arguments['extra_inputs'] = shape
-    del arguments['shape']
+    arguments = _wrap_input_shape(arguments, shape)
     output = Tensor.CreateOperator([], nout=1, op_type='GlorotNormal', **arguments)
-    output.shape = arguments['static_shape'] if 'static_shape' in arguments else None
-    return output
\ No newline at end of file
+    return _wrap_output_shape(output, shape)
\ No newline at end of file
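The two `_wrap_*` helpers centralize the shape plumbing that all six initializers previously duplicated. Their output-side rule (static ints give a known shape, any Tensor dim leaves it unknown) can be checked standalone; the `FakeTensor` stub below is illustrative, not Dragon's class:

class FakeTensor(object):
    """Stub with just the attributes _wrap_output_shape relies on."""
    def __init__(self, name, shape=None):
        self.name, self.shape = name, shape

def wrap_output_shape(output, shape):
    # Static ints give a fully known shape; any Tensor dim leaves it unset.
    if not isinstance(shape, FakeTensor):
        if any(isinstance(dim, FakeTensor) for dim in shape): return output
        output.shape = [dim for dim in shape]
    return output

out = wrap_output_shape(FakeTensor('y'), [2, 3, 4])
print(out.shape)                                    # [2, 3, 4]
out = wrap_output_shape(FakeTensor('y'), [2, FakeTensor('d'), 4])
print(out.shape)                                    # None (left unset)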
Dragon/python/dragon/operators/ndarray.py

@@ -470,7 +470,7 @@ def Tile(inputs, multiples, **kwargs):
     ----------
     input : Tensor
         The input tensor.
-    multiples : list of int
+    multiples : list
         The multiple of each axis.

     Returns

@@ -481,15 +481,21 @@ def Tile(inputs, multiples, **kwargs):
     """
     CheckInputs(inputs, 1)
     arguments = ParseArguments(locals())
+    arguments['extra_inputs'] = [Tensor.Convert(multiple, dtype='int32') for multiple in multiples]
+    arguments['multiples'] = [multiple.name for multiple in arguments['extra_inputs']]

     output = Tensor.CreateOperator(nout=1, op_type='Tile', **arguments)

     if inputs.shape is not None:
         if len(inputs.shape) != len(multiples):
-            raise ValueError('input ndim is {}, but multiples provide {}'. \
-                format(len(inputs.shape), len(multiples)))
+            raise ValueError('The num of dimensions of input is {}, but provided {}.'
+                .format(len(inputs.shape), len(multiples)))
         output.shape = inputs.shape[:]
         for i, multiple in enumerate(multiples):
-            output.shape[i] *= multiple
+            if output.shape[i] is None or \
+                isinstance(output.shape[i], Tensor):
+                output.shape[i] = None
+            else: output.shape[i] *= multiple

     return output

@@ -755,7 +761,7 @@ def Arange(start, stop=None, step=1, dtype='FLOAT32', **kwargs):
     step : int or Tensor
         The interval between two elements.
     dtype : str
-        The data type. ``FLOAT32`` or ``INT32``.
+        The data type. ``float32`` or ``int32``.

     Returns
     -------

@@ -764,30 +770,26 @@ def Arange(start, stop=None, step=1, dtype='FLOAT32', **kwargs):
     """
     arguments = ParseArguments(locals())
-    arguments['extra_inputs'] = []
-    if not isinstance(start, Tensor): arguments['static_start'] = int(start)
-    else:
-        arguments['dynamic_start'] = start.name
-        arguments['extra_inputs'].append(start)
+    arguments['extra_inputs'] = [Tensor.Convert(start, dtype='int32'),
+                                 Tensor.Convert(step, dtype='int32')]
+    arguments['start'] = arguments['extra_inputs'][0].name
+    arguments['step'] = arguments['extra_inputs'][1].name
     if stop is not None:
-        if not isinstance(stop, Tensor): arguments['static_stop'] = int(stop)
-        else:
-            arguments['dynamic_stop'] = stop.name
-            arguments['extra_inputs'].append(stop)
-        del arguments['stop']
-    if not isinstance(step, Tensor): arguments['static_step'] = int(step)
-    else:
-        arguments['dynamic_step'] = step.name
-        arguments['extra_inputs'].append(step)
-    del arguments['start']; del arguments['step']
+        arguments['extra_inputs'].append(Tensor.Convert(stop, dtype='int32'))
+        arguments['stop'] = arguments['extra_inputs'][-1].name
     arguments['dtype'] = arguments['dtype'].upper()

     output = Tensor.CreateOperator([], nout=1, op_type='Arange', **arguments)

-    if 'static_start' in arguments and \
-       'static_step' in arguments:
-        if 'dynamic_stop' not in arguments:
-            if stop is None: stop = start; start = 0
-            count = (stop - start - 1) / step + 1
-            output.shape = [np.long(count)]
+    if not isinstance(start, Tensor) and \
+       not isinstance(step, Tensor):
+        if stop is not None:
+            if isinstance(stop, Tensor):
+                return output
+        else:
+            stop = start
+            start = 0
+        count = int((stop - start - 1) / step) + 1
+        output.shape = [count]

     return output
\ No newline at end of file
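The shape inference for Arange relies on the closed-form length of a strided half-open range; a quick numpy check of the formula used in both the Python wrapper and ArangeOp::Reshape (positive step assumed):

import numpy as np

def arange_count(start, stop, step):
    # Number of elements in [start, stop) with stride step,
    # as computed by the new code: int((stop - start - 1) / step) + 1.
    return int((stop - start - 1) / step) + 1

for start, stop, step in [(0, 10, 1), (0, 10, 3), (2, 9, 2)]:
    assert arange_count(start, stop, step) == len(np.arange(start, stop, step))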
Dragon/python/dragon/operators/vision.py

@@ -88,6 +88,7 @@ def Conv2d(inputs, num_output, kernel_size,
         spatial_axis = 2 if data_format == 'NCHW' else 1
         output.shape[channel_axis] = num_output
         for i in xrange(2):
+            input_size = output.shape[i + spatial_axis]
             k = arguments['kernel_size'][i] if i < len(arguments['kernel_size']) \
                 else arguments['kernel_size'][-1]
             s = arguments['stride'][i] if i < len(arguments['stride']) \

@@ -99,10 +100,9 @@ def Conv2d(inputs, num_output, kernel_size,
             dk = d * (k - 1) + 1
             dp = 2 * p
             if padding == 'SAME':
-                input_size = output.shape[i + spatial_axis]
-                output_size = (input_size + s - 1) / float(s)
-                dp = int(max(0, (output_size - 1) * s + k - input_size))
-                output.shape[i + spatial_axis] = int(output.shape[i + spatial_axis] + dp - dk / s) + 1
+                output.shape[i + spatial_axis] = int((input_size + s - 1) / s)
             else:
                 output.shape[i + spatial_axis] = int((input_size + dp - dk) / s) + 1

     return output

@@ -173,15 +173,8 @@ def Conv2dTranspose(inputs, num_output, kernel_size,
     if output_shape is not None:
         if not isinstance(output_shape, list):
             raise TypeError('The output shape should be a list.')
-        if isinstance(output_shape[0], Tensor):
-            arguments['dynamic_dsize'] = []
-            arguments['extra_inputs'] = list(output_shape)
-            for dim in output_shape:
-                arguments['dynamic_dsize'].append(dim)
-        else:
-            arguments['static_dsize'] = []
-            for dim in output_shape:
-                arguments['static_dsize'].append(int(dim))
+        arguments['extra_inputs'] = [Tensor.Convert(dim, dtype='int32') for dim in output_shape]
+        arguments['output_shape'] = [dim.name for dim in arguments['extra_inputs']]

     if not isinstance(arguments['kernel_size'], list):
         arguments['kernel_size'] = [arguments['kernel_size']]

@@ -216,9 +209,10 @@ def Conv2dTranspose(inputs, num_output, kernel_size,
             else:
                 if output_shape is None:
                     raise ValueError('The output shape must be specified if using SAME padding algorithm.')
-                if 'dynamic_dsize' in arguments:
+                if isinstance(output_shape[i + spatial_axis], Tensor):
                     output.shape = None
                     return output
                 else:
                     output.shape[i + spatial_axis] = output_shape[i + spatial_axis]

     return output

@@ -433,14 +427,11 @@ def NNResize(inputs, dsize, fy=-1.0, fx=-1.0, data_format='NCHW', **kwargs):
     if data_format not in ('NCHW', 'NHWC'):
         raise ValueError('Unsupported data format: {}'.format(data_format))
-    if arguments['dsize'] is not None:
-        if isinstance(arguments['dsize'][0], Tensor):
-            arguments['dynamic_dsize'] = [arguments['dsize'][0].name,
-                                          arguments['dsize'][1].name]
-            arguments['extra_inputs'] = list(arguments['dsize'])
-        else:
-            arguments['static_size'] = arguments['dsize']
-        del arguments['dsize']
+    if dsize is not None:
+        if len(dsize) != 2:
+            raise ValueError('The dsize should be a list with 2 elements.')
+        arguments['extra_inputs'] = [Tensor.Convert(size, dtype='int32') for size in dsize]
+        arguments['dsize'] = [size.name for size in arguments['extra_inputs']]

     if dsize is None and (fy == -1.0 or fx == -1.0):
         raise RuntimeError('The dsize or fy/fx should be specified either.')

@@ -450,12 +441,18 @@ def NNResize(inputs, dsize, fy=-1.0, fx=-1.0, data_format='NCHW', **kwargs):
     if inputs.shape is not None:
         if len(inputs.shape) != 4:
             raise ValueError('The inputs should be a 4d Tensor.')
-        if 'dynamic_dsize' not in arguments:
+        possible_to_infer_shape = True
+        if dsize is not None:
+            for size in dsize:
+                if isinstance(size, Tensor):
+                    possible_to_infer_shape = False
+        if possible_to_infer_shape:
             output.shape = inputs.shape[:]
             spatial_axis = 2 if data_format == 'NCHW' else 1
             for i in xrange(2):
                 output_dim = output.shape[spatial_axis + i]
-                if 'static_size' in arguments:
+                if dsize is not None:
                     output_dim = dsize[i]
                 else:
                     output_dim = int(float(output_dim) * ([fy, fx])[i])

@@ -494,14 +491,11 @@ def BilinearResize(inputs, dsize, fy=-1.0, fx=-1.0, data_format='NCHW', **kwargs):
     if data_format not in ('NCHW', 'NHWC'):
         raise ValueError('Unsupported data format: {}'.format(data_format))
-    if arguments['dsize'] is not None:
-        if isinstance(arguments['dsize'][0], Tensor):
-            arguments['dynamic_dsize'] = [arguments['dsize'][0].name,
-                                          arguments['dsize'][1].name]
-            arguments['extra_inputs'] = list(arguments['dsize'])
-        else:
-            arguments['static_size'] = arguments['dsize']
-        del arguments['dsize']
+    if dsize is not None:
+        if len(dsize) != 2:
+            raise ValueError('The dsize should be a list with 2 elements.')
+        arguments['extra_inputs'] = [Tensor.Convert(size, dtype='int32') for size in dsize]
+        arguments['dsize'] = [size.name for size in arguments['extra_inputs']]

     if dsize is None and (fy == -1.0 or fx == -1.0):
         raise RuntimeError('The dsize or fy/fx should be specified either.')

@@ -511,12 +505,18 @@ def BilinearResize(inputs, dsize, fy=-1.0, fx=-1.0, data_format='NCHW', **kwargs):
     if inputs.shape is not None:
         if len(inputs.shape) != 4:
             raise ValueError('The inputs should be a 4d Tensor.')
-        if 'dynamic_dsize' not in arguments:
+        possible_to_infer_shape = True
+        if dsize is not None:
+            for size in dsize:
+                if isinstance(size, Tensor):
+                    possible_to_infer_shape = False
+        if possible_to_infer_shape:
             output.shape = inputs.shape[:]
             spatial_axis = 2 if data_format == 'NCHW' else 1
             for i in xrange(2):
                 output_dim = output.shape[spatial_axis + i]
-                if 'static_size' in arguments:
+                if dsize is not None:
                     output_dim = dsize[i]
                 else:
                     output_dim = int(float(output_dim) * ([fy, fx])[i])
...
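The Conv2d hunk replaces the hand-rolled padded-size arithmetic with the standard rules: SAME gives out = ceil(in / stride) regardless of kernel size, while VALID uses the dilated kernel extent. A small self-check of both formulas:

import math

def same_out(in_size, stride):
    # SAME padding: output spatial size is independent of kernel size.
    return int((in_size + stride - 1) / stride)     # == ceil(in_size / stride)

def valid_out(in_size, kernel, stride, pad=0, dilation=1):
    dk = dilation * (kernel - 1) + 1                # effective kernel extent
    return int((in_size + 2 * pad - dk) / stride) + 1

assert same_out(224, 2) == math.ceil(224 / 2) == 112
assert valid_out(224, 7, 2, pad=3) == 112           # 7x7/s2/p3 stem matches SAME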
Dragon/python/setup.py

@@ -36,7 +36,7 @@
 find_packages('dragon')
 find_modules()

 setup(name = 'dragon',
-      version = '0.2',
+      version = '0.2.1.1',
       description = 'Dragon: A Computation Graph Virtual Machine Based Deep Learning Framework',
       url='https://github.com/neopenx/Dragon',
       author='Ting Pan',
...
Dragon/src/operators/misc/initialize_op.cc

@@ -13,16 +13,22 @@ void InitializeOp<Context>::RunWithType() {
 template <class Context>
 void InitializeOp<Context>::RunOnDevice() {
     vector<TIndex> dims;
-    if (dynamic_shape.empty()) {
-        for (auto& dim : static_shape) dims.push_back(dim);
+    if (shape_desc.empty()) {
+        //  determine the shape from dimensions
+        for (auto& dim_desc : dims_desc) {
+            Tensor* dim = ws()->GetTensor(dim_desc);
+            CHECK_EQ(dim->count(), 1) << "\nThe dimension should be a scalar.";
+            CHECK(dim->IsType<int>()) << "\nThe type of dimension should be int32.";
+            dims.push_back(dim->template data<int, CPUContext>()[0]);
+        }
     } else {
-        auto* shape_data = ws()->GetTensor(dynamic_shape)->template data<float, CPUContext>();
-        TIndex ndim = ws()->GetTensor(dynamic_shape)->count();
-        for (int i = 0; i < ndim; i++) dims.push_back(shape_data[i]);
+        //  determine the shape from given shape
+        Tensor* shape = ws()->GetTensor(shape_desc);
+        CHECK(shape->IsType<int>()) << "\nThe type of shape should be int32.";
+        auto* shape_data = shape->template data<int, CPUContext>();
+        for (int i = 0; i < shape->count(); i++) dims.push_back(shape_data[i]);
     }
     output(0)->Reshape(dims);
     RunWithType<float>();
 }
...
Dragon/src/operators/ndarray/arange_op.cc

@@ -6,43 +6,24 @@ namespace dragon {
 template <class Context>
 void ArangeOp<Context>::Reshape() {
-    if (!dynamic_start_.empty()) {
-        dynamic_start = ws()->GetTensor(dynamic_start_);
-        CHECK_EQ(dynamic_start->count(), 1) << "The start should be a scalar";
-        if (dynamic_start->IsType<int>()) {
-            start = dynamic_start->template data<int, CPUContext>()[0];
-        } else if (dynamic_start->IsType<float>()) {
-            start = dynamic_start->template data<float, CPUContext>()[0];
-        } else { LOG(FATAL) << "Unsupported types of start."; }
-    }
-    if (!dynamic_stop_.empty()) {
-        dynamic_stop = ws()->GetTensor(dynamic_stop_);
-        CHECK_EQ(dynamic_stop->count(), 1) << "The stop should be a scalar";
-        if (dynamic_stop->IsType<int>()) {
-            stop = dynamic_stop->template data<int, CPUContext>()[0];
-        } else if (dynamic_stop->IsType<float>()) {
-            stop = dynamic_stop->template data<float, CPUContext>()[0];
-        } else { LOG(FATAL) << "Unsupported types of stop."; }
-    }
-    if (!dynamic_step_.empty()) {
-        dynamic_step = ws()->GetTensor(dynamic_step_);
-        CHECK_EQ(dynamic_step->count(), 1) << "The step should be a scalar";
-        if (dynamic_step->IsType<int>()) {
-            step = dynamic_step->template data<int, CPUContext>()[0];
-        } else if (dynamic_step->IsType<float>()) {
-            step = dynamic_step->template data<float, CPUContext>()[0];
-        } else { LOG(FATAL) << "Unsupported types of step."; }
-    }
-    if (stop == -1) { stop = start; start = 0; }
+    //  parse start & step & stop
+    Tensor* t = ws()->GetTensor(start_desc);
+    CHECK_EQ(t->count(), 1) << "\nThe start should be a scalar";
+    CHECK(t->IsType<int>()) << "\nThe type of start should be int32.";
+    start = t->template data<int, CPUContext>()[0];
+    t = ws()->GetTensor(step_desc);
+    CHECK_EQ(t->count(), 1) << "\nThe step should be a scalar";
+    CHECK(t->IsType<int>()) << "\nThe type of step should be int32.";
+    step = t->template data<int, CPUContext>()[0];
+    if (!stop_desc.empty()) {
+        t = ws()->GetTensor(stop_desc);
+        CHECK_EQ(t->count(), 1) << "\nThe stop should be a scalar";
+        CHECK(t->IsType<int>()) << "\nThe type of stop should be int32.";
+        stop = t->template data<int, CPUContext>()[0];
+    } else { stop = start; start = 0; }
     count = (stop - start - 1) / step + 1;
     output(0)->Reshape(vector<TIndex>(1, count));
 }
...
Dragon/src/operators/ndarray/at_op.cc

@@ -8,12 +8,11 @@ namespace dragon {
 template <class Context> template <typename T>
 void AtOp<Context>::RunWithType() {
     auto* Xdata = input(0).template data<T, Context>();
-    auto* indices = input(1).template mutable_data<T, Context>();
+    auto* indices = input(1).template mutable_data<int, Context>();
     auto* Ydata = output(0)->template mutable_data<T, Context>();
-    kernel::CanonicalAxis<T, Context>(input(1).count(), x_slice_dim, indices);
+    kernel::CanonicalAxis<int, Context>(input(1).count(), x_slice_dim, indices);
     kernel::At<T, Context>(output(0)->count(), outer_dim, inner_dim,
-                           x_slice_dim, y_slice_dim,
+                           x_slice_dim, y_slice_dim, indices, Xdata, Ydata,

@@ -30,7 +29,9 @@ void AtOp<Context>::RunOnDevice() {
     inner_dim = input(0).count(axis + 1);
     output(0)->Reshape(output_dims);

+    CHECK(input(1).template IsType<int>()) << "\nThe type of indices should be int32.";
     if (input(0).template IsType<float>()) RunWithType<float>();
+    else if (input(0).template IsType<int>()) RunWithType<int>();
     else LOG(FATAL) << "Unsupported input types.";
 }

@@ -42,12 +43,15 @@ OPERATOR_SCHEMA(At).NumInputs(2).NumOutputs(1);
 template <class Context> template <typename T>
 void AtGradientOp<Context>::RunWithType() {
-    auto* indices = input(1).template data<T, Context>();
+    auto* indices = input(1).template data<int, Context>();
     auto* dYdata = input(-1).template data<T, Context>();
     auto* dXdata = output(0)->template mutable_data<T, Context>();
     if (!acc_grad) math::Set<T, Context>(output(0)->count(), 0, dXdata);
     kernel::AtGrad<T, Context>(input(-1).count(), outer_dim, inner_dim,
-                               x_slice_dim, y_slice_dim, indices, dYdata, dXdata, &ctx());
+                               x_slice_dim, y_slice_dim, indices, dYdata, dXdata);
 }

 template <class Context>

@@ -58,7 +62,9 @@ void AtGradientOp<Context>::RunOnDevice() {
     inner_dim = input(0).count(axis + 1);
     output(0)->ReshapeLike(input(0));

+    CHECK(input(1).template IsType<int>()) << "\nThe type of indices should be int32.";
     if (input(0).template IsType<float>()) RunWithType<float>();
+    else if (input(0).template IsType<int>()) RunWithType<int>();
     else LOG(FATAL) << "Unsupported input types.";
 }
...
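AtGrad is the scatter-add dual of the At gather: each dY slice is accumulated into the dX row its index selected, which is why dXdata is zero-filled first unless gradients accumulate. In numpy terms:

import numpy as np

outer_dim, x_slice_dim, inner_dim = 2, 5, 3
indices = np.array([4, 0, 4], dtype=np.int32)       # note the repeated index
dy = np.ones((outer_dim, len(indices), inner_dim), dtype=np.float32)

dx = np.zeros((outer_dim, x_slice_dim, inner_dim), dtype=np.float32)
np.add.at(dx, (slice(None), indices), dy)           # scatter-add along axis 1
assert dx[:, 4].sum() == 2 * outer_dim * inner_dim  # two slices landed on row 4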
Dragon/src/operators/ndarray/random_pick_op.cc

@@ -7,12 +7,12 @@ namespace dragon {
 template <class Context> template <typename T>
 void RandomPickOp<Context>::RunWithType() {
-    auto* indices = pick_indices->template mutable_data<T, CPUContext>();
+    auto* indices = pick_indices->template mutable_data<int, CPUContext>();
     for (int i = 0; i < pick_indices->count(); i++)
-        indices[i] = T((*rand_generator())() % x_slice_dim);
+        indices[i] = int((*rand_generator())() % x_slice_dim);

     auto* Xdata = input(0).template data<T, Context>();
-    indices = pick_indices->template mutable_data<T, Context>();
+    indices = pick_indices->template mutable_data<int, Context>();
     auto* Ydata = output(0)->template mutable_data<T, Context>();
     kernel::At<T, Context>(output(0)->count(), outer_dim, inner_dim, x_slice_dim,

@@ -53,7 +53,7 @@ OPERATOR_SCHEMA(RandomPick).NumInputs(1).NumOutputs(2);
 template <class Context> template <typename T>
 void RandomPickGradientOp<Context>::RunWithType() {
-    auto* indices = pick_indices->template data<T, Context>();
+    auto* indices = pick_indices->template data<int, Context>();
     auto* dYdata = input(-1).template data<T, Context>();
     auto* dXdata = output(0)->template mutable_data<T, Context>();
     math::Set<T, Context>(output(0)->count(), 0, dXdata);

@@ -62,8 +62,7 @@ void RandomPickGradientOp<Context>::RunWithType() {
                                y_slice_dim,
                                indices,
                                dYdata,
-                               dXdata,
-                               &ctx());
+                               dXdata);
 }

 template <class Context>
...
Dragon/src/operators/ndarray/shape_op.cc

@@ -8,7 +8,7 @@ void ShapeOp<Context>::RunOnDevice() {
     output(0)->Reshape(vector<TIndex>(1, input(0).ndim()));

     //  forward
-    auto* Ydata = output(0)->template mutable_data<float, CPUContext>();
+    auto* Ydata = output(0)->template mutable_data<int, CPUContext>();
     for (int i = 0; i < input(0).ndim(); i++) Ydata[i] = input(0).dim(i);
 }
...
Dragon/src/operators/ndarray/tile_op.cc

@@ -25,7 +25,16 @@ void TileOp<Context>::TileRunWithType() {
 template <class Context>
 void TileOp<Context>::RunOnDevice() {
-    CHECK_EQ(multiples.size(), input(0).ndim());
+    //  parse tasks from desc
+    CHECK_EQ(multiples_desc.size(), input(0).ndim())
+        << "\nThe num of dimensions of input is " << input(0).ndim()
+        << ", but provided " << multiples_desc.size() << " multiples.";
+    vector< pair<int, int> > process_axes;
+    for (int i = 0; i < multiples_desc.size(); i++) {
+        int mult = ws()->GetTensor(multiples_desc[i])->template data<int, CPUContext>()[0];
+        if (mult > 1) process_axes.push_back({ mult, i });
+    }
+    std::sort(process_axes.begin(), process_axes.end());

     //  do nothing
     if (process_axes.size() == 0) {

@@ -81,7 +90,17 @@ void TileGradientOp<Context>::TileRunWithType() {
 template <class Context>
 void TileGradientOp<Context>::RunOnDevice() {
-    CHECK_EQ(multiples.size(), input(-1).ndim());
+    //  parse tasks from desc
+    CHECK_EQ(multiples_desc.size(), input(-1).ndim())
+        << "\nThe num of dimensions of input is " << input(-1).ndim()
+        << ", but provided " << multiples_desc.size() << " multiples.";
+    vector< pair<int, int> > process_axes;
+    for (int i = 0; i < multiples_desc.size(); i++) {
+        int mult = ws()->GetTensor(multiples_desc[i])->template data<int, CPUContext>()[0];
+        if (mult > 1) process_axes.push_back({ mult, i });
+    }
+    std::sort(process_axes.begin(), process_axes.end());
+    std::reverse(process_axes.begin(), process_axes.end());

     //  do nothing
     if (process_axes.size() == 0) {
...
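TileOp now reads each multiple from its descriptor tensor and still processes one axis at a time, skipping axes whose multiple is 1 (the `process_axes` filter above). Repeated single-axis tiling composes to the usual numpy tile, as a short check confirms:

import numpy as np

def tile_axis_by_axis(x, multiples):
    # Mirror TileOp's strategy: one pass per axis with multiple > 1.
    for axis, mult in enumerate(multiples):
        if mult > 1:
            x = np.concatenate([x] * mult, axis=axis)
    return x

x = np.arange(6, dtype=np.float32).reshape(2, 3)
assert np.array_equal(tile_axis_by_axis(x, (2, 1)), np.tile(x, (2, 1)))
assert np.array_equal(tile_axis_by_axis(x, (3, 2)), np.tile(x, (3, 2)))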
Dragon/src/operators/norm/cudnn_batch_norm_op.cc

@@ -173,52 +173,6 @@ void CuDNNBatchNormGradientOp<Context>::Setup() {
 }

-template <class Context> template <typename T>
-void CuDNNBatchNormGradientOp<Context>::InferenceRunWithType() {
-    if (output(0)->name() != "ignore") {
-        INIT_MULTIPLIER(multiplier, NS);
-        INIT_MULTIPLIER(num_multiplier, N);
-        INIT_MULTIPLIER(spatial_multiplier, S);
-        stddev = ws()->GetBuffer();
-        stddev->ReshapeLike(input(0));
-
-        auto* dYdata = input(-1).template data<T, Context>();
-        auto* dXdata = output(0)->template mutable_data<T, Context>();
-        auto* Std_data = stddev->template mutable_data<T, Context>();
-        auto* Sdata = input(3).template data<T, Context>();
-        auto* hVar_data = input(2).template data<T, Context>();
-        auto* tVar_data = var->template mutable_data<T, Context>();
-        auto* NSMul_data = multiplier->template data<T, Context>();
-        auto* SMul_data = spatial_multiplier->template data<T, Context>();
-        auto* NMul_data = num_multiplier->template data<T, Context>();
-        auto* NC_data = num_by_chans.template mutable_data<T, Context>();
-
-        //  compute stddev
-        ctx().template Copy<T, Context, Context>(var->count(), tVar_data, hVar_data);
-        math::AddScalar<T, Context>(var->count(), this->eps, tVar_data);
-        math::Sqrt<T, Context>(var->count(), tVar_data, tVar_data);
-        //  divide scale by stddev
-        math::Div<T, Context>(var->count(), Sdata, tVar_data, tVar_data);
-        //  compute dE/dY \cot (scale / std(X))
-        if (data_format == "NCHW") {
-            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans, N, C, 1, 1.0, NMul_data, tVar_data, 0.0, NC_data);
-            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans, NC, S, 1, 1.0, NC_data, SMul_data, 0.0, Std_data);
-        } else if (data_format == "NHWC") {
-            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans, NS, C, 1, 1.0, NSMul_data, tVar_data, 0.0, Std_data);
-        }
-        math::Mul<T, Context>(output(0)->count(), dYdata, Std_data, dXdata);
-        ws()->ReleaseBuffer(stddev);
-    }
-}

 template <class Context> template <typename T>
 void CuDNNBatchNormGradientOp<Context>::TrainingRunWithType() {
     //  determine the bn desc
     if (input(0).ndim() == 2) {

@@ -288,6 +242,52 @@ void CuDNNBatchNormGradientOp<Context>::TrainingRunWithType() {
 }

+template <class Context> template <typename T>
+void CuDNNBatchNormGradientOp<Context>::InferenceRunWithType() {
+    if (output(0)->name() != "ignore") {
+        INIT_MULTIPLIER(multiplier, NS);
+        INIT_MULTIPLIER(num_multiplier, N);
+        INIT_MULTIPLIER(spatial_multiplier, S);
+        stddev = ws()->GetBuffer();
+        stddev->ReshapeLike(input(0));
+
+        auto* dYdata = input(-1).template data<T, Context>();
+        auto* dXdata = output(0)->template mutable_data<T, Context>();
+        auto* Std_data = stddev->template mutable_data<T, Context>();
+        auto* Sdata = input(3).template data<T, Context>();
+        auto* hVar_data = input(2).template data<T, Context>();
+        auto* tVar_data = var->template mutable_data<T, Context>();
+        auto* NSMul_data = multiplier->template data<T, Context>();
+        auto* SMul_data = spatial_multiplier->template data<T, Context>();
+        auto* NMul_data = num_multiplier->template data<T, Context>();
+        auto* NC_data = num_by_chans.template mutable_data<T, Context>();
+
+        //  compute stddev
+        ctx().template Copy<T, Context, Context>(var->count(), tVar_data, hVar_data);
+        math::AddScalar<T, Context>(var->count(), this->eps, tVar_data);
+        math::Sqrt<T, Context>(var->count(), tVar_data, tVar_data);
+        //  divide scale by stddev
+        math::Div<T, Context>(var->count(), Sdata, tVar_data, tVar_data);
+        //  compute dE/dY \cot (scale / std(X))
+        if (data_format == "NCHW") {
+            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans, N, C, 1, 1.0, NMul_data, tVar_data, 0.0, NC_data);
+            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans, NC, S, 1, 1.0, NC_data, SMul_data, 0.0, Std_data);
+        } else if (data_format == "NHWC") {
+            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans, NS, C, 1, 1.0, NSMul_data, tVar_data, 0.0, Std_data);
+        }
+        math::Mul<T, Context>(output(0)->count(), dYdata, Std_data, dXdata);
+        ws()->ReleaseBuffer(stddev);
+    }
+}

 template <class Context>
 void CuDNNBatchNormGradientOp<Context>::RunOnDevice() {
     Setup();
...
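The function moved above computes the inference-mode input gradient: with frozen statistics, batch norm is an affine map, so dX = dY * scale / sqrt(var + eps), broadcast per channel. A numpy restatement of what the Gemm broadcasting achieves:

import numpy as np

N, C, S, eps = 2, 3, 4, 1e-3
dy = np.random.randn(N, C, S).astype(np.float32)
scale = np.random.randn(C).astype(np.float32)       # gamma
hvar = np.random.rand(C).astype(np.float32)         # frozen history variance

# InferenceRunWithType, minus the Gemm broadcasting machinery:
coef = scale / np.sqrt(hvar + eps)                  # per-channel factor
dx = dy * coef[None, :, None]                       # broadcast over N and S
assert dx.shape == dy.shape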
Dragon/src/operators/norm/fused_batch_norm.cc
View file @
7717903
#include "operators/norm/batch_norm_op.h"
#include "core/workspace.h"
#include "utils/math_functions.h"
#include "utils/filler.h"
namespace
dragon
{
template
<
class
Context
>
template
<
typename
T
>
void
FusedBatchNormOp
<
Context
>::
TrainingRunWithType
()
{
INIT_MULTIPLIER
(
multiplier
,
NS
);
INIT_MULTIPLIER
(
num_multiplier
,
N
);
INIT_MULTIPLIER
(
spatial_multiplier
,
S
);
TENSOR_FILL
(
input
(
1
),
vector
<
TIndex
>
(
1
,
C
));
// history_mean
TENSOR_FILL
(
input
(
2
),
vector
<
TIndex
>
(
1
,
C
));
// history_var
TENSOR_FILL
(
input
(
3
),
vector
<
TIndex
>
(
1
,
C
));
// scale
TENSOR_FILL
(
input
(
4
),
vector
<
TIndex
>
(
1
,
C
));
// bias
auto
*
hMean_data
=
input
(
1
).
template
mutable_data
<
T
,
Context
>
();
auto
*
hVar_data
=
input
(
2
).
template
mutable_data
<
T
,
Context
>
();
auto
*
Sdata
=
input
(
3
).
template
data
<
T
,
Context
>
();
auto
*
Bdata
=
input
(
4
).
template
data
<
T
,
Context
>
();
auto
*
tMean_data
=
mean
->
template
mutable_data
<
T
,
Context
>
();
auto
*
tVar_data
=
var
->
template
mutable_data
<
T
,
Context
>
();
auto
*
Xdata
=
input
(
0
).
template
data
<
T
,
Context
>
();
auto
*
Ydata
=
output
(
0
)
->
template
mutable_data
<
T
,
Context
>
();
auto
*
NSMul_data
=
multiplier
->
template
data
<
T
,
Context
>
();
auto
*
SMul_data
=
spatial_multiplier
->
template
data
<
T
,
Context
>
();
auto
*
NMul_data
=
num_multiplier
->
template
data
<
T
,
Context
>
();
auto
*
NC_data
=
num_by_chans
.
template
mutable_data
<
T
,
Context
>
();
auto
*
Std_data
=
stddev
->
template
mutable_data
<
T
,
Context
>
();
ctx
().
template
Copy
<
T
,
Context
,
Context
>
(
output
(
0
)
->
count
(),
Ydata
,
Xdata
);
// compute mean
if
(
data_format
==
"NCHW"
)
{
math
::
Gemv
<
T
,
Context
>
(
CblasNoTrans
,
NC
,
S
,
1.0
/
NS
,
Xdata
,
SMul_data
,
0
,
NC_data
);
math
::
Gemv
<
T
,
Context
>
(
CblasTrans
,
N
,
C
,
1.0
,
NC_data
,
NMul_data
,
0
,
tMean_data
);
}
else
if
(
data_format
==
"NHWC"
)
{
math
::
Gemv
<
T
,
Context
>
(
CblasTrans
,
NS
,
C
,
1.0
/
NS
,
Xdata
,
NSMul_data
,
0
,
tMean_data
);
}
// subtract mean
if
(
data_format
==
"NCHW"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
N
,
C
,
1
,
1.0
,
NMul_data
,
tMean_data
,
0.0
,
NC_data
);
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NC
,
S
,
1
,
-
1.0
,
NC_data
,
SMul_data
,
1.0
,
Ydata
);
}
else
if
(
data_format
==
"NHWC"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NS
,
C
,
1
,
-
1.0
,
NSMul_data
,
tMean_data
,
1.0
,
Ydata
);
}
// compute variance
// note that we use VAR(X) = E((X - EX) ^ 2)
math
::
Square
<
T
,
Context
>
(
output
(
0
)
->
count
(),
Ydata
,
Std_data
);
if
(
data_format
==
"NCHW"
)
{
math
::
Gemv
<
T
,
Context
>
(
CblasNoTrans
,
NC
,
S
,
1.0
/
NS
,
Std_data
,
SMul_data
,
0.0
,
NC_data
);
math
::
Gemv
<
T
,
Context
>
(
CblasTrans
,
N
,
C
,
1.0
,
NC_data
,
NMul_data
,
0.0
,
tVar_data
);
}
else
if
(
data_format
==
"NHWC"
)
{
math
::
Gemv
<
T
,
Context
>
(
CblasTrans
,
NS
,
C
,
1.0
/
NS
,
Std_data
,
NSMul_data
,
0.0
,
tVar_data
);
}
// compute moving average
if
(
!
is_recomputing
)
{
// History(X) = (1 - momentum) * Cur(X) + momentum * History(X)
math
::
Axpby
<
T
,
Context
>
(
mean
->
count
(),
1.0
-
momentum
,
tMean_data
,
momentum
,
hMean_data
);
math
::
Axpby
<
T
,
Context
>
(
var
->
count
(),
1.0
-
momentum
,
tVar_data
,
momentum
,
hVar_data
);
}
// compute stddev
math
::
AddScalar
<
T
,
Context
>
(
var
->
count
(),
eps
,
tVar_data
);
math
::
Sqrt
<
T
,
Context
>
(
var
->
count
(),
tVar_data
,
tVar_data
);
// divide by stddev
if
(
data_format
==
"NCHW"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
N
,
C
,
1
,
1.0
,
NMul_data
,
tVar_data
,
0.0
,
NC_data
);
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NC
,
S
,
1
,
1.0
,
NC_data
,
SMul_data
,
0.0
,
Std_data
);
}
else
if
(
data_format
==
"NHWC"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NS
,
C
,
1
,
1.0
,
NSMul_data
,
tVar_data
,
0.0
,
Std_data
);
}
math
::
Div
<
T
,
Context
>
(
output
(
0
)
->
count
(),
Ydata
,
Std_data
,
Ydata
);
// store x_norm for backward
auto
*
XNorm_data
=
x_norm
->
template
mutable_data
<
T
,
Context
>
();
ctx
().
template
Copy
<
T
,
Context
,
Context
>
(
output
(
0
)
->
count
(),
XNorm_data
,
Ydata
);
// scale
if
(
data_format
==
"NCHW"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
N
,
C
,
1
,
1.0
,
NMul_data
,
Sdata
,
0.0
,
NC_data
);
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NC
,
S
,
1
,
1.0
,
NC_data
,
SMul_data
,
0.0
,
Std_data
);
}
else
if
(
data_format
==
"NHWC"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NS
,
C
,
1
,
1.0
,
NSMul_data
,
Sdata
,
0.0
,
Std_data
);
}
math
::
Mul
<
T
,
Context
>
(
output
(
0
)
->
count
(),
Ydata
,
Std_data
,
Ydata
);
// shift
if
(
data_format
==
"NCHW"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
N
,
C
,
1
,
1.0
,
NMul_data
,
Bdata
,
0.0
,
NC_data
);
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NC
,
S
,
1
,
1.0
,
NC_data
,
SMul_data
,
1.0
,
Ydata
);
}
else
if
(
data_format
==
"NHWC"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NS
,
C
,
1
,
1.0
,
NSMul_data
,
Bdata
,
1.0
,
Ydata
);
}
ws
()
->
ReleaseBuffer
(
stddev
);
}
template
<
class
Context
>
template
<
typename
T
>
void
FusedBatchNormOp
<
Context
>::
InferenceRunWithType
()
{
INIT_MULTIPLIER
(
multiplier
,
NS
);
INIT_MULTIPLIER
(
num_multiplier
,
N
);
INIT_MULTIPLIER
(
spatial_multiplier
,
S
);
TENSOR_FILL
(
input
(
1
),
vector
<
TIndex
>
(
1
,
C
));
// history_mean
TENSOR_FILL
(
input
(
2
),
vector
<
TIndex
>
(
1
,
C
));
// history_var
TENSOR_FILL
(
input
(
3
),
vector
<
TIndex
>
(
1
,
C
));
// scale
TENSOR_FILL
(
input
(
4
),
vector
<
TIndex
>
(
1
,
C
));
// bias
auto
*
hMean_data
=
input
(
1
).
template
mutable_data
<
T
,
Context
>
();
auto
*
hVar_data
=
input
(
2
).
template
mutable_data
<
T
,
Context
>
();
auto
*
Sdata
=
input
(
3
).
template
data
<
T
,
Context
>
();
auto
*
Bdata
=
input
(
4
).
template
data
<
T
,
Context
>
();
auto
*
tMean_data
=
mean
->
template
mutable_data
<
T
,
Context
>
();
auto
*
tVar_data
=
var
->
template
mutable_data
<
T
,
Context
>
();
auto
*
Xdata
=
input
(
0
).
template
data
<
T
,
Context
>
();
auto
*
Ydata
=
output
(
0
)
->
template
mutable_data
<
T
,
Context
>
();
auto
*
NSMul_data
=
multiplier
->
template
data
<
T
,
Context
>
();
auto
*
SMul_data
=
spatial_multiplier
->
template
data
<
T
,
Context
>
();
auto
*
NMul_data
=
num_multiplier
->
template
data
<
T
,
Context
>
();
auto
*
NC_data
=
num_by_chans
.
template
mutable_data
<
T
,
Context
>
();
auto
*
Std_data
=
stddev
->
template
mutable_data
<
T
,
Context
>
();
ctx
().
template
Copy
<
T
,
Context
,
Context
>
(
input
(
0
).
count
(),
Ydata
,
Xdata
);
ctx
().
template
Copy
<
T
,
Context
,
Context
>
(
mean
->
count
(),
tMean_data
,
hMean_data
);
ctx
().
template
Copy
<
T
,
Context
,
Context
>
(
var
->
count
(),
tVar_data
,
hVar_data
);
// subtract mean
if
(
data_format
==
"NCHW"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
N
,
C
,
1
,
1.0
,
NMul_data
,
tMean_data
,
0.0
,
NC_data
);
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NC
,
S
,
1
,
-
1.0
,
NC_data
,
SMul_data
,
1.0
,
Ydata
);
}
else
if
(
data_format
==
"NHWC"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NS
,
C
,
1
,
-
1.0
,
NSMul_data
,
tMean_data
,
1.0
,
Ydata
);
}
// compute stddev
math
::
AddScalar
<
T
,
Context
>
(
var
->
count
(),
eps
,
tVar_data
);
math
::
Sqrt
<
T
,
Context
>
(
var
->
count
(),
tVar_data
,
tVar_data
);
// divide by stddev
if
(
data_format
==
"NCHW"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
N
,
C
,
1
,
1.0
,
NMul_data
,
tVar_data
,
0.0
,
NC_data
);
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NC
,
S
,
1
,
1.0
,
NC_data
,
SMul_data
,
0.0
,
Std_data
);
}
else
if
(
data_format
==
"NHWC"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NS
,
C
,
1
,
1.0
,
NSMul_data
,
tVar_data
,
0.0
,
Std_data
);
}
math
::
Div
<
T
,
Context
>
(
output
(
0
)
->
count
(),
Ydata
,
Std_data
,
Ydata
);
// scale
if
(
data_format
==
"NCHW"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
N
,
C
,
1
,
1.0
,
NMul_data
,
Sdata
,
0.0
,
NC_data
);
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NC
,
S
,
1
,
1.0
,
NC_data
,
SMul_data
,
0.0
,
Std_data
);
}
else
if
(
data_format
==
"NHWC"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NS
,
C
,
1
,
1.0
,
NSMul_data
,
Sdata
,
0.0
,
Std_data
);
}
math
::
Mul
<
T
,
Context
>
(
output
(
0
)
->
count
(),
Ydata
,
Std_data
,
Ydata
);
// shift
if
(
data_format
==
"NCHW"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
N
,
C
,
1
,
1.0
,
NMul_data
,
Bdata
,
0.0
,
NC_data
);
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NC
,
S
,
1
,
1.0
,
NC_data
,
SMul_data
,
1.0
,
Ydata
);
}
else
if
(
data_format
==
"NHWC"
)
{
math
::
Gemm
<
T
,
Context
>
(
CblasNoTrans
,
CblasNoTrans
,
NS
,
C
,
1
,
1.0
,
NSMul_data
,
Bdata
,
1.0
,
Ydata
);
}
ws
()
->
ReleaseBuffer
(
stddev
);
}
template
<
class
Context
>
void
FusedBatchNormOp
<
Context
>::
Setup
()
{
// determine the mode
if
(
use_stats
==
-
1
)
use_global_stats
=
phase
()
==
"TEST"
?
true
:
false
;
else
use_global_stats
=
use_stats
==
1
?
true
:
false
;
is_recomputing
=
ws
()
->
GetTensor
(
"/opt/mirror_stage/recompute_flag"
)
->
template
data
<
bool
,
CPUContext
>
()[
0
];
// determine the data format
TIndex
channel_axis
=
axis
;
data_format
=
"NCHW"
;
if
(
channel_axis
==
-
1
)
channel_axis
+=
(
int
)
input
(
0
).
ndim
();
if
(
channel_axis
+
1
==
(
int
)
input
(
0
).
ndim
())
data_format
=
"NHWC"
;
N
=
input
(
0
).
dim
(
0
);
C
=
input
(
0
).
dim
(
channel_axis
);
NC
=
N
*
C
;
S
=
input
(
0
).
count
()
/
NC
;
NS
=
N
*
S
;
// make resource
mean
=
ws
()
->
CreateTensor
(
"/mnt/"
+
anchor
()
+
"/bn_mean"
);
var
=
ws
()
->
CreateTensor
(
"/mnt/"
+
anchor
()
+
"/bn_var"
);
x_norm
=
ws
()
->
CreateTensor
(
"/mnt/"
+
anchor
()
+
"/bn_x_norm"
);
stddev
=
ws
()
->
GetBuffer
();
stddev
->
ReshapeLike
(
input
(
0
));
// reshape
mean
->
Reshape
(
vector
<
TIndex
>
(
1
,
C
));
var
->
Reshape
(
vector
<
TIndex
>
(
1
,
C
));
num_by_chans
.
Reshape
(
vector
<
TIndex
>
(
1
,
NC
));
x_norm
->
ReshapeLike
(
input
(
0
));
output
(
0
)
->
ReshapeLike
(
input
(
0
));
}
template
<
class
Context
>
void
FusedBatchNormOp
<
Context
>::
RunOnDevice
()
{
Setup
();
if
(
input
(
0
).
template
IsType
<
float
>
())
{
if
(
use_global_stats
)
InferenceRunWithType
<
float
>
();
else
TrainingRunWithType
<
float
>
();
}
#ifdef WITH_CUDA_FP16
else
if
(
input
(
0
).
template
IsType
<
float16
>
())
{
if
(
use_global_stats
)
InferenceRunWithType
<
float16
>
();
else
TrainingRunWithType
<
float16
>
();
}
#endif
else
LOG
(
FATAL
)
<<
"Unsupported input types."
;
}
DEPLOY_CPU
(
FusedBatchNorm
);
#ifdef WITH_CUDA
DEPLOY_CUDA
(
FusedBatchNorm
);
#endif
OPERATOR_SCHEMA
(
FusedBatchNorm
).
NumInputs
(
5
).
NumOutputs
(
1
);
template <class Context> template <typename T>
void FusedBatchNormGradientOp<Context>::TrainingRunWithType() {
    INIT_MULTIPLIER(multiplier, NS);
    INIT_MULTIPLIER(num_multiplier, N);
    INIT_MULTIPLIER(spatial_multiplier, S);
    auto* dYdata = input(-1).template data<T, Context>();
    auto* dXdata = output(0)->template mutable_data<T, Context>();
    auto* Sdata = input(3).template data<T, Context>();
    auto* Std_data = stddev->template mutable_data<T, Context>();
    auto* tMean_data = mean->template mutable_data<T, Context>();
    auto* tVar_data = var->template mutable_data<T, Context>();
    auto* NSMul_data = multiplier->template data<T, Context>();
    auto* SMul_data = spatial_multiplier->template data<T, Context>();
    auto* NMul_data = num_multiplier->template data<T, Context>();
    auto* NC_data = num_by_chans.template mutable_data<T, Context>();
    auto* XNorm_data = x_norm->template data<T, Context>();

    // gradient w.r.t. scale
    if (output(1)->name() != "ignore") {
        auto* dSdata = output(1)->template mutable_data<T, Context>();
        math::Mul<T, Context>(stddev->count(), XNorm_data, dYdata, Std_data);
        if (data_format == "NCHW") {
            math::Gemv<T, Context>(CblasNoTrans, NC, S,
                1.0, Std_data, SMul_data, 0.0, NC_data);
            math::Gemv<T, Context>(CblasTrans, N, C,
                1.0, NC_data, NMul_data, 1.0, dSdata);
        } else if (data_format == "NHWC") {
            math::Gemv<T, Context>(CblasTrans, NS, C,
                1.0, Std_data, NSMul_data, 1.0, dSdata);
        }
    }

    // gradient w.r.t. bias
    if (output(2)->name() != "ignore") {
        auto* dBdata = output(2)->template mutable_data<T, Context>();
        if (data_format == "NCHW") {
            math::Gemv<T, Context>(CblasNoTrans, NC, S,
                1.0, dYdata, SMul_data, 0.0, NC_data);
            math::Gemv<T, Context>(CblasTrans, N, C,
                1.0, NC_data, NMul_data, 1.0, dBdata);
        } else if (data_format == "NHWC") {
            math::Gemv<T, Context>(CblasTrans, NS, C,
                1.0, dYdata, NSMul_data, 1.0, dBdata);
        }
    }

    // gradient w.r.t. x
    if (output(0)->name() != "ignore") {
        // scale * dY
        if (data_format == "NCHW") {
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                N, C, 1, 1.0, NMul_data, Sdata, 0.0, NC_data);
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                NC, S, 1, 1.0, NC_data, SMul_data, 0.0, Std_data);
        } else if (data_format == "NHWC") {
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                NS, C, 1, 1.0, NSMul_data, Sdata, 0.0, Std_data);
        }
        math::Mul<T, Context>(stddev->count(), Std_data, dYdata, Std_data);

        // sum of x_hat * (dl / dx_hat)
        math::Mul<T, Context>(stddev->count(), XNorm_data, Std_data, dXdata);
        if (data_format == "NCHW") {
            math::Gemv<T, Context>(CblasNoTrans, NC, S,
                1.0, dXdata, SMul_data, 0.0, NC_data);
            math::Gemv<T, Context>(CblasTrans, N, C,
                1.0, NC_data, NMul_data, 0.0, tMean_data);
        } else if (data_format == "NHWC") {
            math::Gemv<T, Context>(CblasTrans, NS, C,
                1.0, dXdata, NSMul_data, 0.0, tMean_data);
        }

        // x_hat times the sum
        if (data_format == "NCHW") {
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                N, C, 1, 1.0, NMul_data, tMean_data, 0.0, NC_data);
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                NC, S, 1, 1.0, NC_data, SMul_data, 0.0, dXdata);
        } else if (data_format == "NHWC") {
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                NS, C, 1, 1.0, NSMul_data, tMean_data, 0.0, dXdata);
        }
        math::Mul<T, Context>(stddev->count(), XNorm_data, dXdata, dXdata);

        // subtract the average of x_hat times the sum
        if (data_format == "NCHW") {
            math::Gemv<T, Context>(CblasNoTrans, NC, S,
                1.0, Std_data, SMul_data, 0.0, NC_data);
            math::Gemv<T, Context>(CblasTrans, N, C,
                1.0, NC_data, NMul_data, 0.0, tMean_data);
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                N, C, 1, 1.0, NMul_data, tMean_data, 0.0, NC_data);
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                NC, S, 1, 1.0, NC_data, SMul_data, 1.0, dXdata);
        } else if (data_format == "NHWC") {
            math::Gemv<T, Context>(CblasTrans, NS, C,
                1.0, Std_data, NSMul_data, 0.0, tMean_data);
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                NS, C, 1, 1.0, NSMul_data, tMean_data, 1.0, dXdata);
        }
        math::Axpby<T, Context>(stddev->count(),
            1.0, Std_data, -1.0 / NS, dXdata);

        // multiply with the inverse std
        if (data_format == "NCHW") {
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                N, C, 1, 1.0, NMul_data, tVar_data, 0.0, NC_data);
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                NC, S, 1, 1.0, NC_data, SMul_data, 0.0, Std_data);
        } else if (data_format == "NHWC") {
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                NS, C, 1, 1.0, NSMul_data, tVar_data, 0.0, Std_data);
        }
        // divide by stddev
        math::Div<T, Context>(output(0)->count(), dXdata, Std_data, dXdata);
    }
    ws()->ReleaseBuffer(stddev);
}
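For reference, the chain of Gemm/Gemv/Mul calls above is the standard batch-norm backward pass, written per channel with m = NS reduced positions:

    dx_hat = dY * scale
    dX = (dx_hat - mean(dx_hat) - x_hat * mean(x_hat * dx_hat)) / std

Tracing the buffers: Std_data first holds dx_hat; dXdata accumulates x_hat * sum(x_hat * dx_hat) plus sum(dx_hat), both broadcast back to full shape; the Axpby with coefficient -1.0 / NS turns that into the two mean subtractions; and the final Div applies the 1/std factor (tVar_data holds the standard deviation at this point).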
template <class Context> template <typename T>
void FusedBatchNormGradientOp<Context>::InferenceRunWithType() {
    if (output(0)->name() != "ignore") {
        INIT_MULTIPLIER(multiplier, NS);
        INIT_MULTIPLIER(num_multiplier, N);
        INIT_MULTIPLIER(spatial_multiplier, S);
        auto* dYdata = input(-1).template data<T, Context>();
        auto* dXdata = output(0)->template mutable_data<T, Context>();
        auto* Std_data = stddev->template mutable_data<T, Context>();
        auto* Sdata = input(3).template data<T, Context>();
        auto* hVar_data = input(2).template data<T, Context>();
        auto* tVar_data = var->template mutable_data<T, Context>();
        auto* NSMul_data = multiplier->template data<T, Context>();
        auto* SMul_data = spatial_multiplier->template data<T, Context>();
        auto* NMul_data = num_multiplier->template data<T, Context>();
        auto* NC_data = num_by_chans.template mutable_data<T, Context>();
        // divide scale by stddev
        math::Div<T, Context>(var->count(), Sdata, tVar_data, tVar_data);
        // compute dE/dY \cdot (scale / std(X))
        if (data_format == "NCHW") {
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                N, C, 1, 1.0, NMul_data, tVar_data, 0.0, NC_data);
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                NC, S, 1, 1.0, NC_data, SMul_data, 0.0, Std_data);
        } else if (data_format == "NHWC") {
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
                NS, C, 1, 1.0, NSMul_data, tVar_data, 0.0, Std_data);
        }
        math::Mul<T, Context>(output(0)->count(), dYdata, Std_data, dXdata);
    }
    ws()->ReleaseBuffer(stddev);
}
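In inference mode the saved statistics are constants, so the backward pass collapses to dX = dY * (scale / std): the Div computes the per-channel scale / std factor in place, the Gemm pair merely broadcasts it to full shape, and the final elementwise Mul applies it.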
template <class Context>
void FusedBatchNormGradientOp<Context>::Setup() {
    // determine the mode
    if (use_stats == -1) use_global_stats = phase() == "TEST" ? true : false;
    else use_global_stats = use_stats == 1 ? true : false;

    // determine the data format
    TIndex channel_axis = axis;
    data_format = "NCHW";
    if (channel_axis == -1) channel_axis += (int)input(0).ndim();
    if (channel_axis + 1 == (int)input(0).ndim()) data_format = "NHWC";
    N = input(0).dim(0);
    C = input(0).dim(channel_axis);
    NC = N * C;
    S = input(0).count() / NC;
    NS = N * S;

    // make resource
    mean = ws()->GetTensor("/mnt/" + anchor() + "/bn_mean");
    var = ws()->GetTensor("/mnt/" + anchor() + "/bn_var");
    x_norm = ws()->GetTensor("/mnt/" + anchor() + "/bn_x_norm");
    stddev = ws()->GetBuffer();
    stddev->ReshapeLike(input(0));

    // reshape
    num_by_chans.Reshape(vector<TIndex>(1, NC));
    output(0)->ReshapeLike(input(0));
}
template <class Context>
void FusedBatchNormGradientOp<Context>::RunOnDevice() {
    Setup();
    if (input(0).template IsType<float>()) {
        if (use_global_stats) InferenceRunWithType<float>();
        else TrainingRunWithType<float>();
    }
#ifdef WITH_CUDA_FP16
    else if (input(0).template IsType<float16>()) {
        if (use_global_stats) InferenceRunWithType<float16>();
        else TrainingRunWithType<float16>();
    }
#endif
    else LOG(FATAL) << "Unsupported input types.";
}
template <class Context>
void FusedBatchNormGradientOp<Context>::ShareGradient() {
    if (use_global_stats) {
...
...
Dragon/src/operators/vision/bilinear_resize_op.cc  View file @ 7717903
...
...
@@ -34,23 +34,13 @@ void BilinearResizeOp<Context>::RunWithType() {
 template <class Context>
 void BilinearResizeOp<Context>::RunOnDevice() {
     dims = input(0).dims();
-    if (dynamic_dsize.size() > 0) {
-        CHECK_EQ(dynamic_dsize.size(), 2) << "\nThe dsize should be a scalar with 2 elements.";
+    if (dsize_desc.size() > 0) {
+        CHECK_EQ(dsize_desc.size(), 2) << "\nThe dsize should be a scalar with 2 elements.";
         for (int i = 0; i < 2; i++) {
-            Tensor* t = ws()->GetTensor(dynamic_dsize[i]);
-            if (t->IsType<int>()) {
-                dims[spatial_axis + i] = t->template data<int, CPUContext>()[0];
-            } else if (t->IsType<float>()) {
-                dims[spatial_axis + i] = t->template data<float, CPUContext>()[0];
-            } else {
-                LOG(FATAL) << "Unsupported types of dsize.";
-            }
+            Tensor* dsize = ws()->GetTensor(dsize_desc[i]);
+            CHECK(dsize->IsType<int>()) << "\nThe type of dsize should be int32.";
+            dims[spatial_axis + i] = dsize->template data<int, CPUContext>()[0];
         }
     } else if (static_dsize.size() > 0) {
         CHECK_EQ(static_dsize.size(), 2) << "\nThe dsize should be a scalar with 2 elements.";
         for (int i = 0; i < 2; i++) dims[spatial_axis + i] = static_dsize[i];
     } else {
         CHECK(fy != -1.0 && fx != -1.0) << "\nThe fx and fy should be set.";
...
...
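After this refactor a dynamic dsize entry is always the name of an int32 tensor in the workspace: the old path that also accepted float tensors (and its LOG(FATAL) fallback for other types) is gone, so callers must feed int32 shape tensors.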
Dragon/src/operators/vision/conv_op_base.cc  View file @ 7717903
...
...
@@ -29,15 +29,14 @@ void ConvOpBase<Context>::ComputeOutputShape() {
             const TIndex output_dim = stride[i] * (input_dim - 1) + dilated_kernel - 2 * pad[i];
             output_shape.push_back(output_dim);
         } else {
-            TIndex output_dim = -1;
-            if (dynamic_dsize.size() > 0) {
-                NOT_IMPLEMENTED;
-            } else if (static_dsize.size() > 0) {
-                if ((int)static_dsize.size() != num_spatial_axes + 2)
-                    LOG(FATAL) << "The len of output shape should be " << num_spatial_axes + 2
-                               << ", but got " << static_dsize.size();
-                output_dim = static_dsize[spatial_axis + i];
-            } else LOG(FATAL) << "The output shape must be specified if using SAME padding algorithm.";
+            CHECK(output_dims_desc.size() > 0)
+                << "\nThe output shape must be specified if using SAME padding algorithm.";
+            CHECK_EQ((int)output_dims_desc.size(), num_spatial_axes + 2)
+                << "\nThe len of output shape should be " << num_spatial_axes + 2
+                << ", but got " << output_dims_desc.size() << ".";
+            Tensor* t = ws()->GetTensor(output_dims_desc[spatial_axis + i]);
+            CHECK(t->IsType<int>()) << "\nThe type of output shape should be int32.";
+            TIndex output_dim = t->template data<int, CPUContext>()[0];
             TIndex padding_needed = stride[i] * (input_dim - 1) + dilated_kernel - output_dim;
             CHECK_GE(padding_needed, 0) << "\nThe output shape is incorrect."
...
...
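As a worked example of the check above: with input_dim = 14, stride[i] = 1, and dilated_kernel = 3, requesting output_dim = 14 gives padding_needed = 1 * (14 - 1) + 3 - 14 = 2, which is then split between the two sides of axis i; requesting an output_dim larger than 16 would drive padding_needed negative and trip the CHECK_GE.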
Dragon/src/operators/vision/nn_resize_op.cc  View file @ 7717903
...
...
@@ -34,23 +34,13 @@ void NNResizeOp<Context>::RunWithType() {
 template <class Context>
 void NNResizeOp<Context>::RunOnDevice() {
     vector<TIndex> dims = input(0).dims();
-    if (dynamic_dsize.size() > 0) {
-        CHECK_EQ(dynamic_dsize.size(), 2) << "\nThe dsize should be a scalar with 2 elements.";
+    if (dsize_desc.size() > 0) {
+        CHECK_EQ(dsize_desc.size(), 2) << "\nThe dsize should be a scalar with 2 elements.";
         for (int i = 0; i < 2; i++) {
-            Tensor* t = ws()->GetTensor(dynamic_dsize[i]);
-            if (t->IsType<int>()) {
-                dims[spatial_axis + i] = t->template data<int, CPUContext>()[0];
-            } else if (t->IsType<float>()) {
-                dims[spatial_axis + i] = t->template data<float, CPUContext>()[0];
-            } else {
-                LOG(FATAL) << "Unsupported types of dsize.";
-            }
+            Tensor* dsize = ws()->GetTensor(dsize_desc[i]);
+            CHECK(dsize->IsType<int>()) << "\nThe type of dsize should be int32.";
+            dims[spatial_axis + i] = dsize->template data<int, CPUContext>()[0];
         }
     } else if (static_dsize.size() > 0) {
         CHECK_EQ(static_dsize.size(), 2) << "\nThe dsize should be a scalar with 2 elements.";
         for (int i = 0; i < 2; i++) dims[spatial_axis + i] = static_dsize[i];
     } else {
         CHECK(fy != -1.0 && fx != -1.0) << "\nThe fx and fy should be set.";
...
...
Dragon/src/utils/math_functions.cc  View file @ 7717903
...
...
@@ -150,6 +150,16 @@ template <> void Add<float, CPUContext>(const int n,
 #endif  // WITH_SSE
 }
+template <> void Add<int, CPUContext>(const int n,
+                                      const int* a,
+                                      const int* b,
+                                      int* y) {
+#ifdef WITH_OMP
+    #pragma omp parallel for num_threads(GET_OMP_THREADS(n))
+#endif
+    for (int i = 0; i < n; ++i) y[i] = a[i] + b[i];
+}
 template <> void Sub<float, CPUContext>(const int n,
                                         const float* a,
                                         const float* b,
...
...
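The new int specialization mirrors the float one directly above it, minus the SSE branch: it falls back to a plain elementwise loop, parallelized with OpenMP when WITH_OMP is defined.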
Dragon/src/utils/op_kernel.cc  View file @ 7717903
...
...
@@ -904,22 +904,23 @@ template<> void Argmin<float, CPUContext>(const int count,
 /******************** ndarray.at ********************/
-template <> void CanonicalAxis<float, CPUContext>(const int count, const int dim, float* y) {
+template <> void CanonicalAxis<int, CPUContext>(const int count, const int dim, int* y) {
 #ifdef WITH_OMP
     #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
 #endif
     for (int i = 0; i < count; ++i) if (y[i] < 0) y[i] += dim;
 }
-template <> void At<float, CPUContext>(const int count,
+template <typename T> void _At(const int count,
                                const int outer_dim,
                                const int inner_dim,
                                const int x_slice_dim,
                                const int y_slice_dim,
-                               const float* indices,
-                               const float* x,
-                               float* y,
-                               CPUContext* context) {
+                               const int* indices,
+                               const T* x,
+                               T* y,
+                               CPUContext* ctx) {
     TIndex x_offset, y_offset, x_idx_offset, y_idx_offset;
     for (int i = 0; i < y_slice_dim; ++i) {
         y_idx_offset = i;
...
@@ -927,22 +928,51 @@ template <> void At<float, CPUContext>(const int count,
         for (int n = 0; n < outer_dim; ++n) {
             x_offset = (n * x_slice_dim + x_idx_offset) * inner_dim;
             y_offset = (n * y_slice_dim + y_idx_offset) * inner_dim;
-            context->Copy<float, CPUContext, CPUContext>(inner_dim,
+            ctx->Copy<T, CPUContext, CPUContext>(inner_dim,
                 y + y_offset, x + x_offset);
         }
     }
 }
-template <> void AtGrad<float, CPUContext>(const int count,
+template <> void At<float, CPUContext>(const int count,
                                const int outer_dim,
                                const int inner_dim,
                                const int x_slice_dim,
                                const int y_slice_dim,
-                               const float* indices,
-                               const float* dy,
-                               float* dx,
-                               CPUContext* context) {
+                               const int* indices,
+                               const float* x,
+                               float* y,
+                               CPUContext* ctx) {
+    _At<float>(count, outer_dim, inner_dim, x_slice_dim, y_slice_dim,
+               indices, x, y, ctx);
+}
+template <> void At<int, CPUContext>(const int count,
+                               const int outer_dim,
+                               const int inner_dim,
+                               const int x_slice_dim,
+                               const int y_slice_dim,
+                               const int* indices,
+                               const int* x,
+                               int* y,
+                               CPUContext* ctx) {
+    _At<int>(count, outer_dim, inner_dim, x_slice_dim, y_slice_dim,
+             indices, x, y, ctx);
+}
+template <typename T> void _AtGrad(const int count,
+                               const int outer_dim,
+                               const int inner_dim,
+                               const int x_slice_dim,
+                               const int y_slice_dim,
+                               const int* indices,
+                               const T* dy,
+                               T* dx) {
     TIndex x_offset, y_offset, x_idx_offset, y_idx_offset;
     for (int i = 0; i < y_slice_dim; ++i) {
         y_idx_offset = i;
...
@@ -950,7 +980,7 @@ template <> void AtGrad<float, CPUContext>(const int count,
         for (int n = 0; n < outer_dim; ++n) {
             x_offset = (n * x_slice_dim + x_idx_offset) * inner_dim;
             y_offset = (n * y_slice_dim + y_idx_offset) * inner_dim;
-            math::Add<float, CPUContext>(inner_dim,
+            math::Add<T, CPUContext>(inner_dim,
                 dy + y_offset, dx + x_offset, dx + x_offset);
...
@@ -958,6 +988,32 @@ template <> void AtGrad<float, CPUContext>(const int count,
     }
 }
+template <> void AtGrad<float, CPUContext>(const int count,
+                               const int outer_dim,
+                               const int inner_dim,
+                               const int x_slice_dim,
+                               const int y_slice_dim,
+                               const int* indices,
+                               const float* dy,
+                               float* dx) {
+    _AtGrad<float>(count, outer_dim, inner_dim, x_slice_dim, y_slice_dim,
+                   indices, dy, dx);
+}
+template <> void AtGrad<int, CPUContext>(const int count,
+                               const int outer_dim,
+                               const int inner_dim,
+                               const int x_slice_dim,
+                               const int y_slice_dim,
+                               const int* indices,
+                               const int* dy,
+                               int* dx) {
+    _AtGrad<int>(count, outer_dim, inner_dim, x_slice_dim, y_slice_dim,
+                 indices, dy, dx);
+}
 /******************** ndarray.concat ********************/
 template <> void Concat<float, CPUContext>(const int count,
...
...
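The _At/_AtGrad pair implements gather and its scatter-add gradient along one axis: y[n, i, s] = x[n, indices[i], s] on the way forward, dx[n, indices[i], s] += dy[n, i, s] on the way back. A minimal standalone sketch of the forward semantics (plain C++ with hypothetical shapes, not the Dragon kernels):

#include <cstdio>
#include <vector>

// Gather slices along the middle axis of an (outer, x_slice, inner) tensor.
void at(int outer_dim, int inner_dim, int x_slice_dim, int y_slice_dim,
        const int* indices, const float* x, float* y) {
    for (int i = 0; i < y_slice_dim; ++i) {          // each gathered slice
        for (int n = 0; n < outer_dim; ++n) {        // each outer block
            const float* src = x + (n * x_slice_dim + indices[i]) * inner_dim;
            float* dst = y + (n * y_slice_dim + i) * inner_dim;
            for (int k = 0; k < inner_dim; ++k) dst[k] = src[k];  // the Copy call
        }
    }
}

int main() {
    // x has shape (1, 4, 2); gathering slices {2, 0} yields y of shape (1, 2, 2).
    std::vector<float> x = {0, 1, 10, 11, 20, 21, 30, 31};
    std::vector<int> idx = {2, 0};
    std::vector<float> y(4);
    at(1, 2, 4, 2, idx.data(), x.data(), y.data());
    std::printf("%g %g %g %g\n", y[0], y[1], y[2], y[3]);  // prints: 20 21 0 1
    return 0;
}

The refactor also changes the index dtype from float to int32, which is why every specialization above now takes const int* indices.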
Dragon/src/utils/op_kernel.cu  View file @ 7717903
...
...
@@ -1574,8 +1574,8 @@ __global__ void _CanonicalAxis(const int count, const int dim, T* y) {
     }
 }
-template <> void CanonicalAxis<float, CUDAContext>(const int count, const int dim, float* y) {
-    _CanonicalAxis<float> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count, dim, y);
+template <> void CanonicalAxis<int, CUDAContext>(const int count, const int dim, int* y) {
+    _CanonicalAxis<int> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count, dim, y);
     CUDA_POST_KERNEL_CHECK;
 }
...
@@ -1585,7 +1585,7 @@ __global__ void _At(const int count,
     const int inner_dim,
     const int x_slice_dim,
     const int y_slice_dim,
-    const T* indices,
+    const int* indices,
     const T* x,
     T* y) {
     CUDA_KERNEL_LOOP(idx, count) {
...
@@ -1604,18 +1604,30 @@ template <> void At<float, CUDAContext>(const int count,
     const int inner_dim,
     const int x_slice_dim,
     const int y_slice_dim,
-    const float* indices,
+    const int* indices,
     const float* x,
     float* y,
     CUDAContext* context) {
     _At<float> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count,
-        outer_dim,
-        inner_dim,
-        x_slice_dim,
-        y_slice_dim,
-        indices,
-        x,
-        y);
+        outer_dim, inner_dim,
+        x_slice_dim, y_slice_dim,
+        indices, x, y);
     CUDA_POST_KERNEL_CHECK;
 }
+template <> void At<int, CUDAContext>(const int count,
+    const int outer_dim,
+    const int inner_dim,
+    const int x_slice_dim,
+    const int y_slice_dim,
+    const int* indices,
+    const int* x,
+    int* y,
+    CUDAContext* context) {
+    _At<int> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count,
+        outer_dim, inner_dim,
+        x_slice_dim, y_slice_dim,
+        indices, x, y);
+    CUDA_POST_KERNEL_CHECK;
+}
...
@@ -1625,7 +1637,7 @@ __global__ void _AtGrad(const int count,
     const int inner_dim,
     const int x_slice_dim,
     const int y_slice_dim,
-    const T* indices,
+    const int* indices,
     const T* dy,
     T* dx) {
     CUDA_KERNEL_LOOP(idx, count) {
...
@@ -1644,18 +1656,28 @@ template <> void AtGrad<float, CUDAContext>(const int count,
     const int inner_dim,
     const int x_slice_dim,
     const int y_slice_dim,
-    const float* indices,
+    const int* indices,
     const float* dy,
-    float* dx,
-    CUDAContext* context) {
+    float* dx) {
     _AtGrad<float> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count,
-        outer_dim,
-        inner_dim,
-        x_slice_dim,
-        y_slice_dim,
-        indices,
-        dy,
-        dx);
+        outer_dim, inner_dim,
+        x_slice_dim, y_slice_dim,
+        indices, dy, dx);
     CUDA_POST_KERNEL_CHECK;
 }
+template <> void AtGrad<int, CUDAContext>(const int count,
+    const int outer_dim,
+    const int inner_dim,
+    const int x_slice_dim,
+    const int y_slice_dim,
+    const int* indices,
+    const int* dy,
+    int* dx) {
+    _AtGrad<int> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count,
+        outer_dim, inner_dim,
+        x_slice_dim, y_slice_dim,
+        indices, dy, dx);
+    CUDA_POST_KERNEL_CHECK;
+}
...
@@ -3769,6 +3791,12 @@ __global__ void _ROIPooling(const int count,
     roi += n * 5;
     int im_idx = roi[0];
+    if (im_idx < 0) {
+        y[idx] = 0;
+        mask[idx] = 0;
+        continue;
+    }
     int x1 = round(roi[1] * spatial_scale);
     int y1 = round(roi[2] * spatial_scale);
     int x2 = round(roi[3] * spatial_scale);
...
@@ -3802,8 +3830,8 @@
                 max_val = x[x_idx];
                 max_idx = x_idx;
             }
-        }  //end w
-    }  // end h
+        }
+    }
     }
     y[idx] = max_val;
     mask[idx] = max_idx;
...
@@ -3857,7 +3885,6 @@ __global__ void _ROIPoolingGrad(const int count,
     const T* cur_roi = roi + n * 5;
     const int im_idx_spec = cur_roi[0];
-    // ignore wrong im_batch_idx
     if (im_idx != im_idx_spec) continue;
     int x1 = round(cur_roi[1] * spatial_scale);
...
@@ -3895,9 +3922,9 @@ __global__ void _ROIPoolingGrad(const int count,
                 if (mask_off[pool_idx] == (h * width + w)) {
                     diff += dy_off[pool_idx];
                 }
-            }  // end pw
-        }  // end ph
-    }  // end n
+            }
+        }
+    }
     }
     }
     dx[idx] = diff;
 }
 }
...
@@ -3949,6 +3976,13 @@ __global__ void _ROIAlign(const int count,
     roi += n * 5;
     int roi_batch_ind = roi[0];
+    if (roi_batch_ind < 0) {
+        y[idx] = 0;
+        mask_h[idx] = 0;
+        mask_w[idx] = 0;
+        continue;
+    }
     T roi_start_w = (roi[1]) * spatial_scale;
     T roi_start_h = (roi[2]) * spatial_scale;
     T roi_end_w = (roi[3]) * spatial_scale;
...
...
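Both ROI kernels now treat a negative batch index in roi[0] as a padded or invalid RoI: the output and the argmax masks are zeroed and the thread moves on, instead of indexing into image -1. This matches the common convention of padding variable-length RoI batches with a batch index of -1.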