Commit 1d03e8e2 authored Jan 19, 2019 by Ting PAN
Optimize GatherOp
1 parent c5def39b
Showing 41 changed files with 341 additions and 366 deletions
Docs/api/python/_static/css/dragon.css
Docs/api/python/contents/config.rst
Docs/api/python/contents/core/tensor.rst
Docs/api/python/contents/core/tensor_utils.rst
Docs/api/python/contents/memonger.rst
Docs/api/python/contents/tools/db.rst
Docs/api/python/contents/tools/im2db.rst
Docs/api/python/contents/tools/summary_writer.rst
Docs/api/python/contents/tools/tensorboard.rst
Docs/api/python/contents/updaters.rst
Docs/api/python/contents/vm/caffe/layer.rst
Docs/api/python/contents/vm/caffe/misc.rst
Docs/api/python/contents/vm/caffe/net.rst
Docs/api/python/contents/vm/caffe/solver.rst
Docs/api/python/contents/vm/theano/compile.rst
Dragon/include/operators/ndarray/gather_op.h
Dragon/include/utils/op_kernel.h
Dragon/modules/cxx/dragon.cc
Dragon/modules/python/py_onnx.h
Dragon/python/dragon/config.py
Dragon/python/dragon/core/helper.py
Dragon/python/dragon/operators/ndarray.py
Dragon/python/dragon/operators/vision.py
Dragon/python/dragon/utils/vision/blob_fetcher.py
Dragon/python/dragon/utils/vision/data_batch.py
Dragon/python/dragon/vm/caffe/model_libs.py
Dragon/python/dragon/vm/caffe/net_spec.py
Dragon/python/dragon/vm/theano/compile/function.py
Dragon/python/dragon/vm/torch/ops/modules/axis.py
Dragon/src/contrib/rcnn/bbox_utils.h
Dragon/src/contrib/rcnn/proposal_op.cc
Dragon/src/kernels/ndarray/gather_op_kernel.cc
Dragon/src/kernels/ndarray/gather_op_kernel.cu
Dragon/src/contrib/onnx/onnx_attibute.cc → Dragon/src/onnx/onnx_attibute.cc
Dragon/src/contrib/onnx/onnx_backend.cc → Dragon/src/onnx/onnx_backend.cc
Dragon/src/contrib/onnx/onnx_backend.h → Dragon/src/onnx/onnx_backend.h
Dragon/src/contrib/onnx/onnx_importer.cc → Dragon/src/onnx/onnx_importer.cc
Dragon/src/contrib/onnx/onnx_initializer.cc → Dragon/src/onnx/onnx_initializer.cc
Dragon/src/operators/arithmetic/maximum_op.cc
Dragon/src/operators/ndarray/gather_op.cc
Dragon/src/operators/vision/drop_block2d_op.cc
Docs/api/python/_static/css/dragon.css

@@ -283,14 +283,16 @@ code.docutils.literal:hover {
 dt {
     font-weight: 700;
-    background: #e7f2fa;
+    background: #f7f7f7;
+    border-bottom: solid #0079b2;
-    border-radius: 1px;
+    border-radius: 8px;
     margin-bottom: 20px;
     padding: 8px;
     width: 75%;
 }
 dt:target, .highlighted {
-    background-color: #e7f2fa;
+    background-color: #f7f7f7;
+    border-bottom: 3px solid #c7254e;
 }

@@ -299,7 +301,7 @@ dt:target:before {
     content: '';
     display: block;
     height: 65px;
-    margin: -20px 0 0;
+    margin: -20px -8px 8px;
 }
 dl.method dt {
Docs/api/python/contents/config.rst

@@ -5,8 +5,8 @@
 .. toctree::
    :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ========================== =============================================================================
 List                       Brief

Docs/api/python/contents/core/tensor.rst

@@ -5,8 +5,8 @@
 .. toctree::
    :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ============================== =============================================================================
 List                           Brief

Docs/api/python/contents/core/tensor_utils.rst

@@ -5,8 +5,8 @@
 .. toctree::
    :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ============================== =============================================================================
 List                           Brief

Docs/api/python/contents/memonger.rst

@@ -5,8 +5,8 @@
 .. toctree::
    :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List                 Brief

Docs/api/python/contents/tools/db.rst

@@ -5,8 +5,8 @@
 .. toctree::
    :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List                 Brief

Docs/api/python/contents/tools/im2db.rst

@@ -5,8 +5,8 @@
 .. toctree::
    :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List                 Brief

Docs/api/python/contents/tools/summary_writer.rst

@@ -5,8 +5,8 @@
 .. toctree::
    :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List                 Brief

Docs/api/python/contents/tools/tensorboard.rst

@@ -5,8 +5,8 @@
 .. toctree::
    :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List                 Brief

Docs/api/python/contents/updaters.rst

@@ -5,8 +5,8 @@
 .. toctree::
    :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List                 Brief

Docs/api/python/contents/vm/caffe/layer.rst

@@ -112,8 +112,8 @@ List Brief
 ================================= =============================================================================

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List                 Brief

Docs/api/python/contents/vm/caffe/misc.rst

@@ -5,8 +5,8 @@
 .. toctree::
    :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ========================= ============================================================================
 List                      Brief

Docs/api/python/contents/vm/caffe/net.rst

@@ -5,8 +5,8 @@
 .. toctree::
    :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ========================= =============================================================================
 List                      Brief

Docs/api/python/contents/vm/caffe/solver.rst

@@ -5,8 +5,8 @@
 .. toctree::
    :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ==================== =============================================================================
 List                 Brief

Docs/api/python/contents/vm/theano/compile.rst

@@ -6,8 +6,8 @@
    :hidden:

-Quick Shortcut
---------------
+Quick Reference
+--------------

 ============================== =======================================================================
 List                           Brief
Dragon/include/operators/ndarray/gather_op.h

@@ -39,15 +39,15 @@ class GatherGradientOp final : public Operator<Context> {
     GatherGradientOp(const OperatorDef& def, Workspace* ws)
         : Operator<Context>(def, ws),
           axis(OperatorBase::Arg<int64_t>("axis", 0)),
-          acc_grad(OperatorBase::Arg<bool>("acc_gradient", false)) {}
+          zero_grad(OperatorBase::Arg<bool>("zero_grad", true)) {}
     USE_OPERATOR_FUNCTIONS;

     void RunOnDevice() override;
     template <typename T> void RunWithType();

  protected:
+    bool zero_grad;
     int64_t axis, outer_dim, inner_dim, x_slice_dim, y_slice_dim;
-    bool acc_grad;
 };

 }  // namespace dragon
Dragon/include/utils/op_kernel.h

@@ -601,32 +601,23 @@ void ArgMin(
 /*! ndarray.gather */

-template <typename T, class Context>
-void CanonicalAxis(
-    const int               count,
-    const int               dim,
-    T*                      y,
-    Context*                ctx);
-
 template <typename T, class Context>
 void Gather(
-    const int               count,
     const int               outer_dim,
     const int               inner_dim,
     const int               x_slice_dim,
     const int               y_slice_dim,
-    const int*              indices,
+    const int64_t*          indices,
     const T*                x,
     T*                      y,
     Context*                ctx);

 template <typename T, class Context>
 void GatherGrad(
-    const int               count,
     const int               outer_dim,
     const int               inner_dim,
     const int               x_slice_dim,
     const int               y_slice_dim,
-    const int*              indices,
+    const int64_t*          indices,
     const T*                dy,
     T*                      dx,
     Context*                ctx);
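The refactor drops the separate element `count` and widens indices to `int64_t`; both kernels view the input as `(outer_dim, x_slice_dim, inner_dim)` and the output as `(outer_dim, y_slice_dim, inner_dim)`. A minimal NumPy model of that contract (an illustrative sketch, not code from this repository; `gather` is a hypothetical helper):

```python
import numpy as np

def gather(x, indices, axis=0):
    indices = np.asarray(indices, dtype=np.int64)
    outer_dim = int(np.prod(x.shape[:axis], dtype=np.int64))
    inner_dim = int(np.prod(x.shape[axis + 1:], dtype=np.int64))
    x_slice_dim = x.shape[axis]
    xr = x.reshape(outer_dim, x_slice_dim, inner_dim)
    flat = indices.ravel()
    flat = np.where(flat < 0, flat + x_slice_dim, flat)  # negative indices wrap
    y = xr[:, flat, :]
    return y.reshape(x.shape[:axis] + indices.shape + x.shape[axis + 1:])

x = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
print(gather(x, [[0, -1]], axis=1).shape)  # (2, 1, 2, 4)
```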
Dragon/modules/cxx/dragon.cc

@@ -3,7 +3,7 @@
 #include "core/common.h"
 #include "utils/proto_utils.h"
 #include "utils/caffemodel.h"
-#include "contrib/onnx/onnx_backend.h"
+#include "onnx/onnx_backend.h"
 #include "dragon.h"
Dragon/modules/python/py_onnx.h

@@ -11,7 +11,7 @@
 #ifndef DRAGON_PYTHON_PY_ONNX_H_
 #define DRAGON_PYTHON_PY_ONNX_H_

-#include "contrib/onnx/onnx_backend.h"
+#include "onnx/onnx_backend.h"
 #include "py_dragon.h"
Dragon/python/dragon/config.py

@@ -270,7 +270,7 @@ def ExportMetaGraph(prefix=''):
     These text files will be saved as the following format:

-    ``prefix/Graph_xxx.metatxt``
+    *prefix/Graph.metatxt*

     Note that an empty prefix will leads to invalid exporting.

@@ -293,12 +293,12 @@ def SetLoggingLevel(level):
     Parameters
     ----------
-    level : str
-        The level, ``DEBUG``, ``INFO``, ``WARNING``, ``ERROR`` or ``FATAL``.
+    level : {'DEBUG', 'INFO, 'WARNING', 'ERROR', 'FATAL'}, required
+        The logging level.

     Notes
     -----
-    The default level is ``INFO``.
+    The default level is *INFO*.

     """
     C.SetLogLevelCC(level)
Dragon/python/dragon/core/helper.py

@@ -391,9 +391,12 @@ class OperatorHelper(object):
     @classmethod
     def _apply_Gather(cls, arguments, inputs, outputs):
         outputs[0].dtype = inputs[0].dtype
+        axis = arguments['axis']
         try:
-            outputs[0].shape = inputs[0].shape[:]
-            outputs[0].shape[arguments['axis']] = None
+            outputs[0].shape = \
+                inputs[0].shape[:axis] + \
+                inputs[1].shape[:] + \
+                inputs[0].shape[axis + 1:]
         except:
             pass
         return outputs
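The revised rule splices the full `indices` shape into the output instead of marking one dimension unknown. A quick numeric check of the rule with hypothetical shapes (`np.take` implements the same gather semantics):

```python
import numpy as np

x = np.zeros((2, 5, 4))                  # inputs[0]
indices = np.zeros((3, 2), dtype=int)    # inputs[1]
axis = 1
out_shape = x.shape[:axis] + indices.shape + x.shape[axis + 1:]
print(out_shape)                              # (2, 3, 2, 4)
print(np.take(x, indices, axis=axis).shape)   # (2, 3, 2, 4) -- matches
```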
Dragon/python/dragon/operators/ndarray.py

@@ -17,10 +17,10 @@ from . import *

 @OpSchema.Inputs(1)
-def Gather(inputs, indices, axis=0, acc_gradient=False, **kwargs):
+def Gather(inputs, indices, axis=0, zero_grad=True, **kwargs):
     """Gather the input according to the indices along the given axis.

-    **Type Constraints**: (*int32*, *float32*)
+    **Type Constraints**: (*bool*, *int8*, *uint8*, *int32*, *int64*, *float16*, *float32*, *float64*)

     Parameters
     ----------

@@ -30,7 +30,7 @@ def Gather(inputs, indices, axis=0, acc_gradient=False, **kwargs):
         The indices to form output tensor.
     axis : int, optional
         The start axis, can be negative.
-    acc_gradient : bool, optional
+    zero_grad : bool, optional
         Whether to accumulate the gradients.

     Returns

@@ -40,24 +40,10 @@ def Gather(inputs, indices, axis=0, acc_gradient=False, **kwargs):
     """
     arguments = ParseArgs(locals())
-    arguments['inputs'], arguments['indices'] = [arguments['inputs'], Tensor.Convert(indices, dtype='int32')], None
-    output = Tensor.CreateOperator('Gather', **arguments)
-    try:
-        output.shape = inputs.shape[:]
-        if not isinstance(indices, Tensor):
-            if not isinstance(indices, (list, tuple)):
-                indices = [indices]
-            output.shape[axis] = len(indices)
-        else:
-            output.shape[axis] = None
-    except:
-        pass
-    return output
+    arguments['inputs'], arguments['indices'] = \
+        [arguments['inputs'], Tensor.Convert(indices, dtype='int64')], None
+    return Tensor.CreateOperator('Gather', **arguments)

 @OpSchema.Inputs(1)
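A usage sketch for the revised front end (the `dragon.ops` alias and the `Tensor('x')` construction below are illustrative assumptions, not taken from this diff, and may differ across versions):

```python
# Hypothetical usage of the revised Gather op:
import dragon.ops as ops
from dragon.core.tensor import Tensor

x = Tensor('x')                                   # e.g. a (N, C) float32 tensor
y = ops.Gather(x, indices=[0, 2, -1], axis=0)     # indices are cast to int64
z = ops.Gather(x, indices=[0, 2, -1], axis=0,
               zero_grad=False)                   # accumulate into existing dx
```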
Dragon/python/dragon/operators/vision.py

@@ -283,9 +283,7 @@ def Pool2d(

 @OpSchema.Inputs(2)
 def ROIPool(inputs, pool_h, pool_w, spatial_scale=1.0, **kwargs):
-    """Max RoI Pooling. `[Girshick, 2015] <https://arxiv.org/abs/1504.08083>`_.
-
-    The first dimension of input must be ``1``.
+    """Max RoIPooling. `[Girshick, 2015] <https://arxiv.org/abs/1504.08083>`_.

     **Type Constraints**: (*float16*, *float32*)

@@ -311,9 +309,7 @@ def ROIPool(inputs, pool_h, pool_w, spatial_scale=1.0, **kwargs):

 @OpSchema.Inputs(2)
 def ROIAlign(inputs, pool_h=0, pool_w=0, spatial_scale=1.0, sampling_ratio=2, **kwargs):
-    """AVG ROIAlign. `[He et.al, 2017] <https://arxiv.org/abs/1703.06870>`_.
-
-    The first dimension of input must be ``1``.
+    """AVG RoIAlign. `[He et.al, 2017] <https://arxiv.org/abs/1703.06870>`_.

     **Type Constraints**: (*float16*, *float32*)
Dragon/python/dragon/utils/vision/blob_fetcher.py

@@ -20,7 +20,7 @@ from multiprocessing import Process
 class BlobFetcher(Process):
     """BlobFetcher is deployed to queue blobs from `DataTransformer`_.

-    It is supported to form ``NHWC`` image blobs and ``1D`` label blobs.
+    It is supported to form *NHWC* image blobs and *1d* label blobs.

     """
     def __init__(self, **kwargs):
Dragon/python/dragon/utils/vision/data_batch.py

@@ -26,7 +26,7 @@ from .blob_fetcher import BlobFetcher
 class DataBatch(object):
-    """DataBatch aims to prefetch data by ``Triple-Buffering``.
+    """DataBatch aims to prefetch data by *Triple-Buffering*.

     It takes full advantages of the Process/Thread of Python,
     which provides remarkable I/O speed up for scalable distributed training.
Dragon/python/dragon/vm/caffe/model_libs.py
deleted 100644 → 0 (large diff collapsed)
Dragon/python/dragon/vm/caffe/net_spec.py

@@ -236,4 +236,4 @@ class Parameters(object):

 _param_names = param_name_dict()
 layers = Layers()
-params = Parameters()
\ No newline at end of file
+params = Parameters()
Dragon/python/dragon/vm/theano/compile/function.py

@@ -354,15 +354,14 @@ class Function(object):
         # Store for future development
         self.meta_graph = meta_graph

-        self.graph_name = meta_graph.name
-
         # Call c api to create graph
-        ws.CreateGraph(meta_graph)
+        self.graph_name = ws.CreateGraph(meta_graph)

         # Bind a lambda callback to run this graph
         callback_inputs = self.inputs if explicit_inputs else []
         self.callback = lambda *args, **kwargs: \
-            ws.RunGraph(meta_graph.name, (callback_inputs, args), self.outputs, **kwargs)
+            ws.RunGraph(self.graph_name, (callback_inputs, args), self.outputs, **kwargs)

         # Self return
         return self

@@ -386,7 +385,7 @@ def function(inputs=None, outputs=None, givens=None, updater=None):
     ----------
     inputs : sequence of Tensor, optional
         The inputs to feed.
-    inputs : sequence of Tensor, optional
+    outputs : sequence of Tensor, optional
         The outputs to fetch.
     givens : dict of Tensor, optional
         The substitutions to use.
Dragon/python/dragon/vm/torch/ops/modules/axis.py

@@ -60,6 +60,7 @@ class Gather(BaseModule):
             'n_inputs': 2, 'n_outputs': 1,
             'arguments': {
                 'axis': self.axis,
+                'zero_grad': True,
             }
         }
Dragon/src/contrib/rcnn/bbox_utils.h

@@ -188,16 +188,16 @@ inline void RetrieveRoIs(
 template <typename T>
 inline int roi_level(
-    const int               min_level,          // e.g. 2
-    const int               max_level,          // e.g. 5
-    const int               canonical_level,    // e.g. 4
-    const int               canonical_scale,    // e.g. 224
+    const int               min_level,
+    const int               max_level,
+    const int               canonical_level,
+    const int               canonical_scale,
     T*                      roi) {
     T w = roi[3] - roi[1] + 1;
     T h = roi[4] - roi[2] + 1;
     // Refer the settings of paper
-    int level = canonical_level + (int)std::log(
+    int level = canonical_level + std::log2(
         std::max(std::sqrt(w * h), (T)1) / (T)canonical_scale);
     return std::min(max_level, std::max(min_level, level));
 }
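Replacing `(int)std::log` with `std::log2` matches the FPN level assignment k = k0 + ⌊log2(√(wh)/224)⌋ from Lin et al., 2017; the previous natural-logarithm version disagrees with the paper everywhere except at the canonical scale. A quick numeric check (Python sketch):

```python
import math

def roi_level(min_level, max_level, canonical_level, canonical_scale, w, h):
    level = canonical_level + int(math.log2(
        max(math.sqrt(w * h), 1) / canonical_scale))
    return min(max_level, max(min_level, level))

print(roi_level(2, 5, 4, 224, 224, 224))  # 4: a canonically-sized RoI
print(roi_level(2, 5, 4, 224, 112, 112))  # 3: half the scale, one level down
```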
Dragon/src/contrib/rcnn/proposal_op.cc

@@ -80,7 +80,7 @@ void ProposalOp<Context>::RunWithType(
         anchors_.Reshape({ A, 4 });
         rcnn::GenerateAnchors<BT>(strides[i],
-            (int)ratios.size(), 1, &ratios[0], &scales[0],
+            (int)ratios.size(), 1, &ratios[0], &scales[i],
             anchors_.template mutable_data<BT, CPUContext>());
         rcnn::GenerateGridAnchors<BT>(
Dragon/src/kernels/ndarray/gather_op_kernel.cc

@@ -6,134 +6,93 @@ namespace dragon {

 namespace kernel {

-/*! CanonicalAxis <T = int32, Device = CPU> */
-
-template <> void CanonicalAxis<int, CPUContext>(
-    const int               count,
-    const int               dim,
-    int*                    y,
-    CPUContext*             ctx) {
-#ifdef WITH_OMP
-    #pragma omp parallel for num_threads(GET_OMP_THREADS(count))
-#endif
-    for (int i = 0; i < count; ++i) if (y[i] < 0) y[i] += dim;
-}
-
 /*! Gather <T = ?, Device = CPU> */

 template <typename T>
 void _Gather(
-    const int               count,
     const int               outer_dim,
     const int               inner_dim,
     const int               x_slice_dim,
     const int               y_slice_dim,
-    const int*              indices,
+    const int64_t*          indices,
     const T*                x,
     T*                      y,
     CPUContext*             ctx) {
-    int64_t x_offset, y_offset, x_idx_offset, y_idx_offset;
-    for (int i = 0; i < y_slice_dim; ++i) {
-        y_idx_offset = i;
-        x_idx_offset = indices[y_idx_offset];
-        for (int n = 0; n < outer_dim; ++n) {
-            x_offset = (n * x_slice_dim + x_idx_offset) * inner_dim;
-            y_offset = (n * y_slice_dim + y_idx_offset) * inner_dim;
+    int64_t x_offset, select_idx;
+    for (int n = 0; n < outer_dim; ++n) {
+        for (int i = 0; i < y_slice_dim; ++i) {
+            select_idx = indices[i];
+            select_idx = select_idx >= 0 ?
+                select_idx : select_idx + x_slice_dim;
+            x_offset = (n * x_slice_dim + select_idx) * inner_dim;
             ctx->Copy<T, CPUContext, CPUContext>(
-                inner_dim, y + y_offset, x + x_offset);
+                inner_dim, y, x + x_offset);
+            y += inner_dim;
         }
     }
 }

-/*! Gather <T = float32, Device = CPU> */
-
-template <> void Gather<float, CPUContext>(
-    const int               count,
-    const int               outer_dim,
-    const int               inner_dim,
-    const int               x_slice_dim,
-    const int               y_slice_dim,
-    const int*              indices,
-    const float*            x,
-    float*                  y,
-    CPUContext*             ctx) {
-    _Gather<float>(count, outer_dim, inner_dim,
-        x_slice_dim, y_slice_dim, indices, x, y, ctx);
-}
-
-/*! Gather <T = int32, Device = CPU> */
-
-template <> void Gather<int, CPUContext>(
-    const int               count,
-    const int               outer_dim,
-    const int               inner_dim,
-    const int               x_slice_dim,
-    const int               y_slice_dim,
-    const int*              indices,
-    const int*              x,
-    int*                    y,
-    CPUContext*             ctx) {
-    _Gather<int>(count, outer_dim, inner_dim,
-        x_slice_dim, y_slice_dim, indices, x, y, ctx);
-}
-
 /*! GatherGrad <T = ?, Device = CPU> */

 template <typename T>
 void _GatherGrad(
-    const int               count,
     const int               outer_dim,
     const int               inner_dim,
     const int               x_slice_dim,
     const int               y_slice_dim,
-    const int*              indices,
+    const int64_t*          indices,
     const T*                dy,
     T*                      dx,
     CPUContext*             ctx) {
-    int64_t x_offset, y_offset, x_idx_offset, y_idx_offset;
-    for (int i = 0; i < y_slice_dim; ++i) {
-        y_idx_offset = i;
-        x_idx_offset = indices[y_idx_offset];
-        for (int n = 0; n < outer_dim; ++n) {
-            x_offset = (n * x_slice_dim + x_idx_offset) * inner_dim;
-            y_offset = (n * y_slice_dim + y_idx_offset) * inner_dim;
+    int64_t x_offset, select_idx;
+    for (int n = 0; n < outer_dim; ++n) {
+        for (int i = 0; i < y_slice_dim; ++i) {
+            select_idx = indices[i];
+            select_idx = select_idx >= 0 ?
+                select_idx : select_idx + x_slice_dim;
+            x_offset = (n * x_slice_dim + select_idx) * inner_dim;
             math::Add<T, CPUContext>(inner_dim,
-                dy + y_offset, dx + x_offset, dx + x_offset, ctx);
+                dy, dx + x_offset, dx + x_offset, ctx);
+            dy += inner_dim;
         }
     }
 }

-/*! GatherGrad <T = float32, Device = CPU> */
-
-template <> void GatherGrad<float, CPUContext>(
-    const int               count,
-    const int               outer_dim,
-    const int               inner_dim,
-    const int               x_slice_dim,
-    const int               y_slice_dim,
-    const int*              indices,
-    const float*            dy,
-    float*                  dx,
-    CPUContext*             ctx) {
-    _GatherGrad<float>(count, outer_dim, inner_dim,
-        x_slice_dim, y_slice_dim, indices, dy, dx, ctx);
-}
-
-/*! GatherGrad <T = int32, Device = CPU> */
-
-template <> void GatherGrad<int, CPUContext>(
-    const int               count,
-    const int               outer_dim,
-    const int               inner_dim,
-    const int               x_slice_dim,
-    const int               y_slice_dim,
-    const int*              indices,
-    const int*              dy,
-    int*                    dx,
-    CPUContext*             ctx) {
-    _GatherGrad<int>(count, outer_dim, inner_dim,
-        x_slice_dim, y_slice_dim, indices, dy, dx, ctx);
-}
+/*! Kernel Launchers */
+
+#define DEFINE_GATHER_KERNEL_LAUNCHER(name, T) \
+    template <> void name<T, CPUContext>( \
+        const int               outer_dim, \
+        const int               inner_dim, \
+        const int               x_slice_dim, \
+        const int               y_slice_dim, \
+        const int64_t*          indices, \
+        const T*                x, \
+        T*                      y, \
+        CPUContext*             ctx) { \
+        _##name<T> \
+            (outer_dim, inner_dim, x_slice_dim, \
+                y_slice_dim, indices, x, y, ctx); \
+    }
+
+DEFINE_GATHER_KERNEL_LAUNCHER(Gather, bool);
+DEFINE_GATHER_KERNEL_LAUNCHER(Gather, int8_t);
+DEFINE_GATHER_KERNEL_LAUNCHER(Gather, uint8_t);
+DEFINE_GATHER_KERNEL_LAUNCHER(Gather, int);
+DEFINE_GATHER_KERNEL_LAUNCHER(Gather, int64_t);
+DEFINE_GATHER_KERNEL_LAUNCHER(Gather, float16);
+DEFINE_GATHER_KERNEL_LAUNCHER(Gather, float);
+DEFINE_GATHER_KERNEL_LAUNCHER(Gather, double);
+DEFINE_GATHER_KERNEL_LAUNCHER(GatherGrad, int8_t);
+DEFINE_GATHER_KERNEL_LAUNCHER(GatherGrad, uint8_t);
+DEFINE_GATHER_KERNEL_LAUNCHER(GatherGrad, int);
+DEFINE_GATHER_KERNEL_LAUNCHER(GatherGrad, int64_t);
+DEFINE_GATHER_KERNEL_LAUNCHER(GatherGrad, float16);
+DEFINE_GATHER_KERNEL_LAUNCHER(GatherGrad, float);
+DEFINE_GATHER_KERNEL_LAUNCHER(GatherGrad, double);
+
+#undef DEFINE_GATHER_KERNEL_LAUNCHER

 }  // namespace kernel
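The rewritten CPU kernel folds the old `CanonicalAxis` pass into the copy loop (so the indices buffer is no longer mutated) and swaps the loop order so `y` is written strictly sequentially. A Python rendering of the new loop structure (a sketch: the slice assignment stands in for `ctx->Copy`, and the arrays are flat 1-D buffers as in the kernel):

```python
def _gather_cpu(outer_dim, inner_dim, x_slice_dim, y_slice_dim, indices, x, y):
    y_cursor = 0                                   # mirrors the `y += inner_dim` walk
    for n in range(outer_dim):
        for i in range(y_slice_dim):
            j = indices[i]
            j = j if j >= 0 else j + x_slice_dim   # in-loop canonicalization
            x_off = (n * x_slice_dim + j) * inner_dim
            y[y_cursor:y_cursor + inner_dim] = x[x_off:x_off + inner_dim]
            y_cursor += inner_dim
```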
Dragon/src/kernels/ndarray/gather_op_kernel.cu

@@ -2,160 +2,176 @@
 #include "core/context_cuda.h"
 #include "utils/op_kernel.h"
 #include "utils/cub_device.h"

 namespace dragon {

 namespace kernel {

-/*! CanonicalAxis <T = int32, Device = CUDA> */
-
-template <typename T>
-__global__ void _CanonicalAxis(
-    const int               count,
-    const int               dim,
-    T*                      y) {
-    CUDA_1D_KERNEL_LOOP(idx, count) {
-        if (y[idx] < 0) y[idx] += dim;
-    }
-}
-
-template <> void CanonicalAxis<int, CUDAContext>(
-    const int               count,
-    const int               dim,
-    int*                    y,
-    CUDAContext*            ctx) {
-    _CanonicalAxis<int>
-        << < CUDA_BLOCKS(count), CUDA_THREADS,
-             0, ctx->cuda_stream() >> >
-        (count, dim, y);
-}
-
 /*! Gather <T = ?, Device = CUDA> */

 template <typename T>
 __global__ void _Gather(
-    const int               count,
-    const int               outer_dim,
+    const int               nthreads,
     const int               inner_dim,
     const int               x_slice_dim,
     const int               y_slice_dim,
-    const int*              indices,
+    const int64_t*          indices,
    const T*                x,
     T*                      y) {
-    CUDA_1D_KERNEL_LOOP(idx, count) {
-        const int outer_idx = idx / inner_dim / y_slice_dim;
-        const int slice_idx = idx % inner_dim;
-        const int y_idx_offset = (idx / inner_dim) % y_slice_dim;
-        const int x_idx_offset = indices[y_idx_offset];
-        const int x_idx = (outer_idx * x_slice_dim + x_idx_offset)
-            * inner_dim + slice_idx;
-        y[idx] = x[x_idx];
+    CUDA_1D_KERNEL_LOOP(y_idx, nthreads) {
+        const int outer_idx = y_idx / inner_dim / y_slice_dim;
+        const int inner_idx = y_idx % inner_dim;
+#if __CUDA_ARCH__ >= 350
+        int select_idx = __ldg(indices +
+            ((y_idx / inner_dim) % y_slice_dim));
+#else
+        int select_idx = indices[
+            (y_idx / inner_dim) % y_slice_dim];
+#endif
+        select_idx = select_idx >= 0 ?
+            select_idx : select_idx + x_slice_dim;
+        const int x_idx = (outer_idx * x_slice_dim + select_idx)
+            * inner_dim + inner_idx;
+        y[y_idx] = x[x_idx];
     }
 }

-/*! Gather <T = float32, Device = CUDA> */
-
-template <> void Gather<float, CUDAContext>(
-    const int               count,
-    const int               outer_dim,
-    const int               inner_dim,
-    const int               x_slice_dim,
-    const int               y_slice_dim,
-    const int*              indices,
-    const float*            x,
-    float*                  y,
-    CUDAContext*            ctx) {
-    _Gather<float>
-        << < CUDA_BLOCKS(count), CUDA_THREADS,
-             0, ctx->cuda_stream() >> >
-        (count, outer_dim, inner_dim,
-            x_slice_dim, y_slice_dim,
-                indices, x, y);
-}
-
-/*! Gather <T = int32, Device = CUDA> */
-
-template <> void Gather<int, CUDAContext>(
-    const int               count,
-    const int               outer_dim,
-    const int               inner_dim,
-    const int               x_slice_dim,
-    const int               y_slice_dim,
-    const int*              indices,
-    const int*              x,
-    int*                    y,
-    CUDAContext*            ctx) {
-    _Gather<int>
-        << <CUDA_BLOCKS(count), CUDA_THREADS,
-            0, ctx->cuda_stream() >> >
-        (count, outer_dim, inner_dim,
-            x_slice_dim, y_slice_dim,
-                indices, x, y);
-}
-
 /*! GatherGrad <T = ?, Device = CUDA> */

 template <typename T>
 __global__ void _GatherGrad(
-    const int               count,
-    const int               outer_dim,
+    const int               nthreads,
     const int               inner_dim,
     const int               x_slice_dim,
     const int               y_slice_dim,
-    const int*              indices,
+    const int64_t*          indices,
     const T*                dy,
     T*                      dx) {
-    CUDA_1D_KERNEL_LOOP(idx, count) {
-        const int outer_idx = idx / inner_dim / y_slice_dim;
-        const int slice_idx = idx % inner_dim;
-        const int y_idx_offset = (idx / inner_dim) % y_slice_dim;
-        const int x_idx_offset = indices[y_idx_offset];
-        const int x_idx = (outer_idx * x_slice_dim + x_idx_offset)
-            * inner_dim + slice_idx;
-        atomicAdd(dx + x_idx, dy[idx]);
+    CUDA_1D_KERNEL_LOOP(i, nthreads) {
+        const int outer_idx = i / inner_dim;
+        const int inner_idx = i % inner_dim;
+        for (int j = 0; j < y_slice_dim; ++j) {
+#if __CUDA_ARCH__ >= 350
+            int select_idx = __ldg(indices + j);
+#else
+            int select_idx = indices[j];
+#endif
+            select_idx = select_idx >= 0 ?
+                select_idx : select_idx + x_slice_dim;
+            const int x_idx = (outer_idx * x_slice_dim + select_idx)
+                * inner_dim + inner_idx;
+            const int y_idx = (outer_idx * y_slice_dim + j)
+                * inner_dim + inner_idx;
+            dx[x_idx] += dy[y_idx];
+        }
     }
 }

-/*! GatherGrad <T = float32, Device = CUDA> */
+/*! GatherGrad <T = float16, Device = CUDA> */

-template <> void GatherGrad<float, CUDAContext>(
-    const int               count,
-    const int               outer_dim,
+template <> __global__ void _GatherGrad<half>(
+    const int               nthreads,
     const int               inner_dim,
     const int               x_slice_dim,
     const int               y_slice_dim,
-    const int*              indices,
-    const float*            dy,
-    float*                  dx,
-    CUDAContext*            ctx) {
-    _GatherGrad<float>
-        << < CUDA_BLOCKS(count), CUDA_THREADS,
-             0, ctx->cuda_stream() >> >
-        (count, outer_dim, inner_dim,
-            x_slice_dim, y_slice_dim,
-                indices, dy, dx);
+    const int64_t*          indices,
+    const half*             dy,
+    half*                   dx) {
+    CUDA_1D_KERNEL_LOOP(i, nthreads) {
+#if __CUDA_ARCH__ >= 530
+        const int outer_idx = i / inner_dim;
+        const int inner_idx = i % inner_dim;
+        for (int j = 0; j < y_slice_dim; ++j) {
+            int select_idx = __ldg(indices + j);
+            select_idx = select_idx >= 0 ?
+                select_idx : select_idx + x_slice_dim;
+            const int x_idx = (outer_idx * x_slice_dim + select_idx)
+                * inner_dim + inner_idx;
+            const int y_idx = (outer_idx * y_slice_dim + j)
+                * inner_dim + inner_idx;
+            dx[x_idx] = __hadd(dx[x_idx], dy[y_idx]);
+        }
+#endif
+    }
 }

-/*! GatherGrad <T = int32, Device = CUDA> */
+/*! Kernel Launchers */

-template <> void GatherGrad<int, CUDAContext>(
-    const int               count,
-    const int               outer_dim,
-    const int               inner_dim,
-    const int               x_slice_dim,
-    const int               y_slice_dim,
-    const int*              indices,
-    const int*              dy,
-    int*                    dx,
-    CUDAContext*            ctx) {
-    _GatherGrad<int>
-        << < CUDA_BLOCKS(count), CUDA_THREADS,
-             0, ctx->cuda_stream() >> >
-        (count, outer_dim, inner_dim,
-            x_slice_dim, y_slice_dim,
-                indices, dy, dx);
-}
+#define DEFINE_GATHER_KERNEL_LAUNCHER(T) \
+    template <> void Gather<T, CUDAContext>( \
+        const int               outer_dim, \
+        const int               inner_dim, \
+        const int               x_slice_dim, \
+        const int               y_slice_dim, \
+        const int64_t*          indices, \
+        const T*                x, \
+        T*                      y, \
+        CUDAContext*            ctx) { \
+        auto nthreads = outer_dim * y_slice_dim * inner_dim; \
+        _Gather<T> \
+            << < CUDA_BLOCKS(nthreads), CUDA_THREADS, \
+                 0, ctx->cuda_stream() >> > \
+            (nthreads, inner_dim, x_slice_dim, \
+                y_slice_dim, indices, x, y); \
+    }
+
+#define DEFINE_GATHER_GRAD_KERNEL_LAUNCHER(T) \
+    template <> void GatherGrad<T, CUDAContext>( \
+        const int               outer_dim, \
+        const int               inner_dim, \
+        const int               x_slice_dim, \
+        const int               y_slice_dim, \
+        const int64_t*          indices, \
+        const T*                dy, \
+        T*                      dx, \
+        CUDAContext*            ctx) { \
+        auto nthreads = outer_dim * inner_dim; \
+        _GatherGrad<T> \
+            << < CUDA_BLOCKS(nthreads), CUDA_THREADS, \
+                 0, ctx->cuda_stream() >> > \
+            (nthreads, inner_dim, x_slice_dim, \
+                y_slice_dim, indices, dy, dx); \
+    }
+
+DEFINE_GATHER_KERNEL_LAUNCHER(bool);
+DEFINE_GATHER_KERNEL_LAUNCHER(int8_t);
+DEFINE_GATHER_KERNEL_LAUNCHER(uint8_t);
+DEFINE_GATHER_KERNEL_LAUNCHER(int);
+DEFINE_GATHER_KERNEL_LAUNCHER(int64_t);
+DEFINE_GATHER_KERNEL_LAUNCHER(float16);
+DEFINE_GATHER_KERNEL_LAUNCHER(float);
+DEFINE_GATHER_KERNEL_LAUNCHER(double);
+DEFINE_GATHER_GRAD_KERNEL_LAUNCHER(int8_t);
+DEFINE_GATHER_GRAD_KERNEL_LAUNCHER(uint8_t);
+DEFINE_GATHER_GRAD_KERNEL_LAUNCHER(int);
+DEFINE_GATHER_GRAD_KERNEL_LAUNCHER(int64_t);
+DEFINE_GATHER_GRAD_KERNEL_LAUNCHER(float);
+DEFINE_GATHER_GRAD_KERNEL_LAUNCHER(double);
+
+template <> void GatherGrad<float16, CUDAContext>(
+    const int               outer_dim,
+    const int               inner_dim,
+    const int               x_slice_dim,
+    const int               y_slice_dim,
+    const int64_t*          indices,
+    const float16*          dy,
+    float16*                dx,
+    CUDAContext*            ctx) {
+    auto nthreads = outer_dim * inner_dim;
+    _GatherGrad<half>
+        << < CUDA_BLOCKS(nthreads), CUDA_THREADS,
+             0, ctx->cuda_stream() >> >
+        (nthreads, inner_dim, x_slice_dim,
+            y_slice_dim, indices,
+                reinterpret_cast<const half*>(dy),
+                    reinterpret_cast<half*>(dx));
+}
+
+#undef DEFINE_GATHER_KERNEL_LAUNCHER
+#undef DEFINE_GATHER_GRAD_KERNEL_LAUNCHER

 }  // namespace kernel

 }  // namepsace dragon
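The old `_GatherGrad` parallelized over every element of `dy` and serialized index collisions with `atomicAdd`; the rewrite launches one thread per `(outer, inner)` slot and loops over the gathered indices serially, so each `dx` element has exactly one writer. That single-writer property is also what makes the `__hadd`-based float16 path possible, since there is no half-precision atomic add on these architectures. A Python model of the new schedule (sketch only, flat 1-D buffers as in the kernel):

```python
def _gather_grad(outer_dim, inner_dim, x_slice_dim, y_slice_dim, indices, dy, dx):
    for t in range(outer_dim * inner_dim):         # one CUDA thread each
        n, k = divmod(t, inner_dim)
        for j in range(y_slice_dim):               # serial, collision-free accumulation
            sel = indices[j] if indices[j] >= 0 else indices[j] + x_slice_dim
            dx[(n * x_slice_dim + sel) * inner_dim + k] += \
                dy[(n * y_slice_dim + j) * inner_dim + k]
```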
Dragon/src/contrib/onnx/onnx_attibute.cc → Dragon/src/onnx/onnx_attibute.cc

-#include "contrib/onnx/onnx_backend.h"
+#include "onnx/onnx_backend.h"

 namespace dragon {
Dragon/src/contrib/onnx/onnx_backend.cc → Dragon/src/onnx/onnx_backend.cc

 #include "core/operator_schema.h"
 #include "utils/proto_utils.h"
-#include "contrib/onnx/onnx_backend.h"
+#include "onnx/onnx_backend.h"

 namespace dragon {
Dragon/src/contrib/onnx/onnx_backend.h → Dragon/src/onnx/onnx_backend.h

 /*!
  * Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
  *
  * Licensed under the BSD 2-Clause License.
  * You should have received a copy of the BSD 2-Clause License
  * along with the software. If not, See,
  *
  *     <https://opensource.org/licenses/BSD-2-Clause>
  *
  * Codes are based on:
  *
  *     <https://github.com/pytorch/pytorch/blob/master/caffe2/onnx/backend.h>
  *
  * ------------------------------------------------------------
  */

-#ifndef DRAGON_CONTRIB_ONNX_ONNX_BACKEND_H_
-#define DRAGON_CONTRIB_ONNX_ONNX_BACKEND_H_
+#ifndef DRAGON_ONNX_ONNX_BACKEND_H_
+#define DRAGON_ONNX_ONNX_BACKEND_H_

 #include "core/common.h"
 #include "proto/onnx.pb.h"

@@ -228,4 +228,4 @@ class ONNXBackend {
 }  // namespace dragon

-#endif  // DRAGON_CONTRIB_ONNX_ONNX_BACKEND_H_
\ No newline at end of file
+#endif  // DRAGON_ONNX_ONNX_BACKEND_H_
\ No newline at end of file
Dragon/src/contrib/onnx/onnx_importer.cc → Dragon/src/onnx/onnx_importer.cc

 #include "utils/map_utils.h"
-#include "contrib/onnx/onnx_backend.h"
+#include "onnx/onnx_backend.h"

 namespace dragon {
Dragon/src/contrib/onnx/onnx_initializer.cc → Dragon/src/onnx/onnx_initializer.cc

-#include "contrib/onnx/onnx_backend.h"
+#include "onnx/onnx_backend.h"

 namespace dragon {
Dragon/src/operators/arithmetic/maximum_op.cc

@@ -57,7 +57,7 @@ void MaximumOp<Context>::RunOnDevice() {
     else if (XIsType(Input(0), double)) RunWithType<double>();
     else LOG(FATAL) << DTypeHelper(Input(0), {
         "int8", "uint8", "int32", "int64",
-        "float16", "float32", "float64",
+        "float16", "float32", "float64",
     });
 }
Dragon/src/operators/ndarray/gather_op.cc

@@ -13,12 +13,10 @@ namespace dragon {
 template <class Context> template <typename T>
 void GatherOp<Context>::RunWithType() {
     auto* Xdata = Input(0).template data<T, Context>();
-    auto* indices = Input(1).template mutable_data<int, Context>();
+    auto* indices = Input(1).template mutable_data<int64_t, Context>();
     auto* Ydata = Output(0)->template mutable_data<T, Context>();
-    kernel::CanonicalAxis(Input(1).count(), x_slice_dim, indices, ctx());
-    kernel::Gather(Output(0)->count(),
-        outer_dim, inner_dim,
+    kernel::Gather(outer_dim, inner_dim,
         x_slice_dim, y_slice_dim,
             indices, Xdata, Ydata, ctx());

@@ -28,22 +26,38 @@ template <class Context>
 void GatherOp<Context>::RunOnDevice() {
     DETERMINE_RUNTIME_ARGUMENTS(Input(0));
-    output_dims = Input(0).dims();
     x_slice_dim = Input(0).dim(axis);
-    output_dims[axis] = y_slice_dim = Input(1).count();
+    y_slice_dim = Input(1).count();
     outer_dim = Input(0).count(0, axis);
     inner_dim = Input(0).count(axis + 1);
+    CHECK_GT(y_slice_dim, 0) << "\nLength of indices must > 0.";
+
+    const auto& s1 = Input(0).dims().begin();
+    const auto& e1 = s1 + axis, s3 = e1 + 1;
+    const auto& e3 = Input(0).dims().end();
+    const auto& s2 = Input(1).dims().begin();
+    const auto& e2 = Input(1).dims().end();
+    output_dims.assign(s1, e1);
+    output_dims.insert(output_dims.end(), s2, e2);
+    output_dims.insert(output_dims.end(), s3, e3);
+
     Output(0)->Reshape(output_dims);

-    CHECK(Input(1).template IsType<int>())
-        << "\nThe type of indices should be int32.";
+    CHECK(Input(1).template IsType<int64_t>())
+        << "\nThe type of indices should be int64.";

-    if (XIsType(Input(0), float)) RunWithType<float>();
+    if (XIsType(Input(0), bool)) RunWithType<bool>();
+    else if (XIsType(Input(0), int8_t)) RunWithType<int8_t>();
+    else if (XIsType(Input(0), uint8_t)) RunWithType<uint8_t>();
     else if (XIsType(Input(0), int)) RunWithType<int>();
-    else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "int32" });
+    else if (XIsType(Input(0), int64_t)) RunWithType<int64_t>();
+    else if (XIsType(Input(0), float16)) RunWithType<float16>();
+    else if (XIsType(Input(0), float)) RunWithType<float>();
+    else if (XIsType(Input(0), double)) RunWithType<double>();
+    else LOG(FATAL) << DTypeHelper(Input(0), {
+        "bool", "int8", "uint8", "int32", "int64",
+        "float16", "float32", "float64",
+    });
 }

 DEPLOY_CPU(Gather);

@@ -54,18 +68,17 @@ OPERATOR_SCHEMA(Gather).NumInputs(2).NumOutputs(1);
 template <class Context> template <typename T>
 void GatherGradientOp<Context>::RunWithType() {
-    auto* indices = Input(1).template data<int, Context>();
+    auto* indices = Input(1).template data<int64_t, Context>();
     auto* dYdata = Input(-1).template data<T, Context>();
-    T* dXdata = nullptr;
-    if (!acc_grad) {
-        dXdata = Output(0)->template mutable_data<T, Context>();
-        math::Set(Output(0)->count(),
-            cast::to<T>(0.f), dXdata, ctx());
-    } else {
-        dXdata = Output(0)->template mutable_data<T, Context>();
-    }
-    kernel::GatherGrad(Input(-1).count(),
-        outer_dim, inner_dim,
+    auto* dXdata = Output(0)->template mutable_data<T, Context>();
+    // Zero the gradients Optionally
+    if (zero_grad) {
+        math::Set(Output(0)->count(),
+            cast::to<T>(0.f), dXdata, ctx());
+    }
+    kernel::GatherGrad(outer_dim, inner_dim,
         x_slice_dim, y_slice_dim,
             indices, dYdata, dXdata, ctx());

@@ -82,12 +95,20 @@ void GatherGradientOp<Context>::RunOnDevice() {
     Output(0)->ReshapeLike(Input(0));

-    CHECK(Input(1).template IsType<int>())
-        << "\nThe type of indices should be int32.";
+    CHECK(Input(1).template IsType<int64_t>())
+        << "\nThe type of indices should be int64.";

-    if (XIsType(Input(0), float)) RunWithType<float>();
+    if (XIsType(Input(0), int8_t)) RunWithType<int8_t>();
+    else if (XIsType(Input(0), uint8_t)) RunWithType<uint8_t>();
     else if (XIsType(Input(0), int)) RunWithType<int>();
-    else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "int32" });
+    else if (XIsType(Input(0), int64_t)) RunWithType<int64_t>();
+    else if (XIsType(Input(0), float16)) RunWithType<float16>();
+    else if (XIsType(Input(0), float)) RunWithType<float>();
+    else if (XIsType(Input(0), double)) RunWithType<double>();
+    else LOG(FATAL) << DTypeHelper(Input(0), {
+        "int8", "uint8", "int32", "int64",
+        "float16", "float32", "float64",
+    });
 }

 DEPLOY_CPU(GatherGradient);
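The renamed flag reads more directly in the backward pass above: GatherGrad scatter-adds `dy` into `dx`, so `dx` must be cleared first unless the caller deliberately accumulates into an existing gradient buffer (`zero_grad=False`). A NumPy model of that behaviour (hypothetical values):

```python
import numpy as np

indices = np.array([0, 2, 2], dtype=np.int64)
dy = np.ones((3, 4), dtype=np.float32)        # grad of the gathered rows
dx = np.zeros((5, 4), dtype=np.float32)       # zero_grad=True behaviour
np.add.at(dx, indices, dy)                    # duplicate index 2 accumulates
print(dx[:, 0])                               # [1. 0. 2. 0. 0.]
```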
Dragon/src/operators/vision/drop_block2d_op.cc

@@ -15,6 +15,27 @@ void DropBlock2dOp<Context>::RunWithType() {
                 Output(0)->count(), Ydata, Xdata);
         }
     } else if (phase() == "TRAIN") {
+        if (data_format == "NCHW") {
+            n = Input(0).dim(0), c = Input(0).dim(1);
+            h = Input(0).dim(2), w = Input(0).dim(3);
+        } else if (data_format == "NHWC") {
+            n = Input(0).dim(0), c = Input(0).dim(-1);
+            h = Input(0).dim(1), w = Input(0).dim(2);
+        }
+        seed_h = h - block_size + 1;
+        seed_w = w - block_size + 1;
+        CHECK(seed_h > 0 && seed_w > 0)
+            << "\nExcepted block_size <= feat_size.";
+        if (decrement > 0 && apply_prob > keep_prob()) {
+            apply_prob -= decrement;
+        } else {
+            apply_prob = keep_prob();
+        }
+        gamma = (1.f - apply_prob) / (block_size * block_size);
+        gamma *= (alpha * (h * w) / (seed_h * seed_w));
         auto* mask = ws()->CreateTensor(mount_name(
             "drop_block/mask"))->ReshapeLike(Input(0));
         auto* norm = ws()->CreateTensor(mount_name(

@@ -58,29 +79,8 @@ void DropBlock2dOp<Context>::RunWithType() {
 template <class Context>
 void DropBlock2dOp<Context>::RunOnDevice() {
-    if (data_format == "NCHW") {
-        n = Input(0).dim(0), c = Input(0).dim(1);
-        h = Input(0).dim(2), w = Input(0).dim(3);
-    } else if (data_format == "NHWC") {
-        n = Input(0).dim(0), c = Input(0).dim(-1);
-        h = Input(0).dim(1), w = Input(0).dim(2);
-    }
-    seed_h = h - block_size + 1;
-    seed_w = w - block_size + 1;
-    CHECK(seed_h > 0 && seed_w > 0)
-        << "\nExcepted block_size <= feat_size.";
     Output(0)->ReshapeLike(Input(0));

-    if (decrement > 0 && apply_prob > keep_prob()) {
-        apply_prob -= decrement;
-    } else {
-        apply_prob = keep_prob();
-    }
-
-    gamma = (1.f - apply_prob) / (block_size * block_size);
-    gamma *= (alpha * (h * w) / (seed_h * seed_w));
-
     if (XIsType(Input(0), float)) RunWithType<float>();
     else if (XIsType(Input(0), float16)) RunWithType<float16>();
     else LOG(FATAL) << DTypeHelper(Input(0), { "float32", "float16" });
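The block moved into the TRAIN branch computes the DropBlock seeding rate γ = (1 − p)/block_size² · (h·w)/(seed_h·seed_w) from Ghiasi et al., 2018, with `alpha` as this op's extra scale on the paper's gamma. A numeric check (Python sketch; values illustrative):

```python
def drop_block_gamma(apply_prob, block_size, h, w, alpha=1.0):
    seed_h, seed_w = h - block_size + 1, w - block_size + 1
    assert seed_h > 0 and seed_w > 0, "block_size must be <= feature size"
    gamma = (1.0 - apply_prob) / (block_size * block_size)
    return gamma * alpha * (h * w) / (seed_h * seed_w)

print(drop_block_gamma(0.9, 7, 56, 56))  # ~0.00256
```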