SeetaResearch / Dragon
Commit c9db9eee, authored Dec 14, 2017 by Ting PAN
Fix/Refactor the GroupConvolution on cuDNN
1 parent 6f2751b1
Showing 24 changed files with 496 additions and 346 deletions:
Dragon/include/core/context.h
Dragon/include/core/context_cuda.h
Dragon/include/core/mixedmem.h
Dragon/include/core/tensor.h
Dragon/include/operators/norm/l2_norm_op.h
Dragon/include/operators/vision/conv_op.h
Dragon/include/operators/vision/conv_transpose_op.h
Dragon/include/operators/vision/lrn_op.h
Dragon/include/utils/cudnn_device.h
Dragon/python/dragon/core/tensor.py
Dragon/python/dragon/docs/install.rst
Dragon/python/dragon/operators/ndarray.py
Dragon/python/dragon/operators/norm.py
Dragon/python/dragon/operators/vision.py
Dragon/python/dragon/vm/caffe/layers/vision.py
Dragon/src/core/graph.cc
Dragon/src/core/mixedmem.cc
Dragon/src/core/operator.cc
Dragon/src/operators/norm/l2_norm_op.cc
Dragon/src/operators/vision/cudnn_conv2d_op.cc
Dragon/src/operators/vision/cudnn_conv2d_transpose_op.cc
Dragon/src/operators/vision/cudnn_lrn_op.cc
Dragon/src/operators/vision/lrn_op.cc
Dragon/src/utils/cudnn_device.cc
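Taken together, the changes implement one strategy: with cuDNN 7 and later, grouped convolution is delegated to the library itself, while older versions keep the manual per-group loop. A minimal sketch of that dispatch, distilled from the hunks below (not a verbatim excerpt; Xdata/Wdata/Ydata, the offsets, and the descriptors are the operator's members and locals):

    // cuDNN >= 7 understands groups natively: one handle, one descriptor set,
    // and the group count attached to the convolution descriptor.
    #if CUDNN_VERSION_MIN(7, 0, 0)
        cudnn_group = 1;
        CUDNN_CHECK(cudnnSetConvolutionGroupCount(conv_desc, this->group));
    #else
        // Older cuDNN: emulate groups by issuing one convolution per group,
        // offsetting the input/weight/output pointers manually.
        cudnn_group = this->group;
    #endif
        for (int g = 0; g < cudnn_group; g++) {
            CUDNN_CHECK(cudnnConvolutionForward(handle[g],
                CUDNNType<T>::one, input_desc, Xdata + this->x_offset * g,
                filter_desc, Wdata + this->weight_offset * g, conv_desc,
                fwd_algo, workspace, workspace_fwd_data_size,
                CUDNNType<T>::zero, output_desc, Ydata + this->y_offset * g));
        }

On cuDNN >= 7 the loop degenerates to a single call over the whole tensors, which is both simpler and lets the library pick grouped-conv kernels.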
Dragon/include/core/context.h

@@ -28,7 +28,7 @@ class CPUContext {
 public:
    CPUContext(): random_seed_(3) { generator(); }
    CPUContext(unsigned int random_seed): random_seed_(random_seed) { generator(); }
    CPUContext(const DeviceOption& option)
        : random_seed_(option.has_random_seed() ? option.random_seed() : 3) { generator(); }
    virtual ~CPUContext() {}

@@ -51,6 +51,9 @@ class CPUContext {
    inline static void Memcpy(size_t nbytes, void* dst, const void* src) { memcpy(dst, src, nbytes); }
    inline static void Delete(void* data) { free(data); }
+   template <class DstContext, class SrcContext>
+   inline static void MemcpyAsync(size_t nbytes, void* dst, const void* src) { NOT_IMPLEMENTED; }
    template <typename T, class DstContext, class SrcContext>
    inline static void Copy(int n, T* dst, const T* src) {
        if (dst == src) return;

@@ -62,7 +65,7 @@ class CPUContext {
    inline std::mt19937* generator() {
        auto& generator = cpu_object_.rand_generator;
        if (!generator.get()) generator.reset(new std::mt19937(random_seed_));
        return generator.get();
    }

@@ -79,4 +82,4 @@ static inline std::mt19937* rand_generator() {
 }    // namepsace dragon
-#endif    // DRAGON_CORE_CONTEXT_H_
\ No newline at end of file
+#endif    // DRAGON_CORE_CONTEXT_H_
Dragon/include/core/context_cuda.h

@@ -60,7 +60,7 @@ class CUDAObject {
class CUDAContext {
 public:
    CUDAContext(const DeviceOption& option)
        : gpu_id_(option.gpu_id()),
          random_seed_(option.has_random_seed() ? option.random_seed() : 3) {
        CPUContext context(option);

@@ -72,7 +72,7 @@ class CUDAContext {
#endif
    }
    CUDAContext(const int gpu_id = 0)
        : gpu_id_(gpu_id), random_seed_(3) {
        CPUContext context;
        cublas_handle();

@@ -90,7 +90,7 @@ class CUDAContext {
    void FinishDeviceCompution() {
        cudaStreamSynchronize(cudaStreamDefault);
        cudaError_t error = cudaGetLastError();
        CHECK_EQ(error, cudaSuccess)
            << "CUDA Error: " << cudaGetErrorString(error);
    }

@@ -108,11 +108,11 @@ class CUDAContext {
        CUDA_CHECK(cudaMemcpy(dst, src, nbytes, cudaMemcpyDefault));
    }
    template <class DstContext, class SrcContext>
    inline static void MemcpyAsync(size_t nbytes, void* dst, const void* src) {
        cudaStream_t stream;
        CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
        CUDA_CHECK(cudaMemcpyAsync(dst, src, nbytes, cudaMemcpyDefault, stream));
        CUDA_CHECK(cudaStreamSynchronize(stream));
        CUDA_CHECK(cudaStreamDestroy(stream));
    }

@@ -205,4 +205,4 @@ class CUDAContext {
 }    // namespace dragon
-#endif    // DRAGON_CORE_CONTEXT_CUDA_H_
\ No newline at end of file
+#endif    // DRAGON_CORE_CONTEXT_CUDA_H_
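CPUContext now stubs the same MemcpyAsync entry point that CUDAContext implements above, so context-templated code can call it uniformly. A minimal sketch of why that matters (the CopyBlob wrapper is hypothetical, not part of the commit):

    // Hypothetical wrapper: compiles for CPUContext and CUDAContext alike,
    // since both now declare MemcpyAsync with this signature; the CPU version
    // simply aborts via NOT_IMPLEMENTED if it is ever reached.
    template <class Context>
    void CopyBlob(size_t nbytes, void* dst, const void* src) {
        Context::template MemcpyAsync<Context, Context>(nbytes, dst, src);
    }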
Dragon/include/core/mixedmem.h

@@ -17,12 +17,12 @@ class MixedMemory {
 public:
    enum State { UNINITIALIZED, STATE_AT_CPU, STATE_AT_CUDA, SWITCHED, SYNCED };
    MixedMemory()
        : state_(UNINITIALIZED), cpu_ptr_(nullptr), cuda_ptr_(nullptr),
          nbytes_(0) {}
    MixedMemory(const TypeMeta& meta, const size_t nbytes)
        : state_(UNINITIALIZED), meta_(meta),
          cpu_ptr_(nullptr), cuda_ptr_(nullptr),
          nbytes_(nbytes) {}
    ~MixedMemory();

@@ -55,4 +55,4 @@ class MixedMemory {
 }    // namespace dragon
-#endif
\ No newline at end of file
+#endif
Dragon/include/core/tensor.h

@@ -37,7 +37,7 @@ class Tensor {
            capacity_ = 0;
        }
    } else {
        if (ex_memory_ && TIndex(ex_memory_->nbytes()) <
                TIndex(new_size * meta_.itemsize())) {
            delete ex_memory_;
            ex_memory_ = nullptr;

@@ -72,7 +72,7 @@ class Tensor {
    inline TIndex count() const { return size_; }
    inline TIndex count(const TIndex start) const { return count(start, ndim()); }
    inline TIndex offset(const TIndex n, const TIndex c = 0,
                         const TIndex h = 0, const TIndex w = 0) {
        CHECK_LE(n, dim(0));
        CHECK_LE(c, dim(1));

@@ -103,13 +103,13 @@ class Tensor {
    inline void Corrupt() { is_corrupted_ = true; }
    MixedMemory* memory() const { return own_mem_ ? memory_.get() : ex_memory_; }
    MixedMemory::State memory_state() const {
        MixedMemory* mem = memory();
        CHECK(mem) << "\nMemory access before allowcating.";
        return memory()->state();
    }
    void SwitchToDevice() {
        MixedMemory* mem = own_mem_ ? memory_.get() : ex_memory_;
        if (mem) mem->SwitchToDevice();
    }

@@ -166,15 +166,15 @@ class Tensor {
    template <class Context>
    void* raw_mutable_data() {
        CHECK_NE(meta_.id(), 0)
            << "\nTensor(" << name_ << "): unknown type, "
            << "or does not have a type.";
        return raw_mutable_data<Context>(meta_);
    }
    template <class Context>
    const void* raw_data() const {
        return const_data_ptr<Context>();
    }
    template <typename T, class Context>

@@ -186,8 +186,8 @@ class Tensor {
    }
    template <typename T, class Context>
    const T* data() const {
        return static_cast<const T*>(raw_data<Context>());
    }
    inline void Share(const Tensor& other) {

@@ -198,7 +198,7 @@ class Tensor {
    }
    inline void Move(MixedMemory* mem) {
        if (mem != nullptr) ex_memory_ = mem;
        else ex_memory_ = new MixedMemory(TypeMeta::Make<float>(), 4);
        own_mem_ = false;
    }

@@ -215,11 +215,11 @@ class Tensor {
    TIndex size_ = 0, capacity_ = 0;
    TypeMeta meta_;
    string name_;
-   shared_ptr<MixedMemory> memory_;
+   shared_ptr<MixedMemory> memory_, host_memory_;
    MixedMemory* ex_memory_ = nullptr;
    bool is_corrupted_ = false, own_mem_ = true;
};
 }    // namespace dragon
-#endif    // DRAONG_CORE_TENSOR_H_
\ No newline at end of file
+#endif    // DRAONG_CORE_TENSOR_H_
Dragon/include/operators/norm/l2_norm_op.h

@@ -18,7 +18,8 @@ class L2NormOp final : public Operator<Context> {
        : Operator<Context>(op_def, ws),
          axis(OperatorBase::GetSingleArg<int>("axis", 0)),
          num_axes(OperatorBase::GetSingleArg<int>("num_axes", -1)),
-         eps(OperatorBase::GetSingleArg<float>("eps", float(1e-5))) {}
+         eps(OperatorBase::GetSingleArg<float>("eps", float(1e-5))),
+         mode(OperatorBase::GetSingleArg<string>("mode", "SUM")) {}
    void RunOnDevice() override;
    template <typename T> void RunWithType();

@@ -26,6 +27,7 @@ class L2NormOp final : public Operator<Context> {
 protected:
    float eps;
    TIndex axis, num_axes, end_axis;
+   string mode;
    bool across_inner;
    Tensor* norm, *buffer, *multiplier;
    TIndex outer_dim, dim, inner_dim, spatial_dim;
Dragon/include/operators/vision/conv_op.h

@@ -30,7 +30,7 @@ class Conv2dOp : public ConvOpBase<Context> {
template <class Context>
class Conv2dGradientOp : public Conv2dOp<Context> {
 public:
    Conv2dGradientOp(const OperatorDef& def, Workspace* ws)
        : Conv2dOp<Context>(def, ws) {}
    bool HasBias() override { return output(2)->name() != "ignore"; }

@@ -48,10 +48,15 @@ class CuDNNConv2dOp : public Conv2dOp<Context> {
 public:
    CuDNNConv2dOp(const OperatorDef& def, Workspace* ws)
        : Conv2dOp<Context>(def, ws) {
-       handle = new cudnnHandle_t[this->group];
-       stream = new cudaStream_t[this->group];
+#if CUDNN_VERSION_MIN(7, 0, 0)
+       cudnn_group = 1;
+#else
+       cudnn_group = this->group;
+#endif
+       handle = new cudnnHandle_t[cudnn_group];
+       stream = new cudaStream_t[cudnn_group];
        ctx().SwitchToDevice();
-       for (int g = 0; g < this->group; g++) {
+       for (int g = 0; g < cudnn_group; g++) {
            CUDA_CHECK(cudaStreamCreate(&stream[g]));
            CUDNN_CHECK(cudnnCreate(&handle[g]));
            CUDNN_CHECK(cudnnSetStream(handle[g], stream[g]));

@@ -78,17 +83,22 @@ class CuDNNConv2dOp : public Conv2dOp<Context> {
    cudnnConvolutionDescriptor_t conv_desc;
    cudnnFilterDescriptor_t filter_desc;
    size_t workspace_fwd_data_size;
-   TIndex bias_offset;
+   TIndex bias_offset, cudnn_group;
};

template <class Context>
class CuDNNConv2dGradientOp : public Conv2dGradientOp<Context> {
 public:
    CuDNNConv2dGradientOp(const OperatorDef& def, Workspace* ws)
        : Conv2dGradientOp<Context>(def, ws) {
-       handle = new cudnnHandle_t[this->group * 3];
-       stream = new cudaStream_t[this->group * 3];
-       for (int g = 0; g < this->group * 3; g++) {
+#if CUDNN_VERSION_MIN(7, 0, 0)
+       cudnn_group = 1;
+#else
+       cudnn_group = this->group;
+#endif
+       handle = new cudnnHandle_t[cudnn_group * 3];
+       stream = new cudaStream_t[cudnn_group * 3];
+       for (int g = 0; g < cudnn_group * 3; g++) {
            CUDA_CHECK(cudaStreamCreate(&stream[g]));
            CUDNN_CHECK(cudnnCreate(&handle[g]));
            CUDNN_CHECK(cudnnSetStream(handle[g], stream[g]));

@@ -116,7 +126,7 @@ class CuDNNConv2dGradientOp : public Conv2dGradientOp<Context> {
    cudnnConvolutionDescriptor_t conv_desc;
    cudnnFilterDescriptor_t filter_desc;
    size_t workspace_bwd_filter_size, workspace_bwd_data_size;
-   int bias_offset;
+   TIndex bias_offset, cudnn_group;
};
#endif    // WITH_CUDNN
Dragon/include/operators/vision/conv_transpose_op.h

@@ -52,8 +52,13 @@ class CuDNNConv2dTransposeOp : public Conv2dTransposeOp<Context> {
 public:
    CuDNNConv2dTransposeOp(const OperatorDef& def, Workspace* ws)
        : Conv2dTransposeOp<Context>(def, ws) {
-       handle = new cudnnHandle_t[this->group];
-       stream = new cudaStream_t[this->group];
+#if CUDNN_VERSION_MIN(7, 0, 0)
+       cudnn_group = 1;
+#else
+       cudnn_group = this->group;
+#endif
+       handle = new cudnnHandle_t[cudnn_group];
+       stream = new cudaStream_t[cudnn_group];
        for (int g = 0; g < this->group; g++) {
            CUDA_CHECK(cudaStreamCreate(&stream[g]));
            CUDNN_CHECK(cudnnCreate(&handle[g]));

@@ -80,7 +85,7 @@ class CuDNNConv2dTransposeOp : public Conv2dTransposeOp<Context> {
    cudnnConvolutionDescriptor_t conv_desc;
    cudnnFilterDescriptor_t filter_desc;
    size_t workspace_fwd_data_size;
-   int bias_offset;
+   TIndex bias_offset, cudnn_group;
};

template <class Context>
@@ -88,9 +93,14 @@ class CuDNNConv2dTransposeGradientOp : public Conv2dTransposeGradientOp<Context>
 public:
    CuDNNConv2dTransposeGradientOp(const OperatorDef& def, Workspace* ws)
        : Conv2dTransposeGradientOp<Context>(def, ws) {
-       handle = new cudnnHandle_t[this->group * 3];
-       stream = new cudaStream_t[this->group * 3];
-       for (int g = 0; g < this->group * 3; g++) {
+#if CUDNN_VERSION_MIN(7, 0, 0)
+       cudnn_group = 1;
+#else
+       cudnn_group = this->group;
+#endif
+       handle = new cudnnHandle_t[cudnn_group * 3];
+       stream = new cudaStream_t[cudnn_group * 3];
+       for (int g = 0; g < cudnn_group * 3; g++) {
            CUDA_CHECK(cudaStreamCreate(&stream[g]));
            CUDNN_CHECK(cudnnCreate(&handle[g]));
            CUDNN_CHECK(cudnnSetStream(handle[g], stream[g]));

@@ -117,7 +127,7 @@ public:
    cudnnConvolutionDescriptor_t conv_desc;
    cudnnFilterDescriptor_t filter_desc;
    size_t workspace_bwd_filter_size, workspace_bwd_data_size;
-   int bias_offset;
+   TIndex bias_offset, cudnn_group;
};
#endif    // WITH_CUDNN
Dragon/include/operators/vision/lrn_op.h

@@ -18,11 +18,12 @@ class LRNOp : public Operator<Context> {
 public:
    LRNOp(const OperatorDef& op_def, Workspace* ws)
        : Operator<Context>(op_def, ws),
-         mode((LRNMode)OperatorBase::GetSingleArg<int>("mode", ACROSS_CHANNELS)),
          local_size(OperatorBase::GetSingleArg<int>("local_size", 5)),
          alpha(OperatorBase::GetSingleArg<float>("alpha", float(0.0001))),
          beta(OperatorBase::GetSingleArg<float>("beta", float(0.75))),
-         k(OperatorBase::GetSingleArg<float>("k", float(2.0))) {}
+         k(OperatorBase::GetSingleArg<float>("k", float(2.0))),
+         mode(OperatorBase::GetSingleArg<string>("mode", "ACROSS_CHANNELS")),
+         data_format(OperatorBase::GetSingleArg<string>("data_format", "NCHW")) {}
    void RunOnDevice() override;
    template <typename T> void RunWithType();

@@ -34,9 +35,9 @@ class LRNOp : public Operator<Context> {
    template <typename T> void ProdRunWithType();
 protected:
-   LRNMode mode;
    int local_size;
    float alpha, beta, k;
+   string mode, data_format;
    unique_ptr<OperatorBase> sqr_op, pool_op, pow_op, prod_op;
    Tensor* sqr_in, *prod_in, *sqr_out, *pool_out, *pow_out;
    Tensor* scale;

@@ -47,11 +48,12 @@ class LRNGradientOp : public Operator<Context> {
 public:
    LRNGradientOp(const OperatorDef& op_def, Workspace* ws)
        : Operator<Context>(op_def, ws),
-         mode((LRNMode)OperatorBase::GetSingleArg<int>("mode", ACROSS_CHANNELS)),
          local_size(OperatorBase::GetSingleArg<int>("local_size", 5)),
          alpha(OperatorBase::GetSingleArg<float>("alpha", float(0.0001))),
          beta(OperatorBase::GetSingleArg<float>("beta", float(0.75))),
-         k(OperatorBase::GetSingleArg<float>("k", float(2.0))) {}
+         k(OperatorBase::GetSingleArg<float>("k", float(2.0))),
+         mode(OperatorBase::GetSingleArg<string>("mode", "ACROSS_CHANNELS")),
+         data_format(OperatorBase::GetSingleArg<string>("data_format", "NCHW")) {}
    void RunOnDevice() override;
    template <typename T> void RunWithType();

@@ -63,9 +65,9 @@ class LRNGradientOp : public Operator<Context> {
    template <typename T> void ProdRunWithType();
 protected:
-   LRNMode mode;
    int local_size;
    float alpha, beta, k;
+   string mode, data_format;
    unique_ptr<OperatorBase> sqr_op, pool_op, pow_op, prod_op;
    Tensor* sqr_in, *prod_in, *sqr_out, *pool_out, *pow_out;
    Tensor* scale;
Dragon/include/utils/cudnn_device.h

@@ -76,6 +76,9 @@ template <typename T>
void cudnnSetTensor4dDesc(cudnnTensorDescriptor_t* desc, const string& data_format,
                          const std::vector<int64_t>& dims);

+template <typename T>
+void cudnnSetTensor4dDescWithGroup(cudnnTensorDescriptor_t* desc, const string& data_format,
+                                   const std::vector<int64_t>& dims, const int64_t group);

template <typename T>
void cudnnSetTensor5dDesc(cudnnTensorDescriptor_t* desc, const string& data_format,
                          const std::vector<int64_t>& dims);

template <typename T>
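The definition lives in Dragon/src/utils/cudnn_device.cc (changed in this commit but not shown in this view). A plausible sketch of what such a helper must do, assuming cuDNN's cudnnSetTensor4dDescriptorEx: shrink the channel count to C/group while keeping the strides of the full tensor, so that "data + offset * g" addresses group g's slice directly. This is an assumption about the implementation, not a quote of it:

    #include <string>
    #include <vector>
    #include <cudnn.h>

    // Sketch only: the real implementation is in cudnn_device.cc. The channel
    // count is divided by group, but the strides stay those of the full
    // tensor, which is what makes per-group pointer offsets work.
    template <typename T>
    void cudnnSetTensor4dDescWithGroup(cudnnTensorDescriptor_t* desc,
                                       const std::string& data_format,
                                       const std::vector<int64_t>& dims,
                                       const int64_t group) {
        int n = (int)dims[0];
        if (data_format == "NCHW") {
            int c = (int)dims[1], h = (int)dims[2], w = (int)dims[3];
            cudnnSetTensor4dDescriptorEx(*desc, CUDNNType<T>::type,
                n, (int)(c / group), h, w,
                c * h * w, h * w, w, 1);       // full-tensor strides
        } else {                               // "NHWC"
            int h = (int)dims[1], w = (int)dims[2], c = (int)dims[3];
            cudnnSetTensor4dDescriptorEx(*desc, CUDNNType<T>::type,
                n, (int)(c / group), h, w,
                h * w * c, 1, w * c, c);
        }
    }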
Dragon/python/dragon/core/tensor.py

@@ -156,29 +156,39 @@ class Tensor(object):
        """
        return self.Normal(mu=mean, sigma=std)

-   def Xavier(self):
+   def Xavier(self, scale=3.0):
        """
        Register as a variable with xavier initializer.
        """
-       return self._no_parameter_filler('xavier')
+       filler = pb.TensorFiller()
+       filler.tensor = self.name
+       filler.type = 'xavier'
+       filler.scale = scale
+       ws.CreateFiller(filler)
+       return self

-   def MSRA(self):
+   def MSRA(self, scale=2.0):
        """
        Register as a variable with msra initializer.
        """
-       return self._no_parameter_filler('msra')
+       filler = pb.TensorFiller()
+       filler.tensor = self.name
+       filler.type = 'msra'
+       filler.scale = scale
+       ws.CreateFiller(filler)
+       return self

-   def GlorotUniform(self):
+   def GlorotUniform(self, scale=3.0):
        """
        Register as a variable with glorot uniform initializer.
        """
-       return self.Xavier()
+       return self.Xavier(scale)

-   def GlorotNormal(self):
+   def GlorotNormal(self, scale=2.0):
        """
        Register as a variable with glorot normal initializer.
        """
-       return self.MSRA()
+       return self.MSRA(scale)

    ##############################################
    #                                            #
Dragon/python/dragon/docs/install.rst

@@ -19,10 +19,18 @@ Installation - Linux (Normal, CPU)
 **Step 1:** Install C++ Dependencies

+**$** Setup Python Development Environment
+
 .. code-block:: shell

+    sudo apt-get install libpython-dev
+
+**Note:** You can also use `Anaconda`_, a powerful toolkit for Data Science.
+
+**$** Setup C++ Development Environment
+
     sudo apt-get install libprotobuf-dev
     sudo apt-get install protobuf-compiler
     sudo apt-get install libopenblas-dev

 **Step 2:** Install Python Requirements

@@ -83,10 +91,18 @@ Installation - Linux (Normal, GPU)
@@ -149,10 +165,18 @@ Installation - Linux (Distributed, CPU)
@@ -229,10 +253,18 @@ Installation - Linux (Distributed, GPU)
 (the same Python/C++ environment-setup block is added before the "Install Python Requirements" step of each of these three sections)

@@ -564,6 +596,7 @@ Add ``REPO_ROOT/3rdparty/bin`` to system environment variables
     python setup.py install --user

+.. _Anaconda: https://www.anaconda.com/download
 .. _CUDA: https://developer.nvidia.com/cuda-toolkit
 .. _CUDNN: https://developer.nvidia.com/cudnn
 .. _NCCL: https://developer.nvidia.com/nccl
Dragon/python/dragon/operators/ndarray.py

@@ -673,6 +673,7 @@ def Reshape(inputs, shape, **kwargs):
        output.shape = [1] * len(shape)
        for i, s in enumerate(shape):
            if s == -1: output.shape[i] = 1
            elif s == 0: output.shape[i] = inputs.shape[i]
            else: output.shape[i] = s
    return output
Dragon/python/dragon/operators/norm.py

@@ -189,7 +189,7 @@ def InstanceNorm(inputs, axis=-1, eps=1e-3, **kwargs):
    return output

-def L2Norm(inputs, axis=0, num_axes=-1, eps=1e-5, **kwargs):
+def L2Norm(inputs, axis=0, num_axes=-1, eps=1e-5, mode='SUM', **kwargs):
    """L2 Normalization, introduced by `[Liu et.al, 2015] <https://arxiv.org/abs/1506.04579>`_.

    Parameters

@@ -202,6 +202,8 @@ def L2Norm(inputs, axis=0, num_axes=-1, eps=1e-5, **kwargs):
        The number of axes of stats region. Default is ``-1`` (Till End).
    eps : float
        The eps.
+   mode : str
+       The mode on computing normalizer. ``SUM`` or ``MEAN``.

    Returns
    -------
Dragon/python/dragon/operators/vision.py

@@ -61,6 +61,12 @@ def Conv2d(inputs, num_output, kernel_size,
    """
    CheckInputs(inputs, 2, 3)
    arguments = ParseArguments(locals())
+   if padding not in ('VALID', 'SAME'):
+       raise ValueError('Unsupported padding algorithm: {}'.format(padding))
+   if data_format not in ('NCHW', 'NHWC'):
+       raise ValueError('Unsupported data format: {}'.format(data_format))
    if not isinstance(arguments['kernel_size'], list):
        arguments['kernel_size'] = [arguments['kernel_size']]
    if not isinstance(arguments['stride'], list):

@@ -154,6 +160,11 @@ def Conv2dTranspose(inputs, num_output, kernel_size,
    CheckInputs(inputs, 2, 3)
    arguments = ParseArguments(locals())
+   if padding not in ('VALID', 'SAME'):
+       raise ValueError('Unsupported padding algorithm: {}'.format(padding))
+   if data_format not in ('NCHW', 'NHWC'):
+       raise ValueError('Unsupported data format: {}'.format(data_format))
    arguments['output_shape'] = None
    if output_shape is not None:
        if not isinstance(output_shape, list):

@@ -170,17 +181,43 @@ def Conv2dTranspose(inputs, num_output, kernel_size,
    if not isinstance(arguments['kernel_size'], list):
        arguments['kernel_size'] = [arguments['kernel_size']]
    if not isinstance(arguments['stride'], list):
        arguments['stride'] = [arguments['stride']]
    if not isinstance(arguments['pad'], list):
        arguments['pad'] = [arguments['pad']]
    if not isinstance(arguments['dilation'], list):
        arguments['dilation'] = [arguments['dilation']]

-   return Tensor.CreateOperator(nout=1, op_type='Conv2dTranspose', **arguments)
+   output = Tensor.CreateOperator(nout=1, op_type='Conv2dTranspose', **arguments)
+
+   if inputs[0].shape is not None:
+       output.shape = inputs[0].shape[:]
+       channel_axis = 1 if data_format == 'NCHW' else -1
+       spatial_axis = 2 if data_format == 'NCHW' else 1
+       output.shape[channel_axis] = num_output
+       for i in xrange(2):
+           k = arguments['kernel_size'][i] if i < len(arguments['kernel_size']) \
+               else arguments['kernel_size'][-1]
+           s = arguments['stride'][i] if i < len(arguments['stride']) \
+               else arguments['stride'][-1]
+           p = arguments['pad'][i] if i < len(arguments['pad']) \
+               else arguments['pad'][-1]
+           d = arguments['dilation'][i] if i < len(arguments['dilation']) \
+               else arguments['dilation'][-1]
+           dk = d * (k - 1) + 1
+           dp = 2 * p
+           input_size = output.shape[i + spatial_axis]
+           if padding != 'SAME':
+               output.shape[i + spatial_axis] = s * (input_size - 1) + dk - dp
+           else:
+               if output_shape is None:
+                   raise ValueError('The output shape must be specified if using SAME padding algorithm.')
+               if 'dynamic_dsize' in arguments:
+                   output.shape = None
+                   return output
+               output.shape[i + spatial_axis] = output_shape[i + spatial_axis]
+
+   return output

def Pool2d(inputs, kernel_size, stride, pad=0, padding='VALID',
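The non-SAME branch above applies the standard transposed-convolution size rule. As a worked example (a C++ sketch, not part of the commit; the helper name is hypothetical):

    #include <cstdint>

    // out = stride * (in - 1) + dilated_kernel - 2 * pad, where the dilated
    // kernel extent is dk = dilation * (kernel - 1) + 1, matching dk and dp
    // in the Python above.
    int64_t DeconvOutputDim(int64_t in, int64_t kernel, int64_t stride,
                            int64_t pad, int64_t dilation) {
        const int64_t dk = dilation * (kernel - 1) + 1;
        return stride * (in - 1) + dk - 2 * pad;
    }

    // Example: in = 7, kernel = 4, stride = 2, pad = 1, dilation = 1
    // -> 2 * 6 + 4 - 2 = 14.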
@@ -222,6 +259,14 @@ def Pool2d(inputs, kernel_size, stride, pad=0, padding='VALID',
    """
    CheckInputs(inputs, 1)
    arguments = ParseArguments(locals())
+   if mode not in ('MAX', 'AVG'):
+       raise ValueError('Unsupported lrn mode: {}'.format(mode))
+   if padding not in ('VALID', 'SAME'):
+       raise ValueError('Unsupported padding algorithm: {}'.format(padding))
+   if data_format not in ('NCHW', 'NHWC'):
+       raise ValueError('Unsupported data format: {}'.format(data_format))
    if not isinstance(arguments['kernel_size'], list):
        arguments['kernel_size'] = [arguments['kernel_size']]
    if not isinstance(arguments['stride'], list):

@@ -311,7 +356,8 @@ def ROIAlign(inputs, pool_h=0, pool_w=0, spatial_scale=1.0, **kwargs):
    return Tensor.CreateOperator(nout=1, op_type='ROIAlign', **arguments)

-def LRN(inputs, local_size=5, alpha=0.0001, beta=0.75, k=2.0, mode='ACROSS_CHANNELS', **kwargs):
+def LRN(inputs, local_size=5, alpha=0.0001, beta=0.75, k=2.0,
+        mode='ACROSS_CHANNELS', data_format='NCHW', **kwargs):
    """Local Response Normalization, introduced by `[Krizhevsky et.al, 2012] <http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks>`_.

    Parameters

@@ -328,17 +374,22 @@ def LRN(inputs, local_size=5, alpha=0.0001, beta=0.75, k=2.0, mode='ACROSS_CHANN
        The k of LRN.
    mode : str
        The mode, ``ACROSS_CHANNELS`` or ``WITHIN_CHANNEL``.
+   data_format : str
+       The data format. ``NCHW`` or ``NHWC``.

    Returns
    -------
    Tensor
-       The normalized tensor.
+       The output tensor.

    """
    CheckInputs(inputs, 1)
    arguments = ParseArguments(locals())
-   SUPPORT_MODES = {'ACROSS_CHANNELS': 0, 'WITHIN_CHANNEL': 1}
-   arguments['mode'] = SUPPORT_MODES[mode]
+   if mode not in ('ACROSS_CHANNELS', 'WITHIN_CHANNEL'):
+       raise ValueError('Unsupported lrn mode: {}'.format(mode))
+   if data_format not in ('NCHW', 'NHWC'):
+       raise ValueError('Unsupported data format: {}'.format(data_format))
    output = Tensor.CreateOperator(nout=1, op_type='LRN', **arguments)

@@ -356,9 +407,9 @@ def NNResize(inputs, dsize, fy=-1.0, fx=-1.0, data_format='NCHW', **kwargs):
    Parameters
    ----------
    inputs : Tensor
-       The input tenosr.
+       The input tensor.
    dsize : tuple, list, Tensor or None
-       The output size.
+       The output size, formats as (h, w).
    fy : float
        The scale factor based on src height. Default is ``-1.0`` (Discarded).
    fx : float

@@ -374,6 +425,10 @@ def NNResize(inputs, dsize, fy=-1.0, fx=-1.0, data_format='NCHW', **kwargs):
    """
    CheckInputs(inputs, 1)
    arguments = ParseArguments(locals())
+   if data_format not in ('NCHW', 'NHWC'):
+       raise ValueError('Unsupported data format: {}'.format(data_format))
    if arguments['dsize'] is not None:
        if isinstance(arguments['dsize'][0], Tensor):
            arguments['dynamic_dsize'] = [arguments['dsize'][0].name,

@@ -388,6 +443,20 @@ def NNResize(inputs, dsize, fy=-1.0, fx=-1.0, data_format='NCHW', **kwargs):
    output = Tensor.CreateOperator(nout=1, op_type='NNResize', **arguments)

+   if inputs.shape is not None:
+       if len(inputs.shape) != 4:
+           raise ValueError('The inputs should be a 4d Tensor.')
+       if 'dynamic_dsize' not in arguments:
+           output.shape = inputs.shape[:]
+           spatial_axis = 2 if data_format == 'NCHW' else 1
+           for i in xrange(2):
+               output_dim = output.shape[spatial_axis + i]
+               if 'static_size' in arguments:
+                   output_dim = dsize[i]
+               else:
+                   output_dim = int(float(output_dim) * ([fy, fx])[i])
+               output.shape[spatial_axis + i] = output_dim
+
    return output
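The shape inference above either takes a static dsize verbatim or scales each spatial dim by fy/fx with truncation. The same rule as a small C++ sketch (hypothetical helper; -1 stands in for an unset dsize):

    #include <cstdint>

    int64_t ResizeOutputDim(int64_t in, int64_t dsize, float scale) {
        if (dsize >= 0) return dsize;             // static size takes precedence
        return (int64_t)((float)in * scale);      // truncates, like int() above
    }

    // Example: in = 224 with fy = 0.5 -> 112; with dsize = 300 -> 300.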
@@ -399,9 +468,9 @@ def BilinearResize(inputs, dsize, fy=-1.0, fx=-1.0, data_format='NCHW', **kwargs
    Parameters
    ----------
    inputs : Tensor
-       The input tenosr.
+       The input tensor.
    dsize : tuple, list, Tensor or None
-       The dest output size.
+       The output size, formats as (h, w).
    fy : float
        The scale factor based on src height. Default is ``-1.0`` (Discarded).
    fx : float

@@ -417,6 +486,10 @@ def BilinearResize(inputs, dsize, fy=-1.0, fx=-1.0, data_format='NCHW', **kwargs
    """
    CheckInputs(inputs, 1)
    arguments = ParseArguments(locals())
+   if data_format not in ('NCHW', 'NHWC'):
+       raise ValueError('Unsupported data format: {}'.format(data_format))
    if arguments['dsize'] is not None:
        if isinstance(arguments['dsize'][0], Tensor):
            arguments['dynamic_dsize'] = [arguments['dsize'][0].name,

@@ -431,6 +504,20 @@ def BilinearResize(inputs, dsize, fy=-1.0, fx=-1.0, data_format='NCHW', **kwargs
    output = Tensor.CreateOperator(nout=1, op_type='BilinearResize', **arguments)

+   (the same 4d check and output-shape inference block as added in NNResize above)
+
    return output

@@ -453,6 +540,9 @@ def BiasAdd(inputs, data_format='NCHW', **kwargs):
    CheckInputs(inputs, 2)
    arguments = ParseArguments(locals())
+   if data_format not in ('NCHW', 'NHWC'):
+       raise ValueError('Unsupported data format: {}'.format(data_format))
    output = Tensor.CreateOperator(nout=1, op_type='BiasAdd', **arguments)
    if inputs[0].shape is not None:
Dragon/python/dragon/vm/caffe/layers/vision.py

@@ -229,7 +229,9 @@ class LRNLayer(Layer):
        self._param = {'local_size': param.local_size,
                       'alpha': param.alpha,
                       'beta': param.beta,
-                      'mode': {0: 'ACROSS_CHANNELS', 1: 'WITHIN_CHANNEL'}[param.norm_region]}
+                      'mode': {0: 'ACROSS_CHANNELS', 1: 'WITHIN_CHANNEL'}[param.norm_region],
+                      'data_format': 'NCHW'}

    def Setup(self, bottom):
        super(LRNLayer, self).Setup(bottom)
        input = bottom[0] if isinstance(bottom, list) else bottom
Dragon/src/core/graph.cc

@@ -18,7 +18,7 @@ GraphBase::GraphBase(const GraphDef& meta_graph, Workspace* ws)
        // check inputs
        for (auto& in : op.input())
            CHECK(known_tensors.count(in) || ws_->HasTensor(in))
                << "\nInput: " << in << " for op: "
                << op.name() << " is unknown.";
        // add outputs
        for (auto& out : op.output()) known_tensors.insert(out);

@@ -55,13 +55,13 @@ void Graph::ForwardShareDyeing(string u, string ancestor) {
            auto* schema = OpSchemaRegistry::Schema(op_type);
            if (schema->AllowInplace())
                ForwardShareDyeing(dag_[u].childs[0], ancestor);
        }
    }
}

void Graph::ForwardPruneDyeing(string u, string leaf, vector<string> path) {
    if (visited_.count(u)) {
        if (visited_[u])
            for (auto& node : path)
                visited_[node] = colored_[node] = true;
        return;
    }

@@ -71,7 +71,7 @@ void Graph::ForwardPruneDyeing(string u, string leaf, vector<string> path) {
        vector<string> new_path(path);
        new_path.push_back(v);
        if (v == leaf) {
            for (auto& node : new_path)
                visited_[node] = colored_[node] = true;
            return;
        }

@@ -260,8 +260,8 @@ GraphDef Graph::MakeUpdate(const GraphDef& meta_graph) {
            collective_ops.push_back(op_def);
        }
    } else if (this->args_["parallel_mode"].s() == "MIXED") {
        /*
            See: Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour
-           Links: http://arxiv.org/abs/1706.02677
+           Link: http://arxiv.org/abs/1706.02677
        */
        NOT_IMPLEMENTED;
    }

@@ -282,11 +282,11 @@ bool Graph::Create(const GraphDef& optimized_graph, Workspace* ws) {
    bool has_share_grads = optimized_graph.has_share_grads();
    for (const OperatorDef& plain_op_def : optimized_graph.op()) {
        OperatorDef op_def(plain_op_def);
        LOG(DEBUG) << "Create Operator " << plain_op_def.name()
                   << ": " << plain_op_def.type();
        // inherit device option if necessary
        if (!op_def.has_device_option() && has_device_option)
            op_def.mutable_device_option()->CopyFrom(optimized_graph.device_option());
        // inherit debug mode if necessary

@@ -316,7 +316,7 @@ void Graph::RecomputingAware(const GraphDef& optimized_graph, Workspace* ws) {
        bool mirror_stage = ops_[i]->GetSingleArg<bool>("mirror_stage", false);
        for (auto& u : optimized_graph.op(i).input()) {
            bool inplace_flag = false;
            for (auto& v : optimized_graph.op(i).output())
                if (u == v) inplace_flag = true;
            mirror_stage &= (!inplace_flag);
            if (!inplace_flag) multi_use_count[u]++;

@@ -324,7 +324,7 @@ void Graph::RecomputingAware(const GraphDef& optimized_graph, Workspace* ws) {
        if (mirror_stage) {
            // TODO(PhyscalX): we assume input(0)->output(0) as a in-place currently
            OperatorDef* op = fake_graph.mutable_op(i);
            if (rename_map.count(op->input(0)))
                *op->mutable_input(0) = rename_map[op->input(0)];
            rename_map[op->output(0)] = op->input(0);
            *op->mutable_output(0) = op->input(0);

@@ -339,19 +339,19 @@ void Graph::RecomputingAware(const GraphDef& optimized_graph, Workspace* ws) {
        OperatorDef op = optimized_graph.op(i);
        for (int j = 0; j < op.output_size(); j++) {
            string v = op.output(j);
            string fake_v = fake_op.output(j);
            if (!fake_recompute_map.count(fake_v))
                fake_recompute_map[fake_v] = vector<OperatorBase*>();
            if (v != fake_v) {
                if (multi_use_count[fake_v] >= 2)
                    fake_recompute_map[fake_v] = recompute_map[fake_v];
            }
            fake_recompute_map[fake_v].push_back(ops_[i]);
            for (int k = 0; k < fake_recompute_map[fake_v].size(); k++) {
                if (!hash_map.count(v)) hash_map[v] = Set<string>();
                string op_name = fake_recompute_map[fake_v][k]->name();
                if (!hash_map[v].count(op_name)) {
                    if (!recompute_map.count(v))
                        recompute_map[v] = vector<OperatorBase*>();
                    recompute_map[v].push_back(fake_recompute_map[fake_v][k]);
                    hash_map[v].insert(op_name);

@@ -359,7 +359,7 @@ void Graph::RecomputingAware(const GraphDef& optimized_graph, Workspace* ws) {
            }
        }
    }
    // prepare resources
    for (auto& ops : ops_) ops->set_recompute_map(recompute_map);
    Tensor* head = ws->CreateTensor("/opt/mirror_stage/head");

@@ -403,7 +403,7 @@ Graph::Graph(const GraphDef& meta_graph, Workspace* ws)
bool Graph::Run(const string& include, const string& exclude) {
    LOG(DEBUG) << "Run Graph: " << name();
    for (auto op : ops_) {
        if (!include.empty())
            if (op->type().find(include) == string::npos) continue;
        if (!exclude.empty())
            if (op->type().find(exclude) != string::npos) continue;

@@ -422,4 +422,4 @@ GraphBase* NewGraph(const GraphDef& meta_graph, Workspace* ws) {
    return GraphRegistry()->Create(meta_graph.graph_type(), meta_graph, ws);
}
-}    // namespace dragon
\ No newline at end of file
+}    // namespace dragon
Dragon/src/core/mixedmem.cc

@@ -112,4 +112,4 @@ void MixedMemory::SwitchToDevice() {
        }
    }
}
-}    // namespace dragon
\ No newline at end of file
+}    // namespace dragon
Dragon/src/core/operator.cc

@@ -4,7 +4,7 @@
namespace dragon {

OperatorBase::OperatorBase(const OperatorDef& op_def, Workspace* ws)
    : op_def_(op_def), ws_(ws) {
    for (auto& arg : this->op_def_.arg()) {
        CHECK_GT(arg.name().size(), 0);

@@ -39,7 +39,7 @@ OperatorBase* TryCreateOperator(const string& key, const OperatorDef& op_def, Wo
        case CPU:
            return CPUOperatorRegistry()->Create(key, op_def, ws);
        case CUDA:
            if (op_def.device_option().has_engine() &&
                op_def.device_option().engine() == "CUDNN" &&
                CUDNNOperatorRegistry()->Has(key))
                return CUDNNOperatorRegistry()->Create(key, op_def, ws);

@@ -59,15 +59,15 @@ OperatorBase* CreateOperator(const OperatorDef& op_def, Workspace* ws) {
Gradient MakeGradientForOp(const OperatorDef& def, const vector<string>& g_outputs) {
    unique_ptr<GradientMakerBase> maker(GradientRegistry()->Create(def.type(), def, g_outputs));
    if (maker.get() == nullptr)
        LOG(FATAL) << "Gradient maker for operator " << def.type() << "not implemented.";
    Gradient grad = maker->Make();
    // copy device option, engine, and arguments if needed
    if (maker->CopyDeviceOption() && def.has_device_option())
        for (auto& grad_def : grad.ops)
            grad_def.mutable_device_option()->CopyFrom(def.device_option());
    // copy arguments if needed
    if (maker->CopyArguments() && def.arg_size())
        for (auto& grad_def : grad.ops)
            grad_def.mutable_arg()->MergeFrom(def.arg());
    return grad;
}

@@ -95,7 +95,7 @@ void Operator<Context>::ElimateCorruption() {
        all_heads.clear();
        for (int i = 0; i < head->count(); i++) {
            bool safe = true;
            for (int j = 0; j < InputSize(); j++)
                if (head_data[i] == input(j).name()) safe = false;
            if (safe) safe_heads.push(i);
            all_heads.insert(head_data[i]);

@@ -149,7 +149,9 @@ void Operator<Context>::CleanResource() {
            Tensor* buffer = ws()->GetTensor(used);
            if (output(i)->memory() != buffer->memory()) buffer->Move(output(i)->memory());
        }
    }
+   // post-process for sharing grads
+   if (allow_share_grads_) {
        // TODO(PhyscalX): we preset input(-1)->output(0) to share
        Tensor* dY = &input(-1);

@@ -201,4 +203,4 @@ template void Operator<CUDAContext>::MakeResource();
template void Operator<CPUContext>::CleanResource();
template void Operator<CUDAContext>::CleanResource();
-}    // namespace dragon
\ No newline at end of file
+}    // namespace dragon
Dragon/src/operators/norm/l2_norm_op.cc

@@ -30,24 +30,25 @@ void L2NormOp<Context>::RunWithType() {
        if (across_inner) {
            auto* Ndata_ = norm->template mutable_data<float, CPUContext>();
            float sum_of_sqr = math::Dot<T, Context>(buffer->count(), Xdata, Xdata);
+           if (mode == "MEAN") sum_of_sqr = sum_of_sqr / dim;
            Ndata_[n] = pow(sum_of_sqr + eps, 0.5);
            math::Scale<T, Context>(buffer->count(), 1.0 / Ndata_[n], Xdata, Ydata);
        } else {
            math::Set<T, Context>(norm->count(), dragon_cast<T, float>(eps), Ndata);
            math::Square<T, Context>(buffer->count(), Xdata, Bdata);
            // compute T1 = \sum_{i} x_{i,j}^{2}
-           math::Gemv<T, Context>(CblasTrans, dim, inner_dim,
-                                  1.0, Bdata, DMuldata,
-                                  1.0, Ndata);
+           math::Gemv<T, Context>(CblasTrans, dim, inner_dim,
+                                  mode == "MEAN" ? 1.0 / dim : 1.0,
+                                  Bdata, DMuldata,
+                                  1.0, Ndata);
            // compute T2 = \sqrt{T1}
            math::Sqrt<T, Context>(inner_dim, Ndata, Ndata);
            // compute T3 = x / [(T2)]_{dim}
            math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans, dim, inner_dim, 1,
                                   1.0, DMuldata, Ndata,
                                   0.0, Bdata);
            math::Div<T, Context>(buffer->count(), Xdata, Bdata, Ydata);
            Ndata += inner_dim;
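In math form, the two normalizers that the new ``mode`` argument selects (reading the ``across_inner`` branch above, with eps added before the square root) are, for a slice x of length dim:

\[
\text{SUM}:\quad y_i = \frac{x_i}{\sqrt{\sum_{j=1}^{\mathrm{dim}} x_j^2 + \varepsilon}},
\qquad
\text{MEAN}:\quad y_i = \frac{x_i}{\sqrt{\tfrac{1}{\mathrm{dim}}\sum_{j=1}^{\mathrm{dim}} x_j^2 + \varepsilon}}.
\]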
Dragon/src/operators/vision/cudnn_conv2d_op.cc

@@ -15,74 +15,56 @@ void CuDNNConv2dOp<Context>::RunWithType() {
    CUDNN_CHECK(cudnnSetFilter4dDescriptor(filter_desc, CUDNNType<T>::type, format,
-                                          this->num_output / this->group,
+                                          this->num_output / cudnn_group,
                                           this->channels / this->group,
                                           this->kernel_size[0], this->kernel_size[1]));
#else
    CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(filter_desc, CUDNNType<T>::type, format,
-                                             this->num_output / this->group,
+                                             this->num_output / cudnn_group,
                                              this->channels / this->group,
                                              this->kernel_size[0], this->kernel_size[1]));
#endif
-   Tensor fake_tensor;
-   vector<TIndex> fake_dims;
-   if (this->data_format == "NCHW") {
-       // determine the input shape
-       fake_tensor.ReshapeLike(input(0));
-       fake_dims = fake_tensor.dims(); fake_dims[1] /= this->group;
-       cudnnSetTensor4dDesc<T>(&input_desc, this->data_format, fake_dims);
-       // determine the output shape
-       fake_tensor.ReshapeLike(*output(0));
-       fake_dims = fake_tensor.dims(); fake_dims[1] /= this->group;
-       cudnnSetTensor4dDesc<T>(&output_desc, this->data_format, fake_dims);
-       // determine the bias shape if necessary
-       if (HasBias()) {
-           bias_offset = this->num_output / this->group;
-           cudnnSetTensor4dDesc<T>(&bias_desc, this->data_format,
-                                   vector<TIndex>({1, bias_offset, 1, 1}));
-       }
-   } else if (this->data_format == "NHWC") {
-       // (the same block, dividing the trailing channel dim by this->group and
-       //  using a {1, 1, 1, bias_offset} bias shape)
-   }
+   // determine the input & output shape
+   cudnnSetTensor4dDescWithGroup<T>(&input_desc, this->data_format, input(0).dims(), cudnn_group);
+   cudnnSetTensor4dDescWithGroup<T>(&output_desc, this->data_format, output(0)->dims(), cudnn_group);
+   // determine the bias shape and misc
+   if (HasBias()) {
+       bias_offset = this->num_output / cudnn_group;
+       if (this->data_format == "NCHW")
+           cudnnSetTensor4dDesc<T>(&bias_desc, this->data_format,
+                                   vector<TIndex>({1, bias_offset, 1, 1}));
+       else if (this->data_format == "NHWC")
+           cudnnSetTensor4dDesc<T>(&bias_desc, this->data_format,
+                                   vector<TIndex>({1, 1, 1, bias_offset}));
+   }
+   if (this->data_format == "NCHW") {
+       this->x_offset = input(0).count(1) / cudnn_group;
+       this->y_offset = output(0)->count(1) / cudnn_group;
+   } else if (this->data_format == "NHWC") {
+       this->x_offset = input(0).dim(-1) / cudnn_group;
+       this->y_offset = output(0)->dim(-1) / cudnn_group;
+   }
    CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(handle[0],
        input_desc, filter_desc, conv_desc, output_desc,
        CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
        WORKSPACE_LIMIT_BYTES, &fwd_algo));
    CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(handle[0],
        input_desc, filter_desc, conv_desc, output_desc,
        fwd_algo, &workspace_fwd_data_size));
    Tensor* buffer = ws()->GetBuffer();
    if (workspace_fwd_data_size == 0) workspace_fwd_data_size += 1;
-   buffer->Reshape(vector<TIndex>(1, this->group * workspace_fwd_data_size));
+   buffer->Reshape(vector<TIndex>(1, cudnn_group * workspace_fwd_data_size));
    auto* Xdata = input(0).template data<T, Context>();
    auto* Ydata = output(0)->template mutable_data<T, Context>();

@@ -90,9 +72,9 @@ void CuDNNConv2dOp<Context>::RunWithType() {
    auto* Wdata = input(1).template data<T, Context>();
    if (HasBias()) TENSOR_FILL(input(2), this->bias_shape);
-   for (int g = 0; g < this->group; g++) {
+   for (int g = 0; g < cudnn_group; g++) {
        auto* workspace = buffer->template mutable_data<char, Context>();
        CUDNN_CHECK(cudnnConvolutionForward(handle[g],
            CUDNNType<T>::one, input_desc, Xdata + this->x_offset * g,
            filter_desc, Wdata + this->weight_offset * g, conv_desc,

@@ -101,7 +83,7 @@ void CuDNNConv2dOp<Context>::RunWithType() {
            CUDNNType<T>::zero, output_desc, Ydata + this->y_offset * g));
        if (HasBias()) {
            auto* bias = input(2).template data<T, Context>();
            CUDNN_CHECK(cudnnAddTensor(handle[g],
                CUDNNType<T>::one, bias_desc, bias + this->bias_offset * g,
                CUDNNType<T>::one, output_desc, Ydata + this->y_offset * g));
        }

@@ -117,41 +99,45 @@ void CuDNNConv2dOp<Context>::RunOnDevice() {
        if (this->dilation[i] != 1) return Conv2dOp<Context>::RunOnDevice();
#endif
    Conv2dOp<Context>::Reshape();
-   this->x_offset /= this->group;
-   this->y_offset /= this->group;
    if (input(0).template IsType<float>()) {
#if CUDNN_VERSION_MIN(6, 0, 0)
        CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc,
            this->pad[0], this->pad[1], this->stride[0], this->stride[1],
            this->dilation[0], this->dilation[1],
            CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT));
#else
        CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc,
            this->pad[0], this->pad[1], this->stride[0], this->stride[1],
            1, 1, CUDNN_CROSS_CORRELATION));
#endif
+#if CUDNN_VERSION_MIN(7, 0, 0)
+       CUDNN_CHECK(cudnnSetConvolutionGroupCount(conv_desc, this->group));
+#endif
        RunWithType<float>();
    } else if (input(0).template IsType<float16>()) {
#ifdef WITH_CUDA_FP16
        (the same two cudnnSetConvolution2dDescriptor branches as above)
+#if CUDNN_VERSION_MIN(7, 0, 0)
+       CUDNN_CHECK(cudnnSetConvolutionGroupCount(conv_desc, this->group));
+#endif
        RunWithType<float16>();
#endif    // WITH_CUDA_FP16
    } else { LOG(FATAL) << "Unsupported input types."; }

@@ -165,51 +151,33 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() {
    CUDNN_CHECK(cudnnSetFilter4dDescriptor(filter_desc, CUDNNType<T>::type, format,
-                                          this->num_output / this->group,
+                                          this->num_output / cudnn_group,
                                           this->channels / this->group,
                                           this->kernel_size[0], this->kernel_size[1]));
#else
    CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(filter_desc, CUDNNType<T>::type, format,
-                                             this->num_output / this->group,
+                                             this->num_output / cudnn_group,
                                              this->channels / this->group,
                                              this->kernel_size[0], this->kernel_size[1]));
#endif
-   (the same fake_tensor / fake_dims reshaping block as in the forward op,
-    applied to input(-1) and input(0))
+   // determine the input & output shape
+   cudnnSetTensor4dDescWithGroup<T>(&input_desc, this->data_format, input(-1).dims(), cudnn_group);
+   cudnnSetTensor4dDescWithGroup<T>(&output_desc, this->data_format, input(0).dims(), cudnn_group);
+   // determine the bias shape and misc
+   if (HasBias()) {
+       bias_offset = this->num_output / cudnn_group;
+       if (this->data_format == "NCHW")
+           cudnnSetTensor4dDesc<T>(&bias_desc, this->data_format,
+                                   vector<TIndex>({1, bias_offset, 1, 1}));
+       else if (this->data_format == "NHWC")
+           cudnnSetTensor4dDesc<T>(&bias_desc, this->data_format,
+                                   vector<TIndex>({1, 1, 1, bias_offset}));
+   }
+   if (this->data_format == "NCHW") {
+       this->x_offset = input(0).count(1) / cudnn_group;
+       this->y_offset = input(-1).count(1) / cudnn_group;
+   } else if (this->data_format == "NHWC") {
+       this->x_offset = input(0).dim(-1) / cudnn_group;
+       this->y_offset = input(-1).dim(-1) / cudnn_group;
+   }

@@ -251,11 +219,11 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() {
    Tensor* buffer2 = ws()->GetBuffer();
    if (workspace_bwd_data_size == 0) workspace_bwd_data_size += 1;
    if (workspace_bwd_filter_size == 0) workspace_bwd_filter_size += 1;
-   buffer1->Reshape(vector<TIndex>(1, this->group * workspace_bwd_data_size));
-   buffer2->Reshape(vector<TIndex>(1, this->group * workspace_bwd_filter_size));
+   buffer1->Reshape(vector<TIndex>(1, cudnn_group * workspace_bwd_data_size));
+   buffer2->Reshape(vector<TIndex>(1, cudnn_group * workspace_bwd_filter_size));
    const T* dYdata = input(2).template data<T, Context>();
-   for (int g = 0; g < this->group; g++) {
+   for (int g = 0; g < cudnn_group; g++) {
        if (output(2)->name() != "ignore") {
            T* dBdata = output(2)->template mutable_data<T, Context>();
            CUDNN_CHECK(cudnnConvolutionBackwardBias(handle[g],

@@ -266,7 +234,7 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() {
        auto* Xdata = input(0).template data<T, Context>();
        auto* dWdata = output(1)->template mutable_data<T, Context>();
        auto* workspace = buffer2->mutable_data<char, Context>();
-       CUDNN_CHECK(cudnnConvolutionBackwardFilter(handle[1 * this->group + g],
+       CUDNN_CHECK(cudnnConvolutionBackwardFilter(handle[1 * cudnn_group + g],
            CUDNNType<T>::one, output_desc, Xdata + this->x_offset * g,
            input_desc, dYdata + this->y_offset * g, conv_desc,

@@ -278,7 +246,7 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() {
        auto* Wdata = input(1).template data<T, Context>();
        auto* dXdata = output(0)->template mutable_data<T, Context>();
        auto* workspace = buffer1->mutable_data<char, Context>();
-       CUDNN_CHECK(cudnnConvolutionBackwardData(handle[2 * this->group + g],
+       CUDNN_CHECK(cudnnConvolutionBackwardData(handle[2 * cudnn_group + g],
            CUDNNType<T>::one, filter_desc, Wdata + this->weight_offset * g,
            input_desc, dYdata + this->y_offset * g, conv_desc,

@@ -299,40 +267,44 @@ void CuDNNConv2dGradientOp<Context>::RunOnDevice() {
        if (this->dilation[i] != 1) return Conv2dGradientOp<Context>::RunOnDevice();
#endif
    Conv2dGradientOp<Context>::GradientReshape();
-   this->x_offset /= this->group;
-   this->y_offset /= this->group;
    if (input(0).template IsType<float>()) {
        (the same cudnnSetConvolution2dDescriptor branches as in the forward op)
+#if CUDNN_VERSION_MIN(7, 0, 0)
+       CUDNN_CHECK(cudnnSetConvolutionGroupCount(conv_desc, this->group));
+#endif
        RunWithType<float>();
    } else if (input(0).template IsType<float16>()) {
#ifdef WITH_CUDA_FP16
        (the same cudnnSetConvolution2dDescriptor branches as in the forward op)
+#if CUDNN_VERSION_MIN(7, 0, 0)
+       CUDNN_CHECK(cudnnSetConvolutionGroupCount(conv_desc, this->group));
+#endif
        RunWithType<float16>();
#endif    // WITH_CUDA_FP16
    } else { LOG(FATAL) << "Unsupported input types."; }

@@ -342,4 +314,4 @@ DEPLOY_CUDNN(Conv2dGradient);
 }    // namespace dragon
-#endif    // WITH_CUDNN
\ No newline at end of file
+#endif    // WITH_CUDNN
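The offset arithmetic above, isolated as a sketch (assuming NCHW input of shape (N, C, H, W) and output of shape (N, K, Ho, Wo); the helper names are hypothetical, but the formulas are the ones in RunWithType):

    #include <cstdint>

    // Per-group pointer strides, i.e. how far "data + offset * g" jumps.
    int64_t XOffset(int64_t C, int64_t H, int64_t W, int64_t cudnn_group) {
        return C * H * W / cudnn_group;    // input(0).count(1) / cudnn_group
    }
    int64_t YOffset(int64_t K, int64_t Ho, int64_t Wo, int64_t cudnn_group) {
        return K * Ho * Wo / cudnn_group;  // output(0)->count(1) / cudnn_group
    }

    // Under cuDNN >= 7, cudnn_group == 1: the offsets span the whole image and
    // the per-group loop degenerates to a single cuDNN call.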
Dragon/src/operators/vision/cudnn_conv2d_transpose_op.cc
View file @ c9db9ee
...
@@ -15,51 +15,34 @@ void CuDNNConv2dTransposeOp<Context>::RunWithType() {
    CUDNN_CHECK(cudnnSetFilter4dDescriptor(filter_desc, CUDNNType<T>::type, format,
-       this->num_output / this->group,
+       this->num_output / cudnn_group,
        this->channels / this->group,
        this->kernel_size[0], this->kernel_size[1]));
#else
    CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(filter_desc, CUDNNType<T>::type, format,
-       this->num_output / this->group,
+       this->num_output / cudnn_group,
        this->channels / this->group,
        this->kernel_size[0], this->kernel_size[1]));
#endif
-   Tensor fake_tensor;
-   vector<TIndex> fake_dims;
-   if (this->data_format == "NCHW") {
-       //  determine the input shape
-       fake_tensor.ReshapeLike(input(0));
-       fake_dims = fake_tensor.dims();
-       fake_dims[1] /= this->group;
-       cudnnSetTensor4dDesc<T>(&input_desc, this->data_format, fake_dims);
-       //  determine the output shape
-       fake_tensor.ReshapeLike(*output(0));
-       fake_dims = fake_tensor.dims();
-       fake_dims[1] /= this->group;
-       cudnnSetTensor4dDesc<T>(&output_desc, this->data_format, fake_dims);
-       //  determine the bias shape if necessary
-       if (HasBias()) {
-           bias_offset = this->num_output / this->group;
-       }
-   } else if (this->data_format == "NHWC") {
-       //  determine the input shape
-       fake_tensor.ReshapeLike(input(0));
-       fake_dims = fake_tensor.dims();
-       fake_dims[fake_dims.size() - 1] /= this->group;
-       cudnnSetTensor4dDesc<T>(&input_desc, this->data_format, fake_dims);
-       //  determine the output shape
-       fake_tensor.ReshapeLike(*output(0));
-       fake_dims = fake_tensor.dims();
-       fake_dims[fake_dims.size() - 1] /= this->group;
-       cudnnSetTensor4dDesc<T>(&output_desc, this->data_format, fake_dims);
-       //  determine the bias shape if necessary
-       if (HasBias()) {
-           bias_offset = this->num_output / this->group;
-       }
-   }
+   //  determine the input & output shape
+   cudnnSetTensor4dDescWithGroup<T>(&input_desc, this->data_format, input(0).dims(), cudnn_group);
+   cudnnSetTensor4dDescWithGroup<T>(&output_desc, this->data_format, output(0)->dims(), cudnn_group);
+   //  determine the bias shape and misc
+   if (HasBias()) {
+       bias_offset = this->num_output / cudnn_group;
+       if (this->data_format == "NCHW") {
+           cudnnSetTensor4dDesc<T>(&bias_desc, this->data_format, vector<TIndex>({ 1, bias_offset, 1, 1 }));
+           this->x_offset = input(0).count(1) / cudnn_group;
+           this->y_offset = output(0)->count(1) / cudnn_group;
+       }
+       else if (this->data_format == "NHWC") {
+           cudnnSetTensor4dDesc<T>(&bias_desc, this->data_format, vector<TIndex>({ 1, 1, 1, bias_offset }));
+           this->x_offset = input(0).dim(-1) / cudnn_group;
+           this->y_offset = output(0)->dim(-1) / cudnn_group;
+       }
+   }
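A note on cudnn_group, which this hunk introduces: it is presumably the effective group count seen by cuDNN, i.e. 1 when the library handles grouping natively and this->group otherwise. The member itself is defined in the conv headers, outside this diff, so the following derivation is a hedged sketch of the idea, not a quote of the commit:

//  Hedged sketch: how cudnn_group is expected to be initialized.
#if CUDNN_VERSION_MIN(7, 0, 0)
    cudnn_group = 1;            //  cuDNN partitions the groups internally
#else
    cudnn_group = this->group;  //  emulate groups with one launch per slice
#endif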
...
@@ -82,7 +65,7 @@ void CuDNNConv2dTransposeOp<Context>::RunWithType() {
    Tensor* buffer = ws()->GetBuffer();
    if (workspace_fwd_data_size == 0) workspace_fwd_data_size += 1;
-   buffer->Reshape(vector<TIndex>(1, this->group * workspace_fwd_data_size));
+   buffer->Reshape(vector<TIndex>(1, cudnn_group * workspace_fwd_data_size));
    auto* Xdata = input(0).template data<T, Context>();
    auto* Ydata = output(0)->template mutable_data<T, Context>();
...
@@ -90,7 +73,7 @@ void CuDNNConv2dTransposeOp<Context>::RunWithType() {
    auto* Wdata = input(1).template data<T, Context>();
    if (HasBias()) TENSOR_FILL(input(2), this->bias_shape);
-   for (int g = 0; g < this->group; g++) {
+   for (int g = 0; g < cudnn_group; g++) {
        auto* workspace = buffer->template mutable_data<char, Context>();
        CUDNN_CHECK(cudnnConvolutionBackwardData(handle[g],
            CUDNNType<T>::one, filter_desc, Wdata + this->weight_offset * g,
...
@@ -118,8 +101,6 @@ void CuDNNConv2dTransposeOp<Context>::RunOnDevice() {
        if (this->dilation[i] != 1)
            return Conv2dTransposeOp<Context>::RunOnDevice();
#endif
    Conv2dTransposeOp<Context>::Reshape();
-   this->x_offset /= this->group;
-   this->y_offset /= this->group;
    if (input(0).template IsType<float>()) {
#if CUDNN_VERSION_MIN(6, 0, 0)
...
@@ -136,6 +117,9 @@ void CuDNNConv2dTransposeOp<Context>::RunOnDevice() {
            1, 1,
            CUDNN_CROSS_CORRELATION));
#endif
+#if CUDNN_VERSION_MIN(7, 0, 0)
+       CUDNN_CHECK(cudnnSetConvolutionGroupCount(conv_desc, this->group));
+#endif
        RunWithType<float>();
    } else if (input(0).template IsType<float16>()) {
#ifdef WITH_CUDA_FP16
...
@@ -153,6 +137,9 @@ void CuDNNConv2dTransposeOp<Context>::RunOnDevice() {
            1, 1,
            CUDNN_CROSS_CORRELATION));
#endif
+#if CUDNN_VERSION_MIN(7, 0, 0)
+       CUDNN_CHECK(cudnnSetConvolutionGroupCount(conv_desc, this->group));
+#endif
        RunWithType<float16>();
#endif // WITH_CUDA_FP16
    } else { LOG(FATAL) << "Unsupported input types."; }
...
@@ -166,51 +153,34 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunWithType() {
    CUDNN_CHECK(cudnnSetFilter4dDescriptor(filter_desc, CUDNNType<T>::type, format,
-       this->num_output / this->group,
+       this->num_output / cudnn_group,
        this->channels / this->group,
        this->kernel_size[0], this->kernel_size[1]));
#else
    CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(filter_desc, CUDNNType<T>::type, format,
-       this->num_output / this->group,
+       this->num_output / cudnn_group,
        this->channels / this->group,
        this->kernel_size[0], this->kernel_size[1]));
#endif
-   Tensor fake_tensor;
-   vector<TIndex> fake_dims;
-   if (this->data_format == "NCHW") {
-       //  determine the input shape
-       fake_tensor.ReshapeLike(input(-1));
-       fake_dims = fake_tensor.dims();
-       fake_dims[1] /= this->group;
-       cudnnSetTensor4dDesc<T>(&input_desc, this->data_format, fake_dims);
-       //  determine the output shape
-       fake_tensor.ReshapeLike(input(0));
-       fake_dims = fake_tensor.dims();
-       fake_dims[1] /= this->group;
-       cudnnSetTensor4dDesc<T>(&output_desc, this->data_format, fake_dims);
-       //  determine the bias shape if necessary
-       if (HasBias()) {
-           bias_offset = this->num_output / this->group;
-       }
-   } else if (this->data_format == "NHWC") {
-       //  determine the input shape
-       fake_tensor.ReshapeLike(input(-1));
-       fake_dims = fake_tensor.dims();
-       fake_dims[fake_dims.size() - 1] /= this->group;
-       cudnnSetTensor4dDesc<T>(&input_desc, this->data_format, fake_dims);
-       //  determine the output shape
-       fake_tensor.ReshapeLike(input(0));
-       fake_dims = fake_tensor.dims();
-       fake_dims[fake_dims.size() - 1] /= this->group;
-       cudnnSetTensor4dDesc<T>(&output_desc, this->data_format, fake_dims);
-       //  determine the bias shape if necessary
-       if (HasBias()) {
-           bias_offset = this->num_output / this->group;
-       }
-   }
+   //  determine the input & output shape
+   cudnnSetTensor4dDescWithGroup<T>(&input_desc, this->data_format, input(-1).dims(), cudnn_group);
+   cudnnSetTensor4dDescWithGroup<T>(&output_desc, this->data_format, input(0).dims(), cudnn_group);
+   //  determine the bias shape and misc
+   if (HasBias()) {
+       bias_offset = this->num_output / cudnn_group;
+       if (this->data_format == "NCHW") {
+           cudnnSetTensor4dDesc<T>(&bias_desc, this->data_format, vector<TIndex>({ 1, bias_offset, 1, 1 }));
+           this->x_offset = input(0).count(1) / cudnn_group;
+           this->y_offset = input(-1).count(1) / cudnn_group;
+       }
+       else if (this->data_format == "NHWC") {
+           cudnnSetTensor4dDesc<T>(&bias_desc, this->data_format, vector<TIndex>({ 1, 1, 1, bias_offset }));
+           this->x_offset = input(0).dim(-1) / cudnn_group;
+           this->y_offset = input(-1).dim(-1) / cudnn_group;
+       }
+   }
...
@@ -252,14 +222,14 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunWithType() {
    Tensor* buffer2 = ws()->GetBuffer();
    if (workspace_bwd_data_size == 0) workspace_bwd_data_size += 1;
    if (workspace_bwd_filter_size == 0) workspace_bwd_filter_size += 1;
-   buffer1->Reshape(vector<TIndex>(1, this->group * workspace_bwd_data_size));
-   buffer2->Reshape(vector<TIndex>(1, this->group * workspace_bwd_filter_size));
+   buffer1->Reshape(vector<TIndex>(1, cudnn_group * workspace_bwd_data_size));
+   buffer2->Reshape(vector<TIndex>(1, cudnn_group * workspace_bwd_filter_size));
    const T* dYdata = input(2).template data<T, Context>();
-   for (int g = 0; g < this->group; g++) {
+   for (int g = 0; g < cudnn_group; g++) {
        if (output(2)->name() != "ignore") {
            T* dBdata = output(2)->template mutable_data<T, Context>();
            CUDNN_CHECK(cudnnConvolutionBackwardBias(handle[g],
                CUDNNType<T>::one, input_desc, dYdata + this->y_offset * g,
                CUDNNType<T>::one, bias_desc, dBdata + bias_offset * g));
        }
...
@@ -267,7 +237,7 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunWithType() {
        auto* Xdata = input(0).template data<T, Context>();
        auto* dWdata = output(1)->template mutable_data<T, Context>();
        auto* workspace = buffer2->mutable_data<char, Context>();
-       CUDNN_CHECK(cudnnConvolutionBackwardFilter(handle[1 * this->group + g],
+       CUDNN_CHECK(cudnnConvolutionBackwardFilter(handle[1 * cudnn_group + g],
            CUDNNType<T>::one, input_desc, dYdata + this->y_offset * g,
            output_desc, Xdata + this->x_offset * g,
            conv_desc,
...
@@ -279,7 +249,7 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunWithType() {
        auto* Wdata = input(1).template data<T, Context>();
        auto* dXdata = output(0)->template mutable_data<T, Context>();
        auto* workspace = buffer1->mutable_data<char, Context>();
-       CUDNN_CHECK(cudnnConvolutionForward(handle[2 * this->group + g],
+       CUDNN_CHECK(cudnnConvolutionForward(handle[2 * cudnn_group + g],
            CUDNNType<T>::one, input_desc, dYdata + this->y_offset * g,
            filter_desc, Wdata + this->weight_offset * g,
            conv_desc,
...
@@ -300,8 +270,6 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunOnDevice() {
        if (this->dilation[i] != 1)
            return Conv2dTransposeGradientOp<Context>::RunOnDevice();
#endif
    Conv2dTransposeGradientOp<Context>::GradientReshape();
-   this->x_offset /= this->group;
-   this->y_offset /= this->group;
    if (input(0).template IsType<float>()) {
#if CUDNN_VERSION_MIN(6, 0, 0)
...
@@ -318,6 +286,9 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunOnDevice() {
            1, 1,
            CUDNN_CROSS_CORRELATION));
#endif
+#if CUDNN_VERSION_MIN(7, 0, 0)
+       CUDNN_CHECK(cudnnSetConvolutionGroupCount(conv_desc, this->group));
+#endif
        RunWithType<float>();
    } else if (input(0).template IsType<float16>()) {
#ifdef WITH_CUDA_FP16
...
@@ -335,6 +306,9 @@ void CuDNNConv2dTransposeGradientOp<Context>::RunOnDevice() {
            1, 1,
            CUDNN_CROSS_CORRELATION));
#endif
+#if CUDNN_VERSION_MIN(7, 0, 0)
+       CUDNN_CHECK(cudnnSetConvolutionGroupCount(conv_desc, this->group));
+#endif
        RunWithType<float16>();
#endif // WITH_CUDA_FP16
    } else { LOG(FATAL) << "Unsupported input types."; }
...
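Both the forward and gradient paths above step through group slices by raw pointer offsets (x_offset, y_offset, weight_offset), advancing the data pointers by one slice per iteration. A hedged sketch of that arithmetic for NCHW, with illustrative names; the real operator derives these from count(1) and dim(-1) as in the hunks above:

//  Per-group offset arithmetic for the fallback path
//  (cudnn_group == group when cuDNN < 7, else 1).
struct GroupOffsets {
    int x_offset, y_offset, weight_offset;
};

GroupOffsets ComputeOffsets(int group,
                            int in_c, int in_h, int in_w,
                            int out_c, int out_h, int out_w,
                            int kh, int kw) {
    GroupOffsets o;
    o.x_offset = (in_c / group) * in_h * in_w;                      //  one input slice
    o.y_offset = (out_c / group) * out_h * out_w;                   //  one output slice
    o.weight_offset = (out_c / group) * (in_c / group) * kh * kw;   //  one filter slice
    return o;
}

The g-th convolution then reads Xdata + x_offset * g and writes Ydata + y_offset * g, matching the handle[g] loops in the diff.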
Dragon/src/operators/vision/cudnn_lrn_op.cc
View file @ c9db9ee
...
@@ -6,30 +6,33 @@ namespace dragon {
 template <class Context> template <typename T>
 void CuDNNLRNOp<Context>::RunWithType() {
-   cudnnSetTensorDesc<T>(&input_desc, &input(0));
-   cudnnSetTensorDesc<T>(&output_desc, output(0));
-   auto* Xdata = input(0).template data<T, Context>();
-   auto* Ydata = output(0)->template mutable_data<T, Context>();
-   CUDNN_CHECK(cudnnLRNCrossChannelForward(cudnn_handle(), norm_desc,
-       CUDNN_LRN_CROSS_CHANNEL_DIM1,
-       CUDNNType<T>::one, input_desc, Xdata,
-       CUDNNType<T>::zero, output_desc, Ydata));
+   if (this->data_format == "NCHW") {
+       cudnnSetTensorDesc<T>(&input_desc, &input(0));
+       cudnnSetTensorDesc<T>(&output_desc, output(0));
+       auto* Xdata = input(0).template data<T, Context>();
+       auto* Ydata = output(0)->template mutable_data<T, Context>();
+       CUDNN_CHECK(cudnnLRNCrossChannelForward(cudnn_handle(), norm_desc,
+           CUDNN_LRN_CROSS_CHANNEL_DIM1,
+           CUDNNType<T>::one, input_desc, Xdata,
+           CUDNNType<T>::zero, output_desc, Ydata));
+   }
+   else LOG(FATAL) << "Unknown data format: " << this->data_format;
 }

 template <class Context>
 void CuDNNLRNOp<Context>::RunOnDevice() {
    output(0)->ReshapeLike(input(0));
-   if (this->mode == ACROSS_CHANNELS) {
+   if (this->mode == "ACROSS_CHANNELS") {
        if (input(0).template IsType<float>()) RunWithType<float>();
#ifdef WITH_CUDA_FP16
        else if (input(0).template IsType<float16>()) RunWithType<float16>();
#endif
        else LOG(FATAL) << "Unsupported input types.";
-   } else {
+   }
+   else if (this->mode == "WITHIN_CHANNEL") {
+       LRNOp<Context>::RunOnDevice();
+   }
+   else {
+       LOG(FATAL) << "Unsupported lrn mode: " << this->mode;
+   }
 }
...
@@ -37,34 +40,38 @@ DEPLOY_CUDNN(LRN);
 template <class Context> template <typename T>
 void CuDNNLRNGradientOp<Context>::RunWithType() {
-   cudnnSetTensorDesc<T>(&input_desc, &input(-1));
-   cudnnSetTensorDesc<T>(&output_desc, output(0));
-   auto* dYdata = input(-1).template data<T, Context>();
-   auto* Xdata = input(0).template data<T, Context>();
-   auto* Ydata = input(1).template data<T, Context>();
-   auto* dXdata = output(0)->template mutable_data<T, Context>();
-   CUDNN_CHECK(cudnnLRNCrossChannelBackward(cudnn_handle(), norm_desc,
-       CUDNN_LRN_CROSS_CHANNEL_DIM1,
-       CUDNNType<T>::one, input_desc, Ydata,
-       input_desc, dYdata, output_desc, Xdata,
-       CUDNNType<T>::zero, output_desc, dXdata));
+   if (this->data_format == "NCHW") {
+       cudnnSetTensorDesc<T>(&input_desc, &input(-1));
+       cudnnSetTensorDesc<T>(&output_desc, output(0));
+       auto* dYdata = input(-1).template data<T, Context>();
+       auto* Xdata = input(0).template data<T, Context>();
+       auto* Ydata = input(1).template data<T, Context>();
+       auto* dXdata = output(0)->template mutable_data<T, Context>();
+       CUDNN_CHECK(cudnnLRNCrossChannelBackward(cudnn_handle(), norm_desc,
+           CUDNN_LRN_CROSS_CHANNEL_DIM1,
+           CUDNNType<T>::one, input_desc, Ydata,
+           input_desc, dYdata, output_desc, Xdata,
+           CUDNNType<T>::zero, output_desc, dXdata));
+   }
+   else LOG(FATAL) << "Unknown data format: " << this->data_format;
 }

 template <class Context>
 void CuDNNLRNGradientOp<Context>::RunOnDevice() {
    output(0)->ReshapeLike(input(0));
-   if (this->mode == ACROSS_CHANNELS) {
+   if (this->mode == "ACROSS_CHANNELS") {
        if (input(0).template IsType<float>()) RunWithType<float>();
#ifdef WITH_CUDA_FP16
        else if (input(0).template IsType<float16>()) RunWithType<float16>();
#endif
        else LOG(FATAL) << "Unsupported input types.";
-   } else {
+   }
+   else if (this->mode == "WITHIN_CHANNEL") {
+       LRNGradientOp<Context>::RunOnDevice();
+   }
+   else {
+       LOG(FATAL) << "Unsupported lrn mode: " << this->mode;
+   }
 }
...
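For reference, the ACROSS_CHANNELS path above maps onto cuDNN's cross-channel LRN, which (per the usual Caffe/cuDNN convention) computes y = x / (k + (alpha / n) * Σ x²)^beta over a window of n neighboring channels. A hedged CPU sketch on NCHW data, written for clarity rather than speed; all names are illustrative:

#include <algorithm>
#include <cmath>

//  Reference across-channel LRN on an NCHW buffer.
void LRNAcrossChannels(const float* x, float* y,
                       int N, int C, int H, int W,
                       int n /*local_size*/, float alpha, float beta, float k) {
    const int half = (n - 1) / 2;
    for (int i = 0; i < N; ++i)
    for (int c = 0; c < C; ++c)
    for (int h = 0; h < H; ++h)
    for (int w = 0; w < W; ++w) {
        float sum = 0.f;  //  sum of squares over the channel window
        for (int cc = std::max(0, c - half); cc <= std::min(C - 1, c + half); ++cc) {
            const float v = x[((i * C + cc) * H + h) * W + w];
            sum += v * v;
        }
        const int idx = ((i * C + c) * H + h) * W + w;
        y[idx] = x[idx] / std::pow(k + alpha / n * sum, beta);
    }
}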
Dragon/src/operators/vision/lrn_op.cc
View file @ c9db9ee
...
@@ -45,15 +45,16 @@ template <class Context> template <typename T>
 void LRNOp<Context>::PoolRunWithType() {
    pool_out = ws()->CreateTensor("/mnt/" + anchor() + "/pool_out");
    if (!pool_op) {
-       Argument ks, s, p, mode;
+       Argument ks, s, p, m, df;
        ks.set_name("kernel_size"); ks.add_ints(local_size);
        s.set_name("stride"); s.add_ints(1);
        p.set_name("pad"); p.add_ints((local_size - 1) / 2);
-       mode.set_name("mode"); mode.set_s("AVG");
-       OperatorDef pool_op_def = MakeOperatorDef("Pooling", "",
+       m.set_name("mode"); m.set_s("AVG");
+       df.set_name("data_format"); df.set_s(data_format);
+       OperatorDef pool_op_def = MakeOperatorDef("Pooling2d", "",
            vector<string>({ sqr_out->name() }),
            vector<string>({ pool_out->name() }),
-           vector<Argument>({ ks, s, p, mode }));
+           vector<Argument>({ ks, s, p, m, df }));
        if (this->op_def().has_device_option())
            pool_op_def.mutable_device_option()->CopyFrom(this->op_def().device_option());
        pool_op.reset(CreateOperator(pool_op_def, ws()));
...
@@ -99,12 +100,11 @@ void LRNOp<Context>::ProdRunWithType() {
 template <class Context>
 void LRNOp<Context>::RunOnDevice() {
-   if (mode == ACROSS_CHANNELS) {
+   if (mode == "ACROSS_CHANNELS") {
        if (input(0).template IsType<float>()) {
            AcrossRunWithType<float>();
        } else { LOG(FATAL) << "Unsupported input types."; }
-   } else {
+   }
+   else if (mode == "WITHIN_CHANNEL") {
        if (input(0).template IsType<float>()) {
            SplitRunWithType<float>();
            SquareRunWithType<float>();
...
@@ -112,6 +112,8 @@ void LRNOp<Context>::RunOnDevice() {
            PowRunWithType<float>();
            ProdRunWithType<float>();
        } else { LOG(FATAL) << "Unsupported input types."; }
+   }
+   else {
+       LOG(FATAL) << "Unsupported lrn mode: " << mode;
    }
 }
...
@@ -135,10 +137,10 @@ void LRNGradientOp<Context>::ProdRunWithType() {
    Argument operation;
    operation.set_name("operation"); operation.set_s("PROD");
    OperatorDef prod_op_def = MakeOperatorDef("EltwiseGradient", "",
        vector<string>({ prod_in->name(), pow_out->name(),
            input(-1).name() }),
        vector<string>({ prod_in->name() + "_grad",
            pow_out->name() + "_grad" }),
        vector<Argument>({ operation }));
    if (this->op_def().has_device_option())
...
@@ -173,17 +175,18 @@ template <class Context> template <typename T>
 void LRNGradientOp<Context>::PoolRunWithType() {
    sqr_out = ws()->GetTensor("/mnt/" + anchor() + "/sqr_out");
    if (!pool_op) {
-       Argument ks, s, p, mode;
+       Argument ks, s, p, m, df;
        ks.set_name("kernel_size"); ks.add_ints(local_size);
        s.set_name("stride"); s.add_ints(1);
        p.set_name("pad"); p.add_ints((local_size - 1) / 2);
-       mode.set_name("mode"); mode.set_s("AVG");
-       OperatorDef pool_op_def = MakeOperatorDef("PoolingGradient", "",
+       m.set_name("mode"); m.set_s("AVG");
+       df.set_name("data_format"); df.set_s(data_format);
+       OperatorDef pool_op_def = MakeOperatorDef("Pooling2dGradient", "",
            vector<string>({ sqr_out->name(), pool_out->name(), pool_out->name() + "_grad" }),
            vector<string>({ sqr_out->name() + "_grad" }),
-           vector<Argument>({ ks, s, p, mode }));
+           vector<Argument>({ ks, s, p, m, df }));
        if (this->op_def().has_device_option())
            pool_op_def.mutable_device_option()->CopyFrom(this->op_def().device_option());
        pool_op.reset(CreateOperator(pool_op_def, ws()));
...
@@ -224,12 +227,11 @@ void LRNGradientOp<Context>::SplitRunWithType() {
 template <class Context>
 void LRNGradientOp<Context>::RunOnDevice() {
-   if (mode == ACROSS_CHANNELS) {
+   if (mode == "ACROSS_CHANNELS") {
        if (input(0).template IsType<float>()) {
            AcrossRunWithType<float>();
        } else { LOG(FATAL) << "Unsupported input types."; }
-   } else {
+   }
+   else if (mode == "WITHIN_CHANNEL") {
        if (input(0).template IsType<float>()) {
            ProdRunWithType<float>();
            PowRunWithType<float>();
...
@@ -237,6 +239,8 @@ void LRNGradientOp<Context>::RunOnDevice() {
            SquareRunWithType<float>();
            SplitRunWithType<float>();
        } else { LOG(FATAL) << "Unsupported input types."; }
+   }
+   else {
+       LOG(FATAL) << "Unsupported lrn mode: " << mode;
    }
 }
...
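The WITHIN_CHANNEL path above composes primitive sub-operators at runtime: Split, Square, average Pooling2d, Pow, and an Eltwise PROD. What that pipeline computes, in one place, is roughly y = x * (k + alpha * AvgPool(x²))^(-beta) over a spatial window. A hedged sketch of the forward pass on a single NCHW channel plane; parameter names mirror the diff, and the k/alpha/beta conventions follow the usual Caffe-style within-channel LRN rather than being quoted from this commit:

#include <cmath>

//  Within-channel LRN on one H x W plane: square, average-pool,
//  power, then multiply back into the input.
void LRNWithinChannel(const float* x, float* y, int H, int W,
                      int local_size, float alpha, float beta, float k) {
    const int pad = (local_size - 1) / 2;
    for (int h = 0; h < H; ++h)
    for (int w = 0; w < W; ++w) {
        float sum = 0.f;  //  average pooling over the squared window
        for (int hh = h - pad; hh <= h + pad; ++hh)
        for (int ww = w - pad; ww <= w + pad; ++ww)
            if (hh >= 0 && hh < H && ww >= 0 && ww < W)
                sum += x[hh * W + ww] * x[hh * W + ww];
        const float avg = sum / (local_size * local_size);
        y[h * W + w] = x[h * W + w] * std::pow(k + alpha * avg, -beta);
    }
}

Building the pipeline out of existing operators (via MakeOperatorDef, copying the parent's device option) keeps the gradient for free: each stage already has a registered Gradient op.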
Dragon/src/utils/cudnn_device.cc
View file @ c9db9ee
...
@@ -65,7 +65,35 @@ void cudnnSetTensor4dDesc(cudnnTensorDescriptor_t* desc,
            dims[3], dims[1], dims[2]));
    }
    else LOG(FATAL) << "Unknown data format: " << data_format;
 }

+template <typename T>
+void cudnnSetTensor4dDescWithGroup(cudnnTensorDescriptor_t* desc,
+                                   const string& data_format,
+                                   const vector<TIndex>& dims,
+                                   const TIndex group) {
+   if (data_format == "NCHW") {
+       CUDNN_CHECK(cudnnSetTensor4dDescriptorEx(*desc, CUDNNType<T>::type,
+           dims[0], dims[1] / group, dims[2], dims[3],
+           dims[1] * dims[2] * dims[3],
+           dims[2] * dims[3],
+           dims[3],
+           1));
+   } else if (data_format == "NHWC") {
+       CUDNN_CHECK(cudnnSetTensor4dDescriptorEx(*desc, CUDNNType<T>::type,
+           dims[0], dims[3] / group, dims[1], dims[2],
+           dims[1] * dims[2] * dims[3],
+           1,
+           dims[2] * dims[3],
+           dims[3]));
+   } else LOG(FATAL) << "Unknown data format: " << data_format;
+}

 template <typename T>
...
@@ -87,7 +115,7 @@ void cudnnSetTensor5dDesc(cudnnTensorDescriptor_t* desc,
            5, fake_dims.data(), fake_strides.data()));
    }
    else LOG(FATAL) << "Unknown data format: " << data_format;
 }

 template <typename T>
...
@@ -169,6 +197,7 @@ template void cudnnSetTensorDesc<float>(cudnnTensorDescriptor_t*, const vector<T
 template void cudnnSetTensor4dDesc<float>(cudnnTensorDescriptor_t*, const string&, const vector<TIndex>&);
 template void cudnnSetTensor5dDesc<float>(cudnnTensorDescriptor_t*, const string&, const vector<TIndex>&);
 template void cudnnSetTensor3dDesc<float>(cudnnTensorDescriptor_t*, const string&, const vector<TIndex>&);
+template void cudnnSetTensor4dDescWithGroup<float>(cudnnTensorDescriptor_t*, const string&, const vector<TIndex>&, const TIndex);
 template void cudnnSetTensorDesc<float>(cudnnTensorDescriptor_t*, const vector<TIndex>&, const vector<TIndex>&);
...
@@ -180,6 +209,7 @@ template void cudnnSetTensorDesc<double>(cudnnTensorDescriptor_t*, const vector<
 template void cudnnSetTensor4dDesc<double>(cudnnTensorDescriptor_t*, const string&, const vector<TIndex>&);
 template void cudnnSetTensor5dDesc<double>(cudnnTensorDescriptor_t*, const string&, const vector<TIndex>&);
 template void cudnnSetTensor3dDesc<double>(cudnnTensorDescriptor_t*, const string&, const vector<TIndex>&);
+template void cudnnSetTensor4dDescWithGroup<double>(cudnnTensorDescriptor_t*, const string&, const vector<TIndex>&, const TIndex);
 template void cudnnSetTensorDesc<double>(cudnnTensorDescriptor_t*, const vector<TIndex>&, const vector<TIndex>&);
...
@@ -192,9 +222,10 @@ template void cudnnSetTensorDesc<float16>(cudnnTensorDescriptor_t*, const vector
 template void cudnnSetTensor4dDesc<float16>(cudnnTensorDescriptor_t*, const string&, const vector<TIndex>&);
 template void cudnnSetTensor5dDesc<float16>(cudnnTensorDescriptor_t*, const string&, const vector<TIndex>&);
 template void cudnnSetTensor3dDesc<float16>(cudnnTensorDescriptor_t*, const string&, const vector<TIndex>&);
+template void cudnnSetTensor4dDescWithGroup<float16>(cudnnTensorDescriptor_t*, const string&, const vector<TIndex>&, const TIndex);
 template void cudnnSetTensorDesc<float16>(cudnnTensorDescriptor_t*, const vector<TIndex>&, const vector<TIndex>&);
#endif

 }    // namespace dragon

-#endif // WITH_CUDNN
\ No newline at end of file
+#endif // WITH_CUDNN
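The new cudnnSetTensor4dDescWithGroup helper is a stride trick: it describes an N x (C/group) x H x W view while keeping the strides of the full N x C x H x W buffer, so one descriptor addresses every group slice and the caller selects the slice purely by pointer offset. A hedged, float-only sketch of the NCHW case, assuming Dragon's CUDNN_CHECK macro (the helper name is illustrative):

#include <cudnn.h>

//  Describe one group slice of a full NCHW buffer without copying:
//  shrink the channel extent, keep full-buffer strides, and let
//  `data + g * (C / group) * H * W` pick the g-th slice.
void SetGroupedDescNCHW(cudnnTensorDescriptor_t desc,
                        int N, int C, int H, int W, int group) {
    CUDNN_CHECK(cudnnSetTensor4dDescriptorEx(desc, CUDNN_DATA_FLOAT,
        N, C / group, H, W,   //  logical dims of one slice
        C * H * W,            //  nStride spans ALL channels, not C / group
        H * W,                //  cStride
        W,                    //  hStride
        1));                  //  wStride
}

The NHWC branch in the diff applies the same idea with the channel stride set to 1 and the slice offset taken along the innermost dimension.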