Commit b35f9320 by Ting PAN

Change the representation of NULL tensor

1 parent 418e0c0a
Showing with 816 additions and 714 deletions
...@@ -12,6 +12,7 @@ Quick Reference ...@@ -12,6 +12,7 @@ Quick Reference
List Brief List Brief
============================== ============================================================================= ============================== =============================================================================
`IsCUDADriverSufficient`_ Is cuda driver sufficient? `IsCUDADriverSufficient`_ Is cuda driver sufficient?
`EnableCUDNN`_ Enable the CuDNN engine.
`GetDevice`_ Get the current active cuda device. `GetDevice`_ Get the current active cuda device.
`SynchronizeStream`_ Synchronize the specified cuda stream. `SynchronizeStream`_ Synchronize the specified cuda stream.
============================== ============================================================================= ============================== =============================================================================
...@@ -20,5 +21,6 @@ List Brief ...@@ -20,5 +21,6 @@ List Brief
:members: :members:
.. _IsCUDADriverSufficient: #dragon.core.cuda.IsCUDADriverSufficient .. _IsCUDADriverSufficient: #dragon.core.cuda.IsCUDADriverSufficient
.. _EnableCUDNN: #dragon.core.cuda.EnableCUDNN
.. _GetDevice: #dragon.core.cuda.GetDevice .. _GetDevice: #dragon.core.cuda.GetDevice
.. _SynchronizeStream: #dragon.core.cuda.SynchronizeStream .. _SynchronizeStream: #dragon.core.cuda.SynchronizeStream
\ No newline at end of file
...@@ -93,6 +93,7 @@ API Reference ...@@ -93,6 +93,7 @@ API Reference
.. automethod:: __eq__ .. automethod:: __eq__
.. automethod:: __repr__ .. automethod:: __repr__
.. automethod:: __getitem__ .. automethod:: __getitem__
.. automethod:: __setitem__
.. automethod:: __call__ .. automethod:: __call__
.. _Tensor.Variable: #dragon.core.tensor.Tensor.Variable .. _Tensor.Variable: #dragon.core.tensor.Tensor.Variable
......
...@@ -59,16 +59,12 @@ Custom ...@@ -59,16 +59,12 @@ Custom
:hidden: :hidden:
operators/custom/minibatch operators/custom/minibatch
operators/custom/data_process
operators/custom/vec_mult
========================================= ===================================================================== ========================================= =====================================================================
List Brief List Brief
========================================= ===================================================================== ========================================= =====================================================================
`dragon.operators.custom.minibatch`_ How to form a minibatch based on `dragon.io`_ package. `dragon.operators.custom.minibatch`_ Form a mini-batch based on `dragon.utils.vision`_ package.
`dragon.operators.custom.data_process`_ How to custom a RunOp for data processing.
`dragon.operators.custom.vec_mult`_ How to custom a TemplateOp for Vector Multiplication.
========================================= ===================================================================== ========================================= =====================================================================
...@@ -99,9 +95,9 @@ List Brief ...@@ -99,9 +95,9 @@ List Brief
.. _dragon.operators.recurrent: operators/recurrent.html .. _dragon.operators.recurrent: operators/recurrent.html
.. _dragon.operators.loss: operators/loss.html .. _dragon.operators.loss: operators/loss.html
.. _dragon.operators.norm: operators/norm.html .. _dragon.operators.norm: operators/norm.html
.. _dragon.io: io.html
.. _dragon.operators.custom.minibatch: operators/custom/minibatch.html .. _dragon.operators.custom.minibatch: operators/custom/minibatch.html
.. _dragon.operators.custom.data_process: operators/custom/data_process.html .. _dragon.operators.custom.data_process: operators/custom/data_process.html
.. _dragon.operators.custom.vec_mult: operators/custom/vec_mult.html .. _dragon.operators.custom.vec_mult: operators/custom/vec_mult.html
.. _dragon.operators.contrib.rcnn: operators/contrib/rcnn.html .. _dragon.operators.contrib.rcnn: operators/contrib/rcnn.html
.. _dragon.utils.vision: utils.html#vision
==================
:mod:`DataProcess`
==================
.. toctree::
:hidden:
.. currentmodule:: dragon.operators.custom.data_process
.. autoclass:: DataProcessOp
:members:
\ No newline at end of file
...@@ -10,4 +10,4 @@ ...@@ -10,4 +10,4 @@
.. autoclass:: MiniBatchOp .. autoclass:: MiniBatchOp
:members: :members:
.. _dragon.io: ../../io.html .. _dragon.utils.vision: ../../utils.html#vision
\ No newline at end of file \ No newline at end of file
==============
:mod:`VecMult`
==============
.. toctree::
:hidden:
.. currentmodule:: dragon.operators.custom.vec_mult
.. autoclass:: VecMultOp
:members:
\ No newline at end of file
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
:members: :members:
.. _LMDB: http://lmdb.readthedocs.io/en/release .. _LMDB: http://lmdb.readthedocs.io/en/release
.. _DataBatch: ../io/data_batch.html#dragon.io.data_batch .. _DataBatch: ../utils/vision/data_batch.html
.. _DataReader: ../io/data_reader.html#dragon.io.data_reader .. _DataReader: ../utils/vision/data_reader.html
.. _DataTransformer: ../io/data_transformer.html#dragon.io.data_transformer .. _DataTransformer: ../utils/vision/data_transformer.html
.. _BlobFetcher: ../io/blob_fetcher.html#dragon.io.blob_fetcher .. _BlobFetcher: ../utils/vision/blob_fetcher.html
\ No newline at end of file \ No newline at end of file
...@@ -23,7 +23,7 @@ if (NOT THIRD_PARTY_DIR) ...@@ -23,7 +23,7 @@ if (NOT THIRD_PARTY_DIR)
set(THIRD_PARTY_DIR ${PROJECT_SOURCE_DIR}/../ThirdParty) set(THIRD_PARTY_DIR ${PROJECT_SOURCE_DIR}/../ThirdParty)
endif() endif()
# Set your protobuf compiler(protc) if necessary # Set your protobuf compiler(protoc) if necessary
# if not, a default "protoc" in the environment path will be used # if not, a default "protoc" in the environment path will be used
if (NOT PROTOC_EXECUTABLE) if (NOT PROTOC_EXECUTABLE)
set(PROTOC_EXECUTABLE protoc) set(PROTOC_EXECUTABLE protoc)
......
...@@ -128,6 +128,8 @@ class CUDAObject { ...@@ -128,6 +128,8 @@ class CUDAObject {
#ifdef WITH_CUDNN #ifdef WITH_CUDNN
vector<cudnnHandle_t> cudnn_handles[CUDA_MAX_DEVICES]; vector<cudnnHandle_t> cudnn_handles[CUDA_MAX_DEVICES];
#endif #endif
bool cudnn_enabled = true;
}; };
class CUDAContext { class CUDAContext {
......
...@@ -84,7 +84,7 @@ class Graph : public GraphBase { ...@@ -84,7 +84,7 @@ class Graph : public GraphBase {
/*! \brief Create a graph from the raw def */ /*! \brief Create a graph from the raw def */
GraphBase* NewGraph( GraphBase* NewGraph(
const GraphDef& meta_graph, const GraphDef& def,
Workspace* ws); Workspace* ws);
DECLARE_REGISTRY( DECLARE_REGISTRY(
......
...@@ -142,7 +142,7 @@ class Operator : public OperatorBase { ...@@ -142,7 +142,7 @@ class Operator : public OperatorBase {
allow_run_ = true; allow_run_ = true;
allow_run_ &= MPICheck(); allow_run_ &= MPICheck();
allow_run_ &= (!(OutputSize() == 1 && allow_run_ &= (!(OutputSize() == 1 &&
Output(0)->name() == "ignore")); Output(0)->name() == "NULL"));
} }
/*! \brief Run this operator on the specified stream */ /*! \brief Run this operator on the specified stream */
...@@ -168,10 +168,10 @@ class Operator : public OperatorBase { ...@@ -168,10 +168,10 @@ class Operator : public OperatorBase {
/*! \brief Coordinate the context of inputs and outputs */ /*! \brief Coordinate the context of inputs and outputs */
virtual void MemorySwitch() { virtual void MemorySwitch() {
for (auto* e : inputs_) for (auto* e : inputs_)
if(e->name() != "ignore") if(e->name() != "NULL")
e->SwitchToDevice(ctx()->device_id()); e->SwitchToDevice(ctx()->device_id());
for (auto* e : outputs_) for (auto* e : outputs_)
if(e->name() != "ignore") if(e->name() != "NULL")
e->SwitchToDevice(ctx()->device_id()); e->SwitchToDevice(ctx()->device_id());
} }
......
...@@ -76,23 +76,23 @@ class GradientMakerBase { ...@@ -76,23 +76,23 @@ class GradientMakerBase {
const string I(const int i) const { const string I(const int i) const {
return i < def.input_size() ? return i < def.input_size() ?
def.input(i) : "ignore"; def.input(i) : "NULL";
} }
const string O(const int i) const { const string O(const int i) const {
return i < def.output_size() ? return i < def.output_size() ?
def.output(i) : "ignore"; def.output(i) : "NULL";
} }
string GI(const int i) { string GI(const int i) {
if (i >= g_inputs_.size()) return "ignore"; if (i >= g_inputs_.size()) return "NULL";
g_inputs_[i] = def.input(i) + "_grad"; g_inputs_[i] = def.input(i) + "_grad";
return g_inputs_[i]; return g_inputs_[i];
} }
const string GO(const int i) const { const string GO(const int i) const {
return i < g_outputs_.size() ? return i < g_outputs_.size() ?
g_outputs_[i] : "ignore"; g_outputs_[i] : "NULL";
} }
protected: protected:
......
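For readers skimming the convention change: the maker now pads any out-of-range input, output, or gradient slot with the literal name "NULL" instead of "ignore". A toy Python restatement of the I() accessor above (tensor names are illustrative only):
>>> def I(op_inputs, i):
...     # Out-of-range slots fall back to the 'NULL' placeholder, as in GradientMakerBase::I.
...     return op_inputs[i] if i < len(op_inputs) else 'NULL'
>>> I(['conv1/data', 'conv1/W'], 1)
'conv1/W'
>>> I(['conv1/data', 'conv1/W'], 2)
'NULL'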
...@@ -12,9 +12,9 @@ ...@@ -12,9 +12,9 @@
#ifndef DRAGON_CORE_OPERATOR_SCHEMA_H_ #ifndef DRAGON_CORE_OPERATOR_SCHEMA_H_
#define DRAGON_CORE_OPERATOR_SCHEMA_H_ #define DRAGON_CORE_OPERATOR_SCHEMA_H_
#include <functional>
#include <limits> #include <limits>
#include <functional>
#include "common.h" #include "common.h"
...@@ -92,7 +92,7 @@ class OpSchemaRegistry { ...@@ -92,7 +92,7 @@ class OpSchemaRegistry {
} }
private: private:
static Map<string, OpSchema>& schema_map() { static Map<string, OpSchema>& schema_map() {
static Map<string, OpSchema> schema_map_; static Map<string, OpSchema> schema_map_;
return schema_map_; return schema_map_;
} }
......
...@@ -41,7 +41,7 @@ class GradientGatherOp final : public Operator<Context> { ...@@ -41,7 +41,7 @@ class GradientGatherOp final : public Operator<Context> {
GradientGatherOp(const OperatorDef& def, Workspace* ws) GradientGatherOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws) { : Operator<Context>(def, ws) {
for (int i = 0; i < InputSize(); i++) for (int i = 0; i < InputSize(); i++)
if (Input(i).name() != "ignore") indices.push_back(i); if (Input(i).name() != "NULL") indices.push_back(i);
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
......
...@@ -43,7 +43,7 @@ class Conv2dGradientOp : public Conv2dOp<Context> { ...@@ -43,7 +43,7 @@ class Conv2dGradientOp : public Conv2dOp<Context> {
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
USE_CONVOLUTION_FUNCTIONS; USE_CONVOLUTION_FUNCTIONS;
bool HasBias() override { return Output(2)->name() != "ignore"; } bool HasBias() override { return Output(2)->name() != "NULL"; }
void RunOnDevice() override; void RunOnDevice() override;
template <typename T> void RunWithType(); template <typename T> void RunWithType();
......
...@@ -47,7 +47,7 @@ class ConvTranspose2dGradientOp : public ConvTranspose2dOp<Context> { ...@@ -47,7 +47,7 @@ class ConvTranspose2dGradientOp : public ConvTranspose2dOp<Context> {
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
USE_CONVOLUTION_FUNCTIONS; USE_CONVOLUTION_FUNCTIONS;
bool HasBias() override { return Output(2)->name() != "ignore"; } bool HasBias() override { return Output(2)->name() != "NULL"; }
void RunOnDevice() override; void RunOnDevice() override;
template <typename T> void RunWithType(); template <typename T> void RunWithType();
......
...@@ -50,7 +50,7 @@ class DepthwiseConv2dGradientOp ...@@ -50,7 +50,7 @@ class DepthwiseConv2dGradientOp
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
USE_CONVOLUTION_FUNCTIONS; USE_CONVOLUTION_FUNCTIONS;
bool HasBias() override { return Output(2)->name() != "ignore"; } bool HasBias() override { return Output(2)->name() != "NULL"; }
void RunOnDevice() override; void RunOnDevice() override;
template <typename T> void RunWithType(); template <typename T> void RunWithType();
......
...@@ -131,7 +131,7 @@ struct CUDADeviceProps { ...@@ -131,7 +131,7 @@ struct CUDADeviceProps {
CUDADeviceProps() : props(CUDA_NUM_DEVICES()) { CUDADeviceProps() : props(CUDA_NUM_DEVICES()) {
for (int i = 0; i < CUDA_NUM_DEVICES(); ++i) for (int i = 0; i < CUDA_NUM_DEVICES(); ++i)
CUDA_CHECK(cudaGetDeviceProperties(&props[i], i)); CUDA_CHECK(cudaGetDeviceProperties(&props[i], i));
} }
vector<cudaDeviceProp> props; vector<cudaDeviceProp> props;
}; };
......
...@@ -28,8 +28,7 @@ inline OperatorDef MakeOperatorDef( ...@@ -28,8 +28,7 @@ inline OperatorDef MakeOperatorDef(
const IterableInputs& inputs, const IterableInputs& inputs,
const IterableOutputs& outputs, const IterableOutputs& outputs,
const IterableArgs& args, const IterableArgs& args,
const DeviceOption& device_option, const DeviceOption& device_option) {
const string& engine) {
OperatorDef def; OperatorDef def;
def.set_type(type); def.set_type(type);
def.set_name(name); def.set_name(name);
...@@ -51,8 +50,8 @@ inline OperatorDef MakeOperatorDef( ...@@ -51,8 +50,8 @@ inline OperatorDef MakeOperatorDef(
const IterableOutputs& outputs, const IterableOutputs& outputs,
const IterableArgs& args) { const IterableArgs& args) {
return MakeOperatorDef( return MakeOperatorDef(
type, name, inputs, outputs, args, type, name, inputs, outputs,
DeviceOption(), ""); args, DeviceOption());
} }
template <class IterableInputs, template <class IterableInputs,
...@@ -64,7 +63,7 @@ inline OperatorDef MakeOperatorDef( ...@@ -64,7 +63,7 @@ inline OperatorDef MakeOperatorDef(
const IterableOutputs& outputs) { const IterableOutputs& outputs) {
return MakeOperatorDef( return MakeOperatorDef(
type, name, inputs, outputs, type, name, inputs, outputs,
vector<Argument>(), DeviceOption(), ""); vector<Argument>(), DeviceOption());
} }
bool ParseProtoFromText( bool ParseProtoFromText(
......
...@@ -88,9 +88,8 @@ std::string CreateGraph( ...@@ -88,9 +88,8 @@ std::string CreateGraph(
auto graph_def_copy(*graph_def); auto graph_def_copy(*graph_def);
// Overwritten device options // Overwritten device options
DeviceOption* device_option = graph_def_copy.mutable_device_option(); DeviceOption* device_option = graph_def_copy.mutable_device_option();
device_option->set_device_type((DeviceType)device.device_type()); device_option->set_device_type((DeviceTypeProto)device.device_type());
device_option->set_device_id(device.device_id()); device_option->set_device_id(device.device_id());
device_option->set_engine("CUDNN");
auto* graph = ws->CreateGraph(graph_def_copy); auto* graph = ws->CreateGraph(graph_def_copy);
if (!graph) LOG(FATAL) << "Can not create the graph."; if (!graph) LOG(FATAL) << "Can not create the graph.";
return graph->name(); return graph->name();
......
...@@ -53,6 +53,7 @@ void AddGradientMethods(pybind11::module& m) { ...@@ -53,6 +53,7 @@ void AddGradientMethods(pybind11::module& m) {
if (is_sharing) maker.Share(backward_ops); if (is_sharing) maker.Share(backward_ops);
pybind11::gil_scoped_release g; pybind11::gil_scoped_release g;
for (auto& op : backward_ops.op()) { for (auto& op : backward_ops.op()) {
if (op.type().empty()) continue;
if (verbose) std::cout << op.DebugString() << std::endl; if (verbose) std::cout << op.DebugString() << std::endl;
if (op.has_uid()) ws()->RunOperator(op); if (op.has_uid()) ws()->RunOperator(op);
else ws()->RunOperatorOnce(op); else ws()->RunOperatorOnce(op);
......
...@@ -31,6 +31,13 @@ void AddCUDAMethods(pybind11::module& m) { ...@@ -31,6 +31,13 @@ void AddCUDAMethods(pybind11::module& m) {
#endif #endif
}); });
m.def("EnableCUDNN", [](bool enabled) {
#ifdef WITH_CUDA
CUDAContext::cuda_object()
->cudnn_enabled = enabled;
#endif
});
m.def("cudaGetDevice", []() { m.def("cudaGetDevice", []() {
return CUDAContext::active_device_id(); return CUDAContext::active_device_id();
}); });
......
...@@ -27,9 +27,6 @@ option['device'] = 'cpu' ...@@ -27,9 +27,6 @@ option['device'] = 'cpu'
# The device index # The device index
option['device_id'] = 0 option['device_id'] = 0
# Whether to use cuDNN if possible
option['use_cudnn'] = False
# The global random seed # The global random seed
option['random_seed'] = 3 option['random_seed'] = 3
...@@ -77,15 +74,13 @@ def EnableCPU(): ...@@ -77,15 +74,13 @@ def EnableCPU():
option['device'] = 'cpu' option['device'] = 'cpu'
def EnableCUDA(gpu_id=0, use_cudnn=True): def EnableCUDA(gpu_id=0):
"""Enable NVIDIA's CUDA mode globally. """Enable NVIDIA's CUDA mode globally.
Parameters Parameters
---------- ----------
gpu_id : int gpu_id : int
The index of GPU to use. The index of GPU to use.
use_cudnn : boolean
Whether to use cuDNN if available.
Returns Returns
------- -------
...@@ -95,7 +90,6 @@ def EnableCUDA(gpu_id=0, use_cudnn=True): ...@@ -95,7 +90,6 @@ def EnableCUDA(gpu_id=0, use_cudnn=True):
global option global option
option['device'] = 'cuda' option['device'] = 'cuda'
option['device_id'] = gpu_id option['device_id'] = gpu_id
option['use_cudnn'] = use_cudnn
def EnableCNML(mlu_id=0): def EnableCNML(mlu_id=0):
......
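With use_cudnn removed from EnableCUDA, the CuDNN engine is toggled through the new global switch instead. A minimal sketch, assuming the module paths dragon.config and dragon.core.cuda used elsewhere in this change:
>>> import dragon.config
>>> import dragon.core.cuda as cuda
>>> dragon.config.EnableCUDA(gpu_id=0)   # selects the device only; no use_cudnn argument
>>> cuda.EnableCUDNN(True)               # CuDNN is now enabled or disabled globally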
...@@ -24,12 +24,28 @@ def IsCUDADriverSufficient(): ...@@ -24,12 +24,28 @@ def IsCUDADriverSufficient():
Returns Returns
------- -------
boolean boolean
``True`` if your device(s) support CUDA otherwise ``False``. *True* if your device(s) support CUDA otherwise *False*.
""" """
return _C.IsCUDADriverSufficient() return _C.IsCUDADriverSufficient()
def EnableCUDNN(enabled=True):
"""Enable the CuDNN engine.
Parameters
----------
enabled : boolean
*True* to enable.
Returns
-------
None
"""
return _C.EnableCUDNN(enabled)
def GetDevice(): def GetDevice():
"""Get the current active cuda device. """Get the current active cuda device.
......
...@@ -164,7 +164,7 @@ class GraphGradientMaker(object): ...@@ -164,7 +164,7 @@ class GraphGradientMaker(object):
is_skip, gen_grads = \ is_skip, gen_grads = \
cls.CheckGrad(forward_op, inputs_to_grads, blacklist, targets) cls.CheckGrad(forward_op, inputs_to_grads, blacklist, targets)
# Missing grads are represented as ``None`` # Missing grads are represented as ``None``
g_outputs = list(inputs_to_grads.get(name, 'ignore') for name in forward_op.output) g_outputs = list(inputs_to_grads.get(name, 'NULL') for name in forward_op.output)
g_ops, g_inputs, defaults = cls.CreateGrad(forward_op, g_outputs) g_ops, g_inputs, defaults = cls.CreateGrad(forward_op, g_outputs)
# Append ops # Append ops
......
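The changed line above carries the whole convention: forward outputs whose gradient is unknown now map to the name 'NULL'. A pure-Python illustration with hypothetical tensor names:
>>> inputs_to_grads = {'fc1': 'fc1_grad'}
>>> forward_op_outputs = ['fc1', 'mask']
>>> [inputs_to_grads.get(name, 'NULL') for name in forward_op_outputs]
['fc1_grad', 'NULL']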
...@@ -72,7 +72,7 @@ else: ...@@ -72,7 +72,7 @@ else:
def MakeOperatorDef( def MakeOperatorDef(
op_type, inputs=(), outputs=(), op_type, inputs=(), outputs=(),
name='', uid=None, device_option=None, name='', uid=None, device_option=None,
arg=None, engine=None, **kwargs): arg=None, **kwargs):
operator = pb.OperatorDef() operator = pb.OperatorDef()
operator.type = op_type operator.type = op_type
operator.name = name operator.name = name
...@@ -80,14 +80,12 @@ def MakeOperatorDef( ...@@ -80,14 +80,12 @@ def MakeOperatorDef(
operator.output.extend([str(tensor) for tensor in outputs]) operator.output.extend([str(tensor) for tensor in outputs])
if device_option is not None: if device_option is not None:
operator.device_option.CopyFrom(device_option) operator.device_option.CopyFrom(device_option)
if engine is not None:
operator.device_option.engine = engine
if 'random_seed' in kwargs: if 'random_seed' in kwargs:
operator.device_option.random_seed = kwargs['random_seed'] operator.device_option.random_seed = kwargs['random_seed']
del kwargs['random_seed'] del kwargs['random_seed']
if uid is not None: operator.uid = uid if uid is not None: operator.uid = uid
if arg is not None: operator.arg.extend(arg) if arg is not None: operator.arg.extend(arg)
for k,v in kwargs.items(): for k, v in kwargs.items():
if v is None: continue if v is None: continue
operator.arg.add().CopyFrom(MakeArgument(k,v)) operator.arg.add().CopyFrom(MakeArgument(k,v))
return operator return operator
...@@ -96,46 +94,36 @@ def MakeOperatorDef( ...@@ -96,46 +94,36 @@ def MakeOperatorDef(
def MakeCXXOperatorDef( def MakeCXXOperatorDef(
op_type, inputs=(), outputs=(), op_type, inputs=(), outputs=(),
name='', uid=None, device_option=None, name='', uid=None, device_option=None,
arg=None, engine=None, **kwargs): arg=None, **kwargs):
c_def = _C.OperatorDef() c_def = _C.OperatorDef()
py_def = MakeOperatorDef( py_def = MakeOperatorDef(
op_type, inputs, outputs, name, uid, op_type, inputs, outputs, name, uid,
device_option, arg, engine, **kwargs) device_option, arg, **kwargs)
c_def.ParseFrom(py_def.SerializeToString()) c_def.ParseFrom(py_def.SerializeToString())
return c_def return c_def
def MakeDeviceOption( def MakeDeviceOption(device_type, device_id, rng_seed=None):
device_type, device_id,
engine=None, rng_seed=None):
option = pb.DeviceOption() option = pb.DeviceOption()
option.device_type = device_type option.device_type = device_type
option.device_id = device_id option.device_id = device_id
if engine is not None: option.engine = engine
if rng_seed is not None: option.random_seed = rng_seed if rng_seed is not None: option.random_seed = rng_seed
return option return option
_PREDEFINED_DEVICE_LIMITS = 16 _PREDEFINED_DEVICE_LIMITS = 16
_PREDEFINED_DEVICE_ENGINES = ['', 'CUDNN']
_PREDEFINED_DEVICE_DICT = {'cpu': 0, 'cuda': 1, 'cnml': 2} _PREDEFINED_DEVICE_DICT = {'cpu': 0, 'cuda': 1, 'cnml': 2}
_PREDEFINED_DEVICE_OPTION_DICT = {} _PREDEFINED_DEVICE_OPTION_DICT = {}
for i in range(_PREDEFINED_DEVICE_LIMITS): for i in range(_PREDEFINED_DEVICE_LIMITS):
for device, identify in _PREDEFINED_DEVICE_DICT.items(): for device, identify in _PREDEFINED_DEVICE_DICT.items():
for engine in _PREDEFINED_DEVICE_ENGINES: _PREDEFINED_DEVICE_OPTION_DICT[(device, i)] = \
_PREDEFINED_DEVICE_OPTION_DICT[(device, i, engine)] = \ MakeDeviceOption(identify, i)
MakeDeviceOption(identify, i, engine)
if device == 'cuda':
_PREDEFINED_DEVICE_OPTION_DICT[('cuda', i)] = \ def GetDeviceOption(device_type, device_id=0, rng_seed=None):
MakeDeviceOption(identify, i, 'CUDNN') ctx = (device_type, device_id)
def GetDeviceOption(
device_type, device_id=0,
engine=None, rng_seed=None):
ctx = (device_type, device_id, engine if engine else '')
option = _PREDEFINED_DEVICE_OPTION_DICT[ctx] option = _PREDEFINED_DEVICE_OPTION_DICT[ctx]
if rng_seed is not None: if rng_seed is not None:
option_copy = copy.deepcopy(option) option_copy = copy.deepcopy(option)
...@@ -149,16 +137,15 @@ def GetDefaultDeviceOption(): ...@@ -149,16 +137,15 @@ def GetDefaultDeviceOption():
if device_info is not None: if device_info is not None:
return GetDeviceOption( return GetDeviceOption(
device_info['device_type'], device_info['device_type'],
device_info['device_id'], device_info['device_id'])
device_info['device_engine'])
return None return None
def GetGlobalDeviceOption(): def GetGlobalDeviceOption():
option = cfg.GetGlobalOptions() option = cfg.GetGlobalOptions()
return GetDeviceOption( return GetDeviceOption(
option['device'], option['device_id'], option['device'],
'CUDNN' if option['use_cudnn'] else '') option['device_id'])
# Fix the python stdout # Fix the python stdout
......
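A brief sketch of the trimmed helpers, assuming they are importable as dragon.core.proto_utils (the name used later in this change); the engine argument is gone from both device options and operator defs:
>>> from dragon.core import proto_utils
>>> opt = proto_utils.GetDeviceOption('cuda', device_id=0)    # no engine key in the cache any more
>>> op_def = proto_utils.MakeOperatorDef(
...     'Relu', inputs=['data'], outputs=['data/relu'],
...     device_option=opt)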
...@@ -128,7 +128,7 @@ def name_scope(name): ...@@ -128,7 +128,7 @@ def name_scope(name):
return _GLOBAL_TENSOR_STACK.get_controller(default) return _GLOBAL_TENSOR_STACK.get_controller(default)
def device_scope(device_type, device_id=0, engine='AUTO'): def device_scope(device_type, device_id=0):
"""Nest the the specific device info. """Nest the the specific device info.
Parameters Parameters
...@@ -137,20 +137,15 @@ def device_scope(device_type, device_id=0, engine='AUTO'): ...@@ -137,20 +137,15 @@ def device_scope(device_type, device_id=0, engine='AUTO'):
The type of device. The type of device.
device_id : int, optional device_id : int, optional
The index of the device. The index of the device.
engine : {'AUTO', 'CUDNN'}, optional
The auxiliary accelerating library to use.
""" """
device_type, device_id, device_engine = \ device_type, device_id = device_type.lower(), device_id
device_type.upper(), device_id, engine.upper()
assert device_type in ['cpu', 'gpu', 'cuda', 'cnml'] assert device_type in ['cpu', 'gpu', 'cuda', 'cnml']
# Default names # Default names
if device_type == 'gpu': device_type = 'cuda' if device_type == 'gpu': device_type = 'cuda'
if device_engine == 'AUTO': device_engine = 'CUDNN'
return _GLOBAL_DEVICE_STACK.get_controller({ return _GLOBAL_DEVICE_STACK.get_controller({
'device_type': device_type, 'device_type': device_type,
'device_id': device_id, 'device_id': device_id})
'device_engine': device_engine})
def phase_scope(phase): def phase_scope(phase):
...@@ -209,7 +204,7 @@ def get_default_device(): ...@@ -209,7 +204,7 @@ def get_default_device():
The device dict contains the following keys: The device dict contains the following keys:
(``device_type``, ``device_id``, ``device_engine``). (``device_type``, ``device_id``).
Returns Returns
------- -------
......
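Usage of the trimmed device_scope, assuming it is imported from the scope module above; the returned device dict no longer carries a device_engine entry:
>>> from dragon.core.scope import device_scope, get_default_device
>>> with device_scope('gpu', device_id=0):
...     dev = get_default_device()   # 'gpu' normalizes to 'cuda'; keys are device_type and device_id only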
...@@ -32,29 +32,29 @@ def Proposal(inputs, strides, ratios, scales, ...@@ -32,29 +32,29 @@ def Proposal(inputs, strides, ratios, scales,
Parameters Parameters
---------- ----------
inputs : list of Tensor inputs : sequence of Tensor
The inputs. The inputs.
strides : list of int strides : sequence of int
The strides of anchors. The strides of anchors.
ratios : list of float ratios : sequence of float
The ratios of anchors. The ratios of anchors.
scales : list of float scales : sequence of float
The scales of anchors. The scales of anchors.
pre_nms_top_n : int pre_nms_top_n : int, optional, default=6000
The number of anchors before nms. The number of anchors before nms.
post_nms_top_n : int post_nms_top_n : int, optional, default=300
The number of anchors after nms. The number of anchors after nms.
nms_thresh : float nms_thresh : float, optional, default=0.7
The threshold of nms. The threshold of nms.
min_size : int min_size : int, optional, default=16
The min size of anchors. The min size of anchors.
min_level : int min_level : int, optional, default=2
Finest level of the FPN pyramid. Finest level of the FPN pyramid.
max_level : int max_level : int, optional, default=5
Coarsest level of the FPN pyramid. Coarsest level of the FPN pyramid.
canonical_scale : int canonical_scale : int, optional, default=224
The baseline scale of mapping policy. The baseline scale of mapping policy.
canonical_level : int canonical_level : int, optional, default=4
Heuristic level of the canonical scale. Heuristic level of the canonical scale.
Returns Returns
......
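A hedged call sketch for the re-documented operator; the dragon.ops exposure, the tensor names, and the [scores, deltas, im_info] input ordering are assumptions, not taken from this diff:
>>> import dragon as dg
>>> cls_prob = dg.Tensor('rpn/cls_prob').Variable()
>>> bbox_pred = dg.Tensor('rpn/bbox_pred').Variable()
>>> im_info = dg.Tensor('im_info').Variable()
>>> rois = dg.ops.Proposal(
...     [cls_prob, bbox_pred, im_info],
...     strides=[16], ratios=[0.5, 1.0, 2.0], scales=[8, 16, 32])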
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon as dg
from multiprocessing import Process, Queue
class Fetcher(Process):
def __init__(self, queue):
super(Fetcher, self).__init__()
self._queue = queue
self.daemon = True
def cleanup():
print('Terminating Fetcher......')
self.terminate()
self.join()
import atexit
atexit.register(cleanup)
def run(self):
while True:
self._queue.put(np.ones((5, 10)))
class DataProcessOp(object):
"""How to custom a RunOp for data processing.
Examples
--------
>>> import dragon as dg
>>> y = dg.ops.Run([], module=__name__, op='DataProcessOp', num_outputs=1)
>>> foo = dg.function(outputs=y)
>>> foo()
>>> print(y.get_value())
>>> [[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
"""
def setup(self, inputs, outputs):
"""Setup for params or options.
Parameters
----------
inputs : list of str
Indicating the name of input tensors.
outputs : list of str
Indicating the name of output tensors.
Returns
-------
None
"""
self._queue = Queue(100)
self._fetcher = Fetcher(self._queue)
self._fetcher.start()
def run(self, inputs, outputs):
"""Run method, i.e., forward pass.
Parameters
----------
inputs : list of str
Indicating the name of input tensors.
outputs : list of str
Indicating the name of output tensors.
Returns
-------
None
"""
dg.workspace.FeedTensor(outputs[0], self._queue.get())
if __name__ == '__main__':
# Def
y = dg.ops.Run([], module=__name__, op='DataProcessOp', num_outputs=1)
foo = dg.function(outputs=y)
# Run
foo()
# Fetch
print(y.get_value())
\ No newline at end of file
...@@ -9,23 +9,26 @@ ...@@ -9,23 +9,26 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
import dragon.core.workspace as ws from __future__ import absolute_import
from dragon.utils.vision import DataBatch from __future__ import division
from __future__ import print_function
import dragon
import dragon.utils.vision
class MiniBatchOp(object): class MiniBatchOp(object):
"""How to form a minibatch based on `dragon.io`_ package. """Form a mini-batch based on `dragon.utils.vision`_ package."""
"""
def setup(self, inputs, outputs): def setup(self, inputs, outputs):
"""Setup for params or options. """Setup for params or options.
Parameters Parameters
---------- ----------
inputs : list of str inputs : sequence of str
Indicating the name of input tensors. The name of inputs.
outputs : list of str outputs : sequence of str
Indicating the name of output tensors. The name of outputs.
Returns Returns
------- -------
...@@ -33,18 +36,17 @@ class MiniBatchOp(object): ...@@ -33,18 +36,17 @@ class MiniBatchOp(object):
""" """
kwargs = eval(self.param_str) kwargs = eval(self.param_str)
self._data_batch = DataBatch(**kwargs) self._data_batch = dragon.utils.vision.DataBatch(**kwargs)
def run(self, inputs, outputs): def run(self, inputs, outputs):
"""Run method, i.e., forward pass. """Run method, i.e., forward pass.
Parameters Parameters
---------- ----------
inputs : list of str inputs : sequence of str
Indicating the name of input tensors. The name of inputs.
outputs : list of str outputs : sequence of str
Indicating the name of output tensors. The name of outputs.
Returns Returns
------- -------
...@@ -53,4 +55,4 @@ class MiniBatchOp(object): ...@@ -53,4 +55,4 @@ class MiniBatchOp(object):
""" """
blobs = self._data_batch.get() blobs = self._data_batch.get()
for idx, blob in enumerate(blobs): for idx, blob in enumerate(blobs):
ws.FeedTensor(outputs[idx], blob) dragon.workspace.FeedTensor(outputs[idx], blob)
\ No newline at end of file \ No newline at end of file
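A hypothetical wiring of MiniBatchOp through the generic Run operator, following the pattern of the removed DataProcessOp example; the param_str keyword and the two-output unpacking are assumptions inferred from setup()/run(), not confirmed by this diff:
>>> import dragon as dg
>>> data, label = dg.ops.Run(
...     [], module='dragon.operators.custom.minibatch', op='MiniBatchOp',
...     param_str=str({'source': '/data/train_lmdb', 'batch_size': 64}),
...     num_outputs=2)
>>> foo = dg.function(outputs=[data, label])
>>> foo()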
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon as dg
class VecMultOp(object):
"""How to custom a TemplateOp for Vector Multiplication.
Examples
--------
>>> import dragon as dg
>>> x1 = dg.Tensor('x1').Variable()
>>> x2 = dg.Tensor('x2').Variable()
>>> y = dg.ops.Template([x1, x2], module=__name__, op='VecMultOp', num_outputs=1)
>>> dx1 = dg.grad(y, x1)
>>> dx2 = dg.grad(y, x2)
>>> foo = dg.function(outputs=y)
>>> dg.workspace.FeedTensor(x1, np.ones((5, 3), dtype=np.float32))
>>> dg.workspace.FeedTensor(x2, np.ones((5, 3), dtype=np.float32) * 5.0)
>>> foo()
>>> print(y.get_value())
>>> [[ 5. 5. 5.]
[ 5. 5. 5.]
[ 5. 5. 5.]
[ 5. 5. 5.]
[ 5. 5. 5.]]
>>> print(dx1.get_value())
>>> [[ 5. 5. 5.]
[ 5. 5. 5.]
[ 5. 5. 5.]
[ 5. 5. 5.]
[ 5. 5. 5.]]
>>> print(dx2.get_value())
>>> [[ 1. 1. 1.]
[ 1. 1. 1.]
[ 1. 1. 1.]
[ 1. 1. 1.]
[ 1. 1. 1.]]
"""
def setup(self, inputs, outputs):
"""Setup for params or options.
Parameters
----------
inputs : list of str
Indicating the name of input tensors.
outputs : list of str
Indicating the name of output tensors.
Returns
-------
None
"""
pass
def run(self, inputs, outputs):
"""Run method, i.e., forward pass.
Parameters
----------
inputs : list of str
Indicating the name of input tensors.
outputs : list of str
Indicating the name of output tensors.
Returns
-------
None
"""
x1 = dg.workspace.FetchTensor(inputs[0])
x2 = dg.workspace.FetchTensor(inputs[1])
dg.workspace.FeedTensor(outputs[0], x1 * x2) # call numpy mult
def grad(self, inputs, outputs):
"""Gradient method, i.e., backward pass.
Parameters
----------
inputs : list of str
Indicating the name of input tensors.
outputs : list of str
Indicating the name of output tensors.
Returns
-------
None
"""
x1 = dg.workspace.FetchTensor(inputs[0])
x2 = dg.workspace.FetchTensor(inputs[1])
dy = dg.workspace.FetchTensor(inputs[-1])
dx1 = dy * x2
dx2 = dy * x1
dg.workspace.FeedTensor(outputs[0], dx1)
dg.workspace.FeedTensor(outputs[1], dx2)
if __name__ == '__main__':
# Def
x1 = dg.Tensor('x1').Variable()
x2 = dg.Tensor('x2').Variable()
y = dg.ops.Template([x1, x2], module=__name__, op='VecMultOp', num_outputs=1)
dx1 = dg.grad(y, x1)
dx2 = dg.grad(y, x2)
foo = dg.function(outputs=y)
# Feed
dg.workspace.FeedTensor(x1, np.ones((5, 3), dtype=np.float32))
dg.workspace.FeedTensor(x2, np.ones((5, 3), dtype=np.float32) * 5.0)
# Run
foo()
# Fetch
print('y \n-------------- \n', y.get_value(), '\n')
print('dx1 \n-------------- \n', dx1.get_value(), '\n')
print('dx2 \n-------------- \n', dx2.get_value(), '\n')
\ No newline at end of file
...@@ -25,40 +25,40 @@ def LMDBData(**kwargs): ...@@ -25,40 +25,40 @@ def LMDBData(**kwargs):
---------- ----------
source : str source : str
The path of database. The path of database.
shuffle : bool shuffle : bool, optional, default=False
Whether to shuffle the data. Whether to shuffle the data.
node_step: bool node_step: bool
Whether to split data for multiple parallel nodes. Whether to split data for multiple parallel nodes.
num_chunks : int num_chunks : int, optional, default=2048
The number of chunks to split. Default is ``2048``. The number of chunks to split.
chunk_size : int chunk_size : int, optional, default=-1
The size(MB) of each chunk. Default is -1 (Refer ``num_chunks``). The size(MB) of each chunk.
mean_values : list mean_values : list, optional
The mean value of each image channel. The mean value of each image channel.
scale : float scale : float, optional, default=1.
The scale performed after mean subtraction. Default is ``1.0``. The scale performed after mean subtraction.
padding : int padding : int, optional, default=0
The zero-padding size. Default is ``0``. The zero-padding size.
crop_size : int crop_size : int, optional, default=0
The crop size. Default is ``0`` (Disabled). The cropping size.
mirror : bool mirror : bool, optional, default=False
Whether to mirror(flip horizontally) images. Default is ``False``. Whether to mirror(flip horizontally) images.
color_augmentation : bool color_augmentation : bool, optional, default=False
Whether to use color distortion. Default is ``False``. Whether to use color distortion.
min_random_scale : float min_random_scale : float, optional, default=1.
The min scale of the input images. Default is ``1.0``. The min scale of the input images.
max_random_scale : float max_random_scale : float, optional, default=1.
The max scale of the input images. Default is ``1.0``. The max scale of the input images.
force_gray : bool force_gray : bool, optional, default=False
Set not to duplicate channel for gray. Default is ``False``. Set not to duplicate channel for gray.
phase : str phase : {'TRAIN', 'TEST'}, optional
The phase of this operator, ``TRAIN`` or ``TEST``. The phase of this operator.
batch_size : int batch_size : int, optional, default=128
The size of a mini-batch. The size of a mini-batch.
partition : bool partition : bool, optional, default=False
Whether to partition batch for parallelism. Default is ``False``. Whether to partition batch for parallelism.
prefetch : int prefetch : int, optional, default=5
The prefetch count. Default is ``5``. The prefetch count.
Returns Returns
------- -------
...@@ -85,8 +85,7 @@ def LMDBData(**kwargs): ...@@ -85,8 +85,7 @@ def LMDBData(**kwargs):
@OpSchema.Inputs(1) @OpSchema.Inputs(1)
def ImageData( def ImageData(
inputs, mean_values=None, std_values=None, inputs, mean_values=None, std_values=None,
dtype='float32', data_format='NCHW', **kwargs dtype='float32', data_format='NCHW', **kwargs):
):
"""Process the images from 4D raw data. """Process the images from 4D raw data.
Note that we assume the data format of raw data is **NHWC**. Note that we assume the data format of raw data is **NHWC**.
...@@ -99,10 +98,10 @@ def ImageData( ...@@ -99,10 +98,10 @@ def ImageData(
The optional mean values to subtract. The optional mean values to subtract.
std_values : sequence of float, optional std_values : sequence of float, optional
The optional std values to divide. The optional std values to divide.
dtype : str dtype : {'float16', 'float32'}, optional
The type of output. ``float32`` or ``float16``. The data type of output.
data_format : str data_format : {'NCHW', 'NHWC'}, optional
The data format of output. ``NCHW`` or ``NHWC``. The data format of output.
Returns Returns
------- -------
......
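A minimal pipeline sketch for the re-documented data operators, assuming they are exported under dragon.ops and that LMDBData yields a (data, label) pair (its Returns section is elided in this hunk); the database path and mean values are placeholders:
>>> import dragon as dg
>>> data, label = dg.ops.LMDBData(
...     source='/data/train_lmdb', shuffle=True,
...     phase='TRAIN', batch_size=128)
>>> image = dg.ops.ImageData(
...     data, mean_values=[104., 116., 122.],
...     dtype='float32', data_format='NCHW')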
...@@ -52,7 +52,7 @@ def Conv2d( ...@@ -52,7 +52,7 @@ def Conv2d(
The inputs, represent [input, weights] + [bias]. The inputs, represent [input, weights] + [bias].
num_output : int num_output : int
The output channels of convolution. The output channels of convolution.
kernel_shape : sequence of int. kernel_shape : sequence of int
The shape of convolution kernel. The shape of convolution kernel.
strides : sequence of int, optional, default=1 strides : sequence of int, optional, default=1
The stride(s) of convolution. The stride(s) of convolution.
......
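For completeness, a minimal Conv2d call with the documented arguments; the dragon.ops exposure and the weight tensor are assumptions:
>>> import dragon as dg
>>> x = dg.Tensor('data').Variable()
>>> w = dg.Tensor('conv1/W').Variable()
>>> y = dg.ops.Conv2d([x, w], num_output=64, kernel_shape=[3, 3], strides=1)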
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd. // Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
// Licensed under the BSD 2-Clause License. // Licensed under the BSD 2-Clause License.
// Codes are based on:
// https://github.com/pytorch/pytorch/blob/master/caffe2/proto/caffe2.proto
syntax = "proto2"; syntax = "proto2";
package dragon; package dragon;
// Store the serialized Tensor objects.
message TensorProto { message TensorProto {
repeated int32 dims = 1; repeated int32 dims = 1;
enum DataType { enum DataType {
UNDEFINED = 0; UNDEFINED = 0;
// Basic types.
FLOAT = 1; FLOAT = 1;
INT32 = 2; INT32 = 2;
BYTE = 3; BYTE = 3;
STRING = 4; STRING = 4;
// Less-commonly used data types.
BOOL = 5;
UINT8 = 6;
INT8 = 7;
UINT16 = 8;
INT16 = 9;
INT64 = 10;
FLOAT16 = 12; FLOAT16 = 12;
DOUBLE = 13;
} }
optional DataType data_type = 2 [default = FLOAT]; optional DataType data_type = 2 [default = FLOAT];
// For float.
repeated float float_data = 3 [packed = true]; repeated float float_data = 3 [packed = true];
// For int32, uint8, int8, uint16, int16, bool, and float16
// Note about float16: in storage we will basically convert float16 byte-wise
// to unsigned short and then store them in the int32_data field.
repeated int32 int32_data = 4 [packed = true]; repeated int32 int32_data = 4 [packed = true];
// For bytes.
optional bytes byte_data = 5; optional bytes byte_data = 5;
// For strings.
repeated bytes string_data = 6; repeated bytes string_data = 6;
// For double.
repeated double double_data = 9 [packed = true];
// For int64.
repeated int64 int64_data = 10 [packed = true];
// Store the raw data, contents are serialized as little-endian.
optional bytes raw_data = 13;
// Optionally, a name for the tensor.
optional string name = 7; optional string name = 7;
} }
// Record the filler of Tensor.
// This structure is kept for backward compatibility
// with caffe1, which relies implicit initializer.
message TensorFillerProto { message TensorFillerProto {
optional string tensor = 1; optional string tensor = 1;
optional string type = 2 [default = 'constant']; optional string type = 2 [default = 'constant'];
...@@ -36,67 +67,120 @@ message TensorFillerProto { ...@@ -36,67 +67,120 @@ message TensorFillerProto {
optional VarianceNorm variance_norm = 9 [default = FAN_IN]; optional VarianceNorm variance_norm = 9 [default = FAN_IN];
} }
// Store multiple TensorProto objects in one single proto.
message TensorProtos { message TensorProtos {
repeated TensorProto protos = 1; repeated TensorProto protos = 1;
} }
enum DeviceType { // DeviceType that Dragon currently supports.
PROTO_CPU = 0; enum DeviceTypeProto {
PROTO_CUDA = 1; // The default device.
PROTO_CNML = 2; PROTO_CPU = 0;
// NVIDIA's CUDA Environment.
PROTO_CUDA = 1;
// CAMBRICON's CNML Environment.
PROTO_CNML = 2;
} }
// Device-specific options.
message DeviceOption { message DeviceOption {
optional DeviceType device_type = 1 [default = PROTO_CPU]; // The type of device to dispatch executions.
optional DeviceTypeProto device_type = 1 [default = PROTO_CPU];
// The index of this device.
optional int32 device_id = 2 [default = 0]; optional int32 device_id = 2 [default = 0];
// The random seed to start the random generator.
optional uint32 random_seed = 3 [default = 3]; optional uint32 random_seed = 3 [default = 3];
optional string engine = 4;
} }
// A named argument containing either singular float, integer and string
// values, or repeated float, int and string arrays.
message Argument { message Argument {
// The name of this argument.
optional string name = 1; optional string name = 1;
// Store the float32 value.
optional float f = 2; optional float f = 2;
// Store the bool, int32, int64 value.
optional int64 i = 3; optional int64 i = 3;
// Store the string value.
optional bytes s = 4; optional bytes s = 4;
// Store the float32 values.
repeated float floats = 7; repeated float floats = 7;
// Store the bool, int32, int64 values.
repeated int64 ints = 8; repeated int64 ints = 8;
// Store the string values.
repeated bytes strings = 9; repeated bytes strings = 9;
} }
// Operator Definition
message OperatorDef { message OperatorDef {
// The unique id of this operator.
// Set it to persist operators in the dynamic graph.
optional string uid = 1; optional string uid = 1;
// The name of inputs.
repeated string input = 2; repeated string input = 2;
// The name of outputs.
repeated string output = 3; repeated string output = 3;
// The optional name of this operator.
optional string name = 4; optional string name = 4;
// The operator type.
optional string type = 5; optional string type = 5;
// The arguments.
repeated Argument arg = 6; repeated Argument arg = 6;
// The device option that the operator should run under.
optional DeviceOption device_option = 7; optional DeviceOption device_option = 7;
} }
// Record the gradient information
message GradientProto { message GradientProto {
// The derivative target.
optional string cost = 1; optional string cost = 1;
// The target with respect to?
optional string wrt = 2; optional string wrt = 2;
// The external gradient
optional string external = 3; optional string external = 3;
} }
// Record the updater information
message UpdaterProto { message UpdaterProto {
// The operator name to use.
optional string name = 1; optional string name = 1;
// The operator type.
optional string type = 2; optional string type = 2;
// The tensor to update.
repeated string tensor = 3; repeated string tensor = 3;
// The arguments.
repeated Argument arg = 4; repeated Argument arg = 4;
} }
// Graph Definition
message GraphDef { message GraphDef {
// The graph name.
optional string name = 1; optional string name = 1;
// The operators to execute.
repeated OperatorDef op = 2; repeated OperatorDef op = 2;
// The type of graph.
optional string graph_type = 3; optional string graph_type = 3;
// The device option for this graph.
optional DeviceOption device_option = 5; optional DeviceOption device_option = 5;
// The arguments.
repeated Argument arg = 6; repeated Argument arg = 6;
// The name of inputs.
repeated string input = 7; repeated string input = 7;
// The name of outputs.
repeated string output = 8; repeated string output = 8;
// The gradients information.
repeated GradientProto gradient = 9; repeated GradientProto gradient = 9;
// The updaters information.
repeated UpdaterProto updater = 10; repeated UpdaterProto updater = 10;
} }
\ No newline at end of file
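The reshaped DeviceOption, exercised from Python through the generated module (the import path below is an assumption):
>>> from dragon.proto import dragon_pb2 as pb   # assumed path of the generated module
>>> opt = pb.DeviceOption()
>>> opt.device_type = pb.PROTO_CUDA   # a DeviceTypeProto value
>>> opt.device_id = 0
>>> opt.random_seed = 3
>>> # No 'engine' field exists any more; CuDNN is switched globally via dragon.core.cuda.EnableCUDNN.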
...@@ -28,12 +28,12 @@ class BlobFetcher(Process): ...@@ -28,12 +28,12 @@ class BlobFetcher(Process):
Parameters Parameters
---------- ----------
batch_size : int batch_size : int, optional, default=128
The size of a training batch. The size of a mini-batch.
partition : boolean partition : bool, optional, default=False
Whether to partition batch. Default is ``False``. Whether to partition batch for parallelism.
prefetch : int prefetch : int, optional, default=5
The prefetch count. Default is ``5``. The prefetch count.
""" """
super(BlobFetcher, self).__init__() super(BlobFetcher, self).__init__()
......
...@@ -40,38 +40,38 @@ class DataBatch(object): ...@@ -40,38 +40,38 @@ class DataBatch(object):
---------- ----------
source : str source : str
The path of database. The path of database.
multiple_nodes: boolean multiple_nodes: boolean, optional, default=False
Whether to split data for multiple parallel nodes. Default is ``False``. Whether to split data for multiple parallel nodes.
shuffle : boolean shuffle : bool, optional, default=False
Whether to shuffle the data. Default is ``False``. Whether to shuffle the data.
num_chunks : int num_chunks : int, optional, default=2048
The number of chunks to split. Default is ``2048``. The number of chunks to split.
chunk_size : int chunk_size : int, optional, default=-1
The size(MB) of each chunk. Default is -1 (Refer ``num_chunks``). The size(MB) of each chunk.
padding : int padding : int, optional, default=0
The zero-padding size. Default is ``0`` (Disabled). The zero-padding size.
fill_value : int fill_value : int, optional, default=127
The value to fill when padding is valid. Default is ``127``. The value to fill when padding is valid.
crop_size : int crop_size : int, optional, default=0
The crop size. Default is ``0`` (Disabled). The cropping size.
mirror : boolean mirror : bool, optional, default=False
Whether to flip(horizontally) images. Default is ``False``. Whether to mirror(flip horizontally) images.
color_augmentation : boolean color_augmentation : bool, optional, default=False
Whether to distort colors. Default is ``False``. Whether to use color distortion.
min_random_scale : float min_random_scale : float, optional, default=1.
The min scale of the input images. Default is ``1.0``. The min scale of the input images.
max_random_scale : float max_random_scale : float, optional, default=1.
The max scale of the input images. Default is ``1.0``. The max scale of the input images.
force_color : boolean force_gray : bool, optional, default=False
Set to duplicate channels for gray. Default is ``False``. Set not to duplicate channel for gray.
phase : str phase : {'TRAIN', 'TEST'}, optional
The phase of this operator, ``TRAIN`` or ``TEST``. Default is ``TRAIN``. The optional running phase.
batch_size : int batch_size : int, optional, default=128
The size of a training batch. The size of a mini-batch.
partition : boolean partition : bool, optional, default=False
Whether to partition batch. Default is ``False``. Whether to partition batch for parallelism.
prefetch : int prefetch : int, optional, default=5
The prefetch count. Default is ``5``. The prefetch count.
""" """
super(DataBatch, self).__init__() super(DataBatch, self).__init__()
...@@ -109,7 +109,7 @@ class DataBatch(object): ...@@ -109,7 +109,7 @@ class DataBatch(object):
self._num_transformers += 1 self._num_transformers += 1
self._num_transformers = min(self._num_transformers, self._max_transformers) self._num_transformers = min(self._num_transformers, self._max_transformers)
self._batch_size = kwargs.get('batch_size', 100) self._batch_size = kwargs.get('batch_size', 128)
self._partition = kwargs.get('partition', False) self._partition = kwargs.get('partition', False)
if self._partition: if self._partition:
self._batch_size = int(self._batch_size / kwargs['group_size']) self._batch_size = int(self._batch_size / kwargs['group_size'])
......
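Direct use of DataBatch with the re-documented defaults, matching the import shown in the minibatch operator above; the (image, label) unpacking of get() is an assumption:
>>> from dragon.utils.vision import DataBatch
>>> batch = DataBatch(source='/data/train_lmdb', shuffle=True,
...                   batch_size=128, prefetch=5)
>>> image, label = batch.get()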
...@@ -35,14 +35,14 @@ class DataReader(Process): ...@@ -35,14 +35,14 @@ class DataReader(Process):
---------- ----------
source : str source : str
The path of database. The path of database.
multiple_nodes: boolean multiple_nodes: boolean, optional, default=False
Whether to split data for multiple parallel nodes. Default is ``False``. Whether to split data for multiple parallel nodes.
shuffle : boolean shuffle : bool, optional, default=False
Whether to shuffle the data. Default is ``False``. Whether to shuffle the data.
num_chunks : int num_chunks : int, optional, default=2048
The number of chunks to split. Default is ``2048``. The number of chunks to split.
chunk_size : int chunk_size : int, optional, default=-1
The size(MB) of each chunk. Default is -1 (Refer ``num_chunks``). The size(MB) of each chunk.
""" """
super(DataReader, self).__init__() super(DataReader, self).__init__()
......
...@@ -42,24 +42,24 @@ class DataTransformer(Process): ...@@ -42,24 +42,24 @@ class DataTransformer(Process):
Parameters Parameters
---------- ----------
padding : int padding : int, optional, default=0
The padding size. Default is ``0`` (Disabled). The zero-padding size.
fill_value : int fill_value : int, optional, default=127
The value to fill when padding is valid. Default is ``127``. The value to fill when padding is valid.
crop_size : int crop_size : int, optional, default=0
The crop size. Default is ``0`` (Disabled). The cropping size.
mirror : boolean mirror : bool, optional, default=False
Whether to flip(horizontally) images. Default is ``False``. Whether to mirror(flip horizontally) images.
color_augmentation : boolean color_augmentation : bool, optional, default=False
Whether to distort colors. Default is ``False``. Whether to use color distortion.
min_random_scale : float min_random_scale : float, optional, default=1.
The min scale of the input images. Default is ``1.0``. The min scale of the input images.
max_random_scale : float max_random_scale : float, optional, default=1.
The max scale of the input images. Default is ``1.0``. The max scale of the input images.
force_color : boolean force_gray : bool, optional, default=False
Set to duplicate channels for gray. Default is ``False``. Set not to duplicate channel for gray.
phase : str phase : {'TRAIN', 'TEST'}, optional
The phase of this operator, ``TRAIN`` or ``TEST``. Default is ``TRAIN``. The optional running phase.
""" """
super(DataTransformer, self).__init__() super(DataTransformer, self).__init__()
......
...@@ -183,8 +183,6 @@ def GraphDef_Device(graph_def): ...@@ -183,8 +183,6 @@ def GraphDef_Device(graph_def):
device_option.device_type = supports[option['device']] device_option.device_type = supports[option['device']]
device_option.device_id = option['device_id'] device_option.device_id = option['device_id']
device_option.random_seed = option['random_seed'] device_option.random_seed = option['random_seed']
if option['device'] == 'cuda':
if option['use_cudnn']: device_option.engine = 'CUDNN'
graph_def.device_option.CopyFrom(device_option) graph_def.device_option.CopyFrom(device_option)
......
...@@ -93,14 +93,14 @@ def RunOperator( ...@@ -93,14 +93,14 @@ def RunOperator(
op_name = recorder.append(op) op_name = recorder.append(op)
op.name = op_name op.name = op_name
for ix in range(len(outputs)): for ix in range(len(outputs)):
outputs[ix]._requires_grad = True outputs[ix].requires_grad = True
outputs[ix].__jit_recorder__ = recorder outputs[ix].__jit_recorder__ = recorder
if len(ignored_grads) > 0: if len(ignored_grads) > 0:
outputs[ix]._ignored_grads = ignored_grads outputs[ix]._ignored_grads = ignored_grads
else: else:
# Reset status # Reset status
for ix in range(len(outputs)): for ix in range(len(outputs)):
outputs[ix]._requires_grad = False outputs[ix].requires_grad = False
# Callback on Run # Callback on Run
if callback_on_run: callback_on_run(op_name) if callback_on_run: callback_on_run(op_name)
......
...@@ -315,9 +315,8 @@ class Module(object): ...@@ -315,9 +315,8 @@ class Module(object):
op_type=self.op_meta['op_type'], op_type=self.op_meta['op_type'],
device_option=proto_utils. device_option=proto_utils.
GetDeviceOption( GetDeviceOption(
self._device.type, self._device.type,
self._device.index, self._device.index),
engine='CUDNN'),
**self.op_meta['arguments'] **self.op_meta['arguments']
) )
......
...@@ -413,12 +413,15 @@ class RNNCellBase(Module): ...@@ -413,12 +413,15 @@ class RNNCellBase(Module):
for weight in self.parameters(): for weight in self.parameters():
weight.data.uniform_(-stdv, stdv) weight.data.uniform_(-stdv, stdv)
from .activation import Tanh, Sigmoid
class LSTMCell(RNNCellBase): class LSTMCell(RNNCellBase):
def __init__(self, input_size, hidden_size, bias=True): def __init__(self, input_size, hidden_size, bias=True):
super(LSTMCell, self).__init__( super(LSTMCell, self).__init__(
input_size, hidden_size, bias, num_chunks=4) input_size, hidden_size, bias, num_chunks=4)
self.register_op() self.register_op()
self.tanh = Tanh()
self.sigmoid = Sigmoid()
def register_op(self): def register_op(self):
self.op_meta = {'op_type': 'LSTMCell', 'arguments': {}} self.op_meta = {'op_type': 'LSTMCell', 'arguments': {}}
......
...@@ -349,7 +349,7 @@ class OneHot(BaseModule): ...@@ -349,7 +349,7 @@ class OneHot(BaseModule):
def forward(self, x): def forward(self, x):
inputs = [x]; self.unify_devices(inputs) inputs = [x]; self.unify_devices(inputs)
outputs = [self.register_output()] outputs = [self.register_output()]
return self.run(inputs, outputs) with no_grad(): return self.run(inputs, outputs)
class Cast(BaseModule): class Cast(BaseModule):
...@@ -376,8 +376,7 @@ class Cast(BaseModule): ...@@ -376,8 +376,7 @@ class Cast(BaseModule):
y = self.run(inputs, outputs) y = self.run(inputs, outputs)
else: else:
self.unify_devices([x]) self.unify_devices([x])
with no_grad(): with no_grad(): y = self.run([], [x])
y = self.run([], [x])
return y return y
...@@ -400,4 +399,4 @@ class Multinomial(BaseModule): ...@@ -400,4 +399,4 @@ class Multinomial(BaseModule):
def forward(self, x, y): def forward(self, x, y):
inputs = [x]; self.unify_devices(inputs) inputs = [x]; self.unify_devices(inputs)
outputs = [y] if y else [self.register_output()] outputs = [y] if y else [self.register_output()]
return self.run(inputs, outputs) with no_grad(): return self.run(inputs, outputs)
\ No newline at end of file \ No newline at end of file
...@@ -60,6 +60,37 @@ inline int BBoxTransform( ...@@ -60,6 +60,37 @@ inline int BBoxTransform(
return (bbox_w >= min_box_w) * (bbox_h >= min_box_h); return (bbox_w >= min_box_w) * (bbox_h >= min_box_h);
} }
template <typename T>
inline void BBoxTransform(
const T dx,
const T dy,
const T d_log_w,
const T d_log_h,
const T im_w,
const T im_h,
const T im_scale,
T* bbox) {
const T w = bbox[2] - bbox[0] + 1;
const T h = bbox[3] - bbox[1] + 1;
const T ctr_x = bbox[0] + (T)0.5 * w;
const T ctr_y = bbox[1] + (T)0.5 * h;
const T pred_ctr_x = dx * w + ctr_x;
const T pred_ctr_y = dy * h + ctr_y;
const T pred_w = exp(d_log_w) * w;
const T pred_h = exp(d_log_h) * h;
bbox[0] = pred_ctr_x - (T)0.5 * pred_w;
bbox[1] = pred_ctr_y - (T)0.5 * pred_h;
bbox[2] = pred_ctr_x + (T)0.5 * pred_w;
bbox[3] = pred_ctr_y + (T)0.5 * pred_h;
bbox[0] = std::max((T)0, std::min(bbox[0], im_w - 1)) / im_scale;
bbox[1] = std::max((T)0, std::min(bbox[1], im_h - 1)) / im_scale;
bbox[2] = std::max((T)0, std::min(bbox[2], im_w - 1)) / im_scale;
bbox[3] = std::max((T)0, std::min(bbox[3], im_h - 1)) / im_scale;
}
/******************** Anchor ********************/ /******************** Anchor ********************/
template <typename T> template <typename T>
...@@ -117,6 +148,38 @@ inline void GenerateGridAnchors( ...@@ -117,6 +148,38 @@ inline void GenerateGridAnchors(
} }
} }
template <typename T>
inline void GenerateGridAnchors(
const int num_proposals,
const int num_classes,
const int num_anchors,
const int feat_h,
const int feat_w,
const int stride,
const int base_offset,
const T* anchors,
const int64_t* indices,
T* proposals) {
T x, y;
int idx_4d, a, h, w;
int lr = num_classes * base_offset;
int rr = num_classes * (num_anchors * feat_h * feat_w);
for (int i = 0; i < num_proposals; ++i) {
idx_4d = (int)indices[i] - lr;
if (idx_4d >= 0 && idx_4d < rr) {
idx_4d /= num_classes;
w = idx_4d % feat_w;
h = (idx_4d / feat_w) % feat_h;
a = idx_4d / feat_w / feat_h;
x = (T)w * stride, y = (T)h * stride;
auto* A = anchors + a * 4;
auto* P = proposals + i * 7 + 1;
P[0] = x + A[0], P[1] = y + A[1];
P[2] = x + A[2], P[3] = y + A[3];
}
}
}
/******************** Proposal ********************/ /******************** Proposal ********************/
template <typename T> template <typename T>
...@@ -164,14 +227,16 @@ void GenerateMSProposals( ...@@ -164,14 +227,16 @@ void GenerateMSProposals(
const int64_t* indices, const int64_t* indices,
T* proposals) { T* proposals) {
int64_t index; int64_t index;
int64_t num_candidates_2x = 2 * num_candidates;
int64_t num_candidates_3x = 3 * num_candidates;
float* proposal = proposals; float* proposal = proposals;
float dx, dy, d_log_w, d_log_h; float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) { for (int i = 0; i < num_proposals; ++i) {
index = indices[i]; index = indices[i];
dx = deltas[index]; dx = deltas[index];
dy = deltas[num_candidates + index]; dy = deltas[num_candidates + index];
d_log_w = deltas[2 * num_candidates + index]; d_log_w = deltas[num_candidates_2x + index];
d_log_h = deltas[3 * num_candidates + index]; d_log_h = deltas[num_candidates_3x + index];
proposal[4] = BBoxTransform<float>( proposal[4] = BBoxTransform<float>(
dx, dy, d_log_w, d_log_h, dx, dy, d_log_w, d_log_h,
im_w, im_h, min_box_w, min_box_h, im_w, im_h, min_box_w, min_box_h,
...@@ -181,6 +246,41 @@ void GenerateMSProposals( ...@@ -181,6 +246,41 @@ void GenerateMSProposals(
} }
template <typename T> template <typename T>
void GenerateMCProposals(
const int num_proposals,
const int num_boxes,
const int num_classes,
const int im_idx,
const float im_h,
const float im_w,
const float im_scale,
const T* scores,
const T* deltas,
const int64_t* indices,
T* proposals) {
int64_t index, cls;
int64_t num_boxes_2x = 2 * num_boxes;
int64_t num_boxes_3x = 3 * num_boxes;
float* proposal = proposals;
float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) {
cls = indices[i] % num_classes;
index = indices[i] / num_classes;
dx = deltas[index];
dy = deltas[num_boxes + index];
d_log_w = deltas[num_boxes_2x + index];
d_log_h = deltas[num_boxes_3x + index];
proposal[0] = im_idx;
BBoxTransform<float>(
dx, dy, d_log_w, d_log_h,
im_w, im_h, im_scale, proposal + 1);
proposal[5] = scores[indices[i]];
proposal[6] = cls + 1;
proposal += 7;
}
}
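For readability: GenerateMCProposals writes one 7-float record per kept candidate, advancing the proposal pointer by 7 each iteration. A hypothetical struct overlay of that record (illustration only, not used by the patch):

// Overlay of one 7-float record produced by GenerateMCProposals.
struct Detection {
    float im_idx;          // proposal[0]: image index within the batch
    float x1, y1, x2, y2;  // proposal[1..4]: box decoded by BBoxTransform
    float score;           // proposal[5]: score of the selected (box, class) pair
    float cls;             // proposal[6]: 1-based class id (cls + 1)
};
static_assert(sizeof(Detection) == 7 * sizeof(float),
              "overlay must match the 7-float stride");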
template <typename T>
inline void SortProposals( inline void SortProposals(
const int start, const int start,
const int end, const int end,
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
namespace dragon { namespace dragon {
template <class Context> template <typename T> template <class Context> template <typename T>
void ProposalOp<Context>::RunWithType() { void ProposalOp<Context>::RunWithRCNN() {
using BT = float; // DType of BBox using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox using BC = CPUContext; // Context of BBox
...@@ -15,7 +15,6 @@ void ProposalOp<Context>::RunWithType() { ...@@ -15,7 +15,6 @@ void ProposalOp<Context>::RunWithType() {
int total_rois = 0, num_rois; int total_rois = 0, num_rois;
int num_candidates, num_proposals; int num_candidates, num_proposals;
auto* RIdata = roi_indices.data();
auto* batch_scores = Input(-3).template data<T, BC>(); auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>(); auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>(); auto* im_info = Input(-1).template data<BT, BC>();
...@@ -32,11 +31,10 @@ void ProposalOp<Context>::RunWithType() { ...@@ -32,11 +31,10 @@ void ProposalOp<Context>::RunWithType() {
if (strides.size() == 1) { if (strides.size() == 1) {
// Case 1: single stride // Case 1: single stride
feat_h = Input(0).dim(2), feat_w = Input(0).dim(3); feat_h = Input(0).dim(2), feat_w = Input(0).dim(3);
K = feat_h * feat_w, A = int(ratios.size() * scales.size()); A = int(ratios.size() * scales.size()), K = feat_h * feat_w;
// Select the Top-K candidates as proposals // Select the Top-K candidates as proposals
num_candidates = K * A; num_candidates = A * K;
num_proposals = std::min( num_proposals = std::min(num_candidates, (int)pre_nms_top_n);
num_candidates, (int)pre_nms_top_n);
utils::math::ArgPartition( utils::math::ArgPartition(
num_candidates, num_proposals, num_candidates, num_proposals,
true, scores, indices); true, scores, indices);
...@@ -50,16 +48,16 @@ void ProposalOp<Context>::RunWithType() { ...@@ -50,16 +48,16 @@ void ProposalOp<Context>::RunWithType() {
&ratios[0], &scales[0], Adata); &ratios[0], &scales[0], Adata);
rcnn::GenerateGridAnchors( rcnn::GenerateGridAnchors(
num_proposals, A, feat_h, feat_w, num_proposals, A, feat_h, feat_w,
strides[0], 0, Adata, indices.data(), Pdata); strides[0], 0, Adata, &indices[0], Pdata);
rcnn::GenerateSSProposals(K, num_proposals, rcnn::GenerateSSProposals(K, num_proposals,
im_h, im_w, min_box_h, min_box_w, im_h, im_w, min_box_h, min_box_w,
scores, deltas, indices.data(), Pdata); scores, deltas, &indices[0], Pdata);
// Sort, NMS and Retrieve // Sort, NMS and Retrieve
rcnn::SortProposals(0, num_proposals - 1, num_proposals, Pdata); rcnn::SortProposals(0, num_proposals - 1, num_proposals, Pdata);
rcnn::ApplyNMS(num_proposals, post_nms_top_n, nms_thresh, rcnn::ApplyNMS(num_proposals, post_nms_top_n, nms_thresh,
proposals_.template mutable_data<BT, Context>(), proposals_.template mutable_data<BT, Context>(),
RIdata, num_rois, ctx()); &roi_indices[0], num_rois, ctx());
rcnn::RetrieveRoIs(num_rois, n, Pdata, RIdata, Ydata); rcnn::RetrieveRoIs(num_rois, n, Pdata, &roi_indices[0], Ydata);
} else if (strides.size() > 1) { } else if (strides.size() > 1) {
// Case 2: multiple strides // Case 2: multiple strides
CHECK_EQ(strides.size(), InputSize() - 3) CHECK_EQ(strides.size(), InputSize() - 3)
...@@ -70,8 +68,7 @@ void ProposalOp<Context>::RunWithType() { ...@@ -70,8 +68,7 @@ void ProposalOp<Context>::RunWithType() {
<< scales.size() << " scales"; << scales.size() << " scales";
// Select the Top-K candidates as proposals // Select the Top-K candidates as proposals
num_candidates = Input(-3).dim(1); num_candidates = Input(-3).dim(1);
num_proposals = std::min( num_proposals = std::min(num_candidates, (int)pre_nms_top_n);
num_candidates, (int)pre_nms_top_n);
utils::math::ArgPartition( utils::math::ArgPartition(
num_candidates, num_proposals, num_candidates, num_proposals,
true, scores, indices); true, scores, indices);
...@@ -90,19 +87,19 @@ void ProposalOp<Context>::RunWithType() { ...@@ -90,19 +87,19 @@ void ProposalOp<Context>::RunWithType() {
rcnn::GenerateGridAnchors( rcnn::GenerateGridAnchors(
num_proposals, A, feat_h, feat_w, num_proposals, A, feat_h, feat_w,
strides[i], base_offset, strides[i], base_offset,
Adata, indices.data(), Pdata); Adata, &indices[0], Pdata);
base_offset += K * A; base_offset += (A * K);
} }
rcnn::GenerateMSProposals( rcnn::GenerateMSProposals(
num_candidates, num_proposals, num_candidates, num_proposals,
im_h, im_w, min_box_h, min_box_w, im_h, im_w, min_box_h, min_box_w,
scores, deltas, indices.data(), Pdata); scores, deltas, &indices[0], Pdata);
// Sort, NMS and Retrieve // Sort, NMS and Retrieve
rcnn::SortProposals(0, num_proposals - 1, num_proposals, Pdata); rcnn::SortProposals(0, num_proposals - 1, num_proposals, Pdata);
rcnn::ApplyNMS(num_proposals, post_nms_top_n, nms_thresh, rcnn::ApplyNMS(num_proposals, post_nms_top_n, nms_thresh,
proposals_.template mutable_data<BT, Context>(), proposals_.template mutable_data<BT, Context>(),
RIdata, num_rois, ctx()); &roi_indices[0], num_rois, ctx());
rcnn::RetrieveRoIs(num_rois, n, Pdata, RIdata, Ydata); rcnn::RetrieveRoIs(num_rois, n, Pdata, &roi_indices[0], Ydata);
} else { } else {
LOG(FATAL) << "Expected at least one stride for proposals."; LOG(FATAL) << "Expected at least one stride for proposals.";
} }
...@@ -126,7 +123,7 @@ void ProposalOp<Context>::RunWithType() { ...@@ -126,7 +123,7 @@ void ProposalOp<Context>::RunWithType() {
ctx()->template Copy<BT, BC, BC>(Y.count(), ctx()->template Copy<BT, BC, BC>(Y.count(),
rois, Output(0)->template data<BT, BC>()); rois, Output(0)->template data<BT, BC>());
rcnn::CollectRoIs<BT>(total_rois, min_level, max_level, rcnn::CollectRoIs(total_rois, min_level, max_level,
canonical_level, canonical_scale, rois, bins); canonical_level, canonical_scale, rois, bins);
for (int i = 0; i < OutputSize(); i++) { for (int i = 0; i < OutputSize(); i++) {
...@@ -138,17 +135,92 @@ void ProposalOp<Context>::RunWithType() { ...@@ -138,17 +135,92 @@ void ProposalOp<Context>::RunWithType() {
} }
} }
template <class Context> template <typename T>
void ProposalOp<Context>::RunWithRetinaNet() {
using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox
int feat_h, feat_w, C = Input(-3).dim(2), A, K;
int total_proposals = 0;
int num_candidates, num_boxes, num_proposals;
auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>();
auto* Ydata = Output(0)->template mutable_data<BT, BC>();
for (int n = 0; n < num_images; ++n) {
const BT im_h = im_info[0];
const BT im_w = im_info[1];
const BT im_scale = im_info[2];
auto* scores = batch_scores + n * Input(-3).stride(0);
auto* deltas = batch_deltas + n * Input(-2).stride(0);
CHECK_EQ(strides.size(), InputSize() - 3)
<< "\nGiven " << strides.size() << " strides and "
<< InputSize() - 3 << " feature inputs";
// Select the Top-K candidates as proposals
num_boxes = Input(-3).dim(1);
num_candidates = Input(-3).count(1);
roi_indices.resize(num_candidates); num_candidates = 0;
for (int i = 0; i < roi_indices.size(); ++i)
if (scores[i] > score_thresh)
roi_indices[num_candidates++] = i;
scores_ex.resize(num_candidates);
for (int i = 0; i < num_candidates; ++i)
scores_ex[i] = scores[roi_indices[i]];
num_proposals = std::min(num_candidates, (int)pre_nms_top_n);
utils::math::ArgPartition(
num_candidates, num_proposals,
true, &scores_ex[0], indices);
for (int i = 0; i < num_proposals; ++i)
indices[i] = roi_indices[indices[i]];
// Decode the candidates
int base_offset = 0;
for (int i = 0; i < strides.size(); i++) {
feat_h = Input(i).dim(2), feat_w = Input(i).dim(3);
A = int(ratios.size() * scales.size()), K = feat_h * feat_w;
anchors_.Reshape({ A, 4 });
auto* Adata = anchors_.template mutable_data<BT, BC>();
rcnn::GenerateAnchors(strides[i],
(int)ratios.size(), (int)scales.size(),
&ratios[0], &scales[0], Adata);
rcnn::GenerateGridAnchors(
num_proposals, C, A, feat_h, feat_w,
strides[i], base_offset,
Adata, &indices[0], Ydata);
base_offset += (A * K);
}
rcnn::GenerateMCProposals(
num_proposals, num_boxes, C, n,
im_h, im_w, im_scale,
scores, deltas, &indices[0], Ydata);
total_proposals += num_proposals;
Ydata += (num_proposals * 7);
im_info += Input(-1).dim(1);
}
Output(0)->Reshape({ total_proposals, 7 });
}
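The selection step above differs from the R-CNN path: candidates are first filtered by score_thresh, the survivors are partially sorted by score, and the winning positions are mapped back to the original candidate indices before decoding. A standalone sketch of that flow with made-up values (std::partial_sort stands in for utils::math::ArgPartition):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    const float score_thresh = 0.05f;
    const int pre_nms_top_n = 3;
    std::vector<float> scores = { 0.01f, 0.9f, 0.2f, 0.04f, 0.6f, 0.3f };
    // 1) Keep only the candidates above the score threshold.
    std::vector<int64_t> kept;
    for (int64_t i = 0; i < (int64_t)scores.size(); ++i)
        if (scores[i] > score_thresh) kept.push_back(i);
    // 2) Partially sort the kept candidates by score, highest first.
    std::vector<size_t> order(kept.size());
    for (size_t i = 0; i < order.size(); ++i) order[i] = i;
    const size_t k = std::min(order.size(), (size_t)pre_nms_top_n);
    std::partial_sort(order.begin(), order.begin() + k, order.end(),
        [&](size_t a, size_t b) { return scores[kept[a]] > scores[kept[b]]; });
    // 3) Map the Top-K positions back to the original candidate indices.
    for (size_t i = 0; i < k; ++i)
        std::printf("index=%lld score=%.2f\n",
            (long long)kept[order[i]], scores[kept[order[i]]]);
    return 0;
}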
template <class Context> template <class Context>
void ProposalOp<Context>::RunOnDevice() { void ProposalOp<Context>::RunOnDevice() {
num_images = Input(0).dim(0); num_images = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images) CHECK_EQ(Input(-1).dim(0), num_images)
<< "\nExpected " << num_images << " groups of image info, " << "\nExpected " << num_images << " groups of image info, "
<< "but got " << Input(-1).dim(0) << "."; << "but got " << Input(-1).dim(0) << ".";
roi_indices.resize(post_nms_top_n);
Output(0)->Reshape({ num_images * post_nms_top_n, 5 });
if (XIsType(Input(-3), float)) RunWithType<float>(); if (det_type == "RCNN") {
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" }); roi_indices.resize(post_nms_top_n);
Output(0)->Reshape({ num_images * post_nms_top_n, 5 });
if (XIsType(Input(-3), float)) { RunWithRCNN<float>(); }
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
} else if (det_type == "RETINANET") {
Output(0)->Reshape({ num_images * pre_nms_top_n, 7 });
if (XIsType(Input(-3), float)) { RunWithRetinaNet<float>(); }
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
} else {
LOG(FATAL) << "Unsupported detector: " << det_type;
}
} }
DEPLOY_CPU(Proposal); DEPLOY_CPU(Proposal);
......
...@@ -22,12 +22,14 @@ class ProposalOp final : public Operator<Context> { ...@@ -22,12 +22,14 @@ class ProposalOp final : public Operator<Context> {
public: public:
ProposalOp(const OperatorDef& def, Workspace* ws) ProposalOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), : Operator<Context>(def, ws),
det_type(OperatorBase::Arg<string>("det_type", "RCNN")),
strides(OperatorBase::Args<int64_t>("strides")), strides(OperatorBase::Args<int64_t>("strides")),
ratios(OperatorBase::Args<float>("ratios")), ratios(OperatorBase::Args<float>("ratios")),
scales(OperatorBase::Args<float>("scales")), scales(OperatorBase::Args<float>("scales")),
pre_nms_top_n(OperatorBase::Arg<int64_t>("pre_nms_top_n", 6000)), pre_nms_top_n(OperatorBase::Arg<int64_t>("pre_nms_top_n", 6000)),
post_nms_top_n(OperatorBase::Arg<int64_t>("post_nms_top_n", 300)), post_nms_top_n(OperatorBase::Arg<int64_t>("post_nms_top_n", 300)),
nms_thresh(OperatorBase::Arg<float>("nms_thresh", (float)0.7)), nms_thresh(OperatorBase::Arg<float>("nms_thresh", 0.7f)),
score_thresh(OperatorBase::Arg<float>("score_thresh", 0.05f)),
min_size(OperatorBase::Arg<int64_t>("min_size", 16)), min_size(OperatorBase::Arg<int64_t>("min_size", 16)),
min_level(OperatorBase::Arg<int64_t>("min_level", 2)), min_level(OperatorBase::Arg<int64_t>("min_level", 2)),
max_level(OperatorBase::Arg<int64_t>("max_level", 5)), max_level(OperatorBase::Arg<int64_t>("max_level", 5)),
...@@ -37,14 +39,16 @@ class ProposalOp final : public Operator<Context> { ...@@ -37,14 +39,16 @@ class ProposalOp final : public Operator<Context> {
void RunOnDevice() override; void RunOnDevice() override;
template <typename T> void RunWithType(); template <typename T> void RunWithRCNN();
template <typename T> void RunWithRetinaNet();
protected: protected:
string det_type;
float nms_thresh, score_thresh;
vector<int64_t> strides, indices, roi_indices; vector<int64_t> strides, indices, roi_indices;
vector<float> ratios, scales; vector<float> ratios, scales, scores_ex;
int64_t pre_nms_top_n, post_nms_top_n, min_size, num_images; int64_t pre_nms_top_n, post_nms_top_n, min_size, num_images;
int64_t min_level, max_level, canonical_level, canonical_scale; int64_t min_level, max_level, canonical_level, canonical_scale;
float nms_thresh;
Tensor anchors_, proposals_, nms_mask_; Tensor anchors_, proposals_, nms_mask_;
}; };
......
...@@ -22,7 +22,7 @@ bool GraphGradientMaker::CheckGrad( ...@@ -22,7 +22,7 @@ bool GraphGradientMaker::CheckGrad(
if (external_grads_.count(g_output)) if (external_grads_.count(g_output))
inputs_to_grads_[output] = g_output; inputs_to_grads_[output] = g_output;
// Consider generating a virtual grad // Consider generating a virtual grad
else if (targets.count(output) && g_output != "ignore") { else if (targets.count(output) && g_output != "NULL") {
gen_grads.push_back({ output, idx }); gen_grads.push_back({ output, idx });
inputs_to_grads_[output] = g_output; inputs_to_grads_[output] = g_output;
} }
...@@ -88,7 +88,7 @@ void GraphGradientMaker::Make( ...@@ -88,7 +88,7 @@ void GraphGradientMaker::Make(
string g_output = ""; string g_output = "";
if (inputs_to_grads_.count(output) > 0) if (inputs_to_grads_.count(output) > 0)
g_output = inputs_to_grads_[output]; g_output = inputs_to_grads_[output];
if (g_output.empty()) g_output = "ignore"; if (g_output.empty()) g_output = "NULL";
g_outputs.emplace_back(g_output); g_outputs.emplace_back(g_output);
} }
Gradient grad = MakeGradientForOp(op, g_outputs); Gradient grad = MakeGradientForOp(op, g_outputs);
...@@ -194,10 +194,10 @@ void GraphGradientMaker::Make( ...@@ -194,10 +194,10 @@ void GraphGradientMaker::Make(
#define SHARE_OUTPUTS_BODY \ #define SHARE_OUTPUTS_BODY \
{string output = op->output(ix); \ {string output = op->output(ix); \
if (output == "ignore") continue; \ if (output == "NULL") continue; \
if (ref_count.count(output) == 0) { \ if (ref_count.count(output) == 0) { \
if (ignore_grads_.count(output) > 0) \ if (ignore_grads_.count(output) > 0) \
*op->mutable_output(ix) = "ignore"; \ *op->mutable_output(ix) = "NULL"; \
continue; \ continue; \
} \ } \
if (op->type() == "TemplateGradient" || \ if (op->type() == "TemplateGradient" || \
...@@ -212,13 +212,22 @@ void GraphGradientMaker::Make( ...@@ -212,13 +212,22 @@ void GraphGradientMaker::Make(
*op->mutable_output(ix) = temp_grad;} *op->mutable_output(ix) = temp_grad;}
void GraphGradientMaker::Share(GraphDef& graph) { void GraphGradientMaker::Share(GraphDef& graph) {
Set<int> invalid_ops;
Map<string, int> ref_count; Map<string, int> ref_count;
// Count the refs for detecting leaf nodes // Count the refs for detecting leaf nodes
for (auto& op : graph.op()) { for (int i = 0; i < graph.op_size(); ++i) {
const OperatorDef& op = graph.op(i);
// Ignore the non-gradient ops // Ignore the non-gradient ops
if (op.type().find("Gradient") == string::npos) continue; if (op.type().find("Gradient") == string::npos) continue;
if (op.type() == "GradientGather" &&
ignore_grads_.count(op.output(0))) {
for (auto& input : op.input())
ignore_grads_.insert(input);
invalid_ops.insert(i); continue;
}
for (auto& input : op.input()) for (auto& input : op.input())
if (input.find("grad") != string::npos) ref_count[input] += 1; if (input.find("grad") != string::npos)
ref_count[input] += 1;
} }
// Prepare the Gradients Pool // Prepare the Gradients Pool
...@@ -247,6 +256,8 @@ void GraphGradientMaker::Share(GraphDef& graph) { ...@@ -247,6 +256,8 @@ void GraphGradientMaker::Share(GraphDef& graph) {
OperatorDef* op = graph.mutable_op(i); OperatorDef* op = graph.mutable_op(i);
// Ignore the non-gradient ops // Ignore the non-gradient ops
if (op->type().find("Gradient") == string::npos) continue; if (op->type().find("Gradient") == string::npos) continue;
// Ignore the invalid ops
if (invalid_ops.count(i)) { op->mutable_type()->clear(); continue; }
// GC to store the grads that have finished their lifecycle // GC to store the grads that have finished their lifecycle
vector<string> GC; vector<string> GC;
// Inplace-aware // Inplace-aware
......
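For context, the counting pass above records how many gradient ops still read each "*grad*" tensor so that a buffer can go back to the gradients pool once its count reaches zero; the new GradientGather branch additionally spreads the ignore set to the gather inputs and voids the op itself. A tiny standalone sketch of the counting/release idea (names and data are illustrative only):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
    // Each inner vector lists the grad tensors one gradient op reads.
    std::vector<std::vector<std::string>> op_inputs = {
        { "x_grad" }, { "x_grad", "y_grad" }, { "y_grad" },
    };
    std::map<std::string, int> ref_count;
    for (const auto& inputs : op_inputs)
        for (const auto& name : inputs)
            if (name.find("grad") != std::string::npos) ref_count[name] += 1;
    // Simulated execution: a buffer is recyclable once its count hits zero.
    for (const auto& inputs : op_inputs)
        for (const auto& name : inputs)
            if (--ref_count[name] == 0)
                std::printf("%s can be returned to the pool\n", name.c_str());
    return 0;
}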
...@@ -19,7 +19,7 @@ GraphDef GraphOptimizer::PruneNodes(const GraphDef& input_def) { ...@@ -19,7 +19,7 @@ GraphDef GraphOptimizer::PruneNodes(const GraphDef& input_def) {
if (!op.input_size()) sp_u.resize(op.output_size(), ""); if (!op.input_size()) sp_u.resize(op.output_size(), "");
else sp_u.assign(op.input().begin(), op.input().end()); else sp_u.assign(op.input().begin(), op.input().end());
for (const auto& u : sp_u) { for (const auto& u : sp_u) {
if (u == "ignore") continue; if (u == "NULL") continue;
dag_[v].parents.push_back(u); dag_[v].parents.push_back(u);
dag_[u].childs.push_back(v); dag_[u].childs.push_back(v);
dag_[v].op_idx = i; dag_[v].op_idx = i;
...@@ -66,32 +66,32 @@ GraphDef GraphOptimizer::PruneNodes(const GraphDef& input_def) { ...@@ -66,32 +66,32 @@ GraphDef GraphOptimizer::PruneNodes(const GraphDef& input_def) {
for (int i = 0; i < input_def.op(it).input_size(); ++i) { for (int i = 0; i < input_def.op(it).input_size(); ++i) {
string input = input_def.op(it).input(i); string input = input_def.op(it).input(i);
if (!colored_[input] || !outputs.count(input)) if (!colored_[input] || !outputs.count(input))
*op_def.mutable_input(i) = "ignore"; *op_def.mutable_input(i) = "NULL";
} }
// Rewritten for outputs // Rewritten for outputs
for (int i = 0; i < input_def.op(it).output_size(); ++i) { for (int i = 0; i < input_def.op(it).output_size(); ++i) {
string output = input_def.op(it).output(i); string output = input_def.op(it).output(i);
if (!colored_[output]) *op_def.mutable_output(i) = "ignore"; if (!colored_[output]) *op_def.mutable_output(i) = "NULL";
else outputs.insert(op_def.output(i)); else outputs.insert(op_def.output(i));
} }
// Rewritten for some hand-crafted cases // Rewritten for some hand-crafted cases
if (op_def.type() == "AffineGradient") { if (op_def.type() == "AffineGradient") {
// Trigger in-place if not solving dAlpha // Trigger in-place if not solving dAlpha
if (op_def.output(1) == "ignore") if (op_def.output(1) == "NULL")
*op_def.mutable_input(0) = "ignore"; *op_def.mutable_input(0) = "NULL";
} else if (op_def.type() == "MulGradient" || } else if (op_def.type() == "MulGradient" ||
op_def.type() == "RMulGradient") { op_def.type() == "RMulGradient") {
if (op_def.output(0) == "ignore") if (op_def.output(0) == "NULL")
*op_def.mutable_input(1) = "ignore"; *op_def.mutable_input(1) = "NULL";
if (op_def.output(1) == "ignore") if (op_def.output(1) == "NULL")
*op_def.mutable_input(0) = "ignore"; *op_def.mutable_input(0) = "NULL";
} else if (op_def.type() == "DivGradient" || } else if (op_def.type() == "DivGradient" ||
op_def.type() == "RDivGradient") { op_def.type() == "RDivGradient") {
// dX2 requires both X1 and X2 // dX2 requires both X1 and X2
if (op_def.output(1) == "ignore") { if (op_def.output(1) == "NULL") {
*op_def.mutable_input(0) = "ignore"; *op_def.mutable_input(0) = "NULL";
if (op_def.output(0) == "ignore") if (op_def.output(0) == "NULL")
*op_def.mutable_input(1) = "ignore"; *op_def.mutable_input(1) = "NULL";
} }
} }
// Push into the final sequence // Push into the final sequence
...@@ -117,7 +117,7 @@ GraphDef GraphOptimizer::AddInplace(const GraphDef& input_def) { ...@@ -117,7 +117,7 @@ GraphDef GraphOptimizer::AddInplace(const GraphDef& input_def) {
if (!op.input_size()) sp_u.resize(op.output_size(), ""); if (!op.input_size()) sp_u.resize(op.output_size(), "");
else sp_u.assign(op.input().begin(), op.input().end()); else sp_u.assign(op.input().begin(), op.input().end());
for (const auto& u : sp_u) { for (const auto& u : sp_u) {
if (u == "ignore") continue; if (u == "NULL") continue;
dag_[v].parents.push_back(u); dag_[v].parents.push_back(u);
dag_[u].childs.push_back(v); dag_[u].childs.push_back(v);
dag_[v].op_idx = i; dag_[v].op_idx = i;
......
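The hand-crafted pruning cases above follow directly from the chain rule. For y = x_1 x_2, \partial y / \partial x_1 = x_2 and \partial y / \partial x_2 = x_1, so dropping dX1 lets the optimizer rewrite X2 to "NULL" and vice versa. For y = x_1 / x_2, \partial y / \partial x_1 = 1 / x_2 and \partial y / \partial x_2 = -x_1 / x_2^2, so dX1 only needs X2 while dX2 needs both inputs; this is why X1 is dropped as soon as dX2 is not requested, and X2 only once dX1 is also unneeded.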
...@@ -100,10 +100,11 @@ OperatorBase* TryCreateOperator( ...@@ -100,10 +100,11 @@ OperatorBase* TryCreateOperator(
case PROTO_CPU: case PROTO_CPU:
return CPUOperatorRegistry()->Create(key, def, ws); return CPUOperatorRegistry()->Create(key, def, ws);
case PROTO_CUDA: case PROTO_CUDA:
if (def.device_option().has_engine() && #ifdef WITH_CUDNN
def.device_option().engine() == "CUDNN" && if (CUDNNOperatorRegistry()->Has(key) &&
CUDNNOperatorRegistry()->Has(key)) CUDAContext::cuda_object()->cudnn_enabled)
return CUDNNOperatorRegistry()->Create(key, def, ws); return CUDNNOperatorRegistry()->Create(key, def, ws);
#endif
return CUDAOperatorRegistry()->Create(key, def, ws); return CUDAOperatorRegistry()->Create(key, def, ws);
case PROTO_CNML: case PROTO_CNML:
return CNMLOperatorRegistry()->Create(key, def, ws); return CNMLOperatorRegistry()->Create(key, def, ws);
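The CUDA branch above now keys off a single process-wide switch (CUDAContext::cuda_object()->cudnn_enabled, compiled in only under WITH_CUDNN) rather than a per-op "engine" device option. A minimal sketch of the same try-the-specialized-registry-then-fall-back pattern, with hypothetical registry types:

#include <cstdio>
#include <functional>
#include <map>
#include <string>

// Hypothetical registries: op type -> factory.
using Registry = std::map<std::string, std::function<void()>>;

void Create(const std::string& key, const Registry& cudnn_registry,
            const Registry& cuda_registry, bool cudnn_enabled) {
    // Prefer the CuDNN implementation when it is registered and enabled.
    auto it = cudnn_registry.find(key);
    if (cudnn_enabled && it != cudnn_registry.end()) { it->second(); return; }
    cuda_registry.at(key)();  // otherwise fall back to the generic CUDA kernel
}

int main() {
    Registry cudnn = { { "Conv2d", [] { std::printf("CuDNN Conv2d\n"); } } };
    Registry cuda  = { { "Conv2d", [] { std::printf("CUDA Conv2d\n"); } },
                       { "Crop",   [] { std::printf("CUDA Crop\n"); } } };
    Create("Conv2d", cudnn, cuda, /* cudnn_enabled = */ true);   // CuDNN Conv2d
    Create("Conv2d", cudnn, cuda, /* cudnn_enabled = */ false);  // CUDA Conv2d
    Create("Crop",   cudnn, cuda, /* cudnn_enabled = */ true);   // CUDA Crop
    return 0;
}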
...@@ -155,7 +156,7 @@ Gradient MakeGradientForOp( ...@@ -155,7 +156,7 @@ Gradient MakeGradientForOp(
); );
} }
} }
// Copy device option, engine, and arguments // Copy device option and arguments
if (maker->CopyDeviceOption() && def.has_device_option()) if (maker->CopyDeviceOption() && def.has_device_option())
for (auto& grad_def : grad.ops) for (auto& grad_def : grad.ops)
grad_def.mutable_device_option()->CopyFrom( grad_def.mutable_device_option()->CopyFrom(
......
...@@ -16,9 +16,9 @@ bool OpSchema::Verify(const OperatorDef& def) const { ...@@ -16,9 +16,9 @@ bool OpSchema::Verify(const OperatorDef& def) const {
<< ", max=" << max_output_ << "]"; << ", max=" << max_output_ << "]";
} }
for (int in = 0; in < def.input_size(); in++) { for (int in = 0; in < def.input_size(); in++) {
if (def.input(in) == "ignore") continue; if (def.input(in) == "NULL") continue;
for (int out = 0; out < def.output_size(); out++) { for (int out = 0; out < def.output_size(); out++) {
if (def.output(out) == "ignore") continue; if (def.output(out) == "NULL") continue;
if (def.input(in) == def.output(out) && (!CheckInplace(in, out))) if (def.input(in) == def.output(out) && (!CheckInplace(in, out)))
LOG(FATAL) << indicator << "Input(" << in << ") and " LOG(FATAL) << indicator << "Input(" << in << ") and "
<< "Output(" << out << ") can not be set to inplace."; << "Output(" << out << ") can not be set to inplace.";
......
...@@ -7,7 +7,7 @@ namespace dragon { ...@@ -7,7 +7,7 @@ namespace dragon {
/*! Create some internal tensors */ /*! Create some internal tensors */
void Workspace::InitWorkspace() { void Workspace::InitWorkspace() {
CreateTensor("ignore"); CreateTensor("NULL");
Tensor* recomputing_flag = CreateTensor( Tensor* recomputing_flag = CreateTensor(
"/opt/recomputing_flag")->Reshape({ 1 }); "/opt/recomputing_flag")->Reshape({ 1 });
recomputing_flag->mutable_data recomputing_flag->mutable_data
......
...@@ -351,7 +351,7 @@ ONNXImporterReturns ONNXBackend::ArgReduceNodeImporter( ...@@ -351,7 +351,7 @@ ONNXImporterReturns ONNXBackend::ArgReduceNodeImporter(
auto* operation = attributes.AddRewrittenAttribute("operation"); auto* operation = attributes.AddRewrittenAttribute("operation");
if (onnx_node->node.op_type() == "ArgMax") operation->set_s("ARGMAX"); if (onnx_node->node.op_type() == "ArgMax") operation->set_s("ARGMAX");
else if (onnx_node->node.op_type() == "ArgMin") operation->set_s("ARGMIN"); else if (onnx_node->node.op_type() == "ArgMin") operation->set_s("ARGMIN");
node.add_output("ignore"); // A dummy output("Value") is required node.add_output("NULL"); // A dummy output("Value") is required
return CommonONNXNodeImporter(&onnx_node_v2, ctx); return CommonONNXNodeImporter(&onnx_node_v2, ctx);
} }
......
...@@ -46,7 +46,7 @@ void PReluGradientOp<Context>::RunWithType() { ...@@ -46,7 +46,7 @@ void PReluGradientOp<Context>::RunWithType() {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
DECLARE_MULTIPLIER(multiplier, channels * dim); DECLARE_MULTIPLIER(multiplier, channels * dim);
auto* dWdata = Output(1)->template mutable_data<T, Context>(); auto* dWdata = Output(1)->template mutable_data<T, Context>();
auto* dWBdata = ws()->template caches<T, Context>({ channels * dim })[0]; auto* dWBdata = ws()->template caches<T, Context>({ channels * dim })[0];
...@@ -55,7 +55,7 @@ void PReluGradientOp<Context>::RunWithType() { ...@@ -55,7 +55,7 @@ void PReluGradientOp<Context>::RunWithType() {
dYdata, Xdata, multiplier, dWBdata, dWdata, ctx()); dYdata, Xdata, multiplier, dWBdata, dWdata, ctx());
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
kernel::PReluGrad(Output(0)->count(), channels, dim, kernel::PReluGrad(Output(0)->count(), channels, dim,
......
...@@ -60,13 +60,13 @@ template <class Context> template <typename T> ...@@ -60,13 +60,13 @@ template <class Context> template <typename T>
void AddGradientOp<Context>::EltwiseRunWithType() { void AddGradientOp<Context>::EltwiseRunWithType() {
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>( ctx()->template Copy<T, Context, Context>(
Output(1)->count(), dx2, dy); Output(1)->count(), dx2, dy);
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>( ctx()->template Copy<T, Context, Context>(
Output(0)->count(), dx1, dy); Output(0)->count(), dx1, dy);
...@@ -78,14 +78,14 @@ void AddGradientOp<Context>::BroadcastRunWithType(int type) { ...@@ -78,14 +78,14 @@ void AddGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2; DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
vector<int> dims = { rows, cols }, axes = { type }; vector<int> dims = { rows, cols }, axes = { type };
kernel::ReduceSum(2, dims.data(), kernel::ReduceSum(2, dims.data(),
1, axes.data(), 1.f, dy, dx2, ctx()); 1, axes.data(), 1.f, dy, dx2, ctx());
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>( ctx()->template Copy<T, Context, Context>(
X1->count(), dx1, dy); X1->count(), dx1, dy);
......
...@@ -60,7 +60,7 @@ void AffineGradientOp<Context>::RunWithType() { ...@@ -60,7 +60,7 @@ void AffineGradientOp<Context>::RunWithType() {
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
// dA = X * dY // dA = X * dY
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
Output(1)->ReshapeLike(Input(1)); Output(1)->ReshapeLike(Input(1));
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dAdata = Output(1)->template mutable_data<T, Context>(); auto* dAdata = Output(1)->template mutable_data<T, Context>();
...@@ -74,7 +74,7 @@ void AffineGradientOp<Context>::RunWithType() { ...@@ -74,7 +74,7 @@ void AffineGradientOp<Context>::RunWithType() {
} }
// dB = dY // dB = dY
if (Output(2)->name() != "ignore") { if (Output(2)->name() != "NULL") {
Output(2)->ReshapeLike(Input(1)); Output(2)->ReshapeLike(Input(1));
auto* dBdata = Output(2)->template mutable_data<T, Context>(); auto* dBdata = Output(2)->template mutable_data<T, Context>();
// Eltwise // Eltwise
...@@ -87,7 +87,7 @@ void AffineGradientOp<Context>::RunWithType() { ...@@ -87,7 +87,7 @@ void AffineGradientOp<Context>::RunWithType() {
} }
// dX = alpha * dY // dX = alpha * dY
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
kernel::AffineGrad(outer_dim, inner_dim, scale_dim, kernel::AffineGrad(outer_dim, inner_dim, scale_dim,
dYdata, Adata, dXdata, ctx()); dYdata, Adata, dXdata, ctx());
} }
......
...@@ -101,7 +101,7 @@ void CuDNNAffineGradientOp<Context>::RunWithType() { ...@@ -101,7 +101,7 @@ void CuDNNAffineGradientOp<Context>::RunWithType() {
CUDNNType<CT>::type, CUDNN_PROPAGATE_NAN)); CUDNNType<CT>::type, CUDNN_PROPAGATE_NAN));
// dA = X * dY // dA = X * dY
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
Output(1)->ReshapeLike(Input(1)); Output(1)->ReshapeLike(Input(1));
auto* Xdata = Input(0).template data<DT, Context>(); auto* Xdata = Input(0).template data<DT, Context>();
auto* dAdata = Output(1)->template mutable_data<DT, Context>(); auto* dAdata = Output(1)->template mutable_data<DT, Context>();
...@@ -119,7 +119,7 @@ void CuDNNAffineGradientOp<Context>::RunWithType() { ...@@ -119,7 +119,7 @@ void CuDNNAffineGradientOp<Context>::RunWithType() {
} }
// dB = dY // dB = dY
if (Output(2)->name() != "ignore") { if (Output(2)->name() != "NULL") {
Output(2)->ReshapeLike(Input(1)); Output(2)->ReshapeLike(Input(1));
auto* dBdata = Output(2)->template mutable_data<DT, Context>(); auto* dBdata = Output(2)->template mutable_data<DT, Context>();
// Eltwise // Eltwise
...@@ -136,7 +136,7 @@ void CuDNNAffineGradientOp<Context>::RunWithType() { ...@@ -136,7 +136,7 @@ void CuDNNAffineGradientOp<Context>::RunWithType() {
} }
// dX = alpha * dY // dX = alpha * dY
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
CUDNN_CHECK(cudnnOpTensor( CUDNN_CHECK(cudnnOpTensor(
ctx()->cudnn_handle(), mul_desc, ctx()->cudnn_handle(), mul_desc,
CUDNNType<DT>::one, input_desc, dYdata, CUDNNType<DT>::one, input_desc, dYdata,
......
...@@ -61,7 +61,7 @@ void DivGradientOp<Context>::EltwiseRunWithType() { ...@@ -61,7 +61,7 @@ void DivGradientOp<Context>::EltwiseRunWithType() {
DEFINE_FUNDAMENTAL_OP_X1X2; DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>(); auto* x1 = Input(0).template data<T, Context>();
auto* x2 = Input(1).template data<T, Context>(); auto* x2 = Input(1).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
...@@ -73,7 +73,7 @@ void DivGradientOp<Context>::EltwiseRunWithType() { ...@@ -73,7 +73,7 @@ void DivGradientOp<Context>::EltwiseRunWithType() {
math::Scale(X2->count(), -1.f, dx2, dx2, ctx()); math::Scale(X2->count(), -1.f, dx2, dx2, ctx());
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>(); auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
math::Div(X1->count(), dy, x2, dx1, ctx()); math::Div(X1->count(), dy, x2, dx1, ctx());
...@@ -85,7 +85,7 @@ void DivGradientOp<Context>::BroadcastRunWithType(int type) { ...@@ -85,7 +85,7 @@ void DivGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2; DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>(); auto* x1 = Input(0).template data<T, Context>();
auto* x2 = Input(1).template data<T, Context>(); auto* x2 = Input(1).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
...@@ -100,7 +100,7 @@ void DivGradientOp<Context>::BroadcastRunWithType(int type) { ...@@ -100,7 +100,7 @@ void DivGradientOp<Context>::BroadcastRunWithType(int type) {
1, axes.data(), -1.f, cs[0], dx2, ctx()); 1, axes.data(), -1.f, cs[0], dx2, ctx());
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>(); auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
math::BroadcastDiv(rows, cols, type, dy, x2, dx1, ctx()); math::BroadcastDiv(rows, cols, type, dy, x2, dx1, ctx());
......
...@@ -116,13 +116,13 @@ void DotGradientOp<Context>::DotRunWithType() { ...@@ -116,13 +116,13 @@ void DotGradientOp<Context>::DotRunWithType() {
auto* Bdata = Input(1).template data<T, Context>(); auto* Bdata = Input(1).template data<T, Context>();
auto* dYdata = Input(-1).template data<T, CPUContext>(); auto* dYdata = Input(-1).template data<T, CPUContext>();
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* dAdata = Output(0)->template mutable_data<T, Context>(); auto* dAdata = Output(0)->template mutable_data<T, Context>();
math::Scale(Output(0)->count(), cast::to<float>( math::Scale(Output(0)->count(), cast::to<float>(
dYdata[0]), Bdata, dAdata, ctx()); dYdata[0]), Bdata, dAdata, ctx());
} }
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* dBdata = Output(1)->template mutable_data<T, Context>(); auto* dBdata = Output(1)->template mutable_data<T, Context>();
math::Scale(Output(0)->count(), cast::to<float>( math::Scale(Output(0)->count(), cast::to<float>(
dYdata[0]), Adata, dBdata, ctx()); dYdata[0]), Adata, dBdata, ctx());
...@@ -145,7 +145,7 @@ void DotGradientOp<Context>::GemmRunWithType() { ...@@ -145,7 +145,7 @@ void DotGradientOp<Context>::GemmRunWithType() {
auto* X2data = Input(1).template data<T, Context>(); auto* X2data = Input(1).template data<T, Context>();
auto* dYdata = Input(2).template data<T, Context>(); auto* dYdata = Input(2).template data<T, Context>();
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* dX1data = Output(0)->template mutable_data<T, Context>(); auto* dX1data = Output(0)->template mutable_data<T, Context>();
if (transA) { if (transA) {
math::Gemm( math::Gemm(
...@@ -162,7 +162,7 @@ void DotGradientOp<Context>::GemmRunWithType() { ...@@ -162,7 +162,7 @@ void DotGradientOp<Context>::GemmRunWithType() {
} }
} }
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* dX2data = Output(1)->template mutable_data<T, Context>(); auto* dX2data = Output(1)->template mutable_data<T, Context>();
if (transB) { if (transB) {
math::Gemm( math::Gemm(
......
...@@ -73,7 +73,7 @@ void EltwiseGradientOp<Context>::SumRunWithType() { ...@@ -73,7 +73,7 @@ void EltwiseGradientOp<Context>::SumRunWithType() {
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
for (int i = 0; i < OutputSize(); i++) { for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->name() == "ignore") continue; if (Output(i)->name() == "NULL") continue;
auto* dXdata = Output(i)->template mutable_data<T, Context>(); auto* dXdata = Output(i)->template mutable_data<T, Context>();
// Copy the dY to dX and apply the coefficients // Copy the dY to dX and apply the coefficients
math::Scale(nelements, coeffs[i], dYdata, dXdata, ctx()); math::Scale(nelements, coeffs[i], dYdata, dXdata, ctx());
...@@ -86,7 +86,7 @@ void EltwiseGradientOp<Context>::ProdRunWithType() { ...@@ -86,7 +86,7 @@ void EltwiseGradientOp<Context>::ProdRunWithType() {
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
for (int i = 0; i < OutputSize(); i++) { for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->name() == "ignore") continue; if (Output(i)->name() == "NULL") continue;
auto* dXdata = Output(i)->template mutable_data<T, Context>(); auto* dXdata = Output(i)->template mutable_data<T, Context>();
// Compute the first term of dX // Compute the first term of dX
bool initialized = false; bool initialized = false;
......
...@@ -122,7 +122,7 @@ void FullyConnectedGradientOp<Context>::RunWithType() { ...@@ -122,7 +122,7 @@ void FullyConnectedGradientOp<Context>::RunWithType() {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* dYdata = Input(2).template data<T, Context>(); auto* dYdata = Input(2).template data<T, Context>();
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
Output(1)->ReshapeLike(Input(1)); Output(1)->ReshapeLike(Input(1));
auto* dWdata = Output(1)->template mutable_data<T, Context>(); auto* dWdata = Output(1)->template mutable_data<T, Context>();
if (transW) { if (transW) {
...@@ -140,7 +140,7 @@ void FullyConnectedGradientOp<Context>::RunWithType() { ...@@ -140,7 +140,7 @@ void FullyConnectedGradientOp<Context>::RunWithType() {
} }
} }
if (Output(2)->name() != "ignore") { if (Output(2)->name() != "NULL") {
DECLARE_MULTIPLIER(multiplier, M); DECLARE_MULTIPLIER(multiplier, M);
Output(2)->Reshape({ N }); Output(2)->Reshape({ N });
auto* dBdata = Output(2)->template mutable_data<T, Context>(); auto* dBdata = Output(2)->template mutable_data<T, Context>();
...@@ -150,7 +150,7 @@ void FullyConnectedGradientOp<Context>::RunWithType() { ...@@ -150,7 +150,7 @@ void FullyConnectedGradientOp<Context>::RunWithType() {
0.f, dBdata, ctx()); 0.f, dBdata, ctx());
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
if (transW) { if (transW) {
......
...@@ -70,14 +70,14 @@ void MatmulGradientOp<Context>::RunWithType() { ...@@ -70,14 +70,14 @@ void MatmulGradientOp<Context>::RunWithType() {
T* dAdata = nullptr, *dBdata = nullptr; T* dAdata = nullptr, *dBdata = nullptr;
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
dAdata = Output(0)->template mutable_data<T, Context>(); dAdata = Output(0)->template mutable_data<T, Context>();
} if (Output(1)->name() != "ignore") { } if (Output(1)->name() != "NULL") {
dBdata = Output(1)->template mutable_data<T, Context>(); dBdata = Output(1)->template mutable_data<T, Context>();
} }
for (int i = 0; i < batch_size; ++i) { for (int i = 0; i < batch_size; ++i) {
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
if (transA) { if (transA) {
math::Gemm( math::Gemm(
transB ? CblasTrans : CblasNoTrans, transB ? CblasTrans : CblasNoTrans,
...@@ -94,7 +94,7 @@ void MatmulGradientOp<Context>::RunWithType() { ...@@ -94,7 +94,7 @@ void MatmulGradientOp<Context>::RunWithType() {
0.f, dAdata + i * A_stride, ctx()); 0.f, dAdata + i * A_stride, ctx());
} }
} }
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
if (transB) { if (transB) {
math::Gemm( math::Gemm(
CblasTrans, CblasTrans,
......
...@@ -83,11 +83,11 @@ template <class Context> template <typename T> ...@@ -83,11 +83,11 @@ template <class Context> template <typename T>
void MaximumGradientOp<Context>::BroadcastRunWithType() { void MaximumGradientOp<Context>::BroadcastRunWithType() {
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
if (Input(0).count() == 1) { if (Input(0).count() == 1) {
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* dAdata = Output(0)->template mutable_data<T, Context>(); auto* dAdata = Output(0)->template mutable_data<T, Context>();
math::Set(1, cast::to<T>(0.f), dAdata, ctx()); math::Set(1, cast::to<T>(0.f), dAdata, ctx());
} }
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* Adata = Input(0).template data<T, CPUContext>(); auto* Adata = Input(0).template data<T, CPUContext>();
auto* Bdata = Input(1).template data<T, Context>(); auto* Bdata = Input(1).template data<T, Context>();
auto* dBdata = Output(1)->template mutable_data<T, Context>(); auto* dBdata = Output(1)->template mutable_data<T, Context>();
...@@ -95,14 +95,14 @@ void MaximumGradientOp<Context>::BroadcastRunWithType() { ...@@ -95,14 +95,14 @@ void MaximumGradientOp<Context>::BroadcastRunWithType() {
Bdata, Adata[0], dYdata, dBdata, (T*)nullptr, ctx()); Bdata, Adata[0], dYdata, dBdata, (T*)nullptr, ctx());
} }
} else if (Input(1).count() == 1) { } else if (Input(1).count() == 1) {
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* Adata = Input(0).template data<T, Context>(); auto* Adata = Input(0).template data<T, Context>();
auto* Bdata = Input(1).template data<T, CPUContext>(); auto* Bdata = Input(1).template data<T, CPUContext>();
auto* dAdata = Output(0)->template mutable_data<T, Context>(); auto* dAdata = Output(0)->template mutable_data<T, Context>();
kernel::BroadcastMaximumGrad(Output(0)->count(), kernel::BroadcastMaximumGrad(Output(0)->count(),
Adata, Bdata[0], dYdata, dAdata, (T*)nullptr, ctx()); Adata, Bdata[0], dYdata, dAdata, (T*)nullptr, ctx());
} }
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* dBdata = Output(1)->template mutable_data<T, Context>(); auto* dBdata = Output(1)->template mutable_data<T, Context>();
math::Set(1, cast::to<T>(0.f), dBdata, ctx()); math::Set(1, cast::to<T>(0.f), dBdata, ctx());
} }
......
...@@ -83,11 +83,11 @@ template <class Context> template <typename T> ...@@ -83,11 +83,11 @@ template <class Context> template <typename T>
void MinimumGradientOp<Context>::BroadcastRunWithType() { void MinimumGradientOp<Context>::BroadcastRunWithType() {
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
if (Input(0).count() == 1) { if (Input(0).count() == 1) {
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* dAdata = Output(0)->template mutable_data<T, Context>(); auto* dAdata = Output(0)->template mutable_data<T, Context>();
math::Set<T, Context>(1, cast::to<T>(0.f), dAdata, ctx()); math::Set<T, Context>(1, cast::to<T>(0.f), dAdata, ctx());
} }
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* Adata = Input(0).template data<T, CPUContext>(); auto* Adata = Input(0).template data<T, CPUContext>();
auto* Bdata = Input(1).template data<T, Context>(); auto* Bdata = Input(1).template data<T, Context>();
auto* dBdata = Output(1)->template mutable_data<T, Context>(); auto* dBdata = Output(1)->template mutable_data<T, Context>();
...@@ -95,14 +95,14 @@ void MinimumGradientOp<Context>::BroadcastRunWithType() { ...@@ -95,14 +95,14 @@ void MinimumGradientOp<Context>::BroadcastRunWithType() {
Bdata, Adata[0], dYdata, dBdata, (T*)nullptr, ctx()); Bdata, Adata[0], dYdata, dBdata, (T*)nullptr, ctx());
} }
} else if (Input(1).count() == 1) { } else if (Input(1).count() == 1) {
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* Adata = Input(0).template data<T, Context>(); auto* Adata = Input(0).template data<T, Context>();
auto* Bdata = Input(1).template data<T, CPUContext>(); auto* Bdata = Input(1).template data<T, CPUContext>();
auto* dAdata = Output(0)->template mutable_data<T, Context>(); auto* dAdata = Output(0)->template mutable_data<T, Context>();
kernel::BroadcastMinimumGrad(Output(0)->count(), kernel::BroadcastMinimumGrad(Output(0)->count(),
Adata, Bdata[0], dYdata, dAdata, (T*)nullptr, ctx()); Adata, Bdata[0], dYdata, dAdata, (T*)nullptr, ctx());
} }
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* dBdata = Output(1)->template mutable_data<T, Context>(); auto* dBdata = Output(1)->template mutable_data<T, Context>();
math::Set<T, Context>(1, cast::to<T>(0.f), dBdata, ctx()); math::Set<T, Context>(1, cast::to<T>(0.f), dBdata, ctx());
} }
......
...@@ -59,13 +59,13 @@ template <class Context> template <typename T> ...@@ -59,13 +59,13 @@ template <class Context> template <typename T>
void MulGradientOp<Context>::EltwiseRunWithType() { void MulGradientOp<Context>::EltwiseRunWithType() {
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>(); auto* x1 = Input(0).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
math::Mul(Output(1)->count(), dy, x1, dx2, ctx()); math::Mul(Output(1)->count(), dy, x1, dx2, ctx());
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>(); auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
math::Mul(Output(0)->count(), dy, x2, dx1, ctx()); math::Mul(Output(0)->count(), dy, x2, dx1, ctx());
...@@ -77,7 +77,7 @@ void MulGradientOp<Context>::BroadcastRunWithType(int type) { ...@@ -77,7 +77,7 @@ void MulGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2; DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>(); auto* x1 = Input(0).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
auto* c = ws()->template caches<T, Context>({ X1->count() })[0]; auto* c = ws()->template caches<T, Context>({ X1->count() })[0];
...@@ -87,7 +87,7 @@ void MulGradientOp<Context>::BroadcastRunWithType(int type) { ...@@ -87,7 +87,7 @@ void MulGradientOp<Context>::BroadcastRunWithType(int type) {
1, axes.data(), 1.f, c, dx2, ctx()); 1, axes.data(), 1.f, c, dx2, ctx());
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>(); auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
math::BroadcastMul(rows, cols, type, dy, x2, dx1, ctx()); math::BroadcastMul(rows, cols, type, dy, x2, dx1, ctx());
......
...@@ -60,13 +60,13 @@ template <class Context> template <typename T> ...@@ -60,13 +60,13 @@ template <class Context> template <typename T>
void RAddGradientOp<Context>::EltwiseRunWithType() { void RAddGradientOp<Context>::EltwiseRunWithType() {
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>( ctx()->template Copy<T, Context, Context>(
Output(1)->count(), dx2, dy); Output(1)->count(), dx2, dy);
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>( ctx()->template Copy<T, Context, Context>(
Output(0)->count(), dx1, dy); Output(0)->count(), dx1, dy);
...@@ -78,14 +78,14 @@ void RAddGradientOp<Context>::BroadcastRunWithType(int type) { ...@@ -78,14 +78,14 @@ void RAddGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2; DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
vector<int> dims = { rows, cols }, axes = { type - 2 }; vector<int> dims = { rows, cols }, axes = { type - 2 };
kernel::ReduceSum(2, dims.data(), kernel::ReduceSum(2, dims.data(),
1, axes.data(), 1.f, dy, dx1, ctx()); 1, axes.data(), 1.f, dy, dx1, ctx());
} }
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>( ctx()->template Copy<T, Context, Context>(
X2->count(), dx2, dy); X2->count(), dx2, dy);
...@@ -99,23 +99,23 @@ void RAddGradientOp<Context>::RunOnDevice() { ...@@ -99,23 +99,23 @@ void RAddGradientOp<Context>::RunOnDevice() {
Output(1)->ReshapeLike(*X2); Output(1)->ReshapeLike(*X2);
if (XIsType(Input(-1), int8_t)) { if (XIsType(Input(-1), int8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int8_t); DEFINE_FUNDAMENTAL_TYPED_RCALLER(int8_t);
} else if (XIsType(Input(-1), uint8_t)) { } else if (XIsType(Input(-1), uint8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(uint8_t); DEFINE_FUNDAMENTAL_TYPED_RCALLER(uint8_t);
} else if (XIsType(Input(-1), int)) { } else if (XIsType(Input(-1), int)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int); DEFINE_FUNDAMENTAL_TYPED_RCALLER(int);
} else if (XIsType(Input(-1), int64_t)) { } else if (XIsType(Input(-1), int64_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int64_t); DEFINE_FUNDAMENTAL_TYPED_RCALLER(int64_t);
} else if (XIsType(Input(-1), float16)) { } else if (XIsType(Input(-1), float16)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float16); DEFINE_FUNDAMENTAL_TYPED_RCALLER(float16);
} else if (XIsType(Input(-1), float)) { } else if (XIsType(Input(-1), float)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float); DEFINE_FUNDAMENTAL_TYPED_RCALLER(float);
} else if (XIsType(Input(-1), double)) { } else if (XIsType(Input(-1), double)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(double); DEFINE_FUNDAMENTAL_TYPED_RCALLER(double);
} else { } else {
LOG(FATAL) << DTypeHelper(Input(0), { LOG(FATAL) << DTypeHelper(Input(0), {
"int8", "uint8", "int32", "int64", "int8", "uint8", "int32", "int64",
"float16", "float32", "float64", "float16", "float32", "float64",
}); });
} }
} }
......
...@@ -61,7 +61,7 @@ void RDivGradientOp<Context>::EltwiseRunWithType() { ...@@ -61,7 +61,7 @@ void RDivGradientOp<Context>::EltwiseRunWithType() {
DEFINE_FUNDAMENTAL_OP_X1X2; DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>(); auto* x1 = Input(0).template data<T, Context>();
auto* x2 = Input(1).template data<T, Context>(); auto* x2 = Input(1).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
...@@ -73,7 +73,7 @@ void RDivGradientOp<Context>::EltwiseRunWithType() { ...@@ -73,7 +73,7 @@ void RDivGradientOp<Context>::EltwiseRunWithType() {
math::Scale(X2->count(), -1.f, dx2, dx2, ctx()); math::Scale(X2->count(), -1.f, dx2, dx2, ctx());
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>(); auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
math::Div(X1->count(), dy, x2, dx1, ctx()); math::Div(X1->count(), dy, x2, dx1, ctx());
...@@ -85,7 +85,7 @@ void RDivGradientOp<Context>::BroadcastRunWithType(int type) { ...@@ -85,7 +85,7 @@ void RDivGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2; DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>(); auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
auto* c = ws()->template caches<T, Context>({ X2->count() })[0]; auto* c = ws()->template caches<T, Context>({ X2->count() })[0];
...@@ -95,7 +95,7 @@ void RDivGradientOp<Context>::BroadcastRunWithType(int type) { ...@@ -95,7 +95,7 @@ void RDivGradientOp<Context>::BroadcastRunWithType(int type) {
1, axes.data(), 1.f, c, dx1, ctx()); 1, axes.data(), 1.f, c, dx1, ctx());
} }
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>(); auto* x1 = Input(0).template data<T, Context>();
auto* x2 = Input(1).template data<T, Context>(); auto* x2 = Input(1).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
...@@ -114,19 +114,19 @@ void RDivGradientOp<Context>::RunOnDevice() { ...@@ -114,19 +114,19 @@ void RDivGradientOp<Context>::RunOnDevice() {
Output(1)->ReshapeLike(*X2); Output(1)->ReshapeLike(*X2);
if (XIsType(Input(-1), int8_t)) { if (XIsType(Input(-1), int8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int8_t); DEFINE_FUNDAMENTAL_TYPED_RCALLER(int8_t);
} else if (XIsType(Input(-1), uint8_t)) { } else if (XIsType(Input(-1), uint8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(uint8_t); DEFINE_FUNDAMENTAL_TYPED_RCALLER(uint8_t);
} else if (XIsType(Input(-1), int)) { } else if (XIsType(Input(-1), int)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int); DEFINE_FUNDAMENTAL_TYPED_RCALLER(int);
} else if (XIsType(Input(-1), int64_t)) { } else if (XIsType(Input(-1), int64_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int64_t); DEFINE_FUNDAMENTAL_TYPED_RCALLER(int64_t);
} else if (XIsType(Input(-1), float16)) { } else if (XIsType(Input(-1), float16)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float16); DEFINE_FUNDAMENTAL_TYPED_RCALLER(float16);
} else if (XIsType(Input(-1), float)) { } else if (XIsType(Input(-1), float)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float); DEFINE_FUNDAMENTAL_TYPED_RCALLER(float);
} else if (XIsType(Input(-1), double)) { } else if (XIsType(Input(-1), double)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(double); DEFINE_FUNDAMENTAL_TYPED_RCALLER(double);
} else { } else {
LOG(FATAL) << DTypeHelper(Input(0), { LOG(FATAL) << DTypeHelper(Input(0), {
"int8", "uint8", "int32", "int64", "int8", "uint8", "int32", "int64",
......
...@@ -60,13 +60,13 @@ template <class Context> template <typename T> ...@@ -60,13 +60,13 @@ template <class Context> template <typename T>
void RMulGradientOp<Context>::EltwiseRunWithType() { void RMulGradientOp<Context>::EltwiseRunWithType() {
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>(); auto* x1 = Input(0).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
math::Mul(Output(1)->count(), dy, x1, dx2, ctx()); math::Mul(Output(1)->count(), dy, x1, dx2, ctx());
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>(); auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
math::Mul(Output(0)->count(), dy, x2, dx1, ctx()); math::Mul(Output(0)->count(), dy, x2, dx1, ctx());
...@@ -78,7 +78,7 @@ void RMulGradientOp<Context>::BroadcastRunWithType(int type) { ...@@ -78,7 +78,7 @@ void RMulGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2; DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>(); auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
auto* c = ws()->template caches<T, Context>({ X2->count() })[0]; auto* c = ws()->template caches<T, Context>({ X2->count() })[0];
...@@ -88,7 +88,7 @@ void RMulGradientOp<Context>::BroadcastRunWithType(int type) { ...@@ -88,7 +88,7 @@ void RMulGradientOp<Context>::BroadcastRunWithType(int type) {
1, axes.data(), 1.f, c, dx1, ctx()); 1, axes.data(), 1.f, c, dx1, ctx());
} }
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>(); auto* x1 = Input(0).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
math::BroadcastMul(rows, cols, type - 2, dy, x1, dx2, ctx()); math::BroadcastMul(rows, cols, type - 2, dy, x1, dx2, ctx());
...@@ -102,19 +102,19 @@ void RMulGradientOp<Context>::RunOnDevice() { ...@@ -102,19 +102,19 @@ void RMulGradientOp<Context>::RunOnDevice() {
Output(1)->ReshapeLike(*X2); Output(1)->ReshapeLike(*X2);
if (XIsType(Input(-1), int8_t)) { if (XIsType(Input(-1), int8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int8_t); DEFINE_FUNDAMENTAL_TYPED_RCALLER(int8_t);
} else if (XIsType(Input(-1), uint8_t)) { } else if (XIsType(Input(-1), uint8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(uint8_t); DEFINE_FUNDAMENTAL_TYPED_RCALLER(uint8_t);
} else if (XIsType(Input(-1), int)) { } else if (XIsType(Input(-1), int)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int); DEFINE_FUNDAMENTAL_TYPED_RCALLER(int);
} else if (XIsType(Input(-1), int64_t)) { } else if (XIsType(Input(-1), int64_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int64_t); DEFINE_FUNDAMENTAL_TYPED_RCALLER(int64_t);
} else if (XIsType(Input(-1), float16)) { } else if (XIsType(Input(-1), float16)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float16); DEFINE_FUNDAMENTAL_TYPED_RCALLER(float16);
} else if (XIsType(Input(-1), float)) { } else if (XIsType(Input(-1), float)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float); DEFINE_FUNDAMENTAL_TYPED_RCALLER(float);
} else if (XIsType(Input(-1), double)) { } else if (XIsType(Input(-1), double)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(double); DEFINE_FUNDAMENTAL_TYPED_RCALLER(double);
} else { } else {
LOG(FATAL) << DTypeHelper(Input(0), { LOG(FATAL) << DTypeHelper(Input(0), {
"int8", "uint8", "int32", "int64", "int8", "uint8", "int32", "int64",
......
...@@ -60,13 +60,13 @@ template <class Context> template <typename T> ...@@ -60,13 +60,13 @@ template <class Context> template <typename T>
void RSubGradientOp<Context>::EltwiseRunWithType() { void RSubGradientOp<Context>::EltwiseRunWithType() {
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
math::Scale<T, Context>( math::Scale<T, Context>(
Output(1)->count(), -1, dy, dx2, ctx()); Output(1)->count(), -1, dy, dx2, ctx());
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>( ctx()->template Copy<T, Context, Context>(
Output(0)->count(), dx1, dy); Output(0)->count(), dx1, dy);
...@@ -78,14 +78,14 @@ void RSubGradientOp<Context>::BroadcastRunWithType(int type) { ...@@ -78,14 +78,14 @@ void RSubGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2; DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
vector<int> dims = { rows, cols }, axes = { type - 2 }; vector<int> dims = { rows, cols }, axes = { type - 2 };
kernel::ReduceSum(2, dims.data(), kernel::ReduceSum(2, dims.data(),
1, axes.data(), 1.f, dy, dx1, ctx()); 1, axes.data(), 1.f, dy, dx1, ctx());
} }
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
math::Scale(X2->count(), -1.f, dy, dx2, ctx()); math::Scale(X2->count(), -1.f, dy, dx2, ctx());
} }
...@@ -98,19 +98,19 @@ void RSubGradientOp<Context>::RunOnDevice() { ...@@ -98,19 +98,19 @@ void RSubGradientOp<Context>::RunOnDevice() {
Output(1)->ReshapeLike(*X2); Output(1)->ReshapeLike(*X2);
if (XIsType(Input(-1), int8_t)) { if (XIsType(Input(-1), int8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int8_t); DEFINE_FUNDAMENTAL_TYPED_RCALLER(int8_t);
} else if (XIsType(Input(-1), uint8_t)) { } else if (XIsType(Input(-1), uint8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(uint8_t); DEFINE_FUNDAMENTAL_TYPED_RCALLER(uint8_t);
} else if (XIsType(Input(-1), int)) { } else if (XIsType(Input(-1), int)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int); DEFINE_FUNDAMENTAL_TYPED_RCALLER(int);
} else if (XIsType(Input(-1), int64_t)) { } else if (XIsType(Input(-1), int64_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int64_t); DEFINE_FUNDAMENTAL_TYPED_RCALLER(int64_t);
} else if (XIsType(Input(-1), float16)) { } else if (XIsType(Input(-1), float16)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float16); DEFINE_FUNDAMENTAL_TYPED_RCALLER(float16);
} else if (XIsType(Input(-1), float)) { } else if (XIsType(Input(-1), float)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float); DEFINE_FUNDAMENTAL_TYPED_RCALLER(float);
} else if (XIsType(Input(-1), double)) { } else if (XIsType(Input(-1), double)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(double); DEFINE_FUNDAMENTAL_TYPED_RCALLER(double);
} else { } else {
LOG(FATAL) << DTypeHelper(Input(0), { LOG(FATAL) << DTypeHelper(Input(0), {
"int8", "uint8", "int32", "int64", "int8", "uint8", "int32", "int64",
......
...@@ -60,13 +60,13 @@ template <class Context> template <typename T> ...@@ -60,13 +60,13 @@ template <class Context> template <typename T>
void SubGradientOp<Context>::EltwiseRunWithType() { void SubGradientOp<Context>::EltwiseRunWithType() {
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
math::Scale<T, Context>(Output(1)->count(), math::Scale<T, Context>(Output(1)->count(),
-1.f, dy, dx2, ctx()); -1.f, dy, dx2, ctx());
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>( ctx()->template Copy<T, Context, Context>(
Output(0)->count(), dx1, dy); Output(0)->count(), dx1, dy);
...@@ -78,14 +78,14 @@ void SubGradientOp<Context>::BroadcastRunWithType(int type) { ...@@ -78,14 +78,14 @@ void SubGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2; DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>(); auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>(); auto* dx2 = Output(1)->template mutable_data<T, Context>();
vector<int> dims = { rows, cols }, axes = { type }; vector<int> dims = { rows, cols }, axes = { type };
kernel::ReduceSum(2, dims.data(), kernel::ReduceSum(2, dims.data(),
1, axes.data(), -1.f, dy, dx2, ctx()); 1, axes.data(), -1.f, dy, dx2, ctx());
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>(); auto* dx1 = Output(0)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>( ctx()->template Copy<T, Context, Context>(
X1->count(), dx1, dy); X1->count(), dx1, dy);
......
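Every hunk above follows the convention this commit renames: an output bound to the name "NULL" (previously "ignore") is a gradient the caller did not request, and the op simply skips computing it. Below is a minimal sketch of that pattern, not code from this commit; the op name ElementwiseGradientOp is hypothetical, while the Operator helpers (Input, Output, ctx) and the math:: routines are the ones used throughout the diff.

    // Sketch only: the NULL-name convention used by the gradient ops above.
    template <class Context> template <typename T>
    void ElementwiseGradientOp<Context>::EltwiseRunWithType() {
        // Nothing flowed back, so there is nothing to compute.
        if (Input(-1).name() == "NULL") return;
        auto* dy = Input(-1).template data<T, Context>();
        if (Output(0)->name() != "NULL") {
            // dX1 is requested: here it is just a copy of dY, as in SubGradient.
            auto* dx1 = Output(0)->template mutable_data<T, Context>();
            ctx()->template Copy<T, Context, Context>(Output(0)->count(), dx1, dy);
        }
        if (Output(1)->name() != "NULL") {
            // dX2 is requested: -dY, again following SubGradient.
            auto* dx2 = Output(1)->template mutable_data<T, Context>();
            math::Scale(Output(1)->count(), -1.f, dy, dx2, ctx());
        }
    }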
...@@ -19,7 +19,7 @@ void ArgReduceOp<Context>::RunWithType() { ...@@ -19,7 +19,7 @@ void ArgReduceOp<Context>::RunWithType() {
// It's difficult to implement device code when top_k > 1 // It's difficult to implement device code when top_k > 1
auto* Xdata = Input(0).template data<T, CPUContext>(); auto* Xdata = Input(0).template data<T, CPUContext>();
auto* Idata = Output(0)->template mutable_data<int64_t, CPUContext>(); auto* Idata = Output(0)->template mutable_data<int64_t, CPUContext>();
auto* Vdata = Output(1)->name() != "ignore" ? Output(1) auto* Vdata = Output(1)->name() != "NULL" ? Output(1)
->template mutable_data<T, CPUContext>() : nullptr; ->template mutable_data<T, CPUContext>() : nullptr;
static CPUContext cctx; static CPUContext cctx;
if (operation == "ARGMAX") { if (operation == "ARGMAX") {
...@@ -34,7 +34,7 @@ void ArgReduceOp<Context>::RunWithType() { ...@@ -34,7 +34,7 @@ void ArgReduceOp<Context>::RunWithType() {
} else { } else {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* Idata = Output(0)->template mutable_data<int64_t, Context>(); auto* Idata = Output(0)->template mutable_data<int64_t, Context>();
auto* Vdata = Output(1)->name() != "ignore" ? Output(1) auto* Vdata = Output(1)->name() != "NULL" ? Output(1)
->template mutable_data<T, Context>() : nullptr; ->template mutable_data<T, Context>() : nullptr;
if (operation == "ARGMAX") { if (operation == "ARGMAX") {
kernel::ArgMax(outer_dim, inner_dim, axis_dim, kernel::ArgMax(outer_dim, inner_dim, axis_dim,
......
...@@ -82,7 +82,7 @@ void ConcatGradientOp<Context>::RunWithType() { ...@@ -82,7 +82,7 @@ void ConcatGradientOp<Context>::RunWithType() {
for (int i = 0; i < OutputSize(); i++) { for (int i = 0; i < OutputSize(); i++) {
x_concat_dim = Input(i).dim(axis); x_concat_dim = Input(i).dim(axis);
if (Output(i)->name() != "ignore") { if (Output(i)->name() != "NULL") {
auto* dXdata = Output(i)->template mutable_data<T, Context>(); auto* dXdata = Output(i)->template mutable_data<T, Context>();
kernel::Slice( kernel::Slice(
outer_dim, inner_dim, outer_dim, inner_dim,
...@@ -95,7 +95,7 @@ void ConcatGradientOp<Context>::RunWithType() { ...@@ -95,7 +95,7 @@ void ConcatGradientOp<Context>::RunWithType() {
template <class Context> template <class Context>
void ConcatGradientOp<Context>::RunOnDevice() { void ConcatGradientOp<Context>::RunOnDevice() {
if (Input(-1).name() == "ignore") return; if (Input(-1).name() == "NULL") return;
DETERMINE_RUNTIME_ARGUMENTS(Input(0)); DETERMINE_RUNTIME_ARGUMENTS(Input(0));
......
...@@ -89,7 +89,7 @@ void SliceGradientOp<Context>::RunWithType() { ...@@ -89,7 +89,7 @@ void SliceGradientOp<Context>::RunWithType() {
<< "\nIllegal slice points: " << Tensor::DimString(slice_points) << "\nIllegal slice points: " << Tensor::DimString(slice_points)
<< " for dimension " << Input(0).dim(axis) << "."; << " for dimension " << Input(0).dim(axis) << ".";
const T* dYdata = Input(i + 1).name() != "ignore" ? const T* dYdata = Input(i + 1).name() != "NULL" ?
Input(i + 1).template data<T, Context>() : nullptr; Input(i + 1).template data<T, Context>() : nullptr;
kernel::SliceGrad( kernel::SliceGrad(
......
...@@ -69,7 +69,7 @@ void StackGradientOp<Context>::RunWithType() { ...@@ -69,7 +69,7 @@ void StackGradientOp<Context>::RunWithType() {
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
for (int i = 0; i < OutputSize(); i++) { for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->name() != "ignore") { if (Output(i)->name() != "NULL") {
auto* dXdata = Output(i)->template mutable_data<T, Context>(); auto* dXdata = Output(i)->template mutable_data<T, Context>();
kernel::Slice( kernel::Slice(
outer_dim, inner_dim, outer_dim, inner_dim,
...@@ -81,7 +81,7 @@ void StackGradientOp<Context>::RunWithType() { ...@@ -81,7 +81,7 @@ void StackGradientOp<Context>::RunWithType() {
template <class Context> template <class Context>
void StackGradientOp<Context>::RunOnDevice() { void StackGradientOp<Context>::RunOnDevice() {
if (Input(-1).name() == "ignore") return; if (Input(-1).name() == "NULL") return;
DETERMINE_RUNTIME_ARGUMENTS(Input(-1)); DETERMINE_RUNTIME_ARGUMENTS(Input(-1));
......
...@@ -131,7 +131,7 @@ void ScanOp<Context>::UnrollTemplate() { ...@@ -131,7 +131,7 @@ void ScanOp<Context>::UnrollTemplate() {
func_def.output(i) + func_def.output(i) +
"@" + std::to_string(nsteps)); "@" + std::to_string(nsteps));
// Concat all steps if necessary // Concat all steps if necessary
if (Output(i)->name() == "ignore") continue; if (Output(i)->name() == "NULL") continue;
OperatorDef* op = new_def.add_op(); OperatorDef* op = new_def.add_op();
op->set_name(name() + "(BodyOp." + std::to_string( op->set_name(name() + "(BodyOp." + std::to_string(
nseqs + nrepeats + i) + ")"); nseqs + nrepeats + i) + ")");
...@@ -186,7 +186,7 @@ void ScanGradientOp<Context>::MakeOps( ...@@ -186,7 +186,7 @@ void ScanGradientOp<Context>::MakeOps(
maker.SetOperatorPrefix(name() + "(BodyOp."); maker.SetOperatorPrefix(name() + "(BodyOp.");
maker.SetOperatorSuffix(")"); maker.SetOperatorSuffix(")");
for (int i = 0; i < forward_outputs.size(); i++) { for (int i = 0; i < forward_outputs.size(); i++) {
if (Input(i + (int)OutputSize()).name() != "ignore") if (Input(i + (int)OutputSize()).name() != "NULL")
maker.AddExternalGrad(Input(i + (int)OutputSize()).name()); maker.AddExternalGrad(Input(i + (int)OutputSize()).name());
} }
...@@ -197,8 +197,8 @@ void ScanGradientOp<Context>::MakeOps( ...@@ -197,8 +197,8 @@ void ScanGradientOp<Context>::MakeOps(
new_def.set_name(name() + "(ScanLen." + std::to_string(nsteps) + ")"); new_def.set_name(name() + "(ScanLen." + std::to_string(nsteps) + ")");
for (const auto& target : forward_def.output()) { for (const auto& target : forward_def.output()) {
for (int i = 0; i < OutputSize(); i++) { for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->name() == "ignore") continue; if (Output(i)->name() == "NULL") continue;
if (Input(i).name() == "ignore") continue; if (Input(i).name() == "NULL") continue;
auto* gradient = new_def.add_gradient(); auto* gradient = new_def.add_gradient();
gradient->set_cost(target); gradient->set_cost(target);
gradient->set_wrt(Input(i).name()); gradient->set_wrt(Input(i).name());
......
...@@ -83,13 +83,13 @@ void L1LossGradientOp<Context>::RunWithType() { ...@@ -83,13 +83,13 @@ void L1LossGradientOp<Context>::RunWithType() {
} else { dYHost *= scale; } } else { dYHost *= scale; }
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
if (Output(i)->name() == "ignore") continue; if (Output(i)->name() == "NULL") continue;
Output(i)->ReshapeLike(Input(i)); Output(i)->ReshapeLike(Input(i));
auto* dXdata = Output(i)->template mutable_data<T, Context>(); auto* dXdata = Output(i)->template mutable_data<T, Context>();
math::Scale(Output(i)->count(), math::Scale(Output(i)->count(),
dYHost * (i == 0 ? 1.f : -1.f), dYHost * (i == 0 ? 1.f : -1.f),
Ddata, dXdata, ctx()); Ddata, dXdata, ctx());
if (Input(2).name() != "ignore") { if (Input(2).name() != "NULL") {
auto* mask = Input(2).template data<T, Context>(); auto* mask = Input(2).template data<T, Context>();
math::Mul(Output(i)->count(), mask, dXdata, dXdata, ctx()); math::Mul(Output(i)->count(), mask, dXdata, dXdata, ctx());
} }
......
...@@ -88,13 +88,13 @@ void L2LossGradientOp<Context>::RunWithType() { ...@@ -88,13 +88,13 @@ void L2LossGradientOp<Context>::RunWithType() {
} else { dYHost *= scale; } } else { dYHost *= scale; }
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
if (Output(i)->name() == "ignore") continue; if (Output(i)->name() == "NULL") continue;
Output(i)->ReshapeLike(Input(i)); Output(i)->ReshapeLike(Input(i));
auto* dXdata = Output(i)->template mutable_data<T, Context>(); auto* dXdata = Output(i)->template mutable_data<T, Context>();
math::Scale(Output(i)->count(), math::Scale(Output(i)->count(),
dYHost * (i == 0 ? 1.f : -1.f), dYHost * (i == 0 ? 1.f : -1.f),
Ddata, dXdata, ctx()); Ddata, dXdata, ctx());
if (Input(2).name() != "ignore") { if (Input(2).name() != "NULL") {
auto* mask = Input(2).template data<T, Context>(); auto* mask = Input(2).template data<T, Context>();
math::Mul(Output(i)->count(), mask, dXdata, dXdata, ctx()); math::Mul(Output(i)->count(), mask, dXdata, dXdata, ctx());
} }
......
...@@ -79,7 +79,7 @@ void SmoothL1LossGradientOp<Context>::RunWithType() { ...@@ -79,7 +79,7 @@ void SmoothL1LossGradientOp<Context>::RunWithType() {
} }
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
if (Output(i)->name() == "ignore") continue; if (Output(i)->name() == "NULL") continue;
Output(i)->ReshapeLike(Input(i)); Output(i)->ReshapeLike(Input(i));
auto* dXdata = Output(i)->template mutable_data<T, Context>(); auto* dXdata = Output(i)->template mutable_data<T, Context>();
math::Scale(Output(i)->count(), math::Scale(Output(i)->count(),
......
...@@ -8,7 +8,7 @@ namespace dragon { ...@@ -8,7 +8,7 @@ namespace dragon {
template <class Context> template <typename T> template <class Context> template <typename T>
void GradientGenerateOp<Context>::RunWithType() { void GradientGenerateOp<Context>::RunWithType() {
for (int i = 0; i < OutputSize(); i++) { for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->name() == "ignore") continue; if (Output(i)->name() == "NULL") continue;
Output(i)->ReshapeLike(Input(i)); Output(i)->ReshapeLike(Input(i));
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
math::Set(Output(0)->count(), math::Set(Output(0)->count(),
......
...@@ -174,8 +174,8 @@ void BatchNormGradientOp<Context>::InferenceRunWithType() { ...@@ -174,8 +174,8 @@ void BatchNormGradientOp<Context>::InferenceRunWithType() {
Tp* dgamma = nullptr, *dbeta = nullptr; Tp* dgamma = nullptr, *dbeta = nullptr;
// Gradient w.r.t. gamma or beta if necessary // Gradient w.r.t. gamma or beta if necessary
if (Output(1)->name() != "ignore" || if (Output(1)->name() != "NULL" ||
Output(2)->name() != "ignore") { Output(2)->name() != "NULL") {
dgamma = Output(1)->template mutable_data<Tp, Context>(); dgamma = Output(1)->template mutable_data<Tp, Context>();
dbeta = Output(2)->template mutable_data<Tp, Context>(); dbeta = Output(2)->template mutable_data<Tp, Context>();
} }
......
...@@ -242,8 +242,8 @@ void CuDNNBatchNormGradientOp<Context>::InferenceRunWithType() { ...@@ -242,8 +242,8 @@ void CuDNNBatchNormGradientOp<Context>::InferenceRunWithType() {
Tp* dgamma = nullptr, *dbeta = nullptr; Tp* dgamma = nullptr, *dbeta = nullptr;
// Gradient w.r.t. gamma or beta if necessary // Gradient w.r.t. gamma or beta if necessary
if (Output(1)->name() != "ignore" || if (Output(1)->name() != "NULL" ||
Output(2)->name() != "ignore") { Output(2)->name() != "NULL") {
dgamma = Output(1)->template mutable_data<Tp, Context>(); dgamma = Output(1)->template mutable_data<Tp, Context>();
dbeta = Output(2)->template mutable_data<Tp, Context>(); dbeta = Output(2)->template mutable_data<Tp, Context>();
} }
......
...@@ -116,7 +116,7 @@ void CuDNNRecurrentOp<Context>::RunWithType() { ...@@ -116,7 +116,7 @@ void CuDNNRecurrentOp<Context>::RunWithType() {
}; };
auto YsData = [this](int i) { auto YsData = [this](int i) {
if (i >= OutputSize()) return (T*)NULL; if (i >= OutputSize()) return (T*)NULL;
if (Output(i)->name() == "ignore") return (T*)NULL; if (Output(i)->name() == "NULL") return (T*)NULL;
return Output(i)->template mutable_data<T, Context>(); return Output(i)->template mutable_data<T, Context>();
}; };
...@@ -171,12 +171,12 @@ void CuDNNRecurrentGradientOp<Context>::RunWithType() { ...@@ -171,12 +171,12 @@ void CuDNNRecurrentGradientOp<Context>::RunWithType() {
auto XsData = [this](int i) { auto XsData = [this](int i) {
if (i >= InputSize()) return (const T*)NULL; if (i >= InputSize()) return (const T*)NULL;
if (Input(i).name() == "ignore") return (const T*)NULL; if (Input(i).name() == "NULL") return (const T*)NULL;
return Input(i).template data<T, Context>(); return Input(i).template data<T, Context>();
}; };
auto YsData = [this](int i) { auto YsData = [this](int i) {
if (i >= OutputSize()) return (T*)NULL; if (i >= OutputSize()) return (T*)NULL;
if (Output(i)->name() == "ignore" && i > 0) return (T*)NULL; if (Output(i)->name() == "NULL" && i > 0) return (T*)NULL;
return Output(i)->template mutable_data<T, Context>(); return Output(i)->template mutable_data<T, Context>();
}; };
...@@ -194,10 +194,10 @@ void CuDNNRecurrentGradientOp<Context>::RunWithType() { ...@@ -194,10 +194,10 @@ void CuDNNRecurrentGradientOp<Context>::RunWithType() {
auto handle = ctx()->cudnn_handle(); auto handle = ctx()->cudnn_handle();
if (Output(0)->name() != "ignore" || if (Output(0)->name() != "NULL" ||
Output(1)->name() != "ignore" || Output(1)->name() != "NULL" ||
Output(2)->name() != "ignore" || Output(2)->name() != "NULL" ||
Output(3)->name() != "ignore") { Output(3)->name() != "NULL") {
CUDNN_CHECK(cudnnRNNBackwardData(handle, rnn_desc, CUDNN_CHECK(cudnnRNNBackwardData(handle, rnn_desc,
seq_length, seq_length,
ys_desc->descs(), XsData(4), // Y ys_desc->descs(), XsData(4), // Y
...@@ -214,7 +214,7 @@ void CuDNNRecurrentGradientOp<Context>::RunWithType() { ...@@ -214,7 +214,7 @@ void CuDNNRecurrentGradientOp<Context>::RunWithType() {
RSdata, reserve_size)); RSdata, reserve_size));
} }
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
CUDNN_CHECK(cudnnRNNBackwardWeights(handle, rnn_desc, CUDNN_CHECK(cudnnRNNBackwardWeights(handle, rnn_desc,
seq_length, seq_length,
xs_desc->descs(), XsData(0), // X xs_desc->descs(), XsData(0), // X
......
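The cuDNN recurrent ops above translate the NULL-name convention into plain null pointers before calling into cuDNN, which accepts null buffers for optional RNN arguments. A condensed sketch of that adapter lambda (the surrounding operator class and the type parameter T are assumed from the code above):

    // Map an optional output slot to a raw pointer:
    // out of range, or bound to the NULL tensor -> nullptr.
    auto YsData = [this](int i) -> T* {
        if (i >= OutputSize()) return nullptr;
        if (Output(i)->name() == "NULL") return nullptr;
        return Output(i)->template mutable_data<T, Context>();
    };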
...@@ -7,14 +7,14 @@ namespace dragon { ...@@ -7,14 +7,14 @@ namespace dragon {
template <class Context> template <typename T> template <class Context> template <typename T>
void LSTMCellOp<Context>::RunWithType() { void LSTMCellOp<Context>::RunWithType() {
auto* XAdata = Input(0).template mutable_data<T, Context>(); auto* Xdata = Input(0).template mutable_data<T, Context>();
auto* CXdata = Input(1).template data<T, Context>(); auto* HXdata = Input(1).template data<T, Context>();
auto* Hdata = Output(0)->template mutable_data<T, Context>(); auto* Hdata = Output(0)->template mutable_data<T, Context>();
auto* Cdata = Output(1)->template mutable_data<T, Context>(); auto* Cdata = Output(1)->template mutable_data<T, Context>();
kernel::LSTMCell(Input(1).count(), Input(1).dim(0), kernel::LSTMCell(Input(1).count(), Input(1).dim(0),
Input(1).ndim() == 2 ? Input(1).dim(1) : Input(1).dim(2), Input(1).ndim() == 2 ? Input(1).dim(1) : Input(1).dim(2),
CXdata, XAdata, Cdata, Hdata, ctx()); HXdata, Xdata, Cdata, Hdata, ctx());
} }
template <class Context> template <class Context>
...@@ -34,17 +34,23 @@ OPERATOR_SCHEMA(LSTMCell).NumInputs(2, 3).NumOutputs(2); ...@@ -34,17 +34,23 @@ OPERATOR_SCHEMA(LSTMCell).NumInputs(2, 3).NumOutputs(2);
template <class Context> template <typename T> template <class Context> template <typename T>
void LSTMCellGradientOp<Context>::RunWithType() { void LSTMCellGradientOp<Context>::RunWithType() {
auto* XAdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* CXdata = Input(1).template data<T, Context>(); auto* HXdata = Input(1).template data<T, Context>();
auto* Cdata = Input(2).template data<T, Context>(); auto* Cdata = Input(2).template data<T, Context>();
auto* dHdata = Input(3).template data<T, Context>(); auto* dHdata = Input(-2).template data<T, Context>();
auto* dCdata = Input(4).template data<T, Context>(); auto* dCdata = Input(4).template mutable_data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
auto* dCXdata = Output(1)->template mutable_data<T, Context>(); auto* dHXdata = Output(1)->template mutable_data<T, Context>();
if (Input(-1).name() == "NULL") {
math::Set(Input(-1).count(),
cast::to<T>(0.f), dCdata, ctx());
}
kernel::LSTMCellGrad(Input(1).count(), Input(1).dim(0), kernel::LSTMCellGrad(Input(1).count(), Input(1).dim(0),
Input(1).ndim() == 2 ? Input(1).dim(1) : Input(1).dim(2), Input(1).ndim() == 2 ? Input(1).dim(1) : Input(1).dim(2),
CXdata, XAdata, Cdata, dCdata, dHdata, dCXdata, dXdata, ctx()); HXdata, Xdata, Cdata, dCdata, dHdata,
dHXdata, dXdata, ctx());
} }
template <class Context> template <class Context>
...@@ -52,6 +58,12 @@ void LSTMCellGradientOp<Context>::RunOnDevice() { ...@@ -52,6 +58,12 @@ void LSTMCellGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
Output(1)->ReshapeLike(Input(1)); Output(1)->ReshapeLike(Input(1));
if (Input(-1).name() == "NULL") {
// dC is ignored if C was not solved for;
// zero-reset dC in that case
Input(-1).ReshapeLike(Input(-2));
}
if (Input(0).template IsType<float>()) RunWithType<float>(); if (Input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
} }
...@@ -72,8 +84,8 @@ class GetLSTMCellGradient final : public GradientMakerBase { ...@@ -72,8 +84,8 @@ class GetLSTMCellGradient final : public GradientMakerBase {
vector<string>({ I(0), I(1), O(0), GO(0), GO(1) }), vector<string>({ I(0), I(1), O(0), GO(0), GO(1) }),
vector<string>({ GI(0), GI(1) })); vector<string>({ GI(0), GI(1) }));
} }
// fill zero for dc_{T+1} // Fill zero for dCNext
vector<float> DefaultValues() override{ return { 0.f, 1.f }; } vector<float> DefaultValues() override{ return { 1.f, 0.f }; }
}; };
REGISTER_GRADIENT(LSTMCell, GetLSTMCellGradient); REGISTER_GRADIENT(LSTMCell, GetLSTMCellGradient);
......
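The LSTMCellGradient change above handles the case where the cell state C was never consumed downstream: the incoming dC arrives as the NULL tensor, so the op reshapes it and zero-fills its buffer before calling kernel::LSTMCellGrad. A condensed sketch of that guard, combining the RunOnDevice and RunWithType pieces shown above:

    if (Input(-1).name() == "NULL") {
        // dC from the next step was never produced; shape it like dH
        // and zero-fill it so the kernel sees a well-defined buffer.
        Input(-1).ReshapeLike(Input(-2));
        auto* dCdata = Input(-1).template mutable_data<T, Context>();
        math::Set(Input(-1).count(), cast::to<T>(0.f), dCdata, ctx());
    }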
...@@ -24,14 +24,14 @@ class GetRecurrentGradient final : public GradientMakerBase { ...@@ -24,14 +24,14 @@ class GetRecurrentGradient final : public GradientMakerBase {
GRADIENT_MAKER_CTOR(GetRecurrentGradient); GRADIENT_MAKER_CTOR(GetRecurrentGradient);
vector<OperatorDef> MakeDefs() override { vector<OperatorDef> MakeDefs() override {
vector<string> inputs({ I(0), I(1), vector<string> inputs({ I(0), I(1),
def.input_size() > 2 ? I(2) : "ignore", def.input_size() > 2 ? I(2) : "NULL",
def.input_size() > 3 ? I(3) : "ignore", def.input_size() > 3 ? I(3) : "NULL",
O(0), GO(0), O(0), GO(0),
def.output_size() > 1 ? GO(1) : "ignore", def.output_size() > 1 ? GO(1) : "NULL",
def.output_size() > 2 ? GO(2) : "ignore"}); def.output_size() > 2 ? GO(2) : "NULL"});
vector<string> outputs({ GI(0), GI(1), vector<string> outputs({ GI(0), GI(1),
def.input_size() > 2 ? GI(2) : "ignore", def.input_size() > 2 ? GI(2) : "NULL",
def.input_size() > 3 ? GI(3) : "ignore"}); def.input_size() > 3 ? GI(3) : "NULL"});
return SingleDef(def.type() + "Gradient", "", inputs, outputs); return SingleDef(def.type() + "Gradient", "", inputs, outputs);
} }
}; };
......
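GetRecurrentGradient above shows how a gradient maker pads optional slots: any input or output the forward def did not provide is wired to "NULL", so the gradient op can skip it with the name checks seen earlier. A trimmed sketch of the same pattern for a hypothetical op "MyRNN" (GRADIENT_MAKER_CTOR, I/O/GI/GO and SingleDef are the helpers used above):

    class GetMyRNNGradient final : public GradientMakerBase {
     public:
        GRADIENT_MAKER_CTOR(GetMyRNNGradient);
        vector<OperatorDef> MakeDefs() override {
            vector<string> inputs({ I(0), I(1),
                def.input_size() > 2 ? I(2) : "NULL",   // optional initial state
                O(0), GO(0) });
            vector<string> outputs({ GI(0), GI(1),
                def.input_size() > 2 ? GI(2) : "NULL" });
            return SingleDef(def.type() + "Gradient", "", inputs, outputs);
        }
    };
    // REGISTER_GRADIENT(MyRNN, GetMyRNNGradient);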
...@@ -49,7 +49,7 @@ OPERATOR_SCHEMA(BiasAdd) ...@@ -49,7 +49,7 @@ OPERATOR_SCHEMA(BiasAdd)
template <class Context> template <typename T> template <class Context> template <typename T>
void BiasAddGradientOp<Context>::RunWithType() { void BiasAddGradientOp<Context>::RunWithType() {
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
DECLARE_MULTIPLIER(multiplier, inner_dim); DECLARE_MULTIPLIER(multiplier, inner_dim);
auto* dYdata = Input(-1).template mutable_data<T, Context>(); auto* dYdata = Input(-1).template mutable_data<T, Context>();
auto* dBias = Output(1)->template mutable_data<T, Context>(); auto* dBias = Output(1)->template mutable_data<T, Context>();
...@@ -75,7 +75,7 @@ void BiasAddGradientOp<Context>::RunWithType() { ...@@ -75,7 +75,7 @@ void BiasAddGradientOp<Context>::RunWithType() {
} }
} }
if (Output(0)->name() != "ignore" && if (Output(0)->name() != "NULL" &&
Output(0)->name() != Input(-1).name()) { Output(0)->name() != Input(-1).name()) {
Output(0)->ReshapeLike(Input(-1)); Output(0)->ReshapeLike(Input(-1));
Output(0)->template CopyFrom<Context>(Input(-1), ctx()); Output(0)->template CopyFrom<Context>(Input(-1), ctx());
......
...@@ -51,12 +51,12 @@ void Conv2dGradientOp<Context>::RunWithType() { ...@@ -51,12 +51,12 @@ void Conv2dGradientOp<Context>::RunWithType() {
} }
for (int n = 0; n < Input(2).dim(0); n++) { for (int n = 0; n < Input(2).dim(0); n++) {
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>(); auto* dWdata = Output(1)->template mutable_data<T, Context>();
Dw(dYdata + n * y_offset, Xdata + n * x_offset, dWdata); Dw(dYdata + n * y_offset, Xdata + n * x_offset, dWdata);
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
Dx(dYdata + n * y_offset, Wdata, dXdata + n * x_offset); Dx(dYdata + n * y_offset, Wdata, dXdata + n * x_offset);
......
...@@ -48,22 +48,22 @@ template <class Context> template <typename T> ...@@ -48,22 +48,22 @@ template <class Context> template <typename T>
void ConvTranspose2dGradientOp<Context>::RunWithType() { void ConvTranspose2dGradientOp<Context>::RunWithType() {
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
if (Output(2)->name() != "ignore") { if (Output(2)->name() != "NULL") {
auto* dBdata = Output(2)->template mutable_data<T, Context>(); auto* dBdata = Output(2)->template mutable_data<T, Context>();
for (int n = 0; n < Input(2).dim(0); n++) for (int n = 0; n < Input(2).dim(0); n++)
Db(dYdata + n * y_offset, dBdata); Db(dYdata + n * y_offset, dBdata);
} }
for (int n = 0; n < Input(2).dim(0); n++) { for (int n = 0; n < Input(2).dim(0); n++) {
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>(); auto* dWdata = Output(1)->template mutable_data<T, Context>();
Dw(Xdata + n * x_offset, dYdata + n * y_offset, dWdata); Dw(Xdata + n * x_offset, dYdata + n * y_offset, dWdata);
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
bool skip = Output(1)->name() != "ignore"; bool skip = Output(1)->name() != "NULL";
Wx(dYdata + n * y_offset, Wdata, dXdata + n * x_offset, skip); Wx(dYdata + n * y_offset, Wdata, dXdata + n * x_offset, skip);
} }
} }
......
...@@ -74,7 +74,7 @@ void CuDNNBiasAddGradientOp<Context>::RunWithType() { ...@@ -74,7 +74,7 @@ void CuDNNBiasAddGradientOp<Context>::RunWithType() {
CUDNNType<T>::one, input_desc, dYdata, CUDNNType<T>::one, input_desc, dYdata,
CUDNNType<T>::zero, bias_desc, dBdata)); CUDNNType<T>::zero, bias_desc, dBdata));
if (Output(0)->name() != "ignore" && if (Output(0)->name() != "NULL" &&
Output(0)->name() != Input(-1).name()) { Output(0)->name() != Input(-1).name()) {
Output(0)->ReshapeLike(Input(-1)); Output(0)->ReshapeLike(Input(-1));
Output(0)->template CopyFrom<Context>(Input(-1), ctx()); Output(0)->template CopyFrom<Context>(Input(-1), ctx());
......
...@@ -294,7 +294,7 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() { ...@@ -294,7 +294,7 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() {
auto cudnn_handle = ctx()->cudnn_handle(); auto cudnn_handle = ctx()->cudnn_handle();
if (Output(2)->name() != "ignore") { if (Output(2)->name() != "NULL") {
T* dBdata = Output(2)->template mutable_data<T, Context>(); T* dBdata = Output(2)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardBias(cudnn_handle, CUDNN_CHECK(cudnnConvolutionBackwardBias(cudnn_handle,
CUDNNType<T>::one, input2b_desc, dYdata, CUDNNType<T>::one, input2b_desc, dYdata,
...@@ -302,7 +302,7 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() { ...@@ -302,7 +302,7 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() {
} }
for (int g = 0; g < cudnn_group; g++) { for (int g = 0; g < cudnn_group; g++) {
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>(); auto* dWdata = Output(1)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardFilter(cudnn_handle, CUDNN_CHECK(cudnnConvolutionBackwardFilter(cudnn_handle,
...@@ -311,7 +311,7 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() { ...@@ -311,7 +311,7 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() {
conv_desc, bwd_filter_algo, WSdata, bwd_filter_size, conv_desc, bwd_filter_algo, WSdata, bwd_filter_size,
CUDNNType<T>::zero, filter_desc, dWdata + weight_offset * g)); CUDNNType<T>::zero, filter_desc, dWdata + weight_offset * g));
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardData(cudnn_handle, CUDNN_CHECK(cudnnConvolutionBackwardData(cudnn_handle,
......
...@@ -290,7 +290,7 @@ void CuDNNConvTranspose2dGradientOp<Context>::RunWithType() { ...@@ -290,7 +290,7 @@ void CuDNNConvTranspose2dGradientOp<Context>::RunWithType() {
auto cudnn_handle = ctx()->cudnn_handle(); auto cudnn_handle = ctx()->cudnn_handle();
if (Output(2)->name() != "ignore") { if (Output(2)->name() != "NULL") {
T* dBdata = Output(2)->template mutable_data<T, Context>(); T* dBdata = Output(2)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardBias(cudnn_handle, CUDNN_CHECK(cudnnConvolutionBackwardBias(cudnn_handle,
CUDNNType<T>::one, input2b_desc, dYdata, CUDNNType<T>::one, input2b_desc, dYdata,
...@@ -298,7 +298,7 @@ void CuDNNConvTranspose2dGradientOp<Context>::RunWithType() { ...@@ -298,7 +298,7 @@ void CuDNNConvTranspose2dGradientOp<Context>::RunWithType() {
} }
for (int g = 0; g < cudnn_group; g++) { for (int g = 0; g < cudnn_group; g++) {
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>(); auto* dWdata = Output(1)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardFilter(cudnn_handle, CUDNN_CHECK(cudnnConvolutionBackwardFilter(cudnn_handle,
...@@ -307,7 +307,7 @@ void CuDNNConvTranspose2dGradientOp<Context>::RunWithType() { ...@@ -307,7 +307,7 @@ void CuDNNConvTranspose2dGradientOp<Context>::RunWithType() {
conv_desc, bwd_filter_algo, WSdata, bwd_filter_size, conv_desc, bwd_filter_algo, WSdata, bwd_filter_size,
CUDNNType<T>::zero, filter_desc, dWdata + weight_offset * g)); CUDNNType<T>::zero, filter_desc, dWdata + weight_offset * g));
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionForward(cudnn_handle, CUDNN_CHECK(cudnnConvolutionForward(cudnn_handle,
......
...@@ -79,7 +79,7 @@ void CuDNNDepthwiseConv2dGradientOp<Context>::RunWithType() { ...@@ -79,7 +79,7 @@ void CuDNNDepthwiseConv2dGradientOp<Context>::RunWithType() {
} }
for (int n = 0; n < Input(2).dim(0); n++) { for (int n = 0; n < Input(2).dim(0); n++) {
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>(); auto* dWdata = Output(1)->template mutable_data<T, Context>();
math::Set(Output(1)->count(), cast::to<T>(0.f), dWdata, ctx()); math::Set(Output(1)->count(), cast::to<T>(0.f), dWdata, ctx());
...@@ -88,7 +88,7 @@ void CuDNNDepthwiseConv2dGradientOp<Context>::RunWithType() { ...@@ -88,7 +88,7 @@ void CuDNNDepthwiseConv2dGradientOp<Context>::RunWithType() {
kernel_shape[0], kernel_shape[1], stride[0], pad_l[0], pad_l[1], kernel_shape[0], kernel_shape[1], stride[0], pad_l[0], pad_l[1],
data_format, dYdata, Xdata, dWdata, ctx()); data_format, dYdata, Xdata, dWdata, ctx());
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
kernel::DepthwiseConv2dGrad(Input(0).dim(0), channels, kernel::DepthwiseConv2dGrad(Input(0).dim(0), channels,
......
...@@ -54,7 +54,7 @@ void DepthwiseConv2dGradientOp<Context>::RunWithType() { ...@@ -54,7 +54,7 @@ void DepthwiseConv2dGradientOp<Context>::RunWithType() {
} }
for (int n = 0; n < Input(2).dim(0); n++) { for (int n = 0; n < Input(2).dim(0); n++) {
if (Output(1)->name() != "ignore") { if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>(); auto* dWdata = Output(1)->template mutable_data<T, Context>();
math::Set(Output(1)->count(), cast::to<T>(0.f), dWdata, ctx()); math::Set(Output(1)->count(), cast::to<T>(0.f), dWdata, ctx());
...@@ -63,7 +63,7 @@ void DepthwiseConv2dGradientOp<Context>::RunWithType() { ...@@ -63,7 +63,7 @@ void DepthwiseConv2dGradientOp<Context>::RunWithType() {
kernel_shape[0], kernel_shape[1], stride[0], pad_l[0], pad_l[1], kernel_shape[0], kernel_shape[1], stride[0], pad_l[0], pad_l[1],
data_format, dYdata, Xdata, dWdata, ctx()); data_format, dYdata, Xdata, dWdata, ctx());
} }
if (Output(0)->name() != "ignore") { if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>(); auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
kernel::DepthwiseConv2dGrad(Input(0).dim(0), channels, kernel::DepthwiseConv2dGrad(Input(0).dim(0), channels,
......
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd. // Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
// Licensed under the BSD 2-Clause License. // Licensed under the BSD 2-Clause License.
// Code based on:
// https://github.com/pytorch/pytorch/blob/master/caffe2/proto/caffe2.proto
syntax = "proto2"; syntax = "proto2";
package dragon; package dragon;
// Store the serialized Tensor objects.
message TensorProto { message TensorProto {
repeated int32 dims = 1; repeated int32 dims = 1;
enum DataType { enum DataType {
UNDEFINED = 0; UNDEFINED = 0;
// Basic types.
FLOAT = 1; FLOAT = 1;
INT32 = 2; INT32 = 2;
BYTE = 3; BYTE = 3;
STRING = 4; STRING = 4;
// Less-commonly used data types.
BOOL = 5;
UINT8 = 6;
INT8 = 7;
UINT16 = 8;
INT16 = 9;
INT64 = 10;
FLOAT16 = 12; FLOAT16 = 12;
DOUBLE = 13;
} }
optional DataType data_type = 2 [default = FLOAT]; optional DataType data_type = 2 [default = FLOAT];
// For float.
repeated float float_data = 3 [packed = true]; repeated float float_data = 3 [packed = true];
// For int32, uint8, int8, uint16, int16, bool, and float16
// Note about float16: in storage we will basically convert float16 byte-wise
// to unsigned short and then store them in the int32_data field.
repeated int32 int32_data = 4 [packed = true]; repeated int32 int32_data = 4 [packed = true];
// For bytes.
optional bytes byte_data = 5; optional bytes byte_data = 5;
// For strings.
repeated bytes string_data = 6; repeated bytes string_data = 6;
// For double.
repeated double double_data = 9 [packed = true];
// For int64.
repeated int64 int64_data = 10 [packed = true];
// Store the raw data; contents are serialized as little-endian.
optional bytes raw_data = 13;
// Optionally, a name for the tensor.
optional string name = 7; optional string name = 7;
} }
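As the comment above notes, float16 tensors are stored byte-wise as unsigned short values in int32_data. A small hedged sketch of filling such a proto with the generated C++ API; it assumes a dragon.pb.h generated from this file, and the helper name is made up for illustration:

    #include <cstdint>
    #include <vector>
    #include "dragon.pb.h"  // assumed: generated from this proto file

    // Pack raw float16 bit patterns into a TensorProto, one per int32_data slot.
    dragon::TensorProto MakeHalfTensor(const std::vector<uint16_t>& half_bits) {
        dragon::TensorProto proto;
        proto.set_name("x");
        proto.add_dims(static_cast<int32_t>(half_bits.size()));
        proto.set_data_type(dragon::TensorProto::FLOAT16);
        for (uint16_t bits : half_bits)
            proto.add_int32_data(bits);  // byte-wise, as described above
        return proto;
    }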
// Record the filler of Tensor.
// This structure is kept for backward compatibility
// with caffe1, which relies on implicit initializers.
message TensorFillerProto { message TensorFillerProto {
optional string tensor = 1; optional string tensor = 1;
optional string type = 2 [default = 'constant']; optional string type = 2 [default = 'constant'];
...@@ -36,67 +67,120 @@ message TensorFillerProto { ...@@ -36,67 +67,120 @@ message TensorFillerProto {
optional VarianceNorm variance_norm = 9 [default = FAN_IN]; optional VarianceNorm variance_norm = 9 [default = FAN_IN];
} }
// Store multiple TensorProto objects in one single proto.
message TensorProtos { message TensorProtos {
repeated TensorProto protos = 1; repeated TensorProto protos = 1;
} }
enum DeviceType { // DeviceType that Dragon currently supports.
PROTO_CPU = 0; enum DeviceTypeProto {
PROTO_CUDA = 1; // The default device.
PROTO_CNML = 2; PROTO_CPU = 0;
// NVIDIA's CUDA Environment.
PROTO_CUDA = 1;
// CAMBRICON's CNML Environment.
PROTO_CNML = 2;
} }
// Device-specific options.
message DeviceOption { message DeviceOption {
optional DeviceType device_type = 1 [default = PROTO_CPU]; // The type of device to dispatch executions.
optional DeviceTypeProto device_type = 1 [default = PROTO_CPU];
// The index of this device.
optional int32 device_id = 2 [default = 0]; optional int32 device_id = 2 [default = 0];
// The random seed to start the random generator.
optional uint32 random_seed = 3 [default = 3]; optional uint32 random_seed = 3 [default = 3];
optional string engine = 4;
} }
// A named argument containing either singular float, integer and string
// values, or repeated float, int and string arrays.
message Argument { message Argument {
// The name of this argument.
optional string name = 1; optional string name = 1;
// Store the float32 value.
optional float f = 2; optional float f = 2;
// Store the bool, int32, int64 value.
optional int64 i = 3; optional int64 i = 3;
// Store the string value.
optional bytes s = 4; optional bytes s = 4;
// Store the float32 values.
repeated float floats = 7; repeated float floats = 7;
// Store the bool, int32, int64 values.
repeated int64 ints = 8; repeated int64 ints = 8;
// Store the string values.
repeated bytes strings = 9; repeated bytes strings = 9;
} }
// Operator Definition
message OperatorDef { message OperatorDef {
// The unique id of this operator.
// Set it to persist operators in the dynamic graph.
optional string uid = 1; optional string uid = 1;
// The name of inputs.
repeated string input = 2; repeated string input = 2;
// The name of outputs.
repeated string output = 3; repeated string output = 3;
// The optional name of this operator.
optional string name = 4; optional string name = 4;
// The operator type.
optional string type = 5; optional string type = 5;
// The arguments.
repeated Argument arg = 6; repeated Argument arg = 6;
// The device option that the operator should run under.
optional DeviceOption device_option = 7; optional DeviceOption device_option = 7;
} }
// Record the gradient information
message GradientProto { message GradientProto {
// The derivative target.
optional string cost = 1; optional string cost = 1;
// The tensor to take the derivative with respect to.
optional string wrt = 2; optional string wrt = 2;
// The external gradient
optional string external = 3; optional string external = 3;
} }
// Record the updater information
message UpdaterProto { message UpdaterProto {
// The operator name to use.
optional string name = 1; optional string name = 1;
// The operator type.
optional string type = 2; optional string type = 2;
// The tensor to update.
repeated string tensor = 3; repeated string tensor = 3;
// The arguments.
repeated Argument arg = 4; repeated Argument arg = 4;
} }
// Graph Definition
message GraphDef { message GraphDef {
// The graph name.
optional string name = 1; optional string name = 1;
// The operators to execute.
repeated OperatorDef op = 2; repeated OperatorDef op = 2;
// The type of graph.
optional string graph_type = 3; optional string graph_type = 3;
// The device option for this graph.
optional DeviceOption device_option = 5; optional DeviceOption device_option = 5;
// The arguments.
repeated Argument arg = 6; repeated Argument arg = 6;
// The name of inputs.
repeated string input = 7; repeated string input = 7;
// The name of outputs.
repeated string output = 8; repeated string output = 8;
// The gradients information.
repeated GradientProto gradient = 9; repeated GradientProto gradient = 9;
// The updaters information.
repeated UpdaterProto updater = 10; repeated UpdaterProto updater = 10;
} }
\ No newline at end of file
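Tying the proto definitions back to the operator code earlier in the diff: a caller that does not want a particular gradient simply binds that output slot to the name "NULL". A hedged sketch with the generated C++ classes (again assuming dragon.pb.h; the tensor names and the exact input layout of SubGradient are illustrative, not taken from this commit):

    #include "dragon.pb.h"  // assumed: generated from this proto file

    dragon::OperatorDef MakeSubGradient() {
        dragon::OperatorDef op;
        op.set_type("SubGradient");
        op.set_name("sub_1_grad");
        op.add_input("x1");      // illustrative input order
        op.add_input("x2");
        op.add_input("y_grad");
        op.add_output("x1_grad");
        op.add_output("NULL");   // gradient w.r.t. x2 is not requested
        auto* dev = op.mutable_device_option();
        dev->set_device_type(dragon::PROTO_CUDA);
        dev->set_device_id(0);
        return op;
    }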