Commit b35f9320 by Ting PAN

Change the representation of NULL tensor

1 parent 418e0c0a
Showing with 816 additions and 714 deletions
......@@ -12,6 +12,7 @@ Quick Reference
List Brief
============================== =============================================================================
`IsCUDADriverSufficient`_ Is cuda driver sufficient?
`EnableCUDNN`_ Enable the CuDNN engine.
`GetDevice`_ Get the current active cuda device.
`SynchronizeStream`_ Synchronize the specified cuda stream.
============================== =============================================================================
......@@ -20,5 +21,6 @@ List Brief
:members:
.. _IsCUDADriverSufficient: #dragon.core.cuda.IsCUDADriverSufficient
.. _EnableCUDNN: #dragon.core.cuda.EnableCUDNN
.. _GetDevice: #dragon.core.cuda.GetDevice
.. _SynchronizeStream: #dragon.core.cuda.SynchronizeStream
\ No newline at end of file
......@@ -93,6 +93,7 @@ API Reference
.. automethod:: __eq__
.. automethod:: __repr__
.. automethod:: __getitem__
.. automethod:: __setitem__
.. automethod:: __call__
.. _Tensor.Variable: #dragon.core.tensor.Tensor.Variable
......
......@@ -59,16 +59,12 @@ Custom
:hidden:
operators/custom/minibatch
operators/custom/data_process
operators/custom/vec_mult
========================================= =====================================================================
List Brief
========================================= =====================================================================
`dragon.operators.custom.minibatch`_ How to form a minibatch based on `dragon.io`_ package.
`dragon.operators.custom.data_process`_ How to custom a RunOp for data processing.
`dragon.operators.custom.vec_mult`_ How to custom a TemplateOp for Vector Multiplication.
`dragon.operators.custom.minibatch`_ Form a mini-batch based on `dragon.utils.vision`_ package.
========================================= =====================================================================
......@@ -99,9 +95,9 @@ List Brief
.. _dragon.operators.recurrent: operators/recurrent.html
.. _dragon.operators.loss: operators/loss.html
.. _dragon.operators.norm: operators/norm.html
.. _dragon.io: io.html
.. _dragon.operators.custom.minibatch: operators/custom/minibatch.html
.. _dragon.operators.custom.data_process: operators/custom/data_process.html
.. _dragon.operators.custom.vec_mult: operators/custom/vec_mult.html
.. _dragon.operators.contrib.rcnn: operators/contrib/rcnn.html
.. _dragon.utils.vision: utils.html#vision
==================
:mod:`DataProcess`
==================
.. toctree::
:hidden:
.. currentmodule:: dragon.operators.custom.data_process
.. autoclass:: DataProcessOp
:members:
\ No newline at end of file
......@@ -10,4 +10,4 @@
.. autoclass:: MiniBatchOp
:members:
.. _dragon.io: ../../io.html
\ No newline at end of file
.. _dragon.utils.vision: ../../utils.html#vision
\ No newline at end of file
==============
:mod:`VecMult`
==============
.. toctree::
:hidden:
.. currentmodule:: dragon.operators.custom.vec_mult
.. autoclass:: VecMultOp
:members:
\ No newline at end of file
......@@ -9,7 +9,7 @@
:members:
.. _LMDB: http://lmdb.readthedocs.io/en/release
.. _DataBatch: ../io/data_batch.html#dragon.io.data_batch
.. _DataReader: ../io/data_reader.html#dragon.io.data_reader
.. _DataTransformer: ../io/data_transformer.html#dragon.io.data_transformer
.. _BlobFetcher: ../io/blob_fetcher.html#dragon.io.blob_fetcher
\ No newline at end of file
.. _DataBatch: ../utils/vision/data_batch.html
.. _DataReader: ../utils/vision/data_reader.html
.. _DataTransformer: ../utils/vision/data_transformer.html
.. _BlobFetcher: ../utils/vision/blob_fetcher.html
\ No newline at end of file
......@@ -23,7 +23,7 @@ if (NOT THIRD_PARTY_DIR)
set(THIRD_PARTY_DIR ${PROJECT_SOURCE_DIR}/../ThirdParty)
endif()
# Set your protobuf compiler(protc) if necessary
# Set your protobuf compiler(protoc) if necessary
# if not, a default "protoc" in the environment path will be used
if (NOT PROTOC_EXECUTABLE)
set(PROTOC_EXECUTABLE protoc)
......
......@@ -128,6 +128,8 @@ class CUDAObject {
#ifdef WITH_CUDNN
vector<cudnnHandle_t> cudnn_handles[CUDA_MAX_DEVICES];
#endif
bool cudnn_enabled = true;
};
class CUDAContext {
......
......@@ -84,7 +84,7 @@ class Graph : public GraphBase {
/*! \brief Create a graph from the raw def */
GraphBase* NewGraph(
const GraphDef& meta_graph,
const GraphDef& def,
Workspace* ws);
DECLARE_REGISTRY(
......
......@@ -142,7 +142,7 @@ class Operator : public OperatorBase {
allow_run_ = true;
allow_run_ &= MPICheck();
allow_run_ &= (!(OutputSize() == 1 &&
Output(0)->name() == "ignore"));
Output(0)->name() == "NULL"));
}
/*! \brief Run this operator on the specified stream */
......@@ -168,10 +168,10 @@ class Operator : public OperatorBase {
/*! \brief Coordinate the context of inputs and outputs */
virtual void MemorySwitch() {
for (auto* e : inputs_)
if(e->name() != "ignore")
if(e->name() != "NULL")
e->SwitchToDevice(ctx()->device_id());
for (auto* e : outputs_)
if(e->name() != "ignore")
if(e->name() != "NULL")
e->SwitchToDevice(ctx()->device_id());
}
......
......@@ -76,23 +76,23 @@ class GradientMakerBase {
const string I(const int i) const {
return i < def.input_size() ?
def.input(i) : "ignore";
def.input(i) : "NULL";
}
const string O(const int i) const {
return i < def.output_size() ?
def.output(i) : "ignore";
def.output(i) : "NULL";
}
string GI(const int i) {
if (i >= g_inputs_.size()) return "ignore";
if (i >= g_inputs_.size()) return "NULL";
g_inputs_[i] = def.input(i) + "_grad";
return g_inputs_[i];
}
const string GO(const int i) const {
return i < g_outputs_.size() ?
g_outputs_[i] : "ignore";
g_outputs_[i] : "NULL";
}
protected:
......
......@@ -12,9 +12,9 @@
#ifndef DRAGON_CORE_OPERATOR_SCHEMA_H_
#define DRAGON_CORE_OPERATOR_SCHEMA_H_
#include <functional>
#include <limits>
#include <functional>
#include "common.h"
......@@ -92,7 +92,7 @@ class OpSchemaRegistry {
}
private:
static Map<string, OpSchema>& schema_map() {
static Map<string, OpSchema>& schema_map() {
static Map<string, OpSchema> schema_map_;
return schema_map_;
}
......
......@@ -41,7 +41,7 @@ class GradientGatherOp final : public Operator<Context> {
GradientGatherOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws) {
for (int i = 0; i < InputSize(); i++)
if (Input(i).name() != "ignore") indices.push_back(i);
if (Input(i).name() != "NULL") indices.push_back(i);
}
USE_OPERATOR_FUNCTIONS;
......
......@@ -43,7 +43,7 @@ class Conv2dGradientOp : public Conv2dOp<Context> {
USE_OPERATOR_FUNCTIONS;
USE_CONVOLUTION_FUNCTIONS;
bool HasBias() override { return Output(2)->name() != "ignore"; }
bool HasBias() override { return Output(2)->name() != "NULL"; }
void RunOnDevice() override;
template <typename T> void RunWithType();
......
......@@ -47,7 +47,7 @@ class ConvTranspose2dGradientOp : public ConvTranspose2dOp<Context> {
USE_OPERATOR_FUNCTIONS;
USE_CONVOLUTION_FUNCTIONS;
bool HasBias() override { return Output(2)->name() != "ignore"; }
bool HasBias() override { return Output(2)->name() != "NULL"; }
void RunOnDevice() override;
template <typename T> void RunWithType();
......
......@@ -50,7 +50,7 @@ class DepthwiseConv2dGradientOp
USE_OPERATOR_FUNCTIONS;
USE_CONVOLUTION_FUNCTIONS;
bool HasBias() override { return Output(2)->name() != "ignore"; }
bool HasBias() override { return Output(2)->name() != "NULL"; }
void RunOnDevice() override;
template <typename T> void RunWithType();
......
......@@ -131,7 +131,7 @@ struct CUDADeviceProps {
CUDADeviceProps() : props(CUDA_NUM_DEVICES()) {
for (int i = 0; i < CUDA_NUM_DEVICES(); ++i)
CUDA_CHECK(cudaGetDeviceProperties(&props[i], i));
}
}
vector<cudaDeviceProp> props;
};
......
......@@ -28,8 +28,7 @@ inline OperatorDef MakeOperatorDef(
const IterableInputs& inputs,
const IterableOutputs& outputs,
const IterableArgs& args,
const DeviceOption& device_option,
const string& engine) {
const DeviceOption& device_option) {
OperatorDef def;
def.set_type(type);
def.set_name(name);
......@@ -51,8 +50,8 @@ inline OperatorDef MakeOperatorDef(
const IterableOutputs& outputs,
const IterableArgs& args) {
return MakeOperatorDef(
type, name, inputs, outputs, args,
DeviceOption(), "");
type, name, inputs, outputs,
args, DeviceOption());
}
template <class IterableInputs,
......@@ -64,7 +63,7 @@ inline OperatorDef MakeOperatorDef(
const IterableOutputs& outputs) {
return MakeOperatorDef(
type, name, inputs, outputs,
vector<Argument>(), DeviceOption(), "");
vector<Argument>(), DeviceOption());
}
bool ParseProtoFromText(
......
......@@ -88,9 +88,8 @@ std::string CreateGraph(
auto graph_def_copy(*graph_def);
// Overwritten device options
DeviceOption* device_option = graph_def_copy.mutable_device_option();
device_option->set_device_type((DeviceType)device.device_type());
device_option->set_device_type((DeviceTypeProto)device.device_type());
device_option->set_device_id(device.device_id());
device_option->set_engine("CUDNN");
auto* graph = ws->CreateGraph(graph_def_copy);
if (!graph) LOG(FATAL) << "Can not create the graph.";
return graph->name();
......
......@@ -53,6 +53,7 @@ void AddGradientMethods(pybind11::module& m) {
if (is_sharing) maker.Share(backward_ops);
pybind11::gil_scoped_release g;
for (auto& op : backward_ops.op()) {
if (op.type().empty()) continue;
if (verbose) std::cout << op.DebugString() << std::endl;
if (op.has_uid()) ws()->RunOperator(op);
else ws()->RunOperatorOnce(op);
......
......@@ -31,6 +31,13 @@ void AddCUDAMethods(pybind11::module& m) {
#endif
});
m.def("EnableCUDNN", [](bool enabled) {
#ifdef WITH_CUDA
CUDAContext::cuda_object()
->cudnn_enabled = enabled;
#endif
});
m.def("cudaGetDevice", []() {
return CUDAContext::active_device_id();
});
......
......@@ -27,9 +27,6 @@ option['device'] = 'cpu'
# The device index
option['device_id'] = 0
# Whether to use cuDNN if possible
option['use_cudnn'] = False
# The global random seed
option['random_seed'] = 3
......@@ -77,15 +74,13 @@ def EnableCPU():
option['device'] = 'cpu'
def EnableCUDA(gpu_id=0, use_cudnn=True):
def EnableCUDA(gpu_id=0):
"""Enable NVIDIA's CUDA mode globally.
Parameters
----------
gpu_id : int
The index of GPU to use.
use_cudnn : boolean
Whether to use cuDNN if available.
Returns
-------
......@@ -95,7 +90,6 @@ def EnableCUDA(gpu_id=0, use_cudnn=True):
global option
option['device'] = 'cuda'
option['device_id'] = gpu_id
option['use_cudnn'] = use_cudnn
def EnableCNML(mlu_id=0):
......
......@@ -24,12 +24,28 @@ def IsCUDADriverSufficient():
Returns
-------
boolean
``True`` if your device(s) support CUDA otherwise ``False``.
*True* if your device(s) support CUDA otherwise *False*.
"""
return _C.IsCUDADriverSufficient()
def EnableCUDNN(enabled=True):
"""Enable the CuDNN engine.
Parameters
----------
enabled : boolean
*True* to enable.
Returns
-------
None
"""
return _C.EnableCUDNN(enabled)
def GetDevice():
"""Get the current active cuda device.
......
......@@ -164,7 +164,7 @@ class GraphGradientMaker(object):
is_skip, gen_grads = \
cls.CheckGrad(forward_op, inputs_to_grads, blacklist, targets)
# Missing grads are represented as ``NULL``
g_outputs = list(inputs_to_grads.get(name, 'ignore') for name in forward_op.output)
g_outputs = list(inputs_to_grads.get(name, 'NULL') for name in forward_op.output)
g_ops, g_inputs, defaults = cls.CreateGrad(forward_op, g_outputs)
# Append ops
......
......@@ -72,7 +72,7 @@ else:
def MakeOperatorDef(
op_type, inputs=(), outputs=(),
name='', uid=None, device_option=None,
arg=None, engine=None, **kwargs):
arg=None, **kwargs):
operator = pb.OperatorDef()
operator.type = op_type
operator.name = name
......@@ -80,14 +80,12 @@ def MakeOperatorDef(
operator.output.extend([str(tensor) for tensor in outputs])
if device_option is not None:
operator.device_option.CopyFrom(device_option)
if engine is not None:
operator.device_option.engine = engine
if 'random_seed' in kwargs:
operator.device_option.random_seed = kwargs['random_seed']
del kwargs['random_seed']
if uid is not None: operator.uid = uid
if arg is not None: operator.arg.extend(arg)
for k,v in kwargs.items():
for k, v in kwargs.items():
if v is None: continue
operator.arg.add().CopyFrom(MakeArgument(k,v))
return operator
......@@ -96,46 +94,36 @@ def MakeOperatorDef(
def MakeCXXOperatorDef(
op_type, inputs=(), outputs=(),
name='', uid=None, device_option=None,
arg=None, engine=None, **kwargs):
arg=None, **kwargs):
c_def = _C.OperatorDef()
py_def = MakeOperatorDef(
op_type, inputs, outputs, name, uid,
device_option, arg, engine, **kwargs)
device_option, arg, **kwargs)
c_def.ParseFrom(py_def.SerializeToString())
return c_def
def MakeDeviceOption(
device_type, device_id,
engine=None, rng_seed=None):
def MakeDeviceOption(device_type, device_id, rng_seed=None):
option = pb.DeviceOption()
option.device_type = device_type
option.device_id = device_id
if engine is not None: option.engine = engine
if rng_seed is not None: option.random_seed = rng_seed
return option
_PREDEFINED_DEVICE_LIMITS = 16
_PREDEFINED_DEVICE_ENGINES = ['', 'CUDNN']
_PREDEFINED_DEVICE_DICT = {'cpu': 0, 'cuda': 1, 'cnml': 2}
_PREDEFINED_DEVICE_OPTION_DICT = {}
for i in range(_PREDEFINED_DEVICE_LIMITS):
for device, identify in _PREDEFINED_DEVICE_DICT.items():
for engine in _PREDEFINED_DEVICE_ENGINES:
_PREDEFINED_DEVICE_OPTION_DICT[(device, i, engine)] = \
MakeDeviceOption(identify, i, engine)
if device == 'cuda':
_PREDEFINED_DEVICE_OPTION_DICT[('cuda', i)] = \
MakeDeviceOption(identify, i, 'CUDNN')
def GetDeviceOption(
device_type, device_id=0,
engine=None, rng_seed=None):
ctx = (device_type, device_id, engine if engine else '')
_PREDEFINED_DEVICE_OPTION_DICT[(device, i)] = \
MakeDeviceOption(identify, i)
def GetDeviceOption(device_type, device_id=0, rng_seed=None):
ctx = (device_type, device_id)
option = _PREDEFINED_DEVICE_OPTION_DICT[ctx]
if rng_seed is not None:
option_copy = copy.deepcopy(option)
......@@ -149,16 +137,15 @@ def GetDefaultDeviceOption():
if device_info is not None:
return GetDeviceOption(
device_info['device_type'],
device_info['device_id'],
device_info['device_engine'])
device_info['device_id'])
return None
def GetGlobalDeviceOption():
option = cfg.GetGlobalOptions()
return GetDeviceOption(
option['device'], option['device_id'],
'CUDNN' if option['use_cudnn'] else '')
option['device'],
option['device_id'])
# Fix the python stdout
......
......@@ -128,7 +128,7 @@ def name_scope(name):
return _GLOBAL_TENSOR_STACK.get_controller(default)
def device_scope(device_type, device_id=0, engine='AUTO'):
def device_scope(device_type, device_id=0):
"""Nest the specific device info.
Parameters
......@@ -137,20 +137,15 @@ def device_scope(device_type, device_id=0, engine='AUTO'):
The type of device.
device_id : int, optional
The index of the device.
engine : {'AUTO', 'CUDNN'}, optional
The auxiliary accelerating library to use.
"""
device_type, device_id, device_engine = \
device_type.upper(), device_id, engine.upper()
# Normalize the device type to lower case for the membership check that follows.
device_type, device_id = device_type.lower(), device_id
assert device_type in ['cpu', 'gpu', 'cuda', 'cnml']
# Default names
if device_type == 'gpu': device_type = 'cuda'
if device_engine == 'AUTO': device_engine = 'CUDNN'
return _GLOBAL_DEVICE_STACK.get_controller({
'device_type': device_type,
'device_id': device_id,
'device_engine': device_engine})
'device_id': device_id})
def phase_scope(phase):
......@@ -209,7 +204,7 @@ def get_default_device():
The device dict contains the following keys:
(``device_type``, ``device_id``, ``device_engine``).
(``device_type``, ``device_id``).
Returns
-------
......
......@@ -32,29 +32,29 @@ def Proposal(inputs, strides, ratios, scales,
Parameters
----------
inputs : list of Tensor
inputs : sequence of Tensor
The inputs.
strides : list of int
strides : sequence of int
The strides of anchors.
ratios : list of float
ratios : sequence of float
The ratios of anchors.
scales : list of float
scales : sequence of float
The scales of anchors.
pre_nms_top_n : int
pre_nms_top_n : int, optional, default=6000
The number of anchors before nms.
post_nms_top_n : int
post_nms_top_n : int, optional, default=300
The number of anchors after nms.
nms_thresh : float
nms_thresh : float, optional, default=0.7
The threshold of nms.
min_size : int
min_size : int, optional, default=16
The min size of anchors.
min_level : int
min_level : int, optional, default=2
Finest level of the FPN pyramid.
max_level : int
max_level : int, optional, default=5
Coarsest level of the FPN pyramid.
canonical_scale : int
canonical_scale : int, optional, default=224
The baseline scale of mapping policy.
canonical_level : int
canonical_level : int, optional, default=4
Heuristic level of the canonical scale.
Returns
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon as dg
from multiprocessing import Process, Queue
class Fetcher(Process):
    """Daemon process that keeps putting dummy arrays into a queue.

    Parameters
    ----------
    queue : multiprocessing.Queue
        The queue that receives the produced arrays.

    """

    def __init__(self, queue):
        super(Fetcher, self).__init__()
        self._queue = queue
        self.daemon = True

        def cleanup():
            # ``terminate()`` fails on a process that was never started
            # (or has already finished), so only stop a running child.
            if not self.is_alive():
                return
            print('Terminating Fetcher......')
            self.terminate()
            self.join()

        import atexit
        atexit.register(cleanup)

    def run(self):
        """Put ``(5, 10)`` arrays of ones into the queue, forever."""
        while True:
            self._queue.put(np.ones((5, 10)))
class DataProcessOp(object):
    """Example of a custom RunOp used for data processing.

    Examples
    --------
    >>> import dragon as dg
    >>> y = dg.ops.Run([], module=__name__, op='DataProcessOp', num_outputs=1)
    >>> foo = dg.function(outputs=y)
    >>> foo()
    >>> print(y.get_value())
    [[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
     [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
     [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
     [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
     [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]]

    """

    def setup(self, inputs, outputs):
        """Create the data queue and start the fetching process.

        Parameters
        ----------
        inputs : list of str
            The names of the input tensors.
        outputs : list of str
            The names of the output tensors.

        Returns
        -------
        None

        """
        buffer = Queue(100)
        self._queue = buffer
        worker = Fetcher(buffer)
        self._fetcher = worker
        worker.start()

    def run(self, inputs, outputs):
        """Forward pass: feed the next queued item into the first output.

        Parameters
        ----------
        inputs : list of str
            The names of the input tensors.
        outputs : list of str
            The names of the output tensors.

        Returns
        -------
        None

        """
        feed = dg.workspace.FeedTensor
        target = outputs[0]
        # Blocks until the fetcher has produced an item.
        feed(target, self._queue.get())
if __name__ == '__main__':
    # Define the custom op and wrap its output into a callable
    output = dg.ops.Run(
        [], module=__name__, op='DataProcessOp', num_outputs=1)
    run_graph = dg.function(outputs=output)

    # Execute once
    run_graph()

    # Show the value that was fed into the output
    print(output.get_value())
\ No newline at end of file
......@@ -9,23 +9,26 @@
#
# ------------------------------------------------------------
import dragon.core.workspace as ws
from dragon.utils.vision import DataBatch
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon
import dragon.utils.vision
class MiniBatchOp(object):
"""How to form a minibatch based on `dragon.io`_ package.
"""Form a mini-batch based on `dragon.utils.vision`_ package."""
"""
def setup(self, inputs, outputs):
"""Setup for params or options.
Parameters
----------
inputs : list of str
Indicating the name of input tensors.
outputs : list of str
Indicating the name of output tensors.
inputs : sequence of str
The name of inputs.
outputs : sequence of str
The name of outputs.
Returns
-------
......@@ -33,18 +36,17 @@ class MiniBatchOp(object):
"""
kwargs = eval(self.param_str)
self._data_batch = DataBatch(**kwargs)
self._data_batch = dragon.utils.vision.DataBatch(**kwargs)
def run(self, inputs, outputs):
"""Run method, i.e., forward pass.
Parameters
----------
inputs : list of str
Indicating the name of input tensors.
outputs : list of str
Indicating the name of output tensors.
inputs : sequence of str
The name of inputs.
outputs : sequence of str
The name of outputs.
Returns
-------
......@@ -53,4 +55,4 @@ class MiniBatchOp(object):
"""
blobs = self._data_batch.get()
for idx, blob in enumerate(blobs):
ws.FeedTensor(outputs[idx], blob)
\ No newline at end of file
dragon.workspace.FeedTensor(outputs[idx], blob)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import dragon as dg
class VecMultOp(object):
    """Example of a custom TemplateOp that multiplies two tensors.

    Examples
    --------
    >>> import dragon as dg
    >>> x1 = dg.Tensor('x1').Variable()
    >>> x2 = dg.Tensor('x2').Variable()
    >>> y = dg.ops.Template([x1, x2], module=__name__, op='VecMultOp', num_outputs=1)
    >>> dx1 = dg.grad(y, x1)
    >>> dx2 = dg.grad(y, x2)
    >>> foo = dg.function(outputs=y)
    >>> dg.workspace.FeedTensor(x1, np.ones((5, 3), dtype=np.float32))
    >>> dg.workspace.FeedTensor(x2, np.ones((5, 3), dtype=np.float32) * 5.0)
    >>> foo()
    >>> print(y.get_value())
    [[ 5.  5.  5.]
     [ 5.  5.  5.]
     [ 5.  5.  5.]
     [ 5.  5.  5.]
     [ 5.  5.  5.]]
    >>> print(dx1.get_value())
    [[ 5.  5.  5.]
     [ 5.  5.  5.]
     [ 5.  5.  5.]
     [ 5.  5.  5.]
     [ 5.  5.  5.]]
    >>> print(dx2.get_value())
    [[ 1.  1.  1.]
     [ 1.  1.  1.]
     [ 1.  1.  1.]
     [ 1.  1.  1.]
     [ 1.  1.  1.]]

    """

    def setup(self, inputs, outputs):
        """Setup hook; this op has no params or options to prepare.

        Parameters
        ----------
        inputs : list of str
            The names of the input tensors.
        outputs : list of str
            The names of the output tensors.

        Returns
        -------
        None

        """

    def run(self, inputs, outputs):
        """Forward pass: feed ``x1 * x2`` into the first output.

        Parameters
        ----------
        inputs : list of str
            The names of the input tensors.
        outputs : list of str
            The names of the output tensors.

        Returns
        -------
        None

        """
        fetch = dg.workspace.FetchTensor
        lhs = fetch(inputs[0])
        rhs = fetch(inputs[1])
        feed = dg.workspace.FeedTensor
        target = outputs[0]
        # The product is computed on the fetched values (numpy mult)
        feed(target, lhs * rhs)

    def grad(self, inputs, outputs):
        """Backward pass: feed ``dy * x2`` and ``dy * x1`` into the outputs.

        Parameters
        ----------
        inputs : list of str
            The names of the input tensors; the last one is read as ``dy``.
        outputs : list of str
            The names of the output tensors.

        Returns
        -------
        None

        """
        fetch = dg.workspace.FetchTensor
        lhs = fetch(inputs[0])
        rhs = fetch(inputs[1])
        upstream = fetch(inputs[-1])
        # Each input's gradient is the upstream gradient times the other input
        gradients = (upstream * rhs, upstream * lhs)
        for slot, value in enumerate(gradients):
            dg.workspace.FeedTensor(outputs[slot], value)
if __name__ == '__main__':
    # Define the operands, the custom op, its gradients and the callable
    lhs = dg.Tensor('x1').Variable()
    rhs = dg.Tensor('x2').Variable()
    product = dg.ops.Template(
        [lhs, rhs], module=__name__, op='VecMultOp', num_outputs=1)
    d_lhs = dg.grad(product, lhs)
    d_rhs = dg.grad(product, rhs)
    run_graph = dg.function(outputs=product)

    # Feed the operands
    ones = np.ones((5, 3), dtype=np.float32)
    dg.workspace.FeedTensor(lhs, ones)
    dg.workspace.FeedTensor(rhs, np.ones((5, 3), dtype=np.float32) * 5.0)

    # Execute once
    run_graph()

    # Print the result and both gradients
    reports = (
        ('y \n-------------- \n', product),
        ('dx1 \n-------------- \n', d_lhs),
        ('dx2 \n-------------- \n', d_rhs),
    )
    for header, tensor in reports:
        print(header, tensor.get_value(), '\n')
\ No newline at end of file
......@@ -25,40 +25,40 @@ def LMDBData(**kwargs):
----------
source : str
The path of database.
shuffle : bool
shuffle : bool, optional, default=False
Whether to shuffle the data.
node_step: bool
Whether to split data for multiple parallel nodes.
num_chunks : int
The number of chunks to split. Default is ``2048``.
chunk_size : int
The size(MB) of each chunk. Default is -1 (Refer ``num_chunks``).
mean_values : list
num_chunks : int, optional, default=2048
The number of chunks to split.
chunk_size : int, optional, default=-1
The size(MB) of each chunk.
mean_values : list, optional
The mean value of each image channel.
scale : float
The scale performed after mean subtraction. Default is ``1.0``.
padding : int
The zero-padding size. Default is ``0``.
crop_size : int
The crop size. Default is ``0`` (Disabled).
mirror : bool
Whether to mirror(flip horizontally) images. Default is ``False``.
color_augmentation : bool
Whether to use color distortion. Default is ``False``.
min_random_scale : float
The min scale of the input images. Default is ``1.0``.
max_random_scale : float
The max scale of the input images. Default is ``1.0``.
force_gray : bool
Set not to duplicate channel for gray. Default is ``False``.
phase : str
The phase of this operator, ``TRAIN`` or ``TEST``.
batch_size : int
scale : float, optional, default=1.
The scale performed after mean subtraction.
padding : int, optional, default=0
The zero-padding size.
crop_size : int, optional, default=0
The cropping size.
mirror : bool, optional, default=False
Whether to mirror(flip horizontally) images.
color_augmentation : bool, optional, default=False
Whether to use color distortion.
min_random_scale : float, optional, default=1.
The min scale of the input images.
max_random_scale : float, optional, default=1.
The max scale of the input images.
force_gray : bool, optional, default=False
Set not to duplicate channel for gray.
phase : {'TRAIN', 'TEST'}, optional
The phase of this operator.
batch_size : int, optional, default=128
The size of a mini-batch.
partition : bool
Whether to partition batch for parallelism. Default is ``False``.
prefetch : int
The prefetch count. Default is ``5``.
partition : bool, optional, default=False
Whether to partition batch for parallelism.
prefetch : int, optional, default=5
The prefetch count.
Returns
-------
......@@ -85,8 +85,7 @@ def LMDBData(**kwargs):
@OpSchema.Inputs(1)
def ImageData(
inputs, mean_values=None, std_values=None,
dtype='float32', data_format='NCHW', **kwargs
):
dtype='float32', data_format='NCHW', **kwargs):
"""Process the images from 4D raw data.
Note that we assume the data format of raw data is **NHWC**.
......@@ -99,10 +98,10 @@ def ImageData(
The optional mean values to subtract.
std_values : sequence of float, optional
The optional std values to divide.
dtype : str
The type of output. ``float32`` or ``float16``.
data_format : str
The data format of output. ``NCHW`` or ``NHWC``.
dtype : {'float16', 'float32'}, optional
The data type of output.
data_format : {'NCHW', 'NHWC'}, optional
The data format of output.
Returns
-------
......
......@@ -52,7 +52,7 @@ def Conv2d(
The inputs, represent [input, weights] + [bias].
num_output : int
The output channels of convolution.
kernel_shape : sequence of int.
kernel_shape : sequence of int
The shape of convolution kernel.
strides : sequence of int, optional, default=1
The stride(s) of convolution.
......
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
// Licensed under the BSD 2-Clause License.
// Codes are based on:
// https://github.com/pytorch/pytorch/blob/master/caffe2/proto/caffe2.proto
syntax = "proto2";
package dragon;
// Store the serialized Tensor objects.
message TensorProto {
// The dimensions of the tensor.
repeated int32 dims = 1;
// The element types a tensor can be stored as.
// Note that the value 11 is not assigned.
enum DataType {
UNDEFINED = 0;
// Basic types.
FLOAT = 1;
INT32 = 2;
BYTE = 3;
STRING = 4;
// Less-commonly used data types.
BOOL = 5;
UINT8 = 6;
INT8 = 7;
UINT16 = 8;
INT16 = 9;
INT64 = 10;
FLOAT16 = 12;
DOUBLE = 13;
}
// The element type of the stored data, FLOAT if not set.
optional DataType data_type = 2 [default = FLOAT];
// For float.
repeated float float_data = 3 [packed = true];
// For int32, uint8, int8, uint16, int16, bool, and float16
// Note about float16: in storage we will basically convert float16 byte-wise
// to unsigned short and then store them in the int32_data field.
repeated int32 int32_data = 4 [packed = true];
// For bytes.
optional bytes byte_data = 5;
// For strings.
repeated bytes string_data = 6;
// For double.
repeated double double_data = 9 [packed = true];
// For int64.
repeated int64 int64_data = 10 [packed = true];
// Store the raw data, contents are serialized as little-endian.
optional bytes raw_data = 13;
// Optionally, a name for the tensor.
// Field numbers 8, 11 and 12 are not used by this message.
optional string name = 7;
}
// Record the filler of Tensor.
// This structure is kept for backward compatibility
// with caffe1, which relies on implicit initializers.
message TensorFillerProto {
optional string tensor = 1;
optional string type = 2 [default = 'constant'];
......@@ -36,67 +67,120 @@ message TensorFillerProto {
optional VarianceNorm variance_norm = 9 [default = FAN_IN];
}
// Store multiple TensorProto objects in one single proto.
message TensorProtos {
repeated TensorProto protos = 1;
}
enum DeviceType {
PROTO_CPU = 0;
PROTO_CUDA = 1;
PROTO_CNML = 2;
// DeviceType that Dragon currently supports.
enum DeviceTypeProto {
// The default device.
PROTO_CPU = 0;
// NVIDIA's CUDA Environment.
PROTO_CUDA = 1;
// CAMBRICON's CNML Environment.
PROTO_CNML = 2;
}
// Device-specific options.
message DeviceOption {
optional DeviceType device_type = 1 [default = PROTO_CPU];
// The type of device to dispatch executions.
optional DeviceTypeProto device_type = 1 [default = PROTO_CPU];
// The index of this device.
optional int32 device_id = 2 [default = 0];
// The random seed to start the random generator.
optional uint32 random_seed = 3 [default = 3];
optional string engine = 4;
}
// A named argument containing either singular float, integer and string
// values, or repeated float, int and string arrays.
message Argument {
// The name of this argument.
optional string name = 1;
// Store the float32 value.
optional float f = 2;
// Store the bool, int32, int64 value.
optional int64 i = 3;
// Store the string value.
optional bytes s = 4;
// Field numbers 5 and 6 are not used by this message.
// Store the float32 values.
repeated float floats = 7;
// Store the bool, int32, int64 values.
repeated int64 ints = 8;
// Store the string values.
repeated bytes strings = 9;
}
// Operator Definition
message OperatorDef {
// The unique id of this operator.
// Set it to persist operators in the dynamic graph.
optional string uid = 1;
// The name of inputs.
repeated string input = 2;
// The name of outputs.
repeated string output = 3;
// The optional name of this operator.
optional string name = 4;
// The operator type.
optional string type = 5;
// The arguments.
repeated Argument arg = 6;
// The device option that the operator should run under.
optional DeviceOption device_option = 7;
}
// Record the gradient information
message GradientProto {
// The derivative target, i.e. what is differentiated.
optional string cost = 1;
// The target to differentiate with respect to.
optional string wrt = 2;
// The external gradient.
optional string external = 3;
}
// Record the updater information
message UpdaterProto {
// The operator name to use.
optional string name = 1;
// The operator type.
optional string type = 2;
// The tensor to update.
repeated string tensor = 3;
// The arguments.
repeated Argument arg = 4;
}
// Graph Definition
message GraphDef {
// The graph name.
optional string name = 1;
// The operators to execute.
repeated OperatorDef op = 2;
// The type of graph.
optional string graph_type = 3;
// Field number 4 is not used by this message.
// The device option for this graph.
optional DeviceOption device_option = 5;
// The arguments.
repeated Argument arg = 6;
// The name of inputs.
repeated string input = 7;
// The name of outputs.
repeated string output = 8;
// The gradients information.
repeated GradientProto gradient = 9;
// The updaters information.
repeated UpdaterProto updater = 10;
}
\ No newline at end of file
......@@ -28,12 +28,12 @@ class BlobFetcher(Process):
Parameters
----------
batch_size : int
The size of a training batch.
partition : boolean
Whether to partition batch. Default is ``False``.
prefetch : int
The prefetch count. Default is ``5``.
batch_size : int, optional, default=128
The size of a mini-batch.
partition : bool, optional, default=False
Whether to partition batch for parallelism.
prefetch : int, optional, default=5
The prefetch count.
"""
super(BlobFetcher, self).__init__()
......
......@@ -40,38 +40,38 @@ class DataBatch(object):
----------
source : str
The path of database.
multiple_nodes: boolean
Whether to split data for multiple parallel nodes. Default is ``False``.
shuffle : boolean
Whether to shuffle the data. Default is ``False``.
num_chunks : int
The number of chunks to split. Default is ``2048``.
chunk_size : int
The size(MB) of each chunk. Default is -1 (Refer ``num_chunks``).
padding : int
The zero-padding size. Default is ``0`` (Disabled).
fill_value : int
The value to fill when padding is valid. Default is ``127``.
crop_size : int
The crop size. Default is ``0`` (Disabled).
mirror : boolean
Whether to flip(horizontally) images. Default is ``False``.
color_augmentation : boolean
Whether to distort colors. Default is ``False``.
min_random_scale : float
The min scale of the input images. Default is ``1.0``.
max_random_scale : float
The max scale of the input images. Default is ``1.0``.
force_color : boolean
Set to duplicate channels for gray. Default is ``False``.
phase : str
The phase of this operator, ``TRAIN`` or ``TEST``. Default is ``TRAIN``.
batch_size : int
The size of a training batch.
partition : boolean
Whether to partition batch. Default is ``False``.
prefetch : int
The prefetch count. Default is ``5``.
multiple_nodes : bool, optional, default=False
Whether to split data for multiple parallel nodes.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=2048
The number of chunks to split.
chunk_size : int, optional, default=-1
The size(MB) of each chunk.
padding : int, optional, default=0
The zero-padding size.
fill_value : int, optional, default=127
The value to fill when padding is valid.
crop_size : int, optional, default=0
The cropping size.
mirror : bool, optional, default=False
Whether to mirror(flip horizontally) images.
color_augmentation : bool, optional, default=False
Whether to use color distortion.
min_random_scale : float, optional, default=1.
The min scale of the input images.
max_random_scale : float, optional, default=1.
The max scale of the input images.
force_gray : bool, optional, default=False
Set not to duplicate channel for gray.
phase : {'TRAIN', 'TEST'}, optional
The optional running phase.
batch_size : int, optional, default=128
The size of a mini-batch.
partition : bool, optional, default=False
Whether to partition batch for parallelism.
prefetch : int, optional, default=5
The prefetch count.
"""
super(DataBatch, self).__init__()
......@@ -109,7 +109,7 @@ class DataBatch(object):
self._num_transformers += 1
self._num_transformers = min(self._num_transformers, self._max_transformers)
self._batch_size = kwargs.get('batch_size', 100)
self._batch_size = kwargs.get('batch_size', 128)
self._partition = kwargs.get('partition', False)
if self._partition:
self._batch_size = int(self._batch_size / kwargs['group_size'])
......
......@@ -35,14 +35,14 @@ class DataReader(Process):
----------
source : str
The path of database.
multiple_nodes: boolean
Whether to split data for multiple parallel nodes. Default is ``False``.
shuffle : boolean
Whether to shuffle the data. Default is ``False``.
num_chunks : int
The number of chunks to split. Default is ``2048``.
chunk_size : int
The size(MB) of each chunk. Default is -1 (Refer ``num_chunks``).
multiple_nodes : bool, optional, default=False
Whether to split data for multiple parallel nodes.
shuffle : bool, optional, default=False
Whether to shuffle the data.
num_chunks : int, optional, default=2048
The number of chunks to split.
chunk_size : int, optional, default=-1
The size(MB) of each chunk.
"""
super(DataReader, self).__init__()
......
......@@ -42,24 +42,24 @@ class DataTransformer(Process):
Parameters
----------
padding : int
The padding size. Default is ``0`` (Disabled).
fill_value : int
The value to fill when padding is valid. Default is ``127``.
crop_size : int
The crop size. Default is ``0`` (Disabled).
mirror : boolean
Whether to flip(horizontally) images. Default is ``False``.
color_augmentation : boolean
Whether to distort colors. Default is ``False``.
min_random_scale : float
The min scale of the input images. Default is ``1.0``.
max_random_scale : float
The max scale of the input images. Default is ``1.0``.
force_color : boolean
Set to duplicate channels for gray. Default is ``False``.
phase : str
The phase of this operator, ``TRAIN`` or ``TEST``. Default is ``TRAIN``.
padding : int, optional, default=0
The zero-padding size.
fill_value : int, optional, default=127
The value to fill when padding is valid.
crop_size : int, optional, default=0
The cropping size.
mirror : bool, optional, default=False
Whether to mirror(flip horizontally) images.
color_augmentation : bool, optional, default=False
Whether to use color distortion.
min_random_scale : float, optional, default=1.
The min scale of the input images.
max_random_scale : float, optional, default=1.
The max scale of the input images.
force_gray : bool, optional, default=False
Set not to duplicate channel for gray.
phase : {'TRAIN', 'TEST'}, optional
The optional running phase.
"""
super(DataTransformer, self).__init__()
......
......@@ -183,8 +183,6 @@ def GraphDef_Device(graph_def):
device_option.device_type = supports[option['device']]
device_option.device_id = option['device_id']
device_option.random_seed = option['random_seed']
if option['device'] == 'cuda':
if option['use_cudnn']: device_option.engine = 'CUDNN'
graph_def.device_option.CopyFrom(device_option)
......
......@@ -93,14 +93,14 @@ def RunOperator(
op_name = recorder.append(op)
op.name = op_name
for ix in range(len(outputs)):
outputs[ix]._requires_grad = True
outputs[ix].requires_grad = True
outputs[ix].__jit_recorder__ = recorder
if len(ignored_grads) > 0:
outputs[ix]._ignored_grads = ignored_grads
else:
# Reset status
for ix in range(len(outputs)):
outputs[ix]._requires_grad = False
outputs[ix].requires_grad = False
# Callback on Run
if callback_on_run: callback_on_run(op_name)
......
......@@ -315,9 +315,8 @@ class Module(object):
op_type=self.op_meta['op_type'],
device_option=proto_utils.
GetDeviceOption(
self._device.type,
self._device.index,
engine='CUDNN'),
self._device.type,
self._device.index),
**self.op_meta['arguments']
)
......
......@@ -413,12 +413,15 @@ class RNNCellBase(Module):
for weight in self.parameters():
weight.data.uniform_(-stdv, stdv)
from .activation import Tanh, Sigmoid
class LSTMCell(RNNCellBase):
def __init__(self, input_size, hidden_size, bias=True):
super(LSTMCell, self).__init__(
input_size, hidden_size, bias, num_chunks=4)
self.register_op()
self.tanh = Tanh()
self.sigmoid = Sigmoid()
def register_op(self):
self.op_meta = {'op_type': 'LSTMCell', 'arguments': {}}
......
......@@ -349,7 +349,7 @@ class OneHot(BaseModule):
def forward(self, x):
inputs = [x]; self.unify_devices(inputs)
outputs = [self.register_output()]
return self.run(inputs, outputs)
with no_grad(): return self.run(inputs, outputs)
class Cast(BaseModule):
......@@ -376,8 +376,7 @@ class Cast(BaseModule):
y = self.run(inputs, outputs)
else:
self.unify_devices([x])
with no_grad():
y = self.run([], [x])
with no_grad(): y = self.run([], [x])
return y
......@@ -400,4 +399,4 @@ class Multinomial(BaseModule):
def forward(self, x, y):
inputs = [x]; self.unify_devices(inputs)
outputs = [y] if y else [self.register_output()]
return self.run(inputs, outputs)
\ No newline at end of file
with no_grad(): return self.run(inputs, outputs)
\ No newline at end of file
......@@ -60,6 +60,37 @@ inline int BBoxTransform(
return (bbox_w >= min_box_w) * (bbox_h >= min_box_h);
}
/*!
 * Decode a box in place from its regression deltas, clip it to the
 * image and divide it by the image scale.
 *
 * \param dx        Center shift along x, in units of the box width.
 * \param dy        Center shift along y, in units of the box height.
 * \param d_log_w   Log of the width ratio (new width = exp(d_log_w) * width).
 * \param d_log_h   Log of the height ratio (new height = exp(d_log_h) * height).
 * \param im_w      Image width; x coordinates are clipped to [0, im_w - 1].
 * \param im_h      Image height; y coordinates are clipped to [0, im_h - 1].
 * \param im_scale  Every clipped coordinate is divided by this value.
 * \param bbox      Four values (x1, y1, x2, y2), read and then overwritten.
 */
template <typename T>
inline void BBoxTransform(
    const T dx,
    const T dy,
    const T d_log_w,
    const T d_log_h,
    const T im_w,
    const T im_h,
    const T im_scale,
    T* bbox) {
    // Width, height and center of the incoming box (sizes use a +1 offset)
    const T box_w = bbox[2] - bbox[0] + 1;
    const T box_h = bbox[3] - bbox[1] + 1;
    const T box_cx = bbox[0] + (T)0.5 * box_w;
    const T box_cy = bbox[1] + (T)0.5 * box_h;

    // Apply the deltas: shift the center, rescale the size
    const T new_cx = dx * box_w + box_cx;
    const T new_cy = dy * box_h + box_cy;
    const T new_w = exp(d_log_w) * box_w;
    const T new_h = exp(d_log_h) * box_h;

    // Back to corner representation
    const T x1 = new_cx - (T)0.5 * new_w;
    const T y1 = new_cy - (T)0.5 * new_h;
    const T x2 = new_cx + (T)0.5 * new_w;
    const T y2 = new_cy + (T)0.5 * new_h;

    // Clamp a coordinate to [0, extent - 1], then undo the image scaling
    const auto clip_and_rescale = [im_scale](const T value, const T extent) -> T {
        return std::max((T)0, std::min(value, extent - 1)) / im_scale;
    };
    bbox[0] = clip_and_rescale(x1, im_w);
    bbox[1] = clip_and_rescale(y1, im_h);
    bbox[2] = clip_and_rescale(x2, im_w);
    bbox[3] = clip_and_rescale(y2, im_h);
}
/******************** Anchor ********************/
template <typename T>
......@@ -117,6 +148,38 @@ inline void GenerateGridAnchors(
}
}
/*!
 * Write the shifted anchor box of every selected index into ``proposals``.
 *
 * Each index is first reduced by ``num_classes * base_offset``. Indices
 * that then fall outside [0, num_classes * num_anchors * feat_h * feat_w)
 * are skipped and their row is left untouched. For the others, the class
 * dimension is divided out and the remainder is decomposed as
 * (anchor, h, w) with w varying fastest.
 *
 * \param num_proposals  Number of entries in ``indices``.
 * \param anchors        Base anchors, 4 values per anchor.
 * \param indices        Flat indices selected by the caller.
 * \param proposals      Rows of 7 values; elements 1..4 of row i receive
 *                       the anchor shifted by (w * stride, h * stride).
 */
template <typename T>
inline void GenerateGridAnchors(
    const int num_proposals,
    const int num_classes,
    const int num_anchors,
    const int feat_h,
    const int feat_w,
    const int stride,
    const int base_offset,
    const T* anchors,
    const int64_t* indices,
    T* proposals) {
    // Index range [first, first + extent) covered by this feature level
    const int first = num_classes * base_offset;
    const int extent = num_classes * (num_anchors * feat_h * feat_w);
    for (int n = 0; n < num_proposals; ++n) {
        const int local = (int)indices[n] - first;
        // Not produced by this feature level
        if (local < 0 || local >= extent) continue;
        // Drop the class dimension, then split into (anchor, row, col)
        const int cell = local / num_classes;
        const int col = cell % feat_w;
        const int row = (cell / feat_w) % feat_h;
        const int k = cell / feat_w / feat_h;
        const T shift_x = (T)col * stride;
        const T shift_y = (T)row * stride;
        const T* anchor = anchors + k * 4;
        // Element 0 of each row is not written here
        T* box = proposals + n * 7 + 1;
        box[0] = shift_x + anchor[0];
        box[1] = shift_y + anchor[1];
        box[2] = shift_x + anchor[2];
        box[3] = shift_y + anchor[3];
    }
}
/******************** Proposal ********************/
template <typename T>
......@@ -164,14 +227,16 @@ void GenerateMSProposals(
const int64_t* indices,
T* proposals) {
int64_t index;
int64_t num_candidates_2x = 2 * num_candidates;
int64_t num_candidates_3x = 3 * num_candidates;
float* proposal = proposals;
float dx, dy, d_log_w, d_log_h;
for (int i = 0; i < num_proposals; ++i) {
index = indices[i];
dx = deltas[index];
dy = deltas[num_candidates + index];
d_log_w = deltas[2 * num_candidates + index];
d_log_h = deltas[3 * num_candidates + index];
d_log_w = deltas[num_candidates_2x + index];
d_log_h = deltas[num_candidates_3x + index];
proposal[4] = BBoxTransform<float>(
dx, dy, d_log_w, d_log_h,
im_w, im_h, min_box_w, min_box_h,
......@@ -181,6 +246,41 @@ void GenerateMSProposals(
}
/*!
 * Fill the multi-class proposal rows from the selected score indices.
 *
 * Every index is split into a box index (index / num_classes) and a
 * class (index % num_classes). Row i of ``proposals`` has 7 values:
 *   [0]    im_idx
 *   [1..4] the box already stored there, decoded in place by BBoxTransform
 *   [5]    scores[indices[i]]
 *   [6]    class + 1
 *
 * ``deltas`` is read as four consecutive planes of ``num_boxes`` values
 * (dx, dy, d_log_w, d_log_h).
 */
template <typename T>
void GenerateMCProposals(
    const int num_proposals,
    const int num_boxes,
    const int num_classes,
    const int im_idx,
    const float im_h,
    const float im_w,
    const float im_scale,
    const T* scores,
    const T* deltas,
    const int64_t* indices,
    T* proposals) {
    // Offsets of the d_log_w / d_log_h planes inside deltas
    const int64_t log_w_plane = 2 * num_boxes;
    const int64_t log_h_plane = 3 * num_boxes;
    float* row = proposals;
    for (int n = 0; n < num_proposals; ++n, row += 7) {
        const int64_t flat = indices[n];
        const int64_t cls = flat % num_classes;
        const int64_t box = flat / num_classes;
        const float dx = deltas[box];
        const float dy = deltas[num_boxes + box];
        const float d_log_w = deltas[log_w_plane + box];
        const float d_log_h = deltas[log_h_plane + box];
        row[0] = im_idx;
        // Decodes, clips and rescales row[1..4] in place
        BBoxTransform<float>(
            dx, dy, d_log_w, d_log_h,
                im_w, im_h, im_scale, row + 1);
        row[5] = scores[flat];
        row[6] = cls + 1;
    }
}
template <typename T>
inline void SortProposals(
const int start,
const int end,
......
......@@ -7,7 +7,7 @@
namespace dragon {
template <class Context> template <typename T>
void ProposalOp<Context>::RunWithType() {
void ProposalOp<Context>::RunWithRCNN() {
using BT = float; // DType of BBox
using BC = CPUContext; // Context of BBox
......@@ -15,7 +15,6 @@ void ProposalOp<Context>::RunWithType() {
int total_rois = 0, num_rois;
int num_candidates, num_proposals;
auto* RIdata = roi_indices.data();
auto* batch_scores = Input(-3).template data<T, BC>();
auto* batch_deltas = Input(-2).template data<T, BC>();
auto* im_info = Input(-1).template data<BT, BC>();
......@@ -32,11 +31,10 @@ void ProposalOp<Context>::RunWithType() {
if (strides.size() == 1) {
// Case 1: single stride
feat_h = Input(0).dim(2), feat_w = Input(0).dim(3);
K = feat_h * feat_w, A = int(ratios.size() * scales.size());
A = int(ratios.size() * scales.size()), K = feat_h * feat_w;
// Select the Top-K candidates as proposals
num_candidates = K * A;
num_proposals = std::min(
num_candidates, (int)pre_nms_top_n);
num_candidates = A * K;
num_proposals = std::min(num_candidates, (int)pre_nms_top_n);
utils::math::ArgPartition(
num_candidates, num_proposals,
true, scores, indices);
......@@ -50,16 +48,16 @@ void ProposalOp<Context>::RunWithType() {
&ratios[0], &scales[0], Adata);
rcnn::GenerateGridAnchors(
num_proposals, A, feat_h, feat_w,
strides[0], 0, Adata, indices.data(), Pdata);
strides[0], 0, Adata, &indices[0], Pdata);
rcnn::GenerateSSProposals(K, num_proposals,
im_h, im_w, min_box_h, min_box_w,
scores, deltas, indices.data(), Pdata);
scores, deltas, &indices[0], Pdata);
// Sort, NMS and Retrieve
rcnn::SortProposals(0, num_proposals - 1, num_proposals, Pdata);
rcnn::ApplyNMS(num_proposals, post_nms_top_n, nms_thresh,
proposals_.template mutable_data<BT, Context>(),
RIdata, num_rois, ctx());
rcnn::RetrieveRoIs(num_rois, n, Pdata, RIdata, Ydata);
&roi_indices[0], num_rois, ctx());
rcnn::RetrieveRoIs(num_rois, n, Pdata, &roi_indices[0], Ydata);
} else if (strides.size() > 1) {
// Case 2: multiple strides
CHECK_EQ(strides.size(), InputSize() - 3)
......@@ -70,8 +68,7 @@ void ProposalOp<Context>::RunWithType() {
<< scales.size() << " scales";
// Select the Top-K candidates as proposals
num_candidates = Input(-3).dim(1);
num_proposals = std::min(
num_candidates, (int)pre_nms_top_n);
num_proposals = std::min(num_candidates, (int)pre_nms_top_n);
utils::math::ArgPartition(
num_candidates, num_proposals,
true, scores, indices);
......@@ -90,19 +87,19 @@ void ProposalOp<Context>::RunWithType() {
rcnn::GenerateGridAnchors(
num_proposals, A, feat_h, feat_w,
strides[i], base_offset,
Adata, indices.data(), Pdata);
base_offset += K * A;
Adata, &indices[0], Pdata);
base_offset += (A * K);
}
rcnn::GenerateMSProposals(
num_candidates, num_proposals,
im_h, im_w, min_box_h, min_box_w,
scores, deltas, indices.data(), Pdata);
scores, deltas, &indices[0], Pdata);
// Sort, NMS and Retrieve
rcnn::SortProposals(0, num_proposals - 1, num_proposals, Pdata);
rcnn::ApplyNMS(num_proposals, post_nms_top_n, nms_thresh,
proposals_.template mutable_data<BT, Context>(),
RIdata, num_rois, ctx());
rcnn::RetrieveRoIs(num_rois, n, Pdata, RIdata, Ydata);
&roi_indices[0], num_rois, ctx());
rcnn::RetrieveRoIs(num_rois, n, Pdata, &roi_indices[0], Ydata);
} else {
LOG(FATAL) << "Excepted at least one stride for proposals.";
}
......@@ -126,7 +123,7 @@ void ProposalOp<Context>::RunWithType() {
ctx()->template Copy<BT, BC, BC>(Y.count(),
rois, Output(0)->template data<BT, BC>());
rcnn::CollectRoIs<BT>(total_rois, min_level, max_level,
rcnn::CollectRoIs(total_rois, min_level, max_level,
canonical_level, canonical_scale, rois, bins);
for (int i = 0; i < OutputSize(); i++) {
......@@ -138,17 +135,92 @@ void ProposalOp<Context>::RunWithType() {
}
}
/*!
 * Decode the multi-class candidates for every image and write them
 * to Output(0) as rows of 7 values.
 *
 * As indexed here, Input(0 .. strides.size() - 1) are the feature maps,
 * Input(-3) holds the scores, Input(-2) the deltas and Input(-1) the
 * image info (im_h, im_w, im_scale are read from its first 3 values).
 *
 * Per image:
 *   1. Keep the score indices whose score is above ``score_thresh``.
 *   2. Select at most ``pre_nms_top_n`` of them through ArgPartition.
 *   3. Let GenerateGridAnchors / GenerateMCProposals fill the rows.
 * Output(0) is finally reshaped to (total_proposals, 7).
 *
 * Buffers are passed through ``vector::data()`` rather than ``&v[0]``:
 * ``v[0]`` on an empty vector is undefined behaviour, and ``scores_ex``
 * is empty whenever no score passes the threshold.
 */
template <class Context> template <typename T>
void ProposalOp<Context>::RunWithRetinaNet() {
    using BT = float;  // DType of BBox
    using BC = CPUContext;  // Context of BBox

    int feat_h, feat_w, C = Input(-3).dim(2), A, K;
    int total_proposals = 0;
    int num_candidates, num_boxes, num_proposals;

    auto* batch_scores = Input(-3).template data<T, BC>();
    auto* batch_deltas = Input(-2).template data<T, BC>();
    auto* im_info = Input(-1).template data<BT, BC>();
    auto* Ydata = Output(0)->template mutable_data<BT, BC>();

    const int num_strides = (int)strides.size();

    for (int n = 0; n < num_images; ++n) {
        const BT im_h = im_info[0];
        const BT im_w = im_info[1];
        const BT im_scale = im_info[2];
        auto* scores = batch_scores + n * Input(-3).stride(0);
        auto* deltas = batch_deltas + n * Input(-2).stride(0);
        CHECK_EQ(strides.size(), InputSize() - 3)
            << "\nGiven " << strides.size() << " strides and "
            << InputSize() - 3 << " feature inputs";

        // Keep the indices of the scores above the threshold
        num_boxes = Input(-3).dim(1);
        const int num_scores = Input(-3).count(1);
        roi_indices.resize(num_scores);
        num_candidates = 0;
        for (int i = 0; i < num_scores; ++i)
            if (scores[i] > score_thresh)
                roi_indices[num_candidates++] = i;

        // Gather the kept scores into a dense buffer
        scores_ex.resize(num_candidates);
        for (int i = 0; i < num_candidates; ++i)
            scores_ex[i] = scores[roi_indices[i]];

        // Select the Top-K candidates as proposals
        num_proposals = std::min(num_candidates, (int)pre_nms_top_n);
        utils::math::ArgPartition(
            num_candidates, num_proposals,
                true, scores_ex.data(), indices);

        // Map positions in the dense buffer back to the original indices
        for (int i = 0; i < num_proposals; ++i)
            indices[i] = roi_indices[indices[i]];

        // Decode the candidates
        int base_offset = 0;
        for (int i = 0; i < num_strides; i++) {
            feat_h = Input(i).dim(2), feat_w = Input(i).dim(3);
            A = int(ratios.size() * scales.size()), K = feat_h * feat_w;
            anchors_.Reshape({ A, 4 });
            auto* Adata = anchors_.template mutable_data<BT, BC>();
            rcnn::GenerateAnchors(strides[i],
                (int)ratios.size(), (int)scales.size(),
                    ratios.data(), scales.data(), Adata);
            rcnn::GenerateGridAnchors(
                num_proposals, C, A, feat_h, feat_w,
                    strides[i], base_offset,
                        Adata, indices.data(), Ydata);
            // The next feature level starts after this level's A * K anchors
            base_offset += (A * K);
        }
        rcnn::GenerateMCProposals(
            num_proposals, num_boxes, C, n,
                im_h, im_w, im_scale,
                    scores, deltas, indices.data(), Ydata);

        // Advance to the rows and the image info of the next image
        total_proposals += num_proposals;
        Ydata += (num_proposals * 7);
        im_info += Input(-1).dim(1);
    }

    Output(0)->Reshape({ total_proposals, 7 });
}
template <class Context>
void ProposalOp<Context>::RunOnDevice() {
num_images = Input(0).dim(0);
CHECK_EQ(Input(-1).dim(0), num_images)
<< "\nExcepted " << num_images << " groups image info, "
<< "but got " << Input(-1).dim(0) << ".";
roi_indices.resize(post_nms_top_n);
Output(0)->Reshape({ num_images * post_nms_top_n, 5 });
if (XIsType(Input(-3), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
if (det_type == "RCNN") {
roi_indices.resize(post_nms_top_n);
Output(0)->Reshape({ num_images * post_nms_top_n, 5 });
if (XIsType(Input(-3), float)) { RunWithRCNN<float>(); }
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
} else if (det_type == "RETINANET") {
Output(0)->Reshape({ num_images * pre_nms_top_n, 7 });
if (XIsType(Input(-3), float)) { RunWithRetinaNet<float>(); }
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
} else {
LOG(FATAL) << "Unsupported detector: " << det_type;
}
}
DEPLOY_CPU(Proposal);
......
......@@ -22,12 +22,14 @@ class ProposalOp final : public Operator<Context> {
public:
ProposalOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
det_type(OperatorBase::Arg<string>("det_type", "RCNN")),
strides(OperatorBase::Args<int64_t>("strides")),
ratios(OperatorBase::Args<float>("ratios")),
scales(OperatorBase::Args<float>("scales")),
pre_nms_top_n(OperatorBase::Arg<int64_t>("pre_nms_top_n", 6000)),
post_nms_top_n(OperatorBase::Arg<int64_t>("post_nms_top_n", 300)),
nms_thresh(OperatorBase::Arg<float>("nms_thresh", (float)0.7)),
nms_thresh(OperatorBase::Arg<float>("nms_thresh", 0.7f)),
score_thresh(OperatorBase::Arg<float>("score_thresh", 0.05f)),
min_size(OperatorBase::Arg<int64_t>("min_size", 16)),
min_level(OperatorBase::Arg<int64_t>("min_level", 2)),
max_level(OperatorBase::Arg<int64_t>("max_level", 5)),
......@@ -37,14 +39,16 @@ class ProposalOp final : public Operator<Context> {
void RunOnDevice() override;
template <typename T> void RunWithType();
template <typename T> void RunWithRCNN();
template <typename T> void RunWithRetinaNet();
protected:
string det_type;
float nms_thresh, score_thresh;
vector<int64_t> strides, indices, roi_indices;
vector<float> ratios, scales;
vector<float> ratios, scales, scores_ex;
int64_t pre_nms_top_n, post_nms_top_n, min_size, num_images;
int64_t min_level, max_level, canonical_level, canonical_scale;
float nms_thresh;
Tensor anchors_, proposals_, nms_mask_;
};
......
......@@ -22,7 +22,7 @@ bool GraphGradientMaker::CheckGrad(
if (external_grads_.count(g_output))
inputs_to_grads_[output] = g_output;
// Consider generate virtual grad
else if (targets.count(output) && g_output != "ignore") {
else if (targets.count(output) && g_output != "NULL") {
gen_grads.push_back({ output, idx });
inputs_to_grads_[output] = g_output;
}
......@@ -88,7 +88,7 @@ void GraphGradientMaker::Make(
string g_output = "";
if (inputs_to_grads_.count(output) > 0)
g_output = inputs_to_grads_[output];
if (g_output.empty()) g_output = "ignore";
if (g_output.empty()) g_output = "NULL";
g_outputs.emplace_back(g_output);
}
Gradient grad = MakeGradientForOp(op, g_outputs);
......@@ -194,10 +194,10 @@ void GraphGradientMaker::Make(
#define SHARE_OUTPUTS_BODY \
{string output = op->output(ix); \
if (output == "ignore") continue; \
if (output == "NULL") continue; \
if (ref_count.count(output) == 0) { \
if (ignore_grads_.count(output) > 0) \
*op->mutable_output(ix) = "ignore"; \
*op->mutable_output(ix) = "NULL"; \
continue; \
} \
if (op->type() == "TemplateGradient" || \
......@@ -212,13 +212,22 @@ void GraphGradientMaker::Make(
*op->mutable_output(ix) = temp_grad;}
void GraphGradientMaker::Share(GraphDef& graph) {
Set<int> invalid_ops;
Map<string, int> ref_count;
// Count the refs for detecting leaf nodes
for (auto& op : graph.op()) {
for (int i = 0; i < graph.op_size(); ++i) {
const OperatorDef& op = graph.op(i);
// Ignore the non-gradient ops
if (op.type().find("Gradient") == string::npos) continue;
if (op.type() == "GradientGather" &&
ignore_grads_.count(op.output(0))) {
for (auto& input : op.input())
ignore_grads_.insert(input);
invalid_ops.insert(i); continue;
}
for (auto& input : op.input())
if (input.find("grad") != string::npos) ref_count[input] += 1;
if (input.find("grad") != string::npos)
ref_count[input] += 1;
}
// Prepare the Gradients Pool
......@@ -247,6 +256,8 @@ void GraphGradientMaker::Share(GraphDef& graph) {
OperatorDef* op = graph.mutable_op(i);
// Ignore the non-gradient ops
if (op->type().find("Gradient") == string::npos) continue;
// Ignore the invalid ops
if (invalid_ops.count(i)) { op->mutable_type()->clear(); continue; }
// GC to store the grads that have finished lifecycle
vector<string> GC;
// Inplace-aware
......
......@@ -19,7 +19,7 @@ GraphDef GraphOptimizer::PruneNodes(const GraphDef& input_def) {
if (!op.input_size()) sp_u.resize(op.output_size(), "");
else sp_u.assign(op.input().begin(), op.input().end());
for (const auto& u : sp_u) {
if (u == "ignore") continue;
if (u == "NULL") continue;
dag_[v].parents.push_back(u);
dag_[u].childs.push_back(v);
dag_[v].op_idx = i;
......@@ -66,32 +66,32 @@ GraphDef GraphOptimizer::PruneNodes(const GraphDef& input_def) {
for (int i = 0; i < input_def.op(it).input_size(); ++i) {
string input = input_def.op(it).input(i);
if (!colored_[input] || !outputs.count(input))
*op_def.mutable_input(i) = "ignore";
*op_def.mutable_input(i) = "NULL";
}
// Rewritten for outputs
for (int i = 0; i < input_def.op(it).output_size(); ++i) {
string output = input_def.op(it).output(i);
if (!colored_[output]) *op_def.mutable_output(i) = "ignore";
if (!colored_[output]) *op_def.mutable_output(i) = "NULL";
else outputs.insert(op_def.output(i));
}
// Rewritten for some hand-craft cases
if (op_def.type() == "AffineGradient") {
// Trigger in-place if not solving dAlpha
if (op_def.output(1) == "ignore")
*op_def.mutable_input(0) = "ignore";
if (op_def.output(1) == "NULL")
*op_def.mutable_input(0) = "NULL";
} else if (op_def.type() == "MulGradient" ||
op_def.type() == "RMulGradient") {
if (op_def.output(0) == "ignore")
*op_def.mutable_input(1) = "ignore";
if (op_def.output(1) == "ignore")
*op_def.mutable_input(0) = "ignore";
if (op_def.output(0) == "NULL")
*op_def.mutable_input(1) = "NULL";
if (op_def.output(1) == "NULL")
*op_def.mutable_input(0) = "NULL";
} else if (op_def.type() == "DivGradient" ||
op_def.type() == "RDivGradient") {
// dX2 requires both X1 and X2
if (op_def.output(1) == "ignore") {
*op_def.mutable_input(0) = "ignore";
if (op_def.output(0) == "ignore")
*op_def.mutable_input(1) = "ignore";
if (op_def.output(1) == "NULL") {
*op_def.mutable_input(0) = "NULL";
if (op_def.output(0) == "NULL")
*op_def.mutable_input(1) = "NULL";
}
}
// Push into the final sequence
......@@ -117,7 +117,7 @@ GraphDef GraphOptimizer::AddInplace(const GraphDef& input_def) {
if (!op.input_size()) sp_u.resize(op.output_size(), "");
else sp_u.assign(op.input().begin(), op.input().end());
for (const auto& u : sp_u) {
if (u == "ignore") continue;
if (u == "NULL") continue;
dag_[v].parents.push_back(u);
dag_[u].childs.push_back(v);
dag_[v].op_idx = i;
......
......@@ -100,10 +100,11 @@ OperatorBase* TryCreateOperator(
case PROTO_CPU:
return CPUOperatorRegistry()->Create(key, def, ws);
case PROTO_CUDA:
if (def.device_option().has_engine() &&
def.device_option().engine() == "CUDNN" &&
CUDNNOperatorRegistry()->Has(key))
#ifdef WITH_CUDNN
if (CUDNNOperatorRegistry()->Has(key) &&
CUDAContext::cuda_object()->cudnn_enabled)
return CUDNNOperatorRegistry()->Create(key, def, ws);
#endif
return CUDAOperatorRegistry()->Create(key, def, ws);
case PROTO_CNML:
return CNMLOperatorRegistry()->Create(key, def, ws);
......@@ -155,7 +156,7 @@ Gradient MakeGradientForOp(
);
}
}
// Copy device option, engine, and arguments
// Copy device option and arguments
if (maker->CopyDeviceOption() && def.has_device_option())
for (auto& grad_def : grad.ops)
grad_def.mutable_device_option()->CopyFrom(
......
......@@ -16,9 +16,9 @@ bool OpSchema::Verify(const OperatorDef& def) const {
<< ", max=" << max_output_ << "]";
}
for (int in = 0; in < def.input_size(); in++) {
if (def.input(in) == "ignore") continue;
if (def.input(in) == "NULL") continue;
for (int out = 0; out < def.output_size(); out++) {
if (def.output(out) == "ignore") continue;
if (def.output(out) == "NULL") continue;
if (def.input(in) == def.output(out) && (!CheckInplace(in, out)))
LOG(FATAL) << indicator << "Input(" << in << ") and "
<< "Output(" << out << ") can not be set to inplace.";
......
......@@ -7,7 +7,7 @@ namespace dragon {
/*! Create some internal tensors */
void Workspace::InitWorkspace() {
CreateTensor("ignore");
CreateTensor("NULL");
Tensor* recomputing_flag = CreateTensor(
"/opt/recomputing_flag")->Reshape({ 1 });
recomputing_flag->mutable_data
......
......@@ -351,7 +351,7 @@ ONNXImporterReturns ONNXBackend::ArgReduceNodeImporter(
auto* operation = attributes.AddRewrittenAttribute("operation");
if (onnx_node->node.op_type() == "ArgMax") operation->set_s("ARGMAX");
else if (onnx_node->node.op_type() == "ArgMin") operation->set_s("ARGMIN");
node.add_output("ignore"); // A dummy output("Value") is required
node.add_output("NULL"); // A dummy output("Value") is required
return CommonONNXNodeImporter(&onnx_node_v2, ctx);
}
......
......@@ -46,7 +46,7 @@ void PReluGradientOp<Context>::RunWithType() {
auto* Xdata = Input(0).template data<T, Context>();
auto* dYdata = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
DECLARE_MULTIPLIER(multiplier, channels * dim);
auto* dWdata = Output(1)->template mutable_data<T, Context>();
auto* dWBdata = ws()->template caches<T, Context>({ channels * dim })[0];
......@@ -55,7 +55,7 @@ void PReluGradientOp<Context>::RunWithType() {
dYdata, Xdata, multiplier, dWBdata, dWdata, ctx());
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
kernel::PReluGrad(Output(0)->count(), channels, dim,
......
......@@ -60,13 +60,13 @@ template <class Context> template <typename T>
void AddGradientOp<Context>::EltwiseRunWithType() {
auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>(
Output(1)->count(), dx2, dy);
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>(
Output(0)->count(), dx1, dy);
......@@ -78,14 +78,14 @@ void AddGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>();
vector<int> dims = { rows, cols }, axes = { type };
kernel::ReduceSum(2, dims.data(),
1, axes.data(), 1.f, dy, dx2, ctx());
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>(
X1->count(), dx1, dy);
......
......@@ -60,7 +60,7 @@ void AffineGradientOp<Context>::RunWithType() {
auto* dXdata = Output(0)->template mutable_data<T, Context>();
// dA = X * dY
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
Output(1)->ReshapeLike(Input(1));
auto* Xdata = Input(0).template data<T, Context>();
auto* dAdata = Output(1)->template mutable_data<T, Context>();
......@@ -74,7 +74,7 @@ void AffineGradientOp<Context>::RunWithType() {
}
// dB = dY
if (Output(2)->name() != "ignore") {
if (Output(2)->name() != "NULL") {
Output(2)->ReshapeLike(Input(1));
auto* dBdata = Output(2)->template mutable_data<T, Context>();
// Eltwise
......@@ -87,7 +87,7 @@ void AffineGradientOp<Context>::RunWithType() {
}
// dX = alpha * dY
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
kernel::AffineGrad(outer_dim, inner_dim, scale_dim,
dYdata, Adata, dXdata, ctx());
}
......
......@@ -101,7 +101,7 @@ void CuDNNAffineGradientOp<Context>::RunWithType() {
CUDNNType<CT>::type, CUDNN_PROPAGATE_NAN));
// dA = X * dY
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
Output(1)->ReshapeLike(Input(1));
auto* Xdata = Input(0).template data<DT, Context>();
auto* dAdata = Output(1)->template mutable_data<DT, Context>();
......@@ -119,7 +119,7 @@ void CuDNNAffineGradientOp<Context>::RunWithType() {
}
// dB = dY
if (Output(2)->name() != "ignore") {
if (Output(2)->name() != "NULL") {
Output(2)->ReshapeLike(Input(1));
auto* dBdata = Output(2)->template mutable_data<DT, Context>();
// Eltwise
......@@ -136,7 +136,7 @@ void CuDNNAffineGradientOp<Context>::RunWithType() {
}
// dX = alpha * dY
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
CUDNN_CHECK(cudnnOpTensor(
ctx()->cudnn_handle(), mul_desc,
CUDNNType<DT>::one, input_desc, dYdata,
......
......@@ -61,7 +61,7 @@ void DivGradientOp<Context>::EltwiseRunWithType() {
DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>();
auto* x2 = Input(1).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>();
......@@ -73,7 +73,7 @@ void DivGradientOp<Context>::EltwiseRunWithType() {
math::Scale(X2->count(), -1.f, dx2, dx2, ctx());
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>();
math::Div(X1->count(), dy, x2, dx1, ctx());
......@@ -85,7 +85,7 @@ void DivGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>();
auto* x2 = Input(1).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>();
......@@ -100,7 +100,7 @@ void DivGradientOp<Context>::BroadcastRunWithType(int type) {
1, axes.data(), -1.f, cs[0], dx2, ctx());
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>();
math::BroadcastDiv(rows, cols, type, dy, x2, dx1, ctx());
......
......@@ -116,13 +116,13 @@ void DotGradientOp<Context>::DotRunWithType() {
auto* Bdata = Input(1).template data<T, Context>();
auto* dYdata = Input(-1).template data<T, CPUContext>();
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* dAdata = Output(0)->template mutable_data<T, Context>();
math::Scale(Output(0)->count(), cast::to<float>(
dYdata[0]), Bdata, dAdata, ctx());
}
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* dBdata = Output(1)->template mutable_data<T, Context>();
math::Scale(Output(0)->count(), cast::to<float>(
dYdata[0]), Adata, dBdata, ctx());
......@@ -145,7 +145,7 @@ void DotGradientOp<Context>::GemmRunWithType() {
auto* X2data = Input(1).template data<T, Context>();
auto* dYdata = Input(2).template data<T, Context>();
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* dX1data = Output(0)->template mutable_data<T, Context>();
if (transA) {
math::Gemm(
......@@ -162,7 +162,7 @@ void DotGradientOp<Context>::GemmRunWithType() {
}
}
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* dX2data = Output(1)->template mutable_data<T, Context>();
if (transB) {
math::Gemm(
......
......@@ -73,7 +73,7 @@ void EltwiseGradientOp<Context>::SumRunWithType() {
auto* dYdata = Input(-1).template data<T, Context>();
for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->name() == "ignore") continue;
if (Output(i)->name() == "NULL") continue;
auto* dXdata = Output(i)->template mutable_data<T, Context>();
// Copy dY to dX and apply the coefficients
math::Scale(nelements, coeffs[i], dYdata, dXdata, ctx());
......@@ -86,7 +86,7 @@ void EltwiseGradientOp<Context>::ProdRunWithType() {
auto* dYdata = Input(-1).template data<T, Context>();
for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->name() == "ignore") continue;
if (Output(i)->name() == "NULL") continue;
auto* dXdata = Output(i)->template mutable_data<T, Context>();
// Compute the first term of dX
bool initialized = false;
......
......@@ -122,7 +122,7 @@ void FullyConnectedGradientOp<Context>::RunWithType() {
auto* Wdata = Input(1).template data<T, Context>();
auto* dYdata = Input(2).template data<T, Context>();
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
Output(1)->ReshapeLike(Input(1));
auto* dWdata = Output(1)->template mutable_data<T, Context>();
if (transW) {
......@@ -140,7 +140,7 @@ void FullyConnectedGradientOp<Context>::RunWithType() {
}
}
if (Output(2)->name() != "ignore") {
if (Output(2)->name() != "NULL") {
DECLARE_MULTIPLIER(multiplier, M);
Output(2)->Reshape({ N });
auto* dBdata = Output(2)->template mutable_data<T, Context>();
......@@ -150,7 +150,7 @@ void FullyConnectedGradientOp<Context>::RunWithType() {
0.f, dBdata, ctx());
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
Output(0)->ReshapeLike(Input(0));
auto* dXdata = Output(0)->template mutable_data<T, Context>();
if (transW) {
......
......@@ -70,14 +70,14 @@ void MatmulGradientOp<Context>::RunWithType() {
T* dAdata = nullptr, *dBdata = nullptr;
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
dAdata = Output(0)->template mutable_data<T, Context>();
} if (Output(1)->name() != "ignore") {
} if (Output(1)->name() != "NULL") {
dBdata = Output(1)->template mutable_data<T, Context>();
}
for (int i = 0; i < batch_size; ++i) {
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
if (transA) {
math::Gemm(
transB ? CblasTrans : CblasNoTrans,
......@@ -94,7 +94,7 @@ void MatmulGradientOp<Context>::RunWithType() {
0.f, dAdata + i * A_stride, ctx());
}
}
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
if (transB) {
math::Gemm(
CblasTrans,
......
......@@ -83,11 +83,11 @@ template <class Context> template <typename T>
void MaximumGradientOp<Context>::BroadcastRunWithType() {
auto* dYdata = Input(-1).template data<T, Context>();
if (Input(0).count() == 1) {
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* dAdata = Output(0)->template mutable_data<T, Context>();
math::Set(1, cast::to<T>(0.f), dAdata, ctx());
}
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* Adata = Input(0).template data<T, CPUContext>();
auto* Bdata = Input(1).template data<T, Context>();
auto* dBdata = Output(1)->template mutable_data<T, Context>();
......@@ -95,14 +95,14 @@ void MaximumGradientOp<Context>::BroadcastRunWithType() {
Bdata, Adata[0], dYdata, dBdata, (T*)nullptr, ctx());
}
} else if (Input(1).count() == 1) {
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* Adata = Input(0).template data<T, Context>();
auto* Bdata = Input(1).template data<T, CPUContext>();
auto* dAdata = Output(0)->template mutable_data<T, Context>();
kernel::BroadcastMaximumGrad(Output(0)->count(),
Adata, Bdata[0], dYdata, dAdata, (T*)nullptr, ctx());
}
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* dBdata = Output(1)->template mutable_data<T, Context>();
math::Set(1, cast::to<T>(0.f), dBdata, ctx());
}
......
......@@ -83,11 +83,11 @@ template <class Context> template <typename T>
void MinimumGradientOp<Context>::BroadcastRunWithType() {
auto* dYdata = Input(-1).template data<T, Context>();
if (Input(0).count() == 1) {
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* dAdata = Output(0)->template mutable_data<T, Context>();
math::Set<T, Context>(1, cast::to<T>(0.f), dAdata, ctx());
}
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* Adata = Input(0).template data<T, CPUContext>();
auto* Bdata = Input(1).template data<T, Context>();
auto* dBdata = Output(1)->template mutable_data<T, Context>();
......@@ -95,14 +95,14 @@ void MinimumGradientOp<Context>::BroadcastRunWithType() {
Bdata, Adata[0], dYdata, dBdata, (T*)nullptr, ctx());
}
} else if (Input(1).count() == 1) {
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* Adata = Input(0).template data<T, Context>();
auto* Bdata = Input(1).template data<T, CPUContext>();
auto* dAdata = Output(0)->template mutable_data<T, Context>();
kernel::BroadcastMinimumGrad(Output(0)->count(),
Adata, Bdata[0], dYdata, dAdata, (T*)nullptr, ctx());
}
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* dBdata = Output(1)->template mutable_data<T, Context>();
math::Set<T, Context>(1, cast::to<T>(0.f), dBdata, ctx());
}
......
......@@ -59,13 +59,13 @@ template <class Context> template <typename T>
void MulGradientOp<Context>::EltwiseRunWithType() {
auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>();
math::Mul(Output(1)->count(), dy, x1, dx2, ctx());
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>();
math::Mul(Output(0)->count(), dy, x2, dx1, ctx());
......@@ -77,7 +77,7 @@ void MulGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>();
auto* c = ws()->template caches<T, Context>({ X1->count() })[0];
......@@ -87,7 +87,7 @@ void MulGradientOp<Context>::BroadcastRunWithType(int type) {
1, axes.data(), 1.f, c, dx2, ctx());
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>();
math::BroadcastMul(rows, cols, type, dy, x2, dx1, ctx());
......
......@@ -60,13 +60,13 @@ template <class Context> template <typename T>
void RAddGradientOp<Context>::EltwiseRunWithType() {
auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>(
Output(1)->count(), dx2, dy);
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>(
Output(0)->count(), dx1, dy);
......@@ -78,14 +78,14 @@ void RAddGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>();
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>();
vector<int> dims = { rows, cols }, axes = { type - 2 };
kernel::ReduceSum(2, dims.data(),
1, axes.data(), 1.f, dy, dx1, ctx());
}
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>(
X2->count(), dx2, dy);
......@@ -99,23 +99,23 @@ void RAddGradientOp<Context>::RunOnDevice() {
Output(1)->ReshapeLike(*X2);
if (XIsType(Input(-1), int8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int8_t);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(int8_t);
} else if (XIsType(Input(-1), uint8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(uint8_t);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(uint8_t);
} else if (XIsType(Input(-1), int)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(int);
} else if (XIsType(Input(-1), int64_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int64_t);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(int64_t);
} else if (XIsType(Input(-1), float16)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float16);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(float16);
} else if (XIsType(Input(-1), float)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(float);
} else if (XIsType(Input(-1), double)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(double);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(double);
} else {
LOG(FATAL) << DTypeHelper(Input(0), {
"int8", "uint8", "int32", "int64",
"float16", "float32", "float64",
"float16", "float32", "float64",
});
}
}
......
......@@ -61,7 +61,7 @@ void RDivGradientOp<Context>::EltwiseRunWithType() {
DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>();
auto* x2 = Input(1).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>();
......@@ -73,7 +73,7 @@ void RDivGradientOp<Context>::EltwiseRunWithType() {
math::Scale(X2->count(), -1.f, dx2, dx2, ctx());
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>();
math::Div(X1->count(), dy, x2, dx1, ctx());
......@@ -85,7 +85,7 @@ void RDivGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>();
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>();
auto* c = ws()->template caches<T, Context>({ X2->count() })[0];
......@@ -95,7 +95,7 @@ void RDivGradientOp<Context>::BroadcastRunWithType(int type) {
1, axes.data(), 1.f, c, dx1, ctx());
}
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>();
auto* x2 = Input(1).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>();
......@@ -114,19 +114,19 @@ void RDivGradientOp<Context>::RunOnDevice() {
Output(1)->ReshapeLike(*X2);
if (XIsType(Input(-1), int8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int8_t);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(int8_t);
} else if (XIsType(Input(-1), uint8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(uint8_t);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(uint8_t);
} else if (XIsType(Input(-1), int)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(int);
} else if (XIsType(Input(-1), int64_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int64_t);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(int64_t);
} else if (XIsType(Input(-1), float16)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float16);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(float16);
} else if (XIsType(Input(-1), float)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(float);
} else if (XIsType(Input(-1), double)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(double);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(double);
} else {
LOG(FATAL) << DTypeHelper(Input(0), {
"int8", "uint8", "int32", "int64",
......
......@@ -60,13 +60,13 @@ template <class Context> template <typename T>
void RMulGradientOp<Context>::EltwiseRunWithType() {
auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>();
math::Mul(Output(1)->count(), dy, x1, dx2, ctx());
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>();
math::Mul(Output(0)->count(), dy, x2, dx1, ctx());
......@@ -78,7 +78,7 @@ void RMulGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>();
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* x2 = Input(1).template data<T, Context>();
auto* dx1 = Output(0)->template mutable_data<T, Context>();
auto* c = ws()->template caches<T, Context>({ X2->count() })[0];
......@@ -88,7 +88,7 @@ void RMulGradientOp<Context>::BroadcastRunWithType(int type) {
1, axes.data(), 1.f, c, dx1, ctx());
}
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* x1 = Input(0).template data<T, Context>();
auto* dx2 = Output(1)->template mutable_data<T, Context>();
math::BroadcastMul(rows, cols, type - 2, dy, x1, dx2, ctx());
......@@ -102,19 +102,19 @@ void RMulGradientOp<Context>::RunOnDevice() {
Output(1)->ReshapeLike(*X2);
if (XIsType(Input(-1), int8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int8_t);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(int8_t);
} else if (XIsType(Input(-1), uint8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(uint8_t);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(uint8_t);
} else if (XIsType(Input(-1), int)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(int);
} else if (XIsType(Input(-1), int64_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int64_t);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(int64_t);
} else if (XIsType(Input(-1), float16)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float16);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(float16);
} else if (XIsType(Input(-1), float)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(float);
} else if (XIsType(Input(-1), double)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(double);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(double);
} else {
LOG(FATAL) << DTypeHelper(Input(0), {
"int8", "uint8", "int32", "int64",
......
......@@ -60,13 +60,13 @@ template <class Context> template <typename T>
void RSubGradientOp<Context>::EltwiseRunWithType() {
auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>();
math::Scale<T, Context>(
Output(1)->count(), -1, dy, dx2, ctx());
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>(
Output(0)->count(), dx1, dy);
......@@ -78,14 +78,14 @@ void RSubGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>();
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>();
vector<int> dims = { rows, cols }, axes = { type - 2 };
kernel::ReduceSum(2, dims.data(),
1, axes.data(), 1.f, dy, dx1, ctx());
}
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>();
math::Scale(X2->count(), -1.f, dy, dx2, ctx());
}
......@@ -98,19 +98,19 @@ void RSubGradientOp<Context>::RunOnDevice() {
Output(1)->ReshapeLike(*X2);
if (XIsType(Input(-1), int8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int8_t);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(int8_t);
} else if (XIsType(Input(-1), uint8_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(uint8_t);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(uint8_t);
} else if (XIsType(Input(-1), int)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(int);
} else if (XIsType(Input(-1), int64_t)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(int64_t);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(int64_t);
} else if (XIsType(Input(-1), float16)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float16);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(float16);
} else if (XIsType(Input(-1), float)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(float);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(float);
} else if (XIsType(Input(-1), double)) {
DEFINE_FUNDAMENTAL_TYPED_CALLER(double);
DEFINE_FUNDAMENTAL_TYPED_RCALLER(double);
} else {
LOG(FATAL) << DTypeHelper(Input(0), {
"int8", "uint8", "int32", "int64",
......
......@@ -60,13 +60,13 @@ template <class Context> template <typename T>
void SubGradientOp<Context>::EltwiseRunWithType() {
auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>();
math::Scale<T, Context>(Output(1)->count(),
-1.f, dy, dx2, ctx());
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>(
Output(0)->count(), dx1, dy);
......@@ -78,14 +78,14 @@ void SubGradientOp<Context>::BroadcastRunWithType(int type) {
DEFINE_FUNDAMENTAL_OP_X1X2;
auto* dy = Input(-1).template data<T, Context>();
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* dx2 = Output(1)->template mutable_data<T, Context>();
vector<int> dims = { rows, cols }, axes = { type };
kernel::ReduceSum(2, dims.data(),
1, axes.data(), -1.f, dy, dx2, ctx());
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* dx1 = Output(0)->template mutable_data<T, Context>();
ctx()->template Copy<T, Context, Context>(
X1->count(), dx1, dy);
......
......@@ -19,7 +19,7 @@ void ArgReduceOp<Context>::RunWithType() {
// It's difficult to implement device code when top_k > 1
auto* Xdata = Input(0).template data<T, CPUContext>();
auto* Idata = Output(0)->template mutable_data<int64_t, CPUContext>();
auto* Vdata = Output(1)->name() != "ignore" ? Output(1)
auto* Vdata = Output(1)->name() != "NULL" ? Output(1)
->template mutable_data<T, CPUContext>() : nullptr;
static CPUContext cctx;
if (operation == "ARGMAX") {
......@@ -34,7 +34,7 @@ void ArgReduceOp<Context>::RunWithType() {
} else {
auto* Xdata = Input(0).template data<T, Context>();
auto* Idata = Output(0)->template mutable_data<int64_t, Context>();
auto* Vdata = Output(1)->name() != "ignore" ? Output(1)
auto* Vdata = Output(1)->name() != "NULL" ? Output(1)
->template mutable_data<T, Context>() : nullptr;
if (operation == "ARGMAX") {
kernel::ArgMax(outer_dim, inner_dim, axis_dim,
......
......@@ -82,7 +82,7 @@ void ConcatGradientOp<Context>::RunWithType() {
for (int i = 0; i < OutputSize(); i++) {
x_concat_dim = Input(i).dim(axis);
if (Output(i)->name() != "ignore") {
if (Output(i)->name() != "NULL") {
auto* dXdata = Output(i)->template mutable_data<T, Context>();
kernel::Slice(
outer_dim, inner_dim,
......@@ -95,7 +95,7 @@ void ConcatGradientOp<Context>::RunWithType() {
template <class Context>
void ConcatGradientOp<Context>::RunOnDevice() {
if (Input(-1).name() == "ignore") return;
if (Input(-1).name() == "NULL") return;
DETERMINE_RUNTIME_ARGUMENTS(Input(0));
......
......@@ -89,7 +89,7 @@ void SliceGradientOp<Context>::RunWithType() {
<< "\nIllegal slice points: " << Tensor::DimString(slice_points)
<< " for dimension " << Input(0).dim(axis) << ".";
const T* dYdata = Input(i + 1).name() != "ignore" ?
const T* dYdata = Input(i + 1).name() != "NULL" ?
Input(i + 1).template data<T, Context>() : nullptr;
kernel::SliceGrad(
......
......@@ -69,7 +69,7 @@ void StackGradientOp<Context>::RunWithType() {
auto* dYdata = Input(-1).template data<T, Context>();
for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->name() != "ignore") {
if (Output(i)->name() != "NULL") {
auto* dXdata = Output(i)->template mutable_data<T, Context>();
kernel::Slice(
outer_dim, inner_dim,
......@@ -81,7 +81,7 @@ void StackGradientOp<Context>::RunWithType() {
template <class Context>
void StackGradientOp<Context>::RunOnDevice() {
if (Input(-1).name() == "ignore") return;
if (Input(-1).name() == "NULL") return;
DETERMINE_RUNTIME_ARGUMENTS(Input(-1));
......
......@@ -131,7 +131,7 @@ void ScanOp<Context>::UnrollTemplate() {
func_def.output(i) +
"@" + std::to_string(nsteps));
// Concat all steps if necessary
if (Output(i)->name() == "ignore") continue;
if (Output(i)->name() == "NULL") continue;
OperatorDef* op = new_def.add_op();
op->set_name(name() + "(BodyOp." + std::to_string(
nseqs + nrepeats + i) + ")");
......@@ -186,7 +186,7 @@ void ScanGradientOp<Context>::MakeOps(
maker.SetOperatorPrefix(name() + "(BodyOp.");
maker.SetOperatorSuffix(")");
for (int i = 0; i < forward_outputs.size(); i++) {
if (Input(i + (int)OutputSize()).name() != "ignore")
if (Input(i + (int)OutputSize()).name() != "NULL")
maker.AddExternalGrad(Input(i + (int)OutputSize()).name());
}
......@@ -197,8 +197,8 @@ void ScanGradientOp<Context>::MakeOps(
new_def.set_name(name() + "(ScanLen." + std::to_string(nsteps) + ")");
for (const auto& target : forward_def.output()) {
for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->name() == "ignore") continue;
if (Input(i).name() == "ignore") continue;
if (Output(i)->name() == "NULL") continue;
if (Input(i).name() == "NULL") continue;
auto* gradient = new_def.add_gradient();
gradient->set_cost(target);
gradient->set_wrt(Input(i).name());
......
......@@ -83,13 +83,13 @@ void L1LossGradientOp<Context>::RunWithType() {
} else { dYHost *= scale; }
for (int i = 0; i < 2; i++) {
if (Output(i)->name() == "ignore") continue;
if (Output(i)->name() == "NULL") continue;
Output(i)->ReshapeLike(Input(i));
auto* dXdata = Output(i)->template mutable_data<T, Context>();
math::Scale(Output(i)->count(),
dYHost * (i == 0 ? 1.f : -1.f),
Ddata, dXdata, ctx());
if (Input(2).name() != "ignore") {
if (Input(2).name() != "NULL") {
auto* mask = Input(2).template data<T, Context>();
math::Mul(Output(i)->count(), mask, dXdata, dXdata, ctx());
}
......
......@@ -88,13 +88,13 @@ void L2LossGradientOp<Context>::RunWithType() {
} else { dYHost *= scale; }
for (int i = 0; i < 2; i++) {
if (Output(i)->name() == "ignore") continue;
if (Output(i)->name() == "NULL") continue;
Output(i)->ReshapeLike(Input(i));
auto* dXdata = Output(i)->template mutable_data<T, Context>();
math::Scale(Output(i)->count(),
dYHost * (i == 0 ? 1.f : -1.f),
Ddata, dXdata, ctx());
if (Input(2).name() != "ignore") {
if (Input(2).name() != "NULL") {
auto* mask = Input(2).template data<T, Context>();
math::Mul(Output(i)->count(), mask, dXdata, dXdata, ctx());
}
......
......@@ -79,7 +79,7 @@ void SmoothL1LossGradientOp<Context>::RunWithType() {
}
for (int i = 0; i < 2; i++) {
if (Output(i)->name() == "ignore") continue;
if (Output(i)->name() == "NULL") continue;
Output(i)->ReshapeLike(Input(i));
auto* dXdata = Output(i)->template mutable_data<T, Context>();
math::Scale(Output(i)->count(),
......
......@@ -8,7 +8,7 @@ namespace dragon {
template <class Context> template <typename T>
void GradientGenerateOp<Context>::RunWithType() {
for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->name() == "ignore") continue;
if (Output(i)->name() == "NULL") continue;
Output(i)->ReshapeLike(Input(i));
auto* dXdata = Output(0)->template mutable_data<T, Context>();
math::Set(Output(0)->count(),
......
......@@ -174,8 +174,8 @@ void BatchNormGradientOp<Context>::InferenceRunWithType() {
Tp* dgamma = nullptr, *dbeta = nullptr;
// Gradient w.r.t. gamma or beta if necessary
if (Output(1)->name() != "ignore" ||
Output(2)->name() != "ignore") {
if (Output(1)->name() != "NULL" ||
Output(2)->name() != "NULL") {
dgamma = Output(1)->template mutable_data<Tp, Context>();
dbeta = Output(2)->template mutable_data<Tp, Context>();
}
......
......@@ -242,8 +242,8 @@ void CuDNNBatchNormGradientOp<Context>::InferenceRunWithType() {
Tp* dgamma = nullptr, *dbeta = nullptr;
// Gradient w.r.t. gamma or beta if necessary
if (Output(1)->name() != "ignore" ||
Output(2)->name() != "ignore") {
if (Output(1)->name() != "NULL" ||
Output(2)->name() != "NULL") {
dgamma = Output(1)->template mutable_data<Tp, Context>();
dbeta = Output(2)->template mutable_data<Tp, Context>();
}
......
......@@ -116,7 +116,7 @@ void CuDNNRecurrentOp<Context>::RunWithType() {
};
auto YsData = [this](int i) {
if (i >= OutputSize()) return (T*)NULL;
if (Output(i)->name() == "ignore") return (T*)NULL;
if (Output(i)->name() == "NULL") return (T*)NULL;
return Output(i)->template mutable_data<T, Context>();
};
......@@ -171,12 +171,12 @@ void CuDNNRecurrentGradientOp<Context>::RunWithType() {
auto XsData = [this](int i) {
if (i >= InputSize()) return (const T*)NULL;
if (Input(i).name() == "ignore") return (const T*)NULL;
if (Input(i).name() == "NULL") return (const T*)NULL;
return Input(i).template data<T, Context>();
};
auto YsData = [this](int i) {
if (i >= OutputSize()) return (T*)NULL;
if (Output(i)->name() == "ignore" && i > 0) return (T*)NULL;
if (Output(i)->name() == "NULL" && i > 0) return (T*)NULL;
return Output(i)->template mutable_data<T, Context>();
};
......@@ -194,10 +194,10 @@ void CuDNNRecurrentGradientOp<Context>::RunWithType() {
auto handle = ctx()->cudnn_handle();
if (Output(0)->name() != "ignore" ||
Output(1)->name() != "ignore" ||
Output(2)->name() != "ignore" ||
Output(3)->name() != "ignore") {
if (Output(0)->name() != "NULL" ||
Output(1)->name() != "NULL" ||
Output(2)->name() != "NULL" ||
Output(3)->name() != "NULL") {
CUDNN_CHECK(cudnnRNNBackwardData(handle, rnn_desc,
seq_length,
ys_desc->descs(), XsData(4), // Y
......@@ -214,7 +214,7 @@ void CuDNNRecurrentGradientOp<Context>::RunWithType() {
RSdata, reserve_size));
}
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
CUDNN_CHECK(cudnnRNNBackwardWeights(handle, rnn_desc,
seq_length,
xs_desc->descs(), XsData(0), // X
......
......@@ -7,14 +7,14 @@ namespace dragon {
// Forward pass of the LSTM cell for element type T.
//
// Fetches Input(0) as mutable data and Input(1) as read-only data, obtains
// writable buffers for Output(0) and Output(1), and passes all four pointers
// to kernel::LSTMCell together with three sizes taken from Input(1).
//
// NOTE(review): the two input pointers and the final kernel-argument line
// each appear in two spellings below (XAdata/CXdata and Xdata/HXdata). This
// looks like the before and after lines of a rename listed side by side;
// confirm which pair is the current one before relying on this listing.
template <class Context> template <typename T>
void LSTMCellOp<Context>::RunWithType() {
auto* XAdata = Input(0).template mutable_data<T, Context>();
auto* CXdata = Input(1).template data<T, Context>();
auto* Xdata = Input(0).template mutable_data<T, Context>();
auto* HXdata = Input(1).template data<T, Context>();
auto* Hdata = Output(0)->template mutable_data<T, Context>();
auto* Cdata = Output(1)->template mutable_data<T, Context>();
// Sizes passed to the kernel: total element count of Input(1), its first
// dimension, and dim(1) when Input(1) is 2-D or dim(2) otherwise.
kernel::LSTMCell(Input(1).count(), Input(1).dim(0),
Input(1).ndim() == 2 ? Input(1).dim(1) : Input(1).dim(2),
CXdata, XAdata, Cdata, Hdata, ctx());
HXdata, Xdata, Cdata, Hdata, ctx());
}
template <class Context>
......@@ -34,17 +34,23 @@ OPERATOR_SCHEMA(LSTMCell).NumInputs(2, 3).NumOutputs(2);
// Backward pass of the LSTM cell for element type T.
//
// Reads the forward tensors Input(0), Input(1) and Input(2), reads the two
// incoming gradients (dH and dC), and calls kernel::LSTMCellGrad to fill
// Output(0) and Output(1).
//
// NOTE(review): several declarations and the kernel-argument lines appear in
// two variants below (XAdata/CXdata/dCXdata and Xdata/HXdata/dHXdata, dH read
// from Input(3) and from Input(-2), dC fetched read-only and as mutable).
// This looks like the before and after lines of a change listed side by
// side; confirm which variant is the current one.
template <class Context> template <typename T>
void LSTMCellGradientOp<Context>::RunWithType() {
auto* XAdata = Input(0).template data<T, Context>();
auto* CXdata = Input(1).template data<T, Context>();
auto* Xdata = Input(0).template data<T, Context>();
auto* HXdata = Input(1).template data<T, Context>();
auto* Cdata = Input(2).template data<T, Context>();
auto* dHdata = Input(3).template data<T, Context>();
auto* dCdata = Input(4).template data<T, Context>();
auto* dHdata = Input(-2).template data<T, Context>();
// Fetched as mutable because dC may be overwritten with zeros just below.
auto* dCdata = Input(4).template mutable_data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
auto* dCXdata = Output(1)->template mutable_data<T, Context>();
auto* dHXdata = Output(1)->template mutable_data<T, Context>();
// When the last input is the "NULL" placeholder, fill the dC buffer with
// zeros before it reaches the kernel.
// NOTE(review): presumably Input(-1) and Input(4) are the same tensor here;
// confirm against the gradient maker's input list.
if (Input(-1).name() == "NULL") {
math::Set(Input(-1).count(),
cast::to<T>(0.f), dCdata, ctx());
}
// Sizes passed to the kernel: total element count of Input(1), its first
// dimension, and dim(1) when Input(1) is 2-D or dim(2) otherwise.
kernel::LSTMCellGrad(Input(1).count(), Input(1).dim(0),
Input(1).ndim() == 2 ? Input(1).dim(1) : Input(1).dim(2),
CXdata, XAdata, Cdata, dCdata, dHdata, dCXdata, dXdata, ctx());
HXdata, Xdata, Cdata, dCdata, dHdata,
dHXdata, dXdata, ctx());
}
template <class Context>
......@@ -52,6 +58,12 @@ void LSTMCellGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
Output(1)->ReshapeLike(Input(1));
if (Input(-1).name() == "NULL") {
// dC will be ignored if C is not solved.
// In that case, dC must be reset to zeros.
Input(-1).ReshapeLike(Input(-2));
}
if (Input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
}
......@@ -72,8 +84,8 @@ class GetLSTMCellGradient final : public GradientMakerBase {
vector<string>({ I(0), I(1), O(0), GO(0), GO(1) }),
vector<string>({ GI(0), GI(1) }));
}
// fill zero for dc_{T+1}
vector<float> DefaultValues() override{ return { 0.f, 1.f }; }
// Fill zero for dCNext
vector<float> DefaultValues() override{ return { 1.f, 0.f }; }
};
REGISTER_GRADIENT(LSTMCell, GetLSTMCellGradient);
......
......@@ -24,14 +24,14 @@ class GetRecurrentGradient final : public GradientMakerBase {
GRADIENT_MAKER_CTOR(GetRecurrentGradient);
vector<OperatorDef> MakeDefs() override {
vector<string> inputs({ I(0), I(1),
def.input_size() > 2 ? I(2) : "ignore",
def.input_size() > 3 ? I(3) : "ignore",
def.input_size() > 2 ? I(2) : "NULL",
def.input_size() > 3 ? I(3) : "NULL",
O(0), GO(0),
def.output_size() > 1 ? GO(1) : "ignore",
def.output_size() > 2 ? GO(2) : "ignore"});
def.output_size() > 1 ? GO(1) : "NULL",
def.output_size() > 2 ? GO(2) : "NULL"});
vector<string> outputs({ GI(0), GI(1),
def.input_size() > 2 ? GI(2) : "ignore",
def.input_size() > 3 ? GI(3) : "ignore"});
def.input_size() > 2 ? GI(2) : "NULL",
def.input_size() > 3 ? GI(3) : "NULL"});
return SingleDef(def.type() + "Gradient", "", inputs, outputs);
}
};
......
......@@ -49,7 +49,7 @@ OPERATOR_SCHEMA(BiasAdd)
template <class Context> template <typename T>
void BiasAddGradientOp<Context>::RunWithType() {
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
DECLARE_MULTIPLIER(multiplier, inner_dim);
auto* dYdata = Input(-1).template mutable_data<T, Context>();
auto* dBias = Output(1)->template mutable_data<T, Context>();
......@@ -75,7 +75,7 @@ void BiasAddGradientOp<Context>::RunWithType() {
}
}
if (Output(0)->name() != "ignore" &&
if (Output(0)->name() != "NULL" &&
Output(0)->name() != Input(-1).name()) {
Output(0)->ReshapeLike(Input(-1));
Output(0)->template CopyFrom<Context>(Input(-1), ctx());
......
......@@ -51,12 +51,12 @@ void Conv2dGradientOp<Context>::RunWithType() {
}
for (int n = 0; n < Input(2).dim(0); n++) {
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>();
Dw(dYdata + n * y_offset, Xdata + n * x_offset, dWdata);
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
Dx(dYdata + n * y_offset, Wdata, dXdata + n * x_offset);
......
......@@ -48,22 +48,22 @@ template <class Context> template <typename T>
void ConvTranspose2dGradientOp<Context>::RunWithType() {
auto* dYdata = Input(-1).template data<T, Context>();
if (Output(2)->name() != "ignore") {
if (Output(2)->name() != "NULL") {
auto* dBdata = Output(2)->template mutable_data<T, Context>();
for (int n = 0; n < Input(2).dim(0); n++)
Db(dYdata + n * y_offset, dBdata);
}
for (int n = 0; n < Input(2).dim(0); n++) {
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>();
Dw(Xdata + n * x_offset, dYdata + n * y_offset, dWdata);
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
bool skip = Output(1)->name() != "ignore";
bool skip = Output(1)->name() != "NULL";
Wx(dYdata + n * y_offset, Wdata, dXdata + n * x_offset, skip);
}
}
......
......@@ -74,7 +74,7 @@ void CuDNNBiasAddGradientOp<Context>::RunWithType() {
CUDNNType<T>::one, input_desc, dYdata,
CUDNNType<T>::zero, bias_desc, dBdata));
if (Output(0)->name() != "ignore" &&
if (Output(0)->name() != "NULL" &&
Output(0)->name() != Input(-1).name()) {
Output(0)->ReshapeLike(Input(-1));
Output(0)->template CopyFrom<Context>(Input(-1), ctx());
......
......@@ -294,7 +294,7 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() {
auto cudnn_handle = ctx()->cudnn_handle();
if (Output(2)->name() != "ignore") {
if (Output(2)->name() != "NULL") {
T* dBdata = Output(2)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardBias(cudnn_handle,
CUDNNType<T>::one, input2b_desc, dYdata,
......@@ -302,7 +302,7 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() {
}
for (int g = 0; g < cudnn_group; g++) {
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardFilter(cudnn_handle,
......@@ -311,7 +311,7 @@ void CuDNNConv2dGradientOp<Context>::RunWithType() {
conv_desc, bwd_filter_algo, WSdata, bwd_filter_size,
CUDNNType<T>::zero, filter_desc, dWdata + weight_offset * g));
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardData(cudnn_handle,
......
......@@ -290,7 +290,7 @@ void CuDNNConvTranspose2dGradientOp<Context>::RunWithType() {
auto cudnn_handle = ctx()->cudnn_handle();
if (Output(2)->name() != "ignore") {
if (Output(2)->name() != "NULL") {
T* dBdata = Output(2)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardBias(cudnn_handle,
CUDNNType<T>::one, input2b_desc, dYdata,
......@@ -298,7 +298,7 @@ void CuDNNConvTranspose2dGradientOp<Context>::RunWithType() {
}
for (int g = 0; g < cudnn_group; g++) {
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardFilter(cudnn_handle,
......@@ -307,7 +307,7 @@ void CuDNNConvTranspose2dGradientOp<Context>::RunWithType() {
conv_desc, bwd_filter_algo, WSdata, bwd_filter_size,
CUDNNType<T>::zero, filter_desc, dWdata + weight_offset * g));
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionForward(cudnn_handle,
......
......@@ -79,7 +79,7 @@ void CuDNNDepthwiseConv2dGradientOp<Context>::RunWithType() {
}
for (int n = 0; n < Input(2).dim(0); n++) {
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>();
math::Set(Output(1)->count(), cast::to<T>(0.f), dWdata, ctx());
......@@ -88,7 +88,7 @@ void CuDNNDepthwiseConv2dGradientOp<Context>::RunWithType() {
kernel_shape[0], kernel_shape[1], stride[0], pad_l[0], pad_l[1],
data_format, dYdata, Xdata, dWdata, ctx());
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
kernel::DepthwiseConv2dGrad(Input(0).dim(0), channels,
......
......@@ -54,7 +54,7 @@ void DepthwiseConv2dGradientOp<Context>::RunWithType() {
}
for (int n = 0; n < Input(2).dim(0); n++) {
if (Output(1)->name() != "ignore") {
if (Output(1)->name() != "NULL") {
auto* Xdata = Input(0).template data<T, Context>();
auto* dWdata = Output(1)->template mutable_data<T, Context>();
math::Set(Output(1)->count(), cast::to<T>(0.f), dWdata, ctx());
......@@ -63,7 +63,7 @@ void DepthwiseConv2dGradientOp<Context>::RunWithType() {
kernel_shape[0], kernel_shape[1], stride[0], pad_l[0], pad_l[1],
data_format, dYdata, Xdata, dWdata, ctx());
}
if (Output(0)->name() != "ignore") {
if (Output(0)->name() != "NULL") {
auto* Wdata = Input(1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
kernel::DepthwiseConv2dGrad(Input(0).dim(0), channels,
......
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
// Licensed under the BSD 2-Clause License.
// Codes are based on:
// https://github.com/pytorch/pytorch/blob/master/caffe2/proto/caffe2.proto
syntax = "proto2";
package dragon;
// Store a serialized Tensor object.
// The message carries the dimensions, an element type tag and several
// type-specific payload fields. Presumably only the payload field matching
// data_type is populated - confirm against the (de)serializer.
// Field numbers 8, 11 and 12 are not used by this message.
message TensorProto {
// The dimensions of the tensor.
repeated int32 dims = 1;
// The element types that a serialized tensor can declare.
// Note: the value 11 is not assigned.
enum DataType {
UNDEFINED = 0;
// Basic types.
FLOAT = 1;
INT32 = 2;
BYTE = 3;
STRING = 4;
// Less-commonly used data types.
BOOL = 5;
UINT8 = 6;
INT8 = 7;
UINT16 = 8;
INT16 = 9;
INT64 = 10;
FLOAT16 = 12;
DOUBLE = 13;
}
// The element type of the stored data; FLOAT if unset.
optional DataType data_type = 2 [default = FLOAT];
// For float.
repeated float float_data = 3 [packed = true];
// For int32, uint8, int8, uint16, int16, bool, and float16.
// Note about float16: in storage, each float16 is converted byte-wise
// to an unsigned short and then stored in the int32_data field.
repeated int32 int32_data = 4 [packed = true];
// For bytes.
optional bytes byte_data = 5;
// For strings.
repeated bytes string_data = 6;
// For double.
repeated double double_data = 9 [packed = true];
// For int64.
repeated int64 int64_data = 10 [packed = true];
// Store the raw data; contents are serialized as little-endian.
optional bytes raw_data = 13;
// Optionally, a name for the tensor.
optional string name = 7;
}
// Record the filler of a Tensor.
// This structure is kept for backward compatibility
// with caffe1, which relies on implicit initializers.
message TensorFillerProto {
optional string tensor = 1;
optional string type = 2 [default = 'constant'];
......@@ -36,67 +67,120 @@ message TensorFillerProto {
optional VarianceNorm variance_norm = 9 [default = FAN_IN];
}
// Store multiple TensorProto objects in a single proto message.
message TensorProtos {
// The contained TensorProto messages.
repeated TensorProto protos = 1;
}
enum DeviceType {
PROTO_CPU = 0;
PROTO_CUDA = 1;
PROTO_CNML = 2;
// DeviceType that Dragon currently supports.
enum DeviceTypeProto {
// The default device.
PROTO_CPU = 0;
// NVIDIA's CUDA Environment.
PROTO_CUDA = 1;
// CAMBRICON's CNML Environment.
PROTO_CNML = 2;
}
// Device-specific options.
message DeviceOption {
optional DeviceType device_type = 1 [default = PROTO_CPU];
// The type of device to dispatch executions.
optional DeviceTypeProto device_type = 1 [default = PROTO_CPU];
// The index of this device.
optional int32 device_id = 2 [default = 0];
// The random seed to start the random generator.
optional uint32 random_seed = 3 [default = 3];
optional string engine = 4;
}
// A named argument that holds either a single float, integer or string
// value, or a repeated array of floats, integers or strings.
// Field numbers 5 and 6 are not used by this message.
message Argument {
// The name of this argument.
optional string name = 1;
// Store the float32 value.
optional float f = 2;
// Store the bool, int32 or int64 value (all carried as int64).
optional int64 i = 3;
// Store the string value (carried as bytes).
optional bytes s = 4;
// Store the float32 values.
repeated float floats = 7;
// Store the bool, int32 or int64 values (all carried as int64).
repeated int64 ints = 8;
// Store the string values (carried as bytes).
repeated bytes strings = 9;
}
// The definition of a single operator: its type, the names of its
// inputs and outputs, its arguments and the device option to run under.
message OperatorDef {
// The unique id of this operator.
// Set it to persist operators in the dynamic graph.
optional string uid = 1;
// The names of the inputs.
repeated string input = 2;
// The names of the outputs.
repeated string output = 3;
// The optional name of this operator.
optional string name = 4;
// The operator type.
optional string type = 5;
// The arguments.
repeated Argument arg = 6;
// The device option that the operator should run under.
optional DeviceOption device_option = 7;
}
// Record the gradient information.
// All three fields are strings; presumably each holds a tensor name -
// confirm against the code that consumes this message.
message GradientProto {
// The derivative target.
optional string cost = 1;
// What the derivative is taken with respect to.
optional string wrt = 2;
// The external gradient.
optional string external = 3;
}
// Record the updater information.
message UpdaterProto {
// The operator name to use.
optional string name = 1;
// The operator type.
optional string type = 2;
// The tensors to update.
repeated string tensor = 3;
// The arguments.
repeated Argument arg = 4;
}
// The definition of a graph: the operators to execute, together with the
// graph-level inputs, outputs, arguments, device option, gradient records
// and updater records.
// Field number 4 is not used by this message.
message GraphDef {
// The graph name.
optional string name = 1;
// The operators to execute.
repeated OperatorDef op = 2;
// The type of graph.
optional string graph_type = 3;
// The device option for this graph.
optional DeviceOption device_option = 5;
// The arguments.
repeated Argument arg = 6;
// The names of the inputs.
repeated string input = 7;
// The names of the outputs.
repeated string output = 8;
// The gradient information.
repeated GradientProto gradient = 9;
// The updater information.
repeated UpdaterProto updater = 10;
}
\ No newline at end of file
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!