Commit 02ad90d5 by Ting PAN

Remove the duplicate workspace singletons

Summary:
This commit moves the workspace API onto the current workspace instance.
Accordingly, the namespace ``dragon.workspace`` is removed for simplicity.
1 parent adb6fa64
Showing with 3854 additions and 4629 deletions
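To make the change concrete, a hedged before/after sketch of the workspace usage implied by this commit (feeding by the plain string name ``'data'`` is an illustrative assumption; ``get_workspace()``, ``feed_tensor()`` and ``fetch_tensor()`` appear in the updated sources below):

```python
import numpy
from dragon.core.framework import workspace

# Before: module-level singleton functions, e.g.
#   workspace.feed_tensor(tensor, value)
# After: the same operations are methods of the current workspace instance.
current_ws = workspace.get_workspace()
current_ws.feed_tensor('data', numpy.ones((2, 3), 'float32'))  # feed a value
print(current_ws.fetch_tensor('data'))                         # fetch it back
```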
<p align="center"> <p align="center">
<img width="40%" src="http://dragon.seetatech.com/static/images/styles-dragon.png"/> <img width="40%" src="https://dragon.seetatech.com/static/images/styles-dragon.png"/>
</p> </p>
[Dragon](http://dragon.seetatech.com) is a **C**(Computation)**G**(Graph)**V**(Virtual)**M**(Machine) based distributed deep learning framework. [Dragon](https://dragon.seetatech.com) is a **C**(Computation)**G**(Graph)**V**(Virtual)**M**(Machine) based distributed deep learning framework.
It fuses several modern frameworks and integrations together, powered by a unified engine. It fuses several modern frameworks and integrations together, powered by a unified engine.
The computation between different programming styles is deterministic and reproducible. The computation between different programming styles is deterministic and reproducible.
...@@ -11,7 +11,7 @@ promoting internal interfaces. We will always learn from the AI community to evo ...@@ -11,7 +11,7 @@ promoting internal interfaces. We will always learn from the AI community to evo
## Installation ## Installation
See the [install guide](http://dragon.seetatech.com/install) for the pip package See the [install guide](https://dragon.seetatech.com/install) for the pip package
or how to build from source. or how to build from source.
## License ## License
......
...@@ -15,9 +15,13 @@ from __future__ import absolute_import ...@@ -15,9 +15,13 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy
from dragon.core.autograph.tensor import TensorRef from dragon.core.autograph.tensor import TensorRef
from dragon.core.eager import context as eager_context from dragon.core.eager import context as eager_context
from dragon.core.framework import context from dragon.core.framework import context
from dragon.core.util import logging
from dragon.vm.caffe.proto import caffe_pb2
class Layer(object): class Layer(object):
...@@ -34,24 +38,26 @@ class Layer(object): ...@@ -34,24 +38,26 @@ class Layer(object):
""" """
self._proto = layer_param self._proto = layer_param
self._name = layer_param.name self._name = layer_param.name
self._arguments, self.arguments = {'name': self._name}, {} self._arguments, self.arguments = {'name': 'output'}, {}
# Store the inputs, outputs and trainable parameters. # Store the inputs, outputs and trainable parameters.
self._bottom, self._top, self._blobs = [], [], [] self._bottom, self._top, self._blobs = [], [], []
for blob in layer_param.bottom: for blob in layer_param.bottom:
self._bottom.append(blob) self._bottom.append(blob)
for blob in layer_param.top: for blob in layer_param.top:
self._top.append(blob) self._top.append(blob)
# Store the loss weight to apply gradients. # Store the loss weight to apply gradients.
self._loss_weight = layer_param.loss_weight \ self._loss_weight = layer_param.loss_weight \
if len(layer_param.loss_weight) > 0 else None if len(layer_param.loss_weight) > 0 else None
# Optional mirror stage argument for memory optimization. # Optional mirror stage argument for memory optimization.
if layer_param.HasField('mirror_stage'): if layer_param.HasField('mirror_stage'):
self._arguments['mirror_stage'] = layer_param.mirror_stage self._arguments['mirror_stage'] = layer_param.mirror_stage
@property @property
def blobs(self):
"""Return the blobs."""
return self._blobs
@property
def bottom(self): def bottom(self):
"""Return the bottom names.""" """Return the bottom names."""
return self._bottom return self._bottom
...@@ -62,49 +68,91 @@ class Layer(object): ...@@ -62,49 +68,91 @@ class Layer(object):
return self._loss_weight return self._loss_weight
@property @property
def name(self):
"""Return the layer name."""
return self._name
@property
def top(self): def top(self):
"""Return the top names.""" """Return the top names."""
return self._top return self._top
def add_blob(self, value=None, filler=None, no_grad=False): def add_blob(self, value=None, filler=None, no_grad=False):
"""Add a weight blob into this layer.""" """Add a blob into this layer."""
# Use a fixed name in the current workspace. # Set the name for reference explicitly.
# Note that a non-empty tensor scope will make it data_name = context.get_name_scope() + 'param:{}'.format(len(self._blobs))
# impossible to load/save models. You should use data, diff = TensorRef(data_name), TensorRef(data_name + '_grad')
# a new workspace instead of the terrible name scope.
scoped_name = context.get_name_scope() + self._name
param_name = scoped_name + '/param:{}'.format(len(self._blobs))
# Set the name explicitly.
variable = TensorRef(param_name)
variable_grad = TensorRef(param_name + '_grad')
if filler is not None: if filler is not None:
variable._register_as(**filler) data._register_as(**filler)
else: else:
# Register a constant filler by default. # Register a constant filler by default.
value = value if value else 0 value = value if value else 0
variable.constant(value=value) data.constant(value=value)
# Append to the blobs.
self._blobs.append({'data': data, 'diff': None if no_grad else diff})
# Determine whether to disable the gradients explicitly. def from_proto(self, proto):
if no_grad is True: """Deserialize from the proto.
variable_grad = None
# Append to the blobs. Parameters
self._blobs.append({'data': variable, 'diff': variable_grad}) ----------
proto : LayerParameter
The ``LayerParameter`` protocol buffer.
"""
for i in range(len(self._blobs)):
if i < len(proto.blobs):
blob_proto = proto.blobs[i]
if len(blob_proto.data) > 0:
value = numpy.array(blob_proto.data, dtype='float32')
elif len(blob_proto.double_data) > 0:
value = numpy.array(blob_proto.double_data, dtype='float64')
else:
raise ValueError('Neither <data> nor <double_data> in blob proto.')
if len(blob_proto.shape.dim) > 0:
value = value.reshape([dim for dim in blob_proto.shape.dim])
self._blobs[i]['data'].set_value(value)
logging.info('Blob({}/param:{}) loaded, shape: {}, size: {}'
.format(self._name, i, value.shape, value.size))
def setup(self, bottom): def setup(self, bottom):
# Merge the arguments, then setup up the specific layer. """Setup the layer."""
self.arguments = dict(self.arguments, **self._arguments) self.arguments = dict(self.arguments, **self._arguments)
bottom = bottom[0] if len(bottom) == 1 else bottom bottom = bottom[0] if len(bottom) == 1 else bottom
with eager_context.graph_mode(): with eager_context.graph_mode():
return self.__call__(bottom) return self.__call__(bottom)
@classmethod def to_proto(self):
def get_filler(cls, layer_param, filler_name): """Serialize to the proto.
"""Construct a filler from the parameter."""
if layer_param.HasField(filler_name): Returns
filler = getattr(layer_param, filler_name) -------
LayerParameter
The ``LayerParameter`` protocol buffer.
"""
proto = caffe_pb2.LayerParameter()
proto.CopyFrom(self._proto)
for blob in self._blobs:
value = blob['data'].get_value()
if str(value.dtype) == 'float32':
blob_proto = caffe_pb2.BlobProto(
data=value.flatten(),
shape=caffe_pb2.BlobShape(dim=value.shape))
elif str(value.dtype) == 'float64':
blob_proto = caffe_pb2.BlobProto(
double_data=value.flatten(),
shape=caffe_pb2.BlobShape(dim=value.shape))
else:
raise ValueError('Either float32 or float64 blob is required.')
proto.blobs.extend([blob_proto])
return proto
@staticmethod
def get_filler(proto, filler_name):
"""Return the filler from proto."""
if proto.HasField(filler_name):
filler = getattr(proto, filler_name)
return { return {
'type': filler.type.lower(), 'type': filler.type.lower(),
'value': filler.value, 'value': filler.value,
......
...@@ -16,14 +16,10 @@ from __future__ import print_function ...@@ -16,14 +16,10 @@ from __future__ import print_function
from dragon.vm.caffe.layers.common import Accuracy from dragon.vm.caffe.layers.common import Accuracy
from dragon.vm.caffe.layers.common import ArgMax from dragon.vm.caffe.layers.common import ArgMax
from dragon.vm.caffe.layers.common import BatchNorm from dragon.vm.caffe.layers.common import BatchNorm
from dragon.vm.caffe.layers.common import Cast
from dragon.vm.caffe.layers.common import Concat from dragon.vm.caffe.layers.common import Concat
from dragon.vm.caffe.layers.common import Crop from dragon.vm.caffe.layers.common import Crop
from dragon.vm.caffe.layers.common import Eltwise from dragon.vm.caffe.layers.common import Eltwise
from dragon.vm.caffe.layers.common import Flatten from dragon.vm.caffe.layers.common import Flatten
from dragon.vm.caffe.layers.common import FusedBatchNorm
from dragon.vm.caffe.layers.common import FusedGroupNorm
from dragon.vm.caffe.layers.common import GroupNorm
from dragon.vm.caffe.layers.common import InnerProduct from dragon.vm.caffe.layers.common import InnerProduct
from dragon.vm.caffe.layers.common import Input from dragon.vm.caffe.layers.common import Input
from dragon.vm.caffe.layers.common import Normalize from dragon.vm.caffe.layers.common import Normalize
...@@ -46,12 +42,10 @@ from dragon.vm.caffe.layers.neuron import ELU ...@@ -46,12 +42,10 @@ from dragon.vm.caffe.layers.neuron import ELU
from dragon.vm.caffe.layers.neuron import Power from dragon.vm.caffe.layers.neuron import Power
from dragon.vm.caffe.layers.neuron import PReLU from dragon.vm.caffe.layers.neuron import PReLU
from dragon.vm.caffe.layers.neuron import ReLU from dragon.vm.caffe.layers.neuron import ReLU
from dragon.vm.caffe.layers.neuron import SELU
from dragon.vm.caffe.layers.neuron import Sigmoid from dragon.vm.caffe.layers.neuron import Sigmoid
from dragon.vm.caffe.layers.neuron import TanH from dragon.vm.caffe.layers.neuron import TanH
from dragon.vm.caffe.layers.vision import Convolution from dragon.vm.caffe.layers.vision import Convolution
from dragon.vm.caffe.layers.vision import Deconvolution from dragon.vm.caffe.layers.vision import Deconvolution
from dragon.vm.caffe.layers.vision import DepthwiseConv2d
from dragon.vm.caffe.layers.vision import LRN from dragon.vm.caffe.layers.vision import LRN
from dragon.vm.caffe.layers.vision import Pooling from dragon.vm.caffe.layers.vision import Pooling
from dragon.vm.caffe.layers.vision import ROIAlign from dragon.vm.caffe.layers.vision import ROIAlign
......
...@@ -15,7 +15,9 @@ from __future__ import absolute_import ...@@ -15,7 +15,9 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.core.autograph.tensor import Tensor from dragon.core.autograph.tensor import TensorRef
from dragon.core.framework import context
from dragon.core.framework import workspace
from dragon.core.ops import activation_ops from dragon.core.ops import activation_ops
from dragon.core.ops import array_ops from dragon.core.ops import array_ops
from dragon.core.ops import framework_ops from dragon.core.ops import framework_ops
...@@ -32,15 +34,15 @@ class Accuracy(Layer): ...@@ -32,15 +34,15 @@ class Accuracy(Layer):
```python ```python
layer { layer {
type: "Accuracy" type: "Accuracy"
bottom: "ip2" bottom: "ip2"
bottom: "label" bottom: "label"
top: "acc" top: "acc"
accuracy_param { accuracy_param {
axis: 1 axis: 1
top_k: 1 top_k: 1
ignore_label: -1 ignore_label: -1
} }
} }
``` ```
...@@ -67,13 +69,13 @@ class ArgMax(Layer): ...@@ -67,13 +69,13 @@ class ArgMax(Layer):
```python ```python
layer { layer {
type: "ArgMax" type: "ArgMax"
bottom: "ip2" bottom: "ip2"
top: "cls" top: "cls"
argmax_param { argmax_param {
top_k: 1 top_k: 1
axis: 1 axis: 1
} }
} }
``` ```
...@@ -100,14 +102,14 @@ class BatchNorm(Layer): ...@@ -100,14 +102,14 @@ class BatchNorm(Layer):
```python ```python
layer { layer {
type: "BatchNorm" type: "BatchNorm"
bottom: "conv1" bottom: "conv1"
top: "conv1/bn" top: "conv1/bn"
batch_norm_param { batch_norm_param {
use_global_stats: False use_global_stats: False
moving_average_fraction: 0.9 moving_average_fraction: 0.9
eps: 1e-5 eps: 1e-5
} }
} }
``` ```
...@@ -123,43 +125,27 @@ class BatchNorm(Layer): ...@@ -123,43 +125,27 @@ class BatchNorm(Layer):
'eps': param.eps, 'eps': param.eps,
'axis': 1, 'axis': 1,
} }
self.add_blob(value=1, no_grad=True) # gamma
self.add_blob(value=0, no_grad=True) # beta
self.add_blob(value=0, no_grad=True) # running_mean self.add_blob(value=0, no_grad=True) # running_mean
self.add_blob(value=1, no_grad=True) # running_var self.add_blob(value=1, no_grad=True) # running_var
self.add_blob(value=1, no_grad=True) # running_num_batches
self.add_blob(value=1, no_grad=True) # fixed_gamma
self.add_blob(value=0, no_grad=True) # fixed_beta
self._blobs[2]['data'].set_value([1.])
self._weight, self._bias = [blob['data'] for blob in self._blobs[3:5]]
del self._blobs[3:] # Avoid to save the fixed blobs
def fuse_with_scale_layer(self, scale_layer):
self._weight = scale_layer._blobs[0]['data']
if len(scale_layer._blobs) == 2:
self._bias = scale_layer._blobs[1]['data']
scale_layer.__call__ = lambda *args, **kwargs: None
def __call__(self, bottom): def __call__(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs] inputs = [bottom, self._weight, self._bias] + \
[blob['data'] for blob in self._blobs[:2]]
return normalization_ops.batch_norm(inputs, **self.arguments) return normalization_ops.batch_norm(inputs, **self.arguments)
class Cast(Layer):
r"""Cast the data type of input.
Examples:
```python
layer {
type: "Cast"
bottom: "ip2/fp16"
top: "ip2/fp32"
cast_param {
dtype: "float32"
}
}
```
"""
def __init__(self, layer_param):
super(Cast, self).__init__(layer_param)
param = layer_param.cast_param
self.arguments = {'dtype': param.dtype.lower()}
def __call__(self, bottom):
return array_ops.cast(bottom, **self.arguments)
class Concat(Layer): class Concat(Layer):
r"""Concatenate the inputs along the given axis. r"""Concatenate the inputs along the given axis.
...@@ -167,13 +153,13 @@ class Concat(Layer): ...@@ -167,13 +153,13 @@ class Concat(Layer):
```python ```python
layer { layer {
type: "Concat" type: "Concat"
bottom: "conv2" bottom: "conv2"
bottom: "conv1" bottom: "conv1"
top: "conv2/fuse" top: "conv2/fuse"
concat_param { concat_param {
axis: 1 axis: 1
} }
} }
``` ```
...@@ -194,15 +180,15 @@ class Crop(Layer): ...@@ -194,15 +180,15 @@ class Crop(Layer):
```python ```python
layer { layer {
type: "Crop" type: "Crop"
bottom: "score" bottom: "score"
bottom: "score/ref" bottom: "score/ref"
top: "score/crop" top: "score/crop"
crop_param { crop_param {
axis: 2 axis: 2
offset: 5 offset: 5
offset: 10 offset: 10
} }
} }
``` ```
...@@ -232,15 +218,15 @@ class Eltwise(Layer): ...@@ -232,15 +218,15 @@ class Eltwise(Layer):
```python ```python
layer { layer {
type: "Eltwise" type: "Eltwise"
bottom: "conv2" bottom: "conv2"
bottom: "conv1" bottom: "conv1"
top: "conv2/fuse" top: "conv2/fuse"
eltwise_param { eltwise_param {
operation: SUM operation: SUM
coeff: 1. coeff: 1.
coeff: 1. coeff: 1.
} }
} }
``` ```
...@@ -250,9 +236,9 @@ class Eltwise(Layer): ...@@ -250,9 +236,9 @@ class Eltwise(Layer):
super(Eltwise, self).__init__(layer_param) super(Eltwise, self).__init__(layer_param)
param = layer_param.eltwise_param param = layer_param.eltwise_param
self.eltwise_op = { self.eltwise_op = {
0: math_ops.mul, # MUL 0: math_ops.mul,
1: math_ops.add, # SUM 1: math_ops.add,
2: math_ops.maximum, # MAX 2: math_ops.maximum,
}[param.operation] }[param.operation]
self.factors = [element for element in param.coeff] self.factors = [element for element in param.coeff]
...@@ -273,13 +259,13 @@ class Flatten(Layer): ...@@ -273,13 +259,13 @@ class Flatten(Layer):
```python ```python
layer { layer {
type: "Flatten" type: "Flatten"
bottom: "conv5" bottom: "conv5"
top: "conv5/flatten" top: "conv5/flatten"
flatten_param { flatten_param {
axis: 1 axis: 1
end_axis: -1 end_axis: -1
} }
} }
``` ```
...@@ -296,141 +282,6 @@ class Flatten(Layer): ...@@ -296,141 +282,6 @@ class Flatten(Layer):
return array_ops.flatten(bottom, **self.arguments) return array_ops.flatten(bottom, **self.arguments)
class FusedBatchNorm(Layer):
r"""Apply the fused batch normalization.
`[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_.
Examples:
```python
layer {
type: "FusedBatchNorm"
bottom: "conv1"
top: "conv1/bn"
batch_norm_param {
use_global_stats: False
moving_average_fraction: 0.9
eps: 1e-5
}
scale_param {
filler: {
type: "constant"
value: 1
}
bias_filler {
type: "constant"
value: 0
}
}
}
```
"""
def __init__(self, layer_param):
super(FusedBatchNorm, self).__init__(layer_param)
bn_param = layer_param.batch_norm_param
scale_param = layer_param.scale_param
self.arguments = {
'axis': 1,
'momentum': bn_param.moving_average_fraction,
'eps': bn_param.eps,
'use_stats': int(bn_param.use_global_stats)
if bn_param.HasField('use_global_stats') else -1,
}
self.add_blob(filler=self.get_filler(scale_param, 'filler'), value=1) # gamma
self.add_blob(filler=self.get_filler(scale_param, 'bias_filler')) # beta
self.add_blob(value=0, no_grad=True) # running_mean
self.add_blob(value=1, no_grad=True) # running_var
def __call__(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs]
return normalization_ops.batch_norm(inputs, **self.arguments)
class FusedGroupNorm(Layer):
r"""Apply the fused group normalization.
`[Wu & He, 2018] <https://arxiv.org/abs/1803.08494>`_.
Examples:
```python
layer {
type: "FusedGroupNorm"
bottom: "conv1"
top: "conv1/gn"
group_norm_param {
group: 32
eps: 1e-5
}
scale_param {
filler: {
type: "constant"
value: 1
}
bias_filler {
type: "constant"
value: 0
}
}
}
```
"""
def __init__(self, layer_param):
super(FusedGroupNorm, self).__init__(layer_param)
gn_param = layer_param.group_norm_param
scale_param = layer_param.scale_param
self.arguments = {
'axis': 1,
'group': gn_param.group,
'eps': gn_param.eps,
}
self.add_blob(filler=self.get_filler(scale_param, 'filler'), value=1) # gamma
self.add_blob(filler=self.get_filler(scale_param, 'bias_filler')) # beta
def __call__(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs]
return normalization_ops.group_norm(inputs, **self.arguments)
class GroupNorm(Layer):
r"""Apply the group normalization.
`[Wu & He, 2018] <https://arxiv.org/abs/1803.08494>`_.
Examples:
```python
layer {
type: "GroupNorm"
bottom: "conv1"
top: "conv1/gn"
group_norm_param {
group: 32
eps: 1e-5
}
}
```
"""
def __init__(self, layer_param):
super(GroupNorm, self).__init__(layer_param)
param = layer_param.group_norm_param
self.arguments = {
'axis': 1,
'group': param.group,
'eps': param.eps,
}
self.add_blob(value=1, no_grad=True)
self.add_blob(value=0, no_grad=True)
def __call__(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs]
return normalization_ops.group_norm(inputs, **self.arguments)
class InnerProduct(Layer): class InnerProduct(Layer):
r"""Compute the dense matrix multiplication along the given axes. r"""Compute the dense matrix multiplication along the given axes.
...@@ -438,13 +289,13 @@ class InnerProduct(Layer): ...@@ -438,13 +289,13 @@ class InnerProduct(Layer):
```python ```python
layer { layer {
type: "InnerProduct" type: "InnerProduct"
bottom: "conv5" bottom: "conv5"
top: "ip1" top: "ip1"
inner_product_param { inner_product_param {
axis: 1 axis: 1
num_output: 1024 num_output: 1024
} }
} }
``` ```
...@@ -458,7 +309,6 @@ class InnerProduct(Layer): ...@@ -458,7 +309,6 @@ class InnerProduct(Layer):
'out_channels': param.num_output, 'out_channels': param.num_output,
'transpose_w': not param.transpose, 'transpose_w': not param.transpose,
} }
# Add weights and biases
self.add_blob(filler=self.get_filler(param, 'weight_filler')) self.add_blob(filler=self.get_filler(param, 'weight_filler'))
if param.bias_term: if param.bias_term:
self.add_blob(filler=self.get_filler(param, 'bias_filler')) self.add_blob(filler=self.get_filler(param, 'bias_filler'))
...@@ -475,15 +325,13 @@ class Input(Layer): ...@@ -475,15 +325,13 @@ class Input(Layer):
```python ```python
layer { layer {
type: "Input" type: "Input"
top: "a" top: "data1"
top: "b" top: "data2"
input_param { input_param {
shape: { dim: 2 dim: 3 } shape: { dim: 2 dim: 3 }
shape: { dim: 2 dim: 3 dim: 3 } shape: { dim: 2 dim: 3 dim: 3 }
dtype: "float32" }
dtype: "float64"
}
} }
``` ```
...@@ -492,20 +340,24 @@ class Input(Layer): ...@@ -492,20 +340,24 @@ class Input(Layer):
def __init__(self, layer_param): def __init__(self, layer_param):
super(Input, self).__init__(layer_param) super(Input, self).__init__(layer_param)
param = layer_param.input_param param = layer_param.input_param
self.shapes, self.dtypes = [], [] self.blob_shapes = []
for i in range(len(self.top)): for i in range(len(self.top)):
if i < len(param.shape): if i < len(param.shape):
self.shapes.append([e for e in param.shape[i].dim]) self.blob_shapes.append([e for e in param.shape[i].dim])
else:
self.shapes.append(None)
if i < len(param.dtype):
self.dtypes.append(param.dtype[i])
else: else:
self.dtypes.append('float32') self.blob_shapes.append(None)
def __call__(self, bottom): def __call__(self, bottom):
return [Tensor(shape=self.shapes[i], dtype=self.dtypes[i]) name_scope = context.get_name_scope()
for i in range(len(self.shapes))] current_ws = workspace.get_workspace()
return [TensorRef(
name=current_ws.unique_name(
name_scope + 'output',
suffix=':{}'.format(i),
namespace='Tensor'),
shape=self.blob_shapes[i],
dtype='float32',
).placeholder() for i in range(len(self.blob_shapes))]
class Normalize(Layer): class Normalize(Layer):
...@@ -516,18 +368,18 @@ class Normalize(Layer): ...@@ -516,18 +368,18 @@ class Normalize(Layer):
```python ```python
layer { layer {
type: "Normalize" type: "Normalize"
bottom: "conv4" bottom: "conv4"
top: "conv4/norm" top: "conv4/norm"
normalize_param { normalize_param {
across_spatial: false across_spatial: false
channel_shared: false channel_shared: false
eps: 1e-12 eps: 1e-12
scale_filler: { scale_filler: {
type: "constant" type: "constant"
value: 1 value: 1
} }
} }
} }
``` ```
...@@ -560,15 +412,15 @@ class Permute(Layer): ...@@ -560,15 +412,15 @@ class Permute(Layer):
```python ```python
layer { layer {
type: "Permute" type: "Permute"
bottom: "cls_score" bottom: "cls_score"
top: "cls_score/perm" top: "cls_score/perm"
permute_param { permute_param {
order: 0 order: 0
order: 2 order: 2
order: 3 order: 3
order: 1 order: 1
} }
} }
``` ```
...@@ -590,16 +442,16 @@ class Python(Layer): ...@@ -590,16 +442,16 @@ class Python(Layer):
```python ```python
layer { layer {
type: "Python" type: "Python"
bottom: "cls_prob" bottom: "cls_prob"
bottom: "bbox_pred" bottom: "bbox_pred"
bottom: "ims_info" bottom: "ims_info"
top: "rois" top: "rois"
python_param { python_param {
module: 'rpn.proposal_layer' module: 'rpn.proposal_layer'
layer: 'ProposalLayer' layer: 'ProposalLayer'
param_str: "'feat_stride': 16" param_str: "'feat_stride': 16"
} }
} }
``` ```
...@@ -626,13 +478,13 @@ class Reduction(Layer): ...@@ -626,13 +478,13 @@ class Reduction(Layer):
```python ```python
layer { layer {
type: "Reduction" type: "Reduction"
bottom: "entropy" bottom: "entropy"
top: "loss" top: "loss"
reduction_param { reduction_param {
operation: SUM operation: SUM
axis: 1 axis: 1
} }
} }
``` ```
...@@ -646,10 +498,7 @@ class Reduction(Layer): ...@@ -646,10 +498,7 @@ class Reduction(Layer):
raise ValueError('The negative axis can only be -1.') raise ValueError('The negative axis can only be -1.')
self.scale = param.coeff self.scale = param.coeff
self.arguments = {'axis': [param.axis]} self.arguments = {'axis': [param.axis]}
self.reduction = { self.reduction = {1: array_ops.sum, 4: array_ops.mean}[param.operation]
1: array_ops.sum,
4: array_ops.mean,
}[param.operation]
def __call__(self, bottom): def __call__(self, bottom):
top = self.reduction(bottom, **self.arguments) top = self.reduction(bottom, **self.arguments)
...@@ -665,16 +514,16 @@ class Reshape(Layer): ...@@ -665,16 +514,16 @@ class Reshape(Layer):
```python ```python
layer { layer {
type: "Reshape" type: "Reshape"
bottom: "bbox_pred/perm" bottom: "bbox_pred/perm"
top: "bbox_pred/reshape" top: "bbox_pred/reshape"
reshape_param { reshape_param {
shape { shape {
dim: 0 dim: 0
dim: -1 dim: -1
dim: 4 dim: 4
} }
} }
} }
``` ```
...@@ -696,22 +545,22 @@ class Scale(Layer): ...@@ -696,22 +545,22 @@ class Scale(Layer):
```python ```python
layer { layer {
type: "Scale" type: "Scale"
bottom: "conv1/bn" bottom: "conv1/bn"
top: "conv1/scale" top: "conv1/scale"
scale_param { scale_param {
axis: 1 axis: 1
num_axes: 1 num_axes: 1
bias_term: true bias_term: true
filler: { filler: {
type: "constant" type: "constant"
value: 1 value: 1
} }
bias_filler { bias_filler {
type: "constant" type: "constant"
value: 0 value: 0
} }
} }
} }
``` ```
...@@ -721,7 +570,6 @@ class Scale(Layer): ...@@ -721,7 +570,6 @@ class Scale(Layer):
super(Scale, self).__init__(layer_param) super(Scale, self).__init__(layer_param)
param = layer_param.scale_param param = layer_param.scale_param
self.arguments = {'axis': param.axis, 'num_axes': param.num_axes} self.arguments = {'axis': param.axis, 'num_axes': param.num_axes}
# Add weights and biases
self.add_blob(filler=self.get_filler(param, 'filler'), value=1) self.add_blob(filler=self.get_filler(param, 'filler'), value=1)
if param.bias_term: if param.bias_term:
self.add_blob(filler=self.get_filler(param, 'bias_filler')) self.add_blob(filler=self.get_filler(param, 'bias_filler'))
...@@ -738,16 +586,16 @@ class Slice(Layer): ...@@ -738,16 +586,16 @@ class Slice(Layer):
```python ```python
layer { layer {
type: "Slice" type: "Slice"
bottom: "image" bottom: "image"
top: "image/b" top: "image/b"
top: "image/g" top: "image/g"
top: "image/r" top: "image/r"
slice_param { slice_param {
axis: 1 axis: 1
slice_point: 1 slice_point: 1
slice_point: 2 slice_point: 2
} }
} }
``` ```
...@@ -773,12 +621,12 @@ class Softmax(Layer): ...@@ -773,12 +621,12 @@ class Softmax(Layer):
```python ```python
layer { layer {
type: "Softmax" type: "Softmax"
bottom: "cls_score" bottom: "cls_score"
top: "cls_prob" top: "cls_prob"
softmax_param { softmax_param {
axis: 1 axis: 1
} }
} }
``` ```
...@@ -799,9 +647,9 @@ class StopGradient(Layer): ...@@ -799,9 +647,9 @@ class StopGradient(Layer):
```python ```python
layer { layer {
type: "StopGradient" type: "StopGradient"
bottom: "res2c" bottom: "res2c"
top: "res2c/frozen" top: "res2c/frozen"
} }
``` ```
...@@ -815,22 +663,18 @@ class StopGradient(Layer): ...@@ -815,22 +663,18 @@ class StopGradient(Layer):
class Tile(Layer): class Tile(Layer):
r"""Tile the input according to the given multiples. r"""Repeat the input according to the given axis.
Examples: Examples:
```python ```python
layer { layer {
type: "Slice" type: "Tile"
bottom: "conv2" bottom: "data"
top: "conv2/dup" top: "output"
tile_param { tile_param {
multiples: { axis: 1
dim: 1 tiles: 2
dim: 2
dim: 1
dim: 1
}
} }
} }
``` ```
...@@ -840,7 +684,9 @@ class Tile(Layer): ...@@ -840,7 +684,9 @@ class Tile(Layer):
def __init__(self, layer_param): def __init__(self, layer_param):
super(Tile, self).__init__(layer_param) super(Tile, self).__init__(layer_param)
param = layer_param.tile_param param = layer_param.tile_param
self.arguments = {'multiples': [e for e in param.multiples.dim]} repeats = [1] * (param.axis + 1)
repeats[param.axis] = param.tiles
self.arguments = {'repeats': repeats}
def __call__(self, bottom): def __call__(self, bottom):
return array_ops.tile(bottom, **self.arguments) return array_ops.tile(bottom, **self.arguments)
...@@ -33,8 +33,9 @@ class _DataPlugin(object): ...@@ -33,8 +33,9 @@ class _DataPlugin(object):
def forward(self, inputs, outputs): def forward(self, inputs, outputs):
blobs = self.iterator.next() blobs = self.iterator.next()
current_ws = workspace.get_workspace()
for i, blob in enumerate(blobs): for i, blob in enumerate(blobs):
workspace.feed_tensor(outputs[i], blob) current_ws.feed_tensor(outputs[i], blob)
class Data(Layer): class Data(Layer):
...@@ -44,42 +45,46 @@ class Data(Layer): ...@@ -44,42 +45,46 @@ class Data(Layer):
```python ```python
layer { layer {
type: "Data" type: "Data"
top: "data" top: "data"
top: "label" top: "label"
include { phase: TRAIN } include {
data_param { phase: TRAIN
source: "/data/imagenet/train" }
batch_size: 128 data_param {
shuffle: true source: "/data/train"
num_chunks: 0 batch_size: 128
prefetch: 5 shuffle: true
} num_chunks: 0
transform_param { prefetch: 5
mirror: true }
random_crop_size: 224 transform_param {
augment_color: true mirror: true
mean_value: 104.00698793 random_crop_size: 224
mean_value: 116.66876762 augment_color: true
mean_value: 122.67891434 mean_value: 104.00698793
} mean_value: 116.66876762
mean_value: 122.67891434
}
} }
layer { layer {
type: "Data" type: "Data"
top: "data" top: "data"
top: "label" top: "label"
include { phase: TEST } include {
data_param { phase: TEST
source: "/data/imagenet/val" }
batch_size: 100 data_param {
} source: "/data/val"
transform_param { batch_size: 64
resize: 256 }
crop_size: 224 transform_param {
mean_value: 104.00698793 resize: 256
mean_value: 116.66876762 crop_size: 224
mean_value: 122.67891434 mean_value: 104.00698793
} mean_value: 116.66876762
mean_value: 122.67891434
}
} }
``` ```
......
...@@ -30,13 +30,13 @@ class EuclideanLoss(Layer): ...@@ -30,13 +30,13 @@ class EuclideanLoss(Layer):
```python ```python
layer { layer {
type: "EuclideanLoss" type: "EuclideanLoss"
bottom: "bbox_pred" bottom: "bbox_pred"
bottom: "bbox_target" bottom: "bbox_target"
top: "bbox_loss" top: "bbox_loss"
loss_param { loss_param {
normalization: BATCH_SIZE normalization: BATCH_SIZE
} }
} }
``` ```
...@@ -67,13 +67,13 @@ class SigmoidCrossEntropyLoss(Layer): ...@@ -67,13 +67,13 @@ class SigmoidCrossEntropyLoss(Layer):
```python ```python
layer { layer {
type: "SigmoidCrossEntropyLoss" type: "SigmoidCrossEntropyLoss"
bottom: "rpn_cls_score" bottom: "rpn_cls_score"
bottom: "rpn_labels" bottom: "rpn_labels"
top: "rpn_loss" top: "rpn_loss"
loss_param { loss_param {
normalization: VALID normalization: VALID
} }
} }
``` ```
...@@ -106,15 +106,15 @@ class SmoothL1Loss(Layer): ...@@ -106,15 +106,15 @@ class SmoothL1Loss(Layer):
```python ```python
layer { layer {
type: "SmoothL1Loss" type: "SmoothL1Loss"
bottom: "bbox_pred" bottom: "bbox_pred"
bottom: "bbox_targets" bottom: "bbox_targets"
bottom: "bbox_inside_weights" bottom: "bbox_inside_weights"
bottom: "bbox_outside_weights" bottom: "bbox_outside_weights"
top: "bbox_loss" top: "bbox_loss"
loss_param { loss_param {
normalization: BATCH_SIZE normalization: BATCH_SIZE
} }
} }
``` ```
...@@ -155,15 +155,17 @@ class SoftmaxWithLoss(Layer): ...@@ -155,15 +155,17 @@ class SoftmaxWithLoss(Layer):
```python ```python
layer { layer {
type: "SoftmaxWithLoss" type: "SoftmaxWithLoss"
bottom: "cls_score" bottom: "cls_score"
bottom: "labels" bottom: "labels"
top: "cls_loss" top: "cls_loss"
softmax_param { axis: 1 } softmax_param {
loss_param { axis: 1
ignore_label: -1 }
normalization: VALID loss_param {
} ignore_label: -1
normalization: VALID
}
} }
``` ```
......
...@@ -32,12 +32,12 @@ class Dropout(Layer): ...@@ -32,12 +32,12 @@ class Dropout(Layer):
```python ```python
layer { layer {
type: "Dropout" type: "Dropout"
bottom: "fc6" bottom: "fc6"
top: "fc6" top: "fc6"
dropout_param { dropout_param {
dropout_ratio: 0.5 dropout_ratio: 0.5
} }
} }
``` ```
...@@ -73,12 +73,12 @@ class ELU(Layer): ...@@ -73,12 +73,12 @@ class ELU(Layer):
```python ```python
layer { layer {
type: "ELU" type: "ELU"
bottom: "conv2" bottom: "conv2"
top: "conv2" top: "conv2"
elu_param { elu_param {
alpha: 1. alpha: 1.
} }
} }
``` ```
...@@ -101,14 +101,14 @@ class Power(Layer): ...@@ -101,14 +101,14 @@ class Power(Layer):
```python ```python
layer { layer {
type: "Power" type: "Power"
bottom: "x" bottom: "x"
top: "y" top: "y"
power_param { power_param {
scale: 1. scale: 1.
shift: 0. shift: 0.
power: 2. power: 2.
} }
} }
``` ```
...@@ -148,16 +148,16 @@ class PReLU(Layer): ...@@ -148,16 +148,16 @@ class PReLU(Layer):
```python ```python
layer { layer {
type: "PReLU" type: "PReLU"
bottom: "conv2" bottom: "conv2"
top: "conv2/relu" top: "conv2/relu"
prelu_param { prelu_param {
channel_shared: false channel_shared: false
filler { filler {
type: "constant" type: "constant"
value: 0.25 value: 0.25
}
} }
}
} }
``` ```
...@@ -194,12 +194,12 @@ class ReLU(Layer): ...@@ -194,12 +194,12 @@ class ReLU(Layer):
```python ```python
layer { layer {
type: "ReLU" type: "ReLU"
bottom: "conv2" bottom: "conv2"
top: "conv2/relu" top: "conv2/relu"
relu_param { relu_param {
negative_slope: 0. negative_slope: 0.
} }
} }
``` ```
...@@ -215,38 +215,6 @@ class ReLU(Layer): ...@@ -215,38 +215,6 @@ class ReLU(Layer):
return activation_ops.relu(bottom, **self.arguments) return activation_ops.relu(bottom, **self.arguments)
class SELU(Layer):
r"""Apply the scaled exponential linear unit.
`[Klambauer et.al, 2017] <https://arxiv.org/abs/1706.02515>`_.
The **SELU** function is defined as:
.. math::
\text{SELU}(x) = 1.0507 *
\begin{cases}
x, & \text{ if } x \geq 0 \\
1.6733 * (e^{x} - 1), & \text{ otherwise }
\end{cases}
Examples:
```python
layer {
type: "SELU"
bottom: "conv2"
top: "conv2/relu"
}
```
"""
def __init__(self, layer_param):
super(SELU, self).__init__(layer_param)
def __call__(self, bottom):
return activation_ops.selu(bottom, **self.arguments)
class Sigmoid(Layer): class Sigmoid(Layer):
r"""Apply the sigmoid function. r"""Apply the sigmoid function.
...@@ -258,9 +226,9 @@ class Sigmoid(Layer): ...@@ -258,9 +226,9 @@ class Sigmoid(Layer):
```python ```python
layer { layer {
type: "Sigmoid" type: "Sigmoid"
bottom: "rpn_cls_score" bottom: "rpn_cls_score"
top: "rpn_cls_prob" top: "rpn_cls_prob"
} }
``` ```
...@@ -284,9 +252,9 @@ class TanH(Layer): ...@@ -284,9 +252,9 @@ class TanH(Layer):
```python ```python
layer { layer {
type: "TanH" type: "TanH"
bottom: "g/conv5" bottom: "g/conv5"
top: "g/image" top: "g/image"
} }
``` ```
......
...@@ -23,39 +23,29 @@ from dragon.vm.caffe.layer import Layer ...@@ -23,39 +23,29 @@ from dragon.vm.caffe.layer import Layer
class Convolution(Layer): class Convolution(Layer):
r"""Apply the n-dimension convolution. r"""Apply the n-dimension convolution.
The spatial output dimension is computed as:
.. math::
\begin{cases}
\text{DK}_{size} = dilation *
(\text{K}_{size} - 1) + 1 \\
\text{Dim}_{out} = (\text{Dim}_{in} +
2 * pad - \text{DK}_{size}) / stride + 1
\end{cases}
Examples: Examples:
```python ```python
layer { layer {
type: "Convolution" type: "Convolution"
bottom: "input" bottom: "input"
top: "conv1" top: "conv1"
convolution_param { convolution_param {
num_output: 32 num_output: 32
bias_term: true bias_term: true
kernel_size: 3 kernel_size: 3
pad: 1 pad: 1
stride: 1 stride: 1
dilation: 1 dilation: 1
group: 1 group: 1
weight_filler { weight_filler {
type: "xavier" type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
} }
bias_filler {
type: "constant"
value: 0
}
}
} }
``` ```
...@@ -83,7 +73,6 @@ class Convolution(Layer): ...@@ -83,7 +73,6 @@ class Convolution(Layer):
if param.HasField('pad_h'): if param.HasField('pad_h'):
assert param.HasField('pad_w') assert param.HasField('pad_w')
self.arguments['pads'] = [param.pad_h, param.pad_w] self.arguments['pads'] = [param.pad_h, param.pad_w]
self.add_blob(filler=self.get_filler(param, 'weight_filler')) self.add_blob(filler=self.get_filler(param, 'weight_filler'))
if param.bias_term: if param.bias_term:
self.add_blob(filler=self.get_filler(param, 'bias_filler')) self.add_blob(filler=self.get_filler(param, 'bias_filler'))
...@@ -96,39 +85,29 @@ class Convolution(Layer): ...@@ -96,39 +85,29 @@ class Convolution(Layer):
class Deconvolution(Convolution): class Deconvolution(Convolution):
r"""Apply the 2d deconvolution. r"""Apply the 2d deconvolution.
The spatial output dimension is computed as:
.. math::
\begin{cases}
\text{DK}_{size} = dilation *
(\text{K}_{size} - 1) + 1 \\
\text{Dim}_{out} = (\text{Dim}_{in} - 1) *
stride + \text{DK}_{size} - 2 * pad
\end{cases}
Examples: Examples:
```python ```python
layer { layer {
type: "Deconvolution" type: "Deconvolution"
bottom: "conv5" bottom: "conv5"
top: "conv5/upscale" top: "conv5/upscale"
convolution_param { convolution_param {
num_output: 256 num_output: 256
bias_term: true bias_term: true
kernel_size: 2 kernel_size: 2
pad: 0 pad: 0
stride: 2 stride: 2
dilation: 1 dilation: 1
group: 1 group: 1
weight_filler { weight_filler {
type: "xavier" type: "xavier"
} }
bias_filler { bias_filler {
type: "constant" type: "constant"
value: 0 value: 0
}
} }
}
} }
``` ```
...@@ -142,77 +121,6 @@ class Deconvolution(Convolution): ...@@ -142,77 +121,6 @@ class Deconvolution(Convolution):
return vision_ops.conv2d_transpose(inputs, **self.arguments) return vision_ops.conv2d_transpose(inputs, **self.arguments)
class DepthwiseConv2d(Layer):
r"""Apply the 2d depthwise convolution.
`[Chollet, 2016] <https://arxiv.org/abs/1610.02357>`_.
The spatial output dimension is computed as:
.. math::
\begin{cases}
\text{DK}_{size} = dilation *
(\text{K}_{size} - 1) + 1 \\
\text{Dim}_{out} = (\text{Dim}_{in} +
2 * pad - \text{DK}_{size}) / stride + 1
\end{cases}
Examples:
```python
layer {
type: "DepthwiseConv2d"
bottom: "input"
top: "conv1"
convolution_param {
num_output: 32
bias_term: true
kernel_size: 3
pad: 1
stride: 1
dilation: 1
weight_filler {
type: "xavier"
variance_norm: FAN_OUT
}
bias_filler {
type: "constant"
value: 0
}
}
}
```
"""
def __init__(self, layer_param):
super(DepthwiseConv2d, self).__init__(layer_param)
param = layer_param.convolution_param
self.arguments = {
'out_channels': param.num_output,
'kernel_shape': [int(e) for e in param.kernel_size],
'strides': [int(e) for e in param.stride] if len(param.stride) > 0 else [1],
'pads': [int(e) for e in param.pad] if len(param.pad) > 0 else [0],
'padding': 'VALID',
'data_format': 'NCHW',
}
if param.HasField('kernel_h'):
assert param.HasField('kernel_w')
self.arguments['kernel_shape'] = [param.kernel_h, param.kernel_w]
if param.HasField('stride_h'):
assert param.HasField('stride_w')
self.arguments['strides'] = [param.stride_h, param.stride_w]
if param.HasField('pad_h'):
assert param.HasField('pad_w')
self.arguments['pads'] = [param.pad_h, param.pad_w]
self.add_blob(filler=self.get_filler(param, 'weight_filler'))
if param.bias_term:
self.add_blob(filler=self.get_filler(param, 'bias_filler'))
def __call__(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs]
return vision_ops.depthwise_conv2d(inputs, **self.arguments)
class LRN(Layer): class LRN(Layer):
r"""Apply the local response normalization. r"""Apply the local response normalization.
`[Krizhevsky et al., 2012] <http://www.cs.toronto.edu/~hinton/absps/imagenet.pdf>`_. `[Krizhevsky et al., 2012] <http://www.cs.toronto.edu/~hinton/absps/imagenet.pdf>`_.
...@@ -221,15 +129,15 @@ class LRN(Layer): ...@@ -221,15 +129,15 @@ class LRN(Layer):
```python ```python
layer { layer {
type: "LRN" type: "LRN"
bottom: "conv2" bottom: "conv2"
top: "conv2/norm" top: "conv2/norm"
lrn_param { lrn_param {
local_size: 5 local_size: 5
alpha: 1. alpha: 1.
beta: 0.75 beta: 0.75
k: 1. k: 1.
} }
} }
``` ```
...@@ -255,24 +163,18 @@ class LRN(Layer): ...@@ -255,24 +163,18 @@ class LRN(Layer):
class Pooling(Layer): class Pooling(Layer):
r"""Apply the n-dimension pooling. r"""Apply the n-dimension pooling.
The spatial output dimension is computed as:
.. math::
\text{Dim}_{out} = (\text{Dim}_{in} +
2 * pad - \text{K}_{size}) / stride + 1
Examples: Examples:
```python ```python
layer { layer {
type: "Pooling" type: "Pooling"
bottom: "conv2" bottom: "conv2"
top: "pool2" top: "pool2"
pooling_param { pooling_param {
kernel_size: 3 kernel_size: 3
stride: 2 stride: 2
pool: AVG pool: AVG
} }
} }
``` ```
...@@ -311,14 +213,14 @@ class ROIAlign(Layer): ...@@ -311,14 +213,14 @@ class ROIAlign(Layer):
```python ```python
layer { layer {
type: "ROIAlign" type: "ROIAlign"
bottom: "conv5_3" bottom: "conv5_3"
top: "roi_pool4" top: "roi_pool4"
roi_pooling_param { roi_pooling_param {
pooled_w: 7 pooled_w: 7
pooled_h: 7 pooled_h: 7
spatial_scale: 0.0625 spatial_scale: 0.0625
} }
} }
``` ```
...@@ -345,14 +247,14 @@ class ROIPooling(Layer): ...@@ -345,14 +247,14 @@ class ROIPooling(Layer):
```python ```python
layer { layer {
type: "ROIPooling" type: "ROIPooling"
bottom: "conv5_3" bottom: "conv5_3"
top: "roi_pool4" top: "roi_pool4"
roi_pooling_param { roi_pooling_param {
pooled_w: 7 pooled_w: 7
pooled_h: 7 pooled_h: 7
spatial_scale: 0.0625 spatial_scale: 0.0625
} }
} }
``` ```
......
...@@ -20,10 +20,11 @@ from google.protobuf import text_format ...@@ -20,10 +20,11 @@ from google.protobuf import text_format
from dragon.core.autograph import def_function from dragon.core.autograph import def_function
from dragon.core.autograph import grad_impl from dragon.core.autograph import grad_impl
from dragon.core.autograph.tensor import Tensor
from dragon.core.autograph.tensor import TensorRef from dragon.core.autograph.tensor import TensorRef
from dragon.core.framework import context
from dragon.core.framework import workspace from dragon.core.framework import workspace
from dragon.core.util import nest from dragon.core.util import nest
from dragon.core.util import serialization
from dragon.vm.caffe import layers as layer_factory from dragon.vm.caffe import layers as layer_factory
from dragon.vm.caffe.proto import caffe_pb2 from dragon.vm.caffe.proto import caffe_pb2
...@@ -37,236 +38,83 @@ class Blob(object): ...@@ -37,236 +38,83 @@ class Blob(object):
class Net(object): class Net(object):
"""The abstraction ``caffe.Net``. """The abstraction ``caffe.Net``.
This class accepts a proto-text file, and an optional This class accepts a network file, and an optional parameter file.
serialized model weights. You can also specify a phase In addition, a phase tag indicates whether gradients are computed:
flag to indicate whether to compute the gradients:
```python ```python
train_net = Net('train.prototxt', 'TRAIN') net1 = caffe.Net('train.prototxt', 'TRAIN')
test_net = Net('test.prototxt', 'my.caffemodel', 'TEST') net2 = caffe.Net('test.prototxt', 'test.caffemodel', 'TEST')
``` ```
""" """
def __init__(self, *args): def __init__(self, *args):
"""Create a Net. """Create a ``Net``.
Parameters Parameters
---------- ----------
network_file : str net_file : str
The path of ``net.prototxt`` file. The path of text proto file to load network.
weights : str, optional param_file : str, optional
The path of the weights file. The path of binary proto file to load parameters.
phase : {'TRAIN', 'TEST'}, optional phase : {'TRAIN', 'TEST'}, optional
The optional phase. The optional phase tag.
""" """
if len(args) == 2: if len(args) == 2:
(net_file, self._phase), weights = args, None (net_file, self._phase), param_file = args, None
elif len(args) == 3: elif len(args) == 3:
net_file, weights, self._phase = args net_file, param_file, self._phase = args
else: else:
raise ValueError('Expected 2 or 3 args.') raise ValueError('Expected 2 or 3 args.')
self._net_proto = caffe_pb2.NetParameter()
self._blobs = {} self._blobs = {}
self._layers = [] self._layers = []
self._layer_blobs = [] self._layer_blobs = []
self._losses = [] self._losses = []
self._variables = [] self._params = []
self._blob_dict = None self._blob_dict = None
self._param_dict = None self._param_dict = None
self._input_list = None self._input_list = None
self._output_list = None self._output_list = None
# Parse the network file
with open(net_file, 'r') as f: with open(net_file, 'r') as f:
text_format.Parse(f.read(), self._net_proto) self._proto = text_format.Parse(f.read(), caffe_pb2.NetParameter())
# Construct the layer class from proto
if len(self._net_proto.input) > 0: for layer_param in self._proto.layer:
shapes = self._net_proto.input_shape if not self._filter_layer(layer_param):
for i, input_name in enumerate(self._net_proto.input):
shape = [e for e in shapes[i].dim] if i < len(shapes) else None
if input not in self._blobs:
data = Tensor(input_name, shape, 'float32').placeholder()
self._blobs[input_name] = {
'data': data,
'diff': TensorRef(data.id + '_grad', shape, data.dtype),
}
for layer in self._net_proto.layer:
if not self._filter_layer(layer):
continue continue
cls = getattr(layer_factory, layer.type) cls = getattr(layer_factory, layer_param.type)
self._layers.append(cls(layer)) with context.name_scope(layer_param.name):
self._layers.append(cls(layer_param))
# Prepare for the legacy net inputs
if len(self._proto.input) > 0:
layer_param = caffe_pb2.LayerParameter(
name='data',
type='Input',
top=self._proto.input,
input_param=caffe_pb2.InputParameter(
shape=self._proto.input_shape))
cls = getattr(layer_factory, layer_param.type)
with context.name_scope(layer_param.name):
self._layers.insert(0, cls(layer_param))
# Call layers sequentially to get outputs
self._setup() self._setup()
# Collect losses and parameters
for layer in self._net_proto.layer: for layer in self._proto.layer:
if not self._filter_layer(layer): if not self._filter_layer(layer):
continue continue
self._collect_losses_and_variables(layer) self._collect_losses_and_params(layer)
# Load the pre-trained weights if necessary
if weights is not None: if param_file is not None:
workspace.load(weights, format='caffe') self.copy_from(param_file)
def _filter_layer(self, layer_param):
"""Indicate whether the given layer should be included."""
phase_dict = {'TRAIN': 0, 'TEST': 1}
if layer_param.HasField('phase') and \
layer_param.phase != phase_dict[self._phase]:
return False
for include in layer_param.include:
if include.HasField('phase') and \
include.phase != phase_dict[self._phase]:
return False
layer_param.phase = phase_dict[self._phase]
return True
def _setup(self):
"""Connect the layers sequentially."""
self._net_outputs = set()
# Collect bottom and top blobs.
for layer in self._layers:
bottom = []
for blob in layer._bottom:
if blob not in self._blobs:
raise RuntimeError('bottom({}) is unknown.'.format(blob))
bottom.append(self._blobs[blob])
if blob in self._net_outputs:
self._net_outputs.remove(blob)
outputs = layer.setup([blob['data'] for blob in bottom])
outputs = nest.flatten(outputs)
for i, blob in enumerate(layer._top):
self._blobs[blob] = {
'data': outputs[i],
'diff': TensorRef(outputs[i].id + '_grad'),
}
self._net_outputs.add(blob)
# Collect layer param blobs.
for blobs in self.params.values():
self._layer_blobs.extend(blobs)
def _collect_losses_and_variables(self, layer_param):
"""Collect losses and variables."""
if layer_param.type.find('Loss') != -1:
if len(layer_param.loss_weight) == 0:
layer_param.loss_weight.extend([1.])
for i, loss_weight in enumerate(layer_param.loss_weight):
if loss_weight <= 0:
continue
self._losses.append(self.blobs[layer_param.top[i]].data)
else:
if len(layer_param.loss_weight) != 0:
for i, loss_weight in enumerate(layer_param.loss_weight):
if loss_weight <= 0:
continue
self._losses.append(self.blobs[layer_param.top[i]].data)
if self._phase != 'TRAIN':
return
if len(layer_param.param) > 0:
for i, p in enumerate(layer_param.param):
blob = self.params[layer_param.name][i]
blob.lr_multiplier = p.lr_mult if p.HasField('lr_mult') else 1.
blob.decay_multiplier = p.decay_mult if p.HasField('decay_mult') else 1.
if blob.diff is not None and blob.lr_multiplier > 0:
self._variables.append(blob.data)
else:
for blob in self.params[layer_param.name]:
if blob.diff is not None and blob.lr_multiplier > 0:
self._variables.append(blob.data)
@classmethod
def copy_from(cls, weights):
"""Copy the weights from the binary proto file.
Parameters
----------
weights : str
The path of the weights file.
"""
workspace.load(weights, format='caffe')
@def_function.function
def forward_backward(self, **kwargs):
"""Forward pass following by backward pass.
This function will be compiled to a computation graph
once executed, with implicit feeding of inputs.
"""
grad_impl.gradients(self._losses, self._variables)
return [self.blobs[key].data for key in self.outputs]
def forward(self, **inputs):
"""Forward pass.
Parameters
----------
inputs : dict, optional
The blobs to feed.
Returns
-------
callable
The callable to return outputs.
"""
for name, blob in inputs.items():
workspace.feed_tensor(self._blobs[name]['data'], blob)
self.forward_backward(return_outputs=False, stage='forward')
return lambda: dict(
(output, self.blobs[output].data.get_value())
for output in self.outputs
)
def backward(self, **diffs):
"""Backward pass.
Parameters
----------
diffs : dict, optional
The diffs to feed.
"""
for name, blob in diffs.items():
workspace.feed_tensor(self.blobs[name].diff, blob)
self.forward_backward(return_outputs=False, stage='backward')
def save(self, filename):
"""Save the parameters into a binary file.
Parameters
----------
filename : str
The path of model file.
"""
workspace.save(
tensors=[blob.data for blob in self._layer_blobs],
filename=filename, suffix='', format='caffe',
)
@property @property
def blobs(self): def blobs(self):
"""Return the blob dict. """Return the blob dict.
Blobs stored in the dict will be:
```python
for blob_name, blob in net.blobs():
print(blob.data) # DataTensor
print(blob.diff) # GradTensor
```
Returns Returns
------- -------
Dict dict
The blob dict. The blob dict.
""" """
...@@ -280,19 +128,9 @@ class Net(object): ...@@ -280,19 +128,9 @@ class Net(object):
def params(self): def params(self):
"""Return the parameter dict. """Return the parameter dict.
Parameters stored in the dict will be:
```python
for layer_name, blobs in net.params():
print(layer_name)
for blob in blobs:
print(' *', blob.data) # DataTensor
print(' *', blob.diff) # GradTensor
```
Returns Returns
------- -------
Dict dict
The parameter dict. The parameter dict.
""" """
...@@ -327,7 +165,7 @@ class Net(object): ...@@ -327,7 +165,7 @@ class Net(object):
""" """
if self._input_list is None: if self._input_list is None:
self._input_list = [input for input in self._net_proto.input] self._input_list = [input for input in self._proto.input]
return self._input_list return self._input_list
@property @property
...@@ -343,3 +181,194 @@ class Net(object): ...@@ -343,3 +181,194 @@ class Net(object):
if self._output_list is None: if self._output_list is None:
self._output_list = list(self._net_outputs) self._output_list = list(self._net_outputs)
return self._output_list return self._output_list
def backward(self, **diffs):
"""The backward pass.
Parameters
----------
diffs : dict, optional
The data to feed to the diffs.
"""
current_ws = workspace.get_workspace()
for name, blob in diffs.items():
current_ws.feed_tensor(self.blobs[name].diff, blob)
self._forward_backward_impl(return_outputs=False, stage='backward')
def copy_from(self, other):
"""Copy layers from the other.
Parameters
----------
other : Union[str, NetParameter]
The path of binary proto file or ``NetParameter``.
"""
if hasattr(other, 'ParseFromString') and \
callable(other.ParseFromString):
self.from_proto(other)
else:
self.from_proto(serialization.deserialize_proto(
serialization.load_bytes(other), caffe_pb2.NetParameter()))
def forward(self, **inputs):
"""The forward pass.
Parameters
----------
inputs : dict, optional
The data to feed to the inputs.
Returns
-------
callable
The callable to fetch outputs.
"""
current_ws = workspace.get_workspace()
for name, blob in inputs.items():
current_ws.feed_tensor(self._blobs[name]['data'], blob)
self._forward_backward_impl(return_outputs=False, stage='forward')
return lambda: dict(
(output, current_ws.fetch_tensor(self.blobs[output].data))
for output in self.outputs)
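A hedged usage sketch for the updated ``forward``: the prototxt name and the ``'data'`` blob are illustrative assumptions, while ``caffe.Net(...)`` and the returned fetch callable follow the docstrings above.

```python
import numpy
from dragon.vm import caffe

# Build a test-phase net, feed the input blob, then fetch all net outputs.
net = caffe.Net('deploy.prototxt', 'TEST')
fetch = net.forward(data=numpy.zeros((1, 3, 224, 224), 'float32'))
outputs = fetch()  # dict mapping each output blob name to its fetched value
```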
def forward_backward(self, **inputs):
"""The forward and backward pass.
Parameters
----------
inputs : dict, optional
The data to feed to the inputs.
Returns
-------
callable
The callable to fetch outputs.
"""
current_ws = workspace.get_workspace()
for name, blob in inputs.items():
current_ws.feed_tensor(self._blobs[name]['data'], blob)
self._forward_backward_impl(return_outputs=False)
return lambda: dict(
(output, current_ws.fetch_tensor(self.blobs[output].data))
for output in self.outputs)
def from_proto(self, proto):
"""Deserialize from the proto.
Parameters
----------
proto : NetParameter
The ``NetParameter`` protocol buffer.
"""
layer_dict = dict((layer.name, layer) for layer in proto.layer)
for layer in self._layers:
if layer.name in layer_dict:
layer.from_proto(layer_dict[layer.name])
def save(self, filepath):
"""Save proto into a binary file.
Parameters
----------
filepath : str
The path of binary proto file.
"""
serialization.save_bytes(
serialization.serialize_proto(
self.to_proto()), filepath)
def to_proto(self):
"""Serialize to the proto.
Returns
-------
NetParameter
The ``NetParameter`` protocol buffer.
"""
return caffe_pb2.NetParameter(
name=self._proto.name,
layer=[layer.to_proto() for layer in self._layers])
def _collect_losses_and_params(self, layer_param):
"""Collect losses and parameters."""
if layer_param.type.find('Loss') != -1:
if len(layer_param.loss_weight) == 0:
layer_param.loss_weight.extend([1.])
for i, loss_weight in enumerate(layer_param.loss_weight):
if loss_weight <= 0:
continue
self._losses.append(self.blobs[layer_param.top[i]].data)
else:
if len(layer_param.loss_weight) != 0:
for i, loss_weight in enumerate(layer_param.loss_weight):
if loss_weight <= 0:
continue
self._losses.append(self.blobs[layer_param.top[i]].data)
if self._phase != 'TRAIN':
return
if len(layer_param.param) > 0:
for i, p in enumerate(layer_param.param):
blob = self.params[layer_param.name][i]
blob.lr_multiplier = p.lr_mult if p.HasField('lr_mult') else 1.
blob.decay_multiplier = p.decay_mult if p.HasField('decay_mult') else 1.
if blob.diff is not None and blob.lr_multiplier > 0:
self._params.append(blob.data)
else:
for blob in self.params[layer_param.name]:
if blob.diff is not None and blob.lr_multiplier > 0:
self._params.append(blob.data)
def _filter_layer(self, layer_param):
"""Check if layer should be included."""
phase_dict = {'TRAIN': 0, 'TEST': 1}
if layer_param.HasField('phase') and \
layer_param.phase != phase_dict[self._phase]:
return False
for include in layer_param.include:
if include.HasField('phase') and \
include.phase != phase_dict[self._phase]:
return False
layer_param.phase = phase_dict[self._phase]
return True
@def_function.function
def _forward_backward_impl(self, **kwargs):
"""Implementation for ``self.forward_backward(...)``."""
grad_impl.gradients(self._losses, self._params)
return [self.blobs[key].data for key in self.outputs]
def _setup(self):
"""Connect the layers sequentially."""
self._net_outputs = set()
# Collect bottom and top blobs.
for layer_idx, layer in enumerate(self._layers):
bottom = []
for blob in layer._bottom:
if blob not in self._blobs:
raise RuntimeError('bottom({}) is unknown.'.format(blob))
bottom.append(self._blobs[blob])
if blob in self._net_outputs:
self._net_outputs.remove(blob)
if isinstance(layer, layer_factory.BatchNorm):
next_layer = self._layers[layer_idx + 1]
if isinstance(next_layer, layer_factory.Scale):
layer.fuse_with_scale_layer(next_layer)
with context.name_scope(layer._name):
outputs = layer.setup([blob['data'] for blob in bottom])
if outputs is not None:
outputs = nest.flatten(outputs)
for blob_idx, blob in enumerate(layer._top):
self._blobs[blob] = {
'data': outputs[blob_idx],
'diff': TensorRef(outputs[blob_idx].id + '_grad')}
self._net_outputs.add(blob)
# Collect layer param blobs.
for blobs in self.params.values():
self._layer_blobs.extend(blobs)
...@@ -3,25 +3,29 @@ syntax = "proto2"; ...@@ -3,25 +3,29 @@ syntax = "proto2";
package caffe; package caffe;
// Specifies the shape (dimensions) of a Blob. // Specifies the shape (dimensions) of a Blob.
message BlobShape { repeated int64 dim = 1 [ packed = true ]; } message BlobShape {
repeated int64 dim = 1 [packed = true];
}
message BlobProto { message BlobProto {
optional BlobShape shape = 7; optional BlobShape shape = 7;
repeated float data = 5 [ packed = true ]; repeated float data = 5 [packed = true];
repeated float diff = 6 [ packed = true ]; repeated float diff = 6 [packed = true];
repeated double double_data = 8 [ packed = true ]; repeated double double_data = 8 [packed = true];
repeated double double_diff = 9 [ packed = true ]; repeated double double_diff = 9 [packed = true];
// 4D dimensions -- deprecated. Use "shape" instead. // 4D dimensions -- deprecated. Use "shape" instead.
optional int32 num = 1 [ default = 0 ]; optional int32 num = 1 [default = 0];
optional int32 channels = 2 [ default = 0 ]; optional int32 channels = 2 [default = 0];
optional int32 height = 3 [ default = 0 ]; optional int32 height = 3 [default = 0];
optional int32 width = 4 [ default = 0 ]; optional int32 width = 4 [default = 0];
} }
// The BlobProtoVector is simply a way to pass multiple blobproto instances // The BlobProtoVector is simply a way to pass multiple blobproto instances
// around. // around.
message BlobProtoVector { repeated BlobProto blobs = 1; } message BlobProtoVector {
repeated BlobProto blobs = 1;
}
message Datum { message Datum {
optional int32 channels = 1; optional int32 channels = 1;
...@@ -33,21 +37,21 @@ message Datum { ...@@ -33,21 +37,21 @@ message Datum {
// Optionally, the datum could also hold float data. // Optionally, the datum could also hold float data.
repeated float float_data = 6; repeated float float_data = 6;
// If true, data contains an encoded image that needs to be decoded // If true, data contains an encoded image that needs to be decoded
optional bool encoded = 7 [ default = false ]; optional bool encoded = 7 [default = false];
repeated int32 labels = 8; repeated int32 labels = 8;
} }
message FillerParameter { message FillerParameter {
// The filler type. // The filler type.
optional string type = 1 [ default = 'constant' ]; optional string type = 1 [default = 'constant'];
optional float value = 2 [ default = 0 ]; // the value in constant filler optional float value = 2 [default = 0]; // the value in constant filler
optional float min = 3 [ default = 0 ]; // the min value in uniform filler optional float min = 3 [default = 0]; // the min value in uniform filler
optional float max = 4 [ default = 1 ]; // the max value in uniform filler optional float max = 4 [default = 1]; // the max value in uniform filler
optional float mean = 5 [ default = 0 ]; // the mean value in Gaussian filler optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
optional float std = 6 [ default = 1 ]; // the std value in Gaussian filler optional float std = 6 [default = 1]; // the std value in Gaussian filler
// The expected number of non-zero output weights for a given input in // The expected number of non-zero output weights for a given input in
// Gaussian filler -- the default -1 means don't perform sparsification. // Gaussian filler -- the default -1 means don't perform sparsification.
optional int32 sparse = 7 [ default = -1 ]; optional int32 sparse = 7 [default = -1];
// Normalize the filler variance by fan_in, fan_out, or their average. // Normalize the filler variance by fan_in, fan_out, or their average.
// Applies to 'xavier' and 'msra' fillers. // Applies to 'xavier' and 'msra' fillers.
enum VarianceNorm { enum VarianceNorm {
...@@ -55,11 +59,11 @@ message FillerParameter { ...@@ -55,11 +59,11 @@ message FillerParameter {
FAN_OUT = 1; FAN_OUT = 1;
AVERAGE = 2; AVERAGE = 2;
} }
optional VarianceNorm variance_norm = 8 [ default = FAN_IN ]; optional VarianceNorm variance_norm = 8 [default = FAN_IN];
} }
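// Example for FillerParameter above (illustrative): typical weight/bias
// fillers inside a layer definition --
//   weight_filler { type: "xavier" }
//   bias_filler { type: "constant" value: 0 }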
message NetParameter { message NetParameter {
optional string name = 1; // consider giving the network a name optional string name = 1; // consider giving the network a name
// DEPRECATED. See InputParameter. The input blobs to the network. // DEPRECATED. See InputParameter. The input blobs to the network.
repeated string input = 3; repeated string input = 3;
// DEPRECATED. See InputParameter. The shape of the input blobs. // DEPRECATED. See InputParameter. The shape of the input blobs.
...@@ -74,7 +78,7 @@ message NetParameter { ...@@ -74,7 +78,7 @@ message NetParameter {
// Whether the network will force every layer to carry out the backward operation. // Whether the network will force every layer to carry out the backward operation.
// If set False, then whether to carry out backward is determined // If set False, then whether to carry out backward is determined
// automatically according to the net structure and learning rates. // automatically according to the net structure and learning rates.
optional bool force_backward = 5 [ default = false ]; optional bool force_backward = 5 [default = false];
// The current "state" of the network, including the phase, level, and stage. // The current "state" of the network, including the phase, level, and stage.
// Some layers may be included/excluded depending on this state and the states // Some layers may be included/excluded depending on this state and the states
// specified in the layers' include and exclude fields. // specified in the layers' include and exclude fields.
...@@ -82,11 +86,11 @@ message NetParameter { ...@@ -82,11 +86,11 @@ message NetParameter {
// Print debugging information about results while running Net::Forward, // Print debugging information about results while running Net::Forward,
// Net::Backward, and Net::Update. // Net::Backward, and Net::Update.
optional bool debug_info = 7 [ default = false ]; optional bool debug_info = 7 [default = false];
// The layers that make up the net. Each of their configurations, including // The layers that make up the net. Each of their configurations, including
// connectivity and behavior, is specified as a LayerParameter. // connectivity and behavior, is specified as a LayerParameter.
repeated LayerParameter layer = 100; // ID 100 so layers are printed last. repeated LayerParameter layer = 100; // ID 100 so layers are printed last.
// DEPRECATED: use 'layer' instead. // DEPRECATED: use 'layer' instead.
repeated V1LayerParameter layers = 2; repeated V1LayerParameter layers = 2;
...@@ -117,10 +121,10 @@ message SolverParameter { ...@@ -117,10 +121,10 @@ message SolverParameter {
// Inline train net param, possibly combined with one or more test nets. // Inline train net param, possibly combined with one or more test nets.
optional NetParameter net_param = 25; optional NetParameter net_param = 25;
optional string train_net = 1; // Proto filename for the train net. optional string train_net = 1; // Proto filename for the train net.
repeated string test_net = 2; // Proto filenames for the test nets. repeated string test_net = 2; // Proto filenames for the test nets.
optional NetParameter train_net_param = 21; // Inline train net params. optional NetParameter train_net_param = 21; // Inline train net params.
repeated NetParameter test_net_param = 22; // Inline test net params. repeated NetParameter test_net_param = 22; // Inline test net params.
// The states for the train/test nets. Must be unspecified or // The states for the train/test nets. Must be unspecified or
// specified once per net. // specified once per net.
...@@ -136,22 +140,22 @@ message SolverParameter { ...@@ -136,22 +140,22 @@ message SolverParameter {
repeated int32 test_iter = 3; repeated int32 test_iter = 3;
// The number of iterations between two testing phases. // The number of iterations between two testing phases.
optional int32 test_interval = 4 [ default = 0 ]; optional int32 test_interval = 4 [default = 0];
optional bool test_compute_loss = 19 [ default = false ]; optional bool test_compute_loss = 19 [default = false];
// If true, run an initial test pass before the first iteration, // If true, run an initial test pass before the first iteration,
// ensuring memory availability and printing the starting value of the loss. // ensuring memory availability and printing the starting value of the loss.
optional bool test_initialization = 32 [ default = true ]; optional bool test_initialization = 32 [default = true];
optional float base_lr = 5; // The base learning rate optional float base_lr = 5; // The base learning rate
repeated float stage_lr = 50; repeated float stage_lr = 50;
repeated int32 stage_iter = 51; repeated int32 stage_iter = 51;
// the number of iterations between displaying info. If display = 0, no info // the number of iterations between displaying info. If display = 0, no info
// will be displayed. // will be displayed.
optional int32 display = 6; optional int32 display = 6;
// Display the loss averaged over the last average_loss iterations // Display the loss averaged over the last average_loss iterations
optional int32 average_loss = 33 [ default = 1 ]; optional int32 average_loss = 33 [default = 1];
optional int32 max_iter = 7; // the maximum number of iterations optional int32 max_iter = 7; // the maximum number of iterations
// accumulate gradients over `iter_size` x `batch_size` instances // accumulate gradients over `iter_size` x `batch_size` instances
optional int32 iter_size = 36 [ default = 1 ]; optional int32 iter_size = 36 [default = 1];
// The learning rate decay policy. The currently implemented learning rate // The learning rate decay policy. The currently implemented learning rate
// policies are as follows: // policies are as follows:
...@@ -169,13 +173,13 @@ message SolverParameter { ...@@ -169,13 +173,13 @@ message SolverParameter {
// where base_lr, max_iter, gamma, step, stepvalue and power are defined // where base_lr, max_iter, gamma, step, stepvalue and power are defined
// in the solver parameter protocol buffer, and iter is the current iteration. // in the solver parameter protocol buffer, and iter is the current iteration.
optional string lr_policy = 8; optional string lr_policy = 8;
optional float gamma = 9; // The parameter to compute the learning rate. optional float gamma = 9; // The parameter to compute the learning rate.
optional float power = 10; // The parameter to compute the learning rate. optional float power = 10; // The parameter to compute the learning rate.
optional float momentum = 11; // The momentum value. optional float momentum = 11; // The momentum value.
optional float weight_decay = 12; // The weight decay. optional float weight_decay = 12; // The weight decay.
// regularization types supported: L1 and L2 // regularization types supported: L1 and L2
// controlled by weight_decay // controlled by weight_decay
optional string regularization_type = 29 [ default = "L2" ]; optional string regularization_type = 29 [default = "L2"];
// the stepsize for learning rate policy "step" // the stepsize for learning rate policy "step"
optional int32 stepsize = 13; optional int32 stepsize = 13;
// the stepsize for learning rate policy "multistep" // the stepsize for learning rate policy "multistep"
...@@ -183,49 +187,49 @@ message SolverParameter { ...@@ -183,49 +187,49 @@ message SolverParameter {
// Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm, // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
// whenever their actual L2 norm is larger. // whenever their actual L2 norm is larger.
optional float clip_gradients = 35 [ default = -1 ]; optional float clip_gradients = 35 [default = -1];
optional int32 snapshot = 14 [ default = 0 ]; // The snapshot interval optional int32 snapshot = 14 [default = 0]; // The snapshot interval
optional string snapshot_prefix = 15; // The prefix for the snapshot. optional string snapshot_prefix = 15; // The prefix for the snapshot.
// whether to snapshot diff in the results or not. Snapshotting diff will help // whether to snapshot diff in the results or not. Snapshotting diff will help
// debugging but the final protocol buffer size will be much larger. // debugging but the final protocol buffer size will be much larger.
optional bool snapshot_diff = 16 [ default = false ]; optional bool snapshot_diff = 16 [default = false];
enum SnapshotFormat { enum SnapshotFormat {
HDF5 = 0; HDF5 = 0;
BINARYPROTO = 1; BINARYPROTO = 1;
} }
optional SnapshotFormat snapshot_format = 37 [ default = BINARYPROTO ]; optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];
// the mode the solver will use: 0 for CPU and 1 for GPU. Uses GPU by default. // the mode the solver will use: 0 for CPU and 1 for GPU. Uses GPU by default.
enum SolverMode { enum SolverMode {
CPU = 0; CPU = 0;
GPU = 1; GPU = 1;
} }
optional SolverMode solver_mode = 17 [ default = GPU ]; optional SolverMode solver_mode = 17 [default = GPU];
// the device_id that will be used in GPU mode. Uses device_id = 0 by default. // the device_id that will be used in GPU mode. Uses device_id = 0 by default.
optional int32 device_id = 18 [ default = 0 ]; optional int32 device_id = 18 [default = 0];
// If non-negative, the seed with which the Solver will initialize the Caffe // If non-negative, the seed with which the Solver will initialize the Caffe
// random number generator -- useful for reproducible results. Otherwise, // random number generator -- useful for reproducible results. Otherwise,
// (and by default) initialize using a seed derived from the system clock. // (and by default) initialize using a seed derived from the system clock.
optional int64 random_seed = 20 [ default = -1 ]; optional int64 random_seed = 20 [default = -1];
// type of the solver // type of the solver
optional string type = 40 [ default = "SGD" ]; optional string type = 40 [default = "SGD"];
// numerical stability for RMSProp, AdaGrad and AdaDelta and Adam // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam
optional float delta = 31 [ default = 1e-8 ]; optional float delta = 31 [default = 1e-8];
// parameters for the Adam solver // parameters for the Adam solver
optional float momentum2 = 39 [ default = 0.999 ]; optional float momentum2 = 39 [default = 0.999];
// RMSProp decay value // RMSProp decay value
// MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
optional float rms_decay = 38 [ default = 0.99 ]; optional float rms_decay = 38 [default = 0.99];
// If true, print information about the state of the net that may help with // If true, print information about the state of the net that may help with
// debugging learning problems. // debugging learning problems.
optional bool debug_info = 23 [ default = false ]; optional bool debug_info = 23 [default = false];
// If false, don't save a snapshot after training finishes. // If false, don't save a snapshot after training finishes.
optional bool snapshot_after_train = 28 [ default = true ]; optional bool snapshot_after_train = 28 [default = true];
// DEPRECATED: old solver enum types, use string instead // DEPRECATED: old solver enum types, use string instead
enum SolverType { enum SolverType {
...@@ -237,16 +241,16 @@ message SolverParameter { ...@@ -237,16 +241,16 @@ message SolverParameter {
ADAM = 5; ADAM = 5;
} }
// DEPRECATED: use type instead of solver_type // DEPRECATED: use type instead of solver_type
optional SolverType solver_type = 30 [ default = SGD ]; optional SolverType solver_type = 30 [default = SGD];
} }
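// Example for SolverParameter above (illustrative): a minimal SGD solver --
//   train_net: "train.prototxt"
//   base_lr: 0.01
//   lr_policy: "step"
//   gamma: 0.1
//   stepsize: 10000
//   momentum: 0.9
//   weight_decay: 0.0005
//   max_iter: 45000
//   snapshot: 5000
//   snapshot_prefix: "snapshots/example"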
// A message that stores the solver snapshots // A message that stores the solver snapshots
message SolverState { message SolverState {
optional int32 iter = 1; // The current iteration optional int32 iter = 1; // The current iteration
optional string learned_net = 2; // The file that stores the learned net. optional string learned_net = 2; // The file that stores the learned net.
repeated BlobProto history = 3; // The history for sgd solvers repeated BlobProto history = 3; // The history for sgd solvers
optional int32 current_step = 4 optional int32 current_step = 4
[ default = 0 ]; // The current step for learning rate [default = 0]; // The current step for learning rate
} }
enum Phase { enum Phase {
...@@ -255,8 +259,8 @@ enum Phase { ...@@ -255,8 +259,8 @@ enum Phase {
} }
message NetState { message NetState {
optional Phase phase = 1 [ default = TEST ]; optional Phase phase = 1 [default = TEST];
optional int32 level = 2 [ default = 0 ]; optional int32 level = 2 [default = 0];
repeated string stage = 3; repeated string stage = 3;
} }
...@@ -297,10 +301,10 @@ message ParamSpec { ...@@ -297,10 +301,10 @@ message ParamSpec {
} }
// The multiplier on the global learning rate for this parameter. // The multiplier on the global learning rate for this parameter.
optional float lr_mult = 3 [ default = 1.0 ]; optional float lr_mult = 3 [default = 1.0];
// The multiplier on the global weight decay for this parameter. // The multiplier on the global weight decay for this parameter.
optional float decay_mult = 4 [ default = 1.0 ]; optional float decay_mult = 4 [default = 1.0];
} }
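// Example for ParamSpec above (illustrative): per-parameter multipliers for a
// layer with learned weight and bias blobs; lr_mult: 0 effectively freezes a
// blob (see also the parameter collection in the Python code above) --
//   param { lr_mult: 1 decay_mult: 1 }
//   param { lr_mult: 2 decay_mult: 0 }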
// NOTE // NOTE
...@@ -309,13 +313,13 @@ message ParamSpec { ...@@ -309,13 +313,13 @@ message ParamSpec {
// LayerParameter next available layer-specific ID: 146 (last added: // LayerParameter next available layer-specific ID: 146 (last added:
// parameter_param) // parameter_param)
message LayerParameter { message LayerParameter {
optional string name = 1; // the layer name optional string name = 1; // the layer name
optional string type = 2; // the layer type optional string type = 2; // the layer type
repeated string bottom = 3; // the name of each bottom blob repeated string bottom = 3; // the name of each bottom blob
repeated string top = 4; // the name of each top blob repeated string top = 4; // the name of each top blob
// The mirror stage optimization // The mirror stage optimization
optional bool mirror_stage = 150 [ default = false ]; optional bool mirror_stage = 150 [default = false];
// The train / test phase for computation. // The train / test phase for computation.
optional Phase phase = 10; optional Phase phase = 10;
...@@ -411,8 +415,6 @@ message LayerParameter { ...@@ -411,8 +415,6 @@ message LayerParameter {
optional SmoothL1LossParameter smooth_l1_loss_param = 152; optional SmoothL1LossParameter smooth_l1_loss_param = 152;
optional PermuteParameter permute_param = 153; optional PermuteParameter permute_param = 153;
optional NormalizeParameter normalize_param = 154; optional NormalizeParameter normalize_param = 154;
optional GroupNormParameter group_norm_param = 155;
optional CastParameter cast_param = 156;
} }
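// Example for LayerParameter above (illustrative, hypothetical names): a layer
// instantiated only for the TRAIN phase --
//   layer {
//     name: "conv1"
//     type: "Convolution"
//     bottom: "data"
//     top: "conv1"
//     mirror_stage: true
//     include { phase: TRAIN }
//   }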
// Message that stores parameters used to apply transformation // Message that stores parameters used to apply transformation
...@@ -421,11 +423,11 @@ message TransformationParameter { ...@@ -421,11 +423,11 @@ message TransformationParameter {
// For data pre-processing, we can do simple scaling and subtracting the // For data pre-processing, we can do simple scaling and subtracting the
// data mean, if provided. Note that the mean subtraction is always carried // data mean, if provided. Note that the mean subtraction is always carried
// out before scaling. // out before scaling.
optional float scale = 1 [ default = 1 ]; optional float scale = 1 [default = 1];
// Specify if we want to randomly mirror data. // Specify if we want to randomly mirror data.
optional bool mirror = 2 [ default = false ]; optional bool mirror = 2 [default = false];
// Specify if we would like to randomly crop an image. // Specify if we would like to randomly crop an image.
optional uint32 crop_size = 3 [ default = 0 ]; optional uint32 crop_size = 3 [default = 0];
// mean_file and mean_value cannot be specified at the same time // mean_file and mean_value cannot be specified at the same time
optional string mean_file = 4; optional string mean_file = 4;
// if specified, can be repeated once (would subtract it from all the // if specified, can be repeated once (would subtract it from all the
...@@ -433,17 +435,17 @@ message TransformationParameter { ...@@ -433,17 +435,17 @@ message TransformationParameter {
// subtract them from the corresponding channel) // subtract them from the corresponding channel)
repeated float mean_value = 5; repeated float mean_value = 5;
// Force the decoded image to have 3 color channels. // Force the decoded image to have 3 color channels.
optional bool force_color = 6 [ default = false ]; optional bool force_color = 6 [default = false];
// Force the decoded image to have 1 color channels. // Force the decoded image to have 1 color channels.
optional bool force_gray = 7 [ default = false ]; optional bool force_gray = 7 [default = false];
// Distort the color? // Distort the color?
optional bool augment_color = 9 [ default = false ]; optional bool augment_color = 9 [default = false];
// Target size. // Target size.
optional uint32 resize = 10 [ default = 0 ]; optional uint32 resize = 10 [default = 0];
// Padding size. // Padding size.
optional uint32 padding = 11 [ default = 0 ]; optional uint32 padding = 11 [default = 0];
// Crop size during scale jittering // Crop size during scale jittering
optional uint32 random_crop_size = 12 [ default = 0 ]; optional uint32 random_crop_size = 12 [default = 0];
} }
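// Example for TransformationParameter above (illustrative): scale pixels to
// [0, 1), mirror randomly, take 224x224 crops, and subtract a per-channel
// mean --
//   scale: 0.00390625
//   mirror: true
//   crop_size: 224
//   mean_value: 104
//   mean_value: 117
//   mean_value: 123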
// Message that stores parameters shared by loss layers // Message that stores parameters shared by loss layers
...@@ -467,7 +469,7 @@ message LossParameter { ...@@ -467,7 +469,7 @@ message LossParameter {
// Do not normalize the loss. // Do not normalize the loss.
NONE = 3; NONE = 3;
} }
optional NormalizationMode normalization = 3 [ default = VALID ]; optional NormalizationMode normalization = 3 [default = VALID];
// Deprecated. Ignored if normalization is specified. If normalization // Deprecated. Ignored if normalization is specified. If normalization
// is not specified, then setting this to false will be equivalent to // is not specified, then setting this to false will be equivalent to
// normalization = BATCH_SIZE to be consistent with previous behavior. // normalization = BATCH_SIZE to be consistent with previous behavior.
...@@ -481,14 +483,14 @@ message AccuracyParameter { ...@@ -481,14 +483,14 @@ message AccuracyParameter {
// When computing accuracy, count as correct by comparing the true label to // When computing accuracy, count as correct by comparing the true label to
// the top k scoring classes. By default, only compare to the top scoring // the top k scoring classes. By default, only compare to the top scoring
// class (i.e. argmax). // class (i.e. argmax).
optional uint32 top_k = 1 [ default = 1 ]; optional uint32 top_k = 1 [default = 1];
// The "label" axis of the prediction blob, whose argmax corresponds to the // The "label" axis of the prediction blob, whose argmax corresponds to the
// predicted label -- may be negative to index from the end (e.g., -1 for the // predicted label -- may be negative to index from the end (e.g., -1 for the
// last axis). For example, if axis == 1 and the predictions are // last axis). For example, if axis == 1 and the predictions are
// (N x C x H x W), the label blob is expected to contain N*H*W ground truth // (N x C x H x W), the label blob is expected to contain N*H*W ground truth
// labels with integer values in {0, 1, ..., C-1}. // labels with integer values in {0, 1, ..., C-1}.
optional int32 axis = 2 [ default = 1 ]; optional int32 axis = 2 [default = 1];
// If specified, ignore instances with the given label. // If specified, ignore instances with the given label.
optional int32 ignore_label = 3; optional int32 ignore_label = 3;
...@@ -496,8 +498,8 @@ message AccuracyParameter { ...@@ -496,8 +498,8 @@ message AccuracyParameter {
message ArgMaxParameter { message ArgMaxParameter {
// If true produce pairs (argmax, maxval) // If true produce pairs (argmax, maxval)
optional bool out_max_val = 1 [ default = false ]; optional bool out_max_val = 1 [default = false];
optional uint32 top_k = 2 [ default = 1 ]; optional uint32 top_k = 2 [default = 1];
// The axis along which to maximise -- may be negative to index from the // The axis along which to maximise -- may be negative to index from the
// end (e.g., -1 for the last axis). // end (e.g., -1 for the last axis).
// By default ArgMaxLayer maximizes over the flattened trailing dimensions // By default ArgMaxLayer maximizes over the flattened trailing dimensions
...@@ -510,10 +512,10 @@ message ConcatParameter { ...@@ -510,10 +512,10 @@ message ConcatParameter {
// end (e.g., -1 for the last axis). Other axes must have the // end (e.g., -1 for the last axis). Other axes must have the
// same dimension for all the bottom blobs. // same dimension for all the bottom blobs.
// By default, ConcatLayer concatenates blobs along the "channels" axis (1). // By default, ConcatLayer concatenates blobs along the "channels" axis (1).
optional int32 axis = 2 [ default = 1 ]; optional int32 axis = 2 [default = 1];
// DEPRECATED: alias for "axis" -- does not support negative indexing. // DEPRECATED: alias for "axis" -- does not support negative indexing.
optional uint32 concat_dim = 1 [ default = 1 ]; optional uint32 concat_dim = 1 [default = 1];
} }
message BatchNormParameter { message BatchNormParameter {
...@@ -522,10 +524,10 @@ message BatchNormParameter { ...@@ -522,10 +524,10 @@ message BatchNormParameter {
// across the batch. // across the batch.
optional bool use_global_stats = 1; optional bool use_global_stats = 1;
// How much does the moving average decay each iteration? // How much does the moving average decay each iteration?
optional float moving_average_fraction = 2 [ default = 0.9 ]; optional float moving_average_fraction = 2 [default = 0.9];
// Small value to add to the variance estimate so that we don't divide by // Small value to add to the variance estimate so that we don't divide by
// zero. // zero.
optional float eps = 3 [ default = 1e-5 ]; optional float eps = 3 [default = 1e-5];
} }
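// Example for BatchNormParameter above (illustrative): inference-style
// normalization with frozen statistics --
//   use_global_stats: true
//   moving_average_fraction: 0.9
//   eps: 1e-5
// Note that a BatchNorm layer immediately followed by a Scale layer is fused
// during net setup (see ``fuse_with_scale_layer`` in the Python code above).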
message BiasParameter { message BiasParameter {
...@@ -542,7 +544,7 @@ message BiasParameter { ...@@ -542,7 +544,7 @@ message BiasParameter {
// (axis == 3 == -1) 60 // (axis == 3 == -1) 60
// Furthermore, bottom[1] may have the empty shape (regardless of the value of // Furthermore, bottom[1] may have the empty shape (regardless of the value of
// "axis") -- a scalar bias. // "axis") -- a scalar bias.
optional int32 axis = 1 [ default = 1 ]; optional int32 axis = 1 [default = 1];
// (num_axes is ignored unless just one bottom is given and the bias is // (num_axes is ignored unless just one bottom is given and the bias is
// a learned parameter of the layer. Otherwise, num_axes is determined by the // a learned parameter of the layer. Otherwise, num_axes is determined by the
...@@ -550,7 +552,7 @@ message BiasParameter { ...@@ -550,7 +552,7 @@ message BiasParameter {
// The number of axes of the input (bottom[0]) covered by the bias // The number of axes of the input (bottom[0]) covered by the bias
// parameter, or -1 to cover all axes of bottom[0] starting from `axis`. // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
// Set num_axes := 0, to add a zero-axis Blob: a scalar. // Set num_axes := 0, to add a zero-axis Blob: a scalar.
optional int32 num_axes = 2 [ default = 1 ]; optional int32 num_axes = 2 [default = 1];
// (filler is ignored unless just one bottom is given and the bias is // (filler is ignored unless just one bottom is given and the bias is
// a learned parameter of the layer.) // a learned parameter of the layer.)
...@@ -562,49 +564,49 @@ message BiasParameter { ...@@ -562,49 +564,49 @@ message BiasParameter {
message ContrastiveLossParameter { message ContrastiveLossParameter {
// margin for dissimilar pair // margin for dissimilar pair
optional float margin = 1 [ default = 1.0 ]; optional float margin = 1 [default = 1.0];
// The first implementation of this cost did not exactly match the cost of // The first implementation of this cost did not exactly match the cost of
// Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2.
// legacy_version = false (the default) uses (margin - d)^2 as proposed in the // legacy_version = false (the default) uses (margin - d)^2 as proposed in the
// Hadsell paper. New models should probably use this version. // Hadsell paper. New models should probably use this version.
// legacy_version = true uses (margin - d^2). This is kept to support / // legacy_version = true uses (margin - d^2). This is kept to support /
// reproduce existing models and results // reproduce existing models and results
optional bool legacy_version = 2 [ default = false ]; optional bool legacy_version = 2 [default = false];
} }
message ConvolutionParameter { message ConvolutionParameter {
optional uint32 num_output = 1; // The number of outputs for the layer optional uint32 num_output = 1; // The number of outputs for the layer
optional bool bias_term = 2 [ default = true ]; // whether to have bias terms optional bool bias_term = 2 [default = true]; // whether to have bias terms
// Pad, kernel size, and stride are all given as a single value for equal // Pad, kernel size, and stride are all given as a single value for equal
// dimensions in all spatial dimensions, or once per spatial dimension. // dimensions in all spatial dimensions, or once per spatial dimension.
repeated uint32 pad = 3; // The padding size; defaults to 0 repeated uint32 pad = 3; // The padding size; defaults to 0
repeated uint32 kernel_size = 4; // The kernel size repeated uint32 kernel_size = 4; // The kernel size
repeated uint32 stride = 6; // The stride; defaults to 1 repeated uint32 stride = 6; // The stride; defaults to 1
// Factor used to dilate the kernel, (implicitly) zero-filling the resulting // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
// holes. (Kernel dilation is sometimes referred to by its use in the // holes. (Kernel dilation is sometimes referred to by its use in the
// algorithme à trous from Holschneider et al. 1987.) // algorithme à trous from Holschneider et al. 1987.)
repeated uint32 dilation = 18; // The dilation; defaults to 1 repeated uint32 dilation = 18; // The dilation; defaults to 1
// For 2D convolution only, the *_h and *_w versions may also be used to // For 2D convolution only, the *_h and *_w versions may also be used to
// specify both spatial dimensions. // specify both spatial dimensions.
optional uint32 pad_h = 9 [ default = 0 ]; // The padding height (2D only) optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only)
optional uint32 pad_w = 10 [ default = 0 ]; // The padding width (2D only) optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only)
optional uint32 kernel_h = 11; // The kernel height (2D only) optional uint32 kernel_h = 11; // The kernel height (2D only)
optional uint32 kernel_w = 12; // The kernel width (2D only) optional uint32 kernel_w = 12; // The kernel width (2D only)
optional uint32 stride_h = 13; // The stride height (2D only) optional uint32 stride_h = 13; // The stride height (2D only)
optional uint32 stride_w = 14; // The stride width (2D only) optional uint32 stride_w = 14; // The stride width (2D only)
optional uint32 group = 5 [ default = 1 ]; // The group size for group conv optional uint32 group = 5 [default = 1]; // The group size for group conv
optional FillerParameter weight_filler = 7; // The filler for the weight optional FillerParameter weight_filler = 7; // The filler for the weight
optional FillerParameter bias_filler = 8; // The filler for the bias optional FillerParameter bias_filler = 8; // The filler for the bias
enum Engine { enum Engine {
DEFAULT = 0; DEFAULT = 0;
CAFFE = 1; CAFFE = 1;
CUDNN = 2; CUDNN = 2;
} }
optional Engine engine = 15 [ default = DEFAULT ]; optional Engine engine = 15 [default = DEFAULT];
// The axis to interpret as "channels" when performing convolution. // The axis to interpret as "channels" when performing convolution.
// Preceding dimensions are treated as independent inputs; // Preceding dimensions are treated as independent inputs;
...@@ -615,14 +617,14 @@ message ConvolutionParameter { ...@@ -615,14 +617,14 @@ message ConvolutionParameter {
// With (N, C, D, H, W) inputs, and axis == 1, we perform // With (N, C, D, H, W) inputs, and axis == 1, we perform
// N independent 3D convolutions, sliding (C/g)-channels // N independent 3D convolutions, sliding (C/g)-channels
// filters across the spatial axes (D, H, W) of the input. // filters across the spatial axes (D, H, W) of the input.
optional int32 axis = 16 [ default = 1 ]; optional int32 axis = 16 [default = 1];
// Whether to force use of the general ND convolution, even if a specific // Whether to force use of the general ND convolution, even if a specific
// implementation for blobs of the appropriate number of spatial dimensions // implementation for blobs of the appropriate number of spatial dimensions
// is available. (Currently, there is only a 2D-specific convolution // is available. (Currently, there is only a 2D-specific convolution
// implementation; for input blobs with num_axes != 2, this option is // implementation; for input blobs with num_axes != 2, this option is
// ignored and the ND implementation will be used.) // ignored and the ND implementation will be used.)
optional bool force_nd_im2col = 17 [ default = false ]; optional bool force_nd_im2col = 17 [default = false];
} }
message CropParameter { message CropParameter {
...@@ -639,7 +641,7 @@ message CropParameter { ...@@ -639,7 +641,7 @@ message CropParameter {
// Note: standard dimensions are N,C,H,W so the default is a spatial crop, // Note: standard dimensions are N,C,H,W so the default is a spatial crop,
// and `axis` may be negative to index from the end (e.g., -1 for the last // and `axis` may be negative to index from the end (e.g., -1 for the last
// axis). // axis).
optional int32 axis = 1 [ default = 2 ]; optional int32 axis = 1 [default = 2];
repeated uint32 offset = 2; repeated uint32 offset = 2;
} }
...@@ -657,33 +659,33 @@ message DataParameter { ...@@ -657,33 +659,33 @@ message DataParameter {
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
// be larger than the number of keys in the database. // be larger than the number of keys in the database.
// DEPRECATED. Each solver accesses a different subset of the database. // DEPRECATED. Each solver accesses a different subset of the database.
optional uint32 rand_skip = 7 [ default = 0 ]; optional uint32 rand_skip = 7 [default = 0];
optional DB backend = 8 [ default = LEVELDB ]; optional DB backend = 8 [default = LEVELDB];
// DEPRECATED. See TransformationParameter. For data pre-processing, we can do // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
// simple scaling and subtracting the data mean, if provided. Note that the // simple scaling and subtracting the data mean, if provided. Note that the
// mean subtraction is always carried out before scaling. // mean subtraction is always carried out before scaling.
optional float scale = 2 [ default = 1 ]; optional float scale = 2 [default = 1];
optional string mean_file = 3; optional string mean_file = 3;
// DEPRECATED. See TransformationParameter. Specify if we would like to // DEPRECATED. See TransformationParameter. Specify if we would like to
// randomly crop an image. // randomly crop an image.
optional uint32 crop_size = 5 [ default = 0 ]; optional uint32 crop_size = 5 [default = 0];
// DEPRECATED. See TransformationParameter. Specify if we want to randomly // DEPRECATED. See TransformationParameter. Specify if we want to randomly
// mirror data. // mirror data.
optional bool mirror = 6 [ default = false ]; optional bool mirror = 6 [default = false];
// Force the encoded image to have 3 color channels // Force the encoded image to have 3 color channels
optional bool force_encoded_color = 9 [ default = false ]; optional bool force_encoded_color = 9 [default = false];
// Prefetch queue (Number of batches to prefetch to host memory, increase if // Prefetch queue (Number of batches to prefetch to host memory, increase if
// data access bandwidth varies). // data access bandwidth varies).
optional uint32 prefetch = 10 [ default = 5 ]; optional uint32 prefetch = 10 [default = 5];
// Whether to shuffle the data. // Whether to shuffle the data.
optional bool shuffle = 11 [ default = false ]; optional bool shuffle = 11 [default = false];
// The number of chunks to shuffle. // The number of chunks to shuffle.
optional int32 num_chunks = 12 [ default = 2048 ]; optional int32 num_chunks = 12 [default = 2048];
} }
message DropoutParameter { message DropoutParameter {
optional float dropout_ratio = 1 [ default = 0.5 ]; // dropout ratio optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
optional bool scale_train = 2 [ default = true ]; // scale train or test phase optional bool scale_train = 2 [default = true]; // scale train or test phase
} }
// DummyDataLayer fills any number of arbitrarily shaped blobs with random // DummyDataLayer fills any number of arbitrarily shaped blobs with random
...@@ -711,12 +713,12 @@ message EltwiseParameter { ...@@ -711,12 +713,12 @@ message EltwiseParameter {
SUM = 1; SUM = 1;
MAX = 2; MAX = 2;
} }
optional EltwiseOp operation = 1 [ default = SUM ]; // element-wise operation optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
repeated float coeff = 2; // blob-wise coefficient for SUM operation repeated float coeff = 2; // blob-wise coefficient for SUM operation
// Whether to use an asymptotically slower (for >2 inputs) but stabler method // Whether to use an asymptotically slower (for >2 inputs) but stabler method
// of computing the gradient for the PROD operation. (No effect for SUM op.) // of computing the gradient for the PROD operation. (No effect for SUM op.)
optional bool stable_prod_grad = 3 [ default = true ]; optional bool stable_prod_grad = 3 [default = true];
} }
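// Example for EltwiseParameter above (illustrative): an element-wise
// difference of two bottoms, expressed as a weighted SUM --
//   operation: SUM
//   coeff: 1
//   coeff: -1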
// Message that stores parameters used by ELULayer // Message that stores parameters used by ELULayer
...@@ -724,20 +726,20 @@ message ELUParameter { ...@@ -724,20 +726,20 @@ message ELUParameter {
// Described in: // Described in:
// Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate
// Deep Network Learning by Exponential Linear Units (ELUs). arXiv // Deep Network Learning by Exponential Linear Units (ELUs). arXiv
optional float alpha = 1 [ default = 1 ]; optional float alpha = 1 [default = 1];
} }
// Message that stores parameters used by EmbedLayer // Message that stores parameters used by EmbedLayer
message EmbedParameter { message EmbedParameter {
optional uint32 num_output = 1; // The number of outputs for the layer optional uint32 num_output = 1; // The number of outputs for the layer
// The input is given as integers to be interpreted as one-hot // The input is given as integers to be interpreted as one-hot
// vector indices with dimension num_input. Hence num_input should be // vector indices with dimension num_input. Hence num_input should be
// 1 greater than the maximum possible input value. // 1 greater than the maximum possible input value.
optional uint32 input_dim = 2; optional uint32 input_dim = 2;
optional bool bias_term = 3 [ default = true ]; // Whether to use a bias term optional bool bias_term = 3 [default = true]; // Whether to use a bias term
optional FillerParameter weight_filler = 4; // The filler for the weight optional FillerParameter weight_filler = 4; // The filler for the weight
optional FillerParameter bias_filler = 5; // The filler for the bias optional FillerParameter bias_filler = 5; // The filler for the bias
} }
// Message that stores parameters used by ExpLayer // Message that stores parameters used by ExpLayer
...@@ -745,21 +747,21 @@ message ExpParameter { ...@@ -745,21 +747,21 @@ message ExpParameter {
// ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0.
// Or if base is set to the default (-1), base is set to e, // Or if base is set to the default (-1), base is set to e,
// so y = exp(shift + scale * x). // so y = exp(shift + scale * x).
optional float base = 1 [ default = -1.0 ]; optional float base = 1 [default = -1.0];
optional float scale = 2 [ default = 1.0 ]; optional float scale = 2 [default = 1.0];
optional float shift = 3 [ default = 0.0 ]; optional float shift = 3 [default = 0.0];
} }
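// Example for ExpParameter above (illustrative): with the defaults
// (base = -1, scale = 1, shift = 0) the layer computes y = exp(x);
// setting base: 2 gives y = 2 ^ x, and scale: -1 gives y = exp(-x).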
/// Message that stores parameters used by FlattenLayer /// Message that stores parameters used by FlattenLayer
message FlattenParameter { message FlattenParameter {
// The first axis to flatten: all preceding axes are retained in the output. // The first axis to flatten: all preceding axes are retained in the output.
// May be negative to index from the end (e.g., -1 for the last axis). // May be negative to index from the end (e.g., -1 for the last axis).
optional int32 axis = 1 [ default = 1 ]; optional int32 axis = 1 [default = 1];
// The last axis to flatten: all following axes are retained in the output. // The last axis to flatten: all following axes are retained in the output.
// May be negative to index from the end (e.g., the default -1 for the last // May be negative to index from the end (e.g., the default -1 for the last
// axis). // axis).
optional int32 end_axis = 2 [ default = -1 ]; optional int32 end_axis = 2 [default = -1];
} }
// Message that stores parameters used by HDF5DataLayer // Message that stores parameters used by HDF5DataLayer
...@@ -774,10 +776,12 @@ message HDF5DataParameter { ...@@ -774,10 +776,12 @@ message HDF5DataParameter {
// and the ordering of data within any given HDF5 file is shuffled, // and the ordering of data within any given HDF5 file is shuffled,
// but data between different files are not interleaved; all of a file's // but data between different files are not interleaved; all of a file's
// data are output (in a random order) before moving onto another file. // data are output (in a random order) before moving onto another file.
optional bool shuffle = 3 [ default = false ]; optional bool shuffle = 3 [default = false];
} }
message HDF5OutputParameter { optional string file_name = 1; } message HDF5OutputParameter {
optional string file_name = 1;
}
message HingeLossParameter { message HingeLossParameter {
enum Norm { enum Norm {
...@@ -785,38 +789,38 @@ message HingeLossParameter { ...@@ -785,38 +789,38 @@ message HingeLossParameter {
L2 = 2; L2 = 2;
} }
// Specify the Norm to use L1 or L2 // Specify the Norm to use L1 or L2
optional Norm norm = 1 [ default = L1 ]; optional Norm norm = 1 [default = L1];
} }
message ImageDataParameter { message ImageDataParameter {
// Specify the data source. // Specify the data source.
optional string source = 1; optional string source = 1;
// Specify the batch size. // Specify the batch size.
optional uint32 batch_size = 4 [ default = 1 ]; optional uint32 batch_size = 4 [default = 1];
// The rand_skip variable is for the data layer to skip a few data points // The rand_skip variable is for the data layer to skip a few data points
// to keep all asynchronous sgd clients from starting at the same point. The skip // to keep all asynchronous sgd clients from starting at the same point. The skip
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
// be larger than the number of keys in the database. // be larger than the number of keys in the database.
optional uint32 rand_skip = 7 [ default = 0 ]; optional uint32 rand_skip = 7 [default = 0];
// Whether or not ImageLayer should shuffle the list of files at every epoch. // Whether or not ImageLayer should shuffle the list of files at every epoch.
optional bool shuffle = 8 [ default = false ]; optional bool shuffle = 8 [default = false];
// It will also resize images if new_height or new_width are not zero. // It will also resize images if new_height or new_width are not zero.
optional uint32 new_height = 9 [ default = 0 ]; optional uint32 new_height = 9 [default = 0];
optional uint32 new_width = 10 [ default = 0 ]; optional uint32 new_width = 10 [default = 0];
// Specify if the images are color or gray // Specify if the images are color or gray
optional bool is_color = 11 [ default = true ]; optional bool is_color = 11 [default = true];
// DEPRECATED. See TransformationParameter. For data pre-processing, we can do // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
// simple scaling and subtracting the data mean, if provided. Note that the // simple scaling and subtracting the data mean, if provided. Note that the
// mean subtraction is always carried out before scaling. // mean subtraction is always carried out before scaling.
optional float scale = 2 [ default = 1 ]; optional float scale = 2 [default = 1];
optional string mean_file = 3; optional string mean_file = 3;
// DEPRECATED. See TransformationParameter. Specify if we would like to // DEPRECATED. See TransformationParameter. Specify if we would like to
// randomly crop an image. // randomly crop an image.
optional uint32 crop_size = 5 [ default = 0 ]; optional uint32 crop_size = 5 [default = 0];
// DEPRECATED. See TransformationParameter. Specify if we want to randomly // DEPRECATED. See TransformationParameter. Specify if we want to randomly
// mirror data. // mirror data.
optional bool mirror = 6 [ default = false ]; optional bool mirror = 6 [default = false];
optional string root_folder = 12 [ default = "" ]; optional string root_folder = 12 [default = ""];
} }
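// Example for ImageDataParameter above (illustrative): read an image/label
// list, resize to 256x256, and reshuffle every epoch --
//   source: "train_list.txt"
//   batch_size: 32
//   shuffle: true
//   new_height: 256
//   new_width: 256
//   root_folder: "/data/images/"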
message InfogainLossParameter { message InfogainLossParameter {
...@@ -825,21 +829,21 @@ message InfogainLossParameter { ...@@ -825,21 +829,21 @@ message InfogainLossParameter {
} }
message InnerProductParameter { message InnerProductParameter {
optional uint32 num_output = 1; // The number of outputs for the layer optional uint32 num_output = 1; // The number of outputs for the layer
optional bool bias_term = 2 [ default = true ]; // whether to have bias terms optional bool bias_term = 2 [default = true]; // whether to have bias terms
optional FillerParameter weight_filler = 3; // The filler for the weight optional FillerParameter weight_filler = 3; // The filler for the weight
optional FillerParameter bias_filler = 4; // The filler for the bias optional FillerParameter bias_filler = 4; // The filler for the bias
// The first axis to be lumped into a single inner product computation; // The first axis to be lumped into a single inner product computation;
// all preceding axes are retained in the output. // all preceding axes are retained in the output.
// May be negative to index from the end (e.g., -1 for the last axis). // May be negative to index from the end (e.g., -1 for the last axis).
optional int32 axis = 5 [ default = 1 ]; optional int32 axis = 5 [default = 1];
// Specify whether to transpose the weight matrix or not. // Specify whether to transpose the weight matrix or not.
// If transpose == true, any operations will be performed on the transpose // If transpose == true, any operations will be performed on the transpose
// of the weight matrix. The weight matrix itself is not going to be // of the weight matrix. The weight matrix itself is not going to be
// transposed but rather the transfer flag of operations will be toggled // transposed but rather the transfer flag of operations will be toggled
// accordingly. // accordingly.
optional bool transpose = 6 [ default = false ]; optional bool transpose = 6 [default = false];
} }
message InputParameter { message InputParameter {
...@@ -848,7 +852,6 @@ message InputParameter { ...@@ -848,7 +852,6 @@ message InputParameter {
// Define 1 shape to set the same shape for every top. // Define 1 shape to set the same shape for every top.
// Define no shape to defer to reshaping manually. // Define no shape to defer to reshaping manually.
repeated BlobShape shape = 1; repeated BlobShape shape = 1;
repeated string dtype = 2;
} }
// Message that stores parameters used by LogLayer // Message that stores parameters used by LogLayer
...@@ -856,28 +859,28 @@ message LogParameter { ...@@ -856,28 +859,28 @@ message LogParameter {
// LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0.
// Or if base is set to the default (-1), base is set to e, // Or if base is set to the default (-1), base is set to e,
// so y = ln(shift + scale * x) = log_e(shift + scale * x) // so y = ln(shift + scale * x) = log_e(shift + scale * x)
optional float base = 1 [ default = -1.0 ]; optional float base = 1 [default = -1.0];
optional float scale = 2 [ default = 1.0 ]; optional float scale = 2 [default = 1.0];
optional float shift = 3 [ default = 0.0 ]; optional float shift = 3 [default = 0.0];
} }
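// Example for LogParameter above (illustrative): the defaults give
// y = ln(x); setting base: 10 gives y = log10(x).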
// Message that stores parameters used by LRNLayer // Message that stores parameters used by LRNLayer
message LRNParameter { message LRNParameter {
optional uint32 local_size = 1 [ default = 5 ]; optional uint32 local_size = 1 [default = 5];
optional float alpha = 2 [ default = 1. ]; optional float alpha = 2 [default = 1.];
optional float beta = 3 [ default = 0.75 ]; optional float beta = 3 [default = 0.75];
enum NormRegion { enum NormRegion {
ACROSS_CHANNELS = 0; ACROSS_CHANNELS = 0;
WITHIN_CHANNEL = 1; WITHIN_CHANNEL = 1;
} }
optional NormRegion norm_region = 4 [ default = ACROSS_CHANNELS ]; optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
optional float k = 5 [ default = 1. ]; optional float k = 5 [default = 1.];
enum Engine { enum Engine {
DEFAULT = 0; DEFAULT = 0;
CAFFE = 1; CAFFE = 1;
CUDNN = 2; CUDNN = 2;
} }
optional Engine engine = 6 [ default = DEFAULT ]; optional Engine engine = 6 [default = DEFAULT];
} }
message MemoryDataParameter { message MemoryDataParameter {
...@@ -889,16 +892,18 @@ message MemoryDataParameter { ...@@ -889,16 +892,18 @@ message MemoryDataParameter {
message MVNParameter { message MVNParameter {
// This parameter can be set to false to normalize mean only // This parameter can be set to false to normalize mean only
optional bool normalize_variance = 1 [ default = true ]; optional bool normalize_variance = 1 [default = true];
// This parameter can be set to true to perform DNN-like MVN // This parameter can be set to true to perform DNN-like MVN
optional bool across_channels = 2 [ default = false ]; optional bool across_channels = 2 [default = false];
// Epsilon for not dividing by zero while normalizing variance // Epsilon for not dividing by zero while normalizing variance
optional float eps = 3 [ default = 1e-9 ]; optional float eps = 3 [default = 1e-9];
} }
message ParameterParameter { optional BlobShape shape = 1; } message ParameterParameter {
optional BlobShape shape = 1;
}
message PoolingParameter { message PoolingParameter {
enum PoolMethod { enum PoolMethod {
...@@ -906,45 +911,45 @@ message PoolingParameter { ...@@ -906,45 +911,45 @@ message PoolingParameter {
AVE = 1; AVE = 1;
STOCHASTIC = 2; STOCHASTIC = 2;
} }
optional PoolMethod pool = 1 [ default = MAX ]; // The pooling method optional PoolMethod pool = 1 [default = MAX]; // The pooling method
// Pad, kernel size, and stride are all given as a single value for equal // Pad, kernel size, and stride are all given as a single value for equal
// dimensions in height and width or as Y, X pairs. // dimensions in height and width or as Y, X pairs.
optional uint32 pad = 4 [ default = 0 ]; // The padding size (equal in Y, X) optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X)
optional uint32 pad_h = 9 [ default = 0 ]; // The padding height optional uint32 pad_h = 9 [default = 0]; // The padding height
optional uint32 pad_w = 10 [ default = 0 ]; // The padding width optional uint32 pad_w = 10 [default = 0]; // The padding width
optional uint32 kernel_size = 2; // The kernel size (square) optional uint32 kernel_size = 2; // The kernel size (square)
optional uint32 kernel_h = 5; // The kernel height optional uint32 kernel_h = 5; // The kernel height
optional uint32 kernel_w = 6; // The kernel width optional uint32 kernel_w = 6; // The kernel width
optional uint32 stride = 3 [ default = 1 ]; // The stride (equal in Y, X) optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X)
optional uint32 stride_h = 7; // The stride height optional uint32 stride_h = 7; // The stride height
optional uint32 stride_w = 8; // The stride width optional uint32 stride_w = 8; // The stride width
enum Engine { enum Engine {
DEFAULT = 0; DEFAULT = 0;
CAFFE = 1; CAFFE = 1;
CUDNN = 2; CUDNN = 2;
} }
optional Engine engine = 11 [ default = DEFAULT ]; optional Engine engine = 11 [default = DEFAULT];
// If global_pooling then it will pool over the size of the bottom by doing // If global_pooling then it will pool over the size of the bottom by doing
// kernel_h = bottom->height and kernel_w = bottom->width // kernel_h = bottom->height and kernel_w = bottom->width
optional bool global_pooling = 12 [ default = false ]; optional bool global_pooling = 12 [default = false];
} }
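// Example for PoolingParameter above (illustrative): 2x2 max pooling with
// stride 2 --
//   pool: MAX
//   kernel_size: 2
//   stride: 2
// or global average pooling over the full spatial extent --
//   pool: AVE
//   global_pooling: true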
// Message that stores parameters used by ROIPoolingLayer // Message that stores parameters used by ROIPoolingLayer
message ROIPoolingParameter { message ROIPoolingParameter {
// Pad, kernel size, and stride are all given as a single value for equal // Pad, kernel size, and stride are all given as a single value for equal
// dimensions in height and width or as Y, X pairs. // dimensions in height and width or as Y, X pairs.
optional uint32 pooled_h = 1 [ default = 0 ]; // The pooled output height optional uint32 pooled_h = 1 [default = 0]; // The pooled output height
optional uint32 pooled_w = 2 [ default = 0 ]; // The pooled output width optional uint32 pooled_w = 2 [default = 0]; // The pooled output width
// Multiplicative spatial scale factor to translate ROI coords from their // Multiplicative spatial scale factor to translate ROI coords from their
// input scale to the scale used when pooling // input scale to the scale used when pooling
optional float spatial_scale = 3 [ default = 1 ]; optional float spatial_scale = 3 [default = 1];
} }
message PowerParameter { message PowerParameter {
// PowerLayer computes outputs y = (shift + scale * x) ^ power. // PowerLayer computes outputs y = (shift + scale * x) ^ power.
optional float power = 1 [ default = 1.0 ]; optional float power = 1 [default = 1.0];
optional float scale = 2 [ default = 1.0 ]; optional float scale = 2 [default = 1.0];
optional float shift = 3 [ default = 0.0 ]; optional float shift = 3 [default = 0.0];
} }
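// Example for PowerParameter above (illustrative): y = x ^ 2 is obtained with
//   power: 2
// while the defaults (power = 1, scale = 1, shift = 0) leave the input
// unchanged.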
message PythonParameter { message PythonParameter {
...@@ -954,11 +959,11 @@ message PythonParameter { ...@@ -954,11 +959,11 @@ message PythonParameter {
// in Python before calling the `setup()` method. This could be a number, // in Python before calling the `setup()` method. This could be a number,
// string, dictionary in Python dict format, JSON, etc. You may parse this // string, dictionary in Python dict format, JSON, etc. You may parse this
// string in `setup` method and use it in `forward` and `backward`. // string in `setup` method and use it in `forward` and `backward`.
optional string param_str = 3 [ default = '']; optional string param_str = 3 [default = ''];
// Whether this PythonLayer is shared among worker solvers during data // Whether this PythonLayer is shared among worker solvers during data
// parallelism. If true, each worker solver sequentially runs forward from this // parallelism. If true, each worker solver sequentially runs forward from this
// layer. This value should be set true if you are using it as a data layer. // layer. This value should be set true if you are using it as a data layer.
optional bool share_in_parallel = 4 [ default = false ]; optional bool share_in_parallel = 4 [default = false];
} }
// Message that stores parameters used by ReductionLayer // Message that stores parameters used by ReductionLayer
...@@ -970,7 +975,7 @@ message ReductionParameter { ...@@ -970,7 +975,7 @@ message ReductionParameter {
MEAN = 4; MEAN = 4;
} }
optional ReductionOp operation = 1 [ default = SUM ]; // reduction operation optional ReductionOp operation = 1 [default = SUM]; // reduction operation
// The first axis to reduce to a scalar -- may be negative to index from the // The first axis to reduce to a scalar -- may be negative to index from the
// end (e.g., -1 for the last axis). // end (e.g., -1 for the last axis).
...@@ -985,9 +990,9 @@ message ReductionParameter { ...@@ -985,9 +990,9 @@ message ReductionParameter {
// If axis == 0 (the default), the output Blob always has the empty shape // If axis == 0 (the default), the output Blob always has the empty shape
// (count 1), performing reduction across the entire input -- // (count 1), performing reduction across the entire input --
// often useful for creating new loss functions. // often useful for creating new loss functions.
optional int32 axis = 2 [ default = 0 ]; optional int32 axis = 2 [default = 0];
optional float coeff = 3 [ default = 1.0 ]; // coefficient for output optional float coeff = 3 [default = 1.0]; // coefficient for output
} }
// Message that stores parameters used by ReLULayer // Message that stores parameters used by ReLULayer
...@@ -997,13 +1002,13 @@ message ReLUParameter { ...@@ -997,13 +1002,13 @@ message ReLUParameter {
// Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities
// improve neural network acoustic models. In ICML Workshop on Deep Learning // improve neural network acoustic models. In ICML Workshop on Deep Learning
// for Audio, Speech, and Language Processing. // for Audio, Speech, and Language Processing.
optional float negative_slope = 1 [ default = 0 ]; optional float negative_slope = 1 [default = 0];
enum Engine { enum Engine {
DEFAULT = 0; DEFAULT = 0;
CAFFE = 1; CAFFE = 1;
CUDNN = 2; CUDNN = 2;
} }
optional Engine engine = 2 [ default = DEFAULT ]; optional Engine engine = 2 [default = DEFAULT];
} }
message ReshapeParameter { message ReshapeParameter {
...@@ -1066,8 +1071,8 @@ message ReshapeParameter { ...@@ -1066,8 +1071,8 @@ message ReshapeParameter {
// reshape_param { shape { dim: 2 dim: 1 dim: 8 } } // reshape_param { shape { dim: 2 dim: 1 dim: 8 } }
// reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 }
// //
optional int32 axis = 2 [ default = 0 ]; optional int32 axis = 2 [default = 0];
optional int32 num_axes = 3 [ default = -1 ]; optional int32 num_axes = 3 [default = -1];
} }
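The two `reshape_param` snippets quoted in the comment describe the same transformation of a 2x8 input into a 2x1x8 output; a quick NumPy check of that equivalence (shapes taken from the comment):

```python
import numpy as np

x = np.zeros((2, 8))
# reshape_param { shape { dim: 2 dim: 1 dim: 8 } }  -- give the full target shape
y1 = x.reshape(2, 1, 8)
# reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 }  -- insert one axis at
# position 1 without consuming any existing axes
y2 = np.expand_dims(x, axis=1)
assert y1.shape == y2.shape == (2, 1, 8)
```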
message ScaleParameter { message ScaleParameter {
...@@ -1084,7 +1089,7 @@ message ScaleParameter { ...@@ -1084,7 +1089,7 @@ message ScaleParameter {
// (axis == 3 == -1) 60 // (axis == 3 == -1) 60
// Furthermore, bottom[1] may have the empty shape (regardless of the value of // Furthermore, bottom[1] may have the empty shape (regardless of the value of
// "axis") -- a scalar multiplier. // "axis") -- a scalar multiplier.
optional int32 axis = 1 [ default = 1 ]; optional int32 axis = 1 [default = 1];
// (num_axes is ignored unless just one bottom is given and the scale is // (num_axes is ignored unless just one bottom is given and the scale is
// a learned parameter of the layer. Otherwise, num_axes is determined by the // a learned parameter of the layer. Otherwise, num_axes is determined by the
...@@ -1092,7 +1097,7 @@ message ScaleParameter { ...@@ -1092,7 +1097,7 @@ message ScaleParameter {
// The number of axes of the input (bottom[0]) covered by the scale // The number of axes of the input (bottom[0]) covered by the scale
// parameter, or -1 to cover all axes of bottom[0] starting from `axis`. // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
// Set num_axes := 0, to multiply with a zero-axis Blob: a scalar. // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar.
optional int32 num_axes = 2 [ default = 1 ]; optional int32 num_axes = 2 [default = 1];
// (filler is ignored unless just one bottom is given and the scale is // (filler is ignored unless just one bottom is given and the scale is
// a learned parameter of the layer.) // a learned parameter of the layer.)
...@@ -1103,7 +1108,7 @@ message ScaleParameter { ...@@ -1103,7 +1108,7 @@ message ScaleParameter {
// Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but
// may be more efficient). Initialized with bias_filler (defaults to 0). // may be more efficient). Initialized with bias_filler (defaults to 0).
optional bool bias_term = 4 [ default = false ]; optional bool bias_term = 4 [default = false];
optional FillerParameter bias_filler = 5; optional FillerParameter bias_filler = 5;
} }
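A hedged NumPy sketch of the broadcasting rule spelled out above: the scale (and optional bias) lines up with the axes of bottom[0] starting at `axis`, and trailing singleton axes are appended so that ordinary broadcasting applies. The 100x3x40x60 shape comes from the comment; everything else is illustrative:

```python
import numpy as np

def scale_layer(x, scale, axis=1, bias=None):
    """Broadcast-multiply `scale` against the axes of `x` starting at `axis`."""
    axis = axis if axis >= 0 else axis + x.ndim
    # Pad with singleton axes so scale aligns with x[axis : axis + scale.ndim].
    shape = (1,) * axis + scale.shape + (1,) * (x.ndim - axis - scale.ndim)
    y = x * scale.reshape(shape)
    if bias is not None:              # corresponds to bias_term == true
        y = y + bias.reshape(shape)
    return y

x = np.random.rand(100, 3, 40, 60)
print(scale_layer(x, np.random.rand(3, 40), axis=1).shape)  # (100, 3, 40, 60)
print(scale_layer(x, np.array(2.0)).shape)                  # scalar multiplier
```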
...@@ -1113,18 +1118,18 @@ message SigmoidParameter { ...@@ -1113,18 +1118,18 @@ message SigmoidParameter {
CAFFE = 1; CAFFE = 1;
CUDNN = 2; CUDNN = 2;
} }
optional Engine engine = 1 [ default = DEFAULT ]; optional Engine engine = 1 [default = DEFAULT];
} }
message SliceParameter { message SliceParameter {
// The axis along which to slice -- may be negative to index from the end // The axis along which to slice -- may be negative to index from the end
// (e.g., -1 for the last axis). // (e.g., -1 for the last axis).
// By default, SliceLayer concatenates blobs along the "channels" axis (1). // By default, SliceLayer concatenates blobs along the "channels" axis (1).
optional int32 axis = 3 [ default = 1 ]; optional int32 axis = 3 [default = 1];
repeated uint32 slice_point = 2; repeated uint32 slice_point = 2;
// DEPRECATED: alias for "axis" -- does not support negative indexing. // DEPRECATED: alias for "axis" -- does not support negative indexing.
optional uint32 slice_dim = 1 [ default = 1 ]; optional uint32 slice_dim = 1 [default = 1];
} }
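For orientation, a minimal sketch of the slicing behaviour these fields configure, assuming `slice_point` lists the split indices along `axis` (the shapes below are made up):

```python
import numpy as np

def slice_layer(x, slice_points, axis=1):
    """Split `x` at the given indices along `axis`, one output per segment."""
    return np.split(x, slice_points, axis=axis)

x = np.zeros((4, 10))
tops = slice_layer(x, slice_points=[2, 5], axis=1)
print([t.shape for t in tops])  # [(4, 2), (4, 3), (4, 5)]
```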
// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer // Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer
...@@ -1134,12 +1139,12 @@ message SoftmaxParameter { ...@@ -1134,12 +1139,12 @@ message SoftmaxParameter {
CAFFE = 1; CAFFE = 1;
CUDNN = 2; CUDNN = 2;
} }
optional Engine engine = 1 [ default = DEFAULT ]; optional Engine engine = 1 [default = DEFAULT];
// The axis along which to perform the softmax -- may be negative to index // The axis along which to perform the softmax -- may be negative to index
// from the end (e.g., -1 for the last axis). // from the end (e.g., -1 for the last axis).
// Any other axes will be evaluated as independent softmaxes. // Any other axes will be evaluated as independent softmaxes.
optional int32 axis = 2 [ default = 1 ]; optional int32 axis = 2 [default = 1];
} }
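The axis semantics described above ("any other axes will be evaluated as independent softmaxes") amount to the usual normalized exponential over a single axis; a minimal NumPy sketch:

```python
import numpy as np

def softmax(x, axis=1):
    """Softmax over `axis`; every other axis holds an independent distribution."""
    z = x - x.max(axis=axis, keepdims=True)  # subtract the max for stability
    e = np.exp(z)
    return e / e.sum(axis=axis, keepdims=True)

x = np.random.rand(2, 5, 3)
print(np.allclose(softmax(x, axis=1).sum(axis=1), 1.0))  # True
```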
message TanHParameter { message TanHParameter {
...@@ -1148,23 +1153,21 @@ message TanHParameter { ...@@ -1148,23 +1153,21 @@ message TanHParameter {
CAFFE = 1; CAFFE = 1;
CUDNN = 2; CUDNN = 2;
} }
optional Engine engine = 1 [ default = DEFAULT ]; optional Engine engine = 1 [default = DEFAULT];
} }
// Message that stores parameters used by TileLayer // Message that stores parameters used by TileLayer
message TileParameter { message TileParameter {
// The index of the axis to tile. // The index of the axis to tile.
optional int32 axis = 1 [ default = 1 ]; optional int32 axis = 1 [default = 1];
// The number of copies (tiles) of the blob to output. // The number of copies (tiles) of the blob to output.
optional int32 tiles = 2; optional int32 tiles = 2;
optional BlobShape multiples = 3;
} }
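A hedged sketch of what `axis` and `tiles` express: the blob is copied `tiles` times and the copies are stacked along `axis` (the sample shape is illustrative):

```python
import numpy as np

def tile_layer(x, axis=1, tiles=2):
    """Stack `tiles` copies of `x` along `axis`."""
    return np.concatenate([x] * tiles, axis=axis)

x = np.arange(6).reshape(2, 3)
print(tile_layer(x, axis=1, tiles=2).shape)  # (2, 6)
```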
// Message that stores parameters used by ThresholdLayer // Message that stores parameters used by ThresholdLayer
message ThresholdParameter { message ThresholdParameter {
optional float threshold = 1 [ default = 0 ]; // Strictly positive values optional float threshold = 1 [default = 0]; // Strictly positive values
} }
message WindowDataParameter { message WindowDataParameter {
...@@ -1173,31 +1176,31 @@ message WindowDataParameter { ...@@ -1173,31 +1176,31 @@ message WindowDataParameter {
// For data pre-processing, we can do simple scaling and subtracting the // For data pre-processing, we can do simple scaling and subtracting the
// data mean, if provided. Note that the mean subtraction is always carried // data mean, if provided. Note that the mean subtraction is always carried
// out before scaling. // out before scaling.
optional float scale = 2 [ default = 1 ]; optional float scale = 2 [default = 1];
optional string mean_file = 3; optional string mean_file = 3;
// Specify the batch size. // Specify the batch size.
optional uint32 batch_size = 4; optional uint32 batch_size = 4;
// Specify if we would like to randomly crop an image. // Specify if we would like to randomly crop an image.
optional uint32 crop_size = 5 [ default = 0 ]; optional uint32 crop_size = 5 [default = 0];
// Specify if we want to randomly mirror data. // Specify if we want to randomly mirror data.
optional bool mirror = 6 [ default = false ]; optional bool mirror = 6 [default = false];
// Foreground (object) overlap threshold // Foreground (object) overlap threshold
optional float fg_threshold = 7 [ default = 0.5 ]; optional float fg_threshold = 7 [default = 0.5];
// Background (non-object) overlap threshold // Background (non-object) overlap threshold
optional float bg_threshold = 8 [ default = 0.5 ]; optional float bg_threshold = 8 [default = 0.5];
// Fraction of batch that should be foreground objects // Fraction of batch that should be foreground objects
optional float fg_fraction = 9 [ default = 0.25 ]; optional float fg_fraction = 9 [default = 0.25];
// Amount of contextual padding to add around a window // Amount of contextual padding to add around a window
// (used only by the window_data_layer) // (used only by the window_data_layer)
optional uint32 context_pad = 10 [ default = 0 ]; optional uint32 context_pad = 10 [default = 0];
// Mode for cropping out a detection window // Mode for cropping out a detection window
// warp: cropped window is warped to a fixed size and aspect ratio // warp: cropped window is warped to a fixed size and aspect ratio
// square: the tightest square around the window is cropped // square: the tightest square around the window is cropped
optional string crop_mode = 11 [ default = "warp" ]; optional string crop_mode = 11 [default = "warp"];
// cache_images: will load all images in memory for faster access // cache_images: will load all images in memory for faster access
optional bool cache_images = 12 [ default = false ]; optional bool cache_images = 12 [default = false];
// append root_folder to locate images // append root_folder to locate images
optional string root_folder = 13 [ default = "" ]; optional string root_folder = 13 [default = ""];
} }
message SPPParameter { message SPPParameter {
...@@ -1207,13 +1210,13 @@ message SPPParameter { ...@@ -1207,13 +1210,13 @@ message SPPParameter {
STOCHASTIC = 2; STOCHASTIC = 2;
} }
optional uint32 pyramid_height = 1; optional uint32 pyramid_height = 1;
optional PoolMethod pool = 2 [ default = MAX ]; // The pooling method optional PoolMethod pool = 2 [default = MAX]; // The pooling method
enum Engine { enum Engine {
DEFAULT = 0; DEFAULT = 0;
CAFFE = 1; CAFFE = 1;
CUDNN = 2; CUDNN = 2;
} }
optional Engine engine = 6 [ default = DEFAULT ]; optional Engine engine = 6 [default = DEFAULT];
} }
// DEPRECATED: use LayerParameter. // DEPRECATED: use LayerParameter.
...@@ -1312,45 +1315,45 @@ message V1LayerParameter { ...@@ -1312,45 +1315,45 @@ message V1LayerParameter {
// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters // DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
// in Caffe. We keep this message type around for legacy support. // in Caffe. We keep this message type around for legacy support.
message V0LayerParameter { message V0LayerParameter {
optional string name = 1; // the layer name optional string name = 1; // the layer name
optional string type = 2; // the string to specify the layer type optional string type = 2; // the string to specify the layer type
// Parameters to specify layers with inner products. // Parameters to specify layers with inner products.
optional uint32 num_output = 3; // The number of outputs for the layer optional uint32 num_output = 3; // The number of outputs for the layer
optional bool biasterm = 4 [ default = true ]; // whether to have bias terms optional bool biasterm = 4 [default = true]; // whether to have bias terms
optional FillerParameter weight_filler = 5; // The filler for the weight optional FillerParameter weight_filler = 5; // The filler for the weight
optional FillerParameter bias_filler = 6; // The filler for the bias optional FillerParameter bias_filler = 6; // The filler for the bias
optional uint32 pad = 7 [ default = 0 ]; // The padding size optional uint32 pad = 7 [default = 0]; // The padding size
optional uint32 kernelsize = 8; // The kernel size optional uint32 kernelsize = 8; // The kernel size
optional uint32 group = 9 [ default = 1 ]; // The group size for group conv optional uint32 group = 9 [default = 1]; // The group size for group conv
optional uint32 stride = 10 [ default = 1 ]; // The stride optional uint32 stride = 10 [default = 1]; // The stride
enum PoolMethod { enum PoolMethod {
MAX = 0; MAX = 0;
AVE = 1; AVE = 1;
STOCHASTIC = 2; STOCHASTIC = 2;
} }
optional PoolMethod pool = 11 [ default = MAX ]; // The pooling method optional PoolMethod pool = 11 [default = MAX]; // The pooling method
optional float dropout_ratio = 12 [ default = 0.5 ]; // dropout ratio optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio
optional uint32 local_size = 13 [ default = 5 ]; // for local response norm optional uint32 local_size = 13 [default = 5]; // for local response norm
optional float alpha = 14 [ default = 1. ]; // for local response norm optional float alpha = 14 [default = 1.]; // for local response norm
optional float beta = 15 [ default = 0.75 ]; // for local response norm optional float beta = 15 [default = 0.75]; // for local response norm
optional float k = 22 [ default = 1. ]; optional float k = 22 [default = 1.];
// For data layers, specify the data source // For data layers, specify the data source
optional string source = 16; optional string source = 16;
// For data pre-processing, we can do simple scaling and subtracting the // For data pre-processing, we can do simple scaling and subtracting the
// data mean, if provided. Note that the mean subtraction is always carried // data mean, if provided. Note that the mean subtraction is always carried
// out before scaling. // out before scaling.
optional float scale = 17 [ default = 1 ]; optional float scale = 17 [default = 1];
optional string meanfile = 18; optional string meanfile = 18;
// For data layers, specify the batch size. // For data layers, specify the batch size.
optional uint32 batchsize = 19; optional uint32 batchsize = 19;
// For data layers, specify if we would like to randomly crop an image. // For data layers, specify if we would like to randomly crop an image.
optional uint32 cropsize = 20 [ default = 0 ]; optional uint32 cropsize = 20 [default = 0];
// For data layers, specify if we want to randomly mirror data. // For data layers, specify if we want to randomly mirror data.
optional bool mirror = 21 [ default = false ]; optional bool mirror = 21 [default = false];
// The blobs containing the numeric parameters of the layer // The blobs containing the numeric parameters of the layer
repeated BlobProto blobs = 50; repeated BlobProto blobs = 50;
...@@ -1364,41 +1367,41 @@ message V0LayerParameter { ...@@ -1364,41 +1367,41 @@ message V0LayerParameter {
// to prevent all asynchronous sgd clients from starting at the same point. The skip // to prevent all asynchronous sgd clients from starting at the same point. The skip
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
// be larger than the number of keys in the database. // be larger than the number of keys in the database.
optional uint32 rand_skip = 53 [ default = 0 ]; optional uint32 rand_skip = 53 [default = 0];
// Fields related to detection (det_*) // Fields related to detection (det_*)
// foreground (object) overlap threshold // foreground (object) overlap threshold
optional float det_fg_threshold = 54 [ default = 0.5 ]; optional float det_fg_threshold = 54 [default = 0.5];
// background (non-object) overlap threshold // background (non-object) overlap threshold
optional float det_bg_threshold = 55 [ default = 0.5 ]; optional float det_bg_threshold = 55 [default = 0.5];
// Fraction of batch that should be foreground objects // Fraction of batch that should be foreground objects
optional float det_fg_fraction = 56 [ default = 0.25 ]; optional float det_fg_fraction = 56 [default = 0.25];
// optional bool OBSOLETE_can_clobber = 57 [default = true]; // optional bool OBSOLETE_can_clobber = 57 [default = true];
// Amount of contextual padding to add around a window // Amount of contextual padding to add around a window
// (used only by the window_data_layer) // (used only by the window_data_layer)
optional uint32 det_context_pad = 58 [ default = 0 ]; optional uint32 det_context_pad = 58 [default = 0];
// Mode for cropping out a detection window // Mode for cropping out a detection window
// warp: cropped window is warped to a fixed size and aspect ratio // warp: cropped window is warped to a fixed size and aspect ratio
// square: the tightest square around the window is cropped // square: the tightest square around the window is cropped
optional string det_crop_mode = 59 [ default = "warp" ]; optional string det_crop_mode = 59 [default = "warp"];
// For ReshapeLayer, one needs to specify the new dimensions. // For ReshapeLayer, one needs to specify the new dimensions.
optional int32 new_num = 60 [ default = 0 ]; optional int32 new_num = 60 [default = 0];
optional int32 new_channels = 61 [ default = 0 ]; optional int32 new_channels = 61 [default = 0];
optional int32 new_height = 62 [ default = 0 ]; optional int32 new_height = 62 [default = 0];
optional int32 new_width = 63 [ default = 0 ]; optional int32 new_width = 63 [default = 0];
// Whether or not ImageLayer should shuffle the list of files at every epoch. // Whether or not ImageLayer should shuffle the list of files at every epoch.
// It will also resize images if new_height or new_width are not zero. // It will also resize images if new_height or new_width are not zero.
optional bool shuffle_images = 64 [ default = false ]; optional bool shuffle_images = 64 [default = false];
// For ConcatLayer, one needs to specify the dimension for concatenation, and // For ConcatLayer, one needs to specify the dimension for concatenation, and
// the other dimensions must be the same for all the bottom blobs. // the other dimensions must be the same for all the bottom blobs.
// By default it will concatenate blobs along the channels dimension. // By default it will concatenate blobs along the channels dimension.
optional uint32 concat_dim = 65 [ default = 1 ]; optional uint32 concat_dim = 65 [default = 1];
optional HDF5OutputParameter hdf5_output_param = 1001; optional HDF5OutputParameter hdf5_output_param = 1001;
} }
...@@ -1410,14 +1413,14 @@ message PReLUParameter { ...@@ -1410,14 +1413,14 @@ message PReLUParameter {
// Initial value of a_i. Default is a_i=0.25 for all i. // Initial value of a_i. Default is a_i=0.25 for all i.
optional FillerParameter filler = 1; optional FillerParameter filler = 1;
// Whether or not slope parameters are shared across channels. // Whether or not slope parameters are shared across channels.
optional bool channel_shared = 2 [ default = false ]; optional bool channel_shared = 2 [default = false];
} }
message SmoothL1LossParameter { message SmoothL1LossParameter {
// SmoothL1Loss(x) = // SmoothL1Loss(x) =
// 0.5 * (sigma * x) ** 2 -- if |x| < 1.0 / sigma / sigma // 0.5 * (sigma * x) ** 2 -- if |x| < 1.0 / sigma / sigma
// |x| - 0.5 / sigma / sigma -- otherwise // |x| - 0.5 / sigma / sigma -- otherwise
optional float sigma = 1 [ default = 1 ]; optional float sigma = 1 [default = 1];
} }
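Written out with NumPy, the piecewise definition above (the quadratic branch applies while |x| is below 1 / sigma^2) looks like this; the sample inputs are illustrative:

```python
import numpy as np

def smooth_l1(x, sigma=1.0):
    """Element-wise smooth L1: quadratic near zero, linear in the tails."""
    beta = 1.0 / (sigma * sigma)
    return np.where(np.abs(x) < beta,
                    0.5 * (sigma * x) ** 2,
                    np.abs(x) - 0.5 * beta)

print(smooth_l1(np.array([-2.0, 0.1, 0.5, 3.0])))  # [1.5   0.005 0.125 2.5  ]
```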
message PermuteParameter { message PermuteParameter {
...@@ -1428,18 +1431,11 @@ message PermuteParameter { ...@@ -1428,18 +1431,11 @@ message PermuteParameter {
} }
message NormalizeParameter { message NormalizeParameter {
optional bool across_spatial = 1 [ default = true ]; optional bool across_spatial = 1 [default = true];
// Initial value of scale. Default is 1.0 for all // Initial value of scale. Default is 1.0 for all
optional FillerParameter scale_filler = 2; optional FillerParameter scale_filler = 2;
// Whether or not scale parameters are shared across channels. // Whether or not scale parameters are shared across channels.
optional bool channel_shared = 3 [ default = true ]; optional bool channel_shared = 3 [default = true];
// Epsilon for not dividing by zero while normalizing variance // Epsilon for not dividing by zero while normalizing variance
optional float eps = 4 [ default = 1e-12 ]; optional float eps = 4 [default = 1e-12];
}
message GroupNormParameter {
optional float eps = 1 [ default = 1e-5 ];
optional int32 group = 2 [ default = 32 ];
} }
message CastParameter { optional string dtype = 1; }
...@@ -16,15 +16,14 @@ from __future__ import division ...@@ -16,15 +16,14 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import time import time
from google.protobuf import text_format from google.protobuf import text_format
from dragon.core.autograph import def_function from dragon.core.autograph import def_function
from dragon.core.framework import workspace
from dragon.core.training.adam import Adam from dragon.core.training.adam import Adam
from dragon.core.training.rmsprop import RMSprop from dragon.core.training.rmsprop import RMSprop
from dragon.core.training.sgd import SGD from dragon.core.training.sgd import SGD
from dragon.core.training.sgd import Nesterov from dragon.core.training.sgd import Nesterov
from dragon.core.util import logging
from dragon.vm.caffe.net import Net from dragon.vm.caffe.net import Net
from dragon.vm.caffe.proto import caffe_pb2 from dragon.vm.caffe.proto import caffe_pb2
...@@ -99,8 +98,9 @@ class Solver(object): ...@@ -99,8 +98,9 @@ class Solver(object):
if self._current_step < len(self._param.stepvalue) \ if self._current_step < len(self._param.stepvalue) \
and self.iter >= self._param.stepvalue[self._current_step]: and self.iter >= self._param.stepvalue[self._current_step]:
self._current_step = self._current_step + 1 self._current_step = self._current_step + 1
print('MultiStep Status: Iteration {}, step = {}' logging.info(
.format(self.iter, self._current_step)) 'MultiStep Status: Iteration {}, step = {}'
.format(self.iter, self._current_step))
new_lr = self._param.base_lr * \ new_lr = self._param.base_lr * \
pow(self._param.gamma, self._current_step) pow(self._param.gamma, self._current_step)
self.base_lr = new_lr self.base_lr = new_lr
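For reference, a standalone sketch of the 'multistep' policy computed above: the learning rate is multiplied by `gamma` each time the iteration passes the next `stepvalue`. The base_lr, gamma, and stepvalue numbers are made up, not taken from any solver prototxt:

```python
base_lr, gamma = 0.1, 0.1
stepvalue = [60000, 80000]

def multistep_lr(iteration):
    # current_step counts how many step boundaries the iteration has passed.
    current_step = sum(iteration >= v for v in stepvalue)
    return base_lr * gamma ** current_step

print(multistep_lr(0))      # 0.1
print(multistep_lr(60000))  # ~0.01
print(multistep_lr(90000))  # ~0.001
```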
...@@ -112,8 +112,9 @@ class Solver(object): ...@@ -112,8 +112,9 @@ class Solver(object):
else: else:
if self._current_step + 1 < len(stage_iters): if self._current_step + 1 < len(stage_iters):
self._current_step = self._current_step + 1 self._current_step = self._current_step + 1
print('MultiFixed Status: Iteration {}, stage = {}' logging.info(
.format(self.iter, self._current_step)) 'MultiFixed Status: Iteration {}, stage = {}'
.format(self.iter, self._current_step))
self.base_lr = stage_lrs[self._current_step] self.base_lr = stage_lrs[self._current_step]
elif policy == 'inv': elif policy == 'inv':
power = self._param.power power = self._param.power
...@@ -130,8 +131,7 @@ class Solver(object): ...@@ -130,8 +131,7 @@ class Solver(object):
def _apply_update(self): def _apply_update(self):
"""Apply the weights update.""" """Apply the weights update."""
for blob in self.net._layer_blobs: for blob in self.net._layer_blobs:
if blob.lr_multiplier > 0 and \ if blob.lr_multiplier > 0 and blob.diff is not None:
blob.diff is not None:
self._optimizer.apply_gradients( self._optimizer.apply_gradients(
values_and_grads=[(blob.data, blob.diff)], values_and_grads=[(blob.data, blob.diff)],
lr_mult=blob.lr_multiplier, lr_mult=blob.lr_multiplier,
...@@ -211,80 +211,18 @@ class Solver(object): ...@@ -211,80 +211,18 @@ class Solver(object):
""" """
return self._test_nets return self._test_nets
def one_step(self):
"""One step run the train net.
Returns
-------
dict
The stats.
"""
if self._param.test_interval and \
self.iter % self._param.test_interval == 0:
if (self.iter == 0 and
self._param.test_initialization) or self.iter != 0:
for test_idx in range(len(self._test_nets)):
self.test(test_idx)
# Forward, backward and compute loss.
run_time, stats = 0., {'loss': {'total': 0.}, 'iter': self.iter}
for i in range(self._param.iter_size):
tic = time.time()
self._net.forward_backward(return_outputs=False)
run_time += (time.time() - tic)
# Total loss.
for e in self.net.losses:
values = e.get_value().flatten()
if values.size == 1:
stats['loss']['total'] += values[0]
# Partial loss.
for key in self.net.outputs:
values = self.net.blobs[key].data
values = values.get_value().flatten()
if values.size != 1:
continue
if key not in stats['loss']:
stats['loss'][key] = 0.
stats['loss'][key] += values[0]
# Apply Update.
self._get_learning_rate()
tic = time.time()
self._apply_update()
run_time += (time.time() - tic)
self.iter = self.iter + 1
# Snapshot.
if self._param.snapshot:
if self.iter % self._param.snapshot == 0:
self.snapshot()
# Average loss by the iter size.
for k in stats['loss'].keys():
stats['loss'][k] /= self._param.iter_size
# Misc stats.
stats['lr'] = self.base_lr
stats['time'] = run_time
return stats
def snapshot(self): def snapshot(self):
"""Snapshot the parameters of train net.""" """Snapshot the parameters of train net."""
workspace.save( self._net.save(
tensors=[blob.data for blob in self.net._layer_blobs], '%s_iter_%d.caffemodel'
filename='_iter_%d' % self.iter, % (self._param.snapshot_prefix, self._iter))
prefix=self._param.snapshot_prefix,
suffix='.caffemodel', def step(self, num_iterations=1):
format='caffe',
)
def step(self, num_iterations):
"""Step the train net. """Step the train net.
Parameters Parameters
---------- ----------
num_iterations : int num_iterations : int, optional, default=1
The number of iterations to step. The number of iterations to step.
""" """
...@@ -293,19 +231,18 @@ class Solver(object): ...@@ -293,19 +231,18 @@ class Solver(object):
loss_vec, smoothed_loss = [], 0. loss_vec, smoothed_loss = [], 0.
tic = time.time() tic = time.time()
while self.iter < stop_step: while self.iter < stop_step:
# Test if necessary. # Test if necessary.
if self._param.test_interval and \ if self._is_root and self._param.test_interval > 0 and \
self.iter % self._param.test_interval == 0: self.iter % self._param.test_interval == 0:
if (self.iter == 0 and if (self.iter == 0 and self._param.test_initialization) or \
self._param.test_initialization) or self.iter != 0: self.iter != 0:
for test_idx in range(len(self._test_nets)): for test_idx in range(len(self._test_nets)):
self.test(test_idx) self.test(test_idx)
# Forward, backward and compute loss. # Forward, backward and compute loss.
loss = 0. loss = 0.
for i in range(self._param.iter_size): for i in range(self._param.iter_size):
self._net.forward_backward(return_outputs=False) self._net.forward_backward()
if self._is_root: if self._is_root:
for e in self.net.losses: for e in self.net.losses:
values = e.get_value().flatten() values = e.get_value().flatten()
...@@ -322,24 +259,23 @@ class Solver(object): ...@@ -322,24 +259,23 @@ class Solver(object):
idx = (self.iter - start_step) % self._param.average_loss idx = (self.iter - start_step) % self._param.average_loss
smoothed_loss += ((loss - loss_vec[idx]) / self._param.average_loss) smoothed_loss += ((loss - loss_vec[idx]) / self._param.average_loss)
loss_vec[idx] = loss loss_vec[idx] = loss
# Apply Update. # Apply Update.
self._get_learning_rate() self._get_learning_rate()
self._apply_update() self._apply_update()
# Display iteration info.
# Display.
if self._is_root and self._param.display: if self._is_root and self._param.display:
if self.iter % self._param.display == 0: if self.iter % self._param.display == 0:
print('Iteration %d, lr = %s, loss = %f, time = %.2fs' % ( logging.info(
self.iter, str(self.base_lr), smoothed_loss, time.time() - tic)) 'Iteration %d, lr = %s, loss = %f, time = %.2fs'
% (self.iter, str(self.base_lr), smoothed_loss, time.time() - tic))
tic = time.time() tic = time.time()
for idx, net_output in enumerate(self.net.outputs): for idx, net_output in enumerate(self.net.outputs):
values = self.net.blobs[net_output].data.get_value().flatten() values = self.net.blobs[net_output].data.get_value().flatten()
for v in values: for v in values:
print(' ' * 10 + 'Train net output #{}({}): {}' logging.info(
.format(idx, net_output, v)) ' ' * 10 + 'Train net output #{}({}): {}'
.format(idx, net_output, v))
self.iter = self.iter + 1 self.iter = self.iter + 1
# Snapshot if necessary. # Snapshot if necessary.
if self._param.snapshot: if self._param.snapshot:
if self.iter % self._param.snapshot == 0: if self.iter % self._param.snapshot == 0:
...@@ -359,7 +295,7 @@ class Solver(object): ...@@ -359,7 +295,7 @@ class Solver(object):
test_iter = self._param.test_iter[test_idx] test_iter = self._param.test_iter[test_idx]
for iter in range(test_iter): for iter in range(test_iter):
net.forward_backward(return_outputs=False) net.forward()
if not self._is_root: if not self._is_root:
continue continue
if iter == 0: if iter == 0:
...@@ -376,27 +312,25 @@ class Solver(object): ...@@ -376,27 +312,25 @@ class Solver(object):
test_score[i] += value test_score[i] += value
i += 1 i += 1
if not self._is_root: logging.info('Iteration {}, Test net #{}'.format(self.iter, test_idx))
return
print('Iteration {}, Test net #{}'.format(self.iter, test_idx))
for i, score in enumerate(test_score): for i, score in enumerate(test_score):
print(' ' * 10 + 'Test net output #%d(%s): %.4f' logging.info(
% (i, output_id[i], score / test_iter)) ' ' * 10 + 'Test net output #%d(%s): %.4f'
% (i, output_id[i], score / test_iter))
class AdamSolver(Solver): class AdamSolver(Solver):
r"""The Adam solver. r"""The Adam solver.
`[Kingma & Ba, 2014] <https://arxiv.org/abs/1412.6980>`_. `[Kingma & Ba, 2014] <https://arxiv.org/abs/1412.6980>`_.
Following hyper parameters will be taken: Examples:
```python ```python
caffe_pb2.SolverParameter( solver {
base_lr=0., base_lr: 0.001
momentum=0., momentum: 0.9
momentum2=0.999, momentum2: 0.999
delta=1e-8, delta: 1e-8
) }
``` ```
...@@ -425,13 +359,13 @@ class NesterovSolver(Solver): ...@@ -425,13 +359,13 @@ class NesterovSolver(Solver):
r"""The Nesterov-SGD solver. r"""The Nesterov-SGD solver.
`[Sutskever et.al, 2013] <http://www.cs.toronto.edu/~hinton/absps/momentum.pdf>`_. `[Sutskever et.al, 2013] <http://www.cs.toronto.edu/~hinton/absps/momentum.pdf>`_.
Following hyper parameters will be taken: Examples:
```python ```python
caffe_pb2.SolverParameter( solver {
base_lr=0., base_lr: 0.01
momentum=0., momentum: 0.9
) }
``` ```
""" """
...@@ -457,13 +391,13 @@ class RMSPropSolver(Solver): ...@@ -457,13 +391,13 @@ class RMSPropSolver(Solver):
r"""The RMSProp solver. r"""The RMSProp solver.
`[Hinton et.al, 2013] <http://www.cs.utoronto.ca/~bonner/courses/2016s/csc321/lectures/lec6.pdf>`_. `[Hinton et.al, 2013] <http://www.cs.utoronto.ca/~bonner/courses/2016s/csc321/lectures/lec6.pdf>`_.
Following hyper parameters will be taken: Examples:
```python ```python
caffe_pb2.SolverParameter( solver {
base_lr=0., base_lr: 0.01
rms_decay=0.99, rms_decay: 0.99
delta=1e-8, delta: 1e-8
) }
``` ```
...@@ -491,12 +425,12 @@ class SGDSolver(Solver): ...@@ -491,12 +425,12 @@ class SGDSolver(Solver):
r"""The Momentum-SGD solver. r"""The Momentum-SGD solver.
`[Polyak, 1964] <https://doi.org/10.1016/0041-5553(64)90137-5>`_. `[Polyak, 1964] <https://doi.org/10.1016/0041-5553(64)90137-5>`_.
Following hyper parameters will be taken: Examples:
```python ```python
caffe_pb2.SolverParameter( solver {
base_lr=0., base_lr: 0.01
momentum=0., momentum: 0.9
) }
``` ```
......
...@@ -3,9 +3,9 @@ Building Dragon Documentation ...@@ -3,9 +3,9 @@ Building Dragon Documentation
This page will help you build the following documentation: This page will help you build the following documentation:
Dragon C++ API: http://dragon.seetatech.com/api/cc Dragon C++ API: https://dragon.seetatech.com/api/cc
Dragon Python API: http://dragon.seetatech.com/api/python Dragon Python API: https://dragon.seetatech.com/api/python
Build Documentation of C++ API Build Documentation of C++ API
------------------------------ ------------------------------
......
...@@ -34,10 +34,6 @@ vm.caffe.layers ...@@ -34,10 +34,6 @@ vm.caffe.layers
`class Deconvolution <layers/Deconvolution.html>`_ `class Deconvolution <layers/Deconvolution.html>`_
: Apply the n-dimension deconvolution. : Apply the n-dimension deconvolution.
`class DepthwiseConv2d <layers/DepthwiseConv2d.html>`_
: Apply the 2d depthwise convolution.
`[Chollet, 2016] <https://arxiv.org/abs/1610.02357>`_.
`class Dropout <layers/Dropout.html>`_ `class Dropout <layers/Dropout.html>`_
: Set the elements of the input to zero randomly. : Set the elements of the input to zero randomly.
`[Srivastava et.al, 2014] <http://jmlr.org/papers/v15/srivastava14a.html>`_. `[Srivastava et.al, 2014] <http://jmlr.org/papers/v15/srivastava14a.html>`_.
...@@ -58,18 +54,6 @@ vm.caffe.layers ...@@ -58,18 +54,6 @@ vm.caffe.layers
`class Flatten <layers/Flatten.html>`_ `class Flatten <layers/Flatten.html>`_
: Flatten the input along the given axes. : Flatten the input along the given axes.
`class FusedBatchNorm <layers/FusedBatchNorm.html>`_
: Apply the fused batch normalization.
`[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_.
`class FusedGroupNorm <layers/FusedBatchNorm.html>`_
: Apply the fused group normalization.
`[Wu & He, 2018] <https://arxiv.org/abs/1803.08494>`_.
`class GroupNorm <layers/FusedBatchNorm.html>`_
: Apply the group normalization.
`[Wu & He, 2018] <https://arxiv.org/abs/1803.08494>`_.
`class InnerProduct <layers/InnerProduct.html>`_ `class InnerProduct <layers/InnerProduct.html>`_
: Compute the dense matrix multiplication along the given axes. : Compute the dense matrix multiplication along the given axes.
...@@ -121,10 +105,6 @@ vm.caffe.layers ...@@ -121,10 +105,6 @@ vm.caffe.layers
`class Scale <layers/Scale.html>`_ `class Scale <layers/Scale.html>`_
: Compute the affine transformation along the given axes. : Compute the affine transformation along the given axes.
`class SELU <layers/SELU.html>`_
: Apply the scaled exponential linear unit.
`[Klambauer et.al, 2017] <https://arxiv.org/abs/1706.02515>`_.
`class Sigmoid <layers/Sigmoid.html>`_ `class Sigmoid <layers/Sigmoid.html>`_
: Apply the sigmoid function. : Apply the sigmoid function.
...@@ -145,7 +125,7 @@ vm.caffe.layers ...@@ -145,7 +125,7 @@ vm.caffe.layers
: Apply the tanh function. : Apply the tanh function.
`class Tile <layers/Tile.html>`_ `class Tile <layers/Tile.html>`_
: Tile the input according to the given multiples. : Repeat the input along the given axis.
.. toctree:: .. toctree::
:hidden: :hidden:
...@@ -153,21 +133,16 @@ vm.caffe.layers ...@@ -153,21 +133,16 @@ vm.caffe.layers
layers/Accuracy layers/Accuracy
layers/ArgMax layers/ArgMax
layers/BatchNorm layers/BatchNorm
layers/Cast
layers/Concat layers/Concat
layers/Convolution layers/Convolution
layers/Crop layers/Crop
layers/Data layers/Data
layers/Deconvolution layers/Deconvolution
layers/DepthwiseConv2d
layers/Dropout layers/Dropout
layers/Eltwise layers/Eltwise
layers/ELU layers/ELU
layers/EuclideanLoss layers/EuclideanLoss
layers/Flatten layers/Flatten
layers/FusedBatchNorm
layers/FusedGroupNorm
layers/GroupNorm
layers/InnerProduct layers/InnerProduct
layers/Input layers/Input
layers/LRN layers/LRN
...@@ -183,7 +158,6 @@ vm.caffe.layers ...@@ -183,7 +158,6 @@ vm.caffe.layers
layers/ROIAlign layers/ROIAlign
layers/ROIPooling layers/ROIPooling
layers/Scale layers/Scale
layers/SELU
layers/Sigmoid layers/Sigmoid
layers/SigmoidCrossEntropyLoss layers/SigmoidCrossEntropyLoss
layers/SmoothL1Loss layers/SmoothL1Loss
......
DepthwiseConv2d
===============
.. autoclass:: dragon.vm.caffe.layers.DepthwiseConv2d
.. raw:: html
<style>
h1:before {
content: "caffe.layers.";
color: #103d3e;
}
</style>
FusedBatchNorm
==============
.. autoclass:: dragon.vm.caffe.layers.FusedBatchNorm
.. raw:: html
<style>
h1:before {
content: "caffe.layers.";
color: #103d3e;
}
</style>
FusedGroupNorm
==============
.. autoclass:: dragon.vm.caffe.layers.FusedGroupNorm
.. raw:: html
<style>
h1:before {
content: "caffe.layers.";
color: #103d3e;
}
</style>
GroupNorm
=========
.. autoclass:: dragon.vm.caffe.layers.GroupNorm
.. raw:: html
<style>
h1:before {
content: "caffe.layers.";
color: #103d3e;
}
</style>
SELU
====
.. autoclass:: dragon.vm.caffe.layers.SELU
.. raw:: html
<style>
h1:before {
content: "caffe.layers.";
color: #103d3e;
}
</style>
...@@ -18,8 +18,8 @@ dragon ...@@ -18,8 +18,8 @@ dragon
`class TensorSpec <dragon/TensorSpec.html>`_ `class TensorSpec <dragon/TensorSpec.html>`_
: Spec to describe properties of a tensor. : Spec to describe properties of a tensor.
`class Workspace <dragon/Workspace_.html>`_ `class Workspace <dragon/Workspace.html>`_
: Space to isolate computations that share resources. : Sandbox to isolate the resources and computations.
Functions Functions
--------- ---------
...@@ -151,7 +151,7 @@ dragon ...@@ -151,7 +151,7 @@ dragon
: Return the identity of input with truncated gradient-flow. : Return the identity of input with truncated gradient-flow.
`tile(...) <dragon/tile.html>`_ `tile(...) <dragon/tile.html>`_
: Tile the input according to the given multiples. : Tile the input according to the given repeats.
`transpose(...) <dragon/transpose.html>`_ `transpose(...) <dragon/transpose.html>`_
: Permute the dimensions of input. : Permute the dimensions of input.
...@@ -217,7 +217,7 @@ dragon ...@@ -217,7 +217,7 @@ dragon
dragon/tile dragon/tile
dragon/transpose dragon/transpose
dragon/where dragon/where
dragon/Workspace_ dragon/Workspace
dragon/zeros dragon/zeros
dragon/zeros_like dragon/zeros_like
......
...@@ -14,10 +14,6 @@ gradient ...@@ -14,10 +14,6 @@ gradient
######## ########
.. automethod:: dragon.GradientTape.gradient .. automethod:: dragon.GradientTape.gradient
replay
######
.. automethod:: dragon.GradientTape.replay
reset reset
##### #####
.. automethod:: dragon.GradientTape.reset .. automethod:: dragon.GradientTape.reset
......
...@@ -30,6 +30,10 @@ shape ...@@ -30,6 +30,10 @@ shape
##### #####
.. autoattribute:: dragon.Tensor.shape .. autoattribute:: dragon.Tensor.shape
size
#####
.. autoattribute:: dragon.Tensor.size
Methods Methods
------- -------
......
Workspace
=========
.. autoclass:: dragon.Workspace
__init__
--------
.. automethod:: dragon.Workspace.__init__
Methods
-------
as_default
##########
.. automethod:: dragon.Workspace.as_default
clear
#####
.. automethod:: dragon.Workspace.clear
merge_from
##########
.. automethod:: dragon.Workspace.merge_from
.. raw:: html
<style>
h1:before {
content: "dragon.";
color: #103d3e;
}
</style>
dragon.workspace Workspace
================ =========
.. only:: html .. autoclass:: dragon.Workspace
Functions __init__
--------- --------
.. automethod:: dragon.Workspace.__init__
`feed_tensor(...) <workspace/feed_tensor.html>`_ Methods
: Copy the value to tensor. -------
`fetch_tensor(...) <workspace/fetch_tensor.html>`_ as_default
: Return the value of tensor. ##########
.. automethod:: dragon.Workspace.as_default
`has_tensor(...) <workspace/has_tensor.html>`_ feed_tensor
: Return a bool indicating if tensor is in current workspace. ###########
.. automethod:: dragon.Workspace.feed_tensor
`load(...) <workspace/load.html>`_ fetch_tensor
: Load tensors from a binary file. ############
.. automethod:: dragon.Workspace.fetch_tensor
`reset_tensor(...) <workspace/reset_tensor.html>`_ has_tensor
: Reset the memory of tensor. ##########
.. automethod:: dragon.Workspace.has_tensor
`run_operator(...) <workspace/run_operator.html>`_ merge_from
: Run the operators in current workspace. ##########
.. automethod:: dragon.Workspace.merge_from
`save(...) <workspace/save.html>`_ reset_tensor
: Serialize tensors into a binary file. ############
.. automethod:: dragon.Workspace.reset_tensor
.. toctree::
:hidden:
workspace/feed_tensor
workspace/fetch_tensor
workspace/has_tensor
workspace/load
workspace/reset_tensor
workspace/run_operator
workspace/save
.. raw:: html .. raw:: html
<style> <style>
h1:before { h1:before {
content: "Module: "; content: "dragon.";
color: #103d3e; color: #103d3e;
} }
</style> </style>
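A hedged usage sketch combining the Workspace methods documented on these two pages; the exact call signatures (string name vs. tensor argument, and `as_default()` behaving as a context manager) are assumptions, not taken from this documentation:

```python
import numpy as np
import dragon

ws = dragon.Workspace()
with ws.as_default():                      # assumed: routes the following calls into `ws`
    ws.feed_tensor('x', np.ones((2, 3)))   # assumed signature: (name, value)
    if ws.has_tensor('x'):
        print(ws.fetch_tensor('x').sum())  # read the value back as a NumPy array
ws.clear()                                 # release the resources held by `ws`
```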
feed_tensor
===========
.. autofunction:: dragon.workspace.feed_tensor
.. raw:: html
<style>
h1:before {
content: "dragon.workspace.";
color: #103d3e;
}
</style>
fetch_tensor
============
.. autofunction:: dragon.workspace.fetch_tensor
.. raw:: html
<style>
h1:before {
content: "dragon.workspace.";
color: #103d3e;
}
</style>
has_tensor
==========
.. autofunction:: dragon.workspace.has_tensor
.. raw:: html
<style>
h1:before {
content: "dragon.workspace.";
color: #103d3e;
}
</style>
load
====
.. autofunction:: dragon.workspace.load
.. raw:: html
<style>
h1:before {
content: "dragon.workspace.";
color: #103d3e;
}
</style>
reset_tensor
============
.. autofunction:: dragon.workspace.reset_tensor
.. raw:: html
<style>
h1:before {
content: "dragon.workspace.";
color: #103d3e;
}
</style>
run_operator
============
.. autofunction:: dragon.workspace.run_operator
.. raw:: html
<style>
h1:before {
content: "dragon.workspace.";
color: #103d3e;
}
</style>
save
====
.. autofunction:: dragon.workspace.save
.. raw:: html
<style>
h1:before {
content: "dragon.workspace.";
color: #103d3e;
}
</style>
...@@ -40,7 +40,6 @@ Dragon ...@@ -40,7 +40,6 @@ Dragon
* `dragon.nn <dragon/nn.html>`_ * `dragon.nn <dragon/nn.html>`_
* `dragon.optimizers <dragon/optimizers.html>`_ * `dragon.optimizers <dragon/optimizers.html>`_
* `dragon.random <dragon/random.html>`_ * `dragon.random <dragon/random.html>`_
* `dragon.workspace <dragon/workspace.html>`_
* `dragon.vision <dragon/vision.html>`_ * `dragon.vision <dragon/vision.html>`_
Caffe Caffe
...@@ -112,6 +111,7 @@ PyTorch ...@@ -112,6 +111,7 @@ PyTorch
This style involves the following components: This style involves the following components:
* `torch <torch.html>`_ * `torch <torch.html>`_
* `torch.autograd <torch/autograd.html>`_
* `torch.distributed <torch/distributed.html>`_ * `torch.distributed <torch/distributed.html>`_
* `torch.jit <torch/jit.html>`_ * `torch.jit <torch/jit.html>`_
* `torch.nn <torch/nn.html>`_ * `torch.nn <torch/nn.html>`_
...@@ -206,15 +206,9 @@ Modules ...@@ -206,15 +206,9 @@ Modules
`Module random <dragon/random.html>`_ `Module random <dragon/random.html>`_
: Native API for ``dragon.random`` namespace. : Native API for ``dragon.random`` namespace.
`Module workspace <dragon/workspace.html>`_
: Native API for ``dragon.workspace`` namespace.
`Module vision <dragon/vision.html>`_ `Module vision <dragon/vision.html>`_
: Native API for ``dragon.vision`` namespace. : Native API for ``dragon.vision`` namespace.
`Module workspace <dragon/workspace.html>`_
: Native API for ``dragon.workspace`` namespace.
`Module vm.caffe <caffe.html>`_ `Module vm.caffe <caffe.html>`_
: Virtual API for ``caffe`` namespace. : Virtual API for ``caffe`` namespace.
...@@ -278,6 +272,9 @@ Modules ...@@ -278,6 +272,9 @@ Modules
`Module vm.torch <torch.html>`_ `Module vm.torch <torch.html>`_
: Virtual API for ``torch`` namespace. : Virtual API for ``torch`` namespace.
`Module vm.torch.autograd <torch/autograd.html>`_
: Virtual API for ``torch.autograd`` namespace.
`Module vm.torch.distributed <torch/distributed.html>`_ `Module vm.torch.distributed <torch/distributed.html>`_
: Virtual API for ``torch.distributed`` namespace. : Virtual API for ``torch.distributed`` namespace.
...@@ -319,7 +316,6 @@ Modules ...@@ -319,7 +316,6 @@ Modules
dragon/nn dragon/nn
dragon/optimizers dragon/optimizers
dragon/random dragon/random
dragon/workspace
dragon/vision dragon/vision
caffe caffe
caffe/layers caffe/layers
...@@ -343,6 +339,7 @@ Modules ...@@ -343,6 +339,7 @@ Modules
tensorrt tensorrt
tensorrt/backend tensorrt/backend
torch torch
torch/autograd
torch/distributed torch/distributed
torch/jit torch/jit
torch/nn torch/nn
......
...@@ -15,11 +15,6 @@ gradient ...@@ -15,11 +15,6 @@ gradient
.. automethod:: dragon.GradientTape.gradient .. automethod:: dragon.GradientTape.gradient
:noindex: :noindex:
replay
######
.. automethod:: dragon.GradientTape.replay
:noindex:
reset reset
##### #####
.. automethod:: dragon.GradientTape.reset .. automethod:: dragon.GradientTape.reset
......
vm.torch.autograd
==================
.. only:: html
Functions
---------
`backward(...) <autograd/backward.html>`_
: Compute the derivatives of tensors w.r.t. graph leaves.
.. toctree::
:hidden:
autograd/backward
.. raw:: html
<style>
h1:before {
content: "Module: dragon.";
color: #103d3e;
}
</style>
Cast backward
==== ========
.. autoclass:: dragon.vm.caffe.layers.Cast .. autofunction:: dragon.vm.torch.autograd.backward
.. raw:: html .. raw:: html
<style> <style>
h1:before { h1:before {
content: "caffe.layers."; content: "torch.autograd.";
color: #103d3e; color: #103d3e;
} }
</style> </style>
...@@ -7,7 +7,7 @@ all_reduce ...@@ -7,7 +7,7 @@ all_reduce
<style> <style>
h1:before { h1:before {
content: "torch.nn.distributed."; content: "torch.distributed.";
color: #103d3e; color: #103d3e;
} }
</style> </style>
...@@ -7,7 +7,7 @@ broadcast ...@@ -7,7 +7,7 @@ broadcast
<style> <style>
h1:before { h1:before {
content: "torch.nn.distributed."; content: "torch.distributed.";
color: #103d3e; color: #103d3e;
} }
</style> </style>
...@@ -7,7 +7,7 @@ trace ...@@ -7,7 +7,7 @@ trace
<style> <style>
h1:before { h1:before {
content: "torch.nn.jit."; content: "torch.jit.";
color: #103d3e; color: #103d3e;
} }
</style> </style>
...@@ -10,25 +10,25 @@ __init__ ...@@ -10,25 +10,25 @@ __init__
Methods Methods
------- -------
accumulate_grad accumulate
############### ##########
.. automethod:: dragon.vm.torch.optim.Optimizer.accumulate_grad .. automethod:: dragon.vm.torch.optim.Optimizer.accumulate
:noindex: :noindex:
add_param_group add_param_group
############### ###############
.. automethod:: dragon.vm.torch.optim.Optimizer.add_param_group .. automethod:: dragon.vm.torch.optim.Optimizer.add_param_group
:noindex: :noindex:
step step
#### ####
.. automethod:: dragon.vm.torch.optim.Optimizer.step .. automethod:: dragon.vm.torch.optim.Optimizer.step
:noindex: :noindex:
zero_grad zero_grad
######### #########
.. automethod:: dragon.vm.torch.optim.Optimizer.zero_grad .. automethod:: dragon.vm.torch.optim.Optimizer.zero_grad
:noindex: :noindex:
.. raw:: html .. raw:: html
......
...@@ -10,9 +10,9 @@ __init__ ...@@ -10,9 +10,9 @@ __init__
Methods Methods
------- -------
accumulate_grad accumulate
############### ##########
.. automethod:: dragon.vm.torch.optim.Optimizer.accumulate_grad .. automethod:: dragon.vm.torch.optim.Optimizer.accumulate
add_param_group add_param_group
############### ###############
......
...@@ -10,25 +10,25 @@ __init__ ...@@ -10,25 +10,25 @@ __init__
Methods Methods
------- -------
accumulate_grad accumulate
############### ##########
.. automethod:: dragon.vm.torch.optim.Optimizer.accumulate_grad .. automethod:: dragon.vm.torch.optim.Optimizer.accumulate
:noindex: :noindex:
add_param_group add_param_group
############### ###############
.. automethod:: dragon.vm.torch.optim.Optimizer.add_param_group .. automethod:: dragon.vm.torch.optim.Optimizer.add_param_group
:noindex: :noindex:
step step
#### ####
.. automethod:: dragon.vm.torch.optim.Optimizer.step .. automethod:: dragon.vm.torch.optim.Optimizer.step
:noindex: :noindex:
zero_grad zero_grad
######### #########
.. automethod:: dragon.vm.torch.optim.Optimizer.zero_grad .. automethod:: dragon.vm.torch.optim.Optimizer.zero_grad
:noindex: :noindex:
.. raw:: html .. raw:: html
......
...@@ -10,25 +10,25 @@ __init__ ...@@ -10,25 +10,25 @@ __init__
Methods Methods
------- -------
accumulate_grad accumulate
############### ##########
.. automethod:: dragon.vm.torch.optim.Optimizer.accumulate_grad .. automethod:: dragon.vm.torch.optim.Optimizer.accumulate
:noindex: :noindex:
add_param_group add_param_group
############### ###############
.. automethod:: dragon.vm.torch.optim.Optimizer.add_param_group .. automethod:: dragon.vm.torch.optim.Optimizer.add_param_group
:noindex: :noindex:
step step
#### ####
.. automethod:: dragon.vm.torch.optim.Optimizer.step .. automethod:: dragon.vm.torch.optim.Optimizer.step
:noindex: :noindex:
zero_grad zero_grad
######### #########
.. automethod:: dragon.vm.torch.optim.Optimizer.zero_grad .. automethod:: dragon.vm.torch.optim.Optimizer.zero_grad
:noindex: :noindex:
.. raw:: html .. raw:: html
......
...@@ -7,7 +7,7 @@ namespace dragon { ...@@ -7,7 +7,7 @@ namespace dragon {
GraphBase::GraphBase(const GraphDef& def, Workspace* ws) GraphBase::GraphBase(const GraphDef& def, Workspace* ws)
: def_(def), ws_(ws), name_(def.name()), phase_("TEST") { : def_(def), ws_(ws), name_(def.name()), phase_("TEST") {
// Scan the defined arguments // Collect arguments
for (auto& arg : def_.arg()) { for (auto& arg : def_.arg()) {
CHECK_GT(arg.name().size(), 0); CHECK_GT(arg.name().size(), 0);
CHECK_EQ(args_.count(arg.name()), 0); CHECK_EQ(args_.count(arg.name()), 0);
...@@ -18,32 +18,31 @@ GraphBase::GraphBase(const GraphDef& def, Workspace* ws) ...@@ -18,32 +18,31 @@ GraphBase::GraphBase(const GraphDef& def, Workspace* ws)
// Collect outputs // Collect outputs
Set<string> outputs; Set<string> outputs;
for (const auto& op : def.op()) { for (const auto& op : def.op()) {
for (const auto& in : op.input()) for (const auto& input : op.input())
CHECK(outputs.count(in) || ws_->HasTensor(in)) CHECK(outputs.count(input) || ws_->HasTensor(input))
<< "\nInput: " << in << " for op: " << op.name() << " is unknown."; << "\nThe input <" << input << "> is not in graph.";
for (const auto& out : op.output()) for (const auto& output : op.output()) {
outputs.insert(out); outputs.insert(output);
}
} }
// Check targets // Check targets
Set<string> targets; Set<string> targets;
for (const auto& target : def.output()) { for (const auto& target : def.output()) {
CHECK(outputs.count(target) || ws_->HasTensor(target)) CHECK(outputs.count(target) || ws_->HasTensor(target))
<< "\nTarget: " << target << " does not exist in the graph."; << "\nThe output <" << target << "> is not in graph.";
targets.insert(target); targets.insert(target);
} }
// Check gradients // Check gradients
for (const auto& gradient : def.gradient()) { for (const auto& grad_info : def.grad_info()) {
const auto& cost = gradient.cost(); const auto& y = grad_info.y();
const auto& wrt = gradient.wrt(); CHECK_GT(targets.count(y), 0)
CHECK(outputs.count(cost) || ws_->HasTensor(cost)) << "\nThe derivative target <" << y << "> is not in outputs.";
<< "\nTarget: " << cost << "does not exist in the graph."; for (const auto& x : grad_info.xs()) {
CHECK(outputs.count(wrt) || ws_->HasTensor(wrt)) CHECK(outputs.count(x) || ws_->HasTensor(x))
<< "\nTarget: " << wrt << "does not exist in the graph."; << "\nThe differentiated input <" << x << "> is not in graph.";
CHECK_GT(targets.count(cost), 0) }
<< "\nTo solve d(" << cost << ")/d(" << wrt << "),\n"
<< cost << " should be set as a target.";
} }
} }
...@@ -54,21 +53,18 @@ bool Graph::Create(const GraphDef& def, Workspace* ws) { ...@@ -54,21 +53,18 @@ bool Graph::Create(const GraphDef& def, Workspace* ws) {
auto op_def(def.op(i)); auto op_def(def.op(i));
LOG(DEBUG) << "Create Operator " << op_def.name() << ": " << op_def.type(); LOG(DEBUG) << "Create Operator " << op_def.name() << ": " << op_def.type();
// Inherit device option if necessary // Inherit device option if necessary
if (!op_def.has_device_option() && has_device_option) if (!op_def.has_device_option() && has_device_option) {
op_def.mutable_device_option()->CopyFrom(def.device_option()); op_def.mutable_device_option()->CopyFrom(def.device_option());
}
Argument arg; Argument arg;
arg.set_name("allow_recomp");
arg.set_i(1);
op_def.add_arg()->CopyFrom(arg);
// For the last operator, enforce the synchronization // For the last operator, enforce the synchronization
if (i == def.op_size() - 1) { if (i == def.op_size() - 1) {
arg.set_name("do_sync"); arg.set_name("do_sync");
arg.set_i(1); arg.set_i(1);
op_def.add_arg()->CopyFrom(arg); op_def.add_arg()->CopyFrom(arg);
} }
ops_.push_back(NewOperator(op_def, ws)); cached_ops_.push_back(NewOperator(op_def, ws));
// Attatch the output aliases info cached_ops_.back()->set_output_aliases(output_aliases_);
ops_.back()->set_output_aliases(output_aliases_);
} }
return true; return true;
} }
...@@ -80,7 +76,7 @@ Graph::Graph(const GraphDef& def, Workspace* ws) : GraphBase(def, ws) { ...@@ -80,7 +76,7 @@ Graph::Graph(const GraphDef& def, Workspace* ws) : GraphBase(def, ws) {
GraphGradientMaker gradient_maker; GraphGradientMaker gradient_maker;
Map<string, vec32_t> subgraph_indices; Map<string, vec32_t> subgraph_indices;
int opt = 3; // defaults: O3 int opt = 3; // defaults: O3
if (args().count("optimization_level")) opt = arg("optimization_level").i(); if (args().count("optimization")) opt = arg("optimization").i();
if (opt >= 1) opt_def = graph_optim.PruneNodes(def); if (opt >= 1) opt_def = graph_optim.PruneNodes(def);
if (opt >= 2) graph_optim.AddInplace(opt_def, output_aliases_); if (opt >= 2) graph_optim.AddInplace(opt_def, output_aliases_);
if (opt >= 3) { if (opt >= 3) {
...@@ -101,22 +97,23 @@ Graph::Graph(const GraphDef& def, Workspace* ws) : GraphBase(def, ws) { ...@@ -101,22 +97,23 @@ Graph::Graph(const GraphDef& def, Workspace* ws) : GraphBase(def, ws) {
for (const auto& it : subgraph_indices) { for (const auto& it : subgraph_indices) {
subgraph[it.first] = vector<OperatorBase*>(); subgraph[it.first] = vector<OperatorBase*>();
for (const auto& idx : subgraph_indices[it.first]) for (const auto& idx : subgraph_indices[it.first])
subgraph[it.first].push_back(ops_[idx]); subgraph[it.first].push_back(cached_ops_[idx]);
} }
for (const auto& op : ops_) for (auto* op : cached_ops_) {
op->set_subgraph(subgraph); op->set_subgraph(subgraph);
}
} }
} }
bool Graph::Run(const string& incl, const string& excl, int stream_id) { bool Graph::Run(const string& include, const string& exclude, int stream) {
LOG(DEBUG) << "Run Graph: " << name(); LOG(DEBUG) << "Run Graph: " << name();
for (auto op : ops_) { for (auto* op : cached_ops_) {
if (!incl.empty() && !str::find(op->type(), incl)) continue; if (!include.empty() && !str::find(op->type(), include)) continue;
if (!excl.empty() && str::find(op->type(), excl)) continue; if (!exclude.empty() && str::find(op->type(), exclude)) continue;
op->SwitchToPhase(phase()); op->SwitchToPhase(phase());
LOG(DEBUG) << "$ Before Operator: " << op->name(); LOG(DEBUG) << "Run Op: " << op->name();
op->Run(stream_id); op->Run(stream);
LOG(DEBUG) << "$ After Operator: " << op->name(); LOG(DEBUG) << "Finish Op: " << op->name();
} }
return true; return true;
} }
......
...@@ -88,8 +88,8 @@ class Graph : public GraphBase { ...@@ -88,8 +88,8 @@ class Graph : public GraphBase {
/*! \brief Default Destructor */ /*! \brief Default Destructor */
virtual ~Graph() { virtual ~Graph() {
for (auto* op : ops_) { for (auto* cached_op : cached_ops_) {
delete op; delete cached_op;
} }
} }
...@@ -100,8 +100,8 @@ class Graph : public GraphBase { ...@@ -100,8 +100,8 @@ class Graph : public GraphBase {
bool Run(const string&, const string&, int = 0) override; bool Run(const string&, const string&, int = 0) override;
protected: protected:
/*! \brief Store the internal operators */ /*! \brief The cached operators */
vector<OperatorBase*> ops_; vector<OperatorBase*> cached_ops_;
/*! \brief Store the candidate output aliases */ /*! \brief Store the candidate output aliases */
Map<string, Set<string>> output_aliases_; Map<string, Set<string>> output_aliases_;
......
...@@ -4,23 +4,24 @@ ...@@ -4,23 +4,24 @@
namespace dragon { namespace dragon {
bool GraphGradientMaker::CheckGrad( bool GraphGradientMaker::CheckGrad(
const OperatorDef& forward_op, const OperatorDef& op_def,
const Set<string>& targets, const Set<string>& targets,
vector<pair<string, int>>& gen_grads) { vector<pair<string, int>>& gen_grads) {
if (NoGradientRegistry()->Has(forward_op.type())) { if (NoGradientRegistry()->Has(op_def.type())) {
for (auto& input : forward_op.input()) for (auto& input : op_def.input()) {
blacklist_set_.insert(input); blacklist_set_.insert(input);
}
return true; return true;
} }
for (int i = 0; i < forward_op.output_size(); ++i) { for (int i = 0; i < op_def.output_size(); ++i) {
const auto& output = forward_op.output(i); const auto& output = op_def.output(i);
if (!inputs_to_grads_.count(output)) { if (!inputs_to_grads_.count(output)) {
if (blacklist_set_.count(output)) return true; if (blacklist_set_.count(output)) return true;
if (targets.count(output)) { if (targets.count(output)) {
// Consider to generate virtual gradient for targets // Consider to generate virtual gradient for targets
gen_grads.push_back({output, i}); gen_grads.push_back({output, i});
inputs_to_grads_[output] = output + "_grad"; inputs_to_grads_[output] = output + "_grad";
} else if (forward_op.output_size() == 1) { } else if (op_def.output_size() == 1) {
return true; // We can skip this op, obviously return true; // We can skip this op, obviously
} }
} }
...@@ -30,7 +31,7 @@ bool GraphGradientMaker::CheckGrad( ...@@ -30,7 +31,7 @@ bool GraphGradientMaker::CheckGrad(
} }
void GraphGradientMaker::Make( void GraphGradientMaker::Make(
const vector<OperatorDef*>& forward_ops, const vector<OperatorDef*>& op_defs,
const vector<string>& targets, const vector<string>& targets,
const vector<string>& input_grads, const vector<string>& input_grads,
GraphDef& backward_def) { GraphDef& backward_def) {
...@@ -39,11 +40,11 @@ void GraphGradientMaker::Make( ...@@ -39,11 +40,11 @@ void GraphGradientMaker::Make(
Map<string, string> targets_to_grads; Map<string, string> targets_to_grads;
// PLAY for the forward // PLAY for the forward
for (auto* op : forward_ops) { for (auto* op_def : op_defs) {
if (NoGradientRegistry()->Has(op->type())) continue; if (NoGradientRegistry()->Has(op_def->type())) continue;
for (const auto& input : op->input()) { for (const auto& input : op_def->input()) {
bool input_in_outputs = false; bool input_in_outputs = false;
for (auto& output : op->output()) for (auto& output : op_def->output())
if (output == input) { if (output == input) {
input_in_outputs = true; input_in_outputs = true;
break; break;
...@@ -62,9 +63,9 @@ void GraphGradientMaker::Make( ...@@ -62,9 +63,9 @@ void GraphGradientMaker::Make(
targets_set.insert(targets[i]); targets_set.insert(targets[i]);
} }
for (int op_idx = (int)forward_ops.size() - 1; op_idx >= 0; --op_idx) { for (int op_idx = (int)op_defs.size() - 1; op_idx >= 0; --op_idx) {
// Collect inputs and outputs, generate raw gradient ops // Collect inputs and outputs, generate raw gradient ops
const OperatorDef& op = *forward_ops[op_idx]; const OperatorDef& op = *op_defs[op_idx];
vector<pair<string, int>> gen_grads; vector<pair<string, int>> gen_grads;
bool is_skip = CheckGrad(op, targets_set, gen_grads); bool is_skip = CheckGrad(op, targets_set, gen_grads);
vector<string> g_outputs; vector<string> g_outputs;
...@@ -183,9 +184,9 @@ GraphDef GraphGradientMaker::Share(const GraphDef& input_def) { ...@@ -183,9 +184,9 @@ GraphDef GraphGradientMaker::Share(const GraphDef& input_def) {
// Flag the gathering gradients // Flag the gathering gradients
if (op.type() == "GradientGather") { if (op.type() == "GradientGather") {
invalid_ops.insert(op_idx); invalid_ops.insert(op_idx);
if (ignored_grads_.count(op.output(0))) { if (empty_grads_.count(op.output(0))) {
for (const auto& input : op.input()) { for (const auto& input : op.input()) {
ignored_grads_.insert(input); empty_grads_.insert(input);
} }
continue; continue;
} else { } else {
...@@ -200,7 +201,7 @@ GraphDef GraphGradientMaker::Share(const GraphDef& input_def) { ...@@ -200,7 +201,7 @@ GraphDef GraphGradientMaker::Share(const GraphDef& input_def) {
} }
// Count the references to detect leaves // Count the references to detect leaves
for (const auto& input : op.input()) { for (const auto& input : op.input()) {
if (str::find(input, "grad")) { if (str::endswith(input, "_grad")) {
ref_count[input] += 1; ref_count[input] += 1;
} }
} }
...@@ -293,21 +294,17 @@ GraphDef GraphGradientMaker::Share(const GraphDef& input_def) { ...@@ -293,21 +294,17 @@ GraphDef GraphGradientMaker::Share(const GraphDef& input_def) {
// Rewrite output gradients // Rewrite output gradients
for (int i = 0; i < op->output_size(); ++i) { for (int i = 0; i < op->output_size(); ++i) {
if (str::startswith(op->type(), "Python")) continue;
const string& output = op->output(i); const string& output = op->output(i);
if (output.empty() || str::startswith(output, "/share/")) continue; if (output.empty() || str::startswith(output, "/share/buffer")) continue;
if (ignored_grads_.count(output) > 0) { if (empty_grads_.count(output) > 0) {
// Prune for non-trainable leafs
*op->mutable_output(i) = ""; *op->mutable_output(i) = "";
continue; continue;
} }
if (hooked_grads_.empty()) { // Protection for leaves
// Protection for leaves if (ref_count.count(output) == 0) continue;
if (ref_count.count(output) == 0) continue; // Protection for sources and leaves
} else { if (retained_grads_.count(output) > 0) continue;
// Protection for sources
if (hooked_grads_.count(output) > 0) continue;
}
if (op->type() == "PythonPluginGradient") continue;
string new_output = output; string new_output = output;
if (inplace_flags[i] >= 0) { if (inplace_flags[i] >= 0) {
new_output = op->input(inplace_flags[i]); new_output = op->input(inplace_flags[i]);
......
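Note that the reference counting in Share() now keys on the exact "_grad" suffix rather than any substring hit. A standalone sketch of that counting step, using only the standard library (the helper ends_with stands in for str::endswith, and the tensor names are illustrative):

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Helper standing in for str::endswith.
static bool ends_with(const std::string& s, const std::string& suffix) {
  return s.size() >= suffix.size() &&
         s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}

int main() {
  // Inputs of two hypothetical gradient ops.
  std::vector<std::vector<std::string>> op_inputs = {
      {"conv1/W", "conv2_grad"}, {"conv2_grad", "relu1_grad"}};
  std::map<std::string, int> ref_count;
  for (const auto& inputs : op_inputs) {
    for (const auto& input : inputs) {
      // Only exact "_grad" suffixes are counted; "conv1/W" is ignored.
      if (ends_with(input, "_grad")) ref_count[input] += 1;
    }
  }
  for (const auto& it : ref_count) {
    std::cout << it.first << " -> " << it.second << "\n";  // conv2_grad -> 2
  }
  return 0;
}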
...@@ -21,22 +21,22 @@ class DRAGON_API GraphGradientMaker { ...@@ -21,22 +21,22 @@ class DRAGON_API GraphGradientMaker {
public: public:
/*! \brief Generate a backward graph from the forward ops */ /*! \brief Generate a backward graph from the forward ops */
void Make( void Make(
const vector<OperatorDef*>& forward_ops, const vector<OperatorDef*>& op_defs,
const vector<string>& targets, const vector<string>& targets,
const vector<string>& input_grads, const vector<string>& input_grads,
GraphDef& backward_def); GraphDef& graph_def);
/*! \brief Rewrite a graph to share the intermediate grads */ /*! \brief Rewrite a graph to share the intermediate grads */
GraphDef Share(const GraphDef& input_def); GraphDef Share(const GraphDef& input_def);
/*! \brief Add a hooked gradient */ /*! \brief Add an empty gradient */
void add_hooked_grad(const string& name) { void add_empty_grad(const string& name) {
hooked_grads_.insert(name); empty_grads_.insert(name);
} }
/*! \brief Add an ignored gradient */ /*! \brief Add a retained gradient */
void add_ignored_grad(const string& name) { void add_retained_grad(const string& name) {
ignored_grads_.insert(name); retained_grads_.insert(name);
} }
/*! \brief Set the prefix of backward op name */ /*! \brief Set the prefix of backward op name */
...@@ -47,32 +47,32 @@ class DRAGON_API GraphGradientMaker { ...@@ -47,32 +47,32 @@ class DRAGON_API GraphGradientMaker {
private: private:
/*! \brief Check the missing grads of the backward procedure */ /*! \brief Check the missing grads of the backward procedure */
bool CheckGrad( bool CheckGrad(
const OperatorDef& forward_op, const OperatorDef& op_def,
const Set<string>& targets, const Set<string>& targets,
vector<pair<string, int>>& gen_grads); vector<pair<string, int>>& gen_grads);
/*! \brief Return a dummy operator name */ /*! \brief Return a dummy operator name */
string GetOperatorName() { string GetOperatorName() {
if (op_prefix_.empty()) return "Generic"; if (op_prefix_.empty()) return "GradientOp";
return op_prefix_ + str::to(op_index_++); return op_prefix_ + str::to(op_index_++);
} }
/*! \brief Store the mapping of intermediate grads */ /*! \brief The mapping from input to grad */
Map<string, string> inputs_to_grads_; Map<string, string> inputs_to_grads_;
/*! \brief Store the non-gradient outputs */ /*! \brief The non-gradient outputs */
Set<string> blacklist_set_; Set<string> blacklist_set_;
/*! \brief Store the non-shared gradients */ /*! \brief The gradients should be retained */
Set<string> hooked_grads_; Set<string> retained_grads_;
/*! \brief Store the gradients that are not required */ /*! \brief The gradients should be set to empty */
Set<string> ignored_grads_; Set<string> empty_grads_;
/*! \brief Store the prefix of dummy operator name */ /*! \brief The prefix of op name */
string op_prefix_; string op_prefix_;
/*! \brief Store the counter of dummy operator name */ /*! \brief The counter of op name */
int64_t op_index_ = 0; int64_t op_index_ = 0;
}; };
......
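Taken together, the renamed hooks mirror the RunBackward binding later in this commit: gradients of the sources are kept alive with add_retained_grad, unwanted ones are declared with add_empty_grad, and Share() rewrites the remainder onto shared buffers. A hedged sketch of such a driver, assuming the dragon headers diffed in this commit are available (the function name and the empty input_grads list are illustrative):

#include <string>
#include <vector>

// Sketch only: assumes the dragon core headers whose diffs appear above.
void RunBackwardSketch(
    dragon::Workspace* ws,
    const std::vector<dragon::OperatorDef*>& forward_defs,
    const std::vector<std::string>& targets,
    const std::vector<std::string>& sources) {
  dragon::GraphDef backward_def;
  dragon::GraphGradientMaker maker;
  // Gradients of the sources must survive the buffer-sharing pass.
  for (const auto& name : sources) {
    maker.add_retained_grad(name + "_grad");
  }
  maker.Make(forward_defs, targets, /* input_grads = */ {}, backward_def);
  backward_def = maker.Share(backward_def);
  for (const auto& op_def : backward_def.op()) {
    ws->RunOperator(op_def);
  }
}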
...@@ -39,14 +39,12 @@ GraphDef GraphOptimizer::PruneNodes(const GraphDef& input_def) { ...@@ -39,14 +39,12 @@ GraphDef GraphOptimizer::PruneNodes(const GraphDef& input_def) {
BackwardPrunePass(target); BackwardPrunePass(target);
} }
// Forward pass from gradients for (const auto& grad_info : input_def.grad_info()) {
for (const auto& gradient : input_def.gradient()) { const auto u = grad_info.y() + "_grad";
auto u = gradient.cost() + "_grad"; for (const auto& x : grad_info.xs()) {
auto v = gradient.wrt() + "_grad"; visited_.clear();
if (ws_->HasTensor(u)) u = ws_->GetTensor(u)->name(); ForwardPrunePass(u, x + "_grad", std::deque<string>({u}));
if (ws_->HasTensor(v)) v = ws_->GetTensor(v)->name(); }
visited_.clear();
ForwardPrunePass(u, v, vector<string>({u}));
} }
// Select all colored operators // Select all colored operators
...@@ -64,7 +62,6 @@ GraphDef GraphOptimizer::PruneNodes(const GraphDef& input_def) { ...@@ -64,7 +62,6 @@ GraphDef GraphOptimizer::PruneNodes(const GraphDef& input_def) {
// Generate the final op sequence // Generate the final op sequence
map<int, OperatorDef> final_sequence; map<int, OperatorDef> final_sequence;
for (auto op_idx : selected_op_indices) { for (auto op_idx : selected_op_indices) {
const auto& op = input_def.op(op_idx); const auto& op = input_def.op(op_idx);
auto new_op(input_def.op(op_idx)); auto new_op(input_def.op(op_idx));
...@@ -308,11 +305,13 @@ GraphDef GraphOptimizer::SimulateGC(const GraphDef& input_def) { ...@@ -308,11 +305,13 @@ GraphDef GraphOptimizer::SimulateGC(const GraphDef& input_def) {
void GraphOptimizer::ForwardPrunePass( void GraphOptimizer::ForwardPrunePass(
const string& u, const string& u,
const string& leaf, const string& leaf,
const vector<string>& path) { const std::deque<string>& path) {
if (visited_.count(u)) { if (visited_.count(u)) {
if (visited_[u]) if (visited_[u]) {
for (const auto& node : path) for (const auto& node : path) {
visited_[node] = colored_[node] = true; visited_[node] = colored_[node] = true;
}
}
return; return;
} }
visited_[u] = false; visited_[u] = false;
...@@ -321,8 +320,9 @@ void GraphOptimizer::ForwardPrunePass( ...@@ -321,8 +320,9 @@ void GraphOptimizer::ForwardPrunePass(
auto new_path(path); auto new_path(path);
new_path.push_back(v); new_path.push_back(v);
if (v == leaf) { if (v == leaf) {
for (const auto& node : new_path) for (const auto& node : new_path) {
visited_[node] = colored_[node] = true; visited_[node] = colored_[node] = true;
}
return; return;
} }
ForwardPrunePass(v, leaf, new_path); ForwardPrunePass(v, leaf, new_path);
......
...@@ -56,7 +56,7 @@ class GraphOptimizer { ...@@ -56,7 +56,7 @@ class GraphOptimizer {
void ForwardPrunePass( void ForwardPrunePass(
const string& u, const string& u,
const string& leaf, const string& leaf,
const vector<string>& path); const std::deque<string>& path);
/*! \brief Pass from targets to remove unused nodes */ /*! \brief Pass from targets to remove unused nodes */
void BackwardPrunePass(const string& v); void BackwardPrunePass(const string& v);
......
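The forward prune pass colours every node that lies on a path from a gradient source to a requested leaf. A standalone toy illustration of that path-colouring idea (plain std::map graph with made-up tensor names; the visited_ memoisation of the real pass is omitted for brevity):

#include <deque>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using ToyGraph = std::map<std::string, std::vector<std::string>>;

// Colour every node that lies on some path from `u` to `leaf`.
void ForwardPrune(
    const ToyGraph& graph,
    const std::string& u,
    const std::string& leaf,
    std::deque<std::string> path,
    std::map<std::string, bool>& colored) {
  if (u == leaf) {
    for (const auto& node : path) colored[node] = true;
    return;
  }
  const auto it = graph.find(u);
  if (it == graph.end()) return;
  for (const auto& v : it->second) {
    auto new_path(path);
    new_path.push_back(v);
    ForwardPrune(graph, v, leaf, new_path, colored);
  }
}

int main() {
  ToyGraph g = {{"loss_grad", {"fc_grad", "aux_grad"}},
                {"fc_grad", {"conv_grad"}}};
  std::map<std::string, bool> colored;
  ForwardPrune(g, "loss_grad", "conv_grad", {"loss_grad"}, colored);
  for (const auto& it : colored) std::cout << it.first << "\n";
  // Prints conv_grad, fc_grad, loss_grad; aux_grad stays uncoloured.
  return 0;
}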
...@@ -41,14 +41,11 @@ OperatorBase::OperatorBase(const OperatorDef& def, Workspace* ws) ...@@ -41,14 +41,11 @@ OperatorBase::OperatorBase(const OperatorDef& def, Workspace* ws)
} }
} }
template <class Context> // template <class Context>
Operator<Context>::Operator(const OperatorDef& def, Workspace* ws) // Operator<Context>::Operator(const OperatorDef& def, Workspace* ws)
: OperatorBase(def, ws), // : OperatorBase(def, ws),
ctx_(def.device_option()), // ctx_(def.device_option()),
do_sync_(OpArg<bool>("do_sync", false)), // do_sync_(OpArg<bool>("do_sync", false)) {}
allow_recomp_(OpArg<bool>("allow_recomp", false)) {
allow_run_ = (!(OutputSize() == 1 && !Output(0)->has_name()));
}
Tensor& OperatorBase::Input(int i) { Tensor& OperatorBase::Input(int i) {
CHECK_LT(i, (int)inputs_.size()); CHECK_LT(i, (int)inputs_.size());
...@@ -112,32 +109,32 @@ OperatorBase* OperatorBase::UpdateFrom(const OperatorDef& def) { ...@@ -112,32 +109,32 @@ OperatorBase* OperatorBase::UpdateFrom(const OperatorDef& def) {
handle_ = def.name(); handle_ = def.name();
inputs_.resize(def.input_size()); inputs_.resize(def.input_size());
outputs_.resize(def.output_size()); outputs_.resize(def.output_size());
for (int i = 0; i < inputs_.size(); i++) for (int i = 0; i < inputs_.size(); i++) {
inputs_[i] = ws()->GetTensor(def.input(i)); inputs_[i] = ws()->GetTensor(def.input(i));
for (int i = 0; i < outputs_.size(); i++) }
for (int i = 0; i < outputs_.size(); i++) {
outputs_[i] = ws()->CreateTensor(def.output(i)); outputs_[i] = ws()->CreateTensor(def.output(i));
}
return this; return this;
} }
template <class Context> template <class Context>
void Operator<Context>::Prepare() { void Operator<Context>::Prepare() {
string tensor_name;
size_t ver_pos;
int version;
for (int i = 0; i < InputSize(); i++) { for (int i = 0; i < InputSize(); i++) {
if (Input(i).version() >= 0) { if (Input(i).version() >= 0) {
tensor_name = def().input(i); const auto& name = def().input(i);
ver_pos = tensor_name.find("/ver:"); auto ver_pos = name.find("/ver:");
version = std::atoi(tensor_name.substr(ver_pos + 5).c_str()); auto version = std::atoi(name.substr(ver_pos + 5).c_str());
if (version == Input(i).version()) continue; if (version == Input(i).version()) continue;
LOG(DEBUG) << "Excepted version of Tensor(" + Input(i).name() + ") " LOG(DEBUG) << "Excepted version of Tensor(" + Input(i).name() + ") "
<< "is " << version << ", got " << Input(i).version() << "is " << version << ", got " << Input(i).version()
<< ". Recompute."; << ". Recompute.";
Tensor* flag = ws()->GetTensor("/share/flag/recomputing"); Tensor* flag = ws()->GetTensor("/share/flag/recomputing");
flag->mutable_data<bool, CPUContext>()[0] = true; flag->mutable_data<bool, CPUContext>()[0] = true;
vector<OperatorBase*>& chain = subgraph()[tensor_name]; vector<OperatorBase*>& chain = subgraph()[name];
for (auto* op : chain) for (auto* op : chain) {
op->Run(ctx()->stream_id()); op->Run(ctx()->stream_id());
}
flag->mutable_data<bool, CPUContext>()[0] = false; flag->mutable_data<bool, CPUContext>()[0] = false;
} }
} }
...@@ -145,14 +142,11 @@ void Operator<Context>::Prepare() { ...@@ -145,14 +142,11 @@ void Operator<Context>::Prepare() {
template <class Context> template <class Context>
void Operator<Context>::Release() { void Operator<Context>::Release() {
string tensor_name;
size_t ver_pos;
int version;
for (int i = 0; i < OutputSize(); i++) { for (int i = 0; i < OutputSize(); i++) {
if (Output(i)->version() >= 0) { if (Output(i)->version() >= 0) {
tensor_name = def().output(i); const auto& name = def().output(i);
ver_pos = tensor_name.find("/ver:"); auto ver_pos = name.find("/ver:");
version = std::atoi(tensor_name.substr(ver_pos + 5).c_str()); auto version = std::atoi(name.substr(ver_pos + 5).c_str());
Output(i)->set_version(version); Output(i)->set_version(version);
} }
} }
...@@ -195,8 +189,7 @@ TryCreateOperator(const string& key, const OperatorDef& def, Workspace* ws) { ...@@ -195,8 +189,7 @@ TryCreateOperator(const string& key, const OperatorDef& def, Workspace* ws) {
OperatorBase* NewOperator(const OperatorDef& def, Workspace* ws) { OperatorBase* NewOperator(const OperatorDef& def, Workspace* ws) {
auto* schema = OpSchemaRegistry::Schema(def.type()); auto* schema = OpSchemaRegistry::Schema(def.type());
if (schema) { if (schema != nullptr) {
// Check the Inputs and Outputs if necessary
CHECK(schema->Verify(def)) CHECK(schema->Verify(def))
<< "\nOperator failed to pass the schema checking."; << "\nOperator failed to pass the schema checking.";
} }
...@@ -219,7 +212,7 @@ Gradient MakeGradientForOp( ...@@ -219,7 +212,7 @@ Gradient MakeGradientForOp(
<< "not implemented."; << "not implemented.";
Gradient grad = maker->Make(); Gradient grad = maker->Make();
OperatorDef reference_def(def); OperatorDef reference_def(def);
// Map the cache key // Set the cache key
if (reference_def.has_cache_key()) { if (reference_def.has_cache_key()) {
for (int i = 0; i < grad.ops.size(); ++i) { for (int i = 0; i < grad.ops.size(); ++i) {
grad.ops[i].set_cache_key( grad.ops[i].set_cache_key(
......
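Prepare() and Release() both recover the version number embedded after the "/ver:" marker in a tensor name. A standalone sketch of that parsing step (tensor names are illustrative):

#include <cstdlib>
#include <iostream>
#include <string>

// Return the integer after the "/ver:" marker, e.g. "conv1/W/ver:3" -> 3,
// or -1 when the marker is absent.
int ParseVersion(const std::string& name) {
  const auto ver_pos = name.find("/ver:");
  if (ver_pos == std::string::npos) return -1;
  return std::atoi(name.substr(ver_pos + 5).c_str());
}

int main() {
  std::cout << ParseVersion("conv1/W/ver:3") << "\n";  // 3
  std::cout << ParseVersion("conv1/W") << "\n";        // -1
  return 0;
}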
...@@ -40,7 +40,7 @@ class DRAGON_API OperatorBase { ...@@ -40,7 +40,7 @@ class DRAGON_API OperatorBase {
} }
/*! \brief Run operator on the specified stream */ /*! \brief Run operator on the specified stream */
virtual void Run(int stream_id = 0) { virtual void Run(int stream = 0) {
NOT_IMPLEMENTED; NOT_IMPLEMENTED;
} }
...@@ -154,12 +154,12 @@ class DRAGON_API OperatorBase { ...@@ -154,12 +154,12 @@ class DRAGON_API OperatorBase {
} }
/*! \brief Set the output aliases for in-place */ /*! \brief Set the output aliases for in-place */
void set_output_aliases(const Map<string, Set<string>>& aliases_map) { void set_output_aliases(const Map<string, Set<string>>& alias_map) {
output_aliases_.resize(outputs_.size()); output_aliases_.resize(outputs_.size());
for (int i = 0; i < outputs_.size(); ++i) { for (int i = 0; i < outputs_.size(); ++i) {
auto aliases_iter = aliases_map.find(outputs_[i]->name()); const auto& it = alias_map.find(outputs_[i]->name());
if (aliases_iter != aliases_map.end()) { if (it != alias_map.end()) {
output_aliases_[i] = aliases_iter->second; output_aliases_[i] = it->second;
} else { } else {
output_aliases_[i].clear(); output_aliases_[i].clear();
} }
...@@ -196,7 +196,10 @@ template <class Context> ...@@ -196,7 +196,10 @@ template <class Context>
class DRAGON_API Operator : public OperatorBase { class DRAGON_API Operator : public OperatorBase {
public: public:
/*! \brief Default constructor */ /*! \brief Default constructor */
Operator(const OperatorDef& def, Workspace* ws); Operator(const OperatorDef& def, Workspace* ws)
: OperatorBase(def, ws),
ctx_(def.device_option()),
do_sync_(OperatorBase::Arg<bool>("do_sync", false)) {}
/*! \brief Prepare the content of inputs */ /*! \brief Prepare the content of inputs */
virtual void Prepare(); virtual void Prepare();
...@@ -207,36 +210,32 @@ class DRAGON_API Operator : public OperatorBase { ...@@ -207,36 +210,32 @@ class DRAGON_API Operator : public OperatorBase {
/*! \brief Coordinate the context of inputs and outputs */ /*! \brief Coordinate the context of inputs and outputs */
virtual void SwitchToDevice(); virtual void SwitchToDevice();
/*! \brief Implement the detailed execution */ /*! \brief The detailed execution on device */
virtual void RunOnDevice() = 0; virtual void RunOnDevice() = 0;
/*! \brief Run this operator on the specified stream */ /*! \brief Run this operator */
void Run(int stream_id = 0) final { void Run(int stream = 0) final {
if (!allow_run_) return; Prepare();
if (allow_recomp_) Prepare(); ctx()->SwitchToDevice(stream);
ctx()->SwitchToDevice(stream_id);
SwitchToDevice(); SwitchToDevice();
RunOnDevice(); RunOnDevice();
if (do_sync_ || stream_id > 0) { if (do_sync_ || stream > 0) {
ctx()->FinishDeviceComputation(); ctx()->FinishDeviceComputation();
} }
if (allow_recomp_) Release(); Release();
} }
/*! \brief Return a bool indicating the run is available */ /*! \brief Return the context */
bool allow_run() const {
return allow_run_;
}
/*! \brief Return the internal context */
Context* ctx() { Context* ctx() {
return &ctx_; return &ctx_;
} }
protected: protected:
/*! \brief Store the internal context */ /*! \brief The context */
Context ctx_; Context ctx_;
bool do_sync_, allow_run_, allow_recomp_;
/*! \brief The executing flags */
bool do_sync_;
}; };
/*! \brief Create a new operator from the raw def */ /*! \brief Create a new operator from the raw def */
...@@ -266,9 +265,8 @@ OperatorBase* NewOperator(const OperatorDef&, Workspace*); ...@@ -266,9 +265,8 @@ OperatorBase* NewOperator(const OperatorDef&, Workspace*);
using OperatorBase::def; \ using OperatorBase::def; \
using OperatorBase::ws using OperatorBase::ws
#define USE_OPERATOR_FUNCTIONS \ #define USE_OPERATOR_FUNCTIONS \
USE_OPERATOR_BASE_FUNCTIONS; \ USE_OPERATOR_BASE_FUNCTIONS; \
using Operator<Context>::allow_run; \
using Operator<Context>::ctx using Operator<Context>::ctx
#define STORE_INPUT_SPEC(i) \ #define STORE_INPUT_SPEC(i) \
...@@ -342,46 +340,46 @@ DEFINE_TENSOR_TYPES_DISPATCHER(DoRunWithType); ...@@ -342,46 +340,46 @@ DEFINE_TENSOR_TYPES_DISPATCHER(DoRunWithType);
/* Fillers */ /* Fillers */
#define TENSOR_FILL_WITH_TYPE(tensor, shape, type) \ #define TENSOR_FILL_WITH_TYPE(tensor, shape, type) \
if (tensor.count() == 0) { \ if (tensor.count() == 0) { \
CHECK(ws()->GetFiller(tensor.name())) \ auto* filler_info = ws()->GetFillerInfo(tensor.name()); \
<< "\nTensor(" << tensor.name() << ") is empty. \n" \ CHECK(filler_info) << "\nTensor(" << tensor.name() << ") is empty.\n" \
<< "may be specify a filler for it?"; \ << "May be specify a filler for it?"; \
tensor.Reshape(shape); \ tensor.Reshape(shape); \
unique_ptr<Filler<type, Context>> filler( \ unique_ptr<Filler<type, Context>> filler( \
CreateFiller<type, Context>(*ws()->GetFiller(tensor.name()))); \ CreateFiller<type, Context>(*filler_info)); \
filler->Fill(&tensor, ctx()); \ filler->Fill(&tensor, ctx()); \
} else { \ } else { \
int64_t count = 1; \ int64_t count = 1; \
for (int i = 0; i < shape.size(); i++) \ for (int i = 0; i < shape.size(); i++) \
count *= shape[i]; \ count *= shape[i]; \
CHECK_EQ(count, tensor.count()) \ CHECK_EQ(count, tensor.count()) \
<< "\nExcepted Tensor(" << tensor.name() << ")'s " \ << "\nExcepted Tensor(" << tensor.name() << ")'s " \
<< "size is " << count << ", \n" \ << "size is " << count << ", \n" \
<< "but now is " << tensor.count() << ", " \ << "but now is " << tensor.count() << ", " \
<< "did you feed the incorrect data before?"; \ << "did you feed the incorrect data before?"; \
tensor.Reshape(shape); \ tensor.Reshape(shape); \
} }
#define TENSOR_FILL(tensor, shape) \ #define TENSOR_FILL(tensor, shape) \
if (tensor.count() == 0) { \ if (tensor.count() == 0) { \
CHECK(ws()->GetFiller(tensor.name())) \ auto* filler_info = ws()->GetFillerInfo(tensor.name()); \
<< "\nTensor(" << tensor.name() << ") is empty. \n" \ CHECK(filler_info) << "\nTensor(" << tensor.name() << ") is empty.\n" \
<< "Maybe specify a filler for it?"; \ << "May be specify a filler for it?"; \
tensor.Reshape(shape); \ tensor.Reshape(shape); \
unique_ptr<Filler<T, Context>> filler( \ unique_ptr<Filler<T, Context>> filler( \
CreateFiller<T, Context>(*ws()->GetFiller(tensor.name()))); \ CreateFiller<T, Context>(*filler_info)); \
filler->Fill(&tensor, ctx()); \ filler->Fill(&tensor, ctx()); \
} else { \ } else { \
int64_t count = 1; \ int64_t count = 1; \
for (int i = 0; i < shape.size(); i++) \ for (int i = 0; i < shape.size(); i++) \
count *= shape[i]; \ count *= shape[i]; \
CHECK_EQ(count, tensor.count()) \ CHECK_EQ(count, tensor.count()) \
<< "\nExcepted Tensor(" << tensor.name() << ")'s " \ << "\nExcepted Tensor(" << tensor.name() << ")'s " \
<< "size is " << count << ", \n" \ << "size is " << count << ", \n" \
<< "but now is " << tensor.count() << ", " \ << "but now is " << tensor.count() << ", " \
<< "did you feed the incorrect data before?"; \ << "did you feed the incorrect data before?"; \
tensor.Reshape(shape); \ tensor.Reshape(shape); \
} }
/* Arguments */ /* Arguments */
......
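With allow_run_ and allow_recomp_ gone, every operator now follows the same Run() sequence: Prepare, SwitchToDevice, RunOnDevice, an optional device sync, then Release. A hedged sketch of a concrete operator under that flow (the class name and its flatten-the-shape behaviour are illustrative, and the registration macros are omitted because they do not appear in this diff):

namespace dragon {

// Sketch only: a trivial operator under the simplified Run() flow.
template <class Context>
class FlattenSketchOp final : public Operator<Context> {
 public:
  FlattenSketchOp(const OperatorDef& def, Workspace* ws)
      : Operator<Context>(def, ws) {}
  USE_OPERATOR_FUNCTIONS;

  void RunOnDevice() override {
    // Only Tensor calls visible in this diff are used here:
    // reshape output 0 to a single dimension holding Input(0)'s count.
    auto& X = this->Input(0);
    this->Output(0)->Reshape({X.count()});
  }
};

}  // namespace dragon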
...@@ -4,176 +4,150 @@ ...@@ -4,176 +4,150 @@
namespace dragon { namespace dragon {
vector<string> Workspace::tensors() const { Workspace::Workspace(const string& name) : name_(name) {
vector<string> locals;
// Search the local workspace
for (const auto& it : tensor_map_)
locals.push_back(it.first);
// Search the remote workspaces
for (const auto& it : external_tensor_map_) {
locals.push_back(it.first);
}
return locals;
}
vector<string> Workspace::graphs() const {
vector<string> names;
for (const auto& it : graph_map_) {
names.push_back(it.first);
}
return names;
}
void Workspace::Initialize() {
CreateTensor(""); // Empty placeholder CreateTensor(""); // Empty placeholder
CreateTensor("/share/flag/recomputing") CreateTensor("/share/flag/recomputing")
->Reshape({1}) ->Reshape({})
->mutable_data<bool, CPUContext>()[0] = false; ->mutable_data<bool, CPUContext>()[0] = false;
} }
void Workspace::Clear() { void Workspace::MergeFrom(Workspace* other) {
// Remove and Initialize again if (other != nullptr) {
tensor_map_.clear(); // Add the external tensors
Initialize(); for (const auto& it : other->tensor_map_) {
} if (!it.first.empty() && !str::startswith(it.first, "/")) {
external_tensor_map_[it.first] = it.second.get();
void Workspace::MergeFrom(Workspace* ws) { }
CHECK(ws) << "\nThe given Workspace is invalid."; }
for (const auto& it : ws->tensor_map_) { // Recount the unique index to avoid duplicate names
if (!it.first.empty() && !str::startswith(it.first, "/")) { for (const auto& i : other->unique_index_map_) {
external_tensor_map_[it.first] = it.second.get(); auto& index_map = unique_index_map_[i.first];
for (const auto& j : i.second) {
index_map[j.first] = std::max(index_map[j.first], j.second);
}
} }
} }
} }
string Workspace::GetTensorName(const string& name) const { Tensor* Workspace::TryGetTensor(const string& name, bool external) const {
const auto& it = alias_active_map_.find(name); // Check the alias first
if (it != alias_active_map_.end()) return it->second; const auto& alias_it = alias_map_.find(name);
return name; auto name_v2 = alias_it != alias_map_.end() ? alias_it->second : name;
} // Search this workspace
const auto& it = tensor_map_.find(name_v2);
Tensor* Workspace::TryGetTensor(const string& name, bool use_remote) const {
// Check the proxy of this tensor firstly
string query = GetTensorName(name);
// Search the local workspace
const auto& it = tensor_map_.find(query);
if (it != tensor_map_.end()) return it->second.get(); if (it != tensor_map_.end()) return it->second.get();
if (external) {
if (use_remote) { // Search external workspaces
// Search the remote workspaces const auto& it = external_tensor_map_.find(name_v2);
const auto& it = external_tensor_map_.find(query);
if (it != external_tensor_map_.end()) return it->second; if (it != external_tensor_map_.end()) return it->second;
} }
return nullptr; return nullptr;
} }
Tensor* Workspace::CreateTensor(const string& name) { Tensor* Workspace::CreateTensor(const string& name, FillerInfo* filler) {
Tensor* tensor = TryGetTensor(name); auto* tensor = TryGetTensor(name);
if (!tensor) { // Create only if name not existed
tensor_map_[name] = unique_ptr<Tensor>(new Tensor(name)); if (tensor == nullptr) {
return tensor_map_[name].get(); tensor = new Tensor(name);
tensor_map_[name] = unique_ptr<Tensor>(tensor);
}
// Maybe bind it with a filler
if (filler != nullptr) {
filler_map_[tensor->name()] = std::move(FillerInfo(*filler));
} }
return tensor; return tensor;
} }
Tensor* Workspace::GetTensor(const string& name, bool use_remote) const { Tensor* Workspace::GetTensor(const string& name, bool external) const {
Tensor* tensor = TryGetTensor(name, use_remote); auto* tensor = TryGetTensor(name, external);
CHECK(tensor) << "\nTensor(" << name << ") does not " CHECK(tensor) << "\nTensor(" << name << ") is not in current workspace.";
<< "exist in current workspace.";
return tensor; return tensor;
} }
void Workspace::ResetTensor(const string& name) { void Workspace::ResetTensor(const string& name) {
Tensor* tensor = TryGetTensor(name, false); auto* tensor = TryGetTensor(name, false);
CHECK(tensor) << "\nTensor(" << name << ") does not " CHECK(tensor) << "\nTensor(" << name << ") is not in current workspace.";
<< "belong to current workspace.";
tensor->Reset(); tensor->Reset();
} }
bool Workspace::HasFiller(const string& name) const { FillerInfo* Workspace::GetFillerInfo(const string& name) {
return tensor_filler_map_.count(name) > 0; const auto& it = filler_map_.find(name);
} if (it != filler_map_.end()) return &it->second;
void Workspace::CreateFiller(const TensorFillerProto& filler) {
CHECK_GT(filler.tensor().size(), 0)
<< "\nTensor with an empty name can not be filled.";
if (HasFiller(filler.tensor())) return;
tensor_filler_map_[filler.tensor()] = filler;
}
TensorFillerProto* Workspace::GetFiller(const string& name) {
const auto& it = tensor_filler_map_.find(name);
if (it != tensor_filler_map_.end()) return &it->second;
return nullptr; return nullptr;
} }
OperatorBase* Workspace::CreateOperator(const OperatorDef& def) {
const auto& it = operator_map_.find(def.cache_key());
if (it == operator_map_.end()) {
auto* new_op = NewOperator(def, this);
operator_map_[def.cache_key()] = unique_ptr<OperatorBase>(new_op);
return new_op;
}
return it->second.get();
}
void Workspace::RunOperator(const OperatorDef& def) { void Workspace::RunOperator(const OperatorDef& def) {
if (def.has_cache_key()) { if (def.has_cache_key()) {
CreateOperator(def)->UpdateFrom(def)->Run(0); OperatorBase* cached_op = nullptr;
const auto& it = operator_map_.find(def.cache_key());
if (it == operator_map_.end()) {
cached_op = NewOperator(def, this);
operator_map_[def.cache_key()] = unique_ptr<OperatorBase>(cached_op);
} else {
cached_op = it->second.get();
}
cached_op->UpdateFrom(def)->Run();
} else { } else {
unique_ptr<OperatorBase> op(NewOperator(def, this)); OperatorBase* temporal_op = NewOperator(def, this);
op->Run(0); temporal_op->Run();
delete temporal_op;
} }
} }
GraphBase* Workspace::CreateGraph(const GraphDef& def) { GraphBase* Workspace::CreateGraph(const GraphDef& def) {
CHECK(def.has_name()) << "\nGraph name is missing."; CHECK(def.has_name()) << "\nExpected a non-empty graph name.";
auto name = GetDummyName(def.name(), "", "Graph", false); GraphDef def_v2(def); // Copy to set a unique name
LOG(DEBUG) << "Create Graph: " << name << "(" << def.name() << ")"; def_v2.set_name(UniqueName(def.name(), "", "Graph", false));
GraphDef _def(def); LOG(DEBUG) << "Create Graph: " << def_v2.name() << "(" << def.name() << ")";
_def.set_name(name); auto* cached_graph = NewGraph(def_v2, this);
graph_map_[name] = unique_ptr<GraphBase>(NewGraph(_def, this)); graph_map_[def_v2.name()] = unique_ptr<GraphBase>(cached_graph);
return graph_map_[name].get(); return cached_graph;
} }
void Workspace::RunGraph( void Workspace::RunGraph(
const string& graph_name, const string& name,
const string& incl, const string& include,
const string& excl, const string& exclude,
int stream_id) { const int stream) {
if (!graph_map_.count(graph_name)) { CHECK(graph_map_.count(name))
LOG(FATAL) << "Graph(" << graph_name << ") does not exist."; << "\nGraph(" << name << ") is not in current workspace.";
} graph_map_[name]->Run(include, exclude, stream);
graph_map_[graph_name]->Run(incl, excl, stream_id);
} }
bool Workspace::ActivateAlias(const string& name, const string& alias) { void Workspace::RegisterAlias(const string& target, const string& alias) {
bool status = alias_active_map_.count(alias) > 0; alias_map_[alias] = target;
alias_active_map_[alias] = name;
return status; // True if activated otherwise false
} }
string Workspace::GetDummyName( string Workspace::UniqueName(
const string& base_name, const string& name,
const string& suffix, const string& suffix,
const string& domain, const string& scope,
bool zero_based) { bool zero_based) {
string accepted_name; auto& index_map = unique_index_map_[scope];
int64_t index; auto required_name = name + suffix;
const auto required_name = base_name + suffix; auto index = index_map[required_name]++;
auto& dmap = dummy_name_map_[domain]; if (index > 0) return name + "_" + str::to(index) + suffix;
while (1) { if (zero_based) return required_name;
index = dmap[required_name]++; return name + "_" + str::to(index_map[required_name]++) + suffix;
accepted_name = index ? base_name + "_" + str::to(index) + suffix }
: zero_based
? required_name vector<string> Workspace::tensors() const {
: base_name + "_" + str::to(dmap[required_name]++) + suffix; vector<string> names;
if (external_tensor_map_.empty()) break; for (const auto& it : tensor_map_) {
if (!HasTensor(accepted_name)) break; names.push_back(it.first);
} }
return accepted_name; for (const auto& it : external_tensor_map_) {
names.push_back(it.first);
}
return names;
}
vector<string> Workspace::graphs() const {
vector<string> names;
for (const auto& it : graph_map_) {
names.push_back(it.first);
}
return names;
} }
} // namespace dragon } // namespace dragon
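The zero_based flag of UniqueName() only affects the very first request for a given name within a scope; later requests always receive an increasing index. A standalone re-implementation of the logic above makes the produced sequence explicit (std::map and std::to_string stand in for the dragon Map and str::to helpers; the names are illustrative):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Standalone copy of Workspace::UniqueName() above.
std::string UniqueName(
    std::map<std::string, std::int64_t>& index_map,
    const std::string& name,
    const std::string& suffix,
    bool zero_based) {
  auto required_name = name + suffix;
  auto index = index_map[required_name]++;
  if (index > 0) return name + "_" + std::to_string(index) + suffix;
  if (zero_based) return required_name;
  return name + "_" + std::to_string(index_map[required_name]++) + suffix;
}

int main() {
  std::map<std::string, std::int64_t> scope_a, scope_b;
  std::cout << UniqueName(scope_a, "Graph", "", true) << "\n";  // Graph
  std::cout << UniqueName(scope_a, "Graph", "", true) << "\n";  // Graph_1
  std::cout << UniqueName(scope_b, "data", "", false) << "\n";  // data_1
  std::cout << UniqueName(scope_b, "data", "", false) << "\n";  // data_2
  return 0;
}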
...@@ -20,83 +20,63 @@ namespace dragon { ...@@ -20,83 +20,63 @@ namespace dragon {
class Workspace { class Workspace {
public: public:
/*! \brief Constructor */ /*! \brief Constructor */
explicit Workspace(const string& name) : name_(name) { DRAGON_API explicit Workspace(const string& name);
Initialize();
}
/*! \brief Create some internal tensors */
DRAGON_API void Initialize();
/*! \brief Merge tensors from a external workspace */ /*! \brief Merge resources from another workspace */
DRAGON_API void MergeFrom(Workspace*); DRAGON_API void MergeFrom(Workspace*);
/*! \brief Destory all the tensors */ /* \brief Return a unique name */
DRAGON_API void Clear(); DRAGON_API string UniqueName(
const string& name,
/* \brief Return a unique dummy name within this workspace */
DRAGON_API string GetDummyName(
const string& base_name,
const string& suffix, const string& suffix,
const string& domain = "", const string& scope = "",
bool zero_based = true); const bool zero_based = false);
/*! \brief Whether the specified tensor is in this workspace */ /* \brief Register an alias for the target */
DRAGON_API bool HasTensor(const string& name, bool use_remote = true) const { DRAGON_API void RegisterAlias(const string& target, const string& alias);
return TryGetTensor(name, use_remote) ? true : false;
}
/*! \brief Query the real name of specified tensor */ /*! \brief Return whether the tensor exists */
DRAGON_API string GetTensorName(const string&) const; DRAGON_API bool HasTensor(const string& name, bool external = true) const {
return TryGetTensor(name, external) == nullptr ? false : true;
/* \brief Activate an alias for the target */ }
DRAGON_API bool ActivateAlias(const string& name, const string& alias);
/*! \brief Create a tensor in this workspace */ /*! \brief Create the tensor */
DRAGON_API Tensor* CreateTensor(const string&); DRAGON_API Tensor* CreateTensor(const string&, FillerInfo* = nullptr);
/*! \brief Try to search the specified tensor in this workspace */ /*! \brief Try to return the tensor */
DRAGON_API Tensor* TryGetTensor(const string&, bool = true) const; DRAGON_API Tensor* TryGetTensor(const string&, bool = true) const;
/*! \brief Return the specified tensor */ /*! \brief Return the tensor */
DRAGON_API Tensor* GetTensor(const string&, bool = true) const; DRAGON_API Tensor* GetTensor(const string&, bool = true) const;
/*! \brief Reset the specified tensor */ /*! \brief Reset the tensor */
DRAGON_API void ResetTensor(const string&); DRAGON_API void ResetTensor(const string&);
/* \brief Whether the specified filler is existing */ /*! \brief Return the filler info */
DRAGON_API bool HasFiller(const string&) const; DRAGON_API FillerInfo* GetFillerInfo(const string&);
/*! \brief Create a filler in this workspace */
DRAGON_API void CreateFiller(const TensorFillerProto&);
/*! \brief Return the specified filler */ /*! \brief Run the operator */
DRAGON_API TensorFillerProto* GetFiller(const string&);
/*! \brief Create an operator in this workspace */
DRAGON_API OperatorBase* CreateOperator(const OperatorDef&);
/*! \brief Run an operator in this workspace */
DRAGON_API void RunOperator(const OperatorDef&); DRAGON_API void RunOperator(const OperatorDef&);
/*! \brief Create a graph in this workspace */ /*! \brief Create the graph */
DRAGON_API GraphBase* CreateGraph(const GraphDef&); DRAGON_API GraphBase* CreateGraph(const GraphDef&);
/*! \brief Run the specifed graph by name and rules */ /*! \brief Run the graph */
DRAGON_API void RunGraph( DRAGON_API void RunGraph(
const string& graph_name, const string& graph_name,
const string& incl = "", const string& include = "",
const string& excl = "", const string& exclude = "",
int stream_id = 0); const int stream = 0);
/*! \brief Return the name of this workspace */ /*! \brief Return the workspace name */
const string& name() { const string& name() {
return name_; return name_;
} }
/*! \brief Return the name of stored tensors */ /*! \brief Return the name of cached tensors */
DRAGON_API vector<string> tensors() const; DRAGON_API vector<string> tensors() const;
/*! \brief Return the name of stored graphs */ /*! \brief Return the name of cached graphs */
DRAGON_API vector<string> graphs() const; DRAGON_API vector<string> graphs() const;
/*! \brief Provide a group of the shared byte data */ /*! \brief Provide a group of the shared byte data */
...@@ -127,28 +107,28 @@ class Workspace { ...@@ -127,28 +107,28 @@ class Workspace {
} }
private: private:
/*! \brief The unique workspace name */ /*! \brief The workspace name */
string name_; string name_;
/*! \brief The dummy name indices */ /*! \brief The external tensors */
Map<string, Map<string, int64_t>> dummy_name_map_; Map<string, Tensor*> external_tensor_map_;
/*! \brief Store the created tensors */ /*! \brief The unique indices */
Map<string, unique_ptr<Tensor>> tensor_map_; Map<string, Map<string, int64_t>> unique_index_map_;
/*! \brief Store the external tensors */ /*! \brief The registered fillers */
Map<string, Tensor*> external_tensor_map_; Map<string, FillerInfo> filler_map_;
/*! \brief Store the registered tensor fillers */ /*! \brief The registered aliases */
Map<string, TensorFillerProto> tensor_filler_map_; Map<string, string> alias_map_;
/*! \brief Store the active aliases */ /*! \brief The cached tensors */
Map<string, string> alias_active_map_; Map<string, unique_ptr<Tensor>> tensor_map_;
/*! \brief Store the registered operators for dynamic graph */ /*! \brief The cached operators */
Map<string, unique_ptr<OperatorBase>> operator_map_; Map<string, unique_ptr<OperatorBase>> operator_map_;
/*! \brief Store the registered graphs for static graph */ /*! \brief The cached graphs */
Map<string, unique_ptr<GraphBase>> graph_map_; Map<string, unique_ptr<GraphBase>> graph_map_;
}; };
......
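A minimal sketch of the trimmed-down Workspace interface, assuming the header diffed above; the workspace and tensor names are illustrative:

#include <iostream>

#include "dragon/core/workspace.h"

int main() {
  dragon::Workspace ws("sketch");
  auto* x = ws.CreateTensor("x");  // created lazily, owned by the workspace
  ws.RegisterAlias("x", "x0");     // "x0" now resolves to "x"
  std::cout << (ws.GetTensor("x0") == x) << "\n";  // 1
  std::cout << ws.HasTensor("y") << "\n";          // 0
  return 0;
}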
...@@ -425,7 +425,7 @@ void PReluWGrad<float16, CUDAContext>( ...@@ -425,7 +425,7 @@ void PReluWGrad<float16, CUDAContext>(
CUDA_THREADS, CUDA_THREADS,
0, 0,
ctx->cuda_stream()>>>( ctx->cuda_stream()>>>(
N * C * S, N * S,
C, C,
S, S,
reinterpret_cast<const half*>(dy), reinterpret_cast<const half*>(dy),
...@@ -437,7 +437,7 @@ void PReluWGrad<float16, CUDAContext>( ...@@ -437,7 +437,7 @@ void PReluWGrad<float16, CUDAContext>(
CUDA_THREADS, CUDA_THREADS,
0, 0,
ctx->cuda_stream()>>>( ctx->cuda_stream()>>>(
N * C * S, N * S,
C, C,
reinterpret_cast<const half*>(dy), reinterpret_cast<const half*>(dy),
reinterpret_cast<const half*>(x), reinterpret_cast<const half*>(x),
...@@ -536,13 +536,13 @@ void PReluWGrad<float16, CUDAContext>( ...@@ -536,13 +536,13 @@ void PReluWGrad<float16, CUDAContext>(
CUDA_2D_BLOCKS(C), \ CUDA_2D_BLOCKS(C), \
CUDA_THREADS, \ CUDA_THREADS, \
0, \ 0, \
ctx->cuda_stream()>>>(N * C * S, C, S, dy, x, dw); \ ctx->cuda_stream()>>>(N * S, C, S, dy, x, dw); \
} else if (data_format == "NHWC") { \ } else if (data_format == "NHWC") { \
_PReluWGradNHWC<<< \ _PReluWGradNHWC<<< \
CUDA_2D_BLOCKS(C), \ CUDA_2D_BLOCKS(C), \
CUDA_THREADS, \ CUDA_THREADS, \
0, \ 0, \
ctx->cuda_stream()>>>(N * C * S, C, dy, x, dw); \ ctx->cuda_stream()>>>(N * S, C, dy, x, dw); \
} else { \ } else { \
LOG(FATAL) << "Unknown data format: " << data_format; \ LOG(FATAL) << "Unknown data format: " << data_format; \
} \ } \
......
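The launch change above passes the per-channel element count (N * S) instead of the full tensor size (N * C * S), since each of the C kernel rows reduces over a single channel. A CPU reference for the NCHW weight gradient, assuming the standard PReLU definition y = max(0, x) + w * min(0, x), shows that reduction:

#include <vector>

// dw[c] = sum over n, s of dy * x at the positions where x < 0 (NCHW),
// i.e. each channel reduces over exactly N * S elements.
void PReluWGradNCHWReference(
    int N, int C, int S,
    const std::vector<float>& dy,
    const std::vector<float>& x,
    std::vector<float>& dw) {
  dw.assign(C, 0.f);
  for (int c = 0; c < C; ++c) {
    for (int n = 0; n < N; ++n) {
      for (int s = 0; s < S; ++s) {
        const int i = (n * C + c) * S + s;
        if (x[i] < 0.f) dw[c] += dy[i] * x[i];
      }
    }
  }
}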
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
#include "dragon/core/workspace.h" #include "dragon/core/workspace.h"
#include "dragon/modules/python/types.h" #include "dragon/modules/python/types.h"
#include "dragon/onnx/onnx_backend.h" #include "dragon/onnx/onnx_backend.h"
#include "dragon/utils/caffemodel.h"
#include <pybind11/pybind11.h> #include <pybind11/pybind11.h>
#include <pybind11/stl.h> #include <pybind11/stl.h>
......
...@@ -74,7 +74,7 @@ class DLPackWrapper { ...@@ -74,7 +74,7 @@ class DLPackWrapper {
} }
Tensor* From(py::object obj) { Tensor* From(py::object obj) {
CHECK(PyCapsule_CheckExact(obj.ptr())) << "\nExpected DLPack capsule"; CHECK(PyCapsule_CheckExact(obj.ptr())) << "\nExpected DLPack capsule.";
auto* managed_tensor = auto* managed_tensor =
(DLManagedTensor*)PyCapsule_GetPointer(obj.ptr(), "dltensor"); (DLManagedTensor*)PyCapsule_GetPointer(obj.ptr(), "dltensor");
CHECK(managed_tensor) << "\nInvalid DLPack capsule"; CHECK(managed_tensor) << "\nInvalid DLPack capsule";
......
...@@ -44,48 +44,38 @@ PYBIND11_MODULE(libdragon_python, m) { ...@@ -44,48 +44,38 @@ PYBIND11_MODULE(libdragon_python, m) {
/*! \brief Return the name of stored graphs */ /*! \brief Return the name of stored graphs */
.def_property_readonly("graphs", &Workspace::graphs) .def_property_readonly("graphs", &Workspace::graphs)
/*! \brief Destory all the tensors */ /*! \brief Merge resources from another workspace */
.def("Clear", &Workspace::Clear)
/*! \brief Merge a external workspace into self */
.def("MergeFrom", &Workspace::MergeFrom) .def("MergeFrom", &Workspace::MergeFrom)
/*! \brief Return a unique dummy name */ /*! \brief Return a unique name */
.def("GetDummyName", &Workspace::GetDummyName) .def("UniqueName", &Workspace::UniqueName)
/*! \brief Return the unique name of given tensor */
.def("GetTensorName", &Workspace::GetTensorName)
/*! \brief Reset a tensor with the given name */ /*! \brief Reset the tensor */
.def("ResetTensor", &Workspace::ResetTensor) .def("ResetTensor", &Workspace::ResetTensor)
/*! \brief Indicate whether the given tensor is existing */ /*! \brief Return whether the tensor exists */
.def( .def(
"HasTensor", "HasTensor",
[](Workspace* self, const string& name) { [](Workspace* self, const string& name) {
return self->HasTensor(name); return self->HasTensor(name);
}) })
/*! \brief Create a tensor with the given name */ /*! \brief Create the tensor */
.def( .def(
"CreateTensor", "CreateTensor",
[](Workspace* self, const string& name) { [](Workspace* self, const string& name, const string& filler_str) {
if (!filler_str.empty()) {
FillerInfo filler_info;
if (!filler_info.ParseFromString(filler_str)) {
LOG(FATAL) << "Failed to parse the FillerInfo.";
}
return self->CreateTensor(name, &filler_info);
}
return self->CreateTensor(name); return self->CreateTensor(name);
}, },
py::return_value_policy::reference_internal) py::return_value_policy::reference_internal)
/*! \brief Create a tensor from the specified filler */ /*! \brief Return the tensor */
.def(
"CreateFiller",
[](Workspace* self, const string& serialized) {
TensorFillerProto filler_proto;
if (!filler_proto.ParseFromString(serialized))
LOG(FATAL) << "Failed to parse the TensorFiller.";
self->CreateFiller(filler_proto);
self->CreateTensor(filler_proto.tensor());
})
/*! \brief Return the CXX Tensor reference */
.def( .def(
"GetTensor", "GetTensor",
[](Workspace* self, const string& name) { [](Workspace* self, const string& name) {
...@@ -93,11 +83,11 @@ PYBIND11_MODULE(libdragon_python, m) { ...@@ -93,11 +83,11 @@ PYBIND11_MODULE(libdragon_python, m) {
}, },
py::return_value_policy::reference_internal) py::return_value_policy::reference_internal)
/* \brief Set an alias for the tensor */ /* \brief Register an alias for the name */
.def( .def(
"SetTensorAlias", "RegisterAlias",
[](Workspace* self, const string& name, const string& alias) { [](Workspace* self, const string& name, const string& alias) {
return self->ActivateAlias(name, alias); return self->RegisterAlias(name, alias);
}) })
/*! \brief Copy the array data to tensor */ /*! \brief Copy the array data to tensor */
...@@ -118,7 +108,7 @@ PYBIND11_MODULE(libdragon_python, m) { ...@@ -118,7 +108,7 @@ PYBIND11_MODULE(libdragon_python, m) {
dev, reinterpret_cast<PyArrayObject*>(value.ptr()), tensor); dev, reinterpret_cast<PyArrayObject*>(value.ptr()), tensor);
}) })
/*! \brief Copy the tensor data to the array */ /*! \brief Copy the tensor data to array */
.def( .def(
"FetchTensor", "FetchTensor",
[](Workspace* self, const string& name) { [](Workspace* self, const string& name) {
...@@ -142,7 +132,7 @@ PYBIND11_MODULE(libdragon_python, m) { ...@@ -142,7 +132,7 @@ PYBIND11_MODULE(libdragon_python, m) {
} }
}) })
/*! \brief Run a operator from the def reference */ /*! \brief Run the operator */
.def( .def(
"RunOperator", "RunOperator",
[](Workspace* self, OperatorDef* def, const bool verbose) { [](Workspace* self, OperatorDef* def, const bool verbose) {
...@@ -156,7 +146,7 @@ PYBIND11_MODULE(libdragon_python, m) { ...@@ -156,7 +146,7 @@ PYBIND11_MODULE(libdragon_python, m) {
self->RunOperator(*def); self->RunOperator(*def);
}) })
/*! \brief Run operators from the def reference */ /*! \brief Run the operators */
.def( .def(
"RunOperator", "RunOperator",
[](Workspace* self, vector<OperatorDef*>& defs, const bool verbose) { [](Workspace* self, vector<OperatorDef*>& defs, const bool verbose) {
...@@ -172,7 +162,7 @@ PYBIND11_MODULE(libdragon_python, m) { ...@@ -172,7 +162,7 @@ PYBIND11_MODULE(libdragon_python, m) {
} }
}) })
/*! \brief Run a operator from the serialized def */ /*! \brief Run the operator from the serialized def */
.def( .def(
"RunOperator", "RunOperator",
[](Workspace* self, const string& serialized, const bool verbose) { [](Workspace* self, const string& serialized, const bool verbose) {
...@@ -188,7 +178,7 @@ PYBIND11_MODULE(libdragon_python, m) { ...@@ -188,7 +178,7 @@ PYBIND11_MODULE(libdragon_python, m) {
self->RunOperator(def); self->RunOperator(def);
}) })
/*! \brief Create a graph from the serialized def */ /*! \brief Create the graph */
.def( .def(
"CreateGraph", "CreateGraph",
[](Workspace* self, const string& serialized, const bool verbose) { [](Workspace* self, const string& serialized, const bool verbose) {
...@@ -213,89 +203,49 @@ PYBIND11_MODULE(libdragon_python, m) { ...@@ -213,89 +203,49 @@ PYBIND11_MODULE(libdragon_python, m) {
return graph->name(); return graph->name();
}) })
/*! \brief Run an existing graph */ /*! \brief Run the graph */
.def( .def(
"RunGraph", "RunGraph",
[](Workspace* self, [](Workspace* self,
const string& name, const string& name,
const string& incl, const string& include,
const string& excl) { const string& exclude) {
py::gil_scoped_release g; py::gil_scoped_release g;
self->RunGraph(name, incl, excl); self->RunGraph(name, include, exclude);
}) })
/*! \brief Run the backward */
.def( .def(
"RunBackward", "RunBackward",
[](Workspace* self, [](Workspace* self,
const vector<OperatorDef*>& forward_ops, const vector<OperatorDef*>& op_defs,
const vector<string>& targets, const vector<string>& targets,
const vector<string>& sources, const vector<string>& sources,
const vector<string>& input_grads, const vector<string>& input_grads,
const vector<string>& ignored_grads, const vector<string>& empty_grads,
const bool is_sharing, const bool retain_grads,
const bool verbose) { const bool verbose) {
GraphDef backward_ops; GraphDef graph_def;
GraphGradientMaker maker; GraphGradientMaker maker;
for (const auto& name : ignored_grads) { for (const auto& name : empty_grads) {
maker.add_ignored_grad(name); maker.add_empty_grad(name);
} }
for (const auto& name : sources) { for (const auto& name : sources) {
maker.add_hooked_grad(name + "_grad"); maker.add_retained_grad(name + "_grad");
} }
maker.Make(forward_ops, targets, input_grads, backward_ops); maker.Make(op_defs, targets, input_grads, graph_def);
py::gil_scoped_release g; py::gil_scoped_release g;
if (is_sharing) { if (!retain_grads) {
backward_ops = maker.Share(backward_ops); graph_def = maker.Share(graph_def);
} }
for (const auto& def : backward_ops.op()) { for (const auto& op_def : graph_def.op()) {
if (verbose) { if (verbose) {
auto msg = string("\n") + def.DebugString(); auto msg = string("\n") + op_def.DebugString();
msg.pop_back(); msg.pop_back();
PRINT(INFO) PRINT(INFO)
<< "op {" << str::replace_all(msg, "\n", "\n ") << "\n}\n"; << "op {" << str::replace_all(msg, "\n", "\n ") << "\n}\n";
} }
self->RunOperator(def); self->RunOperator(op_def);
}
})
/*! \brief Serialize tensors into a binary file */
.def(
"Save",
[](Workspace* self,
const string& filename,
const vector<string>& tensors,
const int format) {
vector<Tensor*> refs;
switch (format) {
case 0: // Pickle
LOG(FATAL) << "Format depends on Pickle. "
<< "Can't be used in C++.";
break;
case 1: // CaffeModel
for (const auto& name : tensors) {
refs.emplace_back(self->GetTensor(name));
}
SavaCaffeModel(filename, refs);
break;
default:
LOG(FATAL) << "Unknown format, code: " << format;
}
})
/*! \brief Load tensors from a binary file */
.def(
"Load",
[](Workspace* self, const string& filename, const int format) {
switch (format) {
case 0: // Pickle
LOG(FATAL) << "Format depends on Pickle. "
<< "Can't be used in C++.";
break;
case 1: // CaffeModel
LoadCaffeModel(filename, self);
break;
default:
LOG(FATAL) << "Unknown format, code: " << format;
} }
}) })
......
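All of the bindings above share the same pybind11 shape: a lambda whose first argument is the captured Workspace pointer. A minimal standalone module in that style (the module name and the toy stand-in type are illustrative, not part of dragon):

#include <pybind11/pybind11.h>

#include <string>

namespace py = pybind11;

// Toy stand-in type, just to show the binding shape used above.
struct WorkspaceSketch {
  std::string name;
};

PYBIND11_MODULE(workspace_sketch, m) {
  py::class_<WorkspaceSketch>(m, "Workspace")
      .def(py::init<>())
      .def(
          "HasTensor",
          [](WorkspaceSketch* self, const std::string& name) {
            return self->name == name;  // placeholder lookup
          });
}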
...@@ -20,7 +20,6 @@ PythonPluginInferOp<Context>::PythonPluginInferOp( ...@@ -20,7 +20,6 @@ PythonPluginInferOp<Context>::PythonPluginInferOp(
class_name_(OpArg<string>("class_name", "")), class_name_(OpArg<string>("class_name", "")),
kwargs_str_((OpArg<string>("kwargs_str", ""))) { kwargs_str_((OpArg<string>("kwargs_str", ""))) {
// Optimization for all python ops // Optimization for all python ops
if (!allow_run()) return;
this->do_sync_ = false; this->do_sync_ = false;
// Initialize interpreter and load module // Initialize interpreter and load module
......
...@@ -24,6 +24,9 @@ namespace tensor { ...@@ -24,6 +24,9 @@ namespace tensor {
void RegisterModule(py::module& m) { void RegisterModule(py::module& m) {
/*! \brief Export the Tensor class */ /*! \brief Export the Tensor class */
py::class_<Tensor>(m, "Tensor") py::class_<Tensor>(m, "Tensor")
/*! \brief Return the tensor name */
.def_property_readonly("name", &Tensor::name)
/*! \brief Return the number of dimensions */ /*! \brief Return the number of dimensions */
.def_property_readonly("ndim", &Tensor::ndim) .def_property_readonly("ndim", &Tensor::ndim)
...@@ -46,9 +49,9 @@ void RegisterModule(py::module& m) { ...@@ -46,9 +49,9 @@ void RegisterModule(py::module& m) {
"device", "device",
[](Tensor* self) { [](Tensor* self) {
if (self->has_memory()) { if (self->has_memory()) {
auto mem_info = self->memory()->info(); auto info = self->memory()->info();
return std::tuple<string, int>( return std::tuple<string, int>(
mem_info["device_type"], atoi(mem_info["device_id"].c_str())); info["device_type"], atoi(info["device_id"].c_str()));
} else { } else {
return std::tuple<string, int>("Unknown", 0); return std::tuple<string, int>("Unknown", 0);
} }
......
...@@ -119,8 +119,6 @@ DRAGON_API void DestroyGraphDef(GraphDef_t graph_def); ...@@ -119,8 +119,6 @@ DRAGON_API void DestroyGraphDef(GraphDef_t graph_def);
* Model API * Model API
*/ */
DRAGON_API void LoadCaffeModel(const std::string& model_file, Workspace_t ws);
DRAGON_API void LoadONNXModel( DRAGON_API void LoadONNXModel(
const std::string& model_file, const std::string& model_file,
GraphDef_t init_graph, GraphDef_t init_graph,
......
#include "dragon/core/common.h" #include "dragon/core/common.h"
#include "dragon/modules/runtime/dragon_runtime.h" #include "dragon/modules/runtime/dragon_runtime.h"
#include "dragon/onnx/onnx_backend.h" #include "dragon/onnx/onnx_backend.h"
#include "dragon/utils/caffemodel.h"
#include "dragon/utils/proto_utils.h" #include "dragon/utils/proto_utils.h"
namespace dragon { namespace dragon {
...@@ -161,46 +160,6 @@ DRAGON_API void DestroyGraphDef(GraphDef_t graph_def) { ...@@ -161,46 +160,6 @@ DRAGON_API void DestroyGraphDef(GraphDef_t graph_def) {
if (graph_def) delete graph_def; if (graph_def) delete graph_def;
} }
void LoadCaffeModel(const string& model_file, Workspace_t ws) {
NetParameter net_param;
ReadProtoFromBinaryFile(model_file.c_str(), &net_param);
std::string scope = "";
LOG(INFO) << "Load Model: " << model_file << "......";
LOG(INFO) << "Format: Caffe";
for (int i = 0; i < net_param.layer_size(); i++) {
const LayerParameter& layer = net_param.layer(i);
const string& layer_name = layer.name();
string prefix = scope + layer_name + "/param:";
for (int j = 0; j < layer.blobs_size(); j++) {
string tensor_name = prefix + std::to_string(j);
if (!ws->HasTensor(tensor_name)) ws->CreateTensor(tensor_name);
BlobProto blob = layer.blobs(j);
vector<int64_t> dims;
for (auto dim : blob.shape().dim())
dims.push_back(dim);
Tensor* tensor = ws->GetTensor(tensor_name);
std::stringstream DimString;
if (dims.size() > 0) {
tensor->Reshape(dims);
CHECK_EQ(tensor->count(), blob.data_size())
<< "Tensor(" << tensor_name << ") "
<< "failed to load, except size: " << tensor->count()
<< ", loaded " << blob.data_size();
DimString << tensor->DimString();
} else {
tensor->Reshape(vector<int64_t>(1, blob.data_size()));
DimString << "(missing)";
}
float* Xdata = tensor->mutable_data<float, CPUContext>();
for (int idx = 0; idx < blob.data_size(); idx++)
Xdata[idx] = blob.data(idx);
LOG(INFO) << "Tensor(" << tensor_name << ") "
<< "loaded, shape: " << DimString.str()
<< ", size: " << blob.data_size();
}
}
}
void LoadONNXModel( void LoadONNXModel(
const string& model_file, const string& model_file,
GraphDef_t init_graph, GraphDef_t init_graph,
......
...@@ -19,7 +19,6 @@ ONNXImporterReturns ONNXBackend::ArgReduceImporter( ...@@ -19,7 +19,6 @@ ONNXImporterReturns ONNXBackend::ArgReduceImporter(
auto node = NodeProto(onnx_node->node); auto node = NodeProto(onnx_node->node);
auto onnx_node_v2 = ONNXNode(node); auto onnx_node_v2 = ONNXNode(node);
auto& attributes = onnx_node_v2.attributes; auto& attributes = onnx_node_v2.attributes;
// Determine the operation // Determine the operation
auto* operation = attributes.AddRewrittenAttribute("operation"); auto* operation = attributes.AddRewrittenAttribute("operation");
if (onnx_node->node.op_type() == "ArgMax") { if (onnx_node->node.op_type() == "ArgMax") {
...@@ -27,7 +26,6 @@ ONNXImporterReturns ONNXBackend::ArgReduceImporter( ...@@ -27,7 +26,6 @@ ONNXImporterReturns ONNXBackend::ArgReduceImporter(
} else if (onnx_node->node.op_type() == "ArgMin") { } else if (onnx_node->node.op_type() == "ArgMin") {
operation->set_s("MIN"); operation->set_s("MIN");
} }
return GenericImporter(&onnx_node_v2, ctx); return GenericImporter(&onnx_node_v2, ctx);
} }
...@@ -37,17 +35,13 @@ ONNXImporterReturns ONNXBackend::ATenImporter( ...@@ -37,17 +35,13 @@ ONNXImporterReturns ONNXBackend::ATenImporter(
auto node = NodeProto(onnx_node->node); auto node = NodeProto(onnx_node->node);
auto onnx_node_v2 = ONNXNode(node); auto onnx_node_v2 = ONNXNode(node);
auto& attributes = onnx_node_v2.attributes; auto& attributes = onnx_node_v2.attributes;
auto op_type = attributes.get<string>("op_type", ""); auto op_type = attributes.get<string>("op_type", "");
if (op_type.empty()) { if (op_type.empty()) {
LOG(FATAL) << "op_type is required to evolve " LOG(FATAL) << "op_type is required to evolve "
<< "to the specific operator."; << "to the specific operator.";
} }
node.set_op_type(op_type); node.set_op_type(op_type);
attributes.remove("op_type"); attributes.remove("op_type");
return GenericImporter(&onnx_node_v2, ctx); return GenericImporter(&onnx_node_v2, ctx);
} }
...@@ -56,17 +50,13 @@ ONNXImporterReturns ONNXBackend::BatchNormImporter( ...@@ -56,17 +50,13 @@ ONNXImporterReturns ONNXBackend::BatchNormImporter(
const ConversionContext& ctx) { const ConversionContext& ctx) {
auto node = NodeProto(onnx_node->node); auto node = NodeProto(onnx_node->node);
auto onnx_node_v2 = ONNXNode(node); auto onnx_node_v2 = ONNXNode(node);
auto& attributes = onnx_node_v2.attributes; auto& attributes = onnx_node_v2.attributes;
// Enforce to NCHW format // Enforce to NCHW format
attributes.AddRewrittenAttribute("axis")->set_i(1); attributes.AddRewrittenAttribute("axis")->set_i(1);
// Remove dummy attributes // Remove dummy attributes
attributes.remove("consumed_inputs"); attributes.remove("consumed_inputs");
attributes.remove("is_test"); attributes.remove("is_test");
attributes.remove("spatial"); attributes.remove("spatial");
return GenericImporter(&onnx_node_v2, ctx); return GenericImporter(&onnx_node_v2, ctx);
} }
...@@ -74,12 +64,10 @@ ONNXImporterReturns ONNXBackend::CastImporter( ...@@ -74,12 +64,10 @@ ONNXImporterReturns ONNXBackend::CastImporter(
ONNXNode* onnx_node, ONNXNode* onnx_node,
const ConversionContext& ctx) { const ConversionContext& ctx) {
auto& attributes = onnx_node->attributes; auto& attributes = onnx_node->attributes;
// Determine the dtype // Determine the dtype
auto* dtype = attributes.AddRewrittenAttribute("dtype"); auto* dtype = attributes.AddRewrittenAttribute("dtype");
auto onnx_dtype = attributes.get<int64_t>("to", TensorProto::UNDEFINED); auto onnx_dtype = attributes.get<int64_t>("to", TensorProto::UNDEFINED);
auto supported_dtype = true; auto supported_dtype = true;
switch (onnx_dtype) { switch (onnx_dtype) {
case ONNX_NAMESPACE::TensorProto::BOOL: case ONNX_NAMESPACE::TensorProto::BOOL:
dtype->set_s("bool"); dtype->set_s("bool");
...@@ -138,11 +126,9 @@ ONNXImporterReturns ONNXBackend::CastImporter( ...@@ -138,11 +126,9 @@ ONNXImporterReturns ONNXBackend::CastImporter(
supported_dtype = false; supported_dtype = false;
break; break;
}; };
CHECK(supported_dtype) << "\nCasting to " << dtype->s() CHECK(supported_dtype) << "\nCasting to " << dtype->s()
<< " is not supported."; << " is not supported.";
attributes.remove("to"); attributes.remove("to");
return GenericImporter(onnx_node, ctx); return GenericImporter(onnx_node, ctx);
} }
...@@ -151,17 +137,16 @@ ONNXImporterReturns ONNXBackend::ConvPoolImporter( ...@@ -151,17 +137,16 @@ ONNXImporterReturns ONNXBackend::ConvPoolImporter(
const ConversionContext& ctx) { const ConversionContext& ctx) {
auto& attributes = onnx_node->attributes; auto& attributes = onnx_node->attributes;
const auto onnx_op_type = onnx_node->node.op_type(); const auto onnx_op_type = onnx_node->node.op_type();
// Determine the padding // Determine the padding
auto mode = attributes.get<string>("auto_pad"); auto mode = attributes.get<string>("auto_pad");
auto* padding = attributes.AddRewrittenAttribute("padding"); auto* padding = attributes.AddRewrittenAttribute("padding");
// SAME, SAME_LOWER, or SAME_UPPER // SAME, SAME_LOWER, or SAME_UPPER
if (str::find(mode, "SAME")) if (str::find(mode, "SAME")) {
padding->set_s(mode); padding->set_s(mode);
else } else {
padding->set_s("VALID"); // Use explicit pads padding->set_s("VALID"); // Use explicit pads
}
attributes.remove("auto_pad"); attributes.remove("auto_pad");
// Determine the pooling mode // Determine the pooling mode
if (onnx_op_type == "MaxPool") { if (onnx_op_type == "MaxPool") {
attributes.AddRewrittenAttribute("mode")->set_s("MAX"); attributes.AddRewrittenAttribute("mode")->set_s("MAX");
...@@ -174,14 +159,11 @@ ONNXImporterReturns ONNXBackend::ConvPoolImporter( ...@@ -174,14 +159,11 @@ ONNXImporterReturns ONNXBackend::ConvPoolImporter(
attributes.AddRewrittenAttribute("mode")->set_s("AVG"); attributes.AddRewrittenAttribute("mode")->set_s("AVG");
attributes.AddRewrittenAttribute("global_pooling")->set_i(1); attributes.AddRewrittenAttribute("global_pooling")->set_i(1);
} }
auto returns = GenericImporter(onnx_node, ctx); auto returns = GenericImporter(onnx_node, ctx);
// Determine the op type // Determine the op type
OperatorDef* op_def = returns.GetOp(0); OperatorDef* op_def = returns.GetOp(0);
auto ks = attributes.get<ONNX_INTS>("kernel_shape"); auto ks = attributes.get<ONNX_INTS>("kernel_shape");
*(op_def->mutable_type()) += (str::to(ks.size() > 0 ? ks.size() : 2) + "d"); *(op_def->mutable_type()) += (str::to(ks.size() > 0 ? ks.size() : 2) + "d");
return returns; return returns;
} }
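For reference, a minimal Python sketch of the decision above: the auto_pad string maps straight to the backend padding, and the length of kernel_shape picks the "Nd" suffix appended to the op type. The helper name and return shape are illustrative, not part of this diff.

def resolve_conv_pool(auto_pad, kernel_shape):
    # SAME, SAME_LOWER or SAME_UPPER pass through; anything else means explicit pads.
    padding = auto_pad if 'SAME' in (auto_pad or '') else 'VALID'
    ndim = len(kernel_shape) if kernel_shape else 2
    return padding, '%dd' % ndim

assert resolve_conv_pool('SAME_UPPER', [3, 3]) == ('SAME_UPPER', '2d')
assert resolve_conv_pool('', []) == ('VALID', '2d')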
...@@ -194,11 +176,9 @@ ONNXImporterReturns ONNXBackend::GenericImporter( ...@@ -194,11 +176,9 @@ ONNXImporterReturns ONNXBackend::GenericImporter(
op_def->mutable_input()->MergeFrom(node.input()); op_def->mutable_input()->MergeFrom(node.input());
op_def->mutable_output()->MergeFrom(node.output()); op_def->mutable_output()->MergeFrom(node.output());
op_def->set_name(node.name()); op_def->set_name(node.name());
const auto onnx_op_type = node.op_type(); const auto onnx_op_type = node.op_type();
op_def->set_type( op_def->set_type(
get_default(get_renamed_nodes(), onnx_op_type, onnx_op_type)); get_default(get_renamed_nodes(), onnx_op_type, onnx_op_type));
auto mapper = [&, this](const std::string& k) { auto mapper = [&, this](const std::string& k) {
const auto it = get_node_renamed_attrs().find(onnx_op_type); const auto it = get_node_renamed_attrs().find(onnx_op_type);
if (it != get_node_renamed_attrs().end()) { if (it != get_node_renamed_attrs().end()) {
...@@ -224,18 +204,16 @@ ONNXImporterReturns ONNXBackend::GemmImporter( ...@@ -224,18 +204,16 @@ ONNXImporterReturns ONNXBackend::GemmImporter(
auto alpha = attributes.get<float>("alpha", 1.f); auto alpha = attributes.get<float>("alpha", 1.f);
auto beta = attributes.get<float>("beta", 1.f); auto beta = attributes.get<float>("beta", 1.f);
auto trans_a = attributes.get<int64_t>("transA", 0L); auto trans_a = attributes.get<int64_t>("transA", 0L);
// Remove the unsupported attributes
if (alpha != 1.f || beta != 1.f) { if (alpha != 1.f || beta != 1.f) {
LOG(FATAL) << "alpha/beta can not be set currently."; LOG(FATAL) << "alpha/beta can not be set currently.";
} }
if (trans_a) { if (trans_a) {
LOG(FATAL) << "Tranposed A is not supported currently."; LOG(FATAL) << "Tranposed A is not supported currently.";
} }
attributes.remove("alpha"); attributes.remove("alpha");
attributes.remove("beta"); attributes.remove("beta");
attributes.remove("transA"); attributes.remove("transA");
return GenericImporter(onnx_node, ctx); return GenericImporter(onnx_node, ctx);
} }
...@@ -244,11 +222,9 @@ ONNXImporterReturns ONNXBackend::MaxRoiPoolImporter( ...@@ -244,11 +222,9 @@ ONNXImporterReturns ONNXBackend::MaxRoiPoolImporter(
const ConversionContext& ctx) { const ConversionContext& ctx) {
auto& attributes = onnx_node->attributes; auto& attributes = onnx_node->attributes;
auto pooled_shape = attributes.get<ONNX_INTS>("pooled_shape"); auto pooled_shape = attributes.get<ONNX_INTS>("pooled_shape");
attributes.AddRewrittenAttribute("pool_h")->set_i(pooled_shape.Get(0)); attributes.AddRewrittenAttribute("pool_h")->set_i(pooled_shape.Get(0));
attributes.AddRewrittenAttribute("pool_w")->set_i(pooled_shape.Get(1)); attributes.AddRewrittenAttribute("pool_w")->set_i(pooled_shape.Get(1));
attributes.remove("pooled_shape"); attributes.remove("pooled_shape");
return GenericImporter(onnx_node, ctx); return GenericImporter(onnx_node, ctx);
} }
...@@ -258,18 +234,16 @@ ONNXImporterReturns ONNXBackend::ReshapeImporter( ...@@ -258,18 +234,16 @@ ONNXImporterReturns ONNXBackend::ReshapeImporter(
auto node = NodeProto(onnx_node->node); auto node = NodeProto(onnx_node->node);
auto onnx_node_v2 = ONNXNode(node); auto onnx_node_v2 = ONNXNode(node);
auto& attributes = onnx_node_v2.attributes; auto& attributes = onnx_node_v2.attributes;
attributes.remove("consumed_inputs"); attributes.remove("consumed_inputs");
// Determine the dims // Determine the dims
auto* dims = attributes.AddRewrittenAttribute("dims"); auto* dims = attributes.AddRewrittenAttribute("dims");
if (ctx.opset_version() < 5) { if (ctx.opset_version() < 5) {
const auto& shape = attributes.get<ONNX_INTS>("shape"); const auto& shape = attributes.get<ONNX_INTS>("shape");
CHECK_GT(shape.size(), 0) << "\nExpected the shape value"; CHECK_GT(shape.size(), 0) << "\nExpected the shape value";
attributes.remove("shape"); attributes.remove("shape");
for (auto d : shape) for (auto d : shape) {
dims->add_ints(d); dims->add_ints(d);
}
} else { } else {
CHECK_EQ(node.input_size(), 2) CHECK_EQ(node.input_size(), 2)
<< "\nExpectd 2 input in upsample after onnx version 5"; << "\nExpectd 2 input in upsample after onnx version 5";
...@@ -280,10 +254,10 @@ ONNXImporterReturns ONNXBackend::ReshapeImporter( ...@@ -280,10 +254,10 @@ ONNXImporterReturns ONNXBackend::ReshapeImporter(
Argument shape_dtype, shape_values; Argument shape_dtype, shape_values;
ONNXTensorToArgument(*shape_tensor, &shape_dtype, &shape_values); ONNXTensorToArgument(*shape_tensor, &shape_dtype, &shape_values);
CHECK_GT(shape_values.ints_size(), 0) << "\nExpected the shape value"; CHECK_GT(shape_values.ints_size(), 0) << "\nExpected the shape value";
for (auto d : shape_values.ints()) for (auto d : shape_values.ints()) {
dims->add_ints(d); dims->add_ints(d);
}
} }
return GenericImporter(&onnx_node_v2, ctx); return GenericImporter(&onnx_node_v2, ctx);
} }
...@@ -293,9 +267,7 @@ ONNXImporterReturns ONNXBackend::ResizeImporter( ...@@ -293,9 +267,7 @@ ONNXImporterReturns ONNXBackend::ResizeImporter(
auto node = NodeProto(onnx_node->node); auto node = NodeProto(onnx_node->node);
auto onnx_node_v2 = ONNXNode(node); auto onnx_node_v2 = ONNXNode(node);
auto& attributes = onnx_node_v2.attributes; auto& attributes = onnx_node_v2.attributes;
attributes.remove("coordinate_transformation_mode"); attributes.remove("coordinate_transformation_mode");
if (ctx.opset_version() >= 9) { if (ctx.opset_version() >= 9) {
node.mutable_input()->Clear(); node.mutable_input()->Clear();
node.add_input(onnx_node->node.input(0)); node.add_input(onnx_node->node.input(0));
...@@ -307,21 +279,22 @@ ONNXImporterReturns ONNXBackend::ResizeImporter( ...@@ -307,21 +279,22 @@ ONNXImporterReturns ONNXBackend::ResizeImporter(
const auto* scales_tensor = ctx.initializer().at(scales_name); const auto* scales_tensor = ctx.initializer().at(scales_name);
ONNXTensorToArgument(*scales_tensor, &scales_dtype, &scale_values); ONNXTensorToArgument(*scales_tensor, &scales_dtype, &scale_values);
auto* scales = attributes.AddRewrittenAttribute("scales"); auto* scales = attributes.AddRewrittenAttribute("scales");
for (auto d : scale_values.floats()) for (auto d : scale_values.floats()) {
scales->add_floats(d); scales->add_floats(d);
}
if (sizes_idx > 0) { if (sizes_idx > 0) {
Argument sizes_dtype, sizes_values; Argument sizes_dtype, sizes_values;
const auto& sizes_name = onnx_node->node.input(sizes_idx); const auto& sizes_name = onnx_node->node.input(sizes_idx);
const auto* sizes_tensor = ctx.initializer().at(sizes_name); const auto* sizes_tensor = ctx.initializer().at(sizes_name);
ONNXTensorToArgument(*sizes_tensor, &sizes_dtype, &sizes_values); ONNXTensorToArgument(*sizes_tensor, &sizes_dtype, &sizes_values);
auto* sizes = attributes.AddRewrittenAttribute("sizes"); auto* sizes = attributes.AddRewrittenAttribute("sizes");
for (auto d : sizes_values.floats()) for (auto d : sizes_values.floats()) {
sizes->add_ints(d); sizes->add_ints(d);
}
} }
} else { } else {
LOG(FATAL) << "Required opset >= 7"; LOG(FATAL) << "Required opset >= 7";
} }
return GenericImporter(&onnx_node_v2, ctx); return GenericImporter(&onnx_node_v2, ctx);
} }
...@@ -330,12 +303,10 @@ ONNXImporterReturns ONNXBackend::RoiAlignImporter( ...@@ -330,12 +303,10 @@ ONNXImporterReturns ONNXBackend::RoiAlignImporter(
const ConversionContext& ctx) { const ConversionContext& ctx) {
auto node = NodeProto(onnx_node->node); auto node = NodeProto(onnx_node->node);
auto onnx_node_v2 = ONNXNode(node); auto onnx_node_v2 = ONNXNode(node);
// Remove the batch indices // Remove the batch indices
node.mutable_input()->Clear(); node.mutable_input()->Clear();
node.add_input(onnx_node->node.input(0)); node.add_input(onnx_node->node.input(0));
node.add_input(onnx_node->node.input(1)); node.add_input(onnx_node->node.input(1));
return GenericImporter(&onnx_node_v2, ctx); return GenericImporter(&onnx_node_v2, ctx);
} }
...@@ -345,19 +316,22 @@ ONNXImporterReturns ONNXBackend::TileImporter( ...@@ -345,19 +316,22 @@ ONNXImporterReturns ONNXBackend::TileImporter(
auto node = NodeProto(onnx_node->node); auto node = NodeProto(onnx_node->node);
auto onnx_node_v2 = ONNXNode(node); auto onnx_node_v2 = ONNXNode(node);
auto& attributes = onnx_node_v2.attributes; auto& attributes = onnx_node_v2.attributes;
if (ctx.opset_version() >= 6) {
// Determine the multiples from repeats // Determine the repeats from the initializer
auto* multiples = attributes.AddRewrittenAttribute("multiples"); auto* repeats = attributes.AddRewrittenAttribute("repeats");
node.mutable_input()->Clear(); node.mutable_input()->Clear();
node.add_input(onnx_node->node.input(0)); node.add_input(onnx_node->node.input(0));
const auto& repeats_name = onnx_node->node.input(1); const auto& repeats_name = onnx_node->node.input(1);
const auto* repeats_tensor = ctx.initializer().at(repeats_name); const auto* repeats_tensor = ctx.initializer().at(repeats_name);
Argument multiples_dtype, multiples_values; Argument repeats_dtype, repeats_values;
ONNXTensorToArgument(*repeats_tensor, &multiples_dtype, &multiples_values); ONNXTensorToArgument(*repeats_tensor, &repeats_dtype, &repeats_values);
CHECK_GT(multiples_values.ints_size(), 0) << "\nExpected the repeats value"; CHECK_GT(repeats_values.ints_size(), 0) << "\nExpected the repeats value";
for (auto d : multiples_values.ints()) for (auto repeat : repeats_values.ints()) {
multiples->add_ints(d); repeats->add_ints(repeat);
}
} else {
LOG(FATAL) << "Required opset >= 6";
}
return GenericImporter(&onnx_node_v2, ctx); return GenericImporter(&onnx_node_v2, ctx);
} }
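As a point of reference, the renamed <repeats> argument matches the reps of numpy.tile: entry i multiplies dimension i. A minimal numpy check (not part of this diff):

import numpy as np

x = np.arange(6).reshape(2, 3)
repeats = (2, 3)                  # what the importer reads from the ONNX initializer
y = np.tile(x, repeats)
assert y.shape == (4, 9)          # each dim multiplied by its repeat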
......
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
namespace dragon { namespace dragon {
#define DEFINE_FILLER_OP_IMPL(name) \ #define DEFINE_FILLER_OP_IMPL(name) \
template <class Context> \ template <class Context> \
template <typename T> \ template <typename T> \
void name##Op<Context>::DoRunWithType() { \ void name##Op<Context>::DoRunWithType() { \
unique_ptr<Filler<T, Context>> f; \ unique_ptr<Filler<T, Context>> f; \
f.reset(CreateFiller<T, Context>(this->proto_)); \ f.reset(CreateFiller<T, Context>(this->filler_info_)); \
f->Fill(Output(0), ctx()); \ f->Fill(Output(0), ctx()); \
} }
#define DISPATCH_WITH_TYPES(name, ...) \ #define DISPATCH_WITH_TYPES(name, ...) \
......
...@@ -30,7 +30,7 @@ class InitializeOp : public Operator<Context> { ...@@ -30,7 +30,7 @@ class InitializeOp : public Operator<Context> {
void RunOnDevice() override; void RunOnDevice() override;
protected: protected:
TensorFillerProto proto_; FillerInfo filler_info_;
DECLARE_ARGS_WITH_DESC(int64_t, dims); DECLARE_ARGS_WITH_DESC(int64_t, dims);
}; };
...@@ -142,9 +142,9 @@ class RandomNormalOp final : public InitializeOp<Context> { ...@@ -142,9 +142,9 @@ class RandomNormalOp final : public InitializeOp<Context> {
: InitializeOp<Context>(def, ws) { : InitializeOp<Context>(def, ws) {
auto mu = OpArg<float>("mean", 0.f); auto mu = OpArg<float>("mean", 0.f);
auto sigma = OpArg<float>("std", 1.f); auto sigma = OpArg<float>("std", 1.f);
this->proto_.set_mean(mu); this->filler_info_.set_mean(mu);
this->proto_.set_std(sigma); this->filler_info_.set_std(sigma);
this->proto_.set_type("normal"); this->filler_info_.set_type("normal");
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
...@@ -161,9 +161,9 @@ class RandomUniformOp final : public InitializeOp<Context> { ...@@ -161,9 +161,9 @@ class RandomUniformOp final : public InitializeOp<Context> {
: InitializeOp<Context>(def, ws) { : InitializeOp<Context>(def, ws) {
auto low = OpArg<float>("low", -1.f); auto low = OpArg<float>("low", -1.f);
auto high = OpArg<float>("high", 1.f); auto high = OpArg<float>("high", 1.f);
this->proto_.set_low(low); this->filler_info_.set_low(low);
this->proto_.set_high(high); this->filler_info_.set_high(high);
this->proto_.set_type("uniform"); this->filler_info_.set_type("uniform");
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
...@@ -180,11 +180,11 @@ class TruncatedNormalOp final : public InitializeOp<Context> { ...@@ -180,11 +180,11 @@ class TruncatedNormalOp final : public InitializeOp<Context> {
: InitializeOp<Context>(def, ws) { : InitializeOp<Context>(def, ws) {
auto mu = OpArg<float>("mean", 0.f); auto mu = OpArg<float>("mean", 0.f);
auto sigma = OpArg<float>("std", 1.f); auto sigma = OpArg<float>("std", 1.f);
this->proto_.set_mean(mu); this->filler_info_.set_mean(mu);
this->proto_.set_std(sigma); this->filler_info_.set_std(sigma);
this->proto_.set_low(mu - 2 * sigma); this->filler_info_.set_low(mu - 2 * sigma);
this->proto_.set_high(mu + 2 * sigma); this->filler_info_.set_high(mu + 2 * sigma);
this->proto_.set_type("truncated_normal"); this->filler_info_.set_type("truncated_normal");
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
...@@ -201,15 +201,15 @@ class GlorotNormalOp final : public InitializeOp<Context> { ...@@ -201,15 +201,15 @@ class GlorotNormalOp final : public InitializeOp<Context> {
: InitializeOp<Context>(def, ws) { : InitializeOp<Context>(def, ws) {
auto scale = OpArg<float>("scale", 2.f); auto scale = OpArg<float>("scale", 2.f);
auto mode = OpArg<string>("mode", "fan_in"); auto mode = OpArg<string>("mode", "fan_in");
this->proto_.set_type("msra"); this->filler_info_.set_type("glorot_normal");
if (mode == "fan_avg") { if (mode == "fan_avg") {
this->proto_.set_variance_norm(TensorFillerProto_VarianceNorm_FAN_AVG); this->filler_info_.set_variance_norm(FillerInfo_VarianceNorm_FAN_AVG);
} else if (mode == "fan_out") { } else if (mode == "fan_out") {
this->proto_.set_variance_norm(TensorFillerProto_VarianceNorm_FAN_OUT); this->filler_info_.set_variance_norm(FillerInfo_VarianceNorm_FAN_OUT);
} else { } else {
this->proto_.set_variance_norm(TensorFillerProto_VarianceNorm_FAN_IN); this->filler_info_.set_variance_norm(FillerInfo_VarianceNorm_FAN_IN);
} }
this->proto_.set_scale(scale); this->filler_info_.set_scale(scale);
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
...@@ -226,15 +226,15 @@ class GlorotUniformOp final : public InitializeOp<Context> { ...@@ -226,15 +226,15 @@ class GlorotUniformOp final : public InitializeOp<Context> {
: InitializeOp<Context>(def, ws) { : InitializeOp<Context>(def, ws) {
auto scale = OpArg<float>("scale", 3.f); auto scale = OpArg<float>("scale", 3.f);
auto mode = OpArg<string>("mode", "fan_in"); auto mode = OpArg<string>("mode", "fan_in");
this->proto_.set_type("xavier"); this->filler_info_.set_type("glorot_uniform");
if (mode == "fan_avg") { if (mode == "fan_avg") {
this->proto_.set_variance_norm(TensorFillerProto_VarianceNorm_FAN_AVG); this->filler_info_.set_variance_norm(FillerInfo_VarianceNorm_FAN_AVG);
} else if (mode == "fan_out") { } else if (mode == "fan_out") {
this->proto_.set_variance_norm(TensorFillerProto_VarianceNorm_FAN_OUT); this->filler_info_.set_variance_norm(FillerInfo_VarianceNorm_FAN_OUT);
} else { } else {
this->proto_.set_variance_norm(TensorFillerProto_VarianceNorm_FAN_IN); this->filler_info_.set_variance_norm(FillerInfo_VarianceNorm_FAN_IN);
} }
this->proto_.set_scale(scale); this->filler_info_.set_scale(scale);
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
......
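For intuition, the variance_norm modes above select which fan scales the filler. A hedged numpy sketch of one common glorot_uniform convention (limit = sqrt(scale / fan)); the exact formula lives in the C++ fillers and may differ:

import numpy as np

def glorot_uniform(shape, scale=3.0, mode='fan_in'):
    fan_in, fan_out = shape[1], shape[0]      # assuming a 2D weight laid out as (out, in)
    fan = {'fan_in': fan_in,
           'fan_out': fan_out,
           'fan_avg': (fan_in + fan_out) / 2.0}[mode]
    limit = np.sqrt(scale / fan)
    return np.random.uniform(-limit, limit, size=shape)

w = glorot_uniform((256, 128), mode='fan_avg')
assert w.shape == (256, 128)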
...@@ -9,9 +9,12 @@ template <typename T> ...@@ -9,9 +9,12 @@ template <typename T>
void TileOp<Context>::DoRunWithType() { void TileOp<Context>::DoRunWithType() {
auto &X = Input(0), *Y = Output(0); auto &X = Input(0), *Y = Output(0);
int num_repeats;
repeats(0, &num_repeats);
auto Y_dims = X.dims(); auto Y_dims = X.dims();
for (int i = 0; i < Y_dims.size(); ++i) for (int i = 0; i < num_repeats; ++i) {
Y_dims[i] *= multiples(i); Y_dims[i] *= repeats(i);
}
if (X.dims() == Y_dims) { if (X.dims() == Y_dims) {
Y->Reshape(Y_dims)->CopyFrom(X, ctx()); Y->Reshape(Y_dims)->CopyFrom(X, ctx());
...@@ -49,7 +52,7 @@ void TileGradientOp<Context>::DoRunWithType() { ...@@ -49,7 +52,7 @@ void TileGradientOp<Context>::DoRunWithType() {
dx = dest_->template mutable_data<T, Context>(); dx = dest_->template mutable_data<T, Context>();
} }
kernel::TileGrad( kernel::TileGrad(
dest_->count(0, axis_), dest_->count(axis_), multiple_, dy, dx, ctx()); dest_->count(0, axis_), dest_->count(axis_), repeat_, dy, dx, ctx());
} }
template <class Context> template <class Context>
...@@ -57,10 +60,14 @@ void TileGradientOp<Context>::RunOnDevice() { ...@@ -57,10 +60,14 @@ void TileGradientOp<Context>::RunOnDevice() {
auto &dY = Input(0), *dX = Output(0); auto &dY = Input(0), *dX = Output(0);
// Add the axes // Add the axes
int num_repeats;
repeats(0, &num_repeats);
vector<pair<int, int>> dispatch_axes; vector<pair<int, int>> dispatch_axes;
for (int i = 0; i < dY.ndim(); i++) { for (int i = 0; i < dY.ndim() && i < num_repeats; i++) {
auto m = multiples(i); auto repeat = repeats(i);
if (m > 1) dispatch_axes.push_back({m, i}); if (repeat > 1) {
dispatch_axes.push_back({repeat, i});
}
} }
std::sort(dispatch_axes.begin(), dispatch_axes.end()); std::sort(dispatch_axes.begin(), dispatch_axes.end());
std::reverse(dispatch_axes.begin(), dispatch_axes.end()); std::reverse(dispatch_axes.begin(), dispatch_axes.end());
...@@ -76,10 +83,10 @@ void TileGradientOp<Context>::RunOnDevice() { ...@@ -76,10 +83,10 @@ void TileGradientOp<Context>::RunOnDevice() {
// Reduce N times along each tiled axis // Reduce N times along each tiled axis
for (const auto& task : dispatch_axes) { for (const auto& task : dispatch_axes) {
axis_ = task.second, multiple_ = task.first; axis_ = task.second, repeat_ = task.first;
vec64_t X_dims(src_->dims()); vec64_t X_dims(src_->dims());
X_dims[axis_] /= multiple_; X_dims[axis_] /= repeat_;
dest_->Reshape(X_dims); dest_->Reshape(X_dims);
DispatchHelper<FloatingTensorTypes>::Call(this, dY); DispatchHelper<FloatingTensorTypes>::Call(this, dY);
......
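The backward pass above shrinks one tiled axis at a time by summing its repeated blocks. A minimal numpy sketch of that single-axis reduction, where rows and cols stand for the element counts before and from the reduced axis (illustration only):

import numpy as np

def tile_grad_one_axis(dy_flat, rows, cols, repeat):
    # The tiled layout holds `repeat` contiguous copies of each `cols`-sized block.
    return dy_flat.reshape(rows, repeat, cols).sum(axis=1)

dy = np.ones(2 * 3 * 4, dtype='float32')      # grad of a (2, 4) input tiled 3x on axis 1
dx = tile_grad_one_axis(dy, rows=2, cols=4, repeat=3)
assert dx.shape == (2, 4) and float(dx[0, 0]) == 3.0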
...@@ -21,7 +21,7 @@ template <class Context> ...@@ -21,7 +21,7 @@ template <class Context>
class TileOp final : public Operator<Context> { class TileOp final : public Operator<Context> {
public: public:
TileOp(const OperatorDef& def, Workspace* ws) : Operator<Context>(def, ws) { TileOp(const OperatorDef& def, Workspace* ws) : Operator<Context>(def, ws) {
GET_ARGS_WITH_DESC(int64_t, multiples); GET_ARGS_WITH_DESC(int64_t, repeats);
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
...@@ -31,7 +31,7 @@ class TileOp final : public Operator<Context> { ...@@ -31,7 +31,7 @@ class TileOp final : public Operator<Context> {
void DoRunWithType(); void DoRunWithType();
protected: protected:
DECLARE_ARGS_WITH_DESC(int64_t, multiples); DECLARE_ARGS_WITH_DESC(int64_t, repeats);
}; };
template <class Context> template <class Context>
...@@ -39,7 +39,7 @@ class TileGradientOp final : public Operator<Context> { ...@@ -39,7 +39,7 @@ class TileGradientOp final : public Operator<Context> {
public: public:
TileGradientOp(const OperatorDef& def, Workspace* ws) TileGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws) { : Operator<Context>(def, ws) {
GET_ARGS_WITH_DESC(int64_t, multiples); GET_ARGS_WITH_DESC(int64_t, repeats);
} }
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
...@@ -50,12 +50,12 @@ class TileGradientOp final : public Operator<Context> { ...@@ -50,12 +50,12 @@ class TileGradientOp final : public Operator<Context> {
protected: protected:
Tensor *dest_, *src_, nav_; Tensor *dest_, *src_, nav_;
int64_t axis_, multiple_; int64_t axis_, repeat_;
DECLARE_ARGS_WITH_DESC(int64_t, multiples); DECLARE_ARGS_WITH_DESC(int64_t, repeats);
}; };
DEFINE_ARGS_WITH_DESC(int64_t, TileOp, multiples); DEFINE_ARGS_WITH_DESC(int64_t, TileOp, repeats);
DEFINE_ARGS_WITH_DESC(int64_t, TileGradientOp, multiples); DEFINE_ARGS_WITH_DESC(int64_t, TileGradientOp, repeats);
} // namespace dragon } // namespace dragon
......
...@@ -9,7 +9,6 @@ void AdamUpdateOp<Context>::ComputeUpdate(Tensor* dX) { ...@@ -9,7 +9,6 @@ void AdamUpdateOp<Context>::ComputeUpdate(Tensor* dX) {
t_++; t_++;
auto beta1 = Parameter("beta1"), beta2 = Parameter("beta2"); auto beta1 = Parameter("beta1"), beta2 = Parameter("beta2");
auto coef = sqrt(1.f - pow(beta2, t_)) / (1.f - pow(beta1, t_)); auto coef = sqrt(1.f - pow(beta2, t_)) / (1.f - pow(beta1, t_));
kernel::AdamUpdate( kernel::AdamUpdate(
dX->count(), dX->count(),
Parameter("base_lr") * coef * this->lr_mult_, Parameter("base_lr") * coef * this->lr_mult_,
......
...@@ -10,7 +10,6 @@ void SGDUpdateOp<Context>::ComputeUpdate(Tensor* dX) { ...@@ -10,7 +10,6 @@ void SGDUpdateOp<Context>::ComputeUpdate(Tensor* dX) {
auto lr = Parameter("base_lr") * this->lr_mult_; auto lr = Parameter("base_lr") * this->lr_mult_;
if (last_lr_ > 0) correction_ = lr / last_lr_; if (last_lr_ > 0) correction_ = lr / last_lr_;
last_lr_ = lr; // Record the last value last_lr_ = lr; // Record the last value
kernel::SGDUpdate( kernel::SGDUpdate(
dX->count(), dX->count(),
lr, lr,
......
...@@ -20,9 +20,7 @@ void BiasAddOp<Context>::DoRunWithType() { ...@@ -20,9 +20,7 @@ void BiasAddOp<Context>::DoRunWithType() {
LOG(FATAL) << "Unknown DataFormat: " << data_format(); LOG(FATAL) << "Unknown DataFormat: " << data_format();
} }
// Maybe fill the bias at the first time
TENSOR_FILL(B, vec64_t({C})); TENSOR_FILL(B, vec64_t({C}));
kernel::BiasAdd( kernel::BiasAdd(
N, N,
C, C,
......
...@@ -4,73 +4,69 @@ ...@@ -4,73 +4,69 @@
namespace dragon { namespace dragon {
#define SAME_PADDING(A, B) \ #define DETERMINE_SAME_PADDING(l, r) \
A[i] = padding_needed / 2; \ if (padding_ != "SAME_UPPER") { \
B[i] = padding_needed - A[i] l[i] = pad_size / 2; \
r[i] = pad_size - l[i]; \
} else { \
r[i] = pad_size / 2; \
l[i] = pad_size - r[i]; \
}
template <class Context> template <class Context>
void ConvOpBase<Context>::ComputeOutShape() { void ConvOpBase<Context>::ComputeOutShape() {
auto X_dims = Input(0).dims();
out_shape_.clear(); out_shape_.clear();
for (int i = 0; i < num_axes_; i++) { vec64_t X_dims = Input(0).dims();
if (!Transposed()) { int64_t in_size, out_size, k_size, pad_size;
auto idm = X_dims[axis_ + i]; if (!Transposed()) {
auto dk = dilation_[i] * (kshape_[i] - 1) + 1; for (int i = 0; i < num_axes_; i++) {
if (!str::find(padding_, "SAME")) { in_size = X_dims[axis_ + i];
// Explicit pads k_size = dilation_[i] * (kshape_[i] - 1) + 1;
auto odm = (idm + pad_l_[i] + pad_r_[i] - dk) / stride_[i] + 1; if (!str::find(padding_, "SAME")) { // Explicit pads
out_shape_.push_back(odm); pad_size = pad_l_[i] + pad_r_[i];
} else { out_size = (in_size + pad_size - k_size) / stride_[i] + 1;
// Auto pads } else { // Auto pads
int64_t odm = (idm + stride_[i] - 1) / (float)stride_[i]; out_size = (in_size + stride_[i] - 1) / stride_[i];
auto padding_needed = pad_size = (out_size - 1) * stride_[i] + k_size - in_size;
std::max(int64_t(0), (odm - 1) * stride_[i] + dk - idm); pad_size = std::max(pad_size, int64_t(0));
out_shape_.push_back(odm); DETERMINE_SAME_PADDING(pad_l_, pad_r_);
if (padding_ == "SAME_UPPER") { }
SAME_PADDING(pad_l_, pad_r_); out_shape_.push_back(out_size);
} else { }
SAME_PADDING(pad_r_, pad_l_); } else {
} // SAME_LOWER or SAME int num_output_padding;
output_padding(0, &num_output_padding);
CHECK(num_output_padding == 0 || num_output_padding == num_axes_)
<< "\nExcepted 0 or " << num_axes_ << " ints for <output_padding>.";
if (!str::find(padding_, "SAME")) { // Explicit pads
for (int i = 0; i < num_axes_; i++) {
in_size = X_dims[axis_ + i];
k_size = dilation_[i] * (kshape_[i] - 1) + 1;
pad_size = pad_l_[i] + pad_r_[i];
out_size = stride_[i] * (in_size - 1) + k_size - pad_size;
if (num_output_padding > 0) out_size += output_padding(i);
out_shape_.push_back(out_size);
} }
} else { } else {
auto idm = X_dims[axis_ + i]; // Auto pads
auto dk = dilation_[i] * (kshape_[i] - 1) + 1; int num_output_shape;
if (!str::find(padding_, "SAME")) { output_shape(0, &num_output_shape);
// Explicit pads CHECK(num_output_shape == num_axes_)
auto odm = stride_[i] * (idm - 1) + dk - pad_l_[i] - pad_r_[i]; << "\nExpected " << num_axes_ << " ints for <output_shape>.";
out_shape_.push_back(odm); for (int i = 0; i < num_axes_; i++) {
} else { in_size = X_dims[axis_ + i];
// Auto pads k_size = dilation_[i] * (kshape_[i] - 1) + 1;
int output_shape_size; out_size = output_shape(i);
int output_padding_size; pad_size = stride_[i] * (in_size - 1) + k_size;
output_shape(0, &output_shape_size); if (num_output_padding > 0) pad_size += output_padding(i);
output_padding(0, &output_padding_size); CHECK_GE(pad_size, out_size)
CHECK(output_shape_size == 0 || output_shape_size == num_axes_) << "\nThe output shape is incorrect."
<< "Excepted 0 or " << num_axes_ << " ints for output shape."; << "\nDimension of spatial axis " << i << " should be at most "
CHECK(output_padding_size == 0 || output_padding_size == num_axes_) << pad_size << ".";
<< "Excepted 0 or " << num_axes_ << " ints for output padding."; pad_size = stride_[i] * (in_size - 1) + k_size - out_size;
int64_t padding_needed, odm; pad_size = std::max(pad_size, int64_t(0));
if (output_padding_size) { DETERMINE_SAME_PADDING(pad_l_, pad_r_);
padding_needed = output_padding(i); out_shape_.push_back(out_size);
odm = stride_[i] * (idm - 1) + dk + padding_needed;
} else if (output_shape_size) {
odm = output_shape(i);
padding_needed = odm - (stride_[i] * (idm - 1) + dk);
CHECK_GE(padding_needed, 0)
<< "\nThe output shape is incorrect."
<< "\nWith the given stride and kernel, "
<< "dimension of spatial axis " << i << " should be at least "
<< odm - padding_needed << ".";
} else {
LOG(FATAL) << "Excepted the output padding or output shape "
<< "for \"SAME\" padding algorithm.";
}
out_shape_.push_back(odm);
if (padding_ == "SAME_UPPER") {
SAME_PADDING(pad_l_, pad_r_);
} else {
SAME_PADDING(pad_r_, pad_l_);
} // SAME_LOWER or SAME
} }
} }
} }
...@@ -373,7 +369,7 @@ INSTANTIATE_API(CUDAContext, float); ...@@ -373,7 +369,7 @@ INSTANTIATE_API(CUDAContext, float);
INSTANTIATE_API(CUDAContext, double); INSTANTIATE_API(CUDAContext, double);
#endif #endif
#undef SAME_PADDING
#undef INSTANTIATE_API #undef INSTANTIATE_API
#undef DETERMINE_SAME_PADDING
} // namespace dragon } // namespace dragon
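In plain Python, the arithmetic that the rewritten forward branch performs per spatial axis under SAME padding (illustration only; the C++ above is authoritative):

def same_out_and_pads(in_size, kernel, stride, dilation, padding='SAME_LOWER'):
    k = dilation * (kernel - 1) + 1
    out = (in_size + stride - 1) // stride            # ceil(in / stride)
    pad = max((out - 1) * stride + k - in_size, 0)
    if padding != 'SAME_UPPER':                       # SAME or SAME_LOWER branch of the macro
        pad_l = pad // 2
        pad_r = pad - pad_l
    else:                                             # SAME_UPPER branch of the macro
        pad_r = pad // 2
        pad_l = pad - pad_r
    return out, pad_l, pad_r

assert same_out_and_pads(7, kernel=3, stride=2, dilation=1) == (4, 1, 1)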
...@@ -5,9 +5,14 @@ ...@@ -5,9 +5,14 @@
namespace dragon { namespace dragon {
#define SAME_PADDING(A, B) \ #define DETERMINE_SAME_PADDING(l, r) \
A[i] = padding_needed / 2; \ if (padding_ != "SAME_UPPER") { \
B[i] = padding_needed - A[i] l[i] = pad_size / 2; \
r[i] = pad_size - l[i]; \
} else { \
r[i] = pad_size / 2; \
l[i] = pad_size - r[i]; \
}
template <class Context> template <class Context>
void PoolOpBase<Context>::Setup(int num_axes) { void PoolOpBase<Context>::Setup(int num_axes) {
...@@ -52,41 +57,27 @@ void PoolOpBase<Context>::ComputeOutShape() { ...@@ -52,41 +57,27 @@ void PoolOpBase<Context>::ComputeOutShape() {
kshape_[i] = in_dims_[i + 2]; kshape_[i] = in_dims_[i + 2];
} }
// Adjust the pads for SAME padding algorithm
if (str::find(padding_, "SAME")) {
for (int i = 0; i < num_axes_; i++) {
auto idm = in_dims_[i + 2];
int64_t odm = (idm + stride_[i] - 1) / (float)stride_[i];
auto padding_needed =
std::max((int64_t)0, (odm - 1) * stride_[i] + kshape_[i] - idm);
if (padding_ == "SAME_UPPER") {
SAME_PADDING(pad_l_, pad_r_);
} else {
SAME_PADDING(pad_r_, pad_l_);
} /*! SAME_LOWER or SAME */
}
}
// Compute the output dimensions // Compute the output dimensions
auto floor_or_ceil = ceil_mode_ > 0 auto floor_or_ceil = ceil_mode_ > 0
? static_cast<float (*)(float)>(&std::ceil) ? static_cast<float (*)(float)>(&std::ceil)
: static_cast<float (*)(float)>(&std::floor); : static_cast<float (*)(float)>(&std::floor);
out_dims_ = in_dims_; out_dims_ = in_dims_;
out_shape_ = Input(0).dims(); out_shape_ = Input(0).dims();
int64_t in_size, k_size, pad_size;
for (int i = 0; i < num_axes_; i++) { for (int i = 0; i < num_axes_; i++) {
auto in_dim = in_dims_[i + 2]; float out_size;
if (!str::find(padding_, "SAME")) { in_size = in_dims_[i + 2], k_size = kshape_[i];
// Explicit pads if (!str::find(padding_, "SAME")) { // Explicit pads
in_dim += pad_l_[i] + pad_r_[i]; pad_size = pad_l_[i] + pad_r_[i];
out_shape_[i + axis_] = out_dims_[i + 2] = out_size = float(in_size + pad_size - k_size) / float(stride_[i]) + 1.f;
floor_or_ceil((in_dim - kshape_[i]) / (float)stride_[i]) + 1; out_size = floor_or_ceil(out_size);
} else { } else { // Auto pads
// Auto pads out_size = std::ceil(float(in_size) / float(stride_[i]));
out_shape_[i + axis_] = out_dims_[i + 2] = pad_size = ((int64_t)out_size - 1) * stride_[i] + k_size - in_size;
floor_or_ceil(in_dim / (float)stride_[i]); pad_size = std::max(pad_size, int64_t(0));
DETERMINE_SAME_PADDING(pad_l_, pad_r_);
} }
out_shape_[i + axis_] = out_dims_[i + 2] = out_size;
} }
} }
...@@ -95,6 +86,6 @@ template class PoolOpBase<CPUContext>; ...@@ -95,6 +86,6 @@ template class PoolOpBase<CPUContext>;
template class PoolOpBase<CUDAContext>; template class PoolOpBase<CUDAContext>;
#endif #endif
#undef SAME_PADDING #undef DETERMINE_SAME_PADDING
} // namespace dragon } // namespace dragon
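Similarly for pooling: with explicit pads the output size uses floor or ceil depending on ceil_mode, while SAME padding gives ceil(in / stride) and splits the pad as above. A short sketch (illustration only):

import math

def pool_out_size(in_size, kernel, stride, pad_l=0, pad_r=0, ceil_mode=False, same=False):
    if same:
        return -(-in_size // stride)                  # ceil(in / stride); pads derived as above
    round_fn = math.ceil if ceil_mode else math.floor
    return int(round_fn((in_size + pad_l + pad_r - kernel) / float(stride) + 1.0))

assert pool_out_size(7, kernel=2, stride=2) == 3
assert pool_out_size(7, kernel=2, stride=2, ceil_mode=True) == 4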
syntax = "proto2";
package dragon;
message BlobShape {
repeated int64 dim = 1 [packed = true];
}
message BlobProto {
optional BlobShape shape = 7;
repeated float data = 5 [packed = true];
optional int32 num = 1 [default = 0];
optional int32 channels = 2 [default = 0];
optional int32 height = 3 [default = 0];
optional int32 width = 4 [default = 0];
}
message NetParameter {
optional string name = 1;
repeated LayerParameter layer = 100;
}
message LayerParameter {
optional string name = 1;
repeated BlobProto blobs = 7;
}
...@@ -51,26 +51,6 @@ message TensorProto { ...@@ -51,26 +51,6 @@ message TensorProto {
optional string name = 7; optional string name = 7;
} }
// Record the filler of Tensor.
// This structure is kept for backward compatibility
// with caffe1, which relies on the implicit initializer.
message TensorFillerProto {
optional string tensor = 1;
optional string type = 2 [default = 'constant'];
optional float value = 3 [default = 0];
optional float low = 4 [default = 0];
optional float high = 5 [default = 1];
optional float mean = 6 [default = 0];
optional float std = 7 [default = 1];
optional float scale = 8 [default = 3];
enum VarianceNorm {
FAN_IN = 0;
FAN_OUT = 1;
FAN_AVG = 2;
}
optional VarianceNorm variance_norm = 9 [default = FAN_IN];
}
// Store multiple TensorProto objects in one single proto. // Store multiple TensorProto objects in one single proto.
message TensorProtos { message TensorProtos {
repeated TensorProto protos = 1; repeated TensorProto protos = 1;
...@@ -139,16 +119,6 @@ message OperatorDef { ...@@ -139,16 +119,6 @@ message OperatorDef {
optional string cache_key = 7; optional string cache_key = 7;
} }
// Record the gradient information
message GradientProto {
// The derivative target.
optional string cost = 1;
// The target with respect to?
optional string wrt = 2;
// The external gradient
optional string external = 3;
}
// Graph Definition // Graph Definition
message GraphDef { message GraphDef {
// The graph name. // The graph name.
...@@ -171,6 +141,33 @@ message GraphDef { ...@@ -171,6 +141,33 @@ message GraphDef {
// The name of outputs. // The name of outputs.
repeated string output = 8; repeated string output = 8;
// The gradients information. // The info of gradients.
repeated GradientProto gradient = 9; repeated GradientInfo grad_info = 9;
}
// Record the filler information.
// This structure is kept for backward compatibility
// with caffe, which relies on the implicit initializer.
message FillerInfo {
enum VarianceNorm {
FAN_IN = 0;
FAN_OUT = 1;
FAN_AVG = 2;
}
optional string type = 1 [default = 'constant'];
optional float value = 2 [default = 0];
optional float low = 3 [default = 0];
optional float high = 4 [default = 1];
optional float mean = 5 [default = 0];
optional float std = 6 [default = 1];
optional float scale = 7 [default = 3];
optional VarianceNorm variance_norm = 8 [default = FAN_IN];
}
// Record the gradient information.
message GradientInfo {
// The derivative target.
optional string y = 1;
// The differentiated inputs.
repeated string xs = 2;
} }
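For reference, both new messages are usable from Python through the generated dragon_pb2 module (a sketch with illustrative tensor names; the field names follow the definitions above):

from dragon.core.proto import dragon_pb2

filler = dragon_pb2.FillerInfo(type='glorot_uniform', scale=3.0)
filler.variance_norm = dragon_pb2.FillerInfo.FAN_AVG

grad = dragon_pb2.GradientInfo(y='loss:0', xs=['conv1/w:0', 'conv1/b:0'])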
...@@ -30,7 +30,6 @@ from dragon._api import metrics ...@@ -30,7 +30,6 @@ from dragon._api import metrics
from dragon._api import nn from dragon._api import nn
from dragon._api import optimizers from dragon._api import optimizers
from dragon._api import random from dragon._api import random
from dragon._api import workspace
from dragon._api import vision from dragon._api import vision
# Virtual API # Virtual API
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import as _absolute_import
from __future__ import division as _division
from __future__ import print_function as _print_function
from dragon.core.training.adam import Adam
from dragon.core.training.rmsprop import RMSProp
from dragon.core.training.sgd import Nesterov
from dragon.core.training.sgd import SGD
from dragon.core.training.updater import Updater
__all__ = [_s for _s in dir() if not _s.startswith('_')]
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import as _absolute_import
from __future__ import division as _division
from __future__ import print_function as _print_function
from dragon.core.framework.workspace import feed_tensor
from dragon.core.framework.workspace import fetch_tensor
from dragon.core.framework.workspace import has_tensor
from dragon.core.framework.workspace import load
from dragon.core.framework.workspace import reset_tensor
from dragon.core.framework.workspace import run_operator
from dragon.core.framework.workspace import save
__all__ = [_s for _s in dir() if not _s.startswith('_')]
...@@ -28,6 +28,7 @@ from dragon.core.autograph.tensor import Tensor ...@@ -28,6 +28,7 @@ from dragon.core.autograph.tensor import Tensor
from dragon.core.eager import context as eager_context from dragon.core.eager import context as eager_context
from dragon.core.eager.tensor import EagerTensor from dragon.core.eager.tensor import EagerTensor
from dragon.core.framework import context from dragon.core.framework import context
from dragon.core.framework import device_spec
from dragon.core.framework import workspace from dragon.core.framework import workspace
from dragon.core.training import optimizer from dragon.core.training import optimizer
from dragon.core.util import decorator from dragon.core.util import decorator
...@@ -276,13 +277,16 @@ class FunctionGuard(object): ...@@ -276,13 +277,16 @@ class FunctionGuard(object):
executables = self.executables executables = self.executables
inputs, kwargs = self.canonicalize_inputs(*args, **kwargs) inputs, kwargs = self.canonicalize_inputs(*args, **kwargs)
executables[0](*inputs, return_outputs=False, **kwargs) executables[0](*inputs, return_outputs=False, **kwargs)
_ = [func(return_outputs=False) for func in executables[1:]] [func(return_outputs=False) for func in executables[1:]]
outputs = [] outputs = []
for obj in self.outputs: current_ws = workspace.get_workspace()
if isinstance(obj, Tensor): for output in self.outputs:
outputs.append(EagerTensor(id=obj.id, own_storage=False)) if isinstance(output, Tensor):
impl = current_ws.GetTensor(output.id)
device = device_spec.DeviceSpec(*impl.device)
outputs.append(EagerTensor(impl=impl, device=device))
else: else:
outputs.append(obj) outputs.append(output)
return outputs return outputs
def __get__(self, instance, owner): def __get__(self, instance, owner):
......
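The new output path pulls each symbolic output back from the current workspace as an eager tensor. A hedged sketch of the same pattern in isolation (the tensor id is illustrative; the calls mirror the lines above):

from dragon.core.eager.tensor import EagerTensor
from dragon.core.framework import device_spec
from dragon.core.framework import workspace

current_ws = workspace.get_workspace()
impl = current_ws.GetTensor('output:0')        # backend tensor implementation (illustrative id)
value = EagerTensor(impl=impl, device=device_spec.DeviceSpec(*impl.device))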
...@@ -23,7 +23,6 @@ from dragon.core.autograph.op_def import OpDef ...@@ -23,7 +23,6 @@ from dragon.core.autograph.op_def import OpDef
from dragon.core.autograph.op_def import OpInfo from dragon.core.autograph.op_def import OpInfo
from dragon.core.autograph.tensor import Tensor from dragon.core.autograph.tensor import Tensor
from dragon.core.framework import config from dragon.core.framework import config
from dragon.core.framework import context
from dragon.core.framework import proto_util from dragon.core.framework import proto_util
from dragon.core.framework import workspace from dragon.core.framework import workspace
from dragon.core.proto import dragon_pb2 from dragon.core.proto import dragon_pb2
...@@ -32,7 +31,7 @@ from dragon.core.util import nest ...@@ -32,7 +31,7 @@ from dragon.core.util import nest
def add_device_option(graph_def): def add_device_option(graph_def):
"""Add the device option for graph.""" """Add the device option."""
cfg = config.config() cfg = config.config()
str2idx = {'cpu': 0, 'cuda': 1, 'cnml': 2} str2idx = {'cpu': 0, 'cuda': 1, 'cnml': 2}
dev_opt = dragon_pb2.DeviceOption() dev_opt = dragon_pb2.DeviceOption()
...@@ -42,69 +41,66 @@ def add_device_option(graph_def): ...@@ -42,69 +41,66 @@ def add_device_option(graph_def):
graph_def.device_option.CopyFrom(dev_opt) graph_def.device_option.CopyFrom(dev_opt)
def add_gradient_info(graph_def, targets): def add_grad_info(graph_def, targets):
"""Add the gradient info for graph.""" """Add the gradient info."""
gradients = set()
for target in targets: for target in targets:
if target._grad is not None: info = target._grad
gradients.update(target._grad.make_pairs()) if info is not None:
for (cost, wrt) in gradients: graph_def.grad_info.extend([
gradient = dragon_pb2.GradientProto() dragon_pb2.GradientInfo(
gradient.cost, gradient.wrt = str(cost), str(wrt) y=info.y.id,
graph_def.gradient.extend([gradient]) xs=[x.id for x in info.xs])])
def add_optimization(graph_def, level=None): def add_optimization(graph_def, level=None):
"""Add the optimization attribute for graph.""" """Add the optimization argument."""
cfg = config.config() cfg = config.config()
if level is None: if level is None:
level = cfg.graph_optimization level = cfg.graph_optimization
graph_def.arg.add().CopyFrom( graph_def.arg.add().CopyFrom(
proto_util.make_argument( proto_util.make_argument('optimization', level))
'optimization_level', level))
graph_def.graph_type = cfg.graph_type graph_def.graph_type = cfg.graph_type
def add_phase(graph_def, targets): def add_phase(graph_def, targets):
"""Add the phase attribute for graph.""" """Add the phase argument."""
phase = context.get_graph_phase() phase = 'TEST'
if phase is None: for target in targets:
phase = 'TEST' try:
for target in targets: if target._grad and target._grad.required():
if target._grad is not None and \
target._grad.required():
phase = 'TRAIN' phase = 'TRAIN'
break break
except AttributeError:
pass
graph_def.arg.extend([proto_util.make_argument('phase', phase)]) graph_def.arg.extend([proto_util.make_argument('phase', phase)])
def add_update_ops(graph_def, optimizer): def add_update_defs(graph_def, optimizer):
"""Add the update operators for graph.""" """Add the update defs."""
if optimizer is None: if optimizer is None:
return return
grads, update_ops = [], [] grads, update_defs = [], []
extra_arguments = optimizer._extra_kwargs extra_arguments = optimizer._extra_kwargs
extra_arguments['handle'] = optimizer._op_handle extra_arguments['handle'] = optimizer._op_handle
# Generate update operators according to the updater. # Generate op defs according to the collected updates
for e in optimizer._param_group: current_ws = workspace.get_workspace()
(param, grad), arguments = e for (param, grad), arguments in optimizer._param_group:
if workspace.has_tensor(grad): if current_ws.has_tensor(grad):
grads.append(grad) grads.append(grad)
arguments = dict(arguments, **extra_arguments) arguments = dict(arguments, **extra_arguments)
update_ops.append( update_defs.append(
proto_util.make_operator_def( proto_util.make_operator_def(
op_type=optimizer._op_type, op_type=optimizer._op_type,
inputs=[grad], inputs=[grad],
outputs=[param], outputs=[param],
name=OpDef.get_name(), name=OpDef.get_name(),
**arguments **arguments))
))
else: else:
logging.info('Skip updating Tensor({}).'.format(param)) logging.info('Skip updating Tensor({}).'.format(param))
# Insert a reduce op if the process group is found. # Insert a reduce def if the process group is found.
process_group = optimizer._process_group process_group = optimizer._process_group
if process_group is not None: if process_group is not None:
update_ops.insert( update_defs.insert(
0, proto_util.make_operator_def( 0, proto_util.make_operator_def(
op_type='Collective', op_type='Collective',
inputs=grads, inputs=grads,
...@@ -115,7 +111,7 @@ def add_update_ops(graph_def, optimizer): ...@@ -115,7 +111,7 @@ def add_update_ops(graph_def, optimizer):
**process_group.arguments **process_group.arguments
) )
) )
graph_def.op.extend(update_ops) graph_def.op.extend(update_defs)
class Function(object): class Function(object):
...@@ -128,16 +124,15 @@ class Function(object): ...@@ -128,16 +124,15 @@ class Function(object):
self.graph_name = None # Determined after creating self.graph_name = None # Determined after creating
self.inputs, self.outputs = None, None self.inputs, self.outputs = None, None
def create(self, inputs=None, outputs=None, givens=None, updater=None): def create(self, inputs=None, outputs=None, givens=None, optimizer=None):
self.inputs = inputs = [] if inputs is None else nest.flatten(inputs) self.inputs = inputs = [] if inputs is None else nest.flatten(inputs)
self.outputs = outputs = [] if outputs is None else nest.flatten(outputs) self.outputs = outputs = [] if outputs is None else nest.flatten(outputs)
if len(outputs) > 0 and updater is not None: if len(outputs) > 0 and optimizer is not None:
raise ValueError('Specify either <outputs> or <updater>, not both.') raise ValueError('Specify either <outputs> or <optimizer>, not both.')
# Collect the forward defs.
op_info = OpInfo() op_info = OpInfo()
# Collect the forward operators.
requires_grad = False requires_grad = False
for i, output in enumerate(outputs): for i, output in enumerate(outputs):
op_info.merge_from(output) op_info.merge_from(output)
...@@ -149,7 +144,7 @@ class Function(object): ...@@ -149,7 +144,7 @@ class Function(object):
except AttributeError: except AttributeError:
raise ValueError('Output[%d] is not a symbolic tensor.' % i) raise ValueError('Output[%d] is not a symbolic tensor.' % i)
# Handle givens. # Handle the replacements.
if givens is not None: if givens is not None:
name_dict = {} name_dict = {}
for k, v in givens.items(): for k, v in givens.items():
...@@ -161,62 +156,61 @@ class Function(object): ...@@ -161,62 +156,61 @@ class Function(object):
'Expected a Tensor, ' 'Expected a Tensor, '
'got {}.'.format(type(v).__name__) 'got {}.'.format(type(v).__name__)
) )
# Update original operators. # Update the original defs.
op_info = copy.deepcopy(op_info) op_info = copy.deepcopy(op_info)
for k in op_info._defs.keys(): for k in op_info._defs.keys():
op_def = op_info._defs[k] op_def = op_info._defs[k]
op_def.input.extend([ op_def.input.extend([
name_dict[input] name_dict[input]
if input in name_dict else input if input in name_dict else input
for input in op_def.input for input in op_def.input])
])
del op_def.input[:len(op_def.input) // 2] del op_def.input[:len(op_def.input) // 2]
# Sort out the states. # Sort out the forward defs.
op_defs = sorted(op_info._defs.items(), key=lambda d: d[0]) op_defs = sorted(op_info._defs.items(), key=lambda d: d[0])
forward_ops = copy.deepcopy([v for k, v in op_defs]) forward_defs = copy.deepcopy([v for k, v in op_defs])
# Generate the backward operators. # Generate the backward defs.
if requires_grad: if requires_grad:
input_grads, grad_targets = {}, [] input_grads, grad_targets = {}, []
for output in outputs: for output in outputs:
grad_info = output._grad info = output._grad
if grad_info is not None: if info is not None:
if grad_info.input is not None: if info.grad_y is not None:
input_grads[output.id] = output._grad.input.id input_grads[output.id] = info.grad_y.id
grad_targets.append(output.id) grad_targets.append(output.id)
forward_ops, gradient_ops, _ = \ backward_defs = grad_maker.GradientMaker.make(
grad_maker.GradientMaker.make( op_defs=forward_defs,
forward_ops=forward_ops, targets=grad_targets,
targets=grad_targets, input_grads=input_grads,
input_grads=input_grads, )
)
else: else:
gradient_ops = [] backward_defs = []
# Fill with all known graph elements. # Fill graph elements.
self.graph_def.op.extend(forward_ops + gradient_ops) self.graph_def.op.extend(forward_defs + backward_defs)
self.graph_def.input.extend([input.name for input in inputs]) self.graph_def.input.extend([input.name for input in inputs])
self.graph_def.output.extend(list(op_info._targets)) self.graph_def.output.extend(list(op_info._targets))
if len(outputs) > 0: if len(outputs) > 0:
add_device_option(self.graph_def) add_device_option(self.graph_def)
add_optimization(self.graph_def) add_optimization(self.graph_def)
add_gradient_info(self.graph_def, outputs) add_grad_info(self.graph_def, outputs)
add_phase(self.graph_def, outputs) add_phase(self.graph_def, outputs)
elif updater is not None: elif optimizer is not None:
add_device_option(self.graph_def) add_device_option(self.graph_def)
add_optimization(self.graph_def, level=0) add_optimization(self.graph_def, level=0)
add_update_ops(self.graph_def, updater) add_update_defs(self.graph_def, optimizer)
# Notify the backend to create and optimize. # Notify the backend to create and optimize.
self.graph_name = workspace.create_graph(self.graph_def) current_ws = workspace.get_workspace()
self.graph_name = current_ws.create_graph(self.graph_def)
# Bind a callback to run this graph. # Bind a callback to run this graph.
self.callback = lambda *args, **kwargs: \ self.callback = lambda *args, **kwargs: \
workspace.run_graph( current_ws.run_graph(
graph=self.graph_name, name=self.graph_name,
inputs=(inputs, args), inputs_and_values=(inputs, args),
outputs=outputs, outputs=outputs,
**kwargs **kwargs
) )
...@@ -273,15 +267,15 @@ class Function(object): ...@@ -273,15 +267,15 @@ class Function(object):
add_phase(graph_def, self.outputs) add_phase(graph_def, self.outputs)
# Notify the backend to create and optimize. # Notify the backend to create and optimize.
current_ws = workspace.get_workspace()
self.graph_def = graph_def self.graph_def = graph_def
self.graph_name = workspace.create_graph(graph_def) self.graph_name = current_ws.create_graph(graph_def)
# Bind a callback to run this graph. # Bind a callback to run this graph.
callback_inputs = self.inputs if explicit_inputs else []
self.callback = lambda *args, **kwargs: \ self.callback = lambda *args, **kwargs: \
workspace.run_graph( current_ws.run_graph(
graph=self.graph_name, name=self.graph_name,
inputs=(callback_inputs, args), inputs_and_values=(self.inputs if explicit_inputs else [], args),
outputs=self.outputs, outputs=self.outputs,
**kwargs **kwargs
) )
......
...@@ -21,37 +21,26 @@ from dragon.core.util import nest ...@@ -21,37 +21,26 @@ from dragon.core.util import nest
class GradientInfo(object): class GradientInfo(object):
"""A class to store the known gradient relations.""" """A class to store the known gradient relations."""
def __init__(self, parent): def __init__(self, y, grad_y=None):
self._parent = parent self._y, self._grad_y, self._xs = y, grad_y, []
self._cost, self._wrt = [], []
self._input = None
@property @property
def cost(self): def grad_y(self):
return self._cost return self._grad_y
@property @property
def input(self): def xs(self):
return self._input return self._xs
@property @property
def wrt(self): def y(self):
return self._wrt return self._y
def add_cost(self, cost): def add_x(self, x):
self._cost.append(cost) self._xs.append(x)
def add_wrt(self, wrt):
self._wrt.append(wrt)
def make_pairs(self):
return [(self._parent.id, wrt) for wrt in self._wrt]
def required(self): def required(self):
return len(self._wrt) > 0 return len(self._xs) > 0
def set_input(self, input):
self._input = input
def gradients(ys, xs, grad_ys=None): def gradients(ys, xs, grad_ys=None):
...@@ -112,18 +101,14 @@ def gradients(ys, xs, grad_ys=None): ...@@ -112,18 +101,14 @@ def gradients(ys, xs, grad_ys=None):
if grad_ys is not None: if grad_ys is not None:
grad_ys = nest.flatten(grad_ys) grad_ys = nest.flatten(grad_ys)
# Record the gradient info (cost, wrt, input), # Record the gradient info (y, grad_y, xs),
# then, generate the gradient references once. # then, generate the gradient references once.
for i, y in enumerate(ys): for i, y in enumerate(ys):
if y._grad is None: if y._grad is None:
y._grad = GradientInfo(y) grad_y = grad_ys[i] if grad_ys is not None else None
if grad_ys is not None: y._grad = GradientInfo(y, grad_y)
y._grad.set_input(grad_ys[i])
for x in xs: for x in xs:
if not hasattr(x, '_grad') or x._grad is None: y._grad.add_x(x)
x._grad = GradientInfo(x)
y._grad.add_wrt(x.id)
x._grad.add_cost(y)
if i == 0: if i == 0:
dxs.append(TensorRef(x.id + '_grad', x.shape, x.dtype)) dxs.append(TensorRef(x.id + '_grad', x.shape, x.dtype))
......
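To make the new bookkeeping concrete, a standalone mirror of the class above with a stub tensor (illustration only; the real classes live in dragon.core):

class GradientInfo(object):
    """Standalone mirror of the class above."""
    def __init__(self, y, grad_y=None):
        self._y, self._grad_y, self._xs = y, grad_y, []
    def add_x(self, x):
        self._xs.append(x)
    def required(self):
        return len(self._xs) > 0

class _T(object):
    """Stub standing in for dragon's Tensor."""
    def __init__(self, id):
        self.id, self._grad = id, None

y, x = _T('y'), _T('x')
y._grad = GradientInfo(y)          # one info per differentiated target, as in gradients()
y._grad.add_x(x)
assert y._grad.required()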
...@@ -13,16 +13,7 @@ ...@@ -13,16 +13,7 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
"""Gradient maker implemented in python. """Python-implemented gradient maker."""
The basic idea of ``GradientMaker`` comes from ``caffe2``,
Jia provided a simple way to bridge the Generator(Python) with OpScheme(C++).
For the efficient C++ implementation, see,
<https://github.com/seetaresearch/Dragon/blob/master/Dragon/src/core/graph_gradient.cc>
"""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -40,25 +31,25 @@ class GradientMaker(object): ...@@ -40,25 +31,25 @@ class GradientMaker(object):
"""Make def for the gradient based on rules.""" """Make def for the gradient based on rules."""
@classmethod @classmethod
def gen_def(cls, forward_op, g_outputs): def gen_def(cls, op_def, g_outputs):
"""Generate the OperatorDef from forward op.""" """Generate the OperatorDef from forward op."""
g_ops, g_inputs, defaults = backend.CreateGradientDefs( grad_defs, g_inputs, defaults = backend.CreateGradientDefs(
forward_op.SerializeToString(), g_outputs) op_def.SerializeToString(), g_outputs)
for idx, g_op in enumerate(g_ops): for i, grad_def in enumerate(grad_defs):
new_def = dragon_pb2.OperatorDef() new_def = dragon_pb2.OperatorDef()
new_def.ParseFromString(g_op) new_def.ParseFromString(grad_def)
g_ops[idx] = new_def grad_defs[i] = new_def
return g_ops, g_inputs, defaults return grad_defs, g_inputs, defaults
@classmethod @classmethod
def check(cls, forward_op, inputs_to_grads, blacklist, targets): def check(cls, op_def, inputs_to_grads, blacklist, targets):
"""Check if missing gradients. If missing, skip.""" """Check if missing gradients. If missing, skip."""
if forward_op.type in backend.NO_GRADIENT_OPERATORS: if op_def.type in backend.NO_GRADIENT_OPERATORS:
for input in forward_op.input: for input in op_def.input:
blacklist.add(input) blacklist.add(input)
return True, None return True, None
gen_grads = [] gen_grads = []
for idx, output in enumerate(forward_op.output): for idx, output in enumerate(op_def.output):
if output not in inputs_to_grads: if output not in inputs_to_grads:
if output in blacklist: if output in blacklist:
return True, gen_grads return True, gen_grads
...@@ -66,50 +57,43 @@ class GradientMaker(object): ...@@ -66,50 +57,43 @@ class GradientMaker(object):
# Consider to generate virtual gradient for targets. # Consider to generate virtual gradient for targets.
gen_grads.append((output, idx)) gen_grads.append((output, idx))
inputs_to_grads[output] = output + '_grad' inputs_to_grads[output] = output + '_grad'
elif len(forward_op.output) == 1: elif len(op_def.output) == 1:
# We can skip this op, obviously. # We can skip this op, obviously.
return True, gen_grads return True, gen_grads
# Pass, even if missing some grads. # Pass, even if missing some grads.
return False, gen_grads return False, gen_grads
@classmethod @classmethod
def make(cls, forward_ops, targets, input_grads=None): def make(cls, op_defs, targets, input_grads=None):
"""The making procedure.""" """Make the backward op defs."""
inputs_to_grads = {} if input_grads is None else input_grads inputs_to_grads = {} if input_grads is None else input_grads
inputs_count, grads_count = defaultdict(int), defaultdict(int) inputs_count, grads_count = defaultdict(int), defaultdict(int)
all_split_grads, blacklist = set(), set() all_split_grads, blacklist = set(), set()
backward_ops = []
# A DAG may not have any in-place operators.
is_dag = True
# PLAY for the forward. # PLAY for the forward.
for forward_op in forward_ops: for op_def in op_defs:
if forward_op.type in backend.NO_GRADIENT_OPERATORS: if op_def.type in backend.NO_GRADIENT_OPERATORS:
continue continue
outputs = [o for o in forward_op.output] outputs = [output for output in op_def.output]
for input in forward_op.input: for input in op_def.input:
if input not in outputs: if input not in outputs:
# Avoid counting the duplicate input, # Avoid counting the duplicate input,
# (i.e. the in-place output). # (i.e. the in-place output).
inputs_count[input] += 1 inputs_count[input] += 1
else:
is_dag = False
# PLAY for the backward. # PLAY for the backward.
for forward_op in forward_ops[::-1]: backward_defs = []
for op_def in op_defs[::-1]:
# Collect inputs and outputs. # Collect inputs and outputs.
is_skip, gen_grads = cls.check( is_skip, gen_grads = cls.check(
forward_op=forward_op, op_def=op_def,
inputs_to_grads=inputs_to_grads, inputs_to_grads=inputs_to_grads,
blacklist=blacklist, blacklist=blacklist,
targets=targets, targets=targets,
) )
# Missing grads are represented as ``None``. # Missing grads are represented as ``None``.
g_outputs = [inputs_to_grads.get(name, '') g_outputs = [inputs_to_grads.get(name, '') for name in op_def.output]
for name in forward_op.output] grad_defs, g_inputs, defaults = cls.gen_def(op_def, g_outputs)
g_ops, g_inputs, defaults = cls.gen_def(forward_op, g_outputs)
# Append operators. # Append operators.
if not is_skip: if not is_skip:
...@@ -127,17 +111,17 @@ class GradientMaker(object): ...@@ -127,17 +111,17 @@ class GradientMaker(object):
outputs=op_outputs, outputs=op_outputs,
defaults=values, defaults=values,
) )
if forward_op.HasField('device_option'): if op_def.HasField('device_option'):
gen_op.device_option.CopyFrom(forward_op.device_option) gen_op.device_option.CopyFrom(op_def.device_option)
backward_ops.append(gen_op) backward_defs.append(gen_op)
# GradientOp # GradientOp
for g_op in g_ops: for grad_def in grad_defs:
g_op.name = OpDef.get_name() grad_def.name = OpDef.get_name()
backward_ops.append(g_op) backward_defs.append(grad_def)
# Split and gather grads for multi-used input. # Split and gather grads for multi-used input.
for g_op in g_ops: for grad_def in grad_defs:
for g_output_idx, g_output in enumerate(g_op.output): for g_output_idx, g_output in enumerate(grad_def.output):
original_idx = -1 original_idx = -1
for g_input_idx, g_input in enumerate(g_inputs): for g_input_idx, g_input in enumerate(g_inputs):
if g_output == g_input: if g_output == g_input:
...@@ -145,10 +129,10 @@ class GradientMaker(object): ...@@ -145,10 +129,10 @@ class GradientMaker(object):
# Ignore un-used && in-placed GI(?). # Ignore un-used && in-placed GI(?).
if original_idx == -1: if original_idx == -1:
continue continue
if g_output in g_op.input: if g_output in grad_def.input:
continue continue
# Found a split branch. # Found a split branch.
original_name = forward_op.input[original_idx] original_name = op_def.input[original_idx]
if inputs_count[original_name] > 1: if inputs_count[original_name] > 1:
# Split. # Split.
split_name = g_output + '_autosplit_%d' % grads_count[g_output] split_name = g_output + '_autosplit_%d' % grads_count[g_output]
...@@ -161,21 +145,21 @@ class GradientMaker(object): ...@@ -161,21 +145,21 @@ class GradientMaker(object):
for idx in range(grads_count[g_output]): for idx in range(grads_count[g_output]):
if '%s_autosplit_%d' % (g_output, idx) in all_split_grads: if '%s_autosplit_%d' % (g_output, idx) in all_split_grads:
split_inputs.append('%s_autosplit_%d' % (g_output, idx)) split_inputs.append('%s_autosplit_%d' % (g_output, idx))
gather_op = proto_util.make_operator_def( gather_def = proto_util.make_operator_def(
name=OpDef.get_name(), name=OpDef.get_name(),
op_type='GradientGather', op_type='GradientGather',
inputs=split_inputs, inputs=split_inputs,
outputs=[g_output], outputs=[g_output],
) )
if g_op.HasField('device_option'): if grad_def.HasField('device_option'):
gather_op.device_option.CopyFrom(g_op.device_option) gather_def.device_option.CopyFrom(grad_def.device_option)
backward_ops.append(gather_op) backward_defs.append(gather_def)
g_op.output[g_output_idx] = split_name grad_def.output[g_output_idx] = split_name
# Done. # Done.
if not is_skip: if not is_skip:
for name, grad in zip(forward_op.input, g_inputs): for name, grad in zip(op_def.input, g_inputs):
if grad != '': if grad != '':
inputs_to_grads[name] = grad inputs_to_grads[name] = grad
return forward_ops, backward_ops, is_dag return backward_defs
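For readers following the split/gather bookkeeping above: when one forward input is consumed by several operators, each consumer contributes a partial gradient under an ``_autosplit_<i>`` alias, and a ``GradientGather`` op sums the pieces back into the real gradient. A minimal standalone sketch of that accumulation (plain numpy, not the dragon backend; names are illustrative):

```python
import numpy as np

def gather_autosplit_grads(pieces):
    """Sum the '<grad>_autosplit_<i>' pieces into one gradient."""
    total = np.zeros_like(pieces[0])
    for piece in pieces:
        total += piece
    return total

# x feeds two ops, so two partial gradients are produced and then gathered.
g0 = np.array([1.0, 2.0, 3.0])  # hypothetical 'x_grad_autosplit_0'
g1 = np.array([0.5, 0.5, 0.5])  # hypothetical 'x_grad_autosplit_1'
print(gather_autosplit_grads([g0, g1]))  # [1.5 2.5 3.5]
```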
...@@ -30,9 +30,9 @@ class OpInfo(object): ...@@ -30,9 +30,9 @@ class OpInfo(object):
self._defs = dict() self._defs = dict()
self._targets = set() self._targets = set()
def add_def(self, idx, op_def): def add_def(self, index, op_def):
"""Add a operator definition.""" """Add a operator definition."""
self._defs[idx] = op_def self._defs[index] = op_def
def add_target(self, target): def add_target(self, target):
"""Add an extra target relied by inputs.""" """Add an extra target relied by inputs."""
...@@ -74,13 +74,14 @@ class OpDef(object): ...@@ -74,13 +74,14 @@ class OpDef(object):
# Create outputs. # Create outputs.
if outputs is None: if outputs is None:
outputs = [] outputs = []
current_ws = workspace.get_workspace()
name_scope = context.get_name_scope() name_scope = context.get_name_scope()
for i in range(num_outputs): for i in range(num_outputs):
outputs.append(TensorRef( outputs.append(TensorRef(
workspace.get_dummy_name( current_ws.unique_name(
name_scope + (name if name else op_type), name_scope + (name if name else op_type),
suffix=':{}'.format(i), suffix=':{}'.format(i),
domain='Tensor'))) namespace='Tensor')))
else: else:
outputs = nest.flatten(outputs) outputs = nest.flatten(outputs)
num_outputs = len(outputs) num_outputs = len(outputs)
...@@ -124,13 +125,13 @@ class OpDef(object): ...@@ -124,13 +125,13 @@ class OpDef(object):
return spec_func(arguments, inputs, outputs) return spec_func(arguments, inputs, outputs)
@staticmethod @staticmethod
def get_index_and_name(prefix='Op'): def get_index_and_name():
"""Return an unique op name and index.""" """Return an unique op name and index."""
name = workspace.get_dummy_name( name = workspace.get_workspace().unique_name(
prefix, domain='Operator', zero_based=False) 'Op', namespace='Op', zero_based=False)
return int(name.split('_')[-1]), name return int(name.split('_')[-1]), name
@staticmethod @staticmethod
def get_name(prefix='Op'): def get_name():
"""Return an unique op name.""" """Return an unique op name."""
return OpDef.get_index_and_name(prefix)[1] return OpDef.get_index_and_name()[1]
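The index/name pair relies on the workspace counter appending ``_<counter>`` to the requested name; the sketch below mirrors only the parsing step (the counter itself lives in the backend, and the example names are illustrative):

```python
def parse_op_index(name):
    """Recover the numeric index from an auto-generated op name."""
    return int(name.split('_')[-1])

assert parse_op_index('Op_3') == 3
assert parse_op_index('Conv2d_12') == 12
```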
...@@ -190,24 +190,28 @@ def conv_spec(args, inputs, outputs): ...@@ -190,24 +190,28 @@ def conv_spec(args, inputs, outputs):
out_shape = None out_shape = None
try: try:
out_shape = inputs[0].shape[:] out_shape = inputs[0].shape[:]
num_axes = len(out_shape) - 2
channel_axis = 1 if args['data_format'] == 'NCHW' else -1 channel_axis = 1 if args['data_format'] == 'NCHW' else -1
spatial_axis = 2 if args['data_format'] == 'NCHW' else 1 spatial_axis = 2 if args['data_format'] == 'NCHW' else 1
if 'out_channels' in args: if 'out_channels' in args:
out_shape[channel_axis] = args['out_channels'] out_shape[channel_axis] = args['out_channels']
else: else:
out_shape[channel_axis] = inputs[1].shape[0] out_shape[channel_axis] = inputs[1].shape[0]
for i in range(len(out_shape) - 2): for i in range(num_axes):
input_size = out_shape[i + spatial_axis] try:
k = args['kernel_shape'][i] k = args['kernel_shape'][i]
s = args['strides'][i] s = args['strides'][i]
pl, pr = args['pads'][i], args['pads'][i + 2] d = args['dilations'][i]
dk, dp = (k - 1) + 1, pl + pr in_size = out_shape[i + spatial_axis]
if 'SAME' not in args['padding']: k_size = d * (k - 1) + 1
out_shape[i + spatial_axis] = \ if 'SAME' not in args['padding']:
int(float(input_size + dp - dk) / s) + 1 pad_size = args['pads'][i] + args['pads'][i + num_axes]
else: out_size = (in_size + pad_size - k_size) // s + 1
out_shape[i + spatial_axis] = \ else:
int(float(input_size + s - 1) / s) out_size = (in_size + s - 1) // s
except IndexError:
out_size = None
out_shape[i + spatial_axis] = out_size
except (TypeError, IndexError): except (TypeError, IndexError):
pass pass
outputs[0].shape = out_shape outputs[0].shape = out_shape
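The refactored loop above is the usual convolution output-size arithmetic; here is a plain-Python restatement for the explicit-padding branch (the helper name is illustrative, not a dragon API):

```python
def conv_out_size(in_size, kernel, stride, dilation, pad_l, pad_r):
    """Output size of a convolution with explicit padding."""
    k_size = dilation * (kernel - 1) + 1
    return (in_size + pad_l + pad_r - k_size) // stride + 1

# 224 input, 7x7 kernel, stride 2, pad 3 on both sides -> 112
print(conv_out_size(224, kernel=7, stride=2, dilation=1, pad_l=3, pad_r=3))
```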
...@@ -220,30 +224,33 @@ def conv_transpose_spec(args, inputs, outputs): ...@@ -220,30 +224,33 @@ def conv_transpose_spec(args, inputs, outputs):
out_shape = None out_shape = None
try: try:
out_shape = inputs[0].shape[:] out_shape = inputs[0].shape[:]
num_axes = len(out_shape) - 2
channel_axis = 1 if args['data_format'] == 'NCHW' else -1 channel_axis = 1 if args['data_format'] == 'NCHW' else -1
spatial_axis = 2 if args['data_format'] == 'NCHW' else 1 spatial_axis = 2 if args['data_format'] == 'NCHW' else 1
if 'out_channels' in args: if 'out_channels' in args:
out_shape[channel_axis] = args['out_channels'] out_shape[channel_axis] = args['out_channels']
else: else:
out_shape[channel_axis] = inputs[1].shape[1] out_shape[channel_axis] = inputs[1].shape[1]
for i in range(len(out_shape) - 2): for i in range(num_axes):
k = args['kernel_shape'][i] try:
s = args['strides'][i] k = args['kernel_shape'][i]
d = args['dilations'][i] s = args['strides'][i]
pl, pr = args['pads'][i], args['pads'][i + 2] d = args['dilations'][i]
dk, dp = d * (k - 1) + 1, pl + pr in_size = out_shape[i + spatial_axis]
input_size = out_shape[i + spatial_axis] k_size = d * (k - 1) + 1
if 'SAME' not in args['padding']: if 'SAME' not in args['padding']:
out_shape[i + spatial_axis] = s * \ pad_size = args['pads'][i] + args['pads'][i + num_axes]
(input_size - 1) + dk - dp out_size = s * (in_size - 1) + k_size - pad_size
else: if 'output_padding' in args and args['output_padding']:
out_shape[i + spatial_axis] = None out_size += args['output_padding'][i]
if args['output_padding'] is not None: else:
out_shape[i + spatial_axis] = \ if 'output_shape' in args and args['output_shape']:
s * (input_size - 1) + dk + \ out_size = args['output_shape'][i]
args['output_padding'][i] else:
elif args['output_shape'] is not None: out_size = None
out_shape[i + spatial_axis] = args['output_shape'][i] except IndexError:
out_size = None
out_shape[i + spatial_axis] = out_size
except (TypeError, IndexError): except (TypeError, IndexError):
pass pass
outputs[0].shape = out_shape outputs[0].shape = out_shape
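And the transposed counterpart, mirroring the branch that honors ``output_padding`` (again only a sketch of the arithmetic, not dragon's implementation):

```python
def conv_transpose_out_size(in_size, kernel, stride, dilation,
                            pad_l, pad_r, output_padding=0):
    """Output size of a transposed convolution with explicit padding."""
    k_size = dilation * (kernel - 1) + 1
    return stride * (in_size - 1) + k_size - (pad_l + pad_r) + output_padding

# Inverts the example above: 112 -> 224 with the matching configuration.
print(conv_transpose_out_size(112, kernel=7, stride=2, dilation=1,
                              pad_l=3, pad_r=3, output_padding=1))
```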
...@@ -606,21 +613,24 @@ def pool_spec(args, inputs, outputs): ...@@ -606,21 +613,24 @@ def pool_spec(args, inputs, outputs):
out_shape = None out_shape = None
try: try:
out_shape = inputs[0].shape[:] out_shape = inputs[0].shape[:]
num_axes = len(out_shape) - 2
spatial_axis = 2 if args['data_format'] == 'NCHW' else 1 spatial_axis = 2 if args['data_format'] == 'NCHW' else 1
for i in range(len(out_shape) - 2): for i in range(num_axes):
k = args['kernel_shape'][i]
s = args['strides'][i]
pl, pr = args['pads'][i], args['pads'][i + 2]
if not args['global_pooling']: if not args['global_pooling']:
floor_or_ceil = math.ceil if args['ceil_mode'] else math.floor try:
if 'SAME' not in args['padding']: k = args['kernel_shape'][i]
in_size = out_shape[i + spatial_axis] + pl + pr s = args['strides'][i]
out_size = int(floor_or_ceil(float(in_size - k) / s) + 1)
out_shape[i + spatial_axis] = out_size
else:
in_size = out_shape[i + spatial_axis] in_size = out_shape[i + spatial_axis]
out_size = int(floor_or_ceil(float(in_size) / s)) if 'SAME' not in args['padding']:
out_shape[i + spatial_axis] = out_size floor_or_ceil = math.ceil if args['ceil_mode'] else math.floor
pad_size = args['pads'][i] + args['pads'][i + num_axes]
out_size = float(in_size + pad_size - k) / float(s) + 1
out_size = floor_or_ceil(out_size)
else:
out_size = math.ceil(float(in_size) / float(s))
except IndexError:
out_size = None
out_shape[i + spatial_axis] = out_size
else: else:
out_shape[i + spatial_axis] = 1 out_shape[i + spatial_axis] = 1
except (TypeError, IndexError): except (TypeError, IndexError):
...@@ -959,14 +969,14 @@ def stack_spec(args, inputs, outputs): ...@@ -959,14 +969,14 @@ def stack_spec(args, inputs, outputs):
@register('Tile') @register('Tile')
def tile_spec(args, inputs, outputs): def tile_spec(args, inputs, outputs):
outputs[0].dtype = inputs[0].dtype outputs[0].dtype = inputs[0].dtype
multiples = args['multiples'] repeats = args['repeats']
if multiples is not None: if repeats is not None:
try: try:
out_shape = inputs[0].shape[:] out_shape = inputs[0].shape[:]
for i, multiple in enumerate(multiples): for i, size in enumerate(repeats):
if i < len(out_shape): if i < len(out_shape):
try: try:
out_shape[i] *= multiple out_shape[i] *= size
except TypeError: except TypeError:
out_shape[i] = None out_shape[i] = None
outputs[0].shape = out_shape outputs[0].shape = out_shape
......
...@@ -21,6 +21,7 @@ from dragon.core.framework import context ...@@ -21,6 +21,7 @@ from dragon.core.framework import context
from dragon.core.framework import types from dragon.core.framework import types
from dragon.core.framework import workspace from dragon.core.framework import workspace
from dragon.core.proto import dragon_pb2 from dragon.core.proto import dragon_pb2
from dragon.core.util import math_util
from dragon.core.util import nest from dragon.core.util import nest
...@@ -45,11 +46,9 @@ class Tensor(types.TensorMetaclass): ...@@ -45,11 +46,9 @@ class Tensor(types.TensorMetaclass):
The optional data type. The optional data type.
""" """
self._op = None self._op, self._grad = None, None
self._grad = None self._name, self._shape, self._dtype = None, None, None
self.name = name self.name, self.shape, self.dtype = name, shape, dtype
self.shape = shape
self.dtype = dtype
@property @property
def dtype(self): def dtype(self):
...@@ -112,8 +111,8 @@ class Tensor(types.TensorMetaclass): ...@@ -112,8 +111,8 @@ class Tensor(types.TensorMetaclass):
if value != '': if value != '':
value = value if value else 'Tensor' value = value if value else 'Tensor'
name_scope = context.get_name_scope() name_scope = context.get_name_scope()
self._name = workspace.get_dummy_name( self._name = workspace.get_workspace().unique_name(
name_scope + value, domain='Tensor') name_scope + value, namespace='Tensor')
else: else:
# Set it manually for some cases # Set it manually for some cases
self._name = value self._name = value
...@@ -142,8 +141,6 @@ class Tensor(types.TensorMetaclass): ...@@ -142,8 +141,6 @@ class Tensor(types.TensorMetaclass):
The shape. The shape.
""" """
if not hasattr(self, '_shape'):
self._shape = None
return self._shape return self._shape
@shape.setter @shape.setter
...@@ -166,6 +163,22 @@ class Tensor(types.TensorMetaclass): ...@@ -166,6 +163,22 @@ class Tensor(types.TensorMetaclass):
else: else:
self._shape = value self._shape = value
@property
def size(self):
"""Return the total number of elements in this tensor.
Returns
-------
int
The total count of elements.
"""
if self._shape is None:
return 0
if None in self._shape:
return numpy.inf
return math_util.prod(self._shape)
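The new ``size`` property adopts the convention that a fully unknown shape counts as 0 elements and a partially known one as infinity; a standalone illustration of that rule:

```python
import math

def symbolic_size(shape):
    """Element count under the convention used by ``Tensor.size``."""
    if shape is None:
        return 0
    if None in shape:
        return math.inf
    count = 1
    for dim in shape:
        count *= dim
    return count

print(symbolic_size(None), symbolic_size([2, None]), symbolic_size([2, 3, 4]))
# 0 inf 24
```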
def astype(self, dtype, inplace=False): def astype(self, dtype, inplace=False):
"""Cast the data type to a specific one. """Cast the data type to a specific one.
...@@ -186,7 +199,6 @@ class Tensor(types.TensorMetaclass): ...@@ -186,7 +199,6 @@ class Tensor(types.TensorMetaclass):
`dragon.cast(...)`_ : Cast the data type of input. `dragon.cast(...)`_ : Cast the data type of input.
""" """
pass
def constant(self, value=0): def constant(self, value=0):
r"""Register as a variable with constant initializer. r"""Register as a variable with constant initializer.
...@@ -219,7 +231,6 @@ class Tensor(types.TensorMetaclass): ...@@ -219,7 +231,6 @@ class Tensor(types.TensorMetaclass):
`dragon.copy(...)`_ : Copy the value to ref. `dragon.copy(...)`_ : Copy the value to ref.
""" """
pass
def get_value(self): def get_value(self):
"""Copy the data from storage. """Copy the data from storage.
...@@ -229,12 +240,7 @@ class Tensor(types.TensorMetaclass): ...@@ -229,12 +240,7 @@ class Tensor(types.TensorMetaclass):
numpy.ndarray numpy.ndarray
The deep copied value. The deep copied value.
See Also
--------
`dragon.workspace.fetch_tensor(...)`_ : Fetch the value of given tensor.
""" """
pass
def glorot_normal(self, scale=2.): def glorot_normal(self, scale=2.):
r"""Register as a variable with glorot normal initializer. r"""Register as a variable with glorot normal initializer.
...@@ -326,7 +332,6 @@ class Tensor(types.TensorMetaclass): ...@@ -326,7 +332,6 @@ class Tensor(types.TensorMetaclass):
`dragon.reshape(...)`_ : Change the dimensions of input. `dragon.reshape(...)`_ : Change the dimensions of input.
""" """
pass
def set_value(self, value): def set_value(self, value):
"""Feed the const value to the storage. """Feed the const value to the storage.
...@@ -341,12 +346,7 @@ class Tensor(types.TensorMetaclass): ...@@ -341,12 +346,7 @@ class Tensor(types.TensorMetaclass):
dragon.Tensor dragon.Tensor
The self. The self.
See Also
--------
`dragon.workspace.feed_tensor(...)`_ : Feed the value to the given tensor.
""" """
pass
def truncated_normal(self, mean=0, std=1): def truncated_normal(self, mean=0, std=1):
r"""Register as a variable with truncated normal initializer. r"""Register as a variable with truncated normal initializer.
...@@ -407,7 +407,7 @@ class Tensor(types.TensorMetaclass): ...@@ -407,7 +407,7 @@ class Tensor(types.TensorMetaclass):
Parameters Parameters
---------- ----------
value : Union[number, Sequence, numpy.ndarray] value : array_like
The value to convert. The value to convert.
dtype: str, optional dtype: str, optional
The optional data type. The optional data type.
...@@ -420,16 +420,22 @@ class Tensor(types.TensorMetaclass): ...@@ -420,16 +420,22 @@ class Tensor(types.TensorMetaclass):
The constant contains the value. The constant contains the value.
""" """
return Tensor('', dtype=dtype)._from_constant(value, name) if not isinstance(value, numpy.ndarray):
value = numpy.array(value, dtype if dtype else 'float32')
return TensorRef(
name=workspace.get_workspace().unique_name(
name=context.get_name_scope() + (name if name else 'Const'),
suffix=':0',
namespace='Tensor'),
shape=list(value.shape),
dtype=str(value.dtype),
).set_value(value)
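For reference, this is roughly what the conversion path does to a Python value before registering it: normalize to a numpy array (defaulting to float32), then derive the shape and dtype for the symbolic tensor. A standalone sketch that stops short of touching the workspace:

```python
import numpy as np

def normalize_constant(value, dtype=None):
    """Mirror the numpy normalization performed by ``convert_to``."""
    if not isinstance(value, np.ndarray):
        value = np.array(value, dtype if dtype else 'float32')
    return value, list(value.shape), str(value.dtype)

array, shape, dtype = normalize_constant([[1, 2], [3, 4]])
print(shape, dtype)  # [2, 2] float32
```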
def _register_as(self, type, **kwargs): def _register_as(self, type, **kwargs):
"""Fill self with the specific type of filler.""" """Fill self with the specific type of filler."""
filler = dragon_pb2.TensorFillerProto() filler = dragon_pb2.FillerInfo()
filler.tensor = self.name
filler.type = type.lower() filler.type = type.lower()
if filler.type in ['placeholder', 'variable']: if filler.type == 'constant':
pass
elif filler.type == 'constant':
filler.value = kwargs['value'] if 'value' in kwargs else 0 filler.value = kwargs['value'] if 'value' in kwargs else 0
elif filler.type in ['normal', 'gaussian']: elif filler.type in ['normal', 'gaussian']:
filler.mean = kwargs['mean'] if 'mean' in kwargs else 0 filler.mean = kwargs['mean'] if 'mean' in kwargs else 0
...@@ -438,46 +444,59 @@ class Tensor(types.TensorMetaclass): ...@@ -438,46 +444,59 @@ class Tensor(types.TensorMetaclass):
elif filler.type == 'uniform': elif filler.type == 'uniform':
filler.low = kwargs['low'] if 'low' in kwargs else 0 filler.low = kwargs['low'] if 'low' in kwargs else 0
filler.high = kwargs['high'] if 'high' in kwargs else 1 filler.high = kwargs['high'] if 'high' in kwargs else 1
filler.type = 'uniform' elif filler.type == 'truncated_normal':
elif filler.type in ['truncated_normal', 'truncatednormal']:
filler.mean = kwargs['mean'] if 'mean' in kwargs else 0 filler.mean = kwargs['mean'] if 'mean' in kwargs else 0
filler.std = kwargs['std'] if 'std' in kwargs else 1 filler.std = kwargs['std'] if 'std' in kwargs else 1
filler.low = filler.mean - 2.0 * filler.std filler.low = filler.mean - 2.0 * filler.std
filler.high = filler.mean + 2.0 * filler.std filler.high = filler.mean + 2.0 * filler.std
filler.type = 'truncated_normal'
elif filler.type == 'parameterized_truncated_normal':
filler.mean = kwargs['mean'] if 'mean' in kwargs else 0
filler.std = kwargs['std'] if 'std' in kwargs else 1
filler.low = kwargs['low'] if 'low' in kwargs else -2.0
filler.high = kwargs['high'] if 'high' in kwargs else 2.0
elif filler.type in ['glorot_uniform', 'xavier']: elif filler.type in ['glorot_uniform', 'xavier']:
filler.scale = kwargs['scale'] if 'scale' in kwargs else 3.0 filler.scale = kwargs['scale'] if 'scale' in kwargs else 3
elif filler.type in ['glorot_normal', 'msra']: elif filler.type in ['glorot_normal', 'msra']:
filler.scale = kwargs['scale'] if 'scale' in kwargs else 2.0 filler.scale = kwargs['scale'] if 'scale' in kwargs else 2
else: workspace.get_workspace().create_tensor(self.name, filler)
raise ValueError('Unknown filler type: {}'.format(filler.type))
workspace.create_filler(filler)
return self return self
def _from_constant(self, value, name=None):
"""Convert the value to a tensor."""
if not isinstance(value, numpy.ndarray):
value = numpy.array(value, self.dtype if self.dtype else 'float32')
return TensorRef(
name=workspace.get_dummy_name(
basename=context.get_name_scope() +
(name if name else 'Const'),
suffix=':0',
domain='Tensor'),
shape=list(value.shape),
dtype=str(value.dtype),
).set_value(value)
def __add__(self, other): def __add__(self, other):
pass r"""Compute the element-wise addition.
.. math:: \text{out} = \text{self} + \text{other}
Parameters
----------
other : Union[dragon.Tensor, number]
The value to add.
Returns
-------
dragon.Tensor
The output tensor.
See Also
--------
`dragon.math.add(...)`_ : Compute the element-wise addition.
"""
def __div__(self, other): def __div__(self, other):
pass r"""Compute the element-wise division.
.. math:: \text{out} = \text{self} \div \text{other}
Parameters
----------
other : Union[dragon.Tensor, number]
The value to divide.
Returns
-------
dragon.Tensor
The output tensor.
See Also
--------
`dragon.math.div(...)`_ : Compute the element-wise division.
"""
def __float__(self): def __float__(self):
"""Return a float python scalar. """Return a float python scalar.
...@@ -491,13 +510,69 @@ class Tensor(types.TensorMetaclass): ...@@ -491,13 +510,69 @@ class Tensor(types.TensorMetaclass):
return float(self.get_value()) return float(self.get_value())
def __ge__(self, other): def __ge__(self, other):
pass r"""Compute element-wise greater-equal comparison.
.. math:: \text{out} = (\text{self} \geq \text{other})
Parameters
----------
other : Union[dragon.Tensor, number]
The value to compare.
Returns
-------
dragon.Tensor
The output tensor.
See Also
--------
`dragon.math.greater_equal(...)`_ : Compute element-wise greater-equal comparison.
"""
def __getitem__(self, item): def __getitem__(self, item):
pass """Select the elements at the specific indices.
Parameters
----------
item : Union[int, slice, dragon.Tensor]
The indices.
Returns
-------
dragon.Tensor
The output tensor.
See Also
--------
`dragon.slice(...)`_ : Select the elements according to the given sections.
See Also
--------
`dragon.masked_select(...)`_ : Select the elements where the given mask is 1.
"""
def __gt__(self, other): def __gt__(self, other):
pass r"""Compute element-wise greater comparison.
.. math:: \text{out} = (\text{self} > \text{other})
Parameters
----------
other : Union[dragon.Tensor, number]
The value to compare.
Returns
-------
dragon.Tensor
The output tensor.
See Also
--------
`dragon.math.greater(...)`_ : Compute element-wise greater comparison.
"""
def __hash__(self): def __hash__(self):
return id(self) return id(self)
...@@ -513,20 +588,105 @@ class Tensor(types.TensorMetaclass): ...@@ -513,20 +588,105 @@ class Tensor(types.TensorMetaclass):
""" """
return int(self.get_value()) return int(self.get_value())
def __le__(self, other):
r"""Compute element-wise less-equal comparison.
.. math:: \text{out} = (\text{self} \leq \text{other})
Parameters
----------
other : Union[dragon.Tensor, number]
The value to compare.
Returns
-------
dragon.Tensor
The output tensor.
See Also
--------
`dragon.math.less_equal(...)`_ : Compute element-wise less-equal comparison.
"""
def __lt__(self, other): def __lt__(self, other):
pass r"""Compute element-wise less comparison.
def __le__(self, other): .. math:: \text{out} = (\text{self} < \text{other})
pass
Parameters
----------
other : Union[dragon.Tensor, number]
The value to compare.
Returns
-------
dragon.Tensor
The output tensor.
See Also
--------
`dragon.math.less(...)`_ : Compute element-wise less comparison.
"""
def __mul__(self, other): def __mul__(self, other):
pass r"""Compute the element-wise multiplication.
.. math:: \text{out} = \text{self} \times \text{other}
Parameters
----------
other : Union[dragon.Tensor, number]
The value to multiply.
Returns
-------
dragon.Tensor
The output tensor.
See Also
--------
`dragon.math.mul(...)`_ : Compute the element-wise multiplication.
"""
def __neg__(self): def __neg__(self):
pass r"""Compute the element-wise negative.
.. math:: y = -x
Returns
-------
dragon.Tensor
The output tensor.
See Also
--------
`dragon.math.negative(...)`_ : Compute the element-wise negative.
"""
def __radd__(self, other): def __radd__(self, other):
pass r"""Compute the element-wise addition.
.. math:: \text{out} = \text{other} + \text{self}
Parameters
----------
other : Union[dragon.Tensor, number]
The value to add.
Returns
-------
dragon.Tensor
The output tensor.
See Also
--------
`dragon.math.add(...)`_ : Compute the element-wise addition.
"""
def __repr__(self): def __repr__(self):
shape_str = ('(' + ', '.join( shape_str = ('(' + ', '.join(
...@@ -538,25 +698,108 @@ class Tensor(types.TensorMetaclass): ...@@ -538,25 +698,108 @@ class Tensor(types.TensorMetaclass):
.format(self.name, shape_str, self.dtype) .format(self.name, shape_str, self.dtype)
def __rdiv__(self, other): def __rdiv__(self, other):
pass r"""Compute the element-wise division.
.. math:: \text{out} = \text{other} \div \text{self}
Parameters
----------
other : Union[dragon.Tensor, number]
The value to be divided.
Returns
-------
dragon.Tensor
The output tensor.
See Also
--------
`dragon.math.div(...)`_ : Compute the element-wise division.
"""
def __rmul__(self, other): def __rmul__(self, other):
pass r"""Compute the element-wise multiplication.
.. math:: \text{out} = \text{other} \times \text{self}
Parameters
----------
other : Union[dragon.Tensor, number]
The value to multiply.
Returns
-------
dragon.Tensor
The output tensor.
See Also
--------
`dragon.math.mul(...)`_ : Compute the element-wise multiplication.
"""
def __rsub__(self, other): def __rsub__(self, other):
pass r"""Compute the element-wise subtraction.
.. math:: \text{out} = \text{other} - \text{self}
def __rtruediv__(self, other): Parameters
return self.__div__(other) ----------
other : Union[dragon.Tensor, number]
The value to be subtracted.
Returns
-------
dragon.Tensor
The output tensor.
See Also
--------
`dragon.math.sub(...)`_ : Compute the element-wise subtraction.
"""
def __setitem__(self, key, value): def __setitem__(self, key, value):
pass """Set the value at the specific indices.
Parameters
----------
key : Union[int, slice, dragon.Tensor]
The indices.
value : number or dragon.Tensor
The value.
See Also
--------
`dragon.assign(...)`_ : Assign the value to ref.
See Also
--------
`dragon.masked_assign(...)`_ : Assign the value to ref where mask is 1.
"""
def __sub__(self, other): def __sub__(self, other):
pass r"""Compute the element-wise subtraction.
.. math:: \text{out} = \text{self} - \text{other}
Parameters
----------
other : Union[dragon.Tensor, number]
The value to subtract.
Returns
-------
dragon.Tensor
The output tensor.
def __truediv__(self, other): See Also
return self.__div__(other) --------
`dragon.math.sub(...)`_ : Compute the element-wise subtraction.
"""
class TensorRef(object): class TensorRef(object):
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# #
# ------------------------------------------------------------ # ------------------------------------------------------------
"""Do back-propagation from the eager expressions.""" """Do back-propagation from the executed operations."""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -35,13 +35,23 @@ class Tape(object): ...@@ -35,13 +35,23 @@ class Tape(object):
self._defs = [] self._defs = []
self._parent = parent self._parent = parent
self._watched = set() self._watched = set()
self._empty_grads = set()
self._gc = workspace.get_workspace().collectors self._gc = workspace.get_workspace().collectors
self._retain_graph = False self._retain_graph = False
@property
def empty_grads(self):
"""Return the recorded empty grads."""
return list(self._empty_grads)
def add_def(self, op_def): def add_def(self, op_def):
"""Add a new def.""" """Add a new def."""
self._defs.append(op_def) self._defs.append(op_def)
def add_empty_grad(self, tensor_id):
"""Add an empty grad for optimization."""
self._empty_grads.add(tensor_id)
def is_watched(self, tensor): def is_watched(self, tensor):
"""Return true if tensor is watched.""" """Return true if tensor is watched."""
return tensor.id in self._watched return tensor.id in self._watched
...@@ -53,7 +63,7 @@ class Tape(object): ...@@ -53,7 +63,7 @@ class Tape(object):
def __del__(self): def __del__(self):
"""Release the resources.""" """Release the resources."""
for op_def in self._defs: for op_def in self._defs:
self._gc.OPERATOR.collect(op_def.name) self._gc.OP.collect(op_def.name)
for y in op_def.output: for y in op_def.output:
if y not in op_def.input: if y not in op_def.input:
self._gc.TENSOR.collect(y) self._gc.TENSOR.collect(y)
...@@ -113,36 +123,23 @@ class GradientTape(object): ...@@ -113,36 +123,23 @@ class GradientTape(object):
self._pop_tape() self._pop_tape()
# Collect gradient info. # Collect gradient info.
inputs, outputs = [], [] xs, ys, grad_ys = nest.flatten(sources), nest.flatten(target), []
targets, ignores = [], []
target = nest.flatten(target)
sources = nest.flatten(sources)
sources_is_watched = []
if output_gradients is not None: if output_gradients is not None:
output_gradients = nest.flatten(output_gradients) for tensor, grad_tensor in zip(ys, nest.flatten(output_gradients)):
for value, grad in zip(target, output_gradients): if grad_tensor.shape != tensor.shape:
if grad.shape != value.shape:
raise ValueError( raise ValueError(
'Except the dimensions of <output_gradient> is {}, ' 'Expected the dimensions of output gradient to be {}, '
'got {}.'.format(value.shape, grad.shape) 'got {}.'.format(tensor.shape, grad_tensor.shape))
) grad_ys.append(grad_tensor.id)
inputs.append(grad.id)
for t in target:
targets.append(t.id)
for s in sources:
sources_is_watched.append(self._tape.is_watched(s))
if not s.requires_grad and not sources_is_watched[-1]:
ignores.append(s.id + '_grad')
else:
outputs.append(s.id)
# Run the gradient ops sequentially. # Run the gradient ops sequentially.
workspace.run_backward( current_ws = workspace.get_workspace()
forward_ops=self._tape._defs, current_ws.run_backward(
targets=targets, op_defs=self._tape._defs,
sources=outputs, targets=[y.id for y in ys],
input_grads=inputs, sources=[x.id for x in xs],
ignored_grads=ignores, input_grads=grad_ys,
empty_grads=self._tape.empty_grads,
) )
# Remove the tape to release resources. # Remove the tape to release resources.
...@@ -150,12 +147,7 @@ class GradientTape(object): ...@@ -150,12 +147,7 @@ class GradientTape(object):
self._tape = None self._tape = None
# Pack the gradients. # Pack the gradients.
return [_steal_grad_ref(s, w) for s, w return [_steal_grad(current_ws, x) for x in xs]
in zip(sources, sources_is_watched)]
def replay(self):
"""Run the operators stored in the tape."""
workspace.run_operator(self._tape._defs)
def reset(self): def reset(self):
"""Destroy the tape and push a new one.""" """Destroy the tape and push a new one."""
...@@ -187,8 +179,7 @@ class GradientTape(object): ...@@ -187,8 +179,7 @@ class GradientTape(object):
if self._tape is None: if self._tape is None:
raise RuntimeError( raise RuntimeError(
'GradientTape.gradient can only be called ' 'GradientTape.gradient can only be called '
'once on non-persistent tapes.' 'once on non-persistent tapes.')
)
for t in nest.flatten(tensor): for t in nest.flatten(tensor):
self._tape.watch(t) self._tape.watch(t)
...@@ -232,17 +223,13 @@ def pop_tape(): ...@@ -232,17 +223,13 @@ def pop_tape():
_GLOBAL_TAPE_STACK.pop() _GLOBAL_TAPE_STACK.pop()
def _steal_grad_ref(source, is_watched=False): def _steal_grad(ws, source):
if not source.requires_grad and not is_watched: """Steal the grad from backend."""
return None impl = ws.GetTensor(source.id + '_grad')
grad_id = source.id + '_grad' if impl is None:
grad_impl = workspace.get_workspace().GetTensor(grad_id)
if grad_impl is None:
return None return None
device = device_spec.DeviceSpec(*grad_impl.device) device = device_spec.DeviceSpec(*impl.device)
grad_ref = EagerTensor(own_storage=False, device=device) return EagerTensor(impl=impl, device=device)
grad_ref._id, grad_ref._impl = grad_id, grad_impl
return grad_ref
# Define a global stack to store the tapes of current thread. # Define a global stack to store the tapes of current thread.
......
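A minimal usage sketch of the rewritten backward entry point, assuming the tape and eager tensor are re-exported as ``dragon.GradientTape`` and ``dragon.EagerTensor`` and that the tape follows the usual context-manager protocol (these public spellings are assumptions, not confirmed by this diff):

```python
import dragon  # assumed public package alias

# Watch a leaf, record one op, then pull the gradient back from the backend.
x = dragon.EagerTensor([1., 2., 3.], dtype='float32')
with dragon.GradientTape() as tape:
    tape.watch(x)            # mark the non-trainable leaf as differentiable
    y = x * x                # the multiply def is appended to the tape
dx = tape.gradient(y, x)     # runs the recorded defs backward, steals 'x_grad'
print(dx)                    # expected to equal 2 * x
```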
...@@ -32,13 +32,13 @@ def from_dlpack(dlpack): ...@@ -32,13 +32,13 @@ def from_dlpack(dlpack):
The tensor with the dlpack data. The tensor with the dlpack data.
""" """
ws = workspace.get_workspace() current_ws = workspace.get_workspace()
ref = EagerTensor(device=None) # Hack the constructor. tensor = EagerTensor(device=None)
ref.__gc__ = ws.collectors.TENSOR tensor._gc = current_ws.collectors.TENSOR
ref._id = ref.__gc__.alloc('${DLPACK}') tensor._impl = current_ws.create_tensor(
ref._impl = ws.CreateTensor(ref._id).FromDLPack(dlpack) tensor._gc.alloc('${DLPACK}')).FromDLPack(dlpack)
ref._device = device_spec.DeviceSpec(*ref._impl.device) tensor._device = device_spec.DeviceSpec(*tensor._impl.device)
return ref return tensor
def to_dlpack(tensor, readonly=True): def to_dlpack(tensor, readonly=True):
......
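A hedged round-trip sketch of the rewritten constructor path; the import location of the public wrappers (guessed here as ``dragon.dlpack``) and the eager tensor entry point are assumptions:

```python
import dragon                    # assumed public package
from dragon import dlpack        # assumed re-export of the module above

x = dragon.EagerTensor([1., 2., 3.], dtype='float32')
capsule = dlpack.to_dlpack(x)    # export the backing storage as a capsule
y = dlpack.from_dlpack(capsule)  # wrap the same storage in a new tensor
print(y.dtype, y.shape)          # metadata is taken from the imported impl
```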
...@@ -18,7 +18,6 @@ from __future__ import print_function ...@@ -18,7 +18,6 @@ from __future__ import print_function
from dragon.core.eager import backprop from dragon.core.eager import backprop
from dragon.core.eager.tensor import EagerTensor from dragon.core.eager.tensor import EagerTensor
from dragon.core.framework import device_spec from dragon.core.framework import device_spec
from dragon.core.framework import config
from dragon.core.framework import context from dragon.core.framework import context
from dragon.core.framework import workspace from dragon.core.framework import workspace
from dragon.core.util import six from dragon.core.util import six
...@@ -33,21 +32,22 @@ def run_operator( ...@@ -33,21 +32,22 @@ def run_operator(
): ):
requires_grad = False requires_grad = False
input_names, output_names = [], [] input_names, output_names = [], []
tape = backprop.get_default_tape() default_tape = backprop.get_default_tape()
for x in inputs: for input in inputs:
input_names.append(x.id) input_names.append(input.id)
if tape is not None: if default_tape is not None:
if x.requires_grad: if input.requires_grad:
requires_grad = True requires_grad = True
elif tape.is_watched(x): elif default_tape.is_watched(input):
requires_grad = True requires_grad = True
else:
default_tape.add_empty_grad(input.id + '_grad')
if tape and tape._retain_graph: if default_tape and default_tape._retain_graph:
requires_grad = True requires_grad = True
# Allocate outputs. # Allocate outputs.
cfg = config.config()
ws = workspace.get_workspace() ws = workspace.get_workspace()
output_scope = context.get_eager_scope(requires_grad) output_scope = context.get_eager_scope(requires_grad)
gc = ws.collectors # Garbage collectors gc = ws.collectors # Garbage collectors
...@@ -57,31 +57,28 @@ def run_operator( ...@@ -57,31 +57,28 @@ def run_operator(
output_names.append(spec) output_names.append(spec)
else: else:
if isinstance(spec, device_spec.DeviceSpec): if isinstance(spec, device_spec.DeviceSpec):
output_id = gc.TENSOR.alloc(output_scope) impl = ws.create_tensor(gc.TENSOR.alloc(output_scope))
ref = EagerTensor(device=spec) outputs[i] = EagerTensor(device=spec, gc=gc.TENSOR, impl=impl)
ref.__gc__, ref._id = gc.TENSOR, output_id
ref._impl = ws.CreateTensor(output_id)
outputs[i] = ref
output_names.append(outputs[i].id) output_names.append(outputs[i].id)
# Generate the OpDef. # Generate OpDef.
op_def = op_def.DeriveTo(input_names, output_names) op_def = op_def.DeriveTo(input_names, output_names)
# Maybe record this operation for future developments. # Record the operation for computing gradients later.
if len(inputs) > 0 and no_grad is False: if len(inputs) > 0 and no_grad is False:
if requires_grad: if requires_grad:
for output in outputs: for output in outputs:
output.requires_grad = True output._requires_grad = True
op_def.name = gc.OPERATOR.alloc(op_def.type) op_def.name = gc.OP.alloc(op_def.type)
tape.add_def(op_def) default_tape.add_def(op_def)
else: else:
for output in outputs: for output in outputs:
output.requires_grad = False output._requires_grad = False
# Dispatch the computation. # Dispatch the computation.
if pre_callback is not None: if pre_callback is not None:
pre_callback(ws, op_def.name) pre_callback(ws, op_def.name)
ws.RunOperator(op_def, cfg.graph_verbosity > 0) ws.run_operator(op_def)
# Return the outputs. # Return the outputs.
return outputs if len(outputs) > 1 else outputs[0] return outputs if len(outputs) > 1 else outputs[0]
...@@ -20,7 +20,6 @@ import numpy ...@@ -20,7 +20,6 @@ import numpy
from dragon.core.autograph.tensor import Tensor from dragon.core.autograph.tensor import Tensor
from dragon.core.framework import context from dragon.core.framework import context
from dragon.core.framework import workspace from dragon.core.framework import workspace
from dragon.core.util import math_util
class EagerTensor(Tensor): class EagerTensor(Tensor):
...@@ -48,30 +47,18 @@ class EagerTensor(Tensor): ...@@ -48,30 +47,18 @@ class EagerTensor(Tensor):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
"""Create an ``EagerTensor``.""" """Create an ``EagerTensor``."""
super(Tensor, self).__init__() super(Tensor, self).__init__()
# Internal properties self._gc = kwargs.get('gc', None)
self._id = kwargs.get('id', None) self._impl = kwargs.get('impl', None)
self._name = kwargs.get('name', self._id) self._name = kwargs.get('name', None)
self._own_storage = kwargs.get('own_storage', True) self._device = kwargs.get('device', context.get_device_spec())
self._requires_grad = kwargs.get('requires_grad', False) self._requires_grad = kwargs.get('requires_grad', False)
self._requires_grad = kwargs.get('trainable', self._requires_grad) self._requires_grad = kwargs.get('trainable', self._requires_grad)
self._device = kwargs.get('device', context.get_device_spec()) self._is_leaf = False
self._const_size = None # Attribute to represent a leaf variable
# Constructor
if len(args) == 0: if len(args) == 0:
# >>> dragon.EagerTensor(shape=?, dtype=?)
shape = kwargs.get('shape', None) shape = kwargs.get('shape', None)
if shape is not None: if shape is not None:
self._from_shape(shape, kwargs.get('dtype', 'float32')) self._from_shape(shape, kwargs.get('dtype', 'float32'))
else:
if self._id is not None:
ws = workspace.get_workspace()
self.__gc__ = ws.collectors.TENSOR
self._impl = ws.CreateTensor(self._id)
else:
self.__gc__ = None
elif len(args) == 1: elif len(args) == 1:
# >>> dragon.EagerTensor(constant)
self._from_numpy( self._from_numpy(
args[0] if isinstance(args[0], numpy.ndarray) args[0] if isinstance(args[0], numpy.ndarray)
else numpy.array(args[0], kwargs.get('dtype', 'float32')), else numpy.array(args[0], kwargs.get('dtype', 'float32')),
...@@ -106,10 +93,7 @@ class EagerTensor(Tensor): ...@@ -106,10 +93,7 @@ class EagerTensor(Tensor):
@dtype.setter @dtype.setter
def dtype(self, value): def dtype(self, value):
raise RuntimeError( raise RuntimeError('Call ``astype(...)`` to change the data type.')
'<dtype> is a readonly property.\n'
'Call ``astype(...)`` to change the data type.'
)
@property @property
def id(self): def id(self):
...@@ -121,7 +105,7 @@ class EagerTensor(Tensor): ...@@ -121,7 +105,7 @@ class EagerTensor(Tensor):
The tensor identity. The tensor identity.
""" """
return self._id return self._impl.name
@property @property
def name(self): def name(self):
...@@ -133,7 +117,7 @@ class EagerTensor(Tensor): ...@@ -133,7 +117,7 @@ class EagerTensor(Tensor):
The tensor name. The tensor name.
""" """
return self._name return self._name or self._impl.id
@name.setter @name.setter
def name(self, value): def name(self, value):
...@@ -174,10 +158,7 @@ class EagerTensor(Tensor): ...@@ -174,10 +158,7 @@ class EagerTensor(Tensor):
@shape.setter @shape.setter
def shape(self, value): def shape(self, value):
raise RuntimeError( raise RuntimeError('Call ``reshape(...)`` to change the dimensions.')
'<shape> is a readonly property.\n'
'Call ``reshape(...)`` to change the dimensions.'
)
@property @property
def size(self): def size(self):
...@@ -211,7 +192,6 @@ class EagerTensor(Tensor): ...@@ -211,7 +192,6 @@ class EagerTensor(Tensor):
`dragon.cast(...)`_ : Cast the data type of input. `dragon.cast(...)`_ : Cast the data type of input.
""" """
pass
def constant(self, value=0): def constant(self, value=0):
r"""Fill self with a constant value. r"""Fill self with a constant value.
...@@ -229,7 +209,6 @@ class EagerTensor(Tensor): ...@@ -229,7 +209,6 @@ class EagerTensor(Tensor):
The self. The self.
""" """
pass
def copy(self): def copy(self):
"""Return a tensor with containing data copied. """Return a tensor with containing data copied.
...@@ -244,7 +223,6 @@ class EagerTensor(Tensor): ...@@ -244,7 +223,6 @@ class EagerTensor(Tensor):
`dragon.copy(...)`_ : Copy the value to ref. `dragon.copy(...)`_ : Copy the value to ref.
""" """
pass
def get_value(self): def get_value(self):
"""Return the value from storage. """Return the value from storage.
...@@ -275,7 +253,6 @@ class EagerTensor(Tensor): ...@@ -275,7 +253,6 @@ class EagerTensor(Tensor):
The self. The self.
""" """
pass
def glorot_uniform(self, mode='FAN_IN', scale=3.): def glorot_uniform(self, mode='FAN_IN', scale=3.):
r"""Fill self from a glorot uniform distribution. r"""Fill self from a glorot uniform distribution.
...@@ -298,7 +275,6 @@ class EagerTensor(Tensor): ...@@ -298,7 +275,6 @@ class EagerTensor(Tensor):
The self. The self.
""" """
pass
def numpy(self, readonly=True): def numpy(self, readonly=True):
"""Create a numpy array sharing the data. """Create a numpy array sharing the data.
...@@ -334,7 +310,6 @@ class EagerTensor(Tensor): ...@@ -334,7 +310,6 @@ class EagerTensor(Tensor):
The self. The self.
""" """
pass
def reshape(self, shape): def reshape(self, shape):
"""Return a tensor containing the same data with new shape. """Return a tensor containing the same data with new shape.
...@@ -354,7 +329,6 @@ class EagerTensor(Tensor): ...@@ -354,7 +329,6 @@ class EagerTensor(Tensor):
`dragon.reshape(...)`_ : Change the dimensions of input. `dragon.reshape(...)`_ : Change the dimensions of input.
""" """
pass
def set_value(self, value): def set_value(self, value):
"""Map the value to storage. """Map the value to storage.
...@@ -393,10 +367,9 @@ class EagerTensor(Tensor): ...@@ -393,10 +367,9 @@ class EagerTensor(Tensor):
The self. The self.
""" """
pass
def uniform(self, low=0, high=1): def uniform(self, low=0, high=1):
r"""Fill self from a uniform distribution. self.self__ = r"""Fill self from a uniform distribution.
.. math:: \text{self} \leftarrow U(\alpha, \beta) .. math:: \text{self} \leftarrow U(\alpha, \beta)
...@@ -413,38 +386,70 @@ class EagerTensor(Tensor): ...@@ -413,38 +386,70 @@ class EagerTensor(Tensor):
The self. The self.
""" """
pass
def _from_numpy(self, array, copy): def _from_numpy(self, array, copy):
"""Create impl from the numpy array.""" """Create impl from the numpy array."""
ws = workspace.get_workspace() ws = workspace.get_workspace()
array = array.copy() if copy else array array = array.copy() if copy else array
self._const_size = array.size self._const_size = array.size
self.__gc__ = ws.collectors.TENSOR self._gc, self._is_leaf = ws.collectors.TENSOR, True
self._id = self.__gc__.alloc(context.get_eager_scope()) self._impl = ws.create_tensor(self._gc.alloc(
self._impl = ws.CreateTensor(self._id).FromNumpy(array) context.get_eager_scope())).FromNumpy(array)
def _from_shape(self, shape, dtype): def _from_shape(self, shape, dtype):
"""Create impl from the shape and data type.""" """Create impl from the shape and data type."""
ws = workspace.get_workspace() ws = workspace.get_workspace()
self._const_size = math_util.prod(shape) self._gc, self._is_leaf = ws.collectors.TENSOR, True
self.__gc__ = ws.collectors.TENSOR self._impl = ws.create_tensor(self._gc.alloc(
self._id = self.__gc__.alloc(context.get_eager_scope()) context.get_eager_scope())).FromShape(shape, dtype)
self._impl = ws.CreateTensor(self._id).FromShape(shape, dtype)
def __add__(self, other): def __add__(self, other):
pass r"""Compute the element-wise addition.
.. math:: \text{out} = \text{self} + \text{other}
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to add.
Returns
-------
dragon.EagerTensor
The output tensor.
See Also
--------
`dragon.math.add(...)`_ : Compute the element-wise addition.
"""
def __del__(self): def __del__(self):
if not self._requires_grad or self._const_size: if (self._is_leaf or not self._requires_grad) and self._gc:
if self._own_storage and self._id: # Always reuse the leaf tensors.
# Always reuse the leaf variables or tensors # PyGC will detect them automatically.
# that do not require grad. self._gc.collect(self.id)
# PyGC will detect them automatically.
self.__gc__.collect(self._id)
def __div__(self, other): def __div__(self, other):
pass r"""Compute the element-wise division.
.. math:: \text{out} = \text{self} \div \text{other}
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to divide.
Returns
-------
dragon.EagerTensor
The output tensor.
See Also
--------
`dragon.math.div(...)`_ : Compute the element-wise division.
"""
def __float__(self): def __float__(self):
"""Return a float python scalar. """Return a float python scalar.
...@@ -455,30 +460,138 @@ class EagerTensor(Tensor): ...@@ -455,30 +460,138 @@ class EagerTensor(Tensor):
The float value. The float value.
""" """
if self.size == 1: return float(self.numpy())
return float(self.numpy())
raise TypeError('Only size-1 array can be converted to python scalar.')
def __ge__(self, other): def __ge__(self, other):
pass r"""Compute element-wise greater-equal comparison.
.. math:: \text{out} = (\text{self} \geq \text{other})
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to compare.
Returns
-------
dragon.EagerTensor
The output tensor.
See Also
--------
`dragon.math.greater_equal(...)`_ : Compute element-wise greater-equal comparison.
"""
def __getitem__(self, item): def __getitem__(self, item):
pass """Select the elements at the specific indices.
Parameters
----------
item : Union[int, slice, dragon.EagerTensor]
The indices.
Returns
-------
dragon.EagerTensor
The output tensor.
See Also
--------
`dragon.slice(...)`_ : Select the elements according to the given sections.
See Also
--------
`dragon.masked_select(...)`_ : Select the elements where the given mask is 1.
"""
def __gt__(self, other): def __gt__(self, other):
pass r"""Compute element-wise greater comparison.
.. math:: \text{out} = (\text{self} > \text{other})
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to compare.
Returns
-------
dragon.EagerTensor
The output tensor.
See Also
--------
`dragon.math.greater(...)`_ : Compute element-wise greater comparison.
"""
def __hash__(self): def __hash__(self):
return id(self) return id(self)
def __iadd__(self, other): def __iadd__(self, other):
pass r"""Compute the element-wise addition.
.. math:: \text{self} \mathrel{+}= \text{other}
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to add.
Returns
-------
dragon.EagerTensor
The self.
See Also
--------
`dragon.math.add(...)`_ : Compute the element-wise addition.
"""
def __idiv__(self, other): def __idiv__(self, other):
pass r"""Compute the element-wise division.
.. math:: \text{self} \mathrel{\div}= \text{other}
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to divide.
Returns
-------
dragon.EagerTensor
The self.
See Also
--------
`dragon.math.div(...)`_ : Compute the element-wise division.
"""
def __imul__(self, other): def __imul__(self, other):
pass r"""Compute the element-wise multiplication.
.. math:: \text{self} \mathrel{\times}= \text{other}
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to multiply.
Returns
-------
dragon.EagerTensor
The self.
See Also
--------
`dragon.math.mul(...)`_ : Compute the element-wise multiplication.
"""
def __int__(self): def __int__(self):
"""Return a int python scalar. """Return a int python scalar.
...@@ -492,22 +605,125 @@ class EagerTensor(Tensor): ...@@ -492,22 +605,125 @@ class EagerTensor(Tensor):
return int(self.__float__()) return int(self.__float__())
def __isub__(self, other): def __isub__(self, other):
pass r"""Compute the element-wise division.
def __lt__(self, other): .. math:: \text{self} \mathrel{-}= \text{other}
pass
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to subtract.
Returns
-------
dragon.EagerTensor
The self.
See Also
--------
`dragon.math.sub(...)`_ : Compute the element-wise subtraction.
"""
def __le__(self, other): def __le__(self, other):
pass r"""Compute element-wise less-equal comparison.
.. math:: \text{out} = (\text{self} \leq \text{other})
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to compare.
Returns
-------
dragon.EagerTensor
The output tensor.
See Also
--------
`dragon.math.less_equal(...)`_ : Compute element-wise less-equal comparison.
"""
def __lt__(self, other):
r"""Compute element-wise less comparison.
.. math:: \text{out} = (\text{self} < \text{other})
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to compare.
Returns
-------
dragon.EagerTensor
The output tensor.
See Also
--------
`dragon.math.less(...)`_ : Compute element-wise less comparison.
"""
def __mul__(self, other): def __mul__(self, other):
pass r"""Compute the element-wise multiplication.
.. math:: \text{out} = \text{self} \times \text{other}
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to multiply.
Returns
-------
dragon.EagerTensor
The output tensor.
See Also
--------
`dragon.math.mul(...)`_ : Compute the element-wise multiplication.
"""
def __neg__(self): def __neg__(self):
pass r"""Compute the element-wise negative.
.. math:: y = -x
Returns
-------
dragon.EagerTensor
The output tensor.
See Also
--------
`dragon.math.negative(...)`_ : Compute the element-wise negative.
"""
def __radd__(self, other): def __radd__(self, other):
pass r"""Compute the element-wise addition.
.. math:: \text{out} = \text{other} + \text{self}
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to add.
Returns
-------
dragon.EagerTensor
The output tensor.
See Also
--------
`dragon.math.add(...)`_ : Compute the element-wise addition.
"""
def __repr__(self): def __repr__(self):
array = self.numpy() array = self.numpy()
...@@ -523,22 +739,105 @@ class EagerTensor(Tensor): ...@@ -523,22 +739,105 @@ class EagerTensor(Tensor):
return content_str + meta_str return content_str + meta_str
def __rdiv__(self, other): def __rdiv__(self, other):
pass r"""Compute the element-wise division.
.. math:: \text{out} = \text{other} \div \text{self}
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to be divided.
Returns
-------
dragon.EagerTensor
The output tensor.
See Also
--------
`dragon.math.div(...)`_ : Compute the element-wise division.
"""
def __rmul__(self, other): def __rmul__(self, other):
pass r"""Compute the element-wise multiplication.
.. math:: \text{out} = \text{other} \times \text{self}
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to multiply.
Returns
-------
dragon.EagerTensor
The output tensor.
See Also
--------
`dragon.math.mul(...)`_ : Compute the element-wise multiplication.
"""
def __rsub__(self, other): def __rsub__(self, other):
pass r"""Compute the element-wise subtraction.
.. math:: \text{out} = \text{other} - \text{self}
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to be subtracted.
Returns
-------
dragon.EagerTensor
The output tensor.
def __rtruediv__(self, other): See Also
return self.__div__(other) --------
`dragon.math.sub(...)`_ : Compute the element-wise subtraction.
"""
def __setitem__(self, key, value): def __setitem__(self, key, value):
pass """Set the value at the specific indices.
Parameters
----------
key : Union[int, slice, dragon.EagerTensor]
The indices.
value : number or dragon.EagerTensor
The value.
See Also
--------
`dragon.assign(...)`_ : Assign the value to ref.
See Also
--------
`dragon.masked_assign(...)`_ : Assign the value to ref where mask is 1.
"""
def __sub__(self, other): def __sub__(self, other):
pass r"""Compute the element-wise subtraction.
.. math:: \text{out} = \text{self} - \text{other}
Parameters
----------
other : Union[dragon.EagerTensor, number]
The value to subtract.
Returns
-------
dragon.EagerTensor
The output tensor.
def __truediv__(self, other): See Also
return self.__div__(other) --------
`dragon.math.sub(...)`_ : Compute the element-wise subtraction.
"""
...@@ -15,10 +15,11 @@ from __future__ import print_function ...@@ -15,10 +15,11 @@ from __future__ import print_function
from dragon.core.framework import config from dragon.core.framework import config
from dragon.core.framework import device_spec from dragon.core.framework import device_spec
from dragon.core.framework import mapping
from dragon.core.util import tls from dragon.core.util import tls
def device(device_type, device_id=0): def device(device_type, device_index=0):
"""Context-manager to nest the the device spec. """Context-manager to nest the the device spec.
Examples: Examples:
...@@ -32,7 +33,7 @@ def device(device_type, device_id=0): ...@@ -32,7 +33,7 @@ def device(device_type, device_id=0):
---------- ----------
device_type : {'cpu', 'gpu', 'cuda', 'cnml'}, required device_type : {'cpu', 'gpu', 'cuda', 'cnml'}, required
The type of device. The type of device.
device_id : int, optional, default=0 device_index : int, optional, default=0
The index of the device. The index of the device.
Returns Returns
...@@ -41,13 +42,12 @@ def device(device_type, device_id=0): ...@@ -41,13 +42,12 @@ def device(device_type, device_id=0):
The current default device spec. The current default device spec.
""" """
device_type, device_id, device_type.lower(), device_id device_type = device_type.lower()
assert device_type in ('cpu', 'gpu', 'cuda', 'cnml') if device_type not in mapping.DEVICE_STRING_TO_DEVICE_TYPE:
if device_type == 'gpu': raise ValueError('Unsupported device type:', device_type)
device_type = 'cuda'
return _GLOBAL_DEVICE_STACK.get_controller({ return _GLOBAL_DEVICE_STACK.get_controller({
'device_type': device_type, 'device_type': mapping.DEVICE_STRING_TO_DEVICE_TYPE[device_type],
'device_index': device_id, 'device_index': device_index,
}) })
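Usage sketch of the renamed argument; note that 'gpu' is normalized to 'cuda' through the new mapping (the ``dragon.device`` spelling of this context manager is assumed):

```python
import dragon  # assumed public alias of this context manager

with dragon.device('gpu', device_index=0):
    # Ops and tensors created here default to cuda:0.
    x = dragon.EagerTensor([1., 2., 3.], dtype='float32')

with dragon.device('cpu'):
    y = dragon.EagerTensor([4., 5., 6.], dtype='float32')
```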
...@@ -96,21 +96,6 @@ def name_scope(name): ...@@ -96,21 +96,6 @@ def name_scope(name):
return _GLOBAL_NAME_STACK.get_controller(default) return _GLOBAL_NAME_STACK.get_controller(default)
def graph_phase(phase):
"""Context-manager to nest the the executing phase for graph.
Parameters
----------
phase : {'TRAIN', 'TEST'}, required
The executing phase.
"""
phase = phase.upper()
assert phase in ('TRAIN', 'TEST'), \
"Specified an unknown phase: " + phase
return _GLOBAL_PHASE_STACK.get_controller(phase)
def get_device_info(): def get_device_info():
"""Return the device info in current nesting.""" """Return the device info in current nesting."""
return _GLOBAL_DEVICE_STACK.get_default() return _GLOBAL_DEVICE_STACK.get_default()
...@@ -144,13 +129,7 @@ def get_name_scope(): ...@@ -144,13 +129,7 @@ def get_name_scope():
return ret if ret is not None else '' return ret if ret is not None else ''
def get_graph_phase():
"""Return the graph phase in current nesting."""
return _GLOBAL_PHASE_STACK.get_default()
# Thread-local stack for nesting scope. # Thread-local stack for nesting scope.
_GLOBAL_DEVICE_STACK = tls.Stack() _GLOBAL_DEVICE_STACK = tls.Stack()
_GLOBAL_EAGER_STACK = tls.Stack([('${GRAPH}', '${DATA}')]) _GLOBAL_EAGER_STACK = tls.Stack([('${GRAPH}', '${DATA}')])
_GLOBAL_NAME_STACK = tls.Stack() _GLOBAL_NAME_STACK = tls.Stack()
_GLOBAL_PHASE_STACK = tls.Stack()
...@@ -17,6 +17,15 @@ from __future__ import print_function ...@@ -17,6 +17,15 @@ from __future__ import print_function
import numpy import numpy
# Mapping to store the supported device types
DEVICE_STRING_TO_DEVICE_TYPE = {
'cpu': 'cpu',
'gpu': 'cuda',
'cuda': 'cuda',
'cnml': 'cnml',
}
# Mapping to convert to the numpy type
TENSOR_TYPE_TO_NP_TYPE = { TENSOR_TYPE_TO_NP_TYPE = {
'bool': numpy.bool, 'bool': numpy.bool,
'int8': numpy.int8, 'int8': numpy.int8,
...@@ -28,6 +37,7 @@ TENSOR_TYPE_TO_NP_TYPE = { ...@@ -28,6 +37,7 @@ TENSOR_TYPE_TO_NP_TYPE = {
'float64': numpy.float64, 'float64': numpy.float64,
} }
# Mapping to convert to the torch tensor class name
TENSOR_TYPE_TO_TORCH_TENSOR = { TENSOR_TYPE_TO_TORCH_TENSOR = {
'bool': 'BoolTensor', 'bool': 'BoolTensor',
'int8': 'CharTensor', 'int8': 'CharTensor',
......
...@@ -30,10 +30,10 @@ from dragon.core.framework import workspace ...@@ -30,10 +30,10 @@ from dragon.core.framework import workspace
class Operator(object): class Operator(object):
"""Wrapper to unify the symbolic and eager operator abstraction.""" """Wrapper to unify the symbolic and eager operator abstraction."""
def __init__(self, key, dev, **kwargs): def __init__(self, cache_key, device, **kwargs):
self._def = None self._def = None
self._cache_key = key self._cache_key = cache_key
self._device = dev self._device = device
self._arg_device = proto_util.get_device_option('cpu') self._arg_device = proto_util.get_device_option('cpu')
self._arg_device = self._arg_device.SerializeToString() self._arg_device = self._arg_device.SerializeToString()
self._seed = kwargs.get('seed', config.config().random_seed) self._seed = kwargs.get('seed', config.config().random_seed)
...@@ -104,7 +104,7 @@ class Operator(object): ...@@ -104,7 +104,7 @@ class Operator(object):
"""Generate the OpDef from attributes.""" """Generate the OpDef from attributes."""
attributes = self.attributes() attributes = self.attributes()
self._def = proto_util.make_operator_cdef( self._def = proto_util.make_operator_cdef(
name=attributes.get('name', 'GenericOp'), name=attributes.get('name', 'Op'),
cache_key=self._cache_key, cache_key=self._cache_key,
op_type=attributes['op_type'], op_type=attributes['op_type'],
device_option=proto_util.get_device_option( device_option=proto_util.get_device_option(
...@@ -128,17 +128,9 @@ def new_leaf(shape, dtype, device, trainable=False): ...@@ -128,17 +128,9 @@ def new_leaf(shape, dtype, device, trainable=False):
def remove_binary_scalar(inputs): def remove_binary_scalar(inputs):
"""Remove the scalar for binary ops.""" """Remove the scalar for binary ops."""
if types.is_tensor(inputs[0]): if types.is_tensor(inputs[0]):
# (Tensor, Number) inputs[1] = scalar_to_tensor(inputs[1], inputs[0].dtype)
inputs[1] = scalar_to_tensor(
inputs[1],
inputs[0].dtype,
)
else: else:
# (Number, Tensor) inputs[0] = scalar_to_tensor(inputs[0], inputs[1].dtype)
inputs[0] = scalar_to_tensor(
inputs[0],
inputs[1].dtype,
)
return inputs return inputs
...@@ -153,15 +145,11 @@ def scalar_to_tensor(input, dtype): ...@@ -153,15 +145,11 @@ def scalar_to_tensor(input, dtype):
'<input> should be a python number, got {}.' '<input> should be a python number, got {}.'
.format(type(input).__name__) .format(type(input).__name__)
) )
tid = '/share/scalar/{}/{}'.format(dtype, str(input)) name = '/share/scalar/{}/{}'.format(dtype, str(input))
if not workspace.has_tensor(tid): ws = workspace.get_workspace()
workspace.feed_tensor(tid, numpy.array(input, dtype)) if not ws.has_tensor(name):
return EagerTensor( ws.feed_tensor(name, numpy.array(input, dtype))
id=tid, return EagerTensor(impl=ws.GetTensor(name), trainable=False)
dtype=dtype,
own_storage=False,
requires_grad=False,
)
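To make the caching above concrete, a Dragon-free sketch of the same idea: a scalar is materialized once under the shared name ``/share/scalar/<dtype>/<value>`` and reused on later calls. The dict-backed ``FakeWorkspace`` is purely illustrative.

```python
import numpy

class FakeWorkspace(object):
    """Illustrative stand-in for the workspace tensor store."""
    def __init__(self):
        self._tensors = {}
    def has_tensor(self, name):
        return name in self._tensors
    def feed_tensor(self, name, value):
        self._tensors[name] = value
    def get_tensor(self, name):
        return self._tensors[name]

def scalar_to_array(ws, value, dtype):
    # Same naming scheme as scalar_to_tensor: one cached entry per (dtype, value).
    name = '/share/scalar/{}/{}'.format(dtype, str(value))
    if not ws.has_tensor(name):
        ws.feed_tensor(name, numpy.array(value, dtype))
    return ws.get_tensor(name)

ws = FakeWorkspace()
a = scalar_to_array(ws, 2, 'float32')
b = scalar_to_array(ws, 2, 'float32')
assert a is b  # The second call hits the cache.
```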
# Define a global dict to cache the operators. # Define a global dict to cache the operators.
......
...@@ -17,8 +17,6 @@ from __future__ import print_function ...@@ -17,8 +17,6 @@ from __future__ import print_function
import collections import collections
import contextlib import contextlib
import os
import numpy import numpy
from dragon import backend from dragon import backend
...@@ -27,23 +25,15 @@ from dragon.core.framework import mapping ...@@ -27,23 +25,15 @@ from dragon.core.framework import mapping
from dragon.core.framework import proto_util from dragon.core.framework import proto_util
from dragon.core.framework import types from dragon.core.framework import types
from dragon.core.proto import dragon_pb2 from dragon.core.proto import dragon_pb2
from dragon.core.util import logging from dragon.core.util import serialization
from dragon.core.util import tls from dragon.core.util import tls
from dragon.core.util import six
class OperatorCollector(object):
"""A FIFO free list to manage the resource handle of operators.
Operator who takes gradient will hold a handle,
and it will be collected after the backward pass.
Handles are collected according to the type, class OpCollector(object):
as the size of resources varies greatly. """A FIFO free list to manage the resource handle of operators."""
"""
def __init__(self): def __init__(self, parent):
self._parent = parent
self._type2keys = collections.defaultdict(collections.deque) self._type2keys = collections.defaultdict(collections.deque)
def alloc(self, op_type): def alloc(self, op_type):
...@@ -52,32 +42,23 @@ class OperatorCollector(object): ...@@ -52,32 +42,23 @@ class OperatorCollector(object):
return self._type2keys[op_type].popleft() return self._type2keys[op_type].popleft()
except IndexError: except IndexError:
self._type2keys[op_type].append( self._type2keys[op_type].append(
get_dummy_name( self._parent.unique_name(
basename=op_type, name=op_type,
domain='Operator', namespace='Op',
zero_based=False, zero_based=False))
))
return self._type2keys[op_type].popleft() return self._type2keys[op_type].popleft()
def collect(self, handle): def collect(self, handle):
"""Collect a unique handle.""" """Collect an unique handle."""
op_type, _ = handle.split('_') op_type, _ = handle.split('_')
self._type2keys[op_type].append(handle) self._type2keys[op_type].append(handle)
class TensorCollector(object): class TensorCollector(object):
"""A FIFO free list to manage the reused tensors. """A FIFO free list to manage the reused tensors."""
Tensors with the same scope are reused by turns,
and thus, memory fragments will be reduced.
Note that the fragments are inevitable due to the
naive FIFO policy. Reset the workspace if the number
of fragments is going to increase linearly.
"""
def __init__(self): def __init__(self, parent):
self._parent = parent
self._scope2keys = collections.defaultdict(collections.deque) self._scope2keys = collections.defaultdict(collections.deque)
def alloc(self, scope='${DATA}'): def alloc(self, scope='${DATA}'):
...@@ -86,33 +67,28 @@ class TensorCollector(object): ...@@ -86,33 +67,28 @@ class TensorCollector(object):
return self._scope2keys[scope].popleft() return self._scope2keys[scope].popleft()
except IndexError: except IndexError:
self._scope2keys[scope].append( self._scope2keys[scope].append(
get_dummy_name( self._parent.unique_name(
basename='%s/Tensor' % scope, name='%s/Tensor' % scope,
domain='Tensor', namespace='Tensor',
zero_based=False, zero_based=False))
))
return self._scope2keys[scope].popleft() return self._scope2keys[scope].popleft()
def collect(self, name): def collect(self, name):
"""Collect a unique name.""" """Collect an unique name."""
if name.startswith('${'): scope, _ = name.split('/')
scope, _ = name.split('/') self._scope2keys[scope].append(name)
self._scope2keys[scope].append(name)
return True
else:
return False
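For readers unfamiliar with the free-list pattern, a minimal standalone sketch of the FIFO scheme used by ``OpCollector`` and ``TensorCollector``; the counter-based naming stands in for the workspace's ``unique_name`` and is illustrative only.

```python
import collections
import itertools

class FreeList(object):
    """FIFO free list: reuse returned handles before minting new ones."""
    def __init__(self):
        self._counter = itertools.count()
        self._queues = collections.defaultdict(collections.deque)
    def _unique_name(self, key):
        return '{}_{}'.format(key, next(self._counter))
    def alloc(self, key):
        try:
            return self._queues[key].popleft()
        except IndexError:
            return self._unique_name(key)
    def collect(self, handle):
        key, _ = handle.rsplit('_', 1)
        self._queues[key].append(handle)

pool = FreeList()
h1 = pool.alloc('Conv')          # e.g. 'Conv_0'
pool.collect(h1)
assert pool.alloc('Conv') == h1  # The released handle is reused first.
```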
class Workspace(backend.Workspace): class Workspace(backend.Workspace):
"""Space to isolate computations that share resources.""" """Sandbox to isolate the resources and computations."""
class Collectors(object): class Collectors(object):
def __init__(self): def __init__(self, workspace):
self.TENSOR = TensorCollector() self.OP = OpCollector(workspace)
self.OPERATOR = OperatorCollector() self.TENSOR = TensorCollector(workspace)
def __init__(self, name=''): def __init__(self, name=''):
"""Create a Workspace. """Create a ``Workspace``.
Parameters Parameters
---------- ----------
...@@ -121,24 +97,20 @@ class Workspace(backend.Workspace): ...@@ -121,24 +97,20 @@ class Workspace(backend.Workspace):
""" """
super(Workspace, self).__init__(name) super(Workspace, self).__init__(name)
self._ref_objects = [] self._references = []
self._collectors = self.Collectors() self._collectors = self.Collectors(self)
@property @property
def collectors(self): def collectors(self):
"""Return the resource collectors.""" """Return the resource collectors."""
return self._collectors return self._collectors
def merge_from(self, other): def as_default(self):
"""Merge a external workspace into ``self``. """Switch ``self`` as the default workspace.
The ``other`` will not be reset until ``self`` is reset. Call this method with the **with** keyword.
Carefulness should be taken to associate with the workspaces.
Parameters Once **with** is exited, the previous default will be set.
----------
other : dragon.Workspace
The given external workspace.
Returns Returns
------- -------
...@@ -146,216 +118,295 @@ class Workspace(backend.Workspace): ...@@ -146,216 +118,295 @@ class Workspace(backend.Workspace):
The ``self``. The ``self``.
""" """
self.MergeFrom(other) return _GLOBAL_DEFAULT_WORKSPACE_STACK.get_controller(self)
self._ref_objects.append(other)
return self
def as_default(self):
"""Switch ``self`` as the default workspace.
Call this method with the **with** keyword. def create_graph(self, graph_def):
"""Create the graph.
Once **with** is exited, the previous default will be set. Parameters
----------
graph_def : GraphDef
The ``GraphDef`` protocol buffer.
Returns Returns
------- -------
dragon.Workspace str
The ``self``. The graph name.
""" """
return _GLOBAL_DEFAULT_WORKSPACE_STACK.get_controller(self) cfg = config.config()
if cfg.graph_verbosity == 2:
print(graph_def)
return self.CreateGraph(
serialization.serialize_proto(graph_def),
cfg.graph_verbosity == 1)
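A hedged usage sketch of the workspace switching shown above; it assumes a working Dragon install and that ``dragon.Workspace`` and ``dragon.get_workspace`` are exported as the docstrings in this file suggest.

```python
import dragon

ws = dragon.Workspace(name='scratch')
with ws.as_default():
    # Inside the block, resources are created in `ws`.
    assert dragon.get_workspace() is ws
# On exit, the previous default workspace is restored.
```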
def clear(self): def create_tensor(self, name, filler_info=None):
"""Remove all the tensors. """Create the tensor.
Optionally call this method to clean the memories. Parameters
----------
name : str
The tensor name.
filler_info : FillerInfo
The ``FillerInfo`` protocol buffer.
Returns
-------
TensorImpl
The tensor implementation.
""" """
self.Clear() return self.CreateTensor(
name, serialization.serialize_proto(filler_info))
def feed_tensor(self, tensor, value, dtype=None, enforce_cpu=False):
"""Copy the value to tensor.
def create_filler(filler_def): Examples:
"""Create a tensor filler in current workspace.
Parameters ```python
---------- # Define a named tensor to feed
filler_def : TensorFiller x = dragon.Tensor('x')
The def of filler. dragon.get_workspace().feed_tensor(x, 0)
""" # Feed by specifying a tensor name
filler_def = filler_def if isinstance(filler_def, str) \ # Note that the implementation will be created if it does not exist
else filler_def.SerializePartialToString() dragon.get_workspace().feed_tensor('y', 1)
get_workspace().CreateFiller(filler_def) print(dragon.get_workspace().has_tensor('y')) # True
```
Parameters
----------
tensor : Union[dragon.Tensor, str]
The tensor to feed.
value : array_like
The value to copy.
dtype : str, optional
The optional data type.
enforce_cpu : bool, optional, default=False
**True** to copy using cpu context.
def create_graph(graph_def): """
"""Create the graph in current workspace. if types.is_tensor(value):
# Steal the data if value is a tensor
value = getattr(value, 'get_value')()
# Determine the data type from argument or value
if not isinstance(value, numpy.ndarray):
dtype = 'float32' if dtype is None else dtype
else:
dtype = value.dtype if dtype is None else dtype
if hasattr(tensor, 'dtype') and tensor.dtype is not None:
if tensor.dtype not in mapping.TENSOR_TYPE_TO_NP_TYPE:
raise TypeError('Unsupported data type:', tensor.dtype)
dtype = mapping.TENSOR_TYPE_TO_NP_TYPE[tensor.dtype]
# Determine the copying device option
if enforce_cpu is True:
device_option = proto_util.get_device_option('cpu')
else:
device_option = proto_util.get_default_device_option()
if device_option is None:
device_option = proto_util.get_global_device_option()
# Copy data to the backend
self.FeedTensor(
_stringify_object(tensor),
numpy.array(value, dtype=dtype, copy=False),
serialization.serialize_proto(device_option),
)
def fetch_tensor(self, tensor):
"""Return the value of tensor.
Parameters Parameters
---------- ----------
graph_def : GraphDef tensor : Union[dragon.Tensor, str]
The definition of meta graph. The tensor to fetch.
Returns Returns
------- -------
str numpy.ndarray
The graph name to run. The array copied from backend.
""" """
cfg = config.config() return self.FetchTensor(_stringify_object(tensor))
if cfg.graph_verbosity == 2:
log_dir = cfg.log_dir
if log_dir is not None:
if not os.path.exists(log_dir):
try:
os.makedirs(log_dir)
except Exception:
raise ValueError('The given prefix is invalid.')
path = os.path.join(
log_dir,
graph_def.name + '.txt',
)
with open(path, 'w') as f:
f.write(str(graph_def))
logging.info('Export meta graph to: %s' % path)
else:
print(graph_def)
return get_workspace().CreateGraph(
_stringify_proto(graph_def), cfg.graph_verbosity == 1)
def has_tensor(self, tensor):
"""Return whether the tensor is in this workspace.
def create_tensor(tensor): Parameters
"""Create the tensor in current workspace. ----------
tensor : Union[dragon.Tensor, str]
The tensor.
Parameters Returns
---------- -------
tensor : Union[dragon.Tensor, str] bool
The tensor to create. **True** if the tensor exists otherwise **False**.
""" """
tensor = _stringify_tensor(tensor) return self.HasTensor(_stringify_object(tensor))
get_workspace().CreateTensor(tensor)
def feed_tensor(tensor, value, dtype=None, enforce_cpu=False):
"""Copy the value to tensor.
Examples:
```python
# Define a variable, feed then fetch the value
x = dragon.Tensor().variable()
dragon.workspace.feed_tensor(x, 1)
print(dragon.workspace.fetch_tensor(x))
# Feed by specifying a optional data type
# Fetch through ``Tensor.get_value(...)``
dragon.workspace.feed_tensor(a, [[1, 2, 3]], dtype='float16')
print(x.get_value())
```
Parameters
----------
tensor : Union[dragon.Tensor, str]
The tensor to feed.
value : array_like
The value to copy.
dtype : str, optional
The optional data type.
enforce_cpu : bool, optional, default=False
**True** to copy using cpu context.
""" def merge_from(self, other):
name = tensor.name if hasattr(tensor, 'name') else str(tensor) """Merge resources from another workspace.
if enforce_cpu is True:
dev = proto_util.get_device_option('cpu')
else:
dev = proto_util.get_default_device_option()
if dev is None:
dev = proto_util.get_global_device_option()
# Steal the value from tensor storage if necessary. The ``other`` will not be reset until ``self`` is reset.
if types.is_tensor(value): Take care when associating the workspaces.
value = getattr(value, 'get_value')()
if not isinstance(value, numpy.ndarray): Parameters
dtype = 'float32' if dtype is None else dtype ----------
else: other : dragon.Workspace
dtype = value.dtype if dtype is None else dtype The workspace to merge.
Returns
-------
dragon.Workspace
The ``self``.
if hasattr(tensor, 'dtype') and tensor.dtype is not None: """
if tensor.dtype not in mapping.TENSOR_TYPE_TO_NP_TYPE: self.MergeFrom(other)
raise TypeError('Unsupported data type: %s' % tensor.dtype) self._references.append(other)
dtype = mapping.TENSOR_TYPE_TO_NP_TYPE[tensor.dtype] return self
dev = _stringify_proto(dev) def register_alias(self, target, alias):
value = numpy.array(value, dtype=dtype, copy=False) """Register an alias for the target.
get_workspace().FeedTensor(name, value, dev)
Parameters
----------
target : Union[str, dragon.Tensor]
The string or named object.
alias : str
The alias.
def fetch_tensor(tensor): """
"""Return the value of tensor. self.RegisterAlias(_stringify_object(target), alias)
Parameters def reset_tensor(self, tensor):
---------- """Reset the tensor.
tensor : Union[dragon.Tensor, str]
The tensor to fetch.
Returns Parameters
------- ----------
numpy.ndarray tensor : Union[dragon.Tensor, str]
The array copied from backend. The tensor to reset.
""" """
tensor = _stringify_tensor(tensor) return self.ResetTensor(_stringify_object(tensor))
return get_workspace().FetchTensor(tensor)
def run_backward(
self,
op_defs,
targets,
sources=None,
input_grads=None,
empty_grads=None,
):
"""Compute the gradients of input operators.
def get_dummy_name(basename, suffix='', domain='', zero_based=True): Parameters
"""Return an unique dummy name in current workspace. ----------
op_defs : Sequence[OperatorDef]
The executed op defs.
targets : Sequence[str]
The derivative targets.
sources : Sequence[str], optional
The differentiated inputs.
input_grads : Sequence[str], optional
The input grad for targets.
empty_grads : Sequence[str], optional
The grads to set to empty.
The dummy name will be formatted as: """
<basename> + <unique_index> + <suffix>. cfg = config.config()
self.RunBackward(
op_defs,
targets,
sources if sources else [],
input_grads if input_grads else [],
empty_grads if empty_grads else [],
cfg.graph_optimization <= 2,
cfg.graph_verbosity > 0,
)
def run_graph(
self,
name,
inputs_and_values=None,
outputs=None,
executing_stage=None,
return_outputs=True,
):
"""Run the graph.
Names in the different ``domain`` could be same. Parameters
----------
name : str
The graph name.
inputs_and_values : Tuple[Sequence, Sequence], optional
The input tensors and feeding values.
outputs : Sequence[dragon.Tensor], optional
The output tensors.
executing_stage : str, optional
The optional executing stage.
return_outputs : bool, optional, default=True
Whether to return the output values.
Parameters """
---------- # The explicit feeding for inputs.
basename : str if inputs_and_values is not None:
The basename. inputs, values = inputs_and_values
suffix : str, optional if len(inputs) != len(values):
The optional suffix adding to basename. raise ValueError(
domain : str, optional 'Specified %d values for %d inputs.'
The optional domain name. % (len(values), len(inputs)))
zero_based : bool, optional, default=True for tensor, value in zip(inputs, values):
Whether number the index from 0. self.feed_tensor(tensor, value)
# Run the graph according to the specified include/exclude rule.
stage_str = executing_stage if executing_stage else 'default'
exec_stage = _PREDEFINED_GRAPH_EXECUTING_STAGES[stage_str]
self.RunGraph(name, exec_stage['include'], exec_stage['exclude'])
# Maybe return the output values.
if return_outputs and outputs is not None:
if len(outputs) == 1:
return outputs[0].get_value()
else:
return [outputs[i].get_value() for i in range(len(outputs))]
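The include/exclude lookup used above is easy to mirror without Dragon; a standalone sketch, with the ``_PREDEFINED_GRAPH_EXECUTING_STAGES`` table replicated from the bottom of this file:

```python
# Replicated from this module for illustration.
_PREDEFINED_GRAPH_EXECUTING_STAGES = {
    'default': {'include': '', 'exclude': ''},
    'forward': {'include': '', 'exclude': 'Gradient'},
    'backward': {'include': 'Gradient', 'exclude': 'Generate'},
}

def resolve_stage(executing_stage=None):
    """Map an optional stage name to the include/exclude operator rules."""
    stage_str = executing_stage if executing_stage else 'default'
    return _PREDEFINED_GRAPH_EXECUTING_STAGES[stage_str]

print(resolve_stage('forward'))  # {'include': '', 'exclude': 'Gradient'}
```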
def run_operator(self, op_def):
"""Run the operator.
Returns Parameters
------- ----------
str op_def : Union[OperatorDef, Sequence[OperatorDef]]
The unique dummy name. The ``OperatorDef`` protocol buffer.
""" """
return get_workspace().GetDummyName( cfg = config.config()
basename, suffix, domain, zero_based) if isinstance(op_def, dragon_pb2.OperatorDef):
op_def = op_def.SerializePartialToString()
self.RunOperator(op_def, cfg.graph_verbosity > 0)
def unique_name(self, name, suffix='', namespace='', zero_based=True):
"""Return an unique name.
def get_tensor_name(tensor): Names in different ``namespace`` scopes can be the same.
"""Return the name of tensor in current workspace.
Parameters Parameters
---------- ----------
tensor : Union[dragon.Tensor, str] name : str
The tensor to query. The name to make unique.
suffix : str, optional
The optional suffix added to the name.
namespace : str, optional
The optional scope to make unique within.
zero_based : bool, optional, default=True
**True** to number the index from 0, otherwise from 1.
Returns Returns
------- -------
str str
The tensor name. The unique name.
""" """
tensor = _stringify_tensor(tensor) return self.UniqueName(name, suffix, namespace, zero_based)
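A hedged example of the naming helper above, matching how ``OpCollector`` calls it; it assumes a working Dragon install and the exact returned names are indicative only.

```python
import dragon

ws = dragon.get_workspace()
# zero_based=False numbers handles from 1, as the collectors do.
name1 = ws.unique_name('Conv', namespace='Op', zero_based=False)
name2 = ws.unique_name('Conv', namespace='Op', zero_based=False)
print(name1, name2)  # Two distinct names within the 'Op' namespace, e.g. Conv_1 Conv_2
```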
return get_workspace().GetTensorName(tensor)
def get_workspace(): def get_workspace():
...@@ -370,69 +421,6 @@ def get_workspace(): ...@@ -370,69 +421,6 @@ def get_workspace():
return _GLOBAL_DEFAULT_WORKSPACE_STACK.get_default() return _GLOBAL_DEFAULT_WORKSPACE_STACK.get_default()
def has_tensor(tensor):
"""Return a bool indicating if tensor is in current workspace.
Parameters
----------
tensor : Union[dragon.Tensor, str]
The tensor to query.
Returns
-------
bool
**True** if specified tensor is existing otherwise **False**.
"""
tensor = _stringify_tensor(tensor)
return get_workspace().HasTensor(tensor)
def load(file_path, format='pkl'):
"""Load tensors from a binary file.
Parameters
----------
file_path : str
The path of binary file.
format : {'pkl', 'caffe'}, optional
The serializing format.
"""
assert os.path.exists(file_path), \
'File(%s) does not exist.' % file_path
if format == 'pkl':
try:
with open(file_path, 'rb') as f:
state_dict = six.moves.pickle.load(f)
except UnicodeDecodeError:
with open(file_path, 'rb') as f:
state_dict = six.moves.pickle.load(f, encoding='iso-8859-1')
logging.info('Load From Model@: ' + file_path)
logging.info('Model Format: Pickle')
for k, v in state_dict.items():
if has_tensor(k):
feed_tensor(k, v)
logging.info('Tensor({}) is loaded.'.format(k))
elif format == 'caffe':
get_workspace().Load(file_path, 1)
else:
raise TypeError('Unknown binary format: ' + format)
def reset_tensor(tensor):
"""Reset the memory of tensor.
Parameters
----------
tensor : Union[dragon.Tensor, str]
The tensor to reset.
"""
tensor = _stringify_tensor(tensor)
return get_workspace().ResetTensor(tensor)
def reset_workspace(): def reset_workspace():
"""Reset the current default workspace.""" """Reset the current default workspace."""
if not _GLOBAL_DEFAULT_WORKSPACE_STACK.is_cleared(): if not _GLOBAL_DEFAULT_WORKSPACE_STACK.is_cleared():
...@@ -443,185 +431,9 @@ def reset_workspace(): ...@@ -443,185 +431,9 @@ def reset_workspace():
_GLOBAL_DEFAULT_WORKSPACE_STACK.reset() _GLOBAL_DEFAULT_WORKSPACE_STACK.reset()
def run_backward( def _stringify_object(obj):
forward_ops, """Try to stringify an object."""
targets, return obj.id if hasattr(obj, 'id') else obj
sources=None,
input_grads=None,
ignored_grads=None,
):
"""Compute the gradients of input operators.
Parameters
----------
forward_ops : Sequence[OperatorDef]
The referring operators to generate gradients.
targets : Sequence[str]
The solving targets.
sources : Sequence[str], optional
The optional sources to hook the intermediate grads.
input_grads : Sequence[str], optional
The external input grads.
ignored_grads : Sequence[str], optional
The grads that are explicitly ignored.
"""
cfg = config.config()
get_workspace().RunBackward(
forward_ops,
targets,
sources if sources else [],
input_grads if input_grads else [],
ignored_grads if ignored_grads else [],
cfg.graph_optimization > 2,
cfg.graph_verbosity > 0,
)
def run_graph(
graph,
inputs=(),
outputs=(),
stage=None,
return_outputs=True,
):
"""Run the graph in current workspace.
Parameters
----------
graph : str
The name of graph.
inputs : tuple
The **inputs** and **values**.
outputs : Sequence[dragon.Tensor]
The outputs of the graph.
stage : str, optional
The preset custom stages.
return_outputs : bool, optional, default=False
Whether to return the outputs.
Returns
-------
Sequence[numpy.ndarray]
The outputs which are copied to numpy array.
"""
# The explicit feeding.
if len(inputs) > 0 and len(inputs[0]) > 0:
if len(inputs[0]) != len(inputs[1]):
raise RuntimeError(
'Defined {} args, but {} are given.'
.format(len(inputs[0]), len(inputs[1]))
)
for idx in range(len(inputs[0])):
feed_tensor(inputs[0][idx], inputs[1][idx])
# Run the graph according to the specified include/exclude rule.
runtime_stage = stage if stage else 'default'
rule = _PREDEFINED_GRAPH_RUNTIME_STAGES[runtime_stage]
get_workspace().RunGraph(
graph, rule['include'], rule['exclude'])
# Try to return the outputs.
# Force to return may lead to asserts if outputs are not computed.
if return_outputs:
if len(outputs) == 0:
return None
elif len(outputs) == 1:
return outputs[0].get_value()
else:
return [outputs[i].get_value() for i in range(len(outputs))]
def run_operator(op_def):
"""Run the operator(s) in current workspace.
Parameters
----------
op_def : Union[OperatorDef, Sequence[OperatorDef]]
The definition of operator(s).
"""
cfg = config.config()
if isinstance(op_def, dragon_pb2.OperatorDef):
op_def = op_def.SerializeToString()
get_workspace().RunOperator(op_def, cfg.graph_verbosity > 0)
def save(
tensors,
filename,
prefix='',
suffix='.pkl',
format='pkl',
):
"""Serialize tensors into a binary file.
The file path is formatted as:
<prefix> + <filename> + <suffix>
Parameters
----------
tensors : Sequence[dragon.Tensor]
The tensors to be wrote.
filename : str
The filename.
prefix : str, optional, default=''
The prefix.
suffix : str, optional, default='.pkl'
The suffix.
format : {'pkl', 'caffe'}, optional
The serializing format.
"""
file_path = prefix + filename + suffix
dir = os.path.split(file_path)[0]
if len(dir) > 0 and not os.path.exists(dir):
os.makedirs(dir)
if format == 'pkl':
state_dict = {}
for tensor in tensors:
state_dict[tensor.name] = fetch_tensor(tensor)
with open(file_path, 'wb') as f:
six.moves.pickle.dump(
state_dict, f,
six.moves.pickle.HIGHEST_PROTOCOL,
)
logging.info('Save model to: ' + file_path)
logging.info('Model Format: Pickle')
elif format == 'caffe':
names = [tensor.name for tensor in tensors]
get_workspace().Save(file_path, names, 1)
else:
raise TypeError('Unknown binary format: ' + format)
def set_tensor_alias(tensor, alias):
"""Bind an alias to an existing tensor.
Parameters
----------
tensor : Union[dragon.Tensor, str]
The tensor to bind the alias.
alias : str
The alias.
"""
tensor = _stringify_tensor(tensor)
get_workspace().SetTensorAlias(tensor, alias)
def _stringify_proto(obj):
"""Try to stringify a proto-buffer structure."""
return obj.SerializeToString()
def _stringify_tensor(obj):
"""Try to stringify a tensor."""
if hasattr(obj, 'id'):
return str(obj.id)
else:
return str(obj)
class _DefaultWorkspaceStack(tls.Stack): class _DefaultWorkspaceStack(tls.Stack):
...@@ -654,11 +466,11 @@ class _DefaultWorkspaceStack(tls.Stack): ...@@ -654,11 +466,11 @@ class _DefaultWorkspaceStack(tls.Stack):
yield g yield g
# Define a global stack to store the workspaces of current thread. # Global stack to store the workspaces of current thread.
_GLOBAL_DEFAULT_WORKSPACE_STACK = _DefaultWorkspaceStack() _GLOBAL_DEFAULT_WORKSPACE_STACK = _DefaultWorkspaceStack()
# Define some useful runtime stages. # Predefined graph executing stages.
_PREDEFINED_GRAPH_RUNTIME_STAGES = { _PREDEFINED_GRAPH_EXECUTING_STAGES = {
'default': {'include': '', 'exclude': ''}, 'default': {'include': '', 'exclude': ''},
'forward': {'include': '', 'exclude': 'Gradient'}, 'forward': {'include': '', 'exclude': 'Gradient'},
'backward': {'include': 'Gradient', 'exclude': 'Generate'}, 'backward': {'include': 'Gradient', 'exclude': 'Generate'},
......
...@@ -1425,16 +1425,16 @@ def sum(inputs, axis=None, keep_dims=False, **kwargs): ...@@ -1425,16 +1425,16 @@ def sum(inputs, axis=None, keep_dims=False, **kwargs):
@OpSchema.num_inputs(1) @OpSchema.num_inputs(1)
@ArgHelper.repeated_desc(name='multiples') @ArgHelper.repeated_desc(name='repeats')
def tile(inputs, multiples, **kwargs): def tile(inputs, repeats, **kwargs):
r"""Tile the input according to the given multiples. r"""Tile the input according to the given repeats.
Parameters Parameters
---------- ----------
inputs : dragon.Tensor inputs : dragon.Tensor
The input tensor. The input tensor.
multiples : Sequence[Union[int, dragon.Tensor]] repeats : Sequence[Union[int, dragon.Tensor]]
The multiple for each axis. The number of repetitions for each axis.
Returns Returns
------- -------
...@@ -1446,8 +1446,8 @@ def tile(inputs, multiples, **kwargs): ...@@ -1446,8 +1446,8 @@ def tile(inputs, multiples, **kwargs):
op_lib = array_ops_lib.Tile op_lib = array_ops_lib.Tile
if context.executing_eagerly(): if context.executing_eagerly():
return op_lib \ return op_lib \
.instantiate(ndim=len(args['multiples'])) \ .instantiate(ndim=len(args['repeats'])) \
.apply([inputs], args['multiples']) .apply([inputs], args['repeats'])
else: else:
return op_lib.blend(**args) return op_lib.blend(**args)
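A hedged call sketch for the renamed argument; it assumes the functional is exported as ``dragon.tile`` in the public API and that ``dragon.Tensor('x')`` follows the docstring example earlier in this commit.

```python
import dragon

x = dragon.Tensor('x')              # Symbolic input
y = dragon.tile(x, repeats=[1, 2])  # Each axis repeated by the matching entry in `repeats`
```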
......
...@@ -18,18 +18,15 @@ class Arange(Operator): ...@@ -18,18 +18,15 @@ class Arange(Operator):
'dtype': self.dtype, 'dtype': self.dtype,
'slice_descs': [ 'slice_descs': [
'${{HANDLE}}/slice[{}]' '${{HANDLE}}/slice[{}]'
.format(n) for n in range(self.num_args) .format(n) for n in range(self.num_args)],
],
} }
} }
def feed(self, ws, handle, slice_args): def feed(self, ws, handle, slice_args):
for i in range(len(slice_args)): for i in range(len(slice_args)):
self.feed_arg( self.feed_arg(
ws, ws, '{}/slice[{}]'.format(handle, i),
'{}/slice[{}]'.format(handle, i), slice_args[i], 'float32')
slice_args[i], 'float32'
)
def forward(self, slice_args, trainable=False): def forward(self, slice_args, trainable=False):
output = self.dispatch( output = self.dispatch(
...@@ -72,9 +69,7 @@ class Cast(Operator): ...@@ -72,9 +69,7 @@ class Cast(Operator):
def attributes(self): def attributes(self):
return { return {
'op_type': 'Cast', 'op_type': 'Cast',
'arguments': { 'arguments': {'dtype': self.dtype},
'dtype': self.dtype,
}
} }
def forward(self, inputs, inplace=False): def forward(self, inputs, inplace=False):
...@@ -104,18 +99,15 @@ class ChannelNormalize(Operator): ...@@ -104,18 +99,15 @@ class ChannelNormalize(Operator):
'dtype': self.dtype, 'dtype': self.dtype,
'perm_descs': [ 'perm_descs': [
'${{HANDLE}}/perm[{}]' '${{HANDLE}}/perm[{}]'
.format(n) for n in range(self.ndim) .format(n) for n in range(self.ndim)],
],
} }
} }
def feed(self, ws, handle, perm): def feed(self, ws, handle, perm):
for i in range(self.ndim): for i in range(self.ndim):
self.feed_arg( self.feed_arg(
ws, ws, '{}/perm[{}]'.format(handle, i),
'{}/perm[{}]'.format(handle, i), perm[i], 'int64')
perm[i], 'int64'
)
def forward(self, inputs, perm): def forward(self, inputs, perm):
return self.dispatch( return self.dispatch(
...@@ -152,9 +144,7 @@ class Concat(Operator): ...@@ -152,9 +144,7 @@ class Concat(Operator):
def attributes(self): def attributes(self):
return { return {
'op_type': 'Concat', 'op_type': 'Concat',
'arguments': { 'arguments': {'axis': self.axis},
'axis': self.axis,
}
} }
def forward(self, inputs): def forward(self, inputs):
...@@ -194,24 +184,21 @@ class Expand(Operator): ...@@ -194,24 +184,21 @@ class Expand(Operator):
'arguments': { 'arguments': {
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]' '${{HANDLE}}/dims[{}]'
.format(n) for n in range(self.ndim) .format(n) for n in range(self.ndim)],
],
} }
} }
def feed(self, ws, handle, dims): def feed(self, ws, handle, dims):
for i, d in enumerate(dims): for i, dim in enumerate(dims):
self.feed_arg( self.feed_arg(
ws, ws, '{}/dims[{}]'.format(handle, i),
'{}/dims[{}]'.format(handle, i), dim, 'int64')
d, 'int64'
)
def forward(self, inputs, dims): def forward(self, inputs, dims):
return self.dispatch( return self.dispatch(
inputs, [self.alloc()], inputs, [self.alloc()],
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, dims) self.feed(ws, handle, dims),
) )
...@@ -372,24 +359,21 @@ class Pad(Operator): ...@@ -372,24 +359,21 @@ class Pad(Operator):
'value': self.value, 'value': self.value,
'pads_descs': [ 'pads_descs': [
'${{HANDLE}}/pads[{}]' '${{HANDLE}}/pads[{}]'
.format(n) for n in range(self.ndim * 2) .format(n) for n in range(self.ndim * 2)],
],
} }
} }
def feed(self, ws, handle, pads): def feed(self, ws, handle, pads):
for i, e in enumerate(pads): for i, e in enumerate(pads):
self.feed_arg( self.feed_arg(
ws, ws, '{}/pads[{}]'.format(handle, i),
'{}/pads[{}]'.format(handle, i), e, 'int64')
e, 'int64'
)
def forward(self, inputs, pads): def forward(self, inputs, pads):
return self.dispatch( return self.dispatch(
inputs, [self.alloc()], inputs, [self.alloc()],
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, pads) self.feed(ws, handle, pads),
) )
...@@ -443,18 +427,15 @@ class Reshape(Operator): ...@@ -443,18 +427,15 @@ class Reshape(Operator):
'arguments': { 'arguments': {
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]' '${{HANDLE}}/dims[{}]'
.format(n) for n in range(self.ndim) .format(n) for n in range(self.ndim)],
],
} }
} }
def feed(self, ws, handle, shape): def feed(self, ws, handle, shape):
for i, e in enumerate(shape): for i, e in enumerate(shape):
self.feed_arg( self.feed_arg(
ws, ws, '{}/dims[{}]'.format(handle, i),
'{}/dims[{}]'.format(handle, i), e, 'int64')
e, 'int64'
)
def forward(self, inputs, shape, inplace=False): def forward(self, inputs, shape, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()] outputs = [inputs[0] if inplace else self.alloc()]
...@@ -476,33 +457,27 @@ class Slice(Operator): ...@@ -476,33 +457,27 @@ class Slice(Operator):
'arguments': { 'arguments': {
'starts_descs': [ 'starts_descs': [
'${{HANDLE}}/starts[{}]' '${{HANDLE}}/starts[{}]'
.format(n) for n in range(self.ndim) .format(n) for n in range(self.ndim)],
],
'sizes_descs': [ 'sizes_descs': [
'${{HANDLE}}/sizes[{}]' '${{HANDLE}}/sizes[{}]'
.format(n) for n in range(self.ndim) .format(n) for n in range(self.ndim)],
],
} }
} }
def feed(self, ws, handle, starts, sizes): def feed(self, ws, handle, starts, sizes):
for i, e in enumerate(starts): for i in range(len(starts)):
self.feed_arg( self.feed_arg(
ws, ws, '{}/starts[{}]'.format(handle, i),
'{}/starts[{}]'.format(handle, i), starts[i], 'int64')
e, 'int64'
)
self.feed_arg( self.feed_arg(
ws, ws, '{}/sizes[{}]'.format(handle, i),
'{}/sizes[{}]'.format(handle, i), sizes[i], 'int64')
sizes[i], 'int64'
)
def forward(self, inputs, starts, sizes): def forward(self, inputs, starts, sizes):
return self.dispatch( return self.dispatch(
inputs, [self.alloc()], inputs, [self.alloc()],
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, starts, sizes) self.feed(ws, handle, starts, sizes),
) )
...@@ -547,9 +522,7 @@ class Squeeze(Operator): ...@@ -547,9 +522,7 @@ class Squeeze(Operator):
def attributes(self): def attributes(self):
return { return {
'op_type': 'Squeeze', 'op_type': 'Squeeze',
'arguments': { 'arguments': {'axes': self.axes},
'axes': self.axes,
},
} }
def forward(self, inputs, inplace=False): def forward(self, inputs, inplace=False):
...@@ -565,9 +538,7 @@ class Stack(Operator): ...@@ -565,9 +538,7 @@ class Stack(Operator):
def attributes(self): def attributes(self):
return { return {
'op_type': 'Stack', 'op_type': 'Stack',
'arguments': { 'arguments': {'axis': self.axis},
'axis': self.axis,
}
} }
def forward(self, inputs): def forward(self, inputs):
...@@ -583,26 +554,23 @@ class Tile(Operator): ...@@ -583,26 +554,23 @@ class Tile(Operator):
return { return {
'op_type': 'Tile', 'op_type': 'Tile',
'arguments': { 'arguments': {
'multiples_descs': [ 'repeats_descs': [
'${{HANDLE}}/multiples[{}]' '${{HANDLE}}/repeats[{}]'
.format(n) for n in range(self.ndim) .format(n) for n in range(self.ndim)],
],
} }
} }
def feed(self, ws, handle, multiples): def feed(self, ws, handle, repeats):
for i, d in enumerate(multiples): for i, size in enumerate(repeats):
self.feed_arg( self.feed_arg(
ws, ws, '{}/repeats[{}]'.format(handle, i),
'{}/multiples[{}]'.format(handle, i), size, 'int64')
d, 'int64'
)
def forward(self, inputs, multiples): def forward(self, inputs, repeats):
return self.dispatch( return self.dispatch(
inputs, [self.alloc()], inputs, [self.alloc()],
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, multiples) self.feed(ws, handle, repeats),
) )
...@@ -617,24 +585,21 @@ class Transpose(Operator): ...@@ -617,24 +585,21 @@ class Transpose(Operator):
'arguments': { 'arguments': {
'perm_descs': [ 'perm_descs': [
'${{HANDLE}}/perm[{}]' '${{HANDLE}}/perm[{}]'
.format(n) for n in range(self.ndim) .format(n) for n in range(self.ndim)],
],
} }
} }
def feed(self, ws, handle, perm): def feed(self, ws, handle, perm):
for i in range(self.ndim): for i in range(self.ndim):
self.feed_arg( self.feed_arg(
ws, ws, '{}/perm[{}]'.format(handle, i),
'{}/perm[{}]'.format(handle, i), perm[i], 'int64')
perm[i], 'int64'
)
def forward(self, inputs, perm): def forward(self, inputs, perm):
return self.dispatch( return self.dispatch(
inputs, [self.alloc()], inputs, [self.alloc()],
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, perm) self.feed(ws, handle, perm),
) )
......
...@@ -37,15 +37,11 @@ class Assign(Operator): ...@@ -37,15 +37,11 @@ class Assign(Operator):
def feed(self, ws, handle, starts, sizes): def feed(self, ws, handle, starts, sizes):
for i in range(self.ndim): for i in range(self.ndim):
self.feed_arg( self.feed_arg(
ws, ws, '{}/starts[{}]'.format(handle, i),
'{}/starts[{}]'.format(handle, i), starts[i], 'int64')
starts[i], 'int64',
)
self.feed_arg( self.feed_arg(
ws, ws, '{}/sizes[{}]'.format(handle, i),
'{}/sizes[{}]'.format(handle, i), sizes[i], 'int64')
sizes[i], 'int64',
)
def forward(self, inputs, starts, sizes): def forward(self, inputs, starts, sizes):
return self.dispatch( return self.dispatch(
......
...@@ -24,12 +24,10 @@ class Initializer(Operator): ...@@ -24,12 +24,10 @@ class Initializer(Operator):
self.dtype = kwargs.get('dtype', 'float32') self.dtype = kwargs.get('dtype', 'float32')
def feed(self, ws, handle, shape): def feed(self, ws, handle, shape):
for i, e in enumerate(shape): for i, dim in enumerate(shape):
self.feed_arg( self.feed_arg(
ws, ws, '{}/dims[{}]'.format(handle, i),
'{}/dims[{}]'.format(handle, i), dim, 'int64')
e, 'int64'
)
def forward( def forward(
self, self,
...@@ -39,18 +37,16 @@ class Initializer(Operator): ...@@ -39,18 +37,16 @@ class Initializer(Operator):
trainable=False, trainable=False,
): ):
inputs = [] if shape_like is None else [shape_like] inputs = [] if shape_like is None else [shape_like]
outputs = [ outputs = [ops.new_leaf(
ops.new_leaf( shape=shape,
shape=shape, dtype=self.dtype,
dtype=self.dtype, device=self.alloc(),
device=self.alloc(), trainable=trainable,
trainable=trainable, ) if out is None else out]
) if out is None else out
]
return self.dispatch( return self.dispatch(
inputs, outputs, inputs, outputs,
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, shape) self.feed(ws, handle, shape),
) )
...@@ -67,8 +63,7 @@ class Eye(Initializer): ...@@ -67,8 +63,7 @@ class Eye(Initializer):
'dtype': self.dtype, 'dtype': self.dtype,
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]'.format(n) '${{HANDLE}}/dims[{}]'.format(n)
for n in range(self.ndim) for n in range(self.ndim)],
],
}, },
} }
...@@ -86,8 +81,7 @@ class Fill(Initializer): ...@@ -86,8 +81,7 @@ class Fill(Initializer):
'value': float(self.value), 'value': float(self.value),
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]'.format(n) '${{HANDLE}}/dims[{}]'.format(n)
for n in range(self.ndim) for n in range(self.ndim)],
],
}, },
} }
...@@ -107,8 +101,7 @@ class GlorotNormal(Initializer): ...@@ -107,8 +101,7 @@ class GlorotNormal(Initializer):
'mode': self.mode.lower(), 'mode': self.mode.lower(),
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]'.format(n) '${{HANDLE}}/dims[{}]'.format(n)
for n in range(self.ndim) for n in range(self.ndim)],
],
}, },
} }
...@@ -128,8 +121,7 @@ class GlorotUniform(Initializer): ...@@ -128,8 +121,7 @@ class GlorotUniform(Initializer):
'mode': self.mode.lower(), 'mode': self.mode.lower(),
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]'.format(n) '${{HANDLE}}/dims[{}]'.format(n)
for n in range(self.ndim) for n in range(self.ndim)],
],
}, },
} }
...@@ -149,8 +141,7 @@ class RandomNormal(Initializer): ...@@ -149,8 +141,7 @@ class RandomNormal(Initializer):
'std': float(self.std), 'std': float(self.std),
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]'.format(n) '${{HANDLE}}/dims[{}]'.format(n)
for n in range(self.ndim) for n in range(self.ndim)],
],
}, },
} }
...@@ -170,8 +161,7 @@ class RandomUniform(Initializer): ...@@ -170,8 +161,7 @@ class RandomUniform(Initializer):
'high': float(self.high), 'high': float(self.high),
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]'.format(n) '${{HANDLE}}/dims[{}]'.format(n)
for n in range(self.ndim) for n in range(self.ndim)],
],
}, },
} }
...@@ -191,7 +181,6 @@ class TruncatedNormal(Initializer): ...@@ -191,7 +181,6 @@ class TruncatedNormal(Initializer):
'std': float(self.std), 'std': float(self.std),
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]'.format(n) '${{HANDLE}}/dims[{}]'.format(n)
for n in range(self.ndim) for n in range(self.ndim)],
],
}, },
} }
...@@ -83,7 +83,7 @@ class LpNormalize(Operator): ...@@ -83,7 +83,7 @@ class LpNormalize(Operator):
} }
} }
def forward(self,inputs): def forward(self, inputs):
return self.dispatch(inputs, [self.alloc()]) return self.dispatch(inputs, [self.alloc()])
......
...@@ -25,14 +25,14 @@ from dragon.core.ops import init_ops_lib ...@@ -25,14 +25,14 @@ from dragon.core.ops import init_ops_lib
from dragon.core.ops import math_ops_lib from dragon.core.ops import math_ops_lib
def add(self, value): def add(self, other):
r"""Compute the element-wise addition. r"""Compute the element-wise addition.
.. math:: \text{out} = \text{self} + \text{value} .. math:: \text{out} = \text{self} + \text{other}
Parameters Parameters
---------- ----------
value : Union[dragon.EagerTensor, number] other : Union[dragon.EagerTensor, number]
The value to add. The value to add.
Returns Returns
...@@ -45,7 +45,7 @@ def add(self, value): ...@@ -45,7 +45,7 @@ def add(self, value):
`dragon.math.add(...)`_ : Compute the element-wise addition. `dragon.math.add(...)`_ : Compute the element-wise addition.
""" """
return _binary_op(self, value, 'Add') return _binary_op(self, other, 'Add')
def astype(self, dtype, inplace=False): def astype(self, dtype, inplace=False):
...@@ -114,14 +114,14 @@ def copy(self): ...@@ -114,14 +114,14 @@ def copy(self):
.instantiate().apply([self], None) .instantiate().apply([self], None)
def div(self, value): def div(self, other):
r"""Compute the element-wise division. r"""Compute the element-wise division.
.. math:: \text{out} = \text{self} \div \text{value} .. math:: \text{out} = \text{self} \div \text{other}
Parameters Parameters
---------- ----------
value : Union[dragon.EagerTensor, number] other : Union[dragon.EagerTensor, number]
The value to divide. The value to divide.
Returns Returns
...@@ -134,7 +134,7 @@ def div(self, value): ...@@ -134,7 +134,7 @@ def div(self, value):
`dragon.math.div(...)`_ : Compute the element-wise division. `dragon.math.div(...)`_ : Compute the element-wise division.
""" """
return _binary_op(self, value, 'Div') return _binary_op(self, other, 'Div')
def ge(self, other): def ge(self, other):
...@@ -271,14 +271,14 @@ def gt(self, other): ...@@ -271,14 +271,14 @@ def gt(self, other):
return _binary_op(self, other, 'Greater') return _binary_op(self, other, 'Greater')
def iadd(self, value): def iadd(self, other):
r"""Compute the element-wise addition. r"""Compute the element-wise addition.
.. math:: \text{self} \mathrel{+}= \text{value} .. math:: \text{self} \mathrel{+}= \text{other}
Parameters Parameters
---------- ----------
value : Union[dragon.EagerTensor, number] other : Union[dragon.EagerTensor, number]
The value to add. The value to add.
Returns Returns
...@@ -291,17 +291,17 @@ def iadd(self, value): ...@@ -291,17 +291,17 @@ def iadd(self, value):
`dragon.math.add(...)`_ : Compute the element-wise addition. `dragon.math.add(...)`_ : Compute the element-wise addition.
""" """
return _binary_op(self, value, 'Add', [self]) return _binary_op(self, other, 'Add', [self])
def idiv(self, value): def idiv(self, other):
r"""Compute the element-wise division. r"""Compute the element-wise division.
.. math:: \text{self} \mathrel{\div}= \text{value} .. math:: \text{self} \mathrel{\div}= \text{other}
Parameters Parameters
---------- ----------
value : Union[dragon.EagerTensor, number] other : Union[dragon.EagerTensor, number]
The value to divide. The value to divide.
Returns Returns
...@@ -314,17 +314,17 @@ def idiv(self, value): ...@@ -314,17 +314,17 @@ def idiv(self, value):
`dragon.math.div(...)`_ : Compute the element-wise division. `dragon.math.div(...)`_ : Compute the element-wise division.
""" """
return _binary_op(self, value, 'Div', [self]) return _binary_op(self, other, 'Div', [self])
def imul(self, value): def imul(self, other):
r"""Compute the element-wise multiplication. r"""Compute the element-wise multiplication.
.. math:: \text{self} \mathrel{\times}= \text{value} .. math:: \text{self} \mathrel{\times}= \text{other}
Parameters Parameters
---------- ----------
value : Union[dragon.EagerTensor, number] other : Union[dragon.EagerTensor, number]
The value to multiply. The value to multiply.
Returns Returns
...@@ -337,17 +337,17 @@ def imul(self, value): ...@@ -337,17 +337,17 @@ def imul(self, value):
`dragon.math.mul(...)`_ : Compute the element-wise multiplication. `dragon.math.mul(...)`_ : Compute the element-wise multiplication.
""" """
return _binary_op(self, value, 'Mul', [self]) return _binary_op(self, other, 'Mul', [self])
def isub(self, value): def isub(self, other):
r"""Compute the element-wise division. r"""Compute the element-wise division.
.. math:: \text{self} \mathrel{-}= \text{value} .. math:: \text{self} \mathrel{-}= \text{other}
Parameters Parameters
---------- ----------
value : Union[dragon.EagerTensor, number] other : Union[dragon.EagerTensor, number]
The value to subtract. The value to subtract.
Returns Returns
...@@ -360,7 +360,7 @@ def isub(self, value): ...@@ -360,7 +360,7 @@ def isub(self, value):
`dragon.math.sub(...)`_ : Compute the element-wise subtraction. `dragon.math.sub(...)`_ : Compute the element-wise subtraction.
""" """
return _binary_op(self, value, 'Sub', [self]) return _binary_op(self, other, 'Sub', [self])
def le(self, other): def le(self, other):
...@@ -409,14 +409,14 @@ def lt(self, other): ...@@ -409,14 +409,14 @@ def lt(self, other):
return _binary_op(self, other, 'Less') return _binary_op(self, other, 'Less')
def mul(self, value): def mul(self, other):
r"""Compute the element-wise multiplication. r"""Compute the element-wise multiplication.
.. math:: \text{out} = \text{self} \times \text{value} .. math:: \text{out} = \text{self} \times \text{other}
Parameters Parameters
---------- ----------
value : Union[dragon.EagerTensor, number] other : Union[dragon.EagerTensor, number]
The value to multiply. The value to multiply.
Returns Returns
...@@ -429,7 +429,7 @@ def mul(self, value): ...@@ -429,7 +429,7 @@ def mul(self, value):
`dragon.math.mul(...)`_ : Compute the element-wise multiplication. `dragon.math.mul(...)`_ : Compute the element-wise multiplication.
""" """
return _binary_op(self, value, 'Mul') return _binary_op(self, other, 'Mul')
def neg(self): def neg(self):
...@@ -478,14 +478,14 @@ def normal(self, mean=0, std=1): ...@@ -478,14 +478,14 @@ def normal(self, mean=0, std=1):
).apply(shape, out=self) ).apply(shape, out=self)
def radd(self, value): def radd(self, other):
r"""Compute the element-wise addition. r"""Compute the element-wise addition.
.. math:: \text{out} = \text{value} + \text{self} .. math:: \text{out} = \text{other} + \text{self}
Parameters Parameters
---------- ----------
value : Union[dragon.EagerTensor, number] other : Union[dragon.EagerTensor, number]
The value to add. The value to add.
Returns Returns
...@@ -498,17 +498,17 @@ def radd(self, value): ...@@ -498,17 +498,17 @@ def radd(self, value):
`dragon.math.add(...)`_ : Compute the element-wise addition. `dragon.math.add(...)`_ : Compute the element-wise addition.
""" """
return _binary_op(value, self, 'Add') return _binary_op(other, self, 'Add')
def rdiv(self, value): def rdiv(self, other):
r"""Compute the element-wise division. r"""Compute the element-wise division.
.. math:: \text{out} = \text{value} \div \text{self} .. math:: \text{out} = \text{other} \div \text{self}
Parameters Parameters
---------- ----------
value : Union[dragon.EagerTensor, number] other : Union[dragon.EagerTensor, number]
The value to be divided. The value to be divided.
Returns Returns
...@@ -521,7 +521,7 @@ def rdiv(self, value): ...@@ -521,7 +521,7 @@ def rdiv(self, value):
`dragon.math.div(...)`_ : Compute the element-wise division. `dragon.math.div(...)`_ : Compute the element-wise division.
""" """
return _binary_op(value, self, 'Div') return _binary_op(other, self, 'Div')
def reshape(self, shape): def reshape(self, shape):
...@@ -546,14 +546,14 @@ def reshape(self, shape): ...@@ -546,14 +546,14 @@ def reshape(self, shape):
return array_ops.reshape(self, shape=shape) return array_ops.reshape(self, shape=shape)
def rmul(self, value): def rmul(self, other):
r"""Compute the element-wise multiplication. r"""Compute the element-wise multiplication.
.. math:: \text{out} = \text{value} \times \text{self} .. math:: \text{out} = \text{other} \times \text{self}
Parameters Parameters
---------- ----------
value : Union[dragon.EagerTensor, number] other : Union[dragon.EagerTensor, number]
The value to multiply. The value to multiply.
Returns Returns
...@@ -566,17 +566,17 @@ def rmul(self, value): ...@@ -566,17 +566,17 @@ def rmul(self, value):
`dragon.math.mul(...)`_ : Compute the element-wise multiplication. `dragon.math.mul(...)`_ : Compute the element-wise multiplication.
""" """
return _binary_op(value, self, 'Mul') return _binary_op(other, self, 'Mul')
def rsub(self, value): def rsub(self, other):
r"""Compute the element-wise subtraction. r"""Compute the element-wise subtraction.
.. math:: \text{out} = \text{value} - \text{self} .. math:: \text{out} = \text{other} - \text{self}
Parameters Parameters
---------- ----------
value : Union[dragon.EagerTensor, number] other : Union[dragon.EagerTensor, number]
The value to be subtracted. The value to be subtracted.
Returns Returns
...@@ -589,7 +589,7 @@ def rsub(self, value): ...@@ -589,7 +589,7 @@ def rsub(self, value):
`dragon.math.sub(...)`_ : Compute the element-wise subtraction. `dragon.math.sub(...)`_ : Compute the element-wise subtraction.
""" """
return _binary_op(value, self, 'Sub') return _binary_op(other, self, 'Sub')
def setitem(self, key, value): def setitem(self, key, value):
...@@ -618,14 +618,14 @@ def setitem(self, key, value): ...@@ -618,14 +618,14 @@ def setitem(self, key, value):
_section_assign(self, value, starts, sizes) _section_assign(self, value, starts, sizes)
def sub(self, value): def sub(self, other):
r"""Compute the element-wise subtraction. r"""Compute the element-wise subtraction.
.. math:: \text{out} = \text{self} - \text{value} .. math:: \text{out} = \text{self} - \text{other}
Parameters Parameters
---------- ----------
value : Union[dragon.EagerTensor, number] other : Union[dragon.EagerTensor, number]
The value to subtract. The value to subtract.
Returns Returns
...@@ -638,7 +638,7 @@ def sub(self, value): ...@@ -638,7 +638,7 @@ def sub(self, value):
`dragon.math.sub(...)`_ : Compute the element-wise subtraction. `dragon.math.sub(...)`_ : Compute the element-wise subtraction.
""" """
return _binary_op(self, value, 'Sub') return _binary_op(self, other, 'Sub')
def truncated_normal(self, mean=0, std=1): def truncated_normal(self, mean=0, std=1):
...@@ -809,3 +809,4 @@ EagerTensor.__rsub__ = rsub ...@@ -809,3 +809,4 @@ EagerTensor.__rsub__ = rsub
EagerTensor.__rtruediv__ = rdiv EagerTensor.__rtruediv__ = rdiv
EagerTensor.__setitem__ = setitem EagerTensor.__setitem__ = setitem
EagerTensor.__sub__ = sub EagerTensor.__sub__ = sub
EagerTensor.__truediv__ = div
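The module-level functions above are attached to ``EagerTensor`` after the class is defined; a minimal standalone sketch of that binding pattern, where ``Vec`` is purely illustrative:

```python
class Vec(object):
    """Tiny illustrative container; stands in for EagerTensor."""
    def __init__(self, data):
        self.data = list(data)

def add(self, other):
    """Module-level implementation, later bound as the dunder method."""
    rhs = other.data if isinstance(other, Vec) else [other] * len(self.data)
    return Vec(a + b for a, b in zip(self.data, rhs))

def radd(self, other):
    return add(self, other)

# Bind the functional implementations to the class, mirroring the assignments above.
Vec.__add__ = add
Vec.__radd__ = radd

print((Vec([1, 2]) + Vec([3, 4])).data)  # [4, 6]
print((1 + Vec([1, 2])).data)            # [2, 3]
```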
...@@ -23,14 +23,14 @@ from dragon.core.framework import workspace ...@@ -23,14 +23,14 @@ from dragon.core.framework import workspace
from dragon.core.ops import array_ops from dragon.core.ops import array_ops
def add(self, value): def add(self, other):
r"""Compute the element-wise addition. r"""Compute the element-wise addition.
.. math:: \text{out} = \text{self} + \text{value} .. math:: \text{out} = \text{self} + \text{other}
Parameters Parameters
---------- ----------
value : Union[dragon.Tensor, number] other : Union[dragon.Tensor, number]
The value to add. The value to add.
Returns Returns
...@@ -43,7 +43,7 @@ def add(self, value): ...@@ -43,7 +43,7 @@ def add(self, value):
`dragon.math.add(...)`_ : Compute the element-wise addition. `dragon.math.add(...)`_ : Compute the element-wise addition.
""" """
return _binary_op(self, value, 'Add') return _binary_op(self, other, 'Add')
def astype(self, dtype, inplace=False): def astype(self, dtype, inplace=False):
...@@ -89,14 +89,14 @@ def copy(self): ...@@ -89,14 +89,14 @@ def copy(self):
return OpDef.apply('Copy', [self], [outputs]) return OpDef.apply('Copy', [self], [outputs])
def div(self, value): def div(self, other):
r"""Compute the element-wise division. r"""Compute the element-wise division.
.. math:: \text{out} = \text{self} \div \text{value} .. math:: \text{out} = \text{self} \div \text{other}
Parameters Parameters
---------- ----------
value : Union[dragon.Tensor, number] other : Union[dragon.Tensor, number]
The value to divide. The value to divide.
Returns Returns
...@@ -109,7 +109,7 @@ def div(self, value): ...@@ -109,7 +109,7 @@ def div(self, value):
`dragon.math.div(...)`_ : Compute the element-wise division. `dragon.math.div(...)`_ : Compute the element-wise division.
""" """
return _binary_op(self, value, 'Div') return _binary_op(self, other, 'Div')
def ge(self, other): def ge(self, other):
...@@ -172,12 +172,8 @@ def get_value(self): ...@@ -172,12 +172,8 @@ def get_value(self):
numpy.ndarray numpy.ndarray
The deep copied value. The deep copied value.
See Also
--------
`dragon.workspace.fetch_tensor(...)`_ : Fetch the value of given tensor.
""" """
return workspace.fetch_tensor(self) return workspace.get_workspace().fetch_tensor(self)
def gt(self, other): def gt(self, other):
...@@ -249,14 +245,14 @@ def lt(self, other): ...@@ -249,14 +245,14 @@ def lt(self, other):
return _binary_op(self, other, 'Less') return _binary_op(self, other, 'Less')
def mul(self, value): def mul(self, other):
r"""Compute the element-wise multiplication. r"""Compute the element-wise multiplication.
.. math:: \text{out} = \text{self} \times \text{value} .. math:: \text{out} = \text{self} \times \text{other}
Parameters Parameters
---------- ----------
value : Union[dragon.Tensor, number] other : Union[dragon.Tensor, number]
The value to multiply. The value to multiply.
Returns Returns
...@@ -269,7 +265,7 @@ def mul(self, value): ...@@ -269,7 +265,7 @@ def mul(self, value):
`dragon.math.mul(...)`_ : Compute the element-wise multiplication. `dragon.math.mul(...)`_ : Compute the element-wise multiplication.
""" """
return _binary_op(self, value, 'Mul') return _binary_op(self, other, 'Mul')
def neg(self): def neg(self):
...@@ -290,14 +286,14 @@ def neg(self): ...@@ -290,14 +286,14 @@ def neg(self):
return _unary_op(self, 'Neg') return _unary_op(self, 'Neg')
def radd(self, value): def radd(self, other):
r"""Compute the element-wise addition. r"""Compute the element-wise addition.
.. math:: \text{out} = \text{value} + \text{self} .. math:: \text{out} = \text{other} + \text{self}
Parameters Parameters
---------- ----------
value : Union[dragon.Tensor, number] other : Union[dragon.Tensor, number]
The value to add. The value to add.
Returns Returns
...@@ -310,17 +306,17 @@ def radd(self, value): ...@@ -310,17 +306,17 @@ def radd(self, value):
`dragon.math.add(...)`_ : Compute the element-wise addition. `dragon.math.add(...)`_ : Compute the element-wise addition.
""" """
return _binary_op(value, self, 'Add') return _binary_op(other, self, 'Add')
def rdiv(self, value): def rdiv(self, other):
r"""Compute the element-wise division. r"""Compute the element-wise division.
.. math:: \text{out} = \text{value} \div \text{self} .. math:: \text{out} = \text{other} \div \text{self}
Parameters Parameters
---------- ----------
value : Union[dragon.Tensor, number] other : Union[dragon.Tensor, number]
The value to be divided. The value to be divided.
Returns Returns
...@@ -333,7 +329,7 @@ def rdiv(self, value): ...@@ -333,7 +329,7 @@ def rdiv(self, value):
`dragon.math.div(...)`_ : Compute the element-wise division. `dragon.math.div(...)`_ : Compute the element-wise division.
""" """
return _binary_op(value, self, 'Div') return _binary_op(other, self, 'Div')
def reshape(self, shape): def reshape(self, shape):
...@@ -358,14 +354,14 @@ def reshape(self, shape): ...@@ -358,14 +354,14 @@ def reshape(self, shape):
return array_ops.reshape(self, shape=shape) return array_ops.reshape(self, shape=shape)
def rmul(self, value): def rmul(self, other):
r"""Compute the element-wise multiplication. r"""Compute the element-wise multiplication.
.. math:: \text{out} = \text{value} \times \text{self} .. math:: \text{out} = \text{other} \times \text{self}
Parameters Parameters
---------- ----------
value : Union[dragon.Tensor, number] other : Union[dragon.Tensor, number]
The value to multiply. The value to multiply.
Returns Returns
...@@ -378,17 +374,17 @@ def rmul(self, value): ...@@ -378,17 +374,17 @@ def rmul(self, value):
`dragon.math.mul(...)`_ : Compute the element-wise multiplication. `dragon.math.mul(...)`_ : Compute the element-wise multiplication.
""" """
return _binary_op(value, self, 'Mul') return _binary_op(other, self, 'Mul')
def rsub(self, value): def rsub(self, other):
r"""Compute the element-wise subtraction. r"""Compute the element-wise subtraction.
.. math:: \text{out} = \text{value} - \text{self} .. math:: \text{out} = \text{other} - \text{self}
Parameters Parameters
---------- ----------
value : Union[dragon.Tensor, number] other : Union[dragon.Tensor, number]
The value to be subtracted. The value to be subtracted.
Returns Returns
...@@ -401,7 +397,7 @@ def rsub(self, value): ...@@ -401,7 +397,7 @@ def rsub(self, value):
`dragon.math.sub(...)`_ : Compute the element-wise subtraction. `dragon.math.sub(...)`_ : Compute the element-wise subtraction.
""" """
return _binary_op(value, self, 'Sub') return _binary_op(other, self, 'Sub')
def setitem(self, key, value): def setitem(self, key, value):
...@@ -443,23 +439,19 @@ def set_value(self, value): ...@@ -443,23 +439,19 @@ def set_value(self, value):
dragon.Tensor dragon.Tensor
The self. The self.
See Also
--------
`dragon.workspace.feed_tensor(...)`_ : Feed the value to the given tensor.
""" """
workspace.feed_tensor(self, value) workspace.get_workspace().feed_tensor(self, value)
return self return self
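A hedged round-trip sketch for the two accessors above; it assumes a working Dragon install, and ``dragon.Tensor('x')`` follows the docstring example earlier in this commit.

```python
import dragon

x = dragon.Tensor('x')
x.set_value([[1, 2, 3]])  # Copies into the default workspace via feed_tensor
print(x.get_value())      # Copies back as numpy.ndarray, e.g. [[1. 2. 3.]]
```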
def sub(self, value): def sub(self, other):
r"""Compute the element-wise subtraction. r"""Compute the element-wise subtraction.
.. math:: \text{out} = \text{self} - \text{value} .. math:: \text{out} = \text{self} - \text{other}
Parameters Parameters
---------- ----------
value : Union[dragon.Tensor, number] other : Union[dragon.Tensor, number]
The value to subtract. The value to subtract.
Returns Returns
...@@ -472,7 +464,7 @@ def sub(self, value): ...@@ -472,7 +464,7 @@ def sub(self, value):
`dragon.math.sub(...)`_ : Compute the element-wise subtraction. `dragon.math.sub(...)`_ : Compute the element-wise subtraction.
""" """
return _binary_op(self, value, 'Sub') return _binary_op(self, other, 'Sub')
def _binary_op(a, b, op_type): def _binary_op(a, b, op_type):
...@@ -570,3 +562,4 @@ Tensor.__rtruediv__ = rdiv ...@@ -570,3 +562,4 @@ Tensor.__rtruediv__ = rdiv
Tensor.__rsub__ = rsub Tensor.__rsub__ = rsub
Tensor.__setitem__ = setitem Tensor.__setitem__ = setitem
Tensor.__sub__ = sub Tensor.__sub__ = sub
Tensor.__truediv__ = div
...@@ -157,7 +157,7 @@ def conv2d_transpose( ...@@ -157,7 +157,7 @@ def conv2d_transpose(
group : int, optional, default=1 group : int, optional, default=1
The group size of convolution. The group size of convolution.
output_padding : Sequence[Union[int, dragon.Tensor]], optional output_padding : Sequence[Union[int, dragon.Tensor]], optional
The value padded to the right side. The extra size added to the output.
output_shape : Sequence[Union[int, dragon.Tensor]], optional output_shape : Sequence[Union[int, dragon.Tensor]], optional
The output shape for **SAME** padding. The output shape for **SAME** padding.
padding : {'VALID', 'SAME', 'SAME_UPPER', 'SAME_LOWER'}, optional padding : {'VALID', 'SAME', 'SAME_UPPER', 'SAME_LOWER'}, optional
...@@ -176,7 +176,7 @@ def conv2d_transpose( ...@@ -176,7 +176,7 @@ def conv2d_transpose(
raise ValueError('Unsupported padding algorithm: %s' % padding) raise ValueError('Unsupported padding algorithm: %s' % padding)
if data_format not in ('NCHW', 'NHWC'): if data_format not in ('NCHW', 'NHWC'):
raise ValueError('Unsupported data format: %s' % data_format) raise ValueError('Unsupported data format: %s' % data_format)
if output_padding is not None or output_shape is not None: if output_shape is not None and 'SAME' not in padding:
args['padding'] = 'SAME' args['padding'] = 'SAME'
for key in ('kernel_shape', 'strides', 'pads', 'dilations'): for key in ('kernel_shape', 'strides', 'pads', 'dilations'):
if key == 'pads': if key == 'pads':
...@@ -327,7 +327,7 @@ def pool2d( ...@@ -327,7 +327,7 @@ def pool2d(
pads=0, pads=0,
padding='VALID', padding='VALID',
ceil_mode=False, ceil_mode=False,
mode='max', mode='MAX',
data_format='NCHW', data_format='NCHW',
global_pooling=False, global_pooling=False,
**kwargs **kwargs
...@@ -366,7 +366,8 @@ def pool2d( ...@@ -366,7 +366,8 @@ def pool2d(
""" """
args = parse_args(locals()) args = parse_args(locals())
if mode not in ('MAX', 'AVG'): args['mode'] = mode.upper()
if args['mode'] not in ('MAX', 'AVG'):
raise ValueError('Unsupported pooling mode: %s' % mode) raise ValueError('Unsupported pooling mode: %s' % mode)
if padding not in ('VALID', 'SAME', 'SAME_UPPER', 'SAME_LOWER'): if padding not in ('VALID', 'SAME', 'SAME_UPPER', 'SAME_LOWER'):
raise ValueError('Unsupported padding algorithm: %s' % padding) raise ValueError('Unsupported padding algorithm: %s' % padding)
...@@ -386,7 +387,7 @@ def pool2d( ...@@ -386,7 +387,7 @@ def pool2d(
pads=args['pads'], pads=args['pads'],
padding=padding, padding=padding,
ceil_mode=ceil_mode, ceil_mode=ceil_mode,
mode=mode, mode=args['mode'],
data_format=data_format, data_format=data_format,
global_pooling=global_pooling, global_pooling=global_pooling,
).apply([inputs]) ).apply([inputs])
......
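Since `mode` is upper-cased before validation, lower-case pooling modes are now accepted as well. A short call sketch, assuming the operator is exposed as `dragon.nn.pool2d` and that scalar arguments broadcast over both spatial axes (neither detail is shown in this hunk):

```python
import numpy as np
import dragon

x = dragon.EagerTensor(np.random.rand(1, 3, 8, 8).astype('float32'), copy=True)

# 'max' and 'MAX' are now equivalent after the mode.upper() normalization.
y = dragon.nn.pool2d(x, kernel_shape=2, strides=2, mode='max', data_format='NCHW')
```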
...@@ -104,9 +104,6 @@ class ConvTranspose2d(_ConvNd): ...@@ -104,9 +104,6 @@ class ConvTranspose2d(_ConvNd):
super(ConvTranspose2d, self).__init__(key, dev, **kwargs) super(ConvTranspose2d, self).__init__(key, dev, **kwargs)
self.output_padding = kwargs.get('output_padding', None) self.output_padding = kwargs.get('output_padding', None)
self.output_shape = kwargs.get('output_shape', None) self.output_shape = kwargs.get('output_shape', None)
if self.output_padding is not None or \
self.output_shape is not None:
self.padding = 'SAME'
def attributes(self): def attributes(self):
return { return {
...@@ -169,13 +166,11 @@ class Resize(Operator): ...@@ -169,13 +166,11 @@ class Resize(Operator):
'mode': self.mode, 'mode': self.mode,
'align_corners': self.align_corners, 'align_corners': self.align_corners,
'sizes_descs': [ 'sizes_descs': [
'${{HANDLE}}/sizes[{}]'.format(n) '${{HANDLE}}/sizes[{}]'
for n in range(self.num_sizes) .format(n) for n in range(self.num_sizes)],
],
'scales_descs': [ 'scales_descs': [
'${{HANDLE}}/scales[{}]'.format(n) '${{HANDLE}}/scales[{}]'
for n in range(self.num_scales) .format(n) for n in range(self.num_scales)],
],
'data_format': self.data_format, 'data_format': self.data_format,
} }
} }
...@@ -183,22 +178,18 @@ class Resize(Operator): ...@@ -183,22 +178,18 @@ class Resize(Operator):
def feed(self, ws, handle, sizes, scales): def feed(self, ws, handle, sizes, scales):
for i in range(self.num_sizes): for i in range(self.num_sizes):
self.feed_arg( self.feed_arg(
ws, ws, '{}/sizes[{}]'.format(handle, i),
'{}/sizes[{}]'.format(handle, i), sizes[i], 'int64')
sizes[i], 'int64',
)
for i in range(self.num_scales): for i in range(self.num_scales):
self.feed_arg( self.feed_arg(
ws, ws, '{}/scales[{}]'.format(handle, i),
'{}/scales[{}]'.format(handle, i), scales[i], 'float32')
scales[i], 'float32',
)
def forward(self, inputs, sizes=None, scales=None): def forward(self, inputs, sizes=None, scales=None):
return self.dispatch( return self.dispatch(
inputs, [self.alloc()], inputs, [self.alloc()],
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, sizes, scales) self.feed(ws, handle, sizes, scales),
) )
......
...@@ -10,61 +10,45 @@ package dragon; ...@@ -10,61 +10,45 @@ package dragon;
// Store the serialized Tensor objects. // Store the serialized Tensor objects.
message TensorProto { message TensorProto {
repeated int32 dims = 1; repeated int32 dims = 1;
enum DataType { enum DataType {
UNDEFINED = 0; UNDEFINED = 0;
// Basic types. // Basic types.
FLOAT = 1; FLOAT = 1;
INT32 = 2; INT32 = 2;
BYTE = 3; BYTE = 3;
STRING = 4; STRING = 4;
// Less-commonly used data types. // Less-commonly used data types.
BOOL = 5; BOOL = 5;
UINT8 = 6; UINT8 = 6;
INT8 = 7; INT8 = 7;
UINT16 = 8; UINT16 = 8;
INT16 = 9; INT16 = 9;
INT64 = 10; INT64 = 10;
FLOAT16 = 12; FLOAT16 = 12;
DOUBLE = 13; DOUBLE = 13;
} }
optional DataType data_type = 2 [default = FLOAT]; optional DataType data_type = 2 [default = FLOAT];
// For float. // For float.
repeated float float_data = 3 [packed = true]; repeated float float_data = 3 [packed = true];
// For int32, uint8, int8, uint16, int16, bool, and float16 // For int32, uint8, int8, uint16, int16, bool, and float16
// Note about float16: in storage we will basically convert float16 byte-wise // Note about float16: in storage we will basically convert float16 byte-wise
// to unsigned short and then store them in the int32_data field. // to unsigned short and then store them in the int32_data field.
repeated int32 int32_data = 4 [packed = true]; repeated int32 int32_data = 4 [packed = true];
// For bytes. // For bytes.
optional bytes byte_data = 5; optional bytes byte_data = 5;
// For strings. // For strings.
repeated bytes string_data = 6; repeated bytes string_data = 6;
// For double. // For double.
repeated double double_data = 9 [packed = true]; repeated double double_data = 9 [packed = true];
// For int64. // For int64.
repeated int64 int64_data = 10 [packed = true]; repeated int64 int64_data = 10 [packed = true];
// Store the raw data, contents are serialized as little-endian. // Store the raw data, contents are serialized as little-endian.
optional bytes raw_data = 13; optional bytes raw_data = 13;
// Optionally, a name for the tensor. // Optionally, a name for the tensor.
optional string name = 7; optional string name = 7;
}
// Record the filler of Tensor.
// This structure is kept for backward compatibility
// with caffe1, which relies implicit initializer.
message TensorFillerProto {
optional string tensor = 1;
optional string type = 2 [default = 'constant'];
optional float value = 3 [default = 0];
optional float low = 4 [default = 0];
optional float high = 5 [default = 1];
optional float mean = 6 [default = 0];
optional float std = 7 [default = 1];
optional float scale = 8 [default = 3];
enum VarianceNorm { FAN_IN = 0; FAN_OUT = 1; FAN_AVG=2; }
optional VarianceNorm variance_norm = 9 [default = FAN_IN];
} }
// Store multiple TensorProto objects in one single proto. // Store multiple TensorProto objects in one single proto.
...@@ -74,99 +58,116 @@ message TensorProtos { ...@@ -74,99 +58,116 @@ message TensorProtos {
// DeviceType that Dragon currently supports. // DeviceType that Dragon currently supports.
enum DeviceTypeProto { enum DeviceTypeProto {
// The default device. // The default device.
PROTO_CPU = 0; PROTO_CPU = 0;
// NVIDIA's CUDA Environment. // NVIDIA's CUDA Environment.
PROTO_CUDA = 1; PROTO_CUDA = 1;
// CAMBRICON's CNML Environment. // CAMBRICON's CNML Environment.
PROTO_CNML = 2; PROTO_CNML = 2;
} }
// Device-specific options. // Device-specific options.
message DeviceOption { message DeviceOption {
// The type of device to dispatch executions. // The type of device to dispatch executions.
optional DeviceTypeProto device_type = 1 [default = PROTO_CPU]; optional DeviceTypeProto device_type = 1 [default = PROTO_CPU];
// The index of this device. // The index of this device.
optional int32 device_id = 2 [default = 0]; optional int32 device_id = 2 [default = 0];
// The random seed to start the random generator. // The random seed to start the random generator.
optional uint32 random_seed = 3 [default = 3]; optional uint32 random_seed = 3 [default = 3];
} }
// A named argument containing either singular float, integer and string // A named argument containing either singular float, integer and string
// values, or repeated float, int and string arrays. // values, or repeated float, int and string arrays.
message Argument { message Argument {
// The name of this argument. // The name of this argument.
optional string name = 1; optional string name = 1;
// Store the float32 value. // Store the float32 value.
optional float f = 2; optional float f = 2;
// Store the bool, int32, int64 value. // Store the bool, int32, int64 value.
optional int64 i = 3; optional int64 i = 3;
// Store the string value. // Store the string value.
optional bytes s = 4; optional bytes s = 4;
// Store the float32 values. // Store the float32 values.
repeated float floats = 7; repeated float floats = 7;
// Store the bool, int32, int64 values. // Store the bool, int32, int64 values.
repeated int64 ints = 8; repeated int64 ints = 8;
// Store the string values. // Store the string values.
repeated bytes strings = 9; repeated bytes strings = 9;
} }
// Operator Definition // Operator Definition
message OperatorDef { message OperatorDef {
// The name of inputs. // The name of inputs.
repeated string input = 1; repeated string input = 1;
// The name of outputs. // The name of outputs.
repeated string output = 2; repeated string output = 2;
// The optional name of this operator. // The optional name of this operator.
optional string name = 3; optional string name = 3;
// The operator type. // The operator type.
optional string type = 4; optional string type = 4;
// The arguments. // The arguments.
repeated Argument arg = 5; repeated Argument arg = 5;
// The device option that the operator should run under. // The device option that the operator should run under.
optional DeviceOption device_option = 6; optional DeviceOption device_option = 6;
// The optional unique key for this operator. // The optional unique key for this operator.
// Set it to persist operators in the eager mode. // Set it to persist operators in the eager mode.
optional string cache_key = 7; optional string cache_key = 7;
}
// Record the gradient information
message GradientProto {
// The derivative target.
optional string cost = 1;
// The target with respect to?
optional string wrt = 2;
// The external gradient
optional string external = 3;
} }
// Graph Definition // Graph Definition
message GraphDef { message GraphDef {
// The graph name. // The graph name.
optional string name = 1; optional string name = 1;
// The operators to execute. // The operators to execute.
repeated OperatorDef op = 2; repeated OperatorDef op = 2;
// The type of graph. // The type of graph.
optional string graph_type = 3; optional string graph_type = 3;
// The device option for this graph. // The device option for this graph.
optional DeviceOption device_option = 5; optional DeviceOption device_option = 5;
// The arguments. // The arguments.
repeated Argument arg = 6; repeated Argument arg = 6;
// The name of inputs. // The name of inputs.
repeated string input = 7; repeated string input = 7;
// The name of outputs. // The name of outputs.
repeated string output = 8; repeated string output = 8;
// The gradients information. // The info of gradients.
repeated GradientProto gradient = 9; repeated GradientInfo grad_info = 9;
} }
\ No newline at end of file
// Record the filler information.
// This structure is kept for backward compatibility
// with caffe, which relies on the implicit initializer.
message FillerInfo {
enum VarianceNorm {
FAN_IN = 0;
FAN_OUT = 1;
FAN_AVG = 2;
}
optional string type = 1 [default = 'constant'];
optional float value = 2 [default = 0];
optional float low = 3 [default = 0];
optional float high = 4 [default = 1];
optional float mean = 5 [default = 0];
optional float std = 6 [default = 1];
optional float scale = 7 [default = 3];
optional VarianceNorm variance_norm = 8 [default = FAN_IN];
}
// Record the gradient information.
message GradientInfo {
// The derivative target.
optional string y = 1;
// The differentiated inputs.
repeated string xs = 2;
}
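These descriptors are compiled into the `dragon_pb2` module imported elsewhere in this commit, so the renamed messages can be built directly from Python. A brief sketch using only the fields defined above (the field values are arbitrary):

```python
from dragon.core.proto import dragon_pb2

# A small float tensor serialized through TensorProto.
tensor = dragon_pb2.TensorProto(
    name='weight', dims=[2, 3],
    data_type=dragon_pb2.TensorProto.FLOAT,
    float_data=[0.1] * 6)

# The caffe-style filler record, now named FillerInfo.
filler = dragon_pb2.FillerInfo(
    type='glorot_uniform', scale=3.,
    variance_norm=dragon_pb2.FillerInfo.FAN_AVG)

data = tensor.SerializeToString()  # round-trips via ParseFromString
```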
...@@ -112,7 +112,7 @@ class Optimizer(object): ...@@ -112,7 +112,7 @@ class Optimizer(object):
if extra is not None: if extra is not None:
self._defaults = dict(self._defaults, **extra) self._defaults = dict(self._defaults, **extra)
for k, v in self._defaults.items(): for k, v in self._defaults.items():
workspace.feed_tensor( workspace.get_workspace().feed_tensor(
'/share/hyper/%s/%s' % (self._op_handle, k), v, '/share/hyper/%s/%s' % (self._op_handle, k), v,
dtype='float32', enforce_cpu=True, dtype='float32', enforce_cpu=True,
) )
...@@ -140,14 +140,14 @@ class Optimizer(object): ...@@ -140,14 +140,14 @@ class Optimizer(object):
def __getattr__(self, item): def __getattr__(self, item):
defaults = self.__dict__.get('_defaults') defaults = self.__dict__.get('_defaults')
if item in defaults: if item in defaults:
return workspace.fetch_tensor( return workspace.get_workspace().fetch_tensor(
'/share/hyper/%s/%s' % (self._op_handle, item)) '/share/hyper/%s/%s' % (self._op_handle, item))
return self.__dict__[item] return self.__dict__[item]
def __setattr__(self, key, value): def __setattr__(self, key, value):
defaults = self.__dict__.get('_defaults') defaults = self.__dict__.get('_defaults')
if defaults is not None and key in defaults: if defaults is not None and key in defaults:
workspace.feed_tensor( workspace.get_workspace().feed_tensor(
'/share/hyper/%s/%s' % (self._op_handle, key), value, '/share/hyper/%s/%s' % (self._op_handle, key), value,
dtype='float32', enforce_cpu=True) dtype='float32', enforce_cpu=True)
else: else:
......
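Hyper-parameters are therefore plain workspace tensors keyed as `/share/hyper/<handle>/<name>`, fed and fetched through the current workspace instead of the removed module-level API. A rough illustration of that storage pattern (the handle and name below are made up for the example):

```python
from dragon.core.framework import workspace

current_ws = workspace.get_workspace()

# Write a hyper-parameter exactly as Optimizer.__setattr__ does.
current_ws.feed_tensor('/share/hyper/Optimizer_1/base_lr', 0.01,
                       dtype='float32', enforce_cpu=True)

# Read it back as Optimizer.__getattr__ does.
base_lr = current_ws.fetch_tensor('/share/hyper/Optimizer_1/base_lr')
```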
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define the base updater class.
We dub it ``Updater`` because ``Optimizer``
has already been taken by many deep learning frameworks.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.core import distributed
from dragon.core.eager import context
from dragon.core.framework import workspace
from dragon.core.ops import distributed_ops_lib
from dragon.core.ops import training_ops_lib
class Updater(object):
"""The base class of updaters."""
# Store the global unique slot index.
_DEFAULT_UNIQUE_SLOT_ID = 0
def __init__(
self,
scale_gradient=1.,
clip_gradient=-1.,
l2_decay=-1.,
name=None,
):
"""Create an ``Updater``.
Parameters
----------
scale_gradient : float, optional, default=1.
The factor to scale gradients.
clip_gradient : float, optional, default=-1.
The norm threshold to clip gradients.
l2_decay : float, optional, default=-1.
The l2 decay factor.
name : str, optional
The optional name for buffers.
"""
self._defaults = {
'scale_gradient': scale_gradient,
'clip_gradient': clip_gradient,
'l2_decay': l2_decay,
}
self._param_group = []
if name:
self._slot = name
else:
Updater._DEFAULT_UNIQUE_SLOT_ID += 1
self._slot = 'Updater/Slot:{}'.format(
Updater._DEFAULT_UNIQUE_SLOT_ID)
self._op_type = None
self._process_group = distributed.get_group()
self._extra_kwargs = {}
def apply_gradients(
self,
values_and_grads,
lr_mult=None,
decay_mult=None,
):
"""Apply the gradients on values.
Parameters
----------
values_and_grads : Sequence[Sequence[dragon.Tensor]]
The values and grads.
lr_mult : number, optional
The multiplier on learning rate.
decay_mult : number, optional
The multiplier on weight decay.
"""
if context.executing_eagerly():
# Filter out the values whose grad is missing.
values, grads = [], []
for v, g in values_and_grads:
if g is not None:
values.append(v)
grads.append(g)
# Accumulate grads from the current process group.
if self._process_group is not None:
distributed_ops_lib.Collective \
.instantiate(
operation='MEAN',
communication='ALLREDUCE',
group=self._process_group,
).apply(grads)
# Apply the updates.
for v, g in zip(values, grads):
self._run_update(v, g, lr_mult, decay_mult)
else:
# Store for the lazy compilation.
for v, g in values_and_grads:
self._add_update(v, g, lr_mult, decay_mult)
return self
def _init_set_defaults(self, extra=None):
"""Initialize the defaults into current workspace."""
if extra is not None:
self._defaults = dict(self._defaults, **extra)
self._op_type = self.__class__.__name__ + 'Update'
for k, v in self._defaults.items():
workspace.feed_tensor(
self._slot + "/" + k, v,
dtype='float32', enforce_cpu=True,
)
def _add_update(self, param, grad, lr_mult=None, decay_mult=None):
"""Add a symbolic operator for updating."""
# Use ids for tensors and store the pair as a tuple (not a generator).
pair = tuple(v.id if hasattr(v, 'id') else v for v in (param, grad))
self._param_group.append(
(pair, {
'lr_mult': float(lr_mult) if lr_mult is not None else 1.,
'decay_mult': float(decay_mult) if decay_mult is not None else 1.,
})
)
def _run_update(self, param, grad, lr_mult=None, decay_mult=None):
"""Run an eager operation for updating."""
return training_ops_lib.ParamUpdate \
.instantiate(
slot=self._slot,
op_type=self._op_type,
lr_mult=float(lr_mult) if lr_mult is not None else 1.,
decay_mult=float(decay_mult) if decay_mult is not None else 1.,
).apply(grad, param)
def __getattr__(self, item):
defaults = self.__dict__.get('_defaults')
if item in defaults:
return workspace.fetch_tensor(
self._slot + '/' + item)
return self.__dict__[item]
def __setattr__(self, key, value):
defaults = self.__dict__.get('_defaults')
if defaults is not None and key in defaults:
workspace.feed_tensor(
self._slot + '/' + key, value,
dtype='float32', enforce_cpu=True,
)
else:
object.__setattr__(self, key, value)
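A concrete updater only has to provide its defaults; the slot naming, the eager/symbolic split in `apply_gradients`, and the hyper-parameter feeding all come from this base class. The subclass below is hypothetical and only illustrates the wiring; the real optimizers in the tree register their own update operator types:

```python
class PlainSGD(Updater):
    """A made-up SGD-like updater built on the base class above."""

    def __init__(self, base_lr=0.01, **kwargs):
        super(PlainSGD, self).__init__(**kwargs)
        # Feeds the defaults into the workspace and derives
        # self._op_type = 'PlainSGDUpdate' from the class name.
        self._init_set_defaults(extra={'base_lr': base_lr})
```

`updater.apply_gradients([(param, grad), ...])` would then either run eager updates or record them for the lazy graph compilation, as shown above.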
...@@ -7,10 +7,6 @@ ...@@ -7,10 +7,6 @@
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on:
#
# <https://github.com/onnx/onnx/blob/master/onnx/__init__.py>
#
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
...@@ -22,29 +18,19 @@ from typing import IO ...@@ -22,29 +18,19 @@ from typing import IO
from typing import Optional from typing import Optional
from typing import Text from typing import Text
from google.protobuf import message
try:
from onnx import ModelProto
except ImportError:
from dragon.core.util import deprecation
ModelProto = deprecation.not_installed('onnx')
def save_bytes(str, f):
# str should be bytes, """Save bytes to the file."""
# f should be either writable or a file path. if hasattr(f, 'write') and callable(cast(IO[bytes], f).write):
def _save_bytes(str, f):
if hasattr(f, 'write') and \
callable(cast(IO[bytes], f).write):
cast(IO[bytes], f).write(str) cast(IO[bytes], f).write(str)
else: else:
with open(cast(Text, f), 'wb') as writable: with open(cast(Text, f), 'wb') as writable:
writable.write(str) writable.write(str)
# f should be either readable or a file path. def load_bytes(f):
def _load_bytes(f): """Load bytes from the file."""
if hasattr(f, 'read') and \ if hasattr(f, 'read') and callable(cast(IO[bytes], f).read):
callable(cast(IO[bytes], f).read):
s = cast(IO[bytes], f).read() s = cast(IO[bytes], f).read()
else: else:
with open(cast(Text, f), 'rb') as readable: with open(cast(Text, f), 'rb') as readable:
...@@ -52,8 +38,11 @@ def _load_bytes(f): ...@@ -52,8 +38,11 @@ def _load_bytes(f):
return s return s
def _serialize(proto): def serialize_proto(proto):
if isinstance(proto, bytes): """Serialize the protocol buffer object."""
if proto is None:
return b''
elif isinstance(proto, bytes):
return proto return proto
elif hasattr(proto, 'SerializeToString') and \ elif hasattr(proto, 'SerializeToString') and \
callable(proto.SerializeToString): callable(proto.SerializeToString):
...@@ -61,52 +50,23 @@ def _serialize(proto): ...@@ -61,52 +50,23 @@ def _serialize(proto):
return result return result
else: else:
raise ValueError( raise ValueError(
'No SerializeToString method is detected. ' 'No <SerializeToString> method. Type is {}'
'neither proto is a str.\ntype is {}' .format(type(proto)))
.format(type(proto))
)
def _deserialize(s, proto): def deserialize_proto(s, proto):
"""Deserialize the protocol buffer object."""
if not isinstance(s, bytes): if not isinstance(s, bytes):
raise ValueError( raise ValueError(
'Parameter s must be bytes, ' 'Expected serialized bytes, got type: {}'.format(type(s)))
'but got type: {}'
.format(type(s))
)
if not (hasattr(proto, 'ParseFromString') and if not (hasattr(proto, 'ParseFromString') and
callable(proto.ParseFromString)): callable(proto.ParseFromString)):
raise ValueError( raise ValueError(
'No ParseFromString method is detected. ' 'No <ParseFromString> method. Type is {}'
'\ntype is {}'.format(type(proto)) .format(type(proto)))
)
decoded = cast(Optional[int], proto.ParseFromString(s)) decoded = cast(Optional[int], proto.ParseFromString(s))
if decoded is not None and decoded != len(s): if decoded is not None and decoded != len(s):
raise message.DecodeError( raise RuntimeError(
"Protobuf decoding consumed too few bytes: {} out of {}" 'Protobuf decoding consumed too few bytes: {} out of {}'
.format(decoded, len(s)) .format(decoded, len(s)))
)
return proto return proto
def save_model(proto, f):
s = _serialize(proto)
_save_bytes(s, f)
def load_model_from_string(s):
if ModelProto is None:
raise ImportError('ONNX is not installed.')
return _deserialize(s, ModelProto())
def load_model(f):
s = _load_bytes(f)
return load_model_from_string(s)
load = load_model
load_from_string = load_model_from_string
save = save_model
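The old ONNX-specific helpers are replaced by the generic `save_bytes` / `load_bytes` / `serialize_proto` / `deserialize_proto` quartet; the import added later in this commit suggests they live in `dragon.core.util.serialization`. A quick round-trip sketch under that assumption:

```python
from dragon.core.proto import dragon_pb2
from dragon.core.util import serialization

proto = dragon_pb2.TensorProto(name='bias', dims=[4])

# proto -> bytes -> file -> bytes -> proto
data = serialization.serialize_proto(proto)
serialization.save_bytes(data, '/tmp/bias.pb')
restored = serialization.deserialize_proto(
    serialization.load_bytes('/tmp/bias.pb'), dragon_pb2.TensorProto())
```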
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import unittest
import argparse
import dragon
from dragon.vm import torch as torch_vm
parser = argparse.ArgumentParser(add_help=False)
TEST_CUDA = dragon.cuda.is_available()
def new_tensor(data, constructor='EagerTensor', execution=None):
if execution is not None:
if execution == 'GRAPH_MODE':
return dragon.Tensor(
shape=data.shape,
dtype=str(data.dtype),
).set_value(data)
else:
return dragon.EagerTensor(data, copy=True)
if constructor == 'EagerTensor':
return dragon.EagerTensor(data, copy=True)
elif constructor == 'Tensor':
return dragon.Tensor(
shape=data.shape,
dtype=str(data.dtype),
).set_value(data)
elif constructor == 'torch.Tensor':
return torch_vm.tensor(data)
else:
raise ValueError('Unknown constructor: ' + constructor)
def run_tests(argv=None):
"""Run tests under the current ``__main__``."""
if argv is None:
args, remaining = parser.parse_known_args()
argv = [sys.argv[0]] + remaining
unittest.main(argv=argv)
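`new_tensor` builds the same data under three constructors so the operator tests can be parameterized over eager, symbolic, and torch tensors. A trivial usage sketch of the helper above:

```python
import numpy as np

data = np.arange(6, dtype='float32').reshape(2, 3)

eager = new_tensor(data)                           # dragon.EagerTensor
symbol = new_tensor(data, constructor='Tensor')    # dragon.Tensor via set_value
torch_t = new_tensor(data, constructor='torch.Tensor')
```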
...@@ -92,7 +92,7 @@ class DataTransformer(multiprocessing.Process): ...@@ -92,7 +92,7 @@ class DataTransformer(multiprocessing.Process):
Parameters Parameters
---------- ----------
example : Dict example : dict
The input example. The input example.
Returns Returns
......
...@@ -54,7 +54,7 @@ class DragonFrontend(object): ...@@ -54,7 +54,7 @@ class DragonFrontend(object):
constants=None, constants=None,
value_info=None, value_info=None,
opset_version=None, opset_version=None,
workspace=None, ws=None,
verbose=True, verbose=True,
): ):
input_names = [] if input_names is None else input_names input_names = [] if input_names is None else input_names
...@@ -79,12 +79,12 @@ class DragonFrontend(object): ...@@ -79,12 +79,12 @@ class DragonFrontend(object):
blob_aliases = {} blob_aliases = {}
for i, alias in enumerate(output_names): for i, alias in enumerate(output_names):
blob_aliases[graph_def.output[i]] = alias blob_aliases[graph_def.output[i]] = alias
workspace.SetTensorAlias(graph_def.output[i], alias) ws.RegisterAlias(graph_def.output[i], alias)
if graph_def.output[i] in value_info: if graph_def.output[i] in value_info:
value_info[alias] = value_info[graph_def.output[i]] value_info[alias] = value_info[graph_def.output[i]]
for i, alias in enumerate(input_names): for i, alias in enumerate(input_names):
blob_aliases[graph_def.input[i]] = alias blob_aliases[graph_def.input[i]] = alias
workspace.SetTensorAlias(graph_def.input[i], alias) ws.RegisterAlias(graph_def.input[i], alias)
if graph_def.input[i] in value_info: if graph_def.input[i] in value_info:
value_info[alias] = value_info[graph_def.input[i]] value_info[alias] = value_info[graph_def.input[i]]
...@@ -116,15 +116,14 @@ class DragonFrontend(object): ...@@ -116,15 +116,14 @@ class DragonFrontend(object):
for op in graph_def.op: for op in graph_def.op:
# Get the shape of inputs and outputs. # Get the shape of inputs and outputs.
for name in itertools.chain(op.input, op.output): for name in itertools.chain(op.input, op.output):
tensor_impl = workspace.GetTensor(name) impl = ws.GetTensor(name)
if tensor_impl is not None: if impl is not None:
shapes[name] = tensor_impl.dims shapes[name] = impl.dims
else: else:
shapes[name] = value_info[name][1] shapes[name] = value_info[name][1]
# Translate definition. # Translate definition.
nodes, const_tensors = cls._translate( nodes, const_tensors = cls._translate(op, opset_version, shapes, ws)
op, opset_version, shapes, workspace)
# Rewritten for names. # Rewritten for names.
for node in nodes: for node in nodes:
...@@ -135,8 +134,7 @@ class DragonFrontend(object): ...@@ -135,8 +134,7 @@ class DragonFrontend(object):
# Directly convert outputs as const tensors if necessary. # Directly convert outputs as const tensors if necessary.
if None in nodes: if None in nodes:
const_tensors = [helper.from_tensor(name, workspace) const_tensors = [helper.from_tensor(name, ws) for name in op.output]
for name in op.output]
else: else:
onnx_graph.node.extend(nodes) onnx_graph.node.extend(nodes)
......
...@@ -472,15 +472,14 @@ def squeeze_exporter(op_def, shape_dict, ws): ...@@ -472,15 +472,14 @@ def squeeze_exporter(op_def, shape_dict, ws):
@exporter.register('Tile') @exporter.register('Tile')
def tile_exporter(op_def, shape_dict, ws): def tile_exporter(op_def, shape_dict, ws):
node, const_tensors = exporter.translate(**locals()) node, const_tensors = exporter.translate(**locals())
repeats = [] repeats = []
for arg in op_def.arg: for arg in op_def.arg:
if arg.name == 'multiples': if arg.name == 'repeats':
repeats = [e for e in arg.ints] repeats = [e for e in arg.ints]
elif arg.name == 'multiples_desc': elif arg.name == 'repeats_desc':
repeats = helper.fetch_argument(op_def, arg, ws) repeats = helper.fetch_argument(op_def, arg, ws)
elif arg.name == 'multiples_descs': elif arg.name == 'repeats_descs':
repeats = helper.fetch_arguments(op_def, arg, ws) repeats = helper.fetch_arguments(op_def, arg, ws)
repeats = helper.from_array( repeats = helper.from_array(
......
...@@ -21,9 +21,9 @@ import numpy ...@@ -21,9 +21,9 @@ import numpy
from dragon.core.autograph import function_lib from dragon.core.autograph import function_lib
from dragon.core.framework import workspace from dragon.core.framework import workspace
from dragon.core.proto import dragon_pb2 from dragon.core.proto import dragon_pb2
from dragon.core.util import serialization
from dragon.vm.onnx.frontend import graph_def_to_onnx_model from dragon.vm.onnx.frontend import graph_def_to_onnx_model
from dragon.vm.onnx.helper import mapping from dragon.vm.onnx.helper import mapping
from dragon.vm.onnx.serialization import save_model
def export_from_graph( def export_from_graph(
...@@ -40,7 +40,7 @@ def export_from_graph( ...@@ -40,7 +40,7 @@ def export_from_graph(
enable_onnx_checker=True, enable_onnx_checker=True,
): ):
"""Export an onnx model from the graph.""" """Export an onnx model from the graph."""
save_model(graph_def_to_onnx_model( model = graph_def_to_onnx_model(
graph_def=graph_def, graph_def=graph_def,
input_names=input_names, input_names=input_names,
output_names=output_names, output_names=output_names,
...@@ -50,7 +50,8 @@ def export_from_graph( ...@@ -50,7 +50,8 @@ def export_from_graph(
opset_version=opset_version, opset_version=opset_version,
workspace=workspace, workspace=workspace,
verbose=verbose, verbose=verbose,
enable_onnx_checker=enable_onnx_checker), f) enable_onnx_checker=enable_onnx_checker)
serialization.save_bytes(serialization.serialize_proto(model), f)
def import_to_function(model_path, explicit_inputs=False): def import_to_function(model_path, explicit_inputs=False):
......
/*!
* Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
*
* Licensed under the BSD 2-Clause License.
* You should have received a copy of the BSD 2-Clause License
* along with the software. If not, See,
*
* <https://opensource.org/licenses/BSD-2-Clause>
*
* ------------------------------------------------------------
*/
#ifndef DRAGON_UTILS_CAFFEMODEL_H_
#define DRAGON_UTILS_CAFFEMODEL_H_
#include "dragon/core/workspace.h"
#ifdef BUILD_RUNTIME
#include "dragon/proto_lite/caffemodel.pb.h"
#else
#include "dragon/proto/caffemodel.pb.h"
#endif
namespace dragon {
inline void LoadCaffeModel(const string& file, Workspace* ws) {
NetParameter net_param;
ReadProtoFromBinaryFile(file.c_str(), &net_param);
LOG(INFO) << "Restore From Model @: " << file << "......";
LOG(INFO) << "Model Format: CaffeModel";
for (int i = 0; i < net_param.layer_size(); i++) {
const auto& layer = net_param.layer(i);
const auto& layer_name = layer.name();
auto prefix = layer_name + "/param:";
for (int j = 0; j < layer.blobs_size(); j++) {
auto tensor_name = prefix + str::to(j);
if (!ws->HasTensor(tensor_name)) {
LOG(WARNING) << "Tensor(" << tensor_name << ") "
<< "does not exist in any Graphs, skip.";
} else {
auto blob = layer.blobs(j);
vec64_t tensor_shape;
for (auto dim : blob.shape().dim())
tensor_shape.push_back(dim);
auto* tensor = ws->GetTensor(tensor_name);
std::stringstream dim_str;
if (tensor_shape.size() > 0) {
tensor->Reshape(tensor_shape);
CHECK_EQ(tensor->count(), blob.data_size())
<< "\nTensor(" << tensor_name << ") "
<< "failed to load, except size: " << tensor->count()
<< ", loaded: " << blob.data_size();
dim_str << tensor->DimString();
} else {
tensor->Reshape({blob.data_size()});
dim_str << "(missing)";
}
auto* x = tensor->mutable_data<float, CPUContext>();
for (int xi = 0; xi < blob.data_size(); ++xi) {
x[xi] = blob.data(xi);
}
LOG(INFO) << "Tensor(" << tensor_name << ") "
<< "loaded, shape: " << dim_str.str()
<< ", size: " << blob.data_size();
}
}
}
}
inline void SavaCaffeModel(const string& file, const vector<Tensor*>& tensors) {
int j = -1;
NetParameter net;
Map<string, int> layer_hash;
for (int i = 0; i < tensors.size(); i++) {
if (tensors[i]->count() <= 0) continue;
auto splits = str::split(tensors[i]->name(), "/param:");
if (layer_hash.count(splits[0]) == 0) {
layer_hash[splits[0]] = ++j;
auto* layer = net.add_layer();
layer->set_name(splits[0]);
}
auto* blob = net.mutable_layer(j)->add_blobs();
for (auto dim : tensors[i]->dims())
blob->mutable_shape()->add_dim(dim);
if (XIsType((*tensors[i]), float)) {
auto* x = tensors[i]->data<float, CPUContext>();
for (int xi = 0; xi < tensors[i]->count(); ++xi)
blob->mutable_data()->Add(x[xi]);
} else if (XIsType((*tensors[i]), float16)) {
auto* x = tensors[i]->data<float16, CPUContext>();
for (int xi = 0; xi < tensors[i]->count(); ++xi)
blob->mutable_data()->Add(cast::to<float>(x[xi]));
}
}
WriteProtoToBinaryFile(net, file.c_str());
LOG(INFO) << "Save the model @: " << file << "......";
LOG(INFO) << "Model format: Caffe";
}
} // namespace dragon
#endif // DRAGON_UTILS_CAFFEMODEL_H_
...@@ -21,112 +21,108 @@ namespace dragon { ...@@ -21,112 +21,108 @@ namespace dragon {
template <typename T, class Context> template <typename T, class Context>
class Filler { class Filler {
public: public:
explicit Filler(const TensorFillerProto& proto) : proto_(proto) {} explicit Filler(const FillerInfo& info) : info_(info) {}
virtual ~Filler() {} virtual ~Filler() {}
virtual void Fill(Tensor* X, Context* ctx) = 0; virtual void Fill(Tensor* X, Context* ctx) = 0;
const TensorFillerProto& proto() { const FillerInfo& info() {
return proto_; return info_;
} }
protected: protected:
TensorFillerProto proto_; FillerInfo info_;
}; };
template <typename T, class Context> template <typename T, class Context>
class ConstantFiller final : public Filler<T, Context> { class ConstantFiller final : public Filler<T, Context> {
public: public:
explicit ConstantFiller(const TensorFillerProto& proto) explicit ConstantFiller(const FillerInfo& info) : Filler<T, Context>(info) {}
: Filler<T, Context>(proto) {}
void Fill(Tensor* X, Context* ctx) override { void Fill(Tensor* X, Context* ctx) override {
math::Set( math::Set(
X->count(), X->count(),
cast::to<T>(proto().value()), cast::to<T>(info().value()),
X->mutable_data<T, Context>(), X->mutable_data<T, Context>(),
ctx); ctx);
} }
protected: protected:
using Filler<T, Context>::proto; using Filler<T, Context>::info;
}; };
template <typename T, class Context> template <typename T, class Context>
class NormalFiller final : public Filler<T, Context> { class NormalFiller final : public Filler<T, Context> {
public: public:
explicit NormalFiller(const TensorFillerProto& proto) explicit NormalFiller(const FillerInfo& info) : Filler<T, Context>(info) {}
: Filler<T, Context>(proto) {}
void Fill(Tensor* X, Context* ctx) override { void Fill(Tensor* X, Context* ctx) override {
math::RandomNormal( math::RandomNormal(
X->count(), X->count(),
proto().mean(), info().mean(),
proto().std(), info().std(),
X->mutable_data<T, Context>(), X->mutable_data<T, Context>(),
ctx); ctx);
} }
protected: protected:
using Filler<T, Context>::proto; using Filler<T, Context>::info;
}; };
template <typename T, class Context> template <typename T, class Context>
class TruncatedNormalFiller final : public Filler<T, Context> { class TruncatedNormalFiller final : public Filler<T, Context> {
public: public:
explicit TruncatedNormalFiller(const TensorFillerProto& proto) explicit TruncatedNormalFiller(const FillerInfo& info)
: Filler<T, Context>(proto) {} : Filler<T, Context>(info) {}
void Fill(Tensor* X, Context* /* ctx */) override { void Fill(Tensor* X, Context* /* ctx */) override {
CPUContext ctx; // Enforce the cpu implementation CPUContext ctx; // Enforce the cpu implementation
math::TruncatedNormal( math::TruncatedNormal(
X->count(), X->count(),
proto().mean(), info().mean(),
proto().std(), info().std(),
proto().low(), info().low(),
proto().high(), info().high(),
X->mutable_data<T, CPUContext>(), X->mutable_data<T, CPUContext>(),
&ctx); &ctx);
} }
protected: protected:
using Filler<T, Context>::proto; using Filler<T, Context>::info;
}; };
template <typename T, class Context> template <typename T, class Context>
class UniformFiller final : public Filler<T, Context> { class UniformFiller final : public Filler<T, Context> {
public: public:
explicit UniformFiller(const TensorFillerProto& proto) explicit UniformFiller(const FillerInfo& info) : Filler<T, Context>(info) {}
: Filler<T, Context>(proto) {}
void Fill(Tensor* X, Context* ctx) override { void Fill(Tensor* X, Context* ctx) override {
math::RandomUniform( math::RandomUniform(
X->count(), X->count(),
proto().low(), info().low(),
proto().high(), info().high(),
X->mutable_data<T, Context>(), X->mutable_data<T, Context>(),
ctx); ctx);
} }
protected: protected:
using Filler<T, Context>::proto; using Filler<T, Context>::info;
}; };
template <typename T, class Context> template <typename T, class Context>
class XavierFiller final : public Filler<T, Context> { class GlorotUniformFiller final : public Filler<T, Context> {
public: public:
explicit XavierFiller(const TensorFillerProto& proto) explicit GlorotUniformFiller(const FillerInfo& info)
: Filler<T, Context>(proto) {} : Filler<T, Context>(info) {}
void Fill(Tensor* X, Context* ctx) override { void Fill(Tensor* X, Context* ctx) override {
auto fan_in = X->count() / X->dim(0); auto fan_in = X->count() / X->dim(0);
auto fan_out = X->count() / X->dim(1); auto fan_out = X->count() / X->dim(1);
float n = (float)fan_in, scale = 3.f; float n = (float)fan_in, scale = 3.f;
if (proto().has_scale()) scale = proto().scale(); if (info().has_scale()) scale = info().scale();
if (proto().variance_norm() == TensorFillerProto_VarianceNorm_FAN_AVG) { if (info().variance_norm() == FillerInfo_VarianceNorm_FAN_AVG) {
n = (fan_in + fan_out) / 2.f; n = (fan_in + fan_out) / 2.f;
} else if ( } else if (info().variance_norm() == FillerInfo_VarianceNorm_FAN_OUT) {
proto().variance_norm() == TensorFillerProto_VarianceNorm_FAN_OUT) {
n = (float)fan_out; n = (float)fan_out;
} }
float limit = std::sqrt(scale / n); float limit = std::sqrt(scale / n);
...@@ -135,24 +131,23 @@ class XavierFiller final : public Filler<T, Context> { ...@@ -135,24 +131,23 @@ class XavierFiller final : public Filler<T, Context> {
} }
protected: protected:
using Filler<T, Context>::proto; using Filler<T, Context>::info;
}; };
template <typename T, class Context> template <typename T, class Context>
class MSRAFiller final : public Filler<T, Context> { class GlorotNormalFiller final : public Filler<T, Context> {
public: public:
explicit MSRAFiller(const TensorFillerProto& proto) explicit GlorotNormalFiller(const FillerInfo& info)
: Filler<T, Context>(proto) {} : Filler<T, Context>(info) {}
void Fill(Tensor* X, Context* ctx) override { void Fill(Tensor* X, Context* ctx) override {
auto fan_in = X->count() / X->dim(0); auto fan_in = X->count() / X->dim(0);
auto fan_out = X->count() / X->dim(1); auto fan_out = X->count() / X->dim(1);
float n = (float)fan_in, scale = 2.f; float n = (float)fan_in, scale = 2.f;
if (proto().has_scale()) scale = proto().scale(); if (info().has_scale()) scale = info().scale();
if (proto().variance_norm() == TensorFillerProto_VarianceNorm_FAN_AVG) { if (info().variance_norm() == FillerInfo_VarianceNorm_FAN_AVG) {
n = (fan_in + fan_out) / 2.f; n = (fan_in + fan_out) / 2.f;
} else if ( } else if (info().variance_norm() == FillerInfo_VarianceNorm_FAN_OUT) {
proto().variance_norm() == TensorFillerProto_VarianceNorm_FAN_OUT) {
n = (float)fan_out; n = (float)fan_out;
} }
float std = std::sqrt(scale / n); float std = std::sqrt(scale / n);
...@@ -161,26 +156,26 @@ class MSRAFiller final : public Filler<T, Context> { ...@@ -161,26 +156,26 @@ class MSRAFiller final : public Filler<T, Context> {
} }
protected: protected:
using Filler<T, Context>::proto; using Filler<T, Context>::info;
}; };
template <typename T, class Context> template <typename T, class Context>
Filler<T, Context>* CreateFiller(const TensorFillerProto& proto) { Filler<T, Context>* CreateFiller(const FillerInfo& info) {
const string& type = proto.type(); const string& type = info.type();
if (type == "constant") { if (type == "constant") {
return new ConstantFiller<T, Context>(proto); return new ConstantFiller<T, Context>(info);
} else if (type == "uniform") { } else if (type == "uniform") {
return new UniformFiller<T, Context>(proto); return new UniformFiller<T, Context>(info);
} else if (type == "normal") { } else if (type == "normal") {
return new NormalFiller<T, Context>(proto); return new NormalFiller<T, Context>(info);
} else if (type == "truncated_normal") { } else if (type == "truncated_normal") {
return new TruncatedNormalFiller<T, Context>(proto); return new TruncatedNormalFiller<T, Context>(info);
} else if (type == "xavier" || type == "glorot_uniform") { } else if (type == "glorot_uniform" || type == "xavier") {
return new XavierFiller<T, Context>(proto); return new GlorotUniformFiller<T, Context>(info);
} else if (type == "msra" || type == "glorot_normal") { } else if (type == "glorot_normal" || type == "msra") {
return new MSRAFiller<T, Context>(proto); return new GlorotNormalFiller<T, Context>(info);
} }
return new ConstantFiller<T, Context>(proto); return new ConstantFiller<T, Context>(info);
} }
} // namespace dragon } // namespace dragon
......
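The Glorot fillers keep the caffe-style `scale` convention (3 for uniform, 2 for normal) and choose the fan term from `variance_norm`. A small numpy sketch of the same bound computation, mirroring `GlorotUniformFiller::Fill` for a weight of shape `(64, 32, 3, 3)`:

```python
import numpy as np

shape = (64, 32, 3, 3)
count = int(np.prod(shape))
fan_in, fan_out = count // shape[0], count // shape[1]   # X.count()/X.dim(0), /X.dim(1)

scale, variance_norm = 3., 'FAN_IN'   # FillerInfo defaults
n = {'FAN_IN': fan_in,
     'FAN_OUT': fan_out,
     'FAN_AVG': (fan_in + fan_out) / 2.}[variance_norm]

limit = np.sqrt(scale / n)
weight = np.random.uniform(-limit, limit, size=shape).astype('float32')
```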
...@@ -81,7 +81,8 @@ def constant(value, dtype=None, shape=None, name='Const'): ...@@ -81,7 +81,8 @@ def constant(value, dtype=None, shape=None, name='Const'):
return EagerTensor(value, name=name + ':0') return EagerTensor(value, name=name + ':0')
else: else:
return TensorRef( return TensorRef(
name=workspace.get_dummy_name(name, ':0', 'Tensor'), name=workspace.get_workspace().unique_name(
name, ':0', 'dragon.Tensor'),
shape=list(value.shape), shape=list(value.shape),
dtype=str(value.dtype), dtype=str(value.dtype),
).set_value(value) ).set_value(value)
...@@ -18,7 +18,6 @@ from __future__ import division ...@@ -18,7 +18,6 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.core.util import six from dragon.core.util import six
from dragon.core.framework import types
from dragon.core.framework import workspace from dragon.core.framework import workspace
from dragon.vm.tensorflow.core.framework import tensor_shape from dragon.vm.tensorflow.core.framework import tensor_shape
from dragon.vm.tensorflow.core.ops import array_ops from dragon.vm.tensorflow.core.ops import array_ops
...@@ -43,7 +42,7 @@ def Input( ...@@ -43,7 +42,7 @@ def Input(
# Create a placeholder with determined ``batch_size`` # Create a placeholder with determined ``batch_size``
x = tf.keras.Input(shape=(8,), batch_size=8, dtype='float32') x = tf.keras.Input(shape=(8,), batch_size=8, dtype='float32')
# Create a placeholder aliasing an existing symbolic tensor # Create a placeholder aliasing an existing tensor
x = dragon.Tensor('x', shape=(8,), dtype='float32').variable() x = dragon.Tensor('x', shape=(8,), dtype='float32').variable()
xx = tf.keras.Input(tensor=x) xx = tf.keras.Input(tensor=x)
``` ```
...@@ -59,7 +58,7 @@ def Input( ...@@ -59,7 +58,7 @@ def Input(
dtype : str, optional dtype : str, optional
The optional data type. The optional data type.
tensor : dragon.Tensor, optional tensor : dragon.Tensor, optional
The existing symbolic tensor aliased to the placeholder. The existing tensor aliased to the input.
Returns Returns
------- -------
...@@ -99,16 +98,9 @@ def Input( ...@@ -99,16 +98,9 @@ def Input(
elif isinstance(shape, six.integer_types): elif isinstance(shape, six.integer_types):
shape = (shape,) shape = (shape,)
placeholder = \ placeholder = array_ops.placeholder(
array_ops.placeholder( dtype=dtype, shape=shape, name=name if name else 'input')
dtype=dtype,
shape=shape,
name=name if name else 'input',
)
if tensor is not None: if tensor is not None:
if not types.is_symbolic_tensor(tensor): workspace.get_workspace().register_alias(tensor, placeholder.id)
raise ValueError('Accepted a dragon.Tensor only.')
workspace.set_tensor_alias(tensor, placeholder.name)
return placeholder return placeholder
...@@ -127,6 +127,7 @@ class Optimizer(optimizer_v1.Optimizer): ...@@ -127,6 +127,7 @@ class Optimizer(optimizer_v1.Optimizer):
def _create_hypers(self): def _create_hypers(self):
if self._hypers_created: if self._hypers_created:
return return
current_ws = workspace.get_workspace()
for name, value in sorted(self._hyper.items()): for name, value in sorted(self._hyper.items()):
if types.is_tensor(value) or callable(value): if types.is_tensor(value) or callable(value):
pass pass
...@@ -141,7 +142,7 @@ class Optimizer(optimizer_v1.Optimizer): ...@@ -141,7 +142,7 @@ class Optimizer(optimizer_v1.Optimizer):
hyper = self._hyper[name] hyper = self._hyper[name]
alias = self._alias.get(name, None) alias = self._alias.get(name, None)
if alias is not None: if alias is not None:
workspace.set_tensor_alias(hyper, alias) current_ws.register_alias(hyper, alias)
self._hypers_created = True self._hypers_created = True
@staticmethod @staticmethod
...@@ -173,10 +174,10 @@ class Optimizer(optimizer_v1.Optimizer): ...@@ -173,10 +174,10 @@ class Optimizer(optimizer_v1.Optimizer):
def _init_set_name(self, name, zero_based=True): def _init_set_name(self, name, zero_based=True):
"""Set a name for sharing weights.""" """Set a name for sharing weights."""
if not name: if not name:
self._name = workspace.get_dummy_name( self._name = workspace.get_workspace().unique_name(
basename=generic_utils.to_snake_case( name=generic_utils.to_snake_case(
self.__class__.__name__), self.__class__.__name__),
domain='Object', namespace='Object',
zero_based=zero_based, zero_based=zero_based,
) )
else: else:
...@@ -188,7 +189,7 @@ class Optimizer(optimizer_v1.Optimizer): ...@@ -188,7 +189,7 @@ class Optimizer(optimizer_v1.Optimizer):
self._hyper[name] = value self._hyper[name] = value
else: else:
if types.is_tensor(self._hyper[name]): if types.is_tensor(self._hyper[name]):
workspace.feed_tensor( workspace.get_workspace().feed_tensor(
self._hyper[name].id, self._hyper[name].id,
value, value,
dtype='float32', dtype='float32',
......
...@@ -147,9 +147,9 @@ class Module(object): ...@@ -147,9 +147,9 @@ class Module(object):
def _init_set_name(self, name=None, zero_based=True): def _init_set_name(self, name=None, zero_based=True):
if name is None: if name is None:
self._name = workspace.get_dummy_name( self._name = workspace.get_workspace().unique_name(
basename=camel_to_snake(self.__class__.__name__), name=camel_to_snake(self.__class__.__name__),
domain='Object', namespace='Object',
zero_based=zero_based, zero_based=zero_based,
) )
else: else:
......
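`get_dummy_name(basename=..., domain=...)` becomes `unique_name(name=..., namespace=...)` on the workspace instance, which is what the module, layer, and optimizer name scopes above now call. A minimal sketch of the renamed call against the default workspace:

```python
from dragon.core.framework import workspace

current_ws = workspace.get_workspace()

# Auto-numbered object names, as used by Module/Layer/Optimizer above.
name1 = current_ws.unique_name(name='dense', namespace='Object', zero_based=True)
name2 = current_ws.unique_name(name='dense', namespace='Object', zero_based=True)
assert name1 != name2   # e.g. 'dense' then 'dense_1'; the exact scheme is not shown here
```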
...@@ -478,11 +478,9 @@ def placeholder(dtype=None, shape=None, name=None): ...@@ -478,11 +478,9 @@ def placeholder(dtype=None, shape=None, name=None):
""" """
# Construct a tensor from the explicit name # Construct a tensor from the explicit name
return TensorRef( return TensorRef(
workspace.get_dummy_name( workspace.get_workspace().unique_name(
context.get_name_scope() + name context.get_name_scope() + name if name else 'Placeholder',
if name else 'Placeholder', suffix=':0', namespace='Tensor'),
suffix=':0', domain='Tensor',
),
dtype=str(dtype) if dtype else dtype, dtype=str(dtype) if dtype else dtype,
shape=shape, shape=shape,
).placeholder() ).placeholder()
...@@ -528,8 +526,8 @@ def shape(input, name=None): ...@@ -528,8 +526,8 @@ def shape(input, name=None):
```python ```python
x = tf.ones((2, 3)) x = tf.ones((2, 3))
print(x.shape) # Return a sequence print(x.shape) # Return a sequence
print(tf.shape(x)) # Return a tensor print(tf.shape(x)) # Return a tensor
``` ```
Parameters Parameters
...@@ -686,11 +684,11 @@ def squeeze(input, axis=None, name=None): ...@@ -686,11 +684,11 @@ def squeeze(input, axis=None, name=None):
# Remove all matched dimensions if ``axis`` is None # Remove all matched dimensions if ``axis`` is None
# Otherwise, only the specified axes will be removed # Otherwise, only the specified axes will be removed
print(tf.squeeze(x).shape) # (1, 2, 2, 1) -> (2, 2) print(tf.squeeze(x).shape) # (1, 2, 2, 1) -> (2, 2)
print(tf.squeeze(x, axis=0).shape) # (1, 2, 2, 1) -> (2, 2, 1) print(tf.squeeze(x, axis=0).shape) # (1, 2, 2, 1) -> (2, 2, 1)
# A negative axis is the last-k axis # A negative axis is the last-k axis
print(tf.squeeze(x, axis=3).shape) # (1, 2, 2, 1) -> (1, 2, 2) print(tf.squeeze(x, axis=3).shape) # (1, 2, 2, 1) -> (1, 2, 2)
print(tf.squeeze(x, axis=-1).shape) # Equivalent print(tf.squeeze(x, axis=-1).shape) # Equivalent
# Also, ``axis`` could be a sequence of integers # Also, ``axis`` could be a sequence of integers
...@@ -716,7 +714,7 @@ def squeeze(input, axis=None, name=None): ...@@ -716,7 +714,7 @@ def squeeze(input, axis=None, name=None):
def tile(input, multiples, name=None): def tile(input, multiples, name=None):
return array_ops.tile(input, multiples=multiples, name=name) return array_ops.tile(input, repeats=multiples, name=name)
def transpose(a, perm=None, name=None): def transpose(a, perm=None, name=None):
......
...@@ -111,10 +111,10 @@ class LayerList(module.Module): ...@@ -111,10 +111,10 @@ class LayerList(module.Module):
return len(self._layers) return len(self._layers)
def __repr__(self): def __repr__(self):
tmpstr = 'LayerList' + '(\n' tmp_str = 'LayerList' + '(\n'
for idx, layer in enumerate(self._layers): for idx, layer in enumerate(self._layers):
modstr = layer.__repr__() mod_str = layer.__repr__()
modstr = self._addindent(modstr, 2) mod_str = self._add_indent(mod_str, 2)
tmpstr = tmpstr + ' (' + str(idx) + '): ' + modstr + '\n' tmp_str = tmp_str + ' (' + str(idx) + '): ' + mod_str + '\n'
tmpstr = tmpstr + ')' tmp_str = tmp_str + ')'
return tmpstr return tmp_str
...@@ -245,12 +245,12 @@ class Module(object): ...@@ -245,12 +245,12 @@ class Module(object):
) )
@staticmethod @staticmethod
def _addindent(s_, numSpaces): def _add_indent(s_, num_spaces):
s = s_.split('\n') s = s_.split('\n')
if len(s) == 1: if len(s) == 1:
return s_ return s_
first = s.pop(0) first = s.pop(0)
s = [(numSpaces * ' ') + line for line in s] s = [(num_spaces * ' ') + line for line in s]
s = '\n'.join(s) s = '\n'.join(s)
s = first + '\n' + s s = first + '\n' + s
return s return s
...@@ -297,9 +297,9 @@ class Module(object): ...@@ -297,9 +297,9 @@ class Module(object):
def _set_name(self, name=None, zero_based=True): def _set_name(self, name=None, zero_based=True):
"""Set the module name.""" """Set the module name."""
if name is None: if name is None:
self._name = workspace.get_dummy_name( self._name = workspace.get_workspace().unique_name(
basename=self.__class__.__name__.lower(), name=self.__class__.__name__.lower(),
domain='Object', namespace='Object',
zero_based=zero_based, zero_based=zero_based,
) )
else: else:
......
...@@ -288,7 +288,6 @@ def _load_weights_from_hdf5_group(f, modules, skip=False): ...@@ -288,7 +288,6 @@ def _load_weights_from_hdf5_group(f, modules, skip=False):
matched_info = [] matched_info = []
module_dict = {m.name: m for m in modules} module_dict = {m.name: m for m in modules}
module_names = [n.decode('utf8') for n in f.attrs["layer_names"]] module_names = [n.decode('utf8') for n in f.attrs["layer_names"]]
for idx, name in enumerate(module_names): for idx, name in enumerate(module_names):
if name not in module_dict: if name not in module_dict:
if not skip: if not skip:
...@@ -300,7 +299,6 @@ def _load_weights_from_hdf5_group(f, modules, skip=False): ...@@ -300,7 +299,6 @@ def _load_weights_from_hdf5_group(f, modules, skip=False):
value_names = [n.decode('utf8') for n in g.attrs['weight_names']] value_names = [n.decode('utf8') for n in g.attrs['weight_names']]
value_dict = dict((name, g[name]) for name in value_names) value_dict = dict((name, g[name]) for name in value_names)
matched_info += _assign_weights_from_dict(weight_dict, value_dict, skip=True) matched_info += _assign_weights_from_dict(weight_dict, value_dict, skip=True)
return matched_info return matched_info
...@@ -327,6 +325,7 @@ def _save_weights_to_hdf5_group(f, modules): ...@@ -327,6 +325,7 @@ def _save_weights_to_hdf5_group(f, modules):
def _set_value(input, value): def _set_value(input, value):
"""Set the copied value to input.""" """Set the copied value to input."""
if hasattr(input, 'id'): if hasattr(input, 'id'):
workspace.feed_tensor(input.id, value, enforce_cpu=True) workspace.get_workspace().feed_tensor(
input.id, value, enforce_cpu=True)
else: else:
raise ValueError('Input is not a legal tensor.') raise ValueError('Input is not a legal tensor.')
...@@ -84,9 +84,9 @@ class LayerMetaclass(object): ...@@ -84,9 +84,9 @@ class LayerMetaclass(object):
def _init_set_name(self, name=None, zero_based=True): def _init_set_name(self, name=None, zero_based=True):
"""Set the model name when necessary.""" """Set the model name when necessary."""
if name is None: if name is None:
self._name = workspace.get_dummy_name( self._name = workspace.get_workspace().unique_name(
basename=self.__class__.__name__.lower(), name=self.__class__.__name__.lower(),
domain='Object', namespace='Object',
zero_based=zero_based, zero_based=zero_based,
) )
else: else:
...@@ -378,21 +378,21 @@ class LayerList(Layer): ...@@ -378,21 +378,21 @@ class LayerList(Layer):
return len(self._all_layers) return len(self._all_layers)
def __repr__(self): def __repr__(self):
tmpstr = 'LayerList' + '(\n' tmp_str = 'LayerList' + '(\n'
for idx, layer in enumerate(self._all_layers): for idx, layer in enumerate(self._all_layers):
modstr = layer.__repr__() mod_str = layer.__repr__()
modstr = _addindent(modstr, 2) mod_str = _add_indent(mod_str, 2)
tmpstr = tmpstr + ' (' + str(idx) + '): ' + modstr + '\n' tmp_str = tmp_str + ' (' + str(idx) + '): ' + mod_str + '\n'
tmpstr = tmpstr + ')' tmp_str = tmp_str + ')'
return tmpstr return tmp_str
def _addindent(s_, numSpaces): def _add_indent(s_, num_spaces):
s = s_.split('\n') s = s_.split('\n')
if len(s) == 1: if len(s) == 1:
return s_ return s_
first = s.pop(0) first = s.pop(0)
s = [(numSpaces * ' ') + line for line in s] s = [(num_spaces * ' ') + line for line in s]
s = '\n'.join(s) s = '\n'.join(s)
s = first + '\n' + s s = first + '\n' + s
return s return s
...@@ -123,17 +123,14 @@ class Binding(object): ...@@ -123,17 +123,14 @@ class Binding(object):
if self._device_tensor is None: if self._device_tensor is None:
spec = device_spec.DeviceSpec('cuda', self.device_id) spec = device_spec.DeviceSpec('cuda', self.device_id)
self._device_opt = spec.to_proto(serialized=True) self._device_opt = spec.to_proto(serialized=True)
ws = workspace.get_workspace() current_ws = workspace.get_workspace()
ref = EagerTensor(device=spec) # Hack the constructor. tensor = EagerTensor(device=spec) # Hack the constructor.
ref.__gc__ = ws.collectors.TENSOR tensor._gc = current_ws.collectors.TENSOR
ref._id = ref.__gc__.alloc('${DLPACK}') tensor._impl = current_ws.create_tensor(
ref._impl = ws.CreateTensor(ref._id).FromPointer( tensor._gc.alloc('${DLPACK}')).FromPointer(
self._shape, self._shape, self._dtype,
self._dtype, self._device_opt, self.device_buffer.ptr)
self._device_opt, self._device_tensor = tensor
self.device_buffer.ptr,
)
self._device_tensor = ref
return self._device_tensor._impl.ToDLPack(self._device_opt, True) return self._device_tensor._impl.ToDLPack(self._device_opt, True)
@property @property
...@@ -187,17 +184,14 @@ class Binding(object): ...@@ -187,17 +184,14 @@ class Binding(object):
if self._host_tensor is None: if self._host_tensor is None:
spec = device_spec.DeviceSpec('cpu') spec = device_spec.DeviceSpec('cpu')
self._host_opt = spec.to_proto(serialized=True) self._host_opt = spec.to_proto(serialized=True)
ws = workspace.get_workspace() current_ws = workspace.get_workspace()
ref = EagerTensor(device=spec) # Hack the constructor. tensor = EagerTensor(device=spec) # Hack the constructor.
ref.__gc__ = ws.collectors.TENSOR tensor._gc = current_ws.collectors.TENSOR
ref._id = ref.__gc__.alloc('${DLPACK}') tensor._impl = current_ws.create_tensor(
ref._impl = ws.CreateTensor(ref._id).FromPointer( tensor._gc.alloc('${DLPACK}')).FromPointer(
self._shape, self._shape, self._dtype,
self._dtype, self._host_opt, self.host_buffer.ctypes.data)
self._host_opt, self._host_tensor = tensor
self.host_buffer.ctypes.data,
)
self._host_tensor = ref
return self._host_tensor._impl.ToDLPack(self._host_opt, True) return self._host_tensor._impl.ToDLPack(self._host_opt, True)
@property @property
......
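Both bindings ultimately hand out a DLPack capsule built from an `EagerTensor` whose storage points at an externally owned buffer. The public converter pair exercised in the tests below gives the simplest picture of the exchange:

```python
import numpy as np
import dragon

data = np.array([0., 1., 2.], 'float32')
with dragon.device('cpu'), dragon.eager_scope():
    x = dragon.EagerTensor(data, copy=True)
    capsule = dragon.dlpack.to_dlpack(x)      # export without copying
    y = dragon.dlpack.from_dlpack(capsule)    # re-import as an EagerTensor
    assert y.shape == list(data.shape) and y.dtype == str(data.dtype)
```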
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import unittest
import dragon
import numpy as np
from dragon.core.eager.context import context as execution_context
from dragon.core.testing.unittest.common_utils import run_tests
from dragon.core.testing.unittest.common_utils import TEST_CUDA
class TestTensor(unittest.TestCase):
"""Test the tensor class."""
def test_properties(self):
a, b = dragon.Tensor(), dragon.EagerTensor(0)
self.assertEqual(dragon.Tensor().ndim, 0)
self.assertEqual(dragon.Tensor(shape=(2,)).ndim, 1)
self.assertEqual(dragon.Tensor().shape, None)
self.assertEqual(dragon.Tensor(shape=(2,)).shape, [2])
self.assertEqual(dragon.Tensor().size, 0)
self.assertEqual(dragon.Tensor(shape=(2, None)).size, math.inf)
self.assertEqual(dragon.Tensor(shape=(2,)).size, 2)
self.assertEqual(dragon.Tensor().dtype, None)
self.assertEqual(dragon.Tensor(dtype='float32').dtype, 'float32')
self.assertEqual(dragon.EagerTensor(shape=(2,)).ndim, 1)
self.assertEqual(dragon.EagerTensor(shape=(2,)).shape, [2])
self.assertEqual(dragon.EagerTensor(shape=(2,)).size, 2)
self.assertEqual(dragon.EagerTensor(shape=(2,), dtype='float32').dtype, 'float32')
self.assertEqual(dragon.EagerTensor().device, dragon.EagerTensor().device)
self.assertNotEqual(a.__hash__(), b.__hash__())
self.assertNotEqual(a.__repr__(), b.__repr__())
self.assertNotEqual(b.__repr__(), dragon.EagerTensor([2]).__repr__())
self.assertEqual(int(a.variable().placeholder().set_value(1)), 1)
self.assertEqual(float(dragon.Tensor.convert_to(1)), 1.)
self.assertEqual(int(b.set_value(1)), 1)
self.assertEqual(float(b), 1.)
self.assertEqual(int(b.get_value()), 1)
try:
a.shape = 1
except TypeError:
pass
try:
b.shape = (2, 3)
except RuntimeError:
pass
try:
b.dtype = 'float64'
except RuntimeError:
pass
try:
b = dragon.EagerTensor(0, 0)
except ValueError:
pass
with dragon.name_scope('a'):
a.name = 'a'
self.assertEqual(a.name, 'a/a')
with dragon.name_scope(''):
b.name = 'b'
self.assertEqual(b.name, 'b')
def test_dlpack_converter(self):
data = np.array([0., 1., 2.], 'float32')
with dragon.device('cpu'), dragon.eager_scope():
x = dragon.EagerTensor(data, copy=True)
x_to_dlpack = dragon.dlpack.to_dlpack(x)
x_from_dlpack = dragon.dlpack.from_dlpack(x_to_dlpack)
self.assertEqual(x_from_dlpack.shape, list(data.shape))
self.assertEqual(x_from_dlpack.dtype, str(data.dtype))
self.assertLessEqual(np.abs(x_from_dlpack.numpy() - data).max(), 1e-5)
@unittest.skipIf(not TEST_CUDA, 'CUDA unavailable')
def test_dlpack_converter_cuda(self):
data = np.array([0., 1., 2.], 'float32')
with dragon.device('cuda', 0), execution_context().mode('EAGER_MODE'):
x = dragon.EagerTensor(data, copy=True) + 0
x_to_dlpack = dragon.dlpack.to_dlpack(x)
x_from_dlpack = dragon.dlpack.from_dlpack(x_to_dlpack)
self.assertEqual(x_from_dlpack.device.type, 'cuda')
self.assertEqual(x_from_dlpack.device.index, 0)
self.assertEqual(x_from_dlpack.shape, list(data.shape))
self.assertEqual(x_from_dlpack.dtype, str(data.dtype))
self.assertLessEqual(np.abs(x_from_dlpack.numpy() - data).max(), 1e-5)
class TestWorkspace(unittest.TestCase):
"""Test the workspace class."""
def test_merge_form(self):
w1, w2 = dragon.Workspace(), dragon.Workspace()
with w1.as_default():
x = dragon.Tensor(str(id(w1))).set_value(0)
w2.merge_from(w1)
with w2.as_default():
self.assertEqual(int(x), 0)
if __name__ == '__main__':
run_tests()
...@@ -247,7 +247,7 @@ class TestActivationOps(OpTestCase): ...@@ -247,7 +247,7 @@ class TestActivationOps(OpTestCase):
result = np.maximum(data1, 0.) + np.minimum(data1, 0.) * data2 result = np.maximum(data1, 0.) + np.minimum(data1, 0.) * data2
grad1 = data1 * ((data1 > 0.) + (data1 < 0.) * data2) grad1 = data1 * ((data1 > 0.) + (data1 < 0.) * data2)
grad2 = reduce_like(data1 * ((data1 < 0.) * data1), data2) grad2 = reduce_like(data1 * ((data1 < 0.) * data1), data2)
self.assertEqual([y, dx, dw], [result, grad1, grad2.reshape((-1,))]) self.assertEqual([y, dx, dw], [result, grad1, grad2.flatten()])
@unittest.skipIf(not TEST_CUDA, 'CUDA unavailable') @unittest.skipIf(not TEST_CUDA, 'CUDA unavailable')
def test_prelu_cuda(self): def test_prelu_cuda(self):
...@@ -831,19 +831,20 @@ class TestArrayOps(OpTestCase): ...@@ -831,19 +831,20 @@ class TestArrayOps(OpTestCase):
self.test_stack() self.test_stack()
def test_tile(self): def test_tile(self):
entries = [(1, 1), (1, 2), (2, 1), (2, 2)] entries = [(2,), (1, 1), (1, 2), (2, 1), (2, 2)]
for execution in ('EAGER_MODE', 'GRAPH_MODE'): for execution in ('EAGER_MODE', 'GRAPH_MODE'):
with execution_context().mode(execution): with execution_context().mode(execution):
for multiples in entries: for repeats in entries:
data = arange((2, 2)) data = arange((2, 2))
grad = np.tile(data, multiples)
x = new_tensor(data) x = new_tensor(data)
dy = new_tensor(grad)
with dragon.GradientTape() as tape: with dragon.GradientTape() as tape:
tape.watch(x) tape.watch(x)
y = dragon.tile(x, multiples) y = dragon.tile(x, repeats)
repeats = repeats + (1,) * (len(data.shape) - len(repeats))
grad = np.tile(data, repeats)
dy = new_tensor(grad)
dx = tape.gradient(y, [x], output_gradients=[dy])[0] dx = tape.gradient(y, [x], output_gradients=[dy])[0]
self.assertEqual([y, dx], [grad, data * np.prod(multiples)]) self.assertEqual([y, dx], [grad, data * np.prod(repeats)])
@unittest.skipIf(not TEST_CUDA, 'CUDA unavailable') @unittest.skipIf(not TEST_CUDA, 'CUDA unavailable')
def test_tile_cuda(self): def test_tile_cuda(self):
...@@ -2784,7 +2785,8 @@ class TestTrainingOps(OpTestCase): ...@@ -2784,7 +2785,8 @@ class TestTrainingOps(OpTestCase):
self.adam = dragon.optimizers.Adam() self.adam = dragon.optimizers.Adam()
self.nesterov = dragon.optimizers.Nesterov() self.nesterov = dragon.optimizers.Nesterov()
self.rmsprop = dragon.optimizers.RMSprop() self.rmsprop = dragon.optimizers.RMSprop()
self.sgd = dragon.optimizers.SGD() self.sgd = dragon.optimizers.SGD(name='MyOptimizer')
self.sgd.base_lr = 0.01
def test_adam_update(self): def test_adam_update(self):
with execution_context().mode('EAGER_MODE'): with execution_context().mode('EAGER_MODE'):
...@@ -2798,7 +2800,7 @@ class TestTrainingOps(OpTestCase): ...@@ -2798,7 +2800,7 @@ class TestTrainingOps(OpTestCase):
coef = math.sqrt(1 - math.pow(beta2, t)) / (1 - math.pow(beta1, t)) coef = math.sqrt(1 - math.pow(beta2, t)) / (1 - math.pow(beta1, t))
data4 = uniform((2, 3)) data4 = uniform((2, 3))
grad = new_tensor(data4) grad = new_tensor(data4)
self.adam._run_update(param, grad) self.adam.apply_gradients([[param, grad]])
data2 = beta1 * data2 + (1 - beta1) * data4 data2 = beta1 * data2 + (1 - beta1) * data4
data3 = beta2 * data3 + (1 - beta2) * np.square(data4) data3 = beta2 * data3 + (1 - beta2) * np.square(data4)
data1 -= (lr * coef * data2 / (np.sqrt(data3) + eps)) data1 -= (lr * coef * data2 / (np.sqrt(data3) + eps))
...@@ -2817,7 +2819,7 @@ class TestTrainingOps(OpTestCase): ...@@ -2817,7 +2819,7 @@ class TestTrainingOps(OpTestCase):
for i in range(2): for i in range(2):
data3 = uniform((2, 3)) data3 = uniform((2, 3))
grad = new_tensor(data3) grad = new_tensor(data3)
self.nesterov._run_update(param, grad) self.nesterov.apply_gradients([[param, grad]])
data2_new = momentum * data2 + lr * data3 data2_new = momentum * data2 + lr * data3
data1 -= (1 + momentum) * data2_new - momentum * data2 data1 -= (1 + momentum) * data2_new - momentum * data2
data2 = data2_new data2 = data2_new
...@@ -2838,7 +2840,7 @@ class TestTrainingOps(OpTestCase): ...@@ -2838,7 +2840,7 @@ class TestTrainingOps(OpTestCase):
for i in range(2): for i in range(2):
data4 = uniform((2, 3)) data4 = uniform((2, 3))
grad = new_tensor(data4) grad = new_tensor(data4)
self.rmsprop._run_update(param, grad) self.rmsprop.apply_gradients([[param, grad]])
data3 = decay * data3 + (1 - decay) * np.square(data4) data3 = decay * data3 + (1 - decay) * np.square(data4)
data2 = momentum * data2 + (lr * data4 / (np.sqrt(data3) + eps)) data2 = momentum * data2 + (lr * data4 / (np.sqrt(data3) + eps))
data1 -= data2 data1 -= data2
...@@ -2857,7 +2859,7 @@ class TestTrainingOps(OpTestCase): ...@@ -2857,7 +2859,7 @@ class TestTrainingOps(OpTestCase):
for i in range(2): for i in range(2):
data3 = uniform((2, 3)) data3 = uniform((2, 3))
grad = new_tensor(data3) grad = new_tensor(data3)
self.sgd._run_update(param, grad) self.sgd.apply_gradients([[param, grad]])
data2 = momentum * data2 + lr * data3 data2 = momentum * data2 + lr * data3
data1 -= data2 data1 -= data2
self.assertEqual(param, data1) self.assertEqual(param, data1)
...@@ -3494,7 +3496,7 @@ def reduce_like(data, other, reduction='sum'): ...@@ -3494,7 +3496,7 @@ def reduce_like(data, other, reduction='sum'):
def uniform(shape, dtype='float32'): def uniform(shape, dtype='float32'):
"""Return the uniform data with given shape.""" """Return the uniform data with given shape."""
return np.random.uniform(size=shape).astype(dtype) return np.random.uniform(-1., 1., size=shape).astype(dtype)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -19,7 +19,8 @@ import subprocess ...@@ -19,7 +19,8 @@ import subprocess
import argparse import argparse
TESTS_AND_SOURCES = [ TESTS_AND_SOURCES = [
('dragon/core/test_ops', 'dragon.core.ops'), ('dragon/core/test_framework', 'dragon.core'),
('dragon/core/test_ops', 'dragon.core'),
] ]
TESTS = [t[0] for t in TESTS_AND_SOURCES] TESTS = [t[0] for t in TESTS_AND_SOURCES]
......
...@@ -14,6 +14,7 @@ from __future__ import division as _division ...@@ -14,6 +14,7 @@ from __future__ import division as _division
from __future__ import print_function as _print_function from __future__ import print_function as _print_function
# Modules # Modules
from dragon.vm.torch import autograd
from dragon.vm.torch import jit from dragon.vm.torch import jit
from dragon.vm.torch import nn from dragon.vm.torch import nn
from dragon.vm.torch import onnx from dragon.vm.torch import onnx
......
...@@ -13,6 +13,7 @@ from __future__ import absolute_import ...@@ -13,6 +13,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.vm.torch.autograd.backprop import backward
from dragon.vm.torch.autograd.grad_mode import enable_grad from dragon.vm.torch.autograd.grad_mode import enable_grad
from dragon.vm.torch.autograd.grad_mode import no_grad from dragon.vm.torch.autograd.grad_mode import no_grad
from dragon.vm.torch.autograd.grad_mode import set_grad_enabled from dragon.vm.torch.autograd.grad_mode import set_grad_enabled
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Do back-propagation from the executed functions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch import executor
def backward(tensors, grad_tensors=None, retain_graph=False):
"""Compute the derivatives of tensors w.r.t. graph leaves.
Parameters
----------
tensors : Sequence[dragon.vm.torch.Tensor]
The derivative targets.
grad_tensors : Sequence[dragon.vm.torch.Tensor], optional
The optional gradient of ``tensors``.
retain_graph : bool, optional, default=False
**False** to free the graph used to compute grad.
"""
return executor.run_backward(
tensors=tensors,
grad_tensors=grad_tensors,
retain_graph=retain_graph,
)
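A short usage sketch of the functional entry point added here (re-exported as `dragon.vm.torch.autograd.backward` by the package `__init__` shown above); the shapes and values are illustrative, and per the tensor docstring an implicit one-filled gradient is used when `grad_tensors` is omitted:

```python
import dragon.vm.torch as torch

x = torch.ones(2, 3, requires_grad=True)
y = x + 1
torch.autograd.backward([y])  # same effect as y.backward()
print(x.grad)                 # ones with shape (2, 3)
```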
...@@ -98,7 +98,7 @@ class Function(object): ...@@ -98,7 +98,7 @@ class Function(object):
"""Generate the OpDef from attributes.""" """Generate the OpDef from attributes."""
attributes = self.attributes() attributes = self.attributes()
self._def = proto_util.make_operator_cdef( self._def = proto_util.make_operator_cdef(
name=attributes.get('name', 'GenericOp'), name=attributes.get('name', 'Op'),
cache_key=self._cache_key, cache_key=self._cache_key,
op_type=attributes['op_type'], op_type=attributes['op_type'],
device_option=proto_util.get_device_option( device_option=proto_util.get_device_option(
......
...@@ -15,69 +15,15 @@ from __future__ import print_function ...@@ -15,69 +15,15 @@ from __future__ import print_function
import warnings import warnings
from dragon.core.framework import workspace
from dragon.vm.torch.tensor import Tensor
class Variable(object):
def Variable(tensor, requires_grad=False, volatile=False): """The variable class."""
if volatile:
warnings.warn("volatile was removed and now has no effect. " def __new__(cls, tensor, requires_grad=False, volatile=False):
"Use `with torch.no_grad():` instead.", stacklevel=2) if volatile:
if requires_grad and volatile: warnings.warn("volatile was removed and now has no effect. "
raise RuntimeError("Variable can't be volatile and require_grad at the same time!") "Use `with torch.no_grad():` instead.", stacklevel=2)
tensor.requires_grad = requires_grad if requires_grad and volatile:
return tensor raise RuntimeError("Variable can't be volatile and require_grad at the same time.")
tensor.requires_grad = requires_grad
return tensor
@property
def volatile(self):
warnings.warn("volatile was removed (Variable.volatile is always False)", stacklevel=2)
return False
def backward(self, gradient=None):
if not self._requires_grad:
raise RuntimeError(
'This variable does not require grads.'
'\nCan not backward from this variable.'
)
# Collect and sort out the operation from tapes.
operations = [v for k, v in sorted(self.__tape__.operations.items())]
# Prepare resources to optimize the backward pass.
input_grads = []
if gradient is not None:
if not isinstance(gradient, Tensor):
raise TypeError(
'<gradient> can be either Tensor, Variable or None, '
'got {}'.format(type(gradient).__name__)
)
if gradient.shape != self.shape:
raise ValueError(
'Except the dimensions of <gradient> is {}, '
'got {}.'.format(self.shape, gradient.shape))
input_grads.append(gradient.id)
# Dispatch the backward execution.
workspace.run_backward(
operations,
targets=[self.id],
sources=None,
input_grads=input_grads,
ignored_grads=list(self._ignored_grads),
)
# Release the holt resources.
gc = workspace.get_workspace().collectors
for op_def in operations:
gc.OPERATOR.collect(op_def.name)
for output in op_def.output:
if output not in op_def.input:
gc.TENSOR.collect(output)
# The monkey-patching.
Tensor.backward = backward
Tensor.volatile = volatile
...@@ -15,12 +15,11 @@ from __future__ import absolute_import ...@@ -15,12 +15,11 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.core.framework import config
from dragon.core.framework import context from dragon.core.framework import context
from dragon.core.framework import workspace from dragon.core.framework import workspace
from dragon.core.util import six from dragon.core.util import six
from dragon.vm.torch.autograd import grad_mode from dragon.vm.torch.autograd import grad_mode
from dragon.vm.torch.cpp import device as Device from dragon.vm.torch.cpp import device as device_cls
from dragon.vm.torch.jit import tape from dragon.vm.torch.jit import tape
from dragon.vm.torch.tensor import Tensor from dragon.vm.torch.tensor import Tensor
...@@ -32,6 +31,7 @@ def run_operator( ...@@ -32,6 +31,7 @@ def run_operator(
no_grad=False, no_grad=False,
pre_callback=None, pre_callback=None,
): ):
"""Compute the outputs."""
requires_grad = False requires_grad = False
input_names, output_names = [], [] input_names, output_names = [], []
...@@ -43,7 +43,6 @@ def run_operator( ...@@ -43,7 +43,6 @@ def run_operator(
requires_grad = requires_grad and grad_mode.is_grad_enabled() requires_grad = requires_grad and grad_mode.is_grad_enabled()
# Allocate outputs. # Allocate outputs.
cfg = config.config()
ws = workspace.get_workspace() ws = workspace.get_workspace()
output_scope = context.get_eager_scope(requires_grad) output_scope = context.get_eager_scope(requires_grad)
gc = ws.collectors # Garbage collectors gc = ws.collectors # Garbage collectors
...@@ -52,46 +51,90 @@ def run_operator( ...@@ -52,46 +51,90 @@ def run_operator(
if isinstance(spec, six.string_types): if isinstance(spec, six.string_types):
output_names.append(spec) output_names.append(spec)
else: else:
if isinstance(spec, Device): if isinstance(spec, device_cls):
output_id = gc.TENSOR.alloc(output_scope) impl = ws.create_tensor(gc.TENSOR.alloc(output_scope))
ref = Tensor(device=spec) outputs[i] = Tensor(device=spec, gc=gc.TENSOR, impl=impl)
ref.__gc__, ref._id = gc.TENSOR, output_id
ref._impl = ws.CreateTensor(output_id)
outputs[i] = ref
output_names.append(outputs[i].id) output_names.append(outputs[i].id)
# Generate the OpDef. # Generate the OpDef.
default_tape = tape.get_default_tape() default_tape = tape.get_default_tape()
op_def = op_def.DeriveTo(input_names, output_names) op_def = op_def.DeriveTo(input_names, output_names)
# Maybe record this operation for future developments. # Record this operation for future developments.
if default_tape is not None: if default_tape is not None:
default_tape.add_def(op_def) default_tape.add_def(op_def)
requires_grad = requires_grad or default_tape.retain_graph requires_grad = requires_grad or default_tape.retain_graph
if len(inputs) > 0 and no_grad is False: if len(inputs) > 0 and no_grad is False:
if requires_grad: if requires_grad:
ignores = set()
instance_tape = tape.Tape() instance_tape = tape.Tape()
for input in inputs: for input in inputs:
instance_tape.merge_from(input.__tape__) instance_tape.merge_from(input._tape)
ignores = ignores.union(input._ignored_grads) if not input._requires_grad:
op_def.name = gc.OPERATOR.alloc(op_def.type) instance_tape.add_empty_grad(input.id + '_grad')
op_def.name = gc.OP.alloc(op_def.type)
instance_tape.add_operation(op_def) instance_tape.add_operation(op_def)
for output in outputs: for output in outputs:
output.requires_grad = True output._tape = instance_tape
output._ignored_grads = ignores output._requires_grad = True
output.__tape__ = instance_tape
else: else:
if default_tape is not None and default_tape.retain_ops: if default_tape is not None and default_tape.retain_ops:
op_def.name = gc.OPERATOR.alloc(op_def.type) op_def.name = gc.OP.alloc(op_def.type)
for output in outputs: for output in outputs:
output.requires_grad = False output._requires_grad = False
# Dispatch the computation. # Dispatch the computation.
if pre_callback is not None: if pre_callback is not None:
pre_callback(ws, op_def.name) pre_callback(ws, op_def.name)
ws.RunOperator(op_def, cfg.graph_verbosity > 0) ws.run_operator(op_def)
# Return the outputs. # Return the outputs.
return outputs if len(outputs) > 1 else outputs[0] return outputs if len(outputs) > 1 else outputs[0]
def run_backward(tensors, grad_tensors=None, retain_graph=False):
"""Compute the gradients."""
# Collect the tapes from the target tensors
default_tape = tape.Tape()
for i, tensor in enumerate(tensors):
if not tensor._requires_grad:
raise RuntimeError('Element %d of tensors does not require grad.' % i)
default_tape.merge_from(tensor._tape)
# Collect the grad from tensors
input_grads = []
if grad_tensors is not None:
if len(grad_tensors) != len(tensors):
raise ValueError('Number of tensors and grad tensors should be the same.')
for i, grad_tensor in enumerate(grad_tensors):
if not isinstance(grad_tensor, Tensor):
raise TypeError(
'Element {} of grad tensors should be a tensor, got {}.'
.format(i, type(grad_tensor).__name__))
if grad_tensor.shape != tensors[i].shape:
raise ValueError(
'Size of element {} of grad tensors should be {}, got {}.'
.format(i, tensors[i].shape, grad_tensor.shape))
input_grads.append(grad_tensor.id)
# Prepare resources to optimize the backward pass.
op_defs = [v for k, v in sorted(default_tape.operations.items())]
# Dispatch the backward execution.
current_ws = workspace.get_workspace()
current_ws.run_backward(
op_defs=op_defs,
targets=[tensor.id for tensor in tensors],
sources=default_tape.sources,
input_grads=input_grads,
empty_grads=default_tape.empty_grads,
)
# Free the retained resources
if not retain_graph:
gc = current_ws.collectors
for op_def in op_defs:
gc.OP.collect(op_def.name)
for output in op_def.output:
if output not in op_def.input:
gc.TENSOR.collect(output)
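Because the executor collects the recorded operator names and intermediate outputs as soon as the pass finishes, running a second backward over the same graph needs `retain_graph=True` on the first call. A hedged usage sketch (tensor values are illustrative):

```python
import dragon.vm.torch as torch

x = torch.ones(2, 3, requires_grad=True)
y = x + 1
y.backward(retain_graph=True)  # keep the recorded defs for another pass
y.backward()                   # the second pass may now release them
print(x.grad)
```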
...@@ -34,32 +34,54 @@ class Tape(object): ...@@ -34,32 +34,54 @@ class Tape(object):
def __init__(self, retain_ops=False, retain_graph=False): def __init__(self, retain_ops=False, retain_graph=False):
self._defs = [] self._defs = []
self._operations = dict() self._operations = dict()
self._sources = set()
self._empty_grads = set()
self.retain_ops = retain_ops self.retain_ops = retain_ops
self.retain_graph = retain_graph self.retain_graph = retain_graph
@property @property
def defs(self): def defs(self):
"""Return the recording defs.""" """Return the recorded defs."""
return self._defs return self._defs
@property @property
def empty_grads(self):
"""Return the recorded empty grads."""
return list(self._empty_grads)
@property
def operations(self): def operations(self):
"""Return the recording operations.""" """Return the recorded operations."""
return self._operations return self._operations
@property
def sources(self):
"""Return the recorded empty grads."""
return list(self._sources)
def add_def(self, op_def): def add_def(self, op_def):
"""Add a new def.""" """Add a new def."""
self._defs.append(op_def) self._defs.append(op_def)
def add_empty_grad(self, tensor_id):
"""Add an empty grad for optimization."""
self._empty_grads.add(tensor_id)
def add_operation(self, op_def): def add_operation(self, op_def):
"""Add a new operation.""" """Add a new operation."""
uid = next(self.UID_GENERATOR) uid = next(self.UID_GENERATOR)
self._operations[uid] = op_def self._operations[uid] = op_def
def add_source(self, tensor_id):
"""Add a source for optimization."""
self._sources.add(tensor_id)
def merge_from(self, other): def merge_from(self, other):
"""Merge operations from another.""" """Merge operations from another."""
if other is not None: if other is not None:
self._operations = {**self._operations, **other._operations} self._operations = {**self._operations, **other._operations}
self._sources = self._sources.union(other._sources)
self._empty_grads = self._empty_grads.union(other._empty_grads)
def __enter__(self): def __enter__(self):
"""Enter the tape into the stack.""" """Enter the tape into the stack."""
......
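A small sketch of the bookkeeping the tape now carries: `sources` are tensor ids whose grads should be kept, `empty_grads` are ids known to need no grad, and `merge_from` unions both along with the recorded operations. The ids below are illustrative:

```python
from dragon.vm.torch.jit import tape

t1, t2 = tape.Tape(), tape.Tape()
t1.add_source('x')            # ask to retain the grad of 'x'
t2.add_empty_grad('b_grad')   # 'b' is known to need no grad
t1.merge_from(t2)             # union operations, sources and empty grads
print(sorted(t1.sources), sorted(t1.empty_grads))  # ['x'] ['b_grad']
```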
...@@ -83,9 +83,10 @@ class FunctionGuard(object): ...@@ -83,9 +83,10 @@ class FunctionGuard(object):
symbols = self.inputs symbols = self.inputs
inputs, extra_args = self._function_spec \ inputs, extra_args = self._function_spec \
.canonicalize_inputs(*args, **kwargs) .canonicalize_inputs(*args, **kwargs)
current_ws = workspace.get_workspace()
for sym, data in zip(symbols, inputs): for sym, data in zip(symbols, inputs):
if hasattr(data, 'id'): if hasattr(data, 'id'):
workspace.set_tensor_alias(data.id, sym.id) current_ws.register_alias(data.id, sym.id)
return symbols, extra_args return symbols, extra_args
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
...@@ -125,7 +126,7 @@ class FunctionGuard(object): ...@@ -125,7 +126,7 @@ class FunctionGuard(object):
# In this case, we have the recorded IR. # In this case, we have the recorded IR.
# Notify the backend to run directly. # Notify the backend to run directly.
self.canonicalize_inputs(*args, **kwargs) self.canonicalize_inputs(*args, **kwargs)
workspace.run_operator(self.defs) workspace.get_workspace().run_operator(self.defs)
return self.outputs return self.outputs
def __get__(self, instance, owner): def __get__(self, instance, owner):
......
...@@ -40,7 +40,6 @@ class _ConvNd(function.Function): ...@@ -40,7 +40,6 @@ class _ConvNd(function.Function):
self.dilations = kwargs.get('dilations', 1) self.dilations = kwargs.get('dilations', 1)
self.group = kwargs.get('group', None) self.group = kwargs.get('group', None)
self.output_padding = kwargs.get('output_padding', None) self.output_padding = kwargs.get('output_padding', None)
self.padding = None if self.output_padding is None else 'SAME'
def attributes(self): def attributes(self):
return { return {
...@@ -50,7 +49,6 @@ class _ConvNd(function.Function): ...@@ -50,7 +49,6 @@ class _ConvNd(function.Function):
'strides': self.strides, 'strides': self.strides,
'pads': self.pads, 'pads': self.pads,
'dilations': self.dilations, 'dilations': self.dilations,
'padding': self.padding,
'output_padding': self.output_padding, 'output_padding': self.output_padding,
'group': self.group, 'group': self.group,
'data_format': 'NCHW', 'data_format': 'NCHW',
...@@ -511,35 +509,29 @@ class Resize(function.Function): ...@@ -511,35 +509,29 @@ class Resize(function.Function):
'align_corners': self.align_corners, 'align_corners': self.align_corners,
'data_format': 'NCHW', 'data_format': 'NCHW',
'sizes_descs': [ 'sizes_descs': [
'${{HANDLE}}/sizes[{}]'.format(n) '${{HANDLE}}/sizes[{}]'
for n in range(self.num_sizes) .format(n) for n in range(self.num_sizes)],
],
'scales_descs': [ 'scales_descs': [
'${{HANDLE}}/scales[{}]'.format(n) '${{HANDLE}}/scales[{}]'
for n in range(self.num_scales) .format(n) for n in range(self.num_scales)],
],
} }
} }
def feed(self, ws, handle, sizes, scales): def feed(self, ws, handle, sizes, scales):
for i in range(self.num_sizes): for i in range(self.num_sizes):
self.feed_arg( self.feed_arg(
ws, ws, '{}/sizes[{}]'.format(handle, i),
'{}/sizes[{}]'.format(handle, i), sizes[i], 'int64')
sizes[i], 'int64',
)
for i in range(self.num_scales): for i in range(self.num_scales):
self.feed_arg( self.feed_arg(
ws, ws, '{}/scales[{}]'.format(handle, i),
'{}/scales[{}]'.format(handle, i), scales[i], 'float32')
scales[i], 'float32',
)
def forward(self, input, sizes=None, scales=None): def forward(self, input, sizes=None, scales=None):
return self.dispatch( return self.dispatch(
[input], [self.alloc()], [input], [self.alloc()],
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, sizes, scales) self.feed(ws, handle, sizes, scales),
) )
......
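The `feed` blocks reformatted above (and the similar ones in the array and init function files below) all follow one convention: `attributes()` declares per-element descriptors of the form `${HANDLE}/name[i]`, and `feed()` writes the concrete values into those workspace slots right before dispatch. A structural sketch of that pattern with a hypothetical `Pad` op; the base-class import path is assumed, and `feed_arg`, `alloc` and `dispatch` are the framework hooks used by the surrounding code:

```python
# The import path is assumed from the repo layout; the surrounding
# *_functions.py modules import the same Function base class.
from dragon.vm.torch.autograd import function


class Pad(function.Function):
    """Hypothetical op showing the desc/feed convention."""

    def __init__(self, key, dev, **kwargs):
        super(Pad, self).__init__(key, dev, **kwargs)
        self.ndim = kwargs.get('ndim', 0)

    def attributes(self):
        return {
            'op_type': 'Pad',
            'arguments': {
                'pads_descs': [
                    '${{HANDLE}}/pads[{}]'
                    .format(n) for n in range(self.ndim)],
            },
        }

    def feed(self, ws, handle, pads):
        # Fill each '${HANDLE}/pads[i]' slot with its concrete value.
        for i in range(self.ndim):
            self.feed_arg(
                ws, '{}/pads[{}]'.format(handle, i),
                pads[i], 'int64')

    def forward(self, input, pads):
        return self.dispatch(
            [input], [self.alloc()],
            callback=lambda ws, handle:
                self.feed(ws, handle, pads))
```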
...@@ -237,7 +237,7 @@ class Module(object): ...@@ -237,7 +237,7 @@ class Module(object):
Parameters Parameters
---------- ----------
state_dict : Dict state_dict : dict
The state dict. The state dict.
strict : bool, optional, default=True strict : bool, optional, default=True
**True** to verify the names strictly. **True** to verify the names strictly.
...@@ -474,7 +474,7 @@ class Module(object): ...@@ -474,7 +474,7 @@ class Module(object):
Parameters Parameters
---------- ----------
destination : Dict, optional destination : dict, optional
The optional output dict. The optional output dict.
prefix : str, optional, default='' prefix : str, optional, default=''
The prefix added to the name of states. The prefix added to the name of states.
...@@ -556,7 +556,7 @@ class Module(object): ...@@ -556,7 +556,7 @@ class Module(object):
child_lines = [] child_lines = []
for key, module in self._modules.items(): for key, module in self._modules.items():
mod_str = repr(module) mod_str = repr(module)
mod_str = _addindent(mod_str, 2) mod_str = _add_indent(mod_str, 2)
child_lines.append('(' + key + '): ' + mod_str) child_lines.append('(' + key + '): ' + mod_str)
lines = extra_lines + child_lines lines = extra_lines + child_lines
main_str = self._get_name() + '(' main_str = self._get_name() + '('
...@@ -599,7 +599,7 @@ class Module(object): ...@@ -599,7 +599,7 @@ class Module(object):
object.__setattr__(self, key, value) object.__setattr__(self, key, value)
def _addindent(s_, num_spaces): def _add_indent(s_, num_spaces):
s = s_.split('\n') s = s_.split('\n')
if len(s) == 1: if len(s) == 1:
return s_ return s_
......
...@@ -61,15 +61,11 @@ class Assign(function.Function): ...@@ -61,15 +61,11 @@ class Assign(function.Function):
def feed(self, ws, handle, starts, sizes): def feed(self, ws, handle, starts, sizes):
for i in range(self.ndim): for i in range(self.ndim):
self.feed_arg( self.feed_arg(
ws, ws, '{}/starts[{}]'.format(handle, i),
'{}/starts[{}]'.format(handle, i), starts[i], 'int64')
starts[i], 'int64',
)
self.feed_arg( self.feed_arg(
ws, ws, '{}/sizes[{}]'.format(handle, i),
'{}/sizes[{}]'.format(handle, i), sizes[i], 'int64')
sizes[i], 'int64',
)
def forward(self, out, starts, sizes, input): def forward(self, out, starts, sizes, input):
self._check_device([input, out]) self._check_device([input, out])
...@@ -90,9 +86,7 @@ class Cast(function.Function): ...@@ -90,9 +86,7 @@ class Cast(function.Function):
def attributes(self): def attributes(self):
return { return {
'op_type': 'Cast', 'op_type': 'Cast',
'arguments': { 'arguments': {'dtype': self.dtype},
'dtype': self.dtype,
},
} }
def forward(self, input, inplace=False): def forward(self, input, inplace=False):
...@@ -122,18 +116,15 @@ class ChannelNormalize(function.Function): ...@@ -122,18 +116,15 @@ class ChannelNormalize(function.Function):
'dtype': self.dtype, 'dtype': self.dtype,
'perm_descs': [ 'perm_descs': [
'${{HANDLE}}/perm[{}]' '${{HANDLE}}/perm[{}]'
.format(n) for n in range(self.ndim) .format(n) for n in range(self.ndim)],
],
} }
} }
def feed(self, ws, handle, perm): def feed(self, ws, handle, perm):
for i in range(self.ndim): for i in range(self.ndim):
self.feed_arg( self.feed_arg(
ws, ws, '{}/perm[{}]'.format(handle, i),
'{}/perm[{}]'.format(handle, i), perm[i], 'int64')
perm[i], 'int64',
)
def forward(self, input, perm): def forward(self, input, perm):
return self.dispatch( return self.dispatch(
...@@ -171,9 +162,7 @@ class Concat(function.Function): ...@@ -171,9 +162,7 @@ class Concat(function.Function):
def attributes(self): def attributes(self):
return { return {
'op_type': 'Concat', 'op_type': 'Concat',
'arguments': { 'arguments': {'axis': self.axis},
'axis': self.axis,
},
} }
def forward(self, seq, out=None): def forward(self, seq, out=None):
...@@ -215,18 +204,15 @@ class Expand(function.Function): ...@@ -215,18 +204,15 @@ class Expand(function.Function):
'arguments': { 'arguments': {
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]' '${{HANDLE}}/dims[{}]'
.format(n) for n in range(self.ndim) .format(n) for n in range(self.ndim)],
],
}, },
} }
def feed(self, ws, handle, times): def feed(self, ws, handle, times):
for i in range(self.ndim): for i in range(self.ndim):
self.feed_arg( self.feed_arg(
ws, ws, '{}/dims[{}]'.format(handle, i),
'{}/dims[{}]'.format(handle, i), times[i], 'int64')
times[i], 'int64',
)
def forward(self, input, dims): def forward(self, input, dims):
return self.dispatch( return self.dispatch(
...@@ -361,18 +347,15 @@ class Reshape(function.Function): ...@@ -361,18 +347,15 @@ class Reshape(function.Function):
'arguments': { 'arguments': {
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]' '${{HANDLE}}/dims[{}]'
.format(n) for n in range(self.ndim) .format(n) for n in range(self.ndim)],
],
}, },
} }
def feed(self, ws, handle, shape): def feed(self, ws, handle, shape):
for i in range(self.ndim): for i in range(self.ndim):
self.feed_arg( self.feed_arg(
ws, ws, '{}/dims[{}]'.format(handle, i),
'{}/dims[{}]'.format(handle, i), shape[i], 'int64')
shape[i], 'int64',
)
def forward(self, input, shape, out=None): def forward(self, input, shape, out=None):
out = out if out else self.alloc() out = out if out else self.alloc()
...@@ -394,27 +377,21 @@ class Slice(function.Function): ...@@ -394,27 +377,21 @@ class Slice(function.Function):
'arguments': { 'arguments': {
'starts_descs': [ 'starts_descs': [
'${{HANDLE}}/starts[{}]' '${{HANDLE}}/starts[{}]'
.format(n) for n in range(self.ndim) .format(n) for n in range(self.ndim)],
],
'sizes_descs': [ 'sizes_descs': [
'${{HANDLE}}/sizes[{}]' '${{HANDLE}}/sizes[{}]'
.format(n) for n in range(self.ndim) .format(n) for n in range(self.ndim)],
],
}, },
} }
def feed(self, ws, handle, starts, sizes): def feed(self, ws, handle, starts, sizes):
for i in range(self.ndim): for i in range(self.ndim):
self.feed_arg( self.feed_arg(
ws, ws, '{}/starts[{}]'.format(handle, i),
'{}/starts[{}]'.format(handle, i), starts[i], 'int64')
starts[i], 'int64',
)
self.feed_arg( self.feed_arg(
ws, ws, '{}/sizes[{}]'.format(handle, i),
'{}/sizes[{}]'.format(handle, i), sizes[i], 'int64')
sizes[i], 'int64',
)
def forward(self, input, starts, sizes): def forward(self, input, starts, sizes):
return self.dispatch( return self.dispatch(
...@@ -489,19 +466,18 @@ class Tile(function.Function): ...@@ -489,19 +466,18 @@ class Tile(function.Function):
return { return {
'op_type': 'Tile', 'op_type': 'Tile',
'arguments': { 'arguments': {
'multiples_descs': [ 'repeats_descs': [
'${{HANDLE}}/multiples[{}]' '${{HANDLE}}/repeats[{}]'
.format(n) for n in range(self.ndim) .format(n) for n in range(self.ndim)],
],
}, },
} }
def feed(self, ws, handle, times): def feed(self, ws, handle, repeats):
for i in range(self.ndim): for i in range(self.ndim):
self.feed_arg( self.feed_arg(
ws, ws,
'{}/multiples[{}]'.format(handle, i), '{}/repeats[{}]'.format(handle, i),
times[i], 'int64', repeats[i], 'int64',
) )
def forward(self, input, times): def forward(self, input, times):
...@@ -523,18 +499,15 @@ class Transpose(function.Function): ...@@ -523,18 +499,15 @@ class Transpose(function.Function):
'arguments': { 'arguments': {
'perm_descs': [ 'perm_descs': [
'${{HANDLE}}/perm[{}]' '${{HANDLE}}/perm[{}]'
.format(n) for n in range(self.ndim) .format(n) for n in range(self.ndim)],
],
}, },
} }
def feed(self, ws, handle, perm): def feed(self, ws, handle, perm):
for i in range(self.ndim): for i in range(self.ndim):
self.feed_arg( self.feed_arg(
ws, ws, '{}/perm[{}]'.format(handle, i),
'{}/perm[{}]'.format(handle, i), perm[i], 'int64')
perm[i], 'int64',
)
def forward(self, input, perm): def forward(self, input, perm):
return self.dispatch( return self.dispatch(
......
...@@ -25,10 +25,8 @@ class _Initializer(function.Function): ...@@ -25,10 +25,8 @@ class _Initializer(function.Function):
def feed(self, ws, handle, shape): def feed(self, ws, handle, shape):
for i in range(self.ndim): for i in range(self.ndim):
self.feed_arg( self.feed_arg(
ws, ws, '{}/dims[{}]'.format(handle, i),
'{}/dims[{}]'.format(handle, i), shape[i], 'int64')
shape[i], 'int64',
)
def forward(self, out, shape, shape_like=None): def forward(self, out, shape, shape_like=None):
return self.dispatch( return self.dispatch(
...@@ -51,22 +49,19 @@ class Arange(function.Function): ...@@ -51,22 +49,19 @@ class Arange(function.Function):
'dtype': self.dtype, 'dtype': self.dtype,
'slice_descs': [ 'slice_descs': [
'${{HANDLE}}/slice[{}]' '${{HANDLE}}/slice[{}]'
.format(n) for n in range(self.num_args) .format(n) for n in range(self.num_args)],
],
} }
} }
def feed(self, ws, handle, slice_args): def feed(self, ws, handle, slice_args):
for i in range(len(slice_args)): for i in range(len(slice_args)):
self.feed_arg( self.feed_arg(
ws, ws, '{}/slice[{}]'.format(handle, i),
'{}/slice[{}]'.format(handle, i), slice_args[i], 'float32')
slice_args[i], 'float32'
)
def forward(self, slice_args): def forward(self, slice_args, out=None):
return self.dispatch( return self.dispatch(
[], [self.alloc()], [], [out if out else self.alloc()],
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, slice_args) self.feed(ws, handle, slice_args)
) )
...@@ -85,8 +80,7 @@ class Eye(_Initializer): ...@@ -85,8 +80,7 @@ class Eye(_Initializer):
'dtype': self.dtype, 'dtype': self.dtype,
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]'.format(n) '${{HANDLE}}/dims[{}]'.format(n)
for n in range(self.ndim) for n in range(self.ndim)],
],
}, },
} }
...@@ -104,8 +98,7 @@ class Fill(_Initializer): ...@@ -104,8 +98,7 @@ class Fill(_Initializer):
'value': float(self.value), 'value': float(self.value),
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]'.format(n) '${{HANDLE}}/dims[{}]'.format(n)
for n in range(self.ndim) for n in range(self.ndim)],
],
}, },
} }
...@@ -125,8 +118,7 @@ class RandomNormal(_Initializer): ...@@ -125,8 +118,7 @@ class RandomNormal(_Initializer):
'std': float(self.std), 'std': float(self.std),
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]'.format(n) '${{HANDLE}}/dims[{}]'.format(n)
for n in range(self.ndim) for n in range(self.ndim)],
],
}, },
} }
...@@ -146,7 +138,6 @@ class RandomUniform(_Initializer): ...@@ -146,7 +138,6 @@ class RandomUniform(_Initializer):
'high': float(self.high), 'high': float(self.high),
'dims_descs': [ 'dims_descs': [
'${{HANDLE}}/dims[{}]'.format(n) '${{HANDLE}}/dims[{}]'.format(n)
for n in range(self.ndim) for n in range(self.ndim)],
],
}, },
} }
...@@ -97,7 +97,7 @@ def eye( ...@@ -97,7 +97,7 @@ def eye(
The rows and cols of matrix are determined by ``n`` and ``m``: The rows and cols of matrix are determined by ``n`` and ``m``:
```python ```python
print(torch.eye(2)) # [[1., 0.], [0., 1.]] print(torch.eye(2)) # [[1., 0.], [0., 1.]]
print(torch.eye(2, 3)) # [[1., 0., 0.], [0., 1., 0.]] print(torch.eye(2, 3)) # [[1., 0., 0.], [0., 1., 0.]]
``` ```
...@@ -125,11 +125,8 @@ def eye( ...@@ -125,11 +125,8 @@ def eye(
m = n if m is None else m m = n if m is None else m
out = utils.new_leaf([n, m], locals()) if out is None else out out = utils.new_leaf([n, m], locals()) if out is None else out
return _functions.Eye \ return _functions.Eye \
.instantiate( .instantiate(out.device, ndim=2, dtype=out.dtype) \
out.device, .apply(out, [n, m])
ndim=2,
dtype=out.dtype,
).apply(out, [n, m])
def fill(out, shape, value): def fill(out, shape, value):
...@@ -144,11 +141,8 @@ def fill(out, shape, value): ...@@ -144,11 +141,8 @@ def fill(out, shape, value):
def fill_like(out, shape_like, value): def fill_like(out, shape_like, value):
return _functions.Fill \ return _functions.Fill \
.instantiate( .instantiate(out.device, value=float(value), dtype=out.dtype) \
out.device, .apply(out, [], shape_like)
value=float(value),
dtype=out.dtype,
).apply(out, [], shape_like)
def normal(*size, **kwargs): def normal(*size, **kwargs):
......
...@@ -18,6 +18,7 @@ from __future__ import print_function ...@@ -18,6 +18,7 @@ from __future__ import print_function
from dragon.vm.torch.ops.array import functional as array_funcs from dragon.vm.torch.ops.array import functional as array_funcs
from dragon.vm.torch.ops.math import functional as math_funcs from dragon.vm.torch.ops.math import functional as math_funcs
from dragon.vm.torch.ops.init import functional as init_funcs from dragon.vm.torch.ops.init import functional as init_funcs
from dragon.vm.torch import executor
from dragon.vm.torch.tensor import Tensor from dragon.vm.torch.tensor import Tensor
...@@ -85,6 +86,24 @@ def add_(self, value): ...@@ -85,6 +86,24 @@ def add_(self, value):
return math_funcs.add(self, value, self) return math_funcs.add(self, value, self)
def backward(self, gradient=None, retain_graph=False):
"""Compute the derivatives of this tensor w.r.t. graph leaves.
Parameters
----------
gradient : dragon.vm.torch.Tensor, optional
The optional gradient of this tensor.
retain_graph : bool, optional, default=False
**False** to free the graph used to compute grad.
"""
return executor.run_backward(
tensors=[self],
grad_tensors=None if gradient is None else [gradient],
retain_graph=retain_graph,
)
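A usage sketch of the method form with an explicit upstream gradient (shapes and values are illustrative; when `gradient` is omitted an implicit one-filled gradient is used, as the tensor docstring notes):

```python
import dragon.vm.torch as torch

x = torch.ones(2, 3, requires_grad=True)
y = x + 1
grad = torch.ones(2, 3) + 1   # an upstream gradient of twos
y.backward(gradient=grad)
print(x.grad)                 # filled with 2
```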
def bitwise_not(self): def bitwise_not(self):
r"""Compute the element-wise NOT bitwise operation. r"""Compute the element-wise NOT bitwise operation.
...@@ -1638,6 +1657,7 @@ def _process_indices(item): ...@@ -1638,6 +1657,7 @@ def _process_indices(item):
Tensor.abs = abs Tensor.abs = abs
Tensor.add = add Tensor.add = add
Tensor.add_ = add_ Tensor.add_ = add_
Tensor.backward = backward
Tensor.bitwise_not = bitwise_not Tensor.bitwise_not = bitwise_not
Tensor.bitwise_not_ = bitwise_not_ Tensor.bitwise_not_ = bitwise_not_
Tensor.bitwise_xor = bitwise_xor Tensor.bitwise_xor = bitwise_xor
......
...@@ -46,13 +46,14 @@ class ParamUpdate(function.Function): ...@@ -46,13 +46,14 @@ class ParamUpdate(function.Function):
class GradAccumulate(function.Function): class GradAccumulate(function.Function):
def __init__(self, key, dev, **kwargs): def __init__(self, key, dev, **kwargs):
super(GradAccumulate, self).__init__(key, dev, **kwargs) super(GradAccumulate, self).__init__(key, dev, **kwargs)
self.momentum = kwargs.get('momentum', 1)
def attributes(self): def attributes(self):
return { return {
'op_type': 'Axpby', 'op_type': 'Axpby',
'arguments': {'alpha': 1., 'beta': 1.}, 'arguments': {'alpha': 1., 'beta': float(self.momentum)},
} }
def forward(self, grads): def forward(self, grads):
outputs = [grad.id + '[acc]' for grad in grads] outputs = [grad.id + '[accum]' for grad in grads]
return self.dispatch(grads, outputs, no_grad=True) return self.dispatch(grads, outputs, no_grad=True)
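Assuming the standard axpby semantics `y = alpha * x + beta * y`, the call above computes `grad[accum] = grad + momentum * grad[accum]`: `momentum=0` overwrites the buffer and `momentum=1` sums into it. A tiny NumPy check of that arithmetic:

```python
import numpy as np

def axpby(x, y, alpha=1., beta=1.):
    """y = alpha * x + beta * y, the accumulation primitive."""
    return alpha * x + beta * y

grad = np.ones(3, 'float32')
accum = np.zeros(3, 'float32')
accum = axpby(grad, accum, beta=0.)  # first step: overwrite
accum = axpby(grad, accum, beta=1.)  # later steps: sum
print(accum)  # [2. 2. 2.]
```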
...@@ -17,16 +17,17 @@ from dragon.core.util import nest ...@@ -17,16 +17,17 @@ from dragon.core.util import nest
from dragon.vm.torch.ops.training import _functions from dragon.vm.torch.ops.training import _functions
def grad_accumulate(grads): def accumulate_grad(grads, momentum=1):
"""Accumulate the gradients.""" """Accumulate the gradients."""
grads = nest.flatten(grads) grads = nest.flatten(grads)
if len(grads) == 0: if len(grads) == 0:
return return
return _functions.GradAccumulate \ return _functions.GradAccumulate \
.instantiate(grads[0].device).apply(grads) .instantiate(grads[0].device, momentum=momentum) \
.apply(grads)
def param_update( def update_param(
param, param,
grad, grad,
op_type, op_type,
...@@ -34,7 +35,7 @@ def param_update( ...@@ -34,7 +35,7 @@ def param_update(
lr_mult=1, lr_mult=1,
decay_mult=1, decay_mult=1,
): ):
"""Apply the param update.""" """Apply the parameter update."""
return _functions.ParamUpdate \ return _functions.ParamUpdate \
.instantiate( .instantiate(
param.device, param.device,
......
...@@ -34,23 +34,9 @@ def new_leaf(sizes, kwargs): ...@@ -34,23 +34,9 @@ def new_leaf(sizes, kwargs):
def remove_binary_scalar(input, value): def remove_binary_scalar(input, value):
"""Remove the python scalar for binary ops.""" """Remove the python scalar for binary ops."""
if isinstance(input, Tensor): if isinstance(input, Tensor):
# (Tensor, Number) return input, scalar_to_tensor(value, input.dtype, input.device)
return \
input, \
scalar_to_tensor(
value,
input.dtype,
input.device,
)
else: else:
# (Number, Tensor) return scalar_to_tensor(input, value.dtype, value.device), value
return \
scalar_to_tensor(
input,
value.dtype,
value.device,
), \
value
def scalar_to_tensor(input, dtype, device): def scalar_to_tensor(input, dtype, device):
...@@ -64,12 +50,11 @@ def scalar_to_tensor(input, dtype, device): ...@@ -64,12 +50,11 @@ def scalar_to_tensor(input, dtype, device):
'<input> should be a python number, got {}.' '<input> should be a python number, got {}.'
.format(type(input).__name__) .format(type(input).__name__)
) )
tid = '/share/scalar/{}/{}'.format(dtype, str(input)) name = '/share/scalar/{}/{}'.format(dtype, str(input))
if not workspace.has_tensor(tid): current_ws = workspace.get_workspace()
workspace.feed_tensor(tid, numpy.array(input, dtype=dtype)) if not current_ws.has_tensor(name):
t = Tensor(id=tid, dtype=dtype, device=device, own_storage=False) current_ws.feed_tensor(name, numpy.array(input, dtype=dtype))
t.requires_grad = False return Tensor(device=device, impl=current_ws.GetTensor(name), requires_grad=False)
return t
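The cache keys each python number by dtype and value under `/share/scalar/...`, so repeated binary ops with the same constant reuse one workspace tensor. A minimal sketch of that behavior (the value is illustrative):

```python
import numpy
from dragon.core.framework import workspace

current_ws = workspace.get_workspace()
name = '/share/scalar/float32/2.0'    # '/share/scalar/{dtype}/{value}'
if not current_ws.has_tensor(name):
    current_ws.feed_tensor(name, numpy.array(2.0, dtype='float32'))
assert current_ws.has_tensor(name)    # later lookups hit the cache
```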
def unify_devices(tensors, key='Inputs'): def unify_devices(tensors, key='Inputs'):
...@@ -78,13 +63,11 @@ def unify_devices(tensors, key='Inputs'): ...@@ -78,13 +63,11 @@ def unify_devices(tensors, key='Inputs'):
if len(set(types)) != 1: if len(set(types)) != 1:
raise ValueError( raise ValueError(
'{} from different device type: [{}].' '{} from different device type: [{}].'
.format(key, ', '.join(types)) .format(key, ', '.join(types)))
)
if types[0] == 'cuda': if types[0] == 'cuda':
indices = [t._device.index for t in tensors] indices = [t._device.index for t in tensors]
if len(set(indices)) != 1: if len(set(indices)) != 1:
raise ValueError( raise ValueError(
'{} from different cuda device: [{}].' '{} from different cuda device: [{}].'
.format(key, ', '.join([str(d) for d in indices])) .format(key, ', '.join([str(d) for d in indices])))
)
return cpp.device(types[0], indices[0]) return cpp.device(types[0], indices[0])
...@@ -53,7 +53,7 @@ class Optimizer(object): ...@@ -53,7 +53,7 @@ class Optimizer(object):
---------- ----------
params : Sequence[dragon.vm.torch.nn.Parameter] params : Sequence[dragon.vm.torch.nn.Parameter]
The parameters to optimize. The parameters to optimize.
defaults : Dict defaults : dict
The pre-defined default hyper-parameters. The pre-defined default hyper-parameters.
""" """
...@@ -73,29 +73,39 @@ class Optimizer(object): ...@@ -73,29 +73,39 @@ class Optimizer(object):
self._process_group = distributed.get_group() self._process_group = distributed.get_group()
self._shared_args = {} self._shared_args = {}
def accumulate_grad(self): def accumulate(self, momentum):
"""Accumulate all gradients. """Accumulate the gradient of params.
Call this method after a ``backward`` pass: Call this method after each ``backward`` pass:
```python ```python
x = torch.ones(1, 3, requires_grad=True) x = torch.ones(1, requires_grad=True)
for i in range(10): optimizer = torch.optim.SGD([x], lr=0.1)
y = x + 1 for epoch in range(2):
y.backward() for step in range(3):
optimizer.accumulate_grad() y = x + 1
optimizer.step() y.backward()
# Note to zero the accumulation at the first step
optimizer.accumulate(momentum=1 if step > 0 else 0)
optimizer.step()
print(x) # 0.4
``` ```
Parameters
----------
momentum : float, required
The momentum applied to the accumulated value.
""" """
grads = [] grads = []
current_ws = workspace.get_workspace()
for group in self.param_groups: for group in self.param_groups:
for p in group['params']: group['_internal/grad_accum'] = True
g = self._steal_grad(p) for param in group['params']:
if g is not None: grad = self._steal_grad(current_ws, param)
grads.append(g) if grad is not None:
p.__accumulating__ = True grads.append(grad)
training_funcs.grad_accumulate(grads) training_funcs.accumulate_grad(grads, momentum)
def add_param_group(self, param_group): def add_param_group(self, param_group):
"""Add a new param group into the optimizer. """Add a new param group into the optimizer.
...@@ -120,7 +130,7 @@ class Optimizer(object): ...@@ -120,7 +130,7 @@ class Optimizer(object):
Parameters Parameters
---------- ----------
param_group : Dict param_group : dict
The param group to add. The param group to add.
""" """
...@@ -137,10 +147,7 @@ class Optimizer(object): ...@@ -137,10 +147,7 @@ class Optimizer(object):
for param in param_group['params']: for param in param_group['params']:
if not param.requires_grad: if not param.requires_grad:
raise ValueError( raise ValueError("Optimize a parameter that doesn't require grad.")
"Optimizing a parameter that "
"doesn't require gradients."
)
for name, default in self.defaults.items(): for name, default in self.defaults.items():
if default is required and name not in param_group: if default is required and name not in param_group:
...@@ -156,6 +163,9 @@ class Optimizer(object): ...@@ -156,6 +163,9 @@ class Optimizer(object):
param_group['name'] = 'Optimizer_{}'.format( param_group['name'] = 'Optimizer_{}'.format(
Optimizer._DEFAULT_UNIQUE_HANDLE_INDEX) Optimizer._DEFAULT_UNIQUE_HANDLE_INDEX)
if '_internal/grad_accum' not in param_group:
param_group['_internal/grad_accum'] = False
param_set = set() param_set = set()
for group in self.param_groups: for group in self.param_groups:
param_set.update(set(group['params'])) param_set.update(set(group['params']))
...@@ -179,11 +189,13 @@ class Optimizer(object): ...@@ -179,11 +189,13 @@ class Optimizer(object):
``` ```
""" """
current_ws = workspace.get_workspace()
for group in self.param_groups: for group in self.param_groups:
self._run_updates(group) self._run_updates(current_ws, group)
group['_internal/grad_accum'] = False
def zero_grad(self, reset=False): def zero_grad(self, reset=False):
"""Set all gradients to zeros. """Set the gradient of params to zero.
This method is not necessary usually, as we will overwrite This method is not necessary usually, as we will overwrite
the gradients in the next computation. the gradients in the next computation.
...@@ -201,6 +213,7 @@ class Optimizer(object): ...@@ -201,6 +213,7 @@ class Optimizer(object):
x += m2(x) x += m2(x)
optimizer.zero_grad(reset=True) optimizer.zero_grad(reset=True)
x.backward() x.backward()
optimizer.step()
``` ```
Parameters Parameters
...@@ -209,37 +222,26 @@ class Optimizer(object): ...@@ -209,37 +222,26 @@ class Optimizer(object):
**True** to reset the memory instead of zeroing. **True** to reset the memory instead of zeroing.
""" """
current_ws = workspace.get_workspace()
for group in self.param_groups: for group in self.param_groups:
for p in group['params']: for param in group['params']:
g = self._steal_grad(p, p.__accumulating__) grad = self._steal_grad(current_ws, param)
p.__accumulating__ = False if grad is not None:
if g is not None: current_ws.reset_tensor(grad) if reset else grad.zero_()
if reset:
workspace.reset_tensor(g)
else:
g.zero_()
def _init_set_defaults(self, group):
"""Initialize the defaults into current workspace."""
template = '/share/hyper/%s/{}' % group['name']
for k, v in group.items():
if k in self._shared_args:
workspace.feed_tensor(
template.format(self._shared_args[k]),
v, dtype='float32', enforce_cpu=True)
def _run_updates(self, group): def _run_updates(self, ws, group):
"""Run updates for the parameter group.""" """Run updates for the parameter group."""
# Collect params and grads. # Collect params and grads.
params, grads = [], [] params, grads = [], []
grad_accum = group['_internal/grad_accum']
for p in group['params']: for p in group['params']:
g = self._steal_grad(p, p.__accumulating__) g = self._steal_grad(ws, p, grad_accum)
if g is not None: if g is not None:
params.append(p) params.append(p)
grads.append(g) grads.append(g)
# Reset the shared defaults. # Reset the shared defaults.
self._init_set_defaults(group) self._reset_defaults(ws, group)
# Accumulate grads from the current process group. # Accumulate grads from the current process group.
if self._process_group is not None: if self._process_group is not None:
...@@ -251,7 +253,7 @@ class Optimizer(object): ...@@ -251,7 +253,7 @@ class Optimizer(object):
# Apply the specific update. # Apply the specific update.
for p, g in zip(params, grads): for p, g in zip(params, grads):
training_funcs.param_update( training_funcs.update_param(
p, g, p, g,
op_type=self._op_type, op_type=self._op_type,
op_handle=group['name'], op_handle=group['name'],
...@@ -259,16 +261,24 @@ class Optimizer(object): ...@@ -259,16 +261,24 @@ class Optimizer(object):
decay_mult=group.get('decay_mult', 1), decay_mult=group.get('decay_mult', 1),
) )
def _reset_defaults(self, ws, group):
"""Reset the defaults to backend."""
template = '/share/hyper/%s/{}' % group['name']
for name, value in group.items():
if name in self._shared_args:
ws.feed_tensor(
tensor=template.format(self._shared_args[name]),
value=value,
dtype='float32',
enforce_cpu=True,
)
@staticmethod @staticmethod
def _steal_grad(param, accumulating=False): def _steal_grad(ws, param, grad_accum=False):
"""Steal the grad tensor if existing.""" """Steal the grad from backend."""
grad_id = param.id + ('_grad[acc]' if accumulating else '_grad') impl = ws.GetTensor(param.id + ('_grad[accum]' if grad_accum else '_grad'))
if workspace.has_tensor(grad_id): if impl is not None:
return Tensor( return Tensor(device=param.device, impl=impl)
id=grad_id,
own_storage=False,
device=param.device,
)
return None return None
def __repr__(self): def __repr__(self):
......
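`_reset_defaults` above writes every shared hyper-parameter into a fixed workspace slot named after the param group, which the backend update op is expected to read. A minimal sketch of the same call; the group name `Optimizer_1` and the arg name `base_lr` (mirroring the tests earlier in this diff) are illustrative:

```python
from dragon.core.framework import workspace

current_ws = workspace.get_workspace()
current_ws.feed_tensor(
    '/share/hyper/Optimizer_1/base_lr', 0.01,
    dtype='float32', enforce_cpu=True)
```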
...@@ -88,8 +88,7 @@ def _save(obj, f, pickle_module, pickle_protocol): ...@@ -88,8 +88,7 @@ def _save(obj, f, pickle_module, pickle_protocol):
def save(obj, f, pickle_module=PICKLE_MODULE, pickle_protocol=DEFAULT_PROTOCOL): def save(obj, f, pickle_module=PICKLE_MODULE, pickle_protocol=DEFAULT_PROTOCOL):
return _with_file_like( return _with_file_like(
f, "wb", lambda f: _save(obj, f, pickle_module, pickle_protocol) f, "wb", lambda f: _save(obj, f, pickle_module, pickle_protocol))
)
def _load(f, map_location=None, pickle_module=six.moves.pickle, file=None): def _load(f, map_location=None, pickle_module=six.moves.pickle, file=None):
......
...@@ -14,13 +14,13 @@ from __future__ import division ...@@ -14,13 +14,13 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy import numpy
import warnings
from dragon.core.framework import config from dragon.core.framework import config
from dragon.core.framework import context from dragon.core.framework import context
from dragon.core.framework import mapping from dragon.core.framework import mapping
from dragon.core.framework import proto_util from dragon.core.framework import proto_util
from dragon.core.framework import workspace from dragon.core.framework import workspace
from dragon.core.util import math_util
from dragon.core.util import six from dragon.core.util import six
from dragon.vm.torch import cpp from dragon.vm.torch import cpp
...@@ -67,40 +67,24 @@ class Tensor(object): ...@@ -67,40 +67,24 @@ class Tensor(object):
""" """
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
# Internal properties self._tape = None
self._id = kwargs.get('id', None) self._gc = kwargs.get('gc', None)
self._impl = kwargs.get('impl', None)
self._device = kwargs.get('device', cpp.device()) self._device = kwargs.get('device', cpp.device())
self._requires_grad = kwargs.get('requires_grad', False) self._requires_grad = kwargs.get('requires_grad', False)
self._own_storage = kwargs.get('own_storage', True)
self._const_size = None # Attribute to represent a leaf variable
self._ignored_grads = set() # Blacklist of the non-gradient variables
self.__tape__ = None # Instance tape to record operations
self.__accumulating__ = False # Flag for gradient accumulating
# Constructor
if len(args) == 0: if len(args) == 0:
# >>> Empty tensor self._is_leaf = False
if self._id is not None:
ws = workspace.get_workspace()
self.__gc__ = ws.collectors.TENSOR
self._impl = ws.CreateTensor(self._id)
else:
self.__gc__ = None
elif len(args) == 1: elif len(args) == 1:
if isinstance(args[0], (list, tuple)): if isinstance(args[0], (list, tuple)):
# >>> torch.Tensor(sequence)
dtype = kwargs.get('dtype', 'float32') dtype = kwargs.get('dtype', 'float32')
self._from_numpy(numpy.array(args[0], dtype=dtype), copy=False) self._from_numpy(numpy.array(args[0], dtype=dtype), copy=False)
elif isinstance(args[0], numpy.ndarray): elif isinstance(args[0], numpy.ndarray):
# >>> torch.Tensor(array)
self._from_numpy(args[0], copy=kwargs.get('copy', True)) self._from_numpy(args[0], copy=kwargs.get('copy', True))
else: else:
# >>> torch.Tensor(size)
if not isinstance(args[0], six.integer_types): if not isinstance(args[0], six.integer_types):
raise ValueError('Expected an integer as size.') raise ValueError('Expected an integer as size.')
self._from_shape([args[0]], kwargs.get('dtype', 'float32')) self._from_shape([args[0]], kwargs.get('dtype', 'float32'))
else: else:
# >>> torch.Tensor(*sizes)
if not all(isinstance(arg, six.integer_types) for arg in args): if not all(isinstance(arg, six.integer_types) for arg in args):
raise ValueError('Expected integer(s) as sizes.') raise ValueError('Expected integer(s) as sizes.')
self._from_shape(args, kwargs.get('dtype', 'float32')) self._from_shape(args, kwargs.get('dtype', 'float32'))
...@@ -115,7 +99,7 @@ class Tensor(object): ...@@ -115,7 +99,7 @@ class Tensor(object):
The data tensor. The data tensor.
""" """
return Tensor(device=self.device, id=self._id, own_storage=False) return Tensor(device=self.device, impl=self._impl)
@property @property
def dtype(self): def dtype(self):
...@@ -143,7 +127,7 @@ class Tensor(object): ...@@ -143,7 +127,7 @@ class Tensor(object):
@property @property
def grad(self): def grad(self):
"""Return a grad reference if gradient had be computed. """Return the grad of this tensor if computed.
Returns Returns
------- -------
...@@ -151,14 +135,11 @@ class Tensor(object): ...@@ -151,14 +135,11 @@ class Tensor(object):
The grad tensor. The grad tensor.
""" """
grad_id = self._id + '_grad' if self._requires_grad and self._gc:
grad_impl = workspace.get_workspace().GetTensor(grad_id) impl = self._gc._workspace.GetTensor(self.id + '_grad')
if grad_impl is None: if impl is not None:
return None return Tensor(device=self.device, impl=impl)
grad_ref = Tensor(own_storage=False) return None
grad_ref._device = cpp.device(*self._impl.device)
grad_ref._id, grad_ref._impl = grad_id, grad_impl
return grad_ref
@property @property
def grad_fn(self): def grad_fn(self):
...@@ -174,7 +155,19 @@ class Tensor(object): ...@@ -174,7 +155,19 @@ class Tensor(object):
The identity. The identity.
""" """
return self._id return self._impl.name
@property
def is_leaf(self):
"""Return whether tensor is a leaf.
Returns
-------
bool
**True** if this is a leaf tensor otherwise **False**.
"""
return self._is_leaf or not self._requires_grad
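Under this definition a directly constructed tensor is a leaf, an op output that requires grad is not, and tensors that do not require grad always count as leaves. A quick illustration (shapes are illustrative):

```python
import dragon.vm.torch as torch

x = torch.ones(1, requires_grad=True)   # constructed directly -> leaf
y = x + 1                               # op output requiring grad -> non-leaf
z = torch.ones(1)                       # no grad required -> leaf
print(x.is_leaf, y.is_leaf, z.is_leaf)  # True False True
```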
@property @property
def requires_grad(self): def requires_grad(self):
...@@ -191,9 +184,6 @@ class Tensor(object): ...@@ -191,9 +184,6 @@ class Tensor(object):
@requires_grad.setter @requires_grad.setter
def requires_grad(self, value): def requires_grad(self, value):
self._requires_grad = value self._requires_grad = value
if self._const_size is not None:
self._ignored_grads = set() if value \
else {self._id + '_grad'}
@property @property
def shape(self): def shape(self):
...@@ -207,6 +197,11 @@ class Tensor(object): ...@@ -207,6 +197,11 @@ class Tensor(object):
""" """
return self.size() return self.size()
@property
def volatile(self):
warnings.warn('Attribute ``volatile`` was removed (always False).', stacklevel=2)
return False
def abs(self): def abs(self):
r"""Return a tensor with the absolute value. r"""Return a tensor with the absolute value.
...@@ -268,18 +263,17 @@ class Tensor(object): ...@@ -268,18 +263,17 @@ class Tensor(object):
""" """
pass pass
def backward(self, gradient=None): def backward(self, gradient=None, retain_graph=False):
"""Compute the gradients starting from this tensor. """Compute the derivatives of this tensor w.r.t. graph leaves.
If ``gradient`` is not provided, **ones** will be used instead.
Parameters Parameters
--------- ----------
gradient : dragon.vm.torch.Tensor, optional gradient : dragon.vm.torch.Tensor, optional
The optional input gradient. The optional gradient of this tensor.
retain_graph : bool, optional, default=False
**False** to free the graph used to compute grad.
""" """
pass
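A minimal end-to-end sketch of the new signature, assuming eager execution and that a gradient seed matching the output's shape is required for non-scalar outputs, as in PyTorch:

```python
import numpy
from dragon.vm import torch  # assumed import path

x = torch.Tensor(numpy.ones((2, 2), 'float32'))
x.requires_grad = True
y = x.add(x)                                        # a simple differentiable op
seed = torch.Tensor(numpy.ones((2, 2), 'float32'))  # gradient seed for y
y.backward(gradient=seed)                           # pass retain_graph=True to backprop again
print(x.grad)                                       # d(y)/d(x), seeded with ones
```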
def bitwise_not(self): def bitwise_not(self):
r"""Compute the element-wise NOT bitwise operation. r"""Compute the element-wise NOT bitwise operation.
...@@ -546,9 +540,6 @@ class Tensor(object): ...@@ -546,9 +540,6 @@ class Tensor(object):
src._device.index src._device.index
), ),
) )
# Transfer the const size if necessary
self._const_size = src.size() \
if self._const_size else None
return self return self
def cos(self): def cos(self):
...@@ -1506,6 +1497,11 @@ class Tensor(object): ...@@ -1506,6 +1497,11 @@ class Tensor(object):
""" """
pass pass
def retain_grad(self):
"""Retain grad for the non-leaf tensor."""
if self._tape:
self._tape.add_source(self.id)
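An illustrative use, assuming non-leaf gradients are discarded by default and that ``retain_grad`` asks the recording tape to keep them:

```python
import numpy
from dragon.vm import torch  # assumed import path

x = torch.Tensor(numpy.ones((3,), 'float32'))
x.requires_grad = True
y = x.add(x)           # non-leaf intermediate
y.retain_grad()        # ask the tape to also keep y's gradient
z = y.abs()
z.backward(gradient=torch.Tensor(numpy.ones((3,), 'float32')))
print(x.grad, y.grad)  # both expected to be populated after the backward pass
```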
def round(self): def round(self):
r"""Return a tensor taken the round of elements. r"""Return a tensor taken the round of elements.
...@@ -1934,9 +1930,6 @@ class Tensor(object): ...@@ -1934,9 +1930,6 @@ class Tensor(object):
""" """
pass pass
def volatile(self):
pass
def zero_(self): def zero_(self):
r"""Fill self with constant 0. r"""Fill self with constant 0.
...@@ -1954,20 +1947,16 @@ class Tensor(object): ...@@ -1954,20 +1947,16 @@ class Tensor(object):
"""Create impl from the numpy array.""" """Create impl from the numpy array."""
ws = workspace.get_workspace() ws = workspace.get_workspace()
array = array.copy() if copy else array array = array.copy() if copy else array
self._const_size = array.size self._gc, self._is_leaf = ws.collectors.TENSOR, True
self.__gc__ = ws.collectors.TENSOR self._impl = ws.create_tensor(self._gc.alloc(
self._id = self.__gc__.alloc(context.get_eager_scope()) context.get_eager_scope())).FromNumpy(array)
self._impl = ws.CreateTensor(self._id).FromNumpy(array)
self.requires_grad = self._requires_grad
def _from_shape(self, shape, dtype): def _from_shape(self, shape, dtype):
"""Create impl from the shape and data type.""" """Create impl from the shape and data type."""
ws = workspace.get_workspace() ws = workspace.get_workspace()
self._const_size = math_util.prod(shape) self._gc, self._is_leaf = ws.collectors.TENSOR, True
self.__gc__ = ws.collectors.TENSOR self._impl = ws.create_tensor(self._gc.alloc(
self._id = self.__gc__.alloc(context.get_eager_scope()) context.get_eager_scope())).FromShape(shape, dtype)
self._impl = ws.CreateTensor(self._id).FromShape(shape, dtype)
self.requires_grad = self._requires_grad
def _type2str(self): def _type2str(self):
"""Return the tensor type string.""" """Return the tensor type string."""
...@@ -1977,12 +1966,10 @@ class Tensor(object): ...@@ -1977,12 +1966,10 @@ class Tensor(object):
return self.add(other) return self.add(other)
def __del__(self): def __del__(self):
if not self._requires_grad or self._const_size: if self.is_leaf and self._gc:
if self._own_storage and self._id: # Always reuse the leaf tensors.
# Always reuse the leaf variables or tensors # PyGC will detect them automatically.
# that do not require grad. self._gc.collect(self.id)
# PyGC will detect them automatically.
self.__gc__.collect(self._id)
def __div__(self, other): def __div__(self, other):
return self.div(other) return self.div(other)
......
...@@ -32,13 +32,13 @@ def from_dlpack(dlpack): ...@@ -32,13 +32,13 @@ def from_dlpack(dlpack):
The tensor with the dlpack data. The tensor with the dlpack data.
""" """
ws = workspace.get_workspace() current_ws = workspace.get_workspace()
ref = Tensor(device=None) # Hack the constructor. tensor = Tensor(device=None)
ref.__gc__ = ws.collectors.TENSOR tensor._gc = current_ws.collectors.TENSOR
ref._id = ref.__gc__.alloc('${DLPACK}') tensor._impl = current_ws.create_tensor(
ref._impl = ws.CreateTensor(ref._id).FromDLPack(dlpack) tensor._gc.alloc('${DLPACK}')).FromDLPack(dlpack)
ref._device = cpp.device(*ref._impl.device) tensor._device = cpp.device(*tensor._impl.device)
return ref return tensor
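A round-trip sketch pairing this helper with ``to_dlpack`` defined just below; both names come from this module, while the import path and tensor ops are assumptions:

```python
import numpy
from dragon.vm import torch  # assumed import path

x = torch.Tensor(numpy.arange(6, dtype='float32'))
capsule = to_dlpack(x, readonly=True)  # export the tensor as a DLPack capsule
y = from_dlpack(capsule)               # wrap the capsule in a new Tensor
print(y.device, y.size())              # same data; device is taken from the capsule
```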
def to_dlpack(tensor, readonly=True): def to_dlpack(tensor, readonly=True):
......