Commit 8dbb73a7 by Ting PAN

Fix the device inference in eager execution

Summary:
This commit computes the correct device for an existing output tensor: when an op reuses or is given an output, the executing device is now bound to that tensor instead of leaving its previous device spec in place.
1 parent c0c43218
Showing with 296 additions and 295 deletions
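
Before the per-file hunks, the heart of the fix: the eager `Operator` (dragon) and `Function` (torch vm) gain an `alloc(out=None)` overload that, given an existing output tensor, re-binds the executing device to it rather than trusting whatever device spec the tensor already carried. A minimal, self-contained sketch of that contract (toy `DeviceSpec`, `Tensor`, and `Operator`, not the real Dragon classes):

```python
import copy

class DeviceSpec(object):
    """Toy stand-in for dragon's device spec."""
    def __init__(self, device_type='cpu', device_index=0):
        self.device_type, self.device_index = device_type, device_index
    def copy(self):
        return copy.copy(self)

class Tensor(object):
    """Toy tensor carrying only a device."""
    def __init__(self, device):
        self._device = device

class Operator(object):
    """Toy operator with the new alloc() contract."""
    def __init__(self, device):
        self._device = device
    def alloc(self, out=None):
        """Return the executing device, or bind it to an existing output."""
        if out is not None:
            out._device = self._device.copy()  # re-bind: no more stale specs
            return out
        return self._device.copy()

op = Operator(DeviceSpec('cuda', 0))
pre = Tensor(DeviceSpec('cpu'))        # e.g. an inplace/preallocated output
assert op.alloc(pre)._device.device_type == 'cuda'
```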
......@@ -6,32 +6,35 @@ regularizers
Classes
-------
`class L1 <regularizers/L1.html>`_
: The L1 regularizer.
`class L1L2 <regularizers/L1L2.html>`_
: The L1L2 regularizer.
`class L2 <regularizers/L2.html>`_
: The L2 regularizer.
`class Regularizer <regularizers/Regularizer.html>`_
: The base regularizer class.
Functions
---------
`l1(...) <regularizers/l1.html>`_
: Create an L1 regularizer.
`get(...) <regularizers/get.html>`_
: Return the regularizer callable by identifier.
`l1_l2(...) <regularizers/l1_l2.html>`_
: Create an L1L2 regularizer.
`l2(...) <regularizers/l2.html>`_
: Create an L2 regularizer.
.. toctree::
:hidden:
regularizers/l1
regularizers/get
regularizers/L1
regularizers/L1L2
regularizers/l1_l2
regularizers/l2
regularizers/L2
regularizers/Regularizer
.. raw:: html
......
L1
==
.. autoclass:: dragon.vm.tensorflow.keras.regularizers.L1
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.regularizers.L1.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.regularizers.";
color: #103d3e;
}
</style>
l2
L2
==
.. autofunction:: dragon.vm.tensorflow.keras.regularizers.l2
.. autoclass:: dragon.vm.tensorflow.keras.regularizers.L2
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.regularizers.L2.__init__
.. raw:: html
......
l1
==
get
===
.. autofunction:: dragon.vm.tensorflow.keras.regularizers.l1
.. autofunction:: dragon.vm.tensorflow.keras.regularizers.get
.. raw:: html
......
......@@ -35,8 +35,9 @@ void InitializeOp<Context>::RunOnDevice() {
vec64_t out_shape;
int ndims;
dims(0, &ndims);
for (int i = 0; i < ndims; i++)
for (int i = 0; i < ndims; i++) {
out_shape.push_back(dims(i));
}
Output(0)->Reshape(out_shape);
}
}
......
......@@ -4,12 +4,7 @@ namespace dragon {
template <class Context>
void ShapeOp<Context>::RunOnDevice() {
Output(0)->Reshape({Input(0).ndim()});
auto* y = Output(0)->template mutable_data<int64_t, CPUContext>();
for (int i = 0; i < Input(0).ndim(); i++)
y[i] = Input(0).dim(i);
Output(0)->template CopyFrom<int64_t>(Input(0).dims());
}
DEPLOY_CPU(Shape);
......
......@@ -23,6 +23,8 @@ class ShapeOp final : public Operator<Context> {
SIMPLE_CTOR_DTOR(ShapeOp);
USE_OPERATOR_FUNCTIONS;
void SwitchToDevice() override {}
void RunOnDevice() override;
};
......
......@@ -39,7 +39,7 @@ def set_execution(execution='GRAPH_MODE'):
"""
if execution not in ('GRAPH_MODE', 'EAGER_MODE'):
raise ValueError('Unsupported execution mode:', execution)
raise ValueError('Unsupported execution: ' + execution)
config.config().graph_execution = execution
......@@ -75,7 +75,7 @@ def set_scheduler(scheduler='SIMPLE'):
"""
if scheduler not in ('SIMPLE', 'FUSION'):
raise ValueError('Unsupported scheduler type:', scheduler)
raise ValueError('Unsupported scheduler: ' + scheduler)
if scheduler == 'SIMPLE':
config.config().graph_type = ''
elif scheduler == 'FUSION':
......
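
A side cleanup repeated in many hunks below: `raise ValueError('message:', value)` builds the exception from an args tuple, so the printed message is the tuple's repr; concatenating into one string yields the intended text. A quick illustration (the mode string is just an example value):

```python
# Tuple-style: str(exc) is the repr of the args tuple.
try:
    raise ValueError('Unsupported execution mode:', 'XLA_MODE')
except ValueError as e:
    print(e)   # ('Unsupported execution mode:', 'XLA_MODE')

# Concatenated: the message reads as intended.
try:
    raise ValueError('Unsupported execution: ' + 'XLA_MODE')
except ValueError as e:
    print(e)   # Unsupported execution: XLA_MODE
```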
......@@ -40,7 +40,7 @@ class Backend(object):
if not is_nccl_available():
raise ValueError('NCCL backend is not available.')
elif value == Backend.UNDEFINED:
raise ValueError('Invalid backend:', name)
raise ValueError('Invalid backend: ' + name)
return value
......
......@@ -44,7 +44,7 @@ def device(device_type, device_index=0):
"""
device_type = device_type.lower()
if device_type not in mapping.DEVICE_STRING_TO_DEVICE_TYPE:
raise ValueError('Unsupported device type:', device_type)
raise ValueError('Unsupported device type: ' + device_type)
return _GLOBAL_DEVICE_STACK.get_controller({
'device_type': mapping.DEVICE_STRING_TO_DEVICE_TYPE[device_type],
'device_index': device_index,
......
......@@ -37,8 +37,11 @@ class Operator(object):
self._arg_device = self._arg_device.SerializeToString()
self._seed = kwargs.get('seed', config.config().random_seed)
def alloc(self):
"""Return the executing device to create an output tensor."""
def alloc(self, out=None):
"""Return or bind the executing device to output tensor."""
if out is not None:
out._device = self._device.copy()
return out
return self._device.copy()
def apply(self, *args, **kwargs):
......@@ -124,11 +127,6 @@ class Operator(object):
return self.forward(*args, **kwargs)
def new_leaf(shape, dtype, device, trainable=False):
"""Return a leaf resource."""
return EagerTensor(**locals())
def remove_binary_scalar(inputs):
"""Remove the scalar for binary ops."""
if types.is_tensor(inputs[0]):
......
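
With `new_leaf` removed above, every call site follows one recurring pattern: inplace outputs are routed through `alloc()` so their device is refreshed, while fresh outputs get a new device spec. A sketch of that `forward` shape, extending the toy classes from the first sketch (the `dispatch` stand-in is illustrative):

```python
class Activation(Operator):        # extends the toy Operator sketched above
    def dispatch(self, inputs, outputs):
        return outputs[0]          # stand-in for the real executor
    def forward(self, inputs, inplace=False):
        # inplace: reuse inputs[0] but refresh its device binding;
        # otherwise: hand out a fresh device spec for a new output.
        outputs = [self.alloc(inputs[0]) if inplace else self.alloc()]
        return self.dispatch(inputs, outputs)

act = Activation(DeviceSpec('cuda', 0))
y = act.forward([Tensor(DeviceSpec('cpu'))], inplace=True)
assert y._device.device_type == 'cuda'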
......@@ -211,7 +211,7 @@ class Workspace(backend.Workspace):
dtype = value.dtype if dtype is None else dtype
if hasattr(tensor, 'dtype') and tensor.dtype is not None:
if tensor.dtype not in mapping.TENSOR_TYPE_TO_NP_TYPE:
raise TypeError('Unsupported data type:', tensor.dtype)
raise TypeError('Unsupported data type: ' + tensor.dtype)
dtype = mapping.TENSOR_TYPE_TO_NP_TYPE[tensor.dtype]
# Determine the copying device option
if enforce_cpu is True:
......
......@@ -61,10 +61,8 @@ def dropout(inputs, prob=0.5, scale=True, **kwargs):
op_lib = activation_ops_lib.Dropout
if context.executing_eagerly():
return op_lib \
.instantiate(
prob=args['prob'],
scale=scale,
).apply([inputs], inplace=inplace)
.instantiate(prob=args['prob'], scale=scale) \
.apply([inputs], inplace=inplace)
else:
return op_lib.blend(**args)
......
......@@ -26,7 +26,7 @@ class Activation(Operator):
return {'op_type': self.op_type, 'arguments': {}}
def forward(self, inputs, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()]
outputs = [self.alloc(inputs[0]) if inplace else self.alloc()]
return self.dispatch(inputs, outputs)
......@@ -46,7 +46,7 @@ class Dropout(Activation):
}
class DropBlock2d(Operator):
class DropBlock2d(Activation):
def __init__(self, key, dev, **kwargs):
super(DropBlock2d, self).__init__(key, dev, **kwargs)
self.block_size = kwargs.get('block_size', 7)
......@@ -67,10 +67,6 @@ class DropBlock2d(Operator):
},
}
def forward(self, inputs, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()]
return self.dispatch(inputs, outputs)
class DropPath(Activation):
def __init__(self, key, dev, **kwargs):
......@@ -96,9 +92,7 @@ class Elu(Activation):
def attributes(self):
return {
'op_type': 'Elu',
'arguments': {
'alpha': float(self.alpha),
}
'arguments': {'alpha': float(self.alpha)},
}
......@@ -110,9 +104,7 @@ class PRelu(Operator):
def attributes(self):
return {
'op_type': 'PRelu',
'arguments': {
'data_format': self.data_format,
}
'arguments': {'data_format': self.data_format},
}
def forward(self, inputs):
......@@ -127,9 +119,7 @@ class Relu(Activation):
def attributes(self):
return {
'op_type': 'Relu',
'arguments': {
'alpha': float(self.alpha),
}
'arguments': {'alpha': float(self.alpha)},
}
......@@ -140,9 +130,7 @@ class Relu6(Activation):
def attributes(self):
return {
'op_type': 'Relu',
'arguments': {
'max_value': 6.,
}
'arguments': {'max_value': 6.},
}
......@@ -170,7 +158,5 @@ class Softmax(Activation):
def attributes(self):
return {
'op_type': 'Softmax',
'arguments': {
'axis': self.axis,
}
'arguments': {'axis': self.axis},
}
......@@ -14,6 +14,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.core.framework import device_spec
from dragon.core.framework.ops import Operator
......@@ -41,13 +42,14 @@ class Arange(Operator):
slice_args[i], 'float32')
def forward(self, slice_args, trainable=False):
output = self.dispatch(
out = self.dispatch(
[], [self.alloc()],
callback=lambda ws, handle:
self.feed(ws, handle, slice_args)
self.feed(ws, handle, slice_args),
no_grad=True,
)
output._requires_grad = trainable
return output
out._requires_grad = trainable
return out
class ArgReduce(Operator):
......@@ -85,7 +87,7 @@ class Cast(Operator):
if inputs[0].dtype == self.dtype:
return inputs[0]
if inplace:
return self.dispatch([], inputs, no_grad=True)
return self.dispatch([], [self.alloc(inputs[0])], no_grad=True)
return self.dispatch(inputs, [self.alloc()])
......@@ -122,7 +124,7 @@ class ChannelNormalize(Operator):
return self.dispatch(
inputs, [self.alloc()],
callback=lambda ws, handle:
self.feed(ws, handle, perm)
self.feed(ws, handle, perm),
)
......@@ -225,7 +227,7 @@ class ExpandDims(Operator):
}
def forward(self, inputs, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()]
outputs = [self.alloc(inputs[0]) if inplace else self.alloc()]
return self.dispatch(inputs, outputs)
......@@ -247,7 +249,7 @@ class Flatten(Operator):
}
def forward(self, inputs, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()]
outputs = [self.alloc(inputs[0]) if inplace else self.alloc()]
return self.dispatch(inputs, outputs)
......@@ -447,11 +449,10 @@ class Reshape(Operator):
e, 'int64')
def forward(self, inputs, shape, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()]
return self.dispatch(
inputs, outputs,
inputs, [self.alloc(inputs[0]) if inplace else self.alloc()],
callback=lambda ws, handle:
self.feed(ws, handle, shape)
self.feed(ws, handle, shape),
)
......@@ -493,12 +494,13 @@ class Slice(Operator):
class Shape(Operator):
def __init__(self, key, dev, **kwargs):
super(Shape, self).__init__(key, dev, **kwargs)
self._device = device_spec.DeviceSpec()
def attributes(self):
return {'op_type': 'Shape', 'arguments': {}}
def forward(self, inputs):
return self.dispatch(inputs, [self.alloc()])
return self.dispatch(inputs, [self.alloc()], no_grad=True)
class Split(Operator):
......@@ -535,7 +537,7 @@ class Squeeze(Operator):
}
def forward(self, inputs, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()]
outputs = [self.alloc(inputs[0]) if inplace else self.alloc()]
return self.dispatch(inputs, outputs)
......
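
A subtlety in the `Shape` hunk above: the operator overrides `self._device` with a default `DeviceSpec()`, pinning its output to the host regardless of the input's device, because a shape is int64 metadata the front end reads on CPU; the dispatch is also `no_grad=True` since shapes never carry gradients. The C++ `ShapeOp` hunks earlier match this by copying `Input(0).dims()` directly and making `SwitchToDevice()` a no-op. A toy sketch on the classes from the first sketch, assuming `DeviceSpec()` defaults to cpu:0:

```python
class Shape(Operator):             # toy mirror of the Shape operator
    def __init__(self):
        # Pin the output device to the host, ignoring the input's device.
        super(Shape, self).__init__(DeviceSpec())  # assumed default: cpu:0
    def forward(self, inputs):
        return self.alloc()        # stand-in: real code dispatches no_grad

assert Shape().forward([Tensor(DeviceSpec('cuda', 0))]).device_type == 'cpu'
```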
......@@ -62,7 +62,7 @@ class Copy(Operator):
def forward(self, inputs, outputs):
outputs = outputs if outputs else [self.alloc()]
return self.dispatch(inputs, outputs)
return self.dispatch(inputs, outputs, no_grad=True)
class MaskedAssign(Operator):
......
......@@ -46,7 +46,7 @@ def all_reduce(inputs, operation='MEAN', group=None, **kwargs):
if group is None:
raise ValueError('<group> is required.')
if operation not in ('MEAN', 'SUM'):
raise ValueError('Unsupported reduce op:', operation)
raise ValueError('Unsupported reduce op: ' + operation)
args.update(group.arguments)
args.pop('group')
op_lib = distributed_ops_lib.Collective
......
......@@ -115,11 +115,8 @@ def eye(n, m=None, k=0, dtype='float32', **kwargs):
if types.is_tensor(m):
m = int(m.get_value())
return op_lib \
.instantiate(
k=k,
ndim=2,
dtype=dtype,
).apply([n, m], trainable=trainable)
.instantiate(k=k, ndim=2, dtype=dtype) \
.apply([n, m], trainable=trainable)
else:
args['n'] = args['m'] = None
if types.is_tensor(n) or types.is_tensor(m):
......@@ -173,14 +170,8 @@ def eye_like(other, k=0, dtype='float32', **kwargs):
op_lib = init_ops_lib.Eye
if context.executing_eagerly():
return op_lib \
.instantiate(
k=k,
dtype=dtype,
).apply(
shape=[],
shape_like=other,
trainable=trainable,
)
.instantiate(k=k, dtype=dtype) \
.apply([], other, trainable=trainable)
else:
args.pop('other')
return op_lib.blend(inputs=[other], **args)
......@@ -366,14 +357,8 @@ def ones_like(other, dtype='float32', **kwargs):
op_lib = init_ops_lib.Fill
if context.executing_eagerly():
return op_lib \
.instantiate(
value=1,
dtype=dtype,
).apply(
shape=[],
shape_like=other,
trainable=trainable,
)
.instantiate(value=1, dtype=dtype) \
.apply([], other, trainable=trainable)
else:
args.pop('other')
return op_lib.blend(inputs=[other], value=1., **args)
......@@ -453,11 +438,7 @@ def random_normal_like(other, mean=0, std=1, dtype='float32', **kwargs):
mean=args['mean'],
std=args['std'],
dtype=dtype,
).apply(
shape=[],
shape_like=other,
trainable=trainable,
)
).apply([], other, trainable=trainable)
else:
args.pop('other')
return op_lib.blend(inputs=[other], **args)
......@@ -535,11 +516,7 @@ def random_uniform_like(other, low=-1, high=1, dtype='float32', **kwargs):
low=args['low'],
high=args['high'],
dtype=dtype,
).apply(
shape=[],
shape_like=other,
trainable=trainable,
)
).apply([], other, trainable=trainable)
else:
args.pop('other')
return op_lib.blend(inputs=[other], **args)
......@@ -641,14 +618,8 @@ def zeros_like(other, dtype='float32', **kwargs):
op_lib = init_ops_lib.Fill
if context.executing_eagerly():
return op_lib \
.instantiate(
value=0,
dtype=dtype,
).apply(
shape=[],
shape_like=other,
trainable=trainable,
)
.instantiate(value=0, dtype=dtype) \
.apply([], other, trainable=trainable)
else:
args.pop('other')
return op_lib.blend(inputs=[other], value=0., **args)
......@@ -14,7 +14,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.core.framework import ops
from dragon.core.framework.ops import Operator
......@@ -30,25 +29,17 @@ class Initializer(Operator):
ws, '{}/dims[{}]'.format(handle, i),
dim, 'int64')
def forward(
self,
shape,
out=None,
shape_like=None,
trainable=False,
):
inputs = [] if shape_like is None else [shape_like]
outputs = [ops.new_leaf(
shape=shape,
dtype=self.dtype,
device=self.alloc(),
trainable=trainable,
) if out is None else out]
return self.dispatch(
inputs, outputs,
def forward(self, shape, shape_as=None, out=None, trainable=None):
out = self.dispatch(
[] if shape_as is None else [shape_as],
[self.alloc(out)],
callback=lambda ws, handle:
self.feed(ws, handle, shape),
no_grad=True,
)
if trainable is not None:
out._requires_grad = trainable
return out
class Eye(Initializer):
......
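
The `Initializer.forward` rework above replaces the pre-built `EagerTensor` leaf with a plain dispatch: `self.alloc(out)` either binds the device to a caller-supplied `out` or allocates one, and `_requires_grad` is set afterwards only when `trainable` was passed. A toy mirror of the two call paths (`apply(shape, ...)` for explicit shapes, `apply([], other, ...)` for the `*_like` ops); in the real code, `dispatch` materializes the output tensor:

```python
class Initializer(Operator):       # toy mirror of the reworked forward()
    def dispatch(self, inputs, outputs, **kwargs):
        return outputs[0]          # the real dispatch materializes the tensor
    def forward(self, shape, shape_as=None, out=None, trainable=None):
        out = self.dispatch(
            [] if shape_as is None else [shape_as],
            [self.alloc(out)],     # bind device to `out`, or allocate anew
        )
        if trainable is not None:  # only touch the flag when requested
            out._requires_grad = trainable
        return out

init = Initializer(DeviceSpec('cuda', 0))
y = init.forward([], shape_as=Tensor(DeviceSpec('cpu')), trainable=True)
assert y._requires_grad
```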
......@@ -53,7 +53,8 @@ class Axpby(Operator):
def forward(self, inputs, outputs=None):
if outputs is None:
outputs = [self.alloc() for _ in range(len(inputs))]
outputs = [None] * len(inputs)
outputs = [self.alloc(out) for out in outputs]
return self.dispatch(inputs, outputs, no_grad=True)
......@@ -65,12 +66,8 @@ class BinaryOp(Operator):
def attributes(self):
return {'op_type': self.op_type, 'arguments': {}}
def forward(self, inputs, outputs=None):
if outputs is None:
outputs = [self.alloc()]
else:
outputs[0]._device = self.alloc()
return self.dispatch(inputs, outputs)
def forward(self, inputs, outputs=(None,)):
return self.dispatch(inputs, [self.alloc(outputs[0])])
class Clip(Operator):
......
......@@ -23,7 +23,7 @@ class Metric(Operator):
self.reduction = kwargs.get('reduction', 'MEAN')
def forward(self, inputs):
return self.dispatch(inputs, [self.alloc()])
return self.dispatch(inputs, [self.alloc()], no_grad=True)
class Accuracy(Metric):
......
......@@ -576,7 +576,7 @@ def uniform(self, low=0, high=1):
).apply(shape, out=self)
def _binary_op(a, b, op_type, outputs=None):
def _binary_op(a, b, op_type, outputs=(None,)):
"""Apply the general binary operation."""
return math_ops_lib.BinaryOp \
.instantiate(op_type=op_type) \
......
......@@ -85,13 +85,11 @@ class BiasAdd(Operator):
def attributes(self):
return {
'op_type': 'BiasAdd',
'arguments': {
'data_format': self.data_format,
},
'arguments': {'data_format': self.data_format},
}
def forward(self, inputs, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()]
outputs = [self.alloc(inputs[0]) if inplace else self.alloc()]
return self.dispatch(inputs, outputs)
......
......@@ -85,6 +85,16 @@ def getfullargspec(obj):
return _getfullargspec(target)
def isclass(object):
"""Decorator-aware replacement for ``inspect.isclass``."""
return _inspect.isclass(decorator.unwrap(object)[1])
def isfunction(object):
"""Decorator-aware replacement for ``inspect.isfunction``."""
return _inspect.isfunction(decorator.unwrap(object)[1])
def ismethod(object):
"""Decorator-aware replacement for ``inspect.ismethod``."""
return _inspect.ismethod(decorator.unwrap(object)[1])
......@@ -14,12 +14,15 @@ from __future__ import division as _division
from __future__ import print_function as _print_function
# Classes
from dragon.vm.tensorflow.core.keras.regularizers import L1
from dragon.vm.tensorflow.core.keras.regularizers import l1
from dragon.vm.tensorflow.core.keras.regularizers import L1L2
from dragon.vm.tensorflow.core.keras.regularizers import L2
from dragon.vm.tensorflow.core.keras.regularizers import l2
from dragon.vm.tensorflow.core.keras.regularizers import Regularizer
# Functions
from dragon.vm.tensorflow.core.keras.regularizers import l1
from dragon.vm.tensorflow.core.keras.regularizers import get
from dragon.vm.tensorflow.core.keras.regularizers import l1_l2
from dragon.vm.tensorflow.core.keras.regularizers import l2
__all__ = [_s for _s in dir() if not _s.startswith('_')]
......@@ -7,10 +7,6 @@
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/activations.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
......@@ -18,6 +14,7 @@ from __future__ import division
from __future__ import print_function
from dragon.core.util import six
from dragon.vm.tensorflow.core.keras.utils import generic_utils
from dragon.vm.tensorflow.core.ops import math_ops
from dragon.vm.tensorflow.core.ops import nn
......@@ -272,7 +269,7 @@ def tanh(x, **kwargs):
def get(identifier):
"""Return the activation callable by identifier.
"""Return the activation function by identifier.
Parameters
----------
......@@ -282,7 +279,7 @@ def get(identifier):
Returns
-------
callable
The activation callable.
The activation function.
"""
if identifier is None:
......@@ -290,8 +287,9 @@ def get(identifier):
elif callable(identifier):
return identifier
elif isinstance(identifier, six.string_types):
return globals()[identifier]
return generic_utils.deserialize_keras_object(
identifier, globals(), 'activation')
else:
raise TypeError(
'Could not interpret activation identifier: {}.'
.format(repr(identifier)))
'Could not interpret the activation identifier: {}.'
.format(identifier))
......@@ -75,7 +75,7 @@ def Input(
if shape is not None:
shape = (batch_size,) + tuple(shape)
if kwargs:
raise ValueError('Unrecognized keyword arguments:', kwargs.keys())
raise ValueError('Unrecognized keyword arguments: ' + str(kwargs.keys()))
if dtype is None:
if tensor is not None:
dtype = tensor.dtype
......
......@@ -18,6 +18,7 @@ from __future__ import division
from __future__ import print_function
from dragon.core.util import six
from dragon.vm.tensorflow.core.keras.utils import generic_utils
from dragon.vm.tensorflow.core.ops.init_ops import Constant
from dragon.vm.tensorflow.core.ops.init_ops import GlorotNormal
from dragon.vm.tensorflow.core.ops.init_ops import GlorotUniform
......@@ -60,8 +61,9 @@ def get(identifier):
elif callable(identifier):
return identifier
elif isinstance(identifier, six.string_types):
return globals()[identifier]
return generic_utils.deserialize_keras_object(
identifier, globals(), 'initializer')
else:
raise TypeError(
'Could not interpret initializer identifier: {}.'
.format(repr(identifier)))
'Could not interpret the initializer identifier: {}.'
.format(identifier))
......@@ -96,7 +96,7 @@ class BatchNormalization(Layer):
def build(self, input_shape):
input_shape = tensor_shape.TensorShape(input_shape)
if not input_shape.ndims:
raise ValueError('Input has undefined rank:', input_shape)
raise ValueError('Input has undefined rank: ' + str(input_shape))
param_shape = [input_shape.dims[self.axis]]
self.input_spec = InputSpec(
# Each layer should adapt to the:
......
......@@ -7,10 +7,6 @@
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/losses.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
......@@ -20,6 +16,7 @@ from __future__ import print_function
from dragon.core.framework import context
from dragon.core.ops import loss_ops
from dragon.core.util import six
from dragon.vm.tensorflow.core.keras.utils import generic_utils
from dragon.vm.tensorflow.core.keras.utils import losses_utils
......@@ -523,8 +520,9 @@ def get(identifier):
elif callable(identifier):
return identifier
elif isinstance(identifier, six.string_types):
return globals()[identifier]
return generic_utils.deserialize_keras_object(
identifier, globals(), 'loss')
else:
raise TypeError(
'Could not interpret loss identifier: {}.'
.format(repr(identifier)))
'Could not interpret the loss identifier: {}.'
.format(identifier))
......@@ -7,10 +7,6 @@
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/optimizer_v2/optimizer.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
......@@ -51,7 +47,7 @@ class Optimizer(optimizer_v1.Optimizer):
allowed_kwargs = {'scale', 'clipnorm', 'lr'}
for k in kwargs:
if k not in allowed_kwargs:
raise TypeError('Unexpected keyword argument:', str(k))
raise TypeError('Unexpected keyword argument: ' + str(k))
if kwargs[k] < 0:
raise ValueError("Expected {} >= 0, received: {}".format(k, kwargs[k]))
......@@ -109,15 +105,17 @@ class Optimizer(optimizer_v1.Optimizer):
for g, v in grads_and_vars:
if g is not None:
decay_mult = 0.
if hasattr(v, '__regularizer__'):
decay_mult = v.__regularizer__.l2 / self.BASE_WEIGHT_DECAY
regularizer = getattr(v, '_regularizer', None)
if regularizer is not None:
decay_mult = regularizer.l2 / self.BASE_WEIGHT_DECAY
self._run_update(v, g, decay_mult=decay_mult)
else:
# Store for the lazy compilation.
for g, v in grads_and_vars:
decay_mult = 0.
if hasattr(v, '__regularizer__'):
decay_mult = v.__regularizer__.l2 / self.BASE_WEIGHT_DECAY
regularizer = getattr(v, '_regularizer', None)
if regularizer is not None:
decay_mult = regularizer.l2 / self.BASE_WEIGHT_DECAY
self._add_update(v, g, decay_mult=decay_mult)
# Increase the iterations.
......
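
The rename from `__regularizer__` to `_regularizer` pairs with the regularizer hunk further below, where `Regularizer.__call__` now sets `x._regularizer = self`; the optimizer then reads it back with a `getattr` default instead of `hasattr`. A sketch of the lookup (the `BASE_WEIGHT_DECAY` value is assumed for illustration):

```python
BASE_WEIGHT_DECAY = 0.0001         # assumed value, for illustration only

def decay_mult_for(var):
    """Return the per-variable decay multiplier, 0 when unregularized."""
    regularizer = getattr(var, '_regularizer', None)
    if regularizer is not None:
        return regularizer.l2 / BASE_WEIGHT_DECAY
    return 0.
```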
......@@ -7,10 +7,6 @@
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/regularizers.py>
#
# ------------------------------------------------------------
"""Built-in regularizers."""
......@@ -18,6 +14,9 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.core.util import six
from dragon.vm.tensorflow.core.keras.utils import generic_utils
class Regularizer(object):
"""The base regularizer class."""
......@@ -36,10 +35,33 @@ class Regularizer(object):
The output tensor.
"""
x.__regularizer__ = self
x._regularizer = self
return x
class L1(Regularizer):
r"""The L1 regularizer.
The **L1** regularizer is defined as:
.. math:: loss_{reg} = loss + \alpha|w|
"""
def __init__(self, l1=0.01):
r"""Create a ``L1`` regularizer.
Parameters
----------
l1 : float, optional, default=0.01
The value to :math:`\alpha`.
"""
if l1 <= 0.:
raise ValueError('<l1> should be greater than 0.')
self.l1 = l1
class L1L2(Regularizer):
r"""The L1L2 regularizer.
......@@ -65,39 +87,27 @@ class L1L2(Regularizer):
self.l1, self.l2 = l1, l2
def get(identifier):
"""Return a regularizer from the identifier."""
if identifier is None:
return None
elif callable(identifier):
return identifier
else:
raise ValueError(
'Could not interpret regularizer identifier:',
identifier,
)
class L2(Regularizer):
r"""The L2 regularizer.
# Aliases
def l1(l=0.01):
r"""Create a L1 regularizer.
The **L2** regularizer is defined as:
The **L1** regularizer is defined as:
.. math:: loss_{reg} = loss + \frac{\beta}{2}|w|_{2}
.. math:: loss_{reg} = loss + \alpha|w|
"""
Parameters
----------
l : float, optional, default=0.01
The value to :math:`\alpha`.
def __init__(self, l2=0.01):
r"""Create a ``L2`` regularizer.
Returns
-------
dragon.vm.tensorflow.keras.regularizers.Regularizer
The regularizer.
Parameters
----------
l2 : float, optional, default=0.01
The value to :math:`\beta`.
"""
return L1L2(l1=l)
"""
if l2 <= 0.:
raise ValueError('<l2> should be greater than 0.')
self.l2 = l2
def l1_l2(l1=0.01, l2=0.01):
......@@ -123,22 +133,35 @@ def l1_l2(l1=0.01, l2=0.01):
return L1L2(l1=l1, l2=l2)
def l2(l=0.01):
r"""Create a L2 regularizer.
# Aliases
l1 = L1
l2 = L2
The **L2** regularizer is defined as:
.. math:: loss_{reg} = loss + \frac{\beta}{2}|w|_{2}
def get(identifier):
"""Return the regularizer callable by identifier.
Parameters
----------
l : float, optional, default=0.01
The value to :math:`\beta`.
identifier : Union[callable, str]
The identifier.
Returns
-------
dragon.vm.tensorflow.keras.regularizers.Regularizer
The regularizer.
callable
The regularizer callable.
"""
return L1L2(l2=l)
if identifier is None:
return None
elif callable(identifier):
return identifier
elif isinstance(identifier, six.string_types):
if identifier == 'l1_l2':
return L1L2(l1=0.01, l2=0.01)
return generic_utils.deserialize_keras_object(
identifier, globals(), 'regularizer')
else:
raise TypeError(
'Could not interpret the regularizer identifier: {}.'
.format(identifier))
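
Net effect of the regularizers rework: `L1` and `L2` become classes, lowercase `l1`/`l2` become aliases of them, and `get` resolves strings through the new `deserialize_keras_object` helper, with `'l1_l2'` special-cased because only a factory exists for that pair. A hedged usage sketch against the documented import path:

```python
from dragon.vm.tensorflow.keras import regularizers  # path per the docs above

reg_a = regularizers.L2(l2=0.01)     # class, new in this commit
reg_b = regularizers.l2(l2=0.01)     # lowercase alias of the same class
reg_c = regularizers.get('l1')       # -> L1() instantiated with defaults
reg_d = regularizers.get('l1_l2')    # special case -> L1L2(l1=0.01, l2=0.01)
```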
......@@ -30,7 +30,7 @@ def convert_data_format(data_format, ndim):
elif ndim == 5:
return 'NDHWC'
else:
raise ValueError('Input rank not supported:', ndim)
raise ValueError('Input rank not supported: ' + str(ndim))
elif data_format == 'channels_first':
if ndim == 3:
return 'NCW'
......@@ -39,9 +39,9 @@ def convert_data_format(data_format, ndim):
elif ndim == 5:
return 'NCDHW'
else:
raise ValueError('Input rank not supported: ' + str(ndim))
raise ValueError('Input rank not supported: ' + ndim)
else:
raise ValueError('Invalid data_format:', data_format)
raise ValueError('Invalid data_format: ' + data_format)
def deconv_output_length(
......@@ -94,8 +94,7 @@ def normalize_padding(value):
if value not in {'valid', 'same'}:
raise ValueError(
'Expected <padding> in "valid", "same".\n'
'Received: ' + str(value)
)
'Received: ' + str(value))
return value
......
......@@ -19,6 +19,32 @@ from __future__ import print_function
import re
from dragon.core.util import inspect
from dragon.core.util import six
def deserialize_keras_object(
identifier,
module_objects,
printable_module_name='object',
):
"""Deserialize the keras object."""
if isinstance(identifier, six.string_types):
object_name = identifier
obj = module_objects.get(object_name)
if obj is None:
raise ValueError(
'Unknown ' + printable_module_name + ': ' + object_name)
if inspect.isclass(obj):
return obj()
return obj
elif inspect.isfunction(identifier):
return identifier
else:
raise TypeError(
'Could not interpret the {} identifier: {}.'
.format(printable_module_name, identifier))
def to_snake_case(name):
"""Convert the name from camel-style to snake-style."""
......
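
In isolation, the new helper's contract is: look the identifier up in the supplied namespace, instantiate classes with their defaults, and pass functions through unchanged. A self-contained toy (names illustrative, not the real keras modules):

```python
def deserialize(identifier, module_objects, printable_module_name='object'):
    """Toy version of the lookup: classes -> instance, functions -> as-is."""
    obj = module_objects.get(identifier)
    if obj is None:
        raise ValueError(
            'Unknown ' + printable_module_name + ': ' + identifier)
    return obj() if isinstance(obj, type) else obj

class L1(object):
    def __init__(self, l1=0.01):
        self.l1 = l1

def relu(x):
    return max(x, 0.)

assert isinstance(deserialize('L1', {'L1': L1}, 'regularizer'), L1)
assert deserialize('relu', {'relu': relu}, 'activation') is relu
```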
......@@ -7,10 +7,6 @@
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/utils/losses_utils.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
......
......@@ -249,7 +249,7 @@ class VarianceScaling(Initializer):
raise ValueError('<scale> must be a positive float.')
mode = mode.lower()
if mode not in {'fan_in', 'fan_out', 'fan_avg'}:
raise ValueError('Invalid <mode> argument:', mode)
raise ValueError('Invalid <mode> argument: ' + mode)
distribution = distribution.lower()
if distribution not in {'normal', 'uniform'}:
raise ValueError("Invalid `distribution` argument:", distribution)
......@@ -434,10 +434,12 @@ def glorot_normal_initializer(dtype='float32'):
# Aliases
zeros_initializer = Zeros
ones_initializer = Ones
constant_initializer = Constant
random_uniform_initializer = RandomUniform
random_normal_initializer = RandomNormal
truncated_normal_initializer = TruncatedNormal
zeros_initializer = zero = zeros = Zeros
ones_initializer = one = ones = Ones
constant_initializer = constant = Constant
random_uniform_initializer = uniform = random_uniform = RandomUniform
random_normal_initializer = normal = random_normal = RandomNormal
truncated_normal_initializer = truncated_normal = TruncatedNormal
variance_scaling_initializer = VarianceScaling
glorot_normal = GlorotNormal
glorot_uniform = GlorotUniform
......@@ -26,7 +26,7 @@ def convert_data_format(data_format):
elif data_format == 'channels_first':
return 'NCHW'
else:
raise ValueError('Invalid data_format:', data_format)
raise ValueError('Invalid data_format: ' + data_format)
def normalize_data_format(value):
......
......@@ -70,15 +70,23 @@ class Function(object):
self._arg_device = self._arg_device.SerializeToString()
self._seed = kwargs.get('seed', config.config().random_seed)
def alloc(self):
"""Return the executing device to create an output tensor.
def alloc(self, out=None):
"""Return or bind the executing device to output tensor.
Parameters
----------
out : dragon.vm.torch.Tensor, optional
The optional output tensor.
Returns
-------
dragon.vm.torch.device
The device spec.
Union[dragon.vm.torch.device, dragon.vm.torch.Tensor]
The executing device or output tensor.
"""
if out is not None:
out._device = self._device.copy()
return out
return self._device.copy()
def apply(self, *args, **kwargs):
......@@ -106,7 +114,7 @@ class Function(object):
"""Dispatch the execution."""
if self._def is None:
self._gen_def()
if check_device:
if len(inputs) > 1 and check_device:
self._check_device(inputs)
return execute.run_operator(
op_def=self._def,
......
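
Two changes land in the torch `Function` above: the same `alloc(out=None)` binding as the dragon `Operator`, and the device-consistency walk now being skipped for zero- or one-input ops, where there is nothing to cross-check; this is what lets `RNNParamSet` and `ParamUpdate` below drop their explicit `check_device=False`. A toy mirror on the earlier sketch classes:

```python
class Function(Operator):          # toy mirror of the torch-side change
    def _check_device(self, inputs):
        devices = {(t._device.device_type, t._device.device_index)
                   for t in inputs}
        if len(devices) > 1:
            raise RuntimeError('Tensors are on different devices.')
    def dispatch(self, inputs, outputs, check_device=True):
        if len(inputs) > 1 and check_device:  # nothing to compare for <= 1
            self._check_device(inputs)
        return outputs[0]

fn = Function(DeviceSpec('cuda', 0))
fn.dispatch([Tensor(DeviceSpec('cpu'))], [fn.alloc()])  # one input: no check
```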
......@@ -66,7 +66,7 @@ def calculate_gain(nonlinearity, param=None):
raise ValueError("Negative slope {} is not a valid number".format(param))
return math.sqrt(2.0 / (1 + negative_slope ** 2))
else:
raise ValueError('Unsupported nonlinearity:', nonlinearity)
raise ValueError('Unsupported nonlinearity: ' + nonlinearity)
def constant_(tensor, val):
......
......@@ -340,8 +340,7 @@ class LpNormalize(function.Function):
}
def forward(self, input, out=None):
out = out if out else self.alloc()
return self.dispatch([input], [out])
return self.dispatch([input], [self.alloc(out)])
class LSTMCell(function.Function):
......@@ -564,11 +563,7 @@ class RNNParamSet(function.Function):
}
def forward(self, param, weights):
return self.dispatch(
[param], [weights],
no_grad=True,
check_device=False,
)
return self.dispatch([param], [weights], no_grad=True)
class SigmoidCrossEntropy(_Loss):
......
......@@ -34,8 +34,7 @@ class ArgReduce(function.Function):
}
def forward(self, input, out=None):
outputs = [out] if out else [self.alloc()]
return self.dispatch([input], outputs, no_grad=True)
return self.dispatch([input], [self.alloc(out)], no_grad=True)
class Assign(function.Function):
......@@ -148,8 +147,7 @@ class ChannelShuffle(function.Function):
}
def forward(self, input, out=None):
out = out if out else self.alloc()
return self.dispatch([input], [out])
return self.dispatch([input], [self.alloc(out)])
class Concat(function.Function):
......@@ -164,8 +162,7 @@ class Concat(function.Function):
}
def forward(self, seq, out=None):
out = out if out else self.alloc()
return self.dispatch(seq, [out])
return self.dispatch(seq, [self.alloc(out)])
class Cumulative(function.Function):
......@@ -187,8 +184,7 @@ class Cumulative(function.Function):
}
def forward(self, input, out=None):
out = out if out else self.alloc()
return self.dispatch([input], [out])
return self.dispatch([input], [self.alloc(out)])
class Expand(function.Function):
......@@ -235,9 +231,8 @@ class IndexSelect(function.Function):
},
}
def forward(self, input, indices, out=None):
out = out if out else self.alloc()
return self.dispatch([input, indices], [out])
def forward(self, input, index, out=None):
return self.dispatch([input, index], [self.alloc(out)])
class MaskedAssign(function.Function):
......@@ -248,7 +243,7 @@ class MaskedAssign(function.Function):
return {'op_type': 'MaskedAssign', 'arguments': {}}
def forward(self, out, mask, input):
return self.dispatch([input, mask], [out])
return self.dispatch([input, mask], [self.alloc(out)])
class MaskedSelect(function.Function):
......@@ -259,8 +254,7 @@ class MaskedSelect(function.Function):
return {'op_type': 'MaskedSelect', 'arguments': {}}
def forward(self, input, mask, out=None):
out = out if out else self.alloc()
return self.dispatch([input, mask], [out])
return self.dispatch([input, mask], [self.alloc(out)])
class Multinomial(function.Function):
......@@ -280,8 +274,7 @@ class Multinomial(function.Function):
}
def forward(self, input, out=None):
out = out if out else self.alloc()
return self.dispatch([input], [out], no_grad=True)
return self.dispatch([input], [self.alloc(out)], no_grad=True)
class NonZero(function.Function):
......@@ -292,8 +285,7 @@ class NonZero(function.Function):
return {'op_type': 'NonZero', 'arguments': {}}
def forward(self, input, out=None):
out = out if out else self.alloc()
return self.dispatch([input], [out], no_grad=True)
return self.dispatch([input], [self.alloc(out)], no_grad=True)
class OneHot(function.Function):
......@@ -330,8 +322,7 @@ class Reduce(function.Function):
}
def forward(self, input, out=None):
out = out if out else self.alloc()
return self.dispatch([input], [out])
return self.dispatch([input], [self.alloc(out)])
class Reshape(function.Function):
......@@ -356,9 +347,8 @@ class Reshape(function.Function):
shape[i], 'int64')
def forward(self, input, shape, out=None):
out = out if out else self.alloc()
return self.dispatch(
[input], [out],
[input], [self.alloc(out)],
callback=lambda ws, handle:
self.feed(ws, handle, shape),
)
......@@ -433,8 +423,7 @@ class Stack(function.Function):
}
def forward(self, seq, out=None):
out = out if out else self.alloc()
return self.dispatch(seq, [out])
return self.dispatch(seq, [self.alloc(out)])
class Squeeze(function.Function):
......@@ -451,8 +440,7 @@ class Squeeze(function.Function):
}
def forward(self, input, out=None):
out = out if out else self.alloc()
return self.dispatch([input], [out])
return self.dispatch([input], [self.alloc(out)])
class Tile(function.Function):
......@@ -534,8 +522,8 @@ class TopK(function.Function):
}
}
def forward(self, input, outputs=None):
outputs = [self.alloc(), self.alloc()] if outputs is None else outputs
def forward(self, input, outputs=(None, None)):
outputs = [self.alloc(outputs[0]), self.alloc(outputs[1])]
return self.dispatch([input], outputs, no_grad=True)
......@@ -553,8 +541,7 @@ class UnSqueeze(function.Function):
}
def forward(self, input, out=None):
out = out if out else self.alloc()
return self.dispatch([input], [out])
return self.dispatch([input], [self.alloc(out)])
class Where(function.Function):
......
......@@ -943,7 +943,7 @@ def topk(input, k, dim=None, largest=True, sorted=True, out=None):
axis=dim,
largest=largest,
sorted=sorted,
).apply(input, out)
).apply(input, out if out else (None, None))
def unsqueeze(input, dim, out=None):
......
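
For multi-output ops such as `TopK`, the single optional `out` generalizes to a tuple default `(None, None)` with each slot routed through `alloc`, and the `topk` wrapper above converts a user-supplied pair accordingly. A short sketch under the same toy classes:

```python
class TopK(Operator):              # toy: two outputs (values, indices)
    def dispatch(self, inputs, outputs, **kwargs):
        return tuple(outputs)
    def forward(self, input, outputs=(None, None)):
        # Each slot: bind the device if a tensor was given, else allocate.
        outputs = [self.alloc(outputs[0]), self.alloc(outputs[1])]
        return self.dispatch([input], outputs)

op = TopK(DeviceSpec('cuda', 0))
values, indices = op.forward(Tensor(DeviceSpec('cuda', 0)))
```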
......@@ -42,7 +42,7 @@ def all_reduce(tensor, op='SUM', group=None):
if group is None:
raise ValueError('<group> is required.')
if op not in ('MEAN', 'SUM'):
raise ValueError('Unsupported reduce op:', op)
raise ValueError('Unsupported reduce op: ' + op)
tensors = nest.flatten(tensor)
return _functions.Collective \
.instantiate(
......
......@@ -34,6 +34,7 @@ class _Initializer(function.Function):
[] if shape_like is None else [shape_like], [out],
callback=lambda ws, handle:
self.feed(ws, handle, shape),
no_grad=True,
)
......@@ -62,9 +63,10 @@ class Arange(function.Function):
def forward(self, slice_args, out=None):
return self.dispatch(
[], [out if out else self.alloc()],
[], [self.alloc(out)],
callback=lambda ws, handle:
self.feed(ws, handle, slice_args)
self.feed(ws, handle, slice_args),
no_grad=True,
)
......
......@@ -121,7 +121,7 @@ def eye(
Returns
-------
dragon.Tensor
dragon.vm.torch.Tensor
The output tensor.
"""
......@@ -143,7 +143,7 @@ def fill(out, shape, value):
def fill_like(out, shape_like, value):
return _functions.Fill \
.instantiate(out.device, value=float(value), dtype=out.dtype) \
.apply(out, [], shape_like)
......
......@@ -33,8 +33,7 @@ class Axpby(function.Function):
}
def forward(self, input, out=None):
out = out if out else self.alloc()
return self.dispatch([input], [out], no_grad=True)
return self.dispatch([input], [self.alloc(out)], no_grad=True)
class BinaryFunc(function.Function):
......@@ -46,8 +45,7 @@ class BinaryFunc(function.Function):
return {'op_type': self.op_type, 'arguments': {}}
def forward(self, input, value, out=None):
out = out if out else self.alloc()
return self.dispatch([input, value], [out])
return self.dispatch([input, value], [self.alloc(out)])
class Clip(function.Function):
......@@ -70,8 +68,7 @@ class Clip(function.Function):
}
def forward(self, input, out=None):
out = out if out else self.alloc()
return self.dispatch([input], [out])
return self.dispatch([input], [self.alloc(out)])
class UnaryFunc(function.Function):
......@@ -83,8 +80,7 @@ class UnaryFunc(function.Function):
return {'op_type': self.op_type, 'arguments': {}}
def forward(self, input, out=None):
out = out if out else self.alloc()
return self.dispatch([input], [out])
return self.dispatch([input], [self.alloc(out)])
class MatMul(function.Function):
......@@ -103,5 +99,4 @@ class MatMul(function.Function):
}
def forward(self, mat1, mat2, out=None):
out = out if out else self.alloc()
return self.dispatch([mat1, mat2], [out])
return self.dispatch([mat1, mat2], [self.alloc(out)])
......@@ -37,11 +37,7 @@ class ParamUpdate(function.Function):
def forward(self, param, grad):
self._check_device([param, grad])
return self.dispatch(
[grad], [param],
no_grad=True,
check_device=False,
)
return self.dispatch([grad], [param], no_grad=True)
class GradAccumulate(function.Function):
......