Commit 8dbb73a7 by Ting PAN

Fix the device inference in eager execution

Summary:
This commit infers the correct device for an existing output tensor passed to an eager operator.
1 parent c0c43218
Showing with 291 additions and 290 deletions
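The heart of the change is the extended `alloc()` helper on the eager `Operator` (mirrored by the torch-side `Function` below): it can now bind the executing device to a pre-existing output tensor instead of only minting a fresh device spec. A minimal sketch of the pattern, using a simplified `DeviceSpec` stand-in rather than Dragon's actual class:

```python
# A minimal sketch of the alloc(out) pattern introduced by this commit.
# DeviceSpec below is a simplified stand-in, not Dragon's real class.
class DeviceSpec(object):
    def __init__(self, device_type='cpu', device_index=0):
        self.device_type = device_type
        self.device_index = device_index

    def copy(self):
        return DeviceSpec(self.device_type, self.device_index)


class Operator(object):
    def __init__(self, device):
        self._device = device

    def alloc(self, out=None):
        """Return or bind the executing device to the output tensor."""
        if out is not None:
            # Reuse the given tensor, but stamp it with this op's
            # device so in-place and explicit-output calls no longer
            # carry a stale device spec.
            out._device = self._device.copy()
            return out
        return self._device.copy()
```

Call sites then change from `outputs = [inputs[0] if inplace else self.alloc()]` to `outputs = [self.alloc(inputs[0]) if inplace else self.alloc()]`, as the hunks below show.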
...@@ -6,32 +6,35 @@ regularizers ...@@ -6,32 +6,35 @@ regularizers
Classes Classes
------- -------
`class L1 <regularizers/L1.html>`_
: The L1 regularizer.
`class L1L2 <regularizers/L1L2.html>`_ `class L1L2 <regularizers/L1L2.html>`_
: The L1L2 regularizer. : The L1L2 regularizer.
`class L2 <regularizers/L2.html>`_
: The L2 regularizer.
`class Regularizer <regularizers/Regularizer.html>`_ `class Regularizer <regularizers/Regularizer.html>`_
: The base regularizer class. : The base regularizer class.
Functions Functions
--------- ---------
`l1(...) <regularizers/l1.html>`_ `get(...) <regularizers/get.html>`_
: Create a L1 regularizer. : Return the regularizer callable by identifier.
`l1_l2(...) <regularizers/l1_l2.html>`_ `l1_l2(...) <regularizers/l1_l2.html>`_
: Create an L1L2 regularizer. : Create an L1L2 regularizer.
`l2(...) <regularizers/l2.html>`_
: Create a L2 regularizer.
.. toctree:: .. toctree::
:hidden: :hidden:
regularizers/l1 regularizers/get
regularizers/L1
regularizers/L1L2 regularizers/L1L2
regularizers/l1_l2 regularizers/l1_l2
regularizers/l2 regularizers/L2
regularizers/Regularizer regularizers/Regularizer
.. raw:: html .. raw:: html
......
L1
==
.. autoclass:: dragon.vm.tensorflow.keras.regularizers.L1
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.regularizers.L1.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.regularizers.";
color: #103d3e;
}
</style>
l2 L2
== ==
.. autofunction:: dragon.vm.tensorflow.keras.regularizers.l2 .. autoclass:: dragon.vm.tensorflow.keras.regularizers.L2
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.regularizers.L2.__init__
.. raw:: html .. raw:: html
......
l1 get
== ===
.. autofunction:: dragon.vm.tensorflow.keras.regularizers.l1 .. autofunction:: dragon.vm.tensorflow.keras.regularizers.get
.. raw:: html .. raw:: html
......
...@@ -35,8 +35,9 @@ void InitializeOp<Context>::RunOnDevice() { ...@@ -35,8 +35,9 @@ void InitializeOp<Context>::RunOnDevice() {
vec64_t out_shape; vec64_t out_shape;
int ndims; int ndims;
dims(0, &ndims); dims(0, &ndims);
for (int i = 0; i < ndims; i++) for (int i = 0; i < ndims; i++) {
out_shape.push_back(dims(i)); out_shape.push_back(dims(i));
}
Output(0)->Reshape(out_shape); Output(0)->Reshape(out_shape);
} }
} }
......
...@@ -4,12 +4,7 @@ namespace dragon { ...@@ -4,12 +4,7 @@ namespace dragon {
template <class Context> template <class Context>
void ShapeOp<Context>::RunOnDevice() { void ShapeOp<Context>::RunOnDevice() {
Output(0)->Reshape({Input(0).ndim()}); Output(0)->template CopyFrom<int64_t>(Input(0).dims());
auto* y = Output(0)->template mutable_data<int64_t, CPUContext>();
for (int i = 0; i < Input(0).ndim(); i++)
y[i] = Input(0).dim(i);
} }
DEPLOY_CPU(Shape); DEPLOY_CPU(Shape);
......
...@@ -23,6 +23,8 @@ class ShapeOp final : public Operator<Context> { ...@@ -23,6 +23,8 @@ class ShapeOp final : public Operator<Context> {
SIMPLE_CTOR_DTOR(ShapeOp); SIMPLE_CTOR_DTOR(ShapeOp);
USE_OPERATOR_FUNCTIONS; USE_OPERATOR_FUNCTIONS;
void SwitchToDevice() override {}
void RunOnDevice() override; void RunOnDevice() override;
}; };
......
...@@ -39,7 +39,7 @@ def set_execution(execution='GRAPH_MODE'): ...@@ -39,7 +39,7 @@ def set_execution(execution='GRAPH_MODE'):
""" """
if execution not in ('GRAPH_MODE', 'EAGER_MODE'): if execution not in ('GRAPH_MODE', 'EAGER_MODE'):
raise ValueError('Unsupported execution mode:', execution) raise ValueError('Unsupported execution: ' + execution)
config.config().graph_execution = execution config.config().graph_execution = execution
...@@ -75,7 +75,7 @@ def set_scheduler(scheduler='SIMPLE'): ...@@ -75,7 +75,7 @@ def set_scheduler(scheduler='SIMPLE'):
""" """
if scheduler not in ('SIMPLE', 'FUSION'): if scheduler not in ('SIMPLE', 'FUSION'):
raise ValueError('Unsupported scheduler type:', scheduler) raise ValueError('Unsupported scheduler: ' + scheduler)
if scheduler == 'SIMPLE': if scheduler == 'SIMPLE':
config.config().graph_type = '' config.config().graph_type = ''
elif scheduler == 'FUSION': elif scheduler == 'FUSION':
......
...@@ -40,7 +40,7 @@ class Backend(object): ...@@ -40,7 +40,7 @@ class Backend(object):
if not is_nccl_available(): if not is_nccl_available():
raise ValueError('NCCL backend is not available.') raise ValueError('NCCL backend is not available.')
elif value == Backend.UNDEFINED: elif value == Backend.UNDEFINED:
raise ValueError('Invalid backend:', name) raise ValueError('Invalid backend: ' + name)
return value return value
......
...@@ -44,7 +44,7 @@ def device(device_type, device_index=0): ...@@ -44,7 +44,7 @@ def device(device_type, device_index=0):
""" """
device_type = device_type.lower() device_type = device_type.lower()
if device_type not in mapping.DEVICE_STRING_TO_DEVICE_TYPE: if device_type not in mapping.DEVICE_STRING_TO_DEVICE_TYPE:
raise ValueError('Unsupported device type:', device_type) raise ValueError('Unsupported device type: ' + device_type)
return _GLOBAL_DEVICE_STACK.get_controller({ return _GLOBAL_DEVICE_STACK.get_controller({
'device_type': mapping.DEVICE_STRING_TO_DEVICE_TYPE[device_type], 'device_type': mapping.DEVICE_STRING_TO_DEVICE_TYPE[device_type],
'device_index': device_index, 'device_index': device_index,
......
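For reference, the device spec that `alloc()` copies comes from this stack. A hedged usage sketch, assuming `dragon.device` and `dragon.ones` are the public entry points for these functions:

```python
import dragon

# Ops created inside this context execute on cuda:0; the resulting
# spec is what Operator.alloc() copies onto each output tensor.
with dragon.device('cuda', 0):
    x = dragon.ones((2, 3), dtype='float32')
```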
...@@ -37,8 +37,11 @@ class Operator(object): ...@@ -37,8 +37,11 @@ class Operator(object):
self._arg_device = self._arg_device.SerializeToString() self._arg_device = self._arg_device.SerializeToString()
self._seed = kwargs.get('seed', config.config().random_seed) self._seed = kwargs.get('seed', config.config().random_seed)
def alloc(self): def alloc(self, out=None):
"""Return the executing device to create an output tensor.""" """Return or bind the executing device to output tensor."""
if out is not None:
out._device = self._device.copy()
return out
return self._device.copy() return self._device.copy()
def apply(self, *args, **kwargs): def apply(self, *args, **kwargs):
...@@ -124,11 +127,6 @@ class Operator(object): ...@@ -124,11 +127,6 @@ class Operator(object):
return self.forward(*args, **kwargs) return self.forward(*args, **kwargs)
def new_leaf(shape, dtype, device, trainable=False):
"""Return a leaf resource."""
return EagerTensor(**locals())
def remove_binary_scalar(inputs): def remove_binary_scalar(inputs):
"""Remove the scalar for binary ops.""" """Remove the scalar for binary ops."""
if types.is_tensor(inputs[0]): if types.is_tensor(inputs[0]):
......
...@@ -211,7 +211,7 @@ class Workspace(backend.Workspace): ...@@ -211,7 +211,7 @@ class Workspace(backend.Workspace):
dtype = value.dtype if dtype is None else dtype dtype = value.dtype if dtype is None else dtype
if hasattr(tensor, 'dtype') and tensor.dtype is not None: if hasattr(tensor, 'dtype') and tensor.dtype is not None:
if tensor.dtype not in mapping.TENSOR_TYPE_TO_NP_TYPE: if tensor.dtype not in mapping.TENSOR_TYPE_TO_NP_TYPE:
raise TypeError('Unsupported data type:', tensor.dtype) raise TypeError('Unsupported data type: ' + tensor.dtype)
dtype = mapping.TENSOR_TYPE_TO_NP_TYPE[tensor.dtype] dtype = mapping.TENSOR_TYPE_TO_NP_TYPE[tensor.dtype]
# Determine the copying device option # Determine the copying device option
if enforce_cpu is True: if enforce_cpu is True:
......
...@@ -61,10 +61,8 @@ def dropout(inputs, prob=0.5, scale=True, **kwargs): ...@@ -61,10 +61,8 @@ def dropout(inputs, prob=0.5, scale=True, **kwargs):
op_lib = activation_ops_lib.Dropout op_lib = activation_ops_lib.Dropout
if context.executing_eagerly(): if context.executing_eagerly():
return op_lib \ return op_lib \
.instantiate( .instantiate(prob=args['prob'], scale=scale) \
prob=args['prob'], .apply([inputs], inplace=inplace)
scale=scale,
).apply([inputs], inplace=inplace)
else: else:
return op_lib.blend(**args) return op_lib.blend(**args)
......
...@@ -26,7 +26,7 @@ class Activation(Operator): ...@@ -26,7 +26,7 @@ class Activation(Operator):
return {'op_type': self.op_type, 'arguments': {}} return {'op_type': self.op_type, 'arguments': {}}
def forward(self, inputs, inplace=False): def forward(self, inputs, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()] outputs = [self.alloc(inputs[0]) if inplace else self.alloc()]
return self.dispatch(inputs, outputs) return self.dispatch(inputs, outputs)
...@@ -46,7 +46,7 @@ class Dropout(Activation): ...@@ -46,7 +46,7 @@ class Dropout(Activation):
} }
class DropBlock2d(Operator): class DropBlock2d(Activation):
def __init__(self, key, dev, **kwargs): def __init__(self, key, dev, **kwargs):
super(DropBlock2d, self).__init__(key, dev, **kwargs) super(DropBlock2d, self).__init__(key, dev, **kwargs)
self.block_size = kwargs.get('block_size', 7) self.block_size = kwargs.get('block_size', 7)
...@@ -67,10 +67,6 @@ class DropBlock2d(Operator): ...@@ -67,10 +67,6 @@ class DropBlock2d(Operator):
}, },
} }
def forward(self, inputs, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()]
return self.dispatch(inputs, outputs)
class DropPath(Activation): class DropPath(Activation):
def __init__(self, key, dev, **kwargs): def __init__(self, key, dev, **kwargs):
...@@ -96,9 +92,7 @@ class Elu(Activation): ...@@ -96,9 +92,7 @@ class Elu(Activation):
def attributes(self): def attributes(self):
return { return {
'op_type': 'Elu', 'op_type': 'Elu',
'arguments': { 'arguments': {'alpha': float(self.alpha)},
'alpha': float(self.alpha),
}
} }
...@@ -110,9 +104,7 @@ class PRelu(Operator): ...@@ -110,9 +104,7 @@ class PRelu(Operator):
def attributes(self): def attributes(self):
return { return {
'op_type': 'PRelu', 'op_type': 'PRelu',
'arguments': { 'arguments': {'data_format': self.data_format},
'data_format': self.data_format,
}
} }
def forward(self, inputs): def forward(self, inputs):
...@@ -127,9 +119,7 @@ class Relu(Activation): ...@@ -127,9 +119,7 @@ class Relu(Activation):
def attributes(self): def attributes(self):
return { return {
'op_type': 'Relu', 'op_type': 'Relu',
'arguments': { 'arguments': {'alpha': float(self.alpha)},
'alpha': float(self.alpha),
}
} }
...@@ -140,9 +130,7 @@ class Relu6(Activation): ...@@ -140,9 +130,7 @@ class Relu6(Activation):
def attributes(self): def attributes(self):
return { return {
'op_type': 'Relu', 'op_type': 'Relu',
'arguments': { 'arguments': {'max_value': 6.},
'max_value': 6.,
}
} }
...@@ -170,7 +158,5 @@ class Softmax(Activation): ...@@ -170,7 +158,5 @@ class Softmax(Activation):
def attributes(self): def attributes(self):
return { return {
'op_type': 'Softmax', 'op_type': 'Softmax',
'arguments': { 'arguments': {'axis': self.axis},
'axis': self.axis,
}
} }
...@@ -14,6 +14,7 @@ from __future__ import absolute_import ...@@ -14,6 +14,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.core.framework import device_spec
from dragon.core.framework.ops import Operator from dragon.core.framework.ops import Operator
...@@ -41,13 +42,14 @@ class Arange(Operator): ...@@ -41,13 +42,14 @@ class Arange(Operator):
slice_args[i], 'float32') slice_args[i], 'float32')
def forward(self, slice_args, trainable=False): def forward(self, slice_args, trainable=False):
output = self.dispatch( out = self.dispatch(
[], [self.alloc()], [], [self.alloc()],
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, slice_args) self.feed(ws, handle, slice_args),
no_grad=True,
) )
output._requires_grad = trainable out._requires_grad = trainable
return output return out
class ArgReduce(Operator): class ArgReduce(Operator):
...@@ -85,7 +87,7 @@ class Cast(Operator): ...@@ -85,7 +87,7 @@ class Cast(Operator):
if inputs[0].dtype == self.dtype: if inputs[0].dtype == self.dtype:
return inputs[0] return inputs[0]
if inplace: if inplace:
return self.dispatch([], inputs, no_grad=True) return self.dispatch([], [self.alloc(inputs[0])], no_grad=True)
return self.dispatch(inputs, [self.alloc()]) return self.dispatch(inputs, [self.alloc()])
...@@ -122,7 +124,7 @@ class ChannelNormalize(Operator): ...@@ -122,7 +124,7 @@ class ChannelNormalize(Operator):
return self.dispatch( return self.dispatch(
inputs, [self.alloc()], inputs, [self.alloc()],
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, perm) self.feed(ws, handle, perm),
) )
...@@ -225,7 +227,7 @@ class ExpandDims(Operator): ...@@ -225,7 +227,7 @@ class ExpandDims(Operator):
} }
def forward(self, inputs, inplace=False): def forward(self, inputs, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()] outputs = [self.alloc(inputs[0]) if inplace else self.alloc()]
return self.dispatch(inputs, outputs) return self.dispatch(inputs, outputs)
...@@ -247,7 +249,7 @@ class Flatten(Operator): ...@@ -247,7 +249,7 @@ class Flatten(Operator):
} }
def forward(self, inputs, inplace=False): def forward(self, inputs, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()] outputs = [self.alloc(inputs[0]) if inplace else self.alloc()]
return self.dispatch(inputs, outputs) return self.dispatch(inputs, outputs)
...@@ -447,11 +449,10 @@ class Reshape(Operator): ...@@ -447,11 +449,10 @@ class Reshape(Operator):
e, 'int64') e, 'int64')
def forward(self, inputs, shape, inplace=False): def forward(self, inputs, shape, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()]
return self.dispatch( return self.dispatch(
inputs, outputs, inputs, [self.alloc(inputs[0]) if inplace else self.alloc()],
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, shape) self.feed(ws, handle, shape),
) )
...@@ -493,12 +494,13 @@ class Slice(Operator): ...@@ -493,12 +494,13 @@ class Slice(Operator):
class Shape(Operator): class Shape(Operator):
def __init__(self, key, dev, **kwargs): def __init__(self, key, dev, **kwargs):
super(Shape, self).__init__(key, dev, **kwargs) super(Shape, self).__init__(key, dev, **kwargs)
self._device = device_spec.DeviceSpec()
def attributes(self): def attributes(self):
return {'op_type': 'Shape', 'arguments': {}} return {'op_type': 'Shape', 'arguments': {}}
def forward(self, inputs): def forward(self, inputs):
return self.dispatch(inputs, [self.alloc()]) return self.dispatch(inputs, [self.alloc()], no_grad=True)
class Split(Operator): class Split(Operator):
...@@ -535,7 +537,7 @@ class Squeeze(Operator): ...@@ -535,7 +537,7 @@ class Squeeze(Operator):
} }
def forward(self, inputs, inplace=False): def forward(self, inputs, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()] outputs = [self.alloc(inputs[0]) if inplace else self.alloc()]
return self.dispatch(inputs, outputs) return self.dispatch(inputs, outputs)
......
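One subtle piece of the device fix sits in the `Shape` class above: its constructor overrides `self._device` with a default `DeviceSpec()` and its dispatch is marked `no_grad=True`, so shape outputs always land on the default device. A hedged illustration, assuming `dragon.shape` is the public eager entry point:

```python
import dragon

with dragon.device('cuda', 0):
    x = dragon.ones((2, 3))
# The eager Shape op pins its own device to DeviceSpec(), so the
# returned shape tensor is expected on the default (CPU) device
# even though <x> lives on cuda:0.
shape = dragon.shape(x)
```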
...@@ -62,7 +62,7 @@ class Copy(Operator): ...@@ -62,7 +62,7 @@ class Copy(Operator):
def forward(self, inputs, outputs): def forward(self, inputs, outputs):
outputs = outputs if outputs else [self.alloc()] outputs = outputs if outputs else [self.alloc()]
return self.dispatch(inputs, outputs) return self.dispatch(inputs, outputs, no_grad=True)
class MaskedAssign(Operator): class MaskedAssign(Operator):
......
...@@ -46,7 +46,7 @@ def all_reduce(inputs, operation='MEAN', group=None, **kwargs): ...@@ -46,7 +46,7 @@ def all_reduce(inputs, operation='MEAN', group=None, **kwargs):
if group is None: if group is None:
raise ValueError('<group> is required.') raise ValueError('<group> is required.')
if operation not in ('MEAN', 'SUM'): if operation not in ('MEAN', 'SUM'):
raise ValueError('Unsupported reduce op:', operation) raise ValueError('Unsupported reduce op: ' + operation)
args.update(group.arguments) args.update(group.arguments)
args.pop('group') args.pop('group')
op_lib = distributed_ops_lib.Collective op_lib = distributed_ops_lib.Collective
......
...@@ -115,11 +115,8 @@ def eye(n, m=None, k=0, dtype='float32', **kwargs): ...@@ -115,11 +115,8 @@ def eye(n, m=None, k=0, dtype='float32', **kwargs):
if types.is_tensor(m): if types.is_tensor(m):
m = int(m.get_value()) m = int(m.get_value())
return op_lib \ return op_lib \
.instantiate( .instantiate(k=k, ndim=2, dtype=dtype) \
k=k, .apply([n, m], trainable=trainable)
ndim=2,
dtype=dtype,
).apply([n, m], trainable=trainable)
else: else:
args['n'] = args['m'] = None args['n'] = args['m'] = None
if types.is_tensor(n) or types.is_tensor(m): if types.is_tensor(n) or types.is_tensor(m):
...@@ -173,14 +170,8 @@ def eye_like(other, k=0, dtype='float32', **kwargs): ...@@ -173,14 +170,8 @@ def eye_like(other, k=0, dtype='float32', **kwargs):
op_lib = init_ops_lib.Eye op_lib = init_ops_lib.Eye
if context.executing_eagerly(): if context.executing_eagerly():
return op_lib \ return op_lib \
.instantiate( .instantiate(k=k, dtype=dtype) \
k=k, .apply([], other, trainable=trainable)
dtype=dtype,
).apply(
shape=[],
shape_like=other,
trainable=trainable,
)
else: else:
args.pop('other') args.pop('other')
return op_lib.blend(inputs=[other], **args) return op_lib.blend(inputs=[other], **args)
...@@ -366,14 +357,8 @@ def ones_like(other, dtype='float32', **kwargs): ...@@ -366,14 +357,8 @@ def ones_like(other, dtype='float32', **kwargs):
op_lib = init_ops_lib.Fill op_lib = init_ops_lib.Fill
if context.executing_eagerly(): if context.executing_eagerly():
return op_lib \ return op_lib \
.instantiate( .instantiate(value=1, dtype=dtype) \
value=1, .apply([], other, trainable=trainable)
dtype=dtype,
).apply(
shape=[],
shape_like=other,
trainable=trainable,
)
else: else:
args.pop('other') args.pop('other')
return op_lib.blend(inputs=[other], value=1., **args) return op_lib.blend(inputs=[other], value=1., **args)
...@@ -453,11 +438,7 @@ def random_normal_like(other, mean=0, std=1, dtype='float32', **kwargs): ...@@ -453,11 +438,7 @@ def random_normal_like(other, mean=0, std=1, dtype='float32', **kwargs):
mean=args['mean'], mean=args['mean'],
std=args['std'], std=args['std'],
dtype=dtype, dtype=dtype,
).apply( ).apply([], other, trainable=trainable)
shape=[],
shape_like=other,
trainable=trainable,
)
else: else:
args.pop('other') args.pop('other')
return op_lib.blend(inputs=[other], **args) return op_lib.blend(inputs=[other], **args)
...@@ -535,11 +516,7 @@ def random_uniform_like(other, low=-1, high=1, dtype='float32', **kwargs): ...@@ -535,11 +516,7 @@ def random_uniform_like(other, low=-1, high=1, dtype='float32', **kwargs):
low=args['low'], low=args['low'],
high=args['high'], high=args['high'],
dtype=dtype, dtype=dtype,
).apply( ).apply([], other, trainable=trainable)
shape=[],
shape_like=other,
trainable=trainable,
)
else: else:
args.pop('other') args.pop('other')
return op_lib.blend(inputs=[other], **args) return op_lib.blend(inputs=[other], **args)
...@@ -641,14 +618,8 @@ def zeros_like(other, dtype='float32', **kwargs): ...@@ -641,14 +618,8 @@ def zeros_like(other, dtype='float32', **kwargs):
op_lib = init_ops_lib.Fill op_lib = init_ops_lib.Fill
if context.executing_eagerly(): if context.executing_eagerly():
return op_lib \ return op_lib \
.instantiate( .instantiate(value=0, dtype=dtype) \
value=0, .apply([], other, trainable=trainable)
dtype=dtype,
).apply(
shape=[],
shape_like=other,
trainable=trainable,
)
else: else:
args.pop('other') args.pop('other')
return op_lib.blend(inputs=[other], value=0., **args) return op_lib.blend(inputs=[other], value=0., **args)
...@@ -14,7 +14,6 @@ from __future__ import absolute_import ...@@ -14,7 +14,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.core.framework import ops
from dragon.core.framework.ops import Operator from dragon.core.framework.ops import Operator
...@@ -30,25 +29,17 @@ class Initializer(Operator): ...@@ -30,25 +29,17 @@ class Initializer(Operator):
ws, '{}/dims[{}]'.format(handle, i), ws, '{}/dims[{}]'.format(handle, i),
dim, 'int64') dim, 'int64')
def forward( def forward(self, shape, shape_as=None, out=None, trainable=None):
self, out = self.dispatch(
shape, [] if shape_as is None else [shape_as],
out=None, [self.alloc(out)],
shape_like=None,
trainable=False,
):
inputs = [] if shape_like is None else [shape_like]
outputs = [ops.new_leaf(
shape=shape,
dtype=self.dtype,
device=self.alloc(),
trainable=trainable,
) if out is None else out]
return self.dispatch(
inputs, outputs,
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, shape), self.feed(ws, handle, shape),
no_grad=True,
) )
if trainable is not None:
out._requires_grad = trainable
return out
class Eye(Initializer): class Eye(Initializer):
......
...@@ -53,7 +53,8 @@ class Axpby(Operator): ...@@ -53,7 +53,8 @@ class Axpby(Operator):
def forward(self, inputs, outputs=None): def forward(self, inputs, outputs=None):
if outputs is None: if outputs is None:
outputs = [self.alloc() for _ in range(len(inputs))] outputs = [None] * len(inputs)
outputs = [self.alloc(out) for out in outputs]
return self.dispatch(inputs, outputs, no_grad=True) return self.dispatch(inputs, outputs, no_grad=True)
...@@ -65,12 +66,8 @@ class BinaryOp(Operator): ...@@ -65,12 +66,8 @@ class BinaryOp(Operator):
def attributes(self): def attributes(self):
return {'op_type': self.op_type, 'arguments': {}} return {'op_type': self.op_type, 'arguments': {}}
def forward(self, inputs, outputs=None): def forward(self, inputs, outputs=(None,)):
if outputs is None: return self.dispatch(inputs, [self.alloc(outputs[0])])
outputs = [self.alloc()]
else:
outputs[0]._device = self.alloc()
return self.dispatch(inputs, outputs)
class Clip(Operator): class Clip(Operator):
......
...@@ -23,7 +23,7 @@ class Metric(Operator): ...@@ -23,7 +23,7 @@ class Metric(Operator):
self.reduction = kwargs.get('reduction', 'MEAN') self.reduction = kwargs.get('reduction', 'MEAN')
def forward(self, inputs): def forward(self, inputs):
return self.dispatch(inputs, [self.alloc()]) return self.dispatch(inputs, [self.alloc()], no_grad=True)
class Accuracy(Metric): class Accuracy(Metric):
......
...@@ -576,7 +576,7 @@ def uniform(self, low=0, high=1): ...@@ -576,7 +576,7 @@ def uniform(self, low=0, high=1):
).apply(shape, out=self) ).apply(shape, out=self)
def _binary_op(a, b, op_type, outputs=None): def _binary_op(a, b, op_type, outputs=(None,)):
"""Apply the general binary operation.""" """Apply the general binary operation."""
return math_ops_lib.BinaryOp \ return math_ops_lib.BinaryOp \
.instantiate(op_type=op_type) \ .instantiate(op_type=op_type) \
......
...@@ -85,13 +85,11 @@ class BiasAdd(Operator): ...@@ -85,13 +85,11 @@ class BiasAdd(Operator):
def attributes(self): def attributes(self):
return { return {
'op_type': 'BiasAdd', 'op_type': 'BiasAdd',
'arguments': { 'arguments': {'data_format': self.data_format},
'data_format': self.data_format,
},
} }
def forward(self, inputs, inplace=False): def forward(self, inputs, inplace=False):
outputs = [inputs[0] if inplace else self.alloc()] outputs = [self.alloc(inputs[0]) if inplace else self.alloc()]
return self.dispatch(inputs, outputs) return self.dispatch(inputs, outputs)
......
...@@ -85,6 +85,16 @@ def getfullargspec(obj): ...@@ -85,6 +85,16 @@ def getfullargspec(obj):
return _getfullargspec(target) return _getfullargspec(target)
def isclass(object):
"""Decorator-aware replacement for ``inspect.isclass``."""
return _inspect.isclass(decorator.unwrap(object)[1])
def isfunction(object):
"""Decorator-aware replacement for ``inspect.isfunction``."""
return _inspect.isfunction(decorator.unwrap(object)[1])
def ismethod(object): def ismethod(object):
"""Decorator-aware replacement for ``inspect.ismethod``.""" """Decorator-aware replacement for ``inspect.ismethod``."""
return _inspect.ismethod(decorator.unwrap(object)[1]) return _inspect.ismethod(decorator.unwrap(object)[1])
...@@ -14,12 +14,15 @@ from __future__ import division as _division ...@@ -14,12 +14,15 @@ from __future__ import division as _division
from __future__ import print_function as _print_function from __future__ import print_function as _print_function
# Classes # Classes
from dragon.vm.tensorflow.core.keras.regularizers import L1
from dragon.vm.tensorflow.core.keras.regularizers import l1
from dragon.vm.tensorflow.core.keras.regularizers import L1L2 from dragon.vm.tensorflow.core.keras.regularizers import L1L2
from dragon.vm.tensorflow.core.keras.regularizers import L2
from dragon.vm.tensorflow.core.keras.regularizers import l2
from dragon.vm.tensorflow.core.keras.regularizers import Regularizer from dragon.vm.tensorflow.core.keras.regularizers import Regularizer
# Functions # Functions
from dragon.vm.tensorflow.core.keras.regularizers import l1 from dragon.vm.tensorflow.core.keras.regularizers import get
from dragon.vm.tensorflow.core.keras.regularizers import l1_l2 from dragon.vm.tensorflow.core.keras.regularizers import l1_l2
from dragon.vm.tensorflow.core.keras.regularizers import l2
__all__ = [_s for _s in dir() if not _s.startswith('_')] __all__ = [_s for _s in dir() if not _s.startswith('_')]
...@@ -7,10 +7,6 @@ ...@@ -7,10 +7,6 @@
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on:
#
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/activations.py>
#
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
...@@ -18,6 +14,7 @@ from __future__ import division ...@@ -18,6 +14,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.core.util import six from dragon.core.util import six
from dragon.vm.tensorflow.core.keras.utils import generic_utils
from dragon.vm.tensorflow.core.ops import math_ops from dragon.vm.tensorflow.core.ops import math_ops
from dragon.vm.tensorflow.core.ops import nn from dragon.vm.tensorflow.core.ops import nn
...@@ -272,7 +269,7 @@ def tanh(x, **kwargs): ...@@ -272,7 +269,7 @@ def tanh(x, **kwargs):
def get(identifier): def get(identifier):
"""Return the activation callable by identifier. """Return the activation function by identifier.
Parameters Parameters
---------- ----------
...@@ -282,7 +279,7 @@ def get(identifier): ...@@ -282,7 +279,7 @@ def get(identifier):
Returns Returns
------- -------
callable callable
The activation callable. The activation function.
""" """
if identifier is None: if identifier is None:
...@@ -290,8 +287,9 @@ def get(identifier): ...@@ -290,8 +287,9 @@ def get(identifier):
elif callable(identifier): elif callable(identifier):
return identifier return identifier
elif isinstance(identifier, six.string_types): elif isinstance(identifier, six.string_types):
return globals()[identifier] return generic_utils.deserialize_keras_object(
identifier, globals(), 'activation')
else: else:
raise TypeError( raise TypeError(
'Could not interpret activation identifier: {}.' 'Could not interpret the activation identifier: {}.'
.format(repr(identifier))) .format(identifier))
...@@ -75,7 +75,7 @@ def Input( ...@@ -75,7 +75,7 @@ def Input(
if shape is not None: if shape is not None:
shape = (batch_size,) + tuple(shape) shape = (batch_size,) + tuple(shape)
if kwargs: if kwargs:
raise ValueError('Unrecognized keyword arguments:', kwargs.keys()) raise ValueError('Unrecognized keyword arguments: ' + str(kwargs.keys()))
if dtype is None: if dtype is None:
if tensor is not None: if tensor is not None:
dtype = tensor.dtype dtype = tensor.dtype
......
...@@ -18,6 +18,7 @@ from __future__ import division ...@@ -18,6 +18,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.core.util import six from dragon.core.util import six
from dragon.vm.tensorflow.core.keras.utils import generic_utils
from dragon.vm.tensorflow.core.ops.init_ops import Constant from dragon.vm.tensorflow.core.ops.init_ops import Constant
from dragon.vm.tensorflow.core.ops.init_ops import GlorotNormal from dragon.vm.tensorflow.core.ops.init_ops import GlorotNormal
from dragon.vm.tensorflow.core.ops.init_ops import GlorotUniform from dragon.vm.tensorflow.core.ops.init_ops import GlorotUniform
...@@ -60,8 +61,9 @@ def get(identifier): ...@@ -60,8 +61,9 @@ def get(identifier):
elif callable(identifier): elif callable(identifier):
return identifier return identifier
elif isinstance(identifier, six.string_types): elif isinstance(identifier, six.string_types):
return globals()[identifier] return generic_utils.deserialize_keras_object(
identifier, globals(), 'initializer')
else: else:
raise TypeError( raise TypeError(
'Could not interpret initializer identifier: {}.' 'Could not interpret the initializer identifier: {}.'
.format(repr(identifier))) .format(identifier))
...@@ -96,7 +96,7 @@ class BatchNormalization(Layer): ...@@ -96,7 +96,7 @@ class BatchNormalization(Layer):
def build(self, input_shape): def build(self, input_shape):
input_shape = tensor_shape.TensorShape(input_shape) input_shape = tensor_shape.TensorShape(input_shape)
if not input_shape.ndims: if not input_shape.ndims:
raise ValueError('Input has undefined rank:', input_shape) raise ValueError('Input has undefined rank: ' + str(input_shape))
param_shape = [input_shape.dims[self.axis]] param_shape = [input_shape.dims[self.axis]]
self.input_spec = InputSpec( self.input_spec = InputSpec(
# Each layer should adapt to the: # Each layer should adapt to the:
......
...@@ -7,10 +7,6 @@ ...@@ -7,10 +7,6 @@
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on:
#
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/losses.py>
#
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
...@@ -20,6 +16,7 @@ from __future__ import print_function ...@@ -20,6 +16,7 @@ from __future__ import print_function
from dragon.core.framework import context from dragon.core.framework import context
from dragon.core.ops import loss_ops from dragon.core.ops import loss_ops
from dragon.core.util import six from dragon.core.util import six
from dragon.vm.tensorflow.core.keras.utils import generic_utils
from dragon.vm.tensorflow.core.keras.utils import losses_utils from dragon.vm.tensorflow.core.keras.utils import losses_utils
...@@ -523,8 +520,9 @@ def get(identifier): ...@@ -523,8 +520,9 @@ def get(identifier):
elif callable(identifier): elif callable(identifier):
return identifier return identifier
elif isinstance(identifier, six.string_types): elif isinstance(identifier, six.string_types):
return globals()[identifier] return generic_utils.deserialize_keras_object(
identifier, globals(), 'loss')
else: else:
raise TypeError( raise TypeError(
'Could not interpret loss identifier: {}.' 'Could not interpret the loss identifier: {}.'
.format(repr(identifier))) .format(identifier))
...@@ -7,10 +7,6 @@ ...@@ -7,10 +7,6 @@
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on:
#
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/optimizer_v2/optimizer.py>
#
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
...@@ -51,7 +47,7 @@ class Optimizer(optimizer_v1.Optimizer): ...@@ -51,7 +47,7 @@ class Optimizer(optimizer_v1.Optimizer):
allowed_kwargs = {'scale', 'clipnorm', 'lr'} allowed_kwargs = {'scale', 'clipnorm', 'lr'}
for k in kwargs: for k in kwargs:
if k not in allowed_kwargs: if k not in allowed_kwargs:
raise TypeError('Unexpected keyword argument:', str(k)) raise TypeError('Unexpected keyword argument: ' + str(k))
if kwargs[k] < 0: if kwargs[k] < 0:
raise ValueError("Expected {} >= 0, received: {}".format(k, kwargs[k])) raise ValueError("Expected {} >= 0, received: {}".format(k, kwargs[k]))
...@@ -109,15 +105,17 @@ class Optimizer(optimizer_v1.Optimizer): ...@@ -109,15 +105,17 @@ class Optimizer(optimizer_v1.Optimizer):
for g, v in grads_and_vars: for g, v in grads_and_vars:
if g is not None: if g is not None:
decay_mult = 0. decay_mult = 0.
if hasattr(v, '__regularizer__'): regularizer = getattr(v, '_regularizer', None)
decay_mult = v.__regularizer__.l2 / self.BASE_WEIGHT_DECAY if regularizer is not None:
decay_mult = regularizer.l2 / self.BASE_WEIGHT_DECAY
self._run_update(v, g, decay_mult=decay_mult) self._run_update(v, g, decay_mult=decay_mult)
else: else:
# Store for the lazy compilation. # Store for the lazy compilation.
for g, v in grads_and_vars: for g, v in grads_and_vars:
decay_mult = 0. decay_mult = 0.
if hasattr(v, '__regularizer__'): regularizer = getattr(v, '_regularizer', None)
decay_mult = v.__regularizer__.l2 / self.BASE_WEIGHT_DECAY if regularizer is not None:
decay_mult = regularizer.l2 / self.BASE_WEIGHT_DECAY
self._add_update(v, g, decay_mult=decay_mult) self._add_update(v, g, decay_mult=decay_mult)
# Increase the iterations. # Increase the iterations.
......
...@@ -7,10 +7,6 @@ ...@@ -7,10 +7,6 @@
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on:
#
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/regularizers.py>
#
# ------------------------------------------------------------ # ------------------------------------------------------------
"""Built-in regularizers.""" """Built-in regularizers."""
...@@ -18,6 +14,9 @@ from __future__ import absolute_import ...@@ -18,6 +14,9 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from dragon.core.util import six
from dragon.vm.tensorflow.core.keras.utils import generic_utils
class Regularizer(object): class Regularizer(object):
"""The base regularizer class.""" """The base regularizer class."""
...@@ -36,10 +35,33 @@ class Regularizer(object): ...@@ -36,10 +35,33 @@ class Regularizer(object):
The output tensor. The output tensor.
""" """
x.__regularizer__ = self x._regularizer = self
return x return x
class L1(Regularizer):
r"""The L1 regularizer.
The **L1** regularizer is defined as:
.. math:: loss_{reg} = loss + \alpha|w|
"""
def __init__(self, l1=0.01):
r"""Create a ``L1`` regularizer.
Parameters
----------
l1 : float, optional, default=0.01
The value of :math:`\alpha`.
"""
if l1 <= 0.:
raise ValueError('<l1> should be greater than 0.')
self.l1 = l1
class L1L2(Regularizer): class L1L2(Regularizer):
r"""The L1L2 regularizer. r"""The L1L2 regularizer.
...@@ -65,39 +87,27 @@ class L1L2(Regularizer): ...@@ -65,39 +87,27 @@ class L1L2(Regularizer):
self.l1, self.l2 = l1, l2 self.l1, self.l2 = l1, l2
def get(identifier): class L2(Regularizer):
"""Return a regularizer from the identifier.""" r"""The L2 regularizer.
if identifier is None:
return None
elif callable(identifier):
return identifier
else:
raise ValueError(
'Could not interpret regularizer identifier:',
identifier,
)
The **L2** regularizer is defined as:
# Aliases .. math:: loss_{reg} = loss + \frac{\beta}{2}\|w\|_{2}^{2}
def l1(l=0.01):
r"""Create a L1 regularizer.
The **L1** regularizer is defined as: """
.. math:: loss_{reg} = loss + \alpha|w| def __init__(self, l2=0.01):
r"""Create a ``L2`` regularizer.
Parameters Parameters
---------- ----------
l : float, optional, default=0.01 l2 : float, optional, default=0.01
The value to :math:`\alpha`. The value of :math:`\beta`.
Returns
-------
dragon.vm.tensorflow.keras.regularizers.Regularizer
The regularizer.
""" """
return L1L2(l1=l) if l2 <= 0.:
raise ValueError('<l2> should be greater than 0.')
self.l2 = l2
def l1_l2(l1=0.01, l2=0.01): def l1_l2(l1=0.01, l2=0.01):
...@@ -123,22 +133,35 @@ def l1_l2(l1=0.01, l2=0.01): ...@@ -123,22 +133,35 @@ def l1_l2(l1=0.01, l2=0.01):
return L1L2(l1=l1, l2=l2) return L1L2(l1=l1, l2=l2)
def l2(l=0.01): # Aliases
r"""Create a L2 regularizer. l1 = L1
l2 = L2
The **L2** regularizer is defined as:
.. math:: loss_{reg} = loss + \frac{\beta}{2}|w|_{2} def get(identifier):
"""Return the regularizer callable by identifier.
Parameters Parameters
---------- ----------
l : float, optional, default=0.01 identifier : Union[callable, str]
The value to :math:`\beta`. The identifier.
Returns Returns
------- -------
dragon.vm.tensorflow.keras.regularizers.Regularizer callable
The regularizer. The regularizer callable.
""" """
return L1L2(l2=l) if identifier is None:
return None
elif callable(identifier):
return identifier
elif isinstance(identifier, six.string_types):
if identifier == 'l1_l2':
return L1L2(l1=0.01, l2=0.01)
return generic_utils.deserialize_keras_object(
identifier, globals(), 'regularizer')
else:
raise TypeError(
'Could not interpret the regularizer identifier: {}.'
.format(identifier))
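A hedged sketch of how the reworked identifier lookup behaves, following the code above (exact import path assumed):

```python
from dragon.vm.tensorflow.keras import regularizers

reg = regularizers.get('l2')      # 'l2' aliases L2, a class -> L2()
assert isinstance(reg, regularizers.L2)
reg = regularizers.get('l1_l2')   # special-cased -> L1L2(l1=0.01, l2=0.01)
assert isinstance(reg, regularizers.L1L2)
assert regularizers.get(None) is None        # None passes through
custom = regularizers.L2(l2=0.05)            # direct construction still works
assert regularizers.get(custom) is custom    # callables pass through
```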
...@@ -30,7 +30,7 @@ def convert_data_format(data_format, ndim): ...@@ -30,7 +30,7 @@ def convert_data_format(data_format, ndim):
elif ndim == 5: elif ndim == 5:
return 'NDHWC' return 'NDHWC'
else: else:
raise ValueError('Input rank not supported:', ndim) raise ValueError('Input rank not supported: ' + str(ndim))
elif data_format == 'channels_first': elif data_format == 'channels_first':
if ndim == 3: if ndim == 3:
return 'NCW' return 'NCW'
...@@ -39,9 +39,9 @@ def convert_data_format(data_format, ndim): ...@@ -39,9 +39,9 @@ def convert_data_format(data_format, ndim):
elif ndim == 5: elif ndim == 5:
return 'NCDHW' return 'NCDHW'
else: else:
raise ValueError('Input rank not supported:', ndim) raise ValueError('Input rank not supported: ' + str(ndim))
else: else:
raise ValueError('Invalid data_format:', data_format) raise ValueError('Invalid data_format: ' + data_format)
def deconv_output_length( def deconv_output_length(
...@@ -94,8 +94,7 @@ def normalize_padding(value): ...@@ -94,8 +94,7 @@ def normalize_padding(value):
if value not in {'valid', 'same'}: if value not in {'valid', 'same'}:
raise ValueError( raise ValueError(
'Expected <padding> in "valid", "same".\n' 'Expected <padding> in "valid", "same".\n'
'Received: ' + str(value) 'Received: ' + str(value))
)
return value return value
......
...@@ -19,6 +19,32 @@ from __future__ import print_function ...@@ -19,6 +19,32 @@ from __future__ import print_function
import re import re
from dragon.core.util import inspect
from dragon.core.util import six
def deserialize_keras_object(
identifier,
module_objects,
printable_module_name='object',
):
"""Deserialize the keras object."""
if isinstance(identifier, six.string_types):
object_name = identifier
obj = module_objects.get(object_name)
if obj is None:
raise ValueError(
'Unknown ' + printable_module_name + ': ' + object_name)
if inspect.isclass(obj):
return obj()
return obj
elif inspect.isfunction(identifier):
return identifier
else:
raise TypeError(
'Could not interpret the {} identifier: {}.'
.format(printable_module_name, identifier))
def to_snake_case(name): def to_snake_case(name):
"""Convert the name from camel-style to snake-style.""" """Convert the name from camel-style to snake-style."""
......
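The new helper resolves a string against a module namespace, instantiating classes and returning functions unchanged. A toy illustration (the namespace dict below is hypothetical, standing in for a module's `globals()`):

```python
class Linear(object):          # hypothetical class entry
    pass

def relu(x):                   # hypothetical function entry
    return max(x, 0.)

objects = {'linear': Linear, 'relu': relu}

obj = deserialize_keras_object('linear', objects, 'activation')
assert isinstance(obj, Linear)         # classes are instantiated
fn = deserialize_keras_object('relu', objects, 'activation')
assert fn is relu                      # functions pass through
deserialize_keras_object('gelu', objects, 'activation')  # -> ValueError
```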
...@@ -7,10 +7,6 @@ ...@@ -7,10 +7,6 @@
# #
# <https://opensource.org/licenses/BSD-2-Clause> # <https://opensource.org/licenses/BSD-2-Clause>
# #
# Codes are based on:
#
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/utils/losses_utils.py>
#
# ------------------------------------------------------------ # ------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
......
...@@ -249,7 +249,7 @@ class VarianceScaling(Initializer): ...@@ -249,7 +249,7 @@ class VarianceScaling(Initializer):
raise ValueError('<scale> must be positive float.') raise ValueError('<scale> must be positive float.')
mode = mode.lower() mode = mode.lower()
if mode not in {'fan_in', 'fan_out', 'fan_avg'}: if mode not in {'fan_in', 'fan_out', 'fan_avg'}:
raise ValueError('Invalid <mode> argument:', mode) raise ValueError('Invalid <mode> argument: ' + mode)
distribution = distribution.lower() distribution = distribution.lower()
if distribution not in {'normal', 'uniform'}: if distribution not in {'normal', 'uniform'}:
raise ValueError("Invalid `distribution` argument:", distribution) raise ValueError("Invalid `distribution` argument:", distribution)
...@@ -434,10 +434,12 @@ def glorot_normal_initializer(dtype='float32'): ...@@ -434,10 +434,12 @@ def glorot_normal_initializer(dtype='float32'):
# Aliases # Aliases
zeros_initializer = Zeros zeros_initializer = zero = zeros = Zeros
ones_initializer = Ones ones_initializer = one = ones = Ones
constant_initializer = Constant constant_initializer = constant = Constant
random_uniform_initializer = RandomUniform random_uniform_initializer = uniform = random_uniform = RandomUniform
random_normal_initializer = RandomNormal random_normal_initializer = normal = random_normal = RandomNormal
truncated_normal_initializer = TruncatedNormal truncated_normal_initializer = truncated_normal = TruncatedNormal
variance_scaling_initializer = VarianceScaling variance_scaling_initializer = VarianceScaling
glorot_normal = GlorotNormal
glorot_uniform = GlorotUniform
...@@ -26,7 +26,7 @@ def convert_data_format(data_format): ...@@ -26,7 +26,7 @@ def convert_data_format(data_format):
elif data_format == 'channels_first': elif data_format == 'channels_first':
return 'NCHW' return 'NCHW'
else: else:
raise ValueError('Invalid data_format:', data_format) raise ValueError('Invalid data_format: ' + data_format)
def normalize_data_format(value): def normalize_data_format(value):
......
...@@ -70,15 +70,23 @@ class Function(object): ...@@ -70,15 +70,23 @@ class Function(object):
self._arg_device = self._arg_device.SerializeToString() self._arg_device = self._arg_device.SerializeToString()
self._seed = kwargs.get('seed', config.config().random_seed) self._seed = kwargs.get('seed', config.config().random_seed)
def alloc(self): def alloc(self, out=None):
"""Return the executing device to create an output tensor. """Return or bind the executing device to output tensor.
Parameters
----------
out : dragon.vm.torch.Tensor, optional
The optional output tensor.
Returns Returns
------- -------
dragon.vm.torch.device Union[dragon.vm.torch.device, dragon.vm.torch.Tensor]
The device spec. The executing device or output tensor.
""" """
if out is not None:
out._device = self._device.copy()
return out
return self._device.copy() return self._device.copy()
def apply(self, *args, **kwargs): def apply(self, *args, **kwargs):
...@@ -106,7 +114,7 @@ class Function(object): ...@@ -106,7 +114,7 @@ class Function(object):
"""Dispatch the execution.""" """Dispatch the execution."""
if self._def is None: if self._def is None:
self._gen_def() self._gen_def()
if check_device: if len(inputs) > 1 and check_device:
self._check_device(inputs) self._check_device(inputs)
return execute.run_operator( return execute.run_operator(
op_def=self._def, op_def=self._def,
......
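`dispatch()` now skips the cross-device check whenever there is at most one input, since a single input cannot conflict with itself; `_check_device` is only meaningful for multi-input ops. Its body is not shown in this commit, so the sketch below is a hypothetical reconstruction of what such a check typically does, not Dragon's actual implementation:

```python
def _check_device(self, inputs):
    # Hypothetical sketch: verify that all input tensors agree on one
    # device, because mixed-device inputs leave the executing device
    # ambiguous. Dragon's actual implementation may differ.
    device = inputs[0]._device
    for tensor in inputs[1:]:
        if tensor._device != device:
            raise RuntimeError(
                'Expected all inputs on {}, got {}.'
                .format(device, tensor._device))
```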
...@@ -66,7 +66,7 @@ def calculate_gain(nonlinearity, param=None): ...@@ -66,7 +66,7 @@ def calculate_gain(nonlinearity, param=None):
raise ValueError("Negative slope {} is not a valid number".format(param)) raise ValueError("Negative slope {} is not a valid number".format(param))
return math.sqrt(2.0 / (1 + negative_slope ** 2)) return math.sqrt(2.0 / (1 + negative_slope ** 2))
else: else:
raise ValueError('Unsupported nonlinearity:', nonlinearity) raise ValueError('Unsupported nonlinearity: ' + nonlinearity)
def constant_(tensor, val): def constant_(tensor, val):
......
...@@ -340,8 +340,7 @@ class LpNormalize(function.Function): ...@@ -340,8 +340,7 @@ class LpNormalize(function.Function):
} }
def forward(self, input, out=None): def forward(self, input, out=None):
out = out if out else self.alloc() return self.dispatch([input], [self.alloc(out)])
return self.dispatch([input], [out])
class LSTMCell(function.Function): class LSTMCell(function.Function):
...@@ -564,11 +563,7 @@ class RNNParamSet(function.Function): ...@@ -564,11 +563,7 @@ class RNNParamSet(function.Function):
} }
def forward(self, param, weights): def forward(self, param, weights):
return self.dispatch( return self.dispatch([param], [weights], no_grad=True)
[param], [weights],
no_grad=True,
check_device=False,
)
class SigmoidCrossEntropy(_Loss): class SigmoidCrossEntropy(_Loss):
......
...@@ -34,8 +34,7 @@ class ArgReduce(function.Function): ...@@ -34,8 +34,7 @@ class ArgReduce(function.Function):
} }
def forward(self, input, out=None): def forward(self, input, out=None):
outputs = [out] if out else [self.alloc()] return self.dispatch([input], [self.alloc(out)], no_grad=True)
return self.dispatch([input], outputs, no_grad=True)
class Assign(function.Function): class Assign(function.Function):
...@@ -148,8 +147,7 @@ class ChannelShuffle(function.Function): ...@@ -148,8 +147,7 @@ class ChannelShuffle(function.Function):
} }
def forward(self, input, out=None): def forward(self, input, out=None):
out = out if out else self.alloc() return self.dispatch([input], [self.alloc(out)])
return self.dispatch([input], [out])
class Concat(function.Function): class Concat(function.Function):
...@@ -164,8 +162,7 @@ class Concat(function.Function): ...@@ -164,8 +162,7 @@ class Concat(function.Function):
} }
def forward(self, seq, out=None): def forward(self, seq, out=None):
out = out if out else self.alloc() return self.dispatch(seq, [self.alloc(out)])
return self.dispatch(seq, [out])
class Cumulative(function.Function): class Cumulative(function.Function):
...@@ -187,8 +184,7 @@ class Cumulative(function.Function): ...@@ -187,8 +184,7 @@ class Cumulative(function.Function):
} }
def forward(self, input, out=None): def forward(self, input, out=None):
out = out if out else self.alloc() return self.dispatch([input], [self.alloc(out)])
return self.dispatch([input], [out])
class Expand(function.Function): class Expand(function.Function):
...@@ -235,9 +231,8 @@ class IndexSelect(function.Function): ...@@ -235,9 +231,8 @@ class IndexSelect(function.Function):
}, },
} }
def forward(self, input, indices, out=None): def forward(self, input, index, out=None):
out = out if out else self.alloc() return self.dispatch([input, index], [self.alloc(out)])
return self.dispatch([input, indices], [out])
class MaskedAssign(function.Function): class MaskedAssign(function.Function):
...@@ -248,7 +243,7 @@ class MaskedAssign(function.Function): ...@@ -248,7 +243,7 @@ class MaskedAssign(function.Function):
return {'op_type': 'MaskedAssign', 'arguments': {}} return {'op_type': 'MaskedAssign', 'arguments': {}}
def forward(self, out, mask, input): def forward(self, out, mask, input):
return self.dispatch([input, mask], [out]) return self.dispatch([input, mask], [self.alloc(out)])
class MaskedSelect(function.Function): class MaskedSelect(function.Function):
...@@ -259,8 +254,7 @@ class MaskedSelect(function.Function): ...@@ -259,8 +254,7 @@ class MaskedSelect(function.Function):
return {'op_type': 'MaskedSelect', 'arguments': {}} return {'op_type': 'MaskedSelect', 'arguments': {}}
def forward(self, input, mask, out=None): def forward(self, input, mask, out=None):
out = out if out else self.alloc() return self.dispatch([input, mask], [self.alloc(out)])
return self.dispatch([input, mask], [out])
class Multinomial(function.Function): class Multinomial(function.Function):
...@@ -280,8 +274,7 @@ class Multinomial(function.Function): ...@@ -280,8 +274,7 @@ class Multinomial(function.Function):
} }
def forward(self, input, out=None): def forward(self, input, out=None):
out = out if out else self.alloc() return self.dispatch([input], [self.alloc(out)], no_grad=True)
return self.dispatch([input], [out], no_grad=True)
class NonZero(function.Function): class NonZero(function.Function):
...@@ -292,8 +285,7 @@ class NonZero(function.Function): ...@@ -292,8 +285,7 @@ class NonZero(function.Function):
return {'op_type': 'NonZero', 'arguments': {}} return {'op_type': 'NonZero', 'arguments': {}}
def forward(self, input, out=None): def forward(self, input, out=None):
out = out if out else self.alloc() return self.dispatch([input], [self.alloc(out)], no_grad=True)
return self.dispatch([input], [out], no_grad=True)
class OneHot(function.Function): class OneHot(function.Function):
...@@ -330,8 +322,7 @@ class Reduce(function.Function): ...@@ -330,8 +322,7 @@ class Reduce(function.Function):
} }
def forward(self, input, out=None): def forward(self, input, out=None):
out = out if out else self.alloc() return self.dispatch([input], [self.alloc(out)])
return self.dispatch([input], [out])
class Reshape(function.Function): class Reshape(function.Function):
...@@ -356,9 +347,8 @@ class Reshape(function.Function): ...@@ -356,9 +347,8 @@ class Reshape(function.Function):
shape[i], 'int64') shape[i], 'int64')
def forward(self, input, shape, out=None): def forward(self, input, shape, out=None):
out = out if out else self.alloc()
return self.dispatch( return self.dispatch(
[input], [out], [input], [self.alloc(out)],
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, shape), self.feed(ws, handle, shape),
) )
...@@ -433,8 +423,7 @@ class Stack(function.Function): ...@@ -433,8 +423,7 @@ class Stack(function.Function):
} }
def forward(self, seq, out=None): def forward(self, seq, out=None):
out = out if out else self.alloc() return self.dispatch(seq, [self.alloc(out)])
return self.dispatch(seq, [out])
class Squeeze(function.Function): class Squeeze(function.Function):
...@@ -451,8 +440,7 @@ class Squeeze(function.Function): ...@@ -451,8 +440,7 @@ class Squeeze(function.Function):
} }
def forward(self, input, out=None): def forward(self, input, out=None):
out = out if out else self.alloc() return self.dispatch([input], [self.alloc(out)])
return self.dispatch([input], [out])
class Tile(function.Function): class Tile(function.Function):
...@@ -534,8 +522,8 @@ class TopK(function.Function): ...@@ -534,8 +522,8 @@ class TopK(function.Function):
} }
} }
def forward(self, input, outputs=None): def forward(self, input, outputs=(None, None)):
outputs = [self.alloc(), self.alloc()] if outputs is None else outputs outputs = [self.alloc(outputs[0]), self.alloc(outputs[1])]
return self.dispatch([input], outputs, no_grad=True) return self.dispatch([input], outputs, no_grad=True)
...@@ -553,8 +541,7 @@ class UnSqueeze(function.Function): ...@@ -553,8 +541,7 @@ class UnSqueeze(function.Function):
} }
def forward(self, input, out=None): def forward(self, input, out=None):
out = out if out else self.alloc() return self.dispatch([input], [self.alloc(out)])
return self.dispatch([input], [out])
class Where(function.Function): class Where(function.Function):
......
...@@ -943,7 +943,7 @@ def topk(input, k, dim=None, largest=True, sorted=True, out=None): ...@@ -943,7 +943,7 @@ def topk(input, k, dim=None, largest=True, sorted=True, out=None):
axis=dim, axis=dim,
largest=largest, largest=largest,
sorted=sorted, sorted=sorted,
).apply(input, out) ).apply(input, out if out else (None, None))
def unsqueeze(input, dim, out=None): def unsqueeze(input, dim, out=None):
......
...@@ -42,7 +42,7 @@ def all_reduce(tensor, op='SUM', group=None): ...@@ -42,7 +42,7 @@ def all_reduce(tensor, op='SUM', group=None):
if group is None: if group is None:
raise ValueError('<group> is required.') raise ValueError('<group> is required.')
if op not in ('MEAN', 'SUM'): if op not in ('MEAN', 'SUM'):
raise ValueError('Unsupported reduce op:', op) raise ValueError('Unsupported reduce op: ' + op)
tensors = nest.flatten(tensor) tensors = nest.flatten(tensor)
return _functions.Collective \ return _functions.Collective \
.instantiate( .instantiate(
......
...@@ -34,6 +34,7 @@ class _Initializer(function.Function): ...@@ -34,6 +34,7 @@ class _Initializer(function.Function):
[] if shape_like is None else [shape_like], [out], [] if shape_like is None else [shape_like], [out],
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, shape), self.feed(ws, handle, shape),
no_grad=True,
) )
...@@ -62,9 +63,10 @@ class Arange(function.Function): ...@@ -62,9 +63,10 @@ class Arange(function.Function):
def forward(self, slice_args, out=None): def forward(self, slice_args, out=None):
return self.dispatch( return self.dispatch(
[], [out if out else self.alloc()], [], [self.alloc(out)],
callback=lambda ws, handle: callback=lambda ws, handle:
self.feed(ws, handle, slice_args) self.feed(ws, handle, slice_args),
no_grad=True,
) )
......
...@@ -121,7 +121,7 @@ def eye( ...@@ -121,7 +121,7 @@ def eye(
Returns Returns
------- -------
dragon.Tensor dragon.vm.torch.Tensor
The output tensor. The output tensor.
""" """
......
...@@ -33,8 +33,7 @@ class Axpby(function.Function): ...@@ -33,8 +33,7 @@ class Axpby(function.Function):
} }
def forward(self, input, out=None): def forward(self, input, out=None):
out = out if out else self.alloc() return self.dispatch([input], [self.alloc(out)], no_grad=True)
return self.dispatch([input], [out], no_grad=True)
class BinaryFunc(function.Function): class BinaryFunc(function.Function):
...@@ -46,8 +45,7 @@ class BinaryFunc(function.Function): ...@@ -46,8 +45,7 @@ class BinaryFunc(function.Function):
return {'op_type': self.op_type, 'arguments': {}} return {'op_type': self.op_type, 'arguments': {}}
def forward(self, input, value, out=None): def forward(self, input, value, out=None):
out = out if out else self.alloc() return self.dispatch([input, value], [self.alloc(out)])
return self.dispatch([input, value], [out])
class Clip(function.Function): class Clip(function.Function):
...@@ -70,8 +68,7 @@ class Clip(function.Function): ...@@ -70,8 +68,7 @@ class Clip(function.Function):
} }
def forward(self, input, out=None): def forward(self, input, out=None):
out = out if out else self.alloc() return self.dispatch([input], [self.alloc(out)])
return self.dispatch([input], [out])
class UnaryFunc(function.Function): class UnaryFunc(function.Function):
...@@ -83,8 +80,7 @@ class UnaryFunc(function.Function): ...@@ -83,8 +80,7 @@ class UnaryFunc(function.Function):
return {'op_type': self.op_type, 'arguments': {}} return {'op_type': self.op_type, 'arguments': {}}
def forward(self, input, out=None): def forward(self, input, out=None):
out = out if out else self.alloc() return self.dispatch([input], [self.alloc(out)])
return self.dispatch([input], [out])
class MatMul(function.Function): class MatMul(function.Function):
...@@ -103,5 +99,4 @@ class MatMul(function.Function): ...@@ -103,5 +99,4 @@ class MatMul(function.Function):
} }
def forward(self, mat1, mat2, out=None): def forward(self, mat1, mat2, out=None):
out = out if out else self.alloc() return self.dispatch([mat1, mat2], [self.alloc(out)])
return self.dispatch([mat1, mat2], [out])
...@@ -37,11 +37,7 @@ class ParamUpdate(function.Function): ...@@ -37,11 +37,7 @@ class ParamUpdate(function.Function):
def forward(self, param, grad): def forward(self, param, grad):
self._check_device([param, grad]) self._check_device([param, grad])
return self.dispatch( return self.dispatch([grad], [param], no_grad=True)
[grad], [param],
no_grad=True,
check_device=False,
)
class GradAccumulate(function.Function): class GradAccumulate(function.Function):
......