Commit 94863c22 by Ting PAN

Fix the disorder while compiling ops

1 parent f4e789be
@@ -17,28 +17,30 @@ class RepeatOp : public Operator<Context> {
     RepeatOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
           axis(OperatorBase::GetSingleArg<int>("axis", -1)),
-          repeats(OperatorBase::GetSingleArg<int>("repeats", 1)) {}
+          repeats_desc(OperatorBase::GetSingleArg<string>("repeats", "")) {}

     void RunOnDevice() override;
     template<typename T> void RunWithType();

 protected:
-    TIndex axis, repeats, outer_dim, dim, inner_dim;
+    TIndex axis, outer_dim, dim, inner_dim, reps;
+    string repeats_desc;
 };

 template <class Context>
 class RepeatGradientOp : public Operator<Context> {
 public:
     RepeatGradientOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
           axis(OperatorBase::GetSingleArg<int>("axis", -1)),
-          repeats(OperatorBase::GetSingleArg<int>("repeats", 1)) {}
+          repeats_desc(OperatorBase::GetSingleArg<string>("repeats", "")) {}

     void RunOnDevice() override;
     template<typename T> void RunWithType();

 protected:
-    TIndex axis, repeats, outer_dim, dim, inner_dim;
+    TIndex axis, outer_dim, dim, inner_dim, reps;
+    string repeats_desc;
 };

 }    // namespace dragon
...
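Note: the hunk above replaces the compile-time int argument `repeats` with a string `repeats_desc` naming an int32 tensor in the workspace, so the repeat count is resolved at run time instead of being baked into the OperatorDef. A minimal Python sketch of this "desc" pattern (the class and tensor names here are illustrative, not Dragon's actual API):

# The workspace maps tensor names to values; the op stores only the *name*.
workspace = {}

class RepeatOpSketch(object):
    def __init__(self, repeats_desc):
        # like repeats_desc(OperatorBase::GetSingleArg<string>("repeats", ""))
        self.repeats_desc = repeats_desc

    def run_on_device(self):
        # like ws()->GetTensor(repeats_desc): fetched on every run
        reps = workspace[self.repeats_desc]
        assert isinstance(reps, int), 'the type of repeats should be int32'
        return reps

workspace['repeats/value'] = 3
op = RepeatOpSketch('repeats/value')
print(op.run_on_device())       # 3
workspace['repeats/value'] = 5  # re-feed without rebuilding the graph
print(op.run_on_device())       # 5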
@@ -33,20 +33,21 @@ class L2NormOp final : public Operator<Context> {
     TIndex outer_dim, dim, inner_dim, spatial_dim;
 };

 template <class Context>
 class L2NormGradientOp final : public Operator<Context> {
 public:
     L2NormGradientOp(const OperatorDef& op_def, Workspace* ws)
         : Operator<Context>(op_def, ws),
           axis(OperatorBase::GetSingleArg<int>("axis", 0)),
-          num_axes(OperatorBase::GetSingleArg<int>("num_axes", -1)) {}
+          num_axes(OperatorBase::GetSingleArg<int>("num_axes", -1)),
+          mode(OperatorBase::GetSingleArg<string>("mode", "SUM")) {}

     void RunOnDevice() override;
     template <typename T> void RunWithType();

 protected:
     TIndex axis, num_axes, end_axis;
+    string mode;
     bool across_inner;
     Tensor* norm, *multiplier, *buffer, *buffer_inner;
     TIndex outer_dim, dim, inner_dim;
...
@@ -279,7 +279,6 @@ def Reduce(inputs, axis=-1, operation='NONE', keep_dims=False, **kwargs):
                 output.shape[i] = 1
         else: output.shape = [1]
     else:
         if keep_dims: output.shape[axis] = 1
         else: del output.shape[axis]
@@ -445,7 +444,7 @@ def Repeat(inputs, axis=-1, repeats=1, **kwargs):
         The input tensor.
     axis : int
         The axis to repeat. Defaults is ``-1`` (Repeat as Scalar).
-    repeats : int
+    repeats : int or Tensor
         The magnitude of repeating.

     Returns
@@ -456,12 +455,17 @@ def Repeat(inputs, axis=-1, repeats=1, **kwargs):
     """
     CheckInputs(inputs, 1)
     arguments = ParseArguments(locals())
+    arguments['extra_inputs'] = [Tensor.Convert(repeats, dtype='int32')]
+    arguments['repeats'] = arguments['extra_inputs'][0].name

     output = Tensor.CreateOperator(nout=1, op_type='Repeat', **arguments)

-    if inputs.shape is not None:
+    if inputs.shape is not None and \
+            not isinstance(repeats, Tensor):
         if axis == -1:
-            total_count = np.prod(inputs.shape)
+            fake_shape = inputs.shape[:]
+            fake_shape = [1 if dim is None else dim for dim in fake_shape]
+            total_count = np.prod(fake_shape)
             output.shape = [total_count * repeats]
         else:
             output.shape = inputs.shape[:]
...
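Note: the Python-side shape inference above now runs only when `repeats` is a static int; with a Tensor the count is unknown until run time. A standalone numpy sketch of that bookkeeping (the helper name is made up; the axis branch mirrors the C++ Reshape logic shown further down):

import numpy as np

def infer_repeat_shape(in_shape, axis, repeats):
    if axis == -1:
        # unknown (None) dims are treated as 1, as the fake_shape trick does
        fake_shape = [1 if dim is None else dim for dim in in_shape]
        return [int(np.prod(fake_shape)) * repeats]
    out = list(in_shape)
    out[axis] = out[axis] * repeats
    return out

print(infer_repeat_shape([2, None, 3], axis=-1, repeats=2))  # [12]
print(infer_repeat_shape([2, 3], axis=1, repeats=4))         # [2, 12]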
@@ -552,7 +552,6 @@ class Net(object):
         """
         return list(self._net_outputs)

     def replace(self, A, B):
         """Replace the A as B.
...
@@ -262,7 +262,6 @@ def function(inputs=None, outputs=None, givens=None, updater=None):
             all_exprs = dict(all_exprs, **output.expressions)
             all_extra_targets = all_extra_targets.union(output.extra_targets)
             if len(output.grad_wrts) > 0: existing_grads = True
-    for extra_target in all_extra_targets: meta_graph.target.extend([extra_target])

     # we should sort out the topology of these operators before using
     all_exprs = sorted(all_exprs.items(), key=lambda d: d[0])
@@ -280,9 +279,10 @@ def function(inputs=None, outputs=None, givens=None, updater=None):
                 external_input_exprs = OrderedDict(external_input_exprs, **new_tensor.expressions)
             else:
                 external_input_exprs = dict(external_input_exprs, **new_tensor.expressions)
-            external_input_exprs = OrderedDict(sorted(external_input_exprs.items(), lambda x, y: cmp(x[1], y[1])))
+            external_input_exprs = OrderedDict(sorted(external_input_exprs.items(), key=lambda A: A[0]))
         elif isinstance(new_tensor, np.ndarray):
             ws.FeedTensor(new_tensor, GetTensorName())
+        all_extra_targets = all_extra_targets.union(new_tensor.extra_targets)
     external_input_ops = [v for k, v in external_input_exprs.items()]
     for op in forward_ops:
         op.input.extend([name_dict[input] if input in name_dict
@@ -298,8 +298,15 @@ def function(inputs=None, outputs=None, givens=None, updater=None):
         forward_ops, grad_ops = GraphGradientMaker.Make(forward_ops, targets)
     else:
         grad_ops = []
+
+    # Write Ops
     meta_graph.op.extend(forward_ops + grad_ops)
+
+    # Write Extra Targets
+    for extra_target in all_extra_targets:
+        meta_graph.target.extend([extra_target])
+
+    # Write Misc
     if len(outputs) > 0:
         GraphDef_Device(meta_graph)
         GraphDef_Opt(meta_graph)
...
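Note: the two hunks above are the actual fix for the compile-order bug named in the commit title. Expression dicts are now sorted by their keys with a deterministic key= function instead of Python 2's cmp over protobuf values (which have no meaningful ordering), and extra targets are written into the graph only after all ops. A small sketch of why sorting by key restores the intended topology, assuming keys are creation-ordered expression indices:

from collections import OrderedDict

# values stand in for OperatorDef protobufs, which do not compare meaningfully
exprs = {3: 'op_c', 1: 'op_a', 2: 'op_b'}

ordered = OrderedDict(sorted(exprs.items(), key=lambda A: A[0]))
print(list(ordered.values()))  # ['op_a', 'op_b', 'op_c'] -- deterministic order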
@@ -36,7 +36,7 @@ find_packages('dragon')
 find_modules()

 setup(name = 'dragon',
-      version='0.2.1.6',
+      version='0.2.1.7',
       description = 'Dragon: A Computation Graph Virtual Machine Based Deep Learning Framework',
       url='https://github.com/neopenx/Dragon',
       author='Ting Pan',
...
@@ -12,7 +12,7 @@ void RepeatOp<Context>::RunWithType() {
                                outer_dim,
                                dim,
                                inner_dim,
-                               repeats,
+                               reps,
                                Xdata,
                                Ydata,
                                &ctx());
@@ -20,16 +20,20 @@ void RepeatOp<Context>::RunWithType() {
 template <class Context>
 void RepeatOp<Context>::RunOnDevice() {
+    // parse repeats from desc
+    Tensor* repeats = ws()->GetTensor(repeats_desc);
+    CHECK(repeats->IsType<int>()) << "\nThe type of repeats should be int32.";
+    reps = repeats->template data<int, CPUContext>()[0];
     if (axis == -1) {
         outer_dim = inner_dim = 1;
         dim = input(0).count();
-        output(0)->Reshape(vector<TIndex>(1, dim * repeats));
+        output(0)->Reshape(vector<TIndex>(1, dim * reps));
     } else {
         outer_dim = input(0).count(0, axis);
         dim = input(0).dim(axis);
         inner_dim = input(0).count(axis + 1);
         vector<TIndex> dims = input(0).dims();
-        dims[axis] *= repeats;
+        dims[axis] *= reps;
         output(0)->Reshape(dims);
     }
@@ -51,7 +55,7 @@ void RepeatGradientOp<Context>::RunWithType() {
                                outer_dim,
                                dim,
                                inner_dim,
-                               repeats,
+                               reps,
                                dYdata,
                                dXdata,
                                &ctx());
@@ -59,6 +63,10 @@ void RepeatGradientOp<Context>::RunWithType() {
 template <class Context>
 void RepeatGradientOp<Context>::RunOnDevice() {
+    // parse repeats from desc
+    Tensor* repeats = ws()->GetTensor(repeats_desc);
+    CHECK(repeats->IsType<int>()) << "\nThe type of repeats should be int32.";
+    reps = repeats->template data<int, CPUContext>()[0];
     if (axis == -1) {
         outer_dim = inner_dim = 1;
         dim = input(0).count();
...
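Note: both the forward and gradient ops now read `reps` from the workspace before computing the (outer_dim, dim, inner_dim) decomposition. A numpy reference of what the Repeat kernel computes under that decomposition (a sketch assuming the kernel repeats each slice of the middle axis `reps` times, which is what the shape arithmetic implies; not the actual kernel code):

import numpy as np

def repeat_ref(x, axis, reps):
    if axis == -1:
        outer_dim, dim, inner_dim = 1, x.size, 1
    else:
        outer_dim = int(np.prod(x.shape[:axis], dtype=np.int64))
        dim = x.shape[axis]
        inner_dim = int(np.prod(x.shape[axis + 1:], dtype=np.int64))
    v = x.reshape(outer_dim, dim, inner_dim)
    y = np.repeat(v, reps, axis=1)   # repeat each middle-axis slice reps times
    if axis == -1:
        return y.reshape(-1)         # flat output of length dim * reps
    return y.reshape(x.shape[:axis] + (dim * reps,) + x.shape[axis + 1:])

x = np.arange(6).reshape(2, 3)
print(np.array_equal(repeat_ref(x, 1, 2), np.repeat(x, 2, axis=1)))  # True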
@@ -116,6 +116,7 @@ void L2NormGradientOp<Context>::RunWithType() {
         if (across_inner) {
             Ndata = norm->template data<T, CPUContext>();
             T sum_of_x_mul_dy = math::Dot<T, Context>(buffer->count(), Xdata, dYdata);
+            if (mode == "MEAN") sum_of_x_mul_dy = sum_of_x_mul_dy / dim;
             math::Scale<T, Context>(buffer->count(), sum_of_x_mul_dy / Ndata[n] / Ndata[n], Xdata, dXdata);
             math::Sub<T, Context>(buffer->count(), dYdata, dXdata, dXdata);
             math::Scal<T, Context>(buffer->count(), T(1.0 / Ndata[n]), dXdata);
@@ -123,7 +124,7 @@ void L2NormGradientOp<Context>::RunWithType() {
             // compute \sum_{i} x_{i, j}dy_{i, j}
             math::Mul<T, Context>(buffer->count(), Xdata, dYdata, Bdata);
             math::Gemv<T, Context>(CblasTrans, dim, inner_dim,
-                                   1.0,
+                                   mode == "MEAN" ? 1.0 / dim : 1.0,
                                    Bdata, DMuldata,
                                    0.0,
                                    BInnerdata);
...
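Note: the MEAN branch divides the x-dot-dy term (and the Gemv accumulation in the across-spatial path) by `dim`. That is the extra factor the chain rule produces if the forward pass of this mode normalizes by sqrt(mean(x^2)) rather than sqrt(sum(x^2)); the forward change is not part of this hunk, so the sketch below treats that as an assumption. A numpy finite-difference check of the across_inner branch:

import numpy as np

def fwd(x, mode):
    # assumed forward: "MEAN" normalizes by sqrt(mean(x^2)), "SUM" by sqrt(sum(x^2))
    n2 = (x * x).sum() / (x.size if mode == 'MEAN' else 1.0)
    return x / np.sqrt(n2)

def l2norm_grad(x, dy, mode='SUM'):
    n = np.sqrt((x * x).sum() / (x.size if mode == 'MEAN' else 1.0))
    s = x.dot(dy)
    if mode == 'MEAN': s = s / x.size      # the scaling added in this commit
    # dX = (dY - X * s / N^2) / N, as the Scale/Sub/Scal sequence computes
    return (dy - x * s / (n * n)) / n

rng = np.random.RandomState(0)
x, dy = rng.randn(8), rng.randn(8)
eps = 1e-6
num = np.array([(fwd(x + eps * np.eye(8)[j], 'MEAN').dot(dy) -
                 fwd(x - eps * np.eye(8)[j], 'MEAN').dot(dy)) / (2 * eps)
                for j in range(8)])
print(np.allclose(num, l2norm_grad(x, dy, 'MEAN'), atol=1e-5))  # True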