Commit 1d551431 by Ting PAN

Re-implement Softmax Focal Loss

1 parent 5dea1524
......@@ -52,9 +52,9 @@ using Set = std::unordered_set<Value>;
/*
* Define the Kernel version.
*
* | Major(2) | Minor(2) | Patch(06) |
* | Major(2) | Minor(2) | Patch(07) |
*/
#define DRAGON_VERSION 2206
#define DRAGON_VERSION 2207
/*
* Define the default random seed.
......
......@@ -90,8 +90,10 @@ class Operator : public OperatorBase {
public:
Operator(const OperatorDef& def, Workspace* ws)
: OperatorBase(def, ws), ctx_(def.device_option()),
recomputing_aware_(OperatorBase::Arg<bool>(
"recomputing_aware", false)) {
allow_recompute_(OperatorBase::Arg<bool>(
"recomputing_aware", false)),
do_sync_(OperatorBase::Arg<bool>(
"do_sync", true)) {
allow_run_ = true;
allow_run_ &= _MPICheck();
allow_run_ &= (!(OutputSize() == 1 &&
......@@ -99,13 +101,13 @@ class Operator : public OperatorBase {
}
virtual void Run() final {
if (!allow_run_) return;
if (recomputing_aware_) MakeResource();
if (!allow_run_) return;
if (allow_recompute_) MakeResource();
ctx().SwitchToDevice();
MemorySwitch();
RunOnDevice();
ctx().FinishDeviceCompution();
if (recomputing_aware_) CleanResource();
if (do_sync_) ctx().FinishDeviceCompution();
if (allow_recompute_) CleanResource();
}
virtual void ElimateCorruption();
......@@ -126,7 +128,7 @@ class Operator : public OperatorBase {
protected:
Context ctx_;
bool allow_run_, recomputing_aware_;
bool allow_run_, allow_recompute_, do_sync_;
private:
bool _MPICheck() {
......
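For context, a minimal Python mirror of the revised Run() control flow (names follow the C++ members above and are illustrative, not a Dragon API): allow_recompute_ replaces recomputing_aware_, and the new "do_sync" argument (default true) makes the post-kernel device synchronization optional.

# Minimal sketch of the revised Operator::Run() flow (illustrative only;
# attribute names mirror the C++ members in the hunk above).
def run(op):
    if not op.allow_run:
        return
    if op.allow_recompute:            # from the "recomputing_aware" arg
        op.make_resource()
    op.ctx.switch_to_device()
    op.memory_switch()
    op.run_on_device()
    if op.do_sync:                    # new "do_sync" arg, default True
        op.ctx.finish_device_computation()
    if op.allow_recompute:
        op.clean_resource()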
......@@ -24,11 +24,11 @@ class SparseSoftmaxCrossEntropyOp : public Operator<Context> {
axis(OperatorBase::Arg<int>("axis", 1)),
normalization(OperatorBase::Arg<string>(
"normalization", "VALID")) {
vector<int> ignores = OperatorBase::Args<int>("ignore_labels");
if (ignores.size()) {
ignore.Reshape({ (TIndex)ignores.size() });
auto* Idata = ignore.mutable_data<int, CPUContext>();
for (int i = 0; i < ignores.size(); i++) Idata[i] = ignores[i];
auto xs = OperatorBase::Args<int>("ignore_labels");
if (xs.size()) {
ignores.Reshape({ (TIndex)xs.size() });
auto* Idata = ignores.mutable_data<int, CPUContext>();
for (int i = 0; i < xs.size(); i++) Idata[i] = xs[i];
}
}
USE_OPERATOR_FUNCTIONS;
......@@ -41,8 +41,7 @@ class SparseSoftmaxCrossEntropyOp : public Operator<Context> {
protected:
TIndex axis, outer_dim, inner_dim;
Tensor ignore, valid, losses;
Tensor* prob;
Tensor* prob;
Tensor losses, flags, ignores;
unique_ptr<OperatorBase> softmax_op;
string normalization;
};
......@@ -55,11 +54,11 @@ class SparseSoftmaxCrossEntropyGradientOp : public Operator<Context> {
axis(OperatorBase::Arg<int>("axis", 1)),
normalization(OperatorBase::Arg<string>(
"normalization", "VALID")) {
vector<int> ignores = OperatorBase::Args<int>("ignore_labels");
if (ignores.size()) {
ignore.Reshape({ (TIndex)ignores.size() });
auto* Idata = ignore.mutable_data<int, CPUContext>();
for (int i = 0; i < ignores.size(); i++) Idata[i] = ignores[i];
auto xs = OperatorBase::Args<int>("ignore_labels");
if (xs.size()) {
ignores.Reshape({ (TIndex)xs.size() });
auto* Idata = ignores.mutable_data<int, CPUContext>();
for (int i = 0; i < xs.size(); i++) Idata[i] = xs[i];
}
}
USE_OPERATOR_FUNCTIONS;
......@@ -69,8 +68,7 @@ class SparseSoftmaxCrossEntropyGradientOp : public Operator<Context> {
protected:
TIndex axis, outer_dim, inner_dim;
Tensor ignore, valid;
Tensor* prob;
Tensor* prob;
Tensor ignores, flags;
string normalization;
};
......
......@@ -17,18 +17,19 @@
namespace dragon {
template <class Context>
class SparseSoftmaxFocalLossOp final : public SparseSoftmaxCrossEntropyOp<Context> {
class SparseSoftmaxFocalLossOp final
: public SparseSoftmaxCrossEntropyOp<Context> {
public:
SparseSoftmaxFocalLossOp(const OperatorDef& def, Workspace* ws)
: SparseSoftmaxCrossEntropyOp<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", 1)),
normalization(OperatorBase::Arg<string>(
"normalization", "VALID")),
alpha(OperatorBase::Arg<float>("alpha", 0.5)),
gamma(OperatorBase::Arg<float>("gamma", 0.0)),
neg_id(OperatorBase::Arg<int>("neg_id", -1)) {
pos_alpha = alpha * 2.0;
neg_alpha = (1 - alpha) * 2.0;
alpha(OperatorBase::Arg<float>("alpha", 0.25f)),
gamma(OperatorBase::Arg<float>("gamma", 2.f)),
neg_id(OperatorBase::Arg<int>("neg_id", 0)) {
pos_alpha = alpha;
neg_alpha = 1.f - alpha;
}
USE_OPERATOR_FUNCTIONS;
......@@ -36,35 +37,36 @@ class SparseSoftmaxFocalLossOp final : public SparseSoftmaxCrossEntropyOp<Contex
template <typename T> void RunWithType();
protected:
float alpha, gamma;
int neg_id;
float pos_alpha, neg_alpha;
TIndex axis, outer_dim, inner_dim;
Tensor* scale;
float alpha, gamma, pos_alpha, neg_alpha;
TIndex axis, neg_id, outer_dim, inner_dim;
Tensor losses, flags;
string normalization;
};
template <class Context>
class SparseSoftmaxFocalLossGradientOp final : public SparseSoftmaxCrossEntropyGradientOp<Context> {
class SparseSoftmaxFocalLossGradientOp final
: public SparseSoftmaxCrossEntropyGradientOp<Context> {
public:
SparseSoftmaxFocalLossGradientOp(const OperatorDef& def, Workspace* ws)
: SparseSoftmaxCrossEntropyGradientOp<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", 1)),
normalization(OperatorBase::Arg<string>(
"normalization", "VALID")),
gamma(OperatorBase::Arg<float>("gamma", 0.0)),
eps(OperatorBase::Arg<float>("eps", float(1e-10))),
neg_id(OperatorBase::Arg<int>("neg_id", -1)) {}
alpha(OperatorBase::Arg<float>("alpha", 0.25f)),
gamma(OperatorBase::Arg<float>("gamma", 2.f)),
neg_id(OperatorBase::Arg<int>("neg_id", 0)) {
pos_alpha = alpha;
neg_alpha = 1.f - alpha;
}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
float gamma, eps;
int neg_id;
TIndex axis, outer_dim, inner_dim;
Tensor* scale;
float alpha, gamma, pos_alpha, neg_alpha;
TIndex axis, neg_id, outer_dim, inner_dim;
Tensor flags;
string normalization;
};
......
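Note: the constructor change above drops the old ad-hoc scaling (pos_alpha = 2 * alpha, neg_alpha = 2 * (1 - alpha)) in favor of the class-balanced weights from Lin et al., 2017, together with the paper's defaults (alpha = 0.25, gamma = 2). For the probability p_t assigned to the true class, the per-position loss is

$$ \mathrm{FL}(p_t) = -\alpha_t \, (1 - p_t)^{\gamma} \, \log(p_t), \qquad \alpha_t = \begin{cases} \alpha, & \text{label} > \texttt{neg\_id} \\ 1 - \alpha, & \text{label} \le \texttt{neg\_id} \end{cases} $$

where the split of positives and negatives by neg_id is inferred from the pos_alpha/neg_alpha members above.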
......@@ -289,37 +289,36 @@ void SoftmaxCrossEntropy(
template <typename Tx, typename Ty, class Context>
void SparseSoftmaxCrossEntropy(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const Tx* prob,
const Ty* labels,
Tx* loss,
Tx* valid,
Tensor* ignore,
const int* ignores,
const int num_ignores,
Tx* losses,
Tx* flags,
Context* ctx);
template <typename Tx, typename Ty, class Context>
void SparseSoftmaxCrossEntropyGrad(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const Tx* prob,
const Ty* labels,
Tx* valid,
Tensor* ignore,
const int* ignores,
const int num_ignores,
Tx* dx,
Tx* flags,
Context* ctx);
/******************** loss.sparse_softmax_focal_loss ********************/
template <typename T, class Context>
void SparseSoftmaxFocalLoss(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float pos_alpha,
const float neg_alpha,
......@@ -327,26 +326,28 @@ void SparseSoftmaxFocalLoss(
const int neg_id,
const T* prob,
const T* labels,
T* scale,
T* loss,
T* valid,
Tensor* ignore);
const int* ignores,
const int num_ignores,
T* losses,
T* flags,
Context* ctx);
template <typename T, class Context>
void SparseSoftmaxFocalLossGrad(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float pos_alpha,
const float neg_alpha,
const float gamma,
const int neg_id,
const float eps,
const T* scale,
const T* prob,
const T* labels,
T* valid,
Tensor* ignore,
T* dx);
const int* ignores,
const int num_ignores,
T* dx,
T* flags,
Context* ctx);
/******************** misc.astype ********************/
......
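As a reading aid for the refactored kernel signatures (losses/flags buffers replacing loss/valid, and a raw ignores pointer replacing the Tensor* argument), here is a hedged NumPy sketch of the forward computation. The exact semantics of flags (counting only positives for "VALID" normalization) and the clamp standing in for the removed eps argument are assumptions from the surrounding diff.

import numpy as np

def sparse_softmax_focal_loss(prob, labels, pos_alpha=0.25, neg_alpha=0.75,
                              gamma=2.0, neg_id=0, ignores=()):
    # prob: (outer_dim, axis_dim, inner_dim) softmax probabilities
    # labels: (outer_dim, inner_dim) integer class ids
    outer_dim, axis_dim, inner_dim = prob.shape
    losses = np.zeros((outer_dim, inner_dim), dtype=prob.dtype)
    flags = np.zeros((outer_dim, inner_dim), dtype=prob.dtype)
    for i in range(outer_dim):
        for j in range(inner_dim):
            label = int(labels[i, j])
            if label in ignores:
                continue              # ignored label: zero loss, zero flag
            p = prob[i, label, j]
            scale = pos_alpha if label > neg_id else neg_alpha
            # the clamp stands in for the removed explicit "eps" argument
            losses[i, j] = -scale * (1.0 - p) ** gamma * np.log(max(p, 1e-10))
            # count only positives toward "VALID" normalization (assumption)
            flags[i, j] = 1.0 if label > neg_id else 0.0
    return losses, flags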
......@@ -227,6 +227,7 @@ PyMethodDef* GetAllMethods() {
PYFUNC(HasTensorCC),
PYFUNC(CreateTensorCC),
PYFUNC(CreateFillerCC),
PYFUNC(GetFillerTypeCC),
PYFUNC(RenameTensorCC),
PYFUNC(TensorFromShapeCC),
PYFUNC(TensorFromPyArrayCC),
......
......@@ -56,12 +56,14 @@ class NumpyFetcher : public TensorFetcherBase {
for (const auto dim : tensor.dims()) npy_dims.push_back(dim);
int npy_type = TypeMetaToNPY(tensor.meta());
if (npy_type == -1) {
string s = "The data type of Tensor(" + tensor.name() + ") is unknown. Have you solved it ?";
string s = "The data type of Tensor(" +
tensor.name() + ") is unknown. Have you solved it ?";
PyErr_SetString(PyExc_RuntimeError, s.c_str());
return nullptr;
}
// create an empty array with the right shape
PyObject* array = PyArray_SimpleNew(tensor.ndim(), npy_dims.data(), npy_type);
PyObject* array = PyArray_SimpleNew(
tensor.ndim(), npy_dims.data(), npy_type);
// copy the tensor data to the numpy array
if (tensor.memory_state() == MixedMemory::STATE_AT_CUDA) {
CUDAContext::Memcpy<CPUContext, CUDAContext>(tensor.nbytes(),
......@@ -86,8 +88,8 @@ class StringFetcher : public TensorFetcherBase {
class NumpyFeeder : public TensorFeederBase {
public:
PyObject* Feed(const DeviceOption& option,
PyArrayObject* original_array,
Tensor* tensor) override {
PyArrayObject* array = PyArray_GETCONTIGUOUS(original_array);
const TypeMeta& meta = TypeNPYToMeta(PyArray_TYPE(array));
......
......@@ -52,6 +52,11 @@ inline PyObject* CreateFillerCC(PyObject* self, PyObject* args) {
Py_RETURN_TRUE;
}
inline PyObject* GetFillerTypeCC(PyObject* self, PyObject* args) {
const auto* f = ws()->GetFiller(ParseName(self, args));
return String_AsPyUnicode(f->type());
}
inline PyObject* RenameTensorCC(PyObject* self, PyObject* args) {
char* ori_name, *tar_name;
if (!PyArg_ParseTuple(args, "ss", &ori_name, &tar_name)) {
......
......@@ -44,6 +44,7 @@ __all__ = [
'HasTensor',
'CreateTensor',
'CreateFiller',
'GetFillerType',
'GetTensorName',
'RenameTensor',
'FeedTensor',
......@@ -335,7 +336,7 @@ def CreateFiller(filler_def):
Parameters
----------
filler_def : dragon_pb2.TensorFiller
The
The filler.
Returns
-------
......@@ -356,6 +357,31 @@ def CreateFiller(filler_def):
CreateFillerCC(filler_def)
def GetFillerType(tensor):
"""Get the filler type of specific tensor.
It is useful if you want to tag some tensors,
e.g. tag with ``numpy``, and get to initialize them lazily.
Parameters
----------
tensor : Tensor or str
The tensor to query.
Returns
-------
str
The filler type.
References
----------
The wrapper of ``GetFillerTypeCC``.
"""
return GetFillerTypeCC(_stringify_tensor(tensor))
def GetTensorName(tensor):
"""Query the name represented in current workspace.
......
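A minimal usage sketch of the new GetFillerType wrapper (the module path and the tensor name 'data' are assumptions):

import numpy as np
import dragon.core.workspace as ws

# hypothetical tensor 'data' whose filler was created earlier
if ws.GetFillerType('data') == 'numpy':
    # e.g. initialize it lazily from a host array
    ws.FeedTensor('data', np.zeros((2, 3), dtype=np.float32))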
......@@ -218,7 +218,7 @@ def L2Loss(inputs, normalization='BATCH_SIZE', **kwargs):
def SparseSoftmaxFocalLoss(inputs, axis=1, normalization='VALID', ignore_labels=(),
alpha=0.5, gamma=0.0, eps=1e-10, neg_id=-1, **kwargs):
alpha=0.25, gamma=2.0, neg_id=0, **kwargs):
"""SoftmaxFocalLoss with sparse labels. `[Lin et.al, 2017] <https://arxiv.org/abs/1708.02002>`_.
Parameters
......@@ -232,13 +232,11 @@ def SparseSoftmaxFocalLoss(inputs, axis=1, normalization='VALID', ignore_labels=
ignore_labels : tuple or list
The label ids to ignore. Default is ``empty``.
alpha : float
The scale factor on the rare class. Default is ``0.5``.
The scale factor on the rare class. Default is ``0.25``.
gamma : float
The exponential decay factor on the easy examples. Default is ``0.0``.
eps : float
The eps.
The exponential decay factor on the easy examples. Default is ``2.0``.
neg_id : int
The negative id. Default is ``-1`` (Without Class Balance)
The negative id. Default is ``0``.
Returns
-------
......
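Under the new defaults a call site needs no extra arguments to obtain the RetinaNet-style setting; a sketch, where cls_score and labels are hypothetical symbolic tensors and the import path is an assumption:

import dragon.ops as ops

# equivalent to alpha=0.25, gamma=2.0, neg_id=0
loss = ops.SparseSoftmaxFocalLoss(
    [cls_score, labels], axis=1, normalization='VALID')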
......@@ -14,7 +14,7 @@ from __future__ import division
from __future__ import print_function
version = '0.2.2'
full_version = '0.2.2.5'
full_version = '0.2.2.7'
release = False
if not release:
......
......@@ -149,8 +149,6 @@ class SoftmaxWithFocalLossLayer(Layer):
The scale on the rare class. Refer `FocalLossParameter.alpha`_.
gamma : float
The exponential decay. Refer `FocalLossParameter.gamma`_.
eps : float
The eps. Refer `FocalLossParameter.eps`_.
neg_id : int
The negative id. Refer `FocalLossParameter.neg_id`_.
normalization : NormalizationMode
......@@ -174,7 +172,6 @@ class SoftmaxWithFocalLossLayer(Layer):
'ignore_labels': [param.ignore_label] if param.HasField('ignore_label') else [],
'alpha': float(focal_loss_param.alpha),
'gamma': float(focal_loss_param.gamma),
'eps': float(focal_loss_param.eps),
'neg_id': focal_loss_param.neg_id}
def Setup(self, bottom):
......
......@@ -1504,10 +1504,9 @@ message DenseConcatParameter {
}
message FocalLossParameter {
optional float alpha = 1 [default = 0.5];
optional float gamma = 2 [default = 0.0];
optional float eps = 3 [default = 1e-10];
optional int32 neg_id = 4 [default = -1];
optional float alpha = 1 [default = 0.25];
optional float gamma = 2 [default = 2.0];
optional int32 neg_id = 3 [default = 0];
}
message GatherParameter {
......
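These defaults propagate into the regenerated Python bindings (the caffe_pb2 hunk below); a quick sanity-check sketch, assuming the module path dragon.vm.caffe.proto:

from dragon.vm.caffe.proto import caffe_pb2

param = caffe_pb2.FocalLossParameter()
# new defaults from the message definition above; eps is gone entirely
assert param.alpha == 0.25
assert param.gamma == 2.0
assert param.neg_id == 0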
......@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='caffe.proto',
package='caffe',
serialized_pb=_b('...'))  # old machine-generated descriptor, elided
serialized_pb=_b('...'))  # new machine-generated descriptor, elided; regenerated for the FocalLossParameter changes above
\x01(\x02:\x01\x31\"5\n\x0bReductionOp\x12\x07\n\x03SUM\x10\x01\x12\x08\n\x04\x41SUM\x10\x02\x12\t\n\x05SUMSQ\x10\x03\x12\x08\n\x04MEAN\x10\x04\"\x8d\x01\n\rReLUParameter\x12\x19\n\x0enegative_slope\x18\x01 \x01(\x02:\x01\x30\x12\x34\n\x06\x65ngine\x18\x02 \x01(\x0e\x32\x1b.caffe.ReLUParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"Z\n\x10ReshapeParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x14\n\x08num_axes\x18\x03 \x01(\x05:\x02-1\"\xa5\x01\n\x0eScaleParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12&\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x18\n\tbias_term\x18\x04 \x01(\x08:\x05\x66\x61lse\x12+\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\"x\n\x10SigmoidParameter\x12\x37\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1e.caffe.SigmoidParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"L\n\x0eSliceParameter\x12\x0f\n\x04\x61xis\x18\x03 \x01(\x05:\x01\x31\x12\x13\n\x0bslice_point\x18\x02 \x03(\r\x12\x14\n\tslice_dim\x18\x01 \x01(\r:\x01\x31\"\x89\x01\n\x10SoftmaxParameter\x12\x37\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1e.caffe.SoftmaxParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"r\n\rTanHParameter\x12\x34\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1b.caffe.TanHParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"T\n\rTileParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\r\n\x05tiles\x18\x02 \x01(\x05\x12#\n\tmultiples\x18\x03 \x01(\x0b\x32\x10.caffe.BlobShape\"*\n\x12ThresholdParameter\x12\x14\n\tthreshold\x18\x01 \x01(\x02:\x01\x30\"\xc1\x02\n\x13WindowDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\x0c\x66g_threshold\x18\x07 \x01(\x02:\x03\x30.5\x12\x19\n\x0c\x62g_threshold\x18\x08 \x01(\x02:\x03\x30.5\x12\x19\n\x0b\x66g_fraction\x18\t \x01(\x02:\x04\x30.25\x12\x16\n\x0b\x63ontext_pad\x18\n \x01(\r:\x01\x30\x12\x17\n\tcrop_mode\x18\x0b \x01(\t:\x04warp\x12\x1b\n\x0c\x63\x61\x63he_images\x18\x0c \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\r \x01(\t:\x00\"\xeb\x01\n\x0cSPPParameter\x12\x16\n\x0epyramid_height\x18\x01 \x01(\r\x12\x31\n\x04pool\x18\x02 \x01(\x0e\x32\x1e.caffe.SPPParameter.PoolMethod:\x03MAX\x12\x33\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1a.caffe.SPPParameter.Engine:\x07\x44\x45\x46\x41ULT\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\xe0\x13\n\x10V1LayerParameter\x12\x0e\n\x06\x62ottom\x18\x02 \x03(\t\x12\x0b\n\x03top\x18\x03 \x03(\t\x12\x0c\n\x04name\x18\x04 \x01(\t\x12$\n\x07include\x18 \x03(\x0b\x32\x13.caffe.NetStateRule\x12$\n\x07\x65xclude\x18! 
\x03(\x0b\x32\x13.caffe.NetStateRule\x12/\n\x04type\x18\x05 \x01(\x0e\x32!.caffe.V1LayerParameter.LayerType\x12\x1f\n\x05\x62lobs\x18\x06 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x0e\n\x05param\x18\xe9\x07 \x03(\t\x12>\n\x0f\x62lob_share_mode\x18\xea\x07 \x03(\x0e\x32$.caffe.V1LayerParameter.DimCheckMode\x12\x10\n\x08\x62lobs_lr\x18\x07 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x08 \x03(\x02\x12\x13\n\x0bloss_weight\x18# \x03(\x02\x12\x30\n\x0e\x61\x63\x63uracy_param\x18\x1b \x01(\x0b\x32\x18.caffe.AccuracyParameter\x12,\n\x0c\x61rgmax_param\x18\x17 \x01(\x0b\x32\x16.caffe.ArgMaxParameter\x12,\n\x0c\x63oncat_param\x18\t \x01(\x0b\x32\x16.caffe.ConcatParameter\x12?\n\x16\x63ontrastive_loss_param\x18( \x01(\x0b\x32\x1f.caffe.ContrastiveLossParameter\x12\x36\n\x11\x63onvolution_param\x18\n \x01(\x0b\x32\x1b.caffe.ConvolutionParameter\x12(\n\ndata_param\x18\x0b \x01(\x0b\x32\x14.caffe.DataParameter\x12.\n\rdropout_param\x18\x0c \x01(\x0b\x32\x17.caffe.DropoutParameter\x12\x33\n\x10\x64ummy_data_param\x18\x1a \x01(\x0b\x32\x19.caffe.DummyDataParameter\x12.\n\reltwise_param\x18\x18 \x01(\x0b\x32\x17.caffe.EltwiseParameter\x12&\n\texp_param\x18) \x01(\x0b\x32\x13.caffe.ExpParameter\x12\x31\n\x0fhdf5_data_param\x18\r \x01(\x0b\x32\x18.caffe.HDF5DataParameter\x12\x35\n\x11hdf5_output_param\x18\x0e \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\x12\x33\n\x10hinge_loss_param\x18\x1d \x01(\x0b\x32\x19.caffe.HingeLossParameter\x12\x33\n\x10image_data_param\x18\x0f \x01(\x0b\x32\x19.caffe.ImageDataParameter\x12\x39\n\x13infogain_loss_param\x18\x10 \x01(\x0b\x32\x1c.caffe.InfogainLossParameter\x12\x39\n\x13inner_product_param\x18\x11 \x01(\x0b\x32\x1c.caffe.InnerProductParameter\x12&\n\tlrn_param\x18\x12 \x01(\x0b\x32\x13.caffe.LRNParameter\x12\x35\n\x11memory_data_param\x18\x16 \x01(\x0b\x32\x1a.caffe.MemoryDataParameter\x12&\n\tmvn_param\x18\" \x01(\x0b\x32\x13.caffe.MVNParameter\x12.\n\rpooling_param\x18\x13 \x01(\x0b\x32\x17.caffe.PoolingParameter\x12*\n\x0bpower_param\x18\x15 \x01(\x0b\x32\x15.caffe.PowerParameter\x12(\n\nrelu_param\x18\x1e \x01(\x0b\x32\x14.caffe.ReLUParameter\x12.\n\rsigmoid_param\x18& \x01(\x0b\x32\x17.caffe.SigmoidParameter\x12.\n\rsoftmax_param\x18\' \x01(\x0b\x32\x17.caffe.SoftmaxParameter\x12*\n\x0bslice_param\x18\x1f \x01(\x0b\x32\x15.caffe.SliceParameter\x12(\n\ntanh_param\x18% \x01(\x0b\x32\x14.caffe.TanHParameter\x12\x32\n\x0fthreshold_param\x18\x19 \x01(\x0b\x32\x19.caffe.ThresholdParameter\x12\x35\n\x11window_data_param\x18\x14 \x01(\x0b\x32\x1a.caffe.WindowDataParameter\x12\x37\n\x0ftransform_param\x18$ \x01(\x0b\x32\x1e.caffe.TransformationParameter\x12(\n\nloss_param\x18* \x01(\x0b\x32\x14.caffe.LossParameter\x12&\n\x05layer\x18\x01 \x01(\x0b\x32\x17.caffe.V0LayerParameter\"\xd8\x04\n\tLayerType\x12\x08\n\x04NONE\x10\x00\x12\n\n\x06\x41\x42SVAL\x10#\x12\x0c\n\x08\x41\x43\x43URACY\x10\x01\x12\n\n\x06\x41RGMAX\x10\x1e\x12\x08\n\x04\x42NLL\x10\x02\x12\n\n\x06\x43ONCAT\x10\x03\x12\x14\n\x10\x43ONTRASTIVE_LOSS\x10%\x12\x0f\n\x0b\x43ONVOLUTION\x10\x04\x12\x08\n\x04\x44\x41TA\x10\x05\x12\x11\n\rDECONVOLUTION\x10\'\x12\x0b\n\x07\x44ROPOUT\x10\x06\x12\x0e\n\nDUMMY_DATA\x10 
\x12\x12\n\x0e\x45UCLIDEAN_LOSS\x10\x07\x12\x0b\n\x07\x45LTWISE\x10\x19\x12\x07\n\x03\x45XP\x10&\x12\x0b\n\x07\x46LATTEN\x10\x08\x12\r\n\tHDF5_DATA\x10\t\x12\x0f\n\x0bHDF5_OUTPUT\x10\n\x12\x0e\n\nHINGE_LOSS\x10\x1c\x12\n\n\x06IM2COL\x10\x0b\x12\x0e\n\nIMAGE_DATA\x10\x0c\x12\x11\n\rINFOGAIN_LOSS\x10\r\x12\x11\n\rINNER_PRODUCT\x10\x0e\x12\x07\n\x03LRN\x10\x0f\x12\x0f\n\x0bMEMORY_DATA\x10\x1d\x12\x1d\n\x19MULTINOMIAL_LOGISTIC_LOSS\x10\x10\x12\x07\n\x03MVN\x10\"\x12\x0b\n\x07POOLING\x10\x11\x12\t\n\x05POWER\x10\x1a\x12\x08\n\x04RELU\x10\x12\x12\x0b\n\x07SIGMOID\x10\x13\x12\x1e\n\x1aSIGMOID_CROSS_ENTROPY_LOSS\x10\x1b\x12\x0b\n\x07SILENCE\x10$\x12\x0b\n\x07SOFTMAX\x10\x14\x12\x10\n\x0cSOFTMAX_LOSS\x10\x15\x12\t\n\x05SPLIT\x10\x16\x12\t\n\x05SLICE\x10!\x12\x08\n\x04TANH\x10\x17\x12\x0f\n\x0bWINDOW_DATA\x10\x18\x12\r\n\tTHRESHOLD\x10\x1f\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\xfd\x07\n\x10V0LayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x12\n\nnum_output\x18\x03 \x01(\r\x12\x16\n\x08\x62iasterm\x18\x04 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x06 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x0e\n\x03pad\x18\x07 \x01(\r:\x01\x30\x12\x12\n\nkernelsize\x18\x08 \x01(\r\x12\x10\n\x05group\x18\t \x01(\r:\x01\x31\x12\x11\n\x06stride\x18\n \x01(\r:\x01\x31\x12\x35\n\x04pool\x18\x0b \x01(\x0e\x32\".caffe.V0LayerParameter.PoolMethod:\x03MAX\x12\x1a\n\rdropout_ratio\x18\x0c \x01(\x02:\x03\x30.5\x12\x15\n\nlocal_size\x18\r \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x0e \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x0f \x01(\x02:\x04\x30.75\x12\x0c\n\x01k\x18\x16 \x01(\x02:\x01\x31\x12\x0e\n\x06source\x18\x10 \x01(\t\x12\x10\n\x05scale\x18\x11 \x01(\x02:\x01\x31\x12\x10\n\x08meanfile\x18\x12 \x01(\t\x12\x11\n\tbatchsize\x18\x13 \x01(\r\x12\x13\n\x08\x63ropsize\x18\x14 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x15 \x01(\x08:\x05\x66\x61lse\x12\x1f\n\x05\x62lobs\x18\x32 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x10\n\x08\x62lobs_lr\x18\x33 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x34 \x03(\x02\x12\x14\n\trand_skip\x18\x35 \x01(\r:\x01\x30\x12\x1d\n\x10\x64\x65t_fg_threshold\x18\x36 \x01(\x02:\x03\x30.5\x12\x1d\n\x10\x64\x65t_bg_threshold\x18\x37 \x01(\x02:\x03\x30.5\x12\x1d\n\x0f\x64\x65t_fg_fraction\x18\x38 \x01(\x02:\x04\x30.25\x12\x1a\n\x0f\x64\x65t_context_pad\x18: \x01(\r:\x01\x30\x12\x1b\n\rdet_crop_mode\x18; \x01(\t:\x04warp\x12\x12\n\x07new_num\x18< \x01(\x05:\x01\x30\x12\x17\n\x0cnew_channels\x18= \x01(\x05:\x01\x30\x12\x15\n\nnew_height\x18> \x01(\x05:\x01\x30\x12\x14\n\tnew_width\x18? 
\x01(\x05:\x01\x30\x12\x1d\n\x0eshuffle_images\x18@ \x01(\x08:\x05\x66\x61lse\x12\x15\n\nconcat_dim\x18\x41 \x01(\r:\x01\x31\x12\x36\n\x11hdf5_output_param\x18\xe9\x07 \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"W\n\x0ePReLUParameter\x12&\n\x06\x66iller\x18\x01 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x1d\n\x0e\x63hannel_shared\x18\x02 \x01(\x08:\x05\x66\x61lse\")\n\x15SmoothL1LossParameter\x12\x10\n\x05sigma\x18\x01 \x01(\x02:\x01\x31\"H\n\x0cMPIParameter\x12\x0f\n\x04root\x18\x01 \x01(\r:\x01\x30\x12\x12\n\x07\x63omm_id\x18\x02 \x01(\x04:\x01\x30\x12\x13\n\x08group_id\x18\x03 \x01(\x04:\x01\x30\"!\n\x10PermuteParameter\x12\r\n\x05order\x18\x01 \x03(\r\"\x93\x01\n\x12NormalizeParameter\x12\x1c\n\x0e\x61\x63ross_spatial\x18\x01 \x01(\x08:\x04true\x12,\n\x0cscale_filler\x18\x02 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x1c\n\x0e\x63hannel_shared\x18\x03 \x01(\x08:\x04true\x12\x13\n\x03\x65ps\x18\x04 \x01(\x02:\x06\x31\x65-010\"d\n\x11ParallelParameter\x12\x1d\n\x0emultiple_nodes\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x16\n\x07shuffle\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x18\n\tpartition\x18\x03 \x01(\x08:\x05\x66\x61lse\"R\n\x0fResizeParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\x12\x0e\n\x02\x66x\x18\x02 \x01(\x02:\x02-1\x12\x0e\n\x02\x66y\x18\x03 \x01(\x02:\x02-1\"\'\n\x13\x45xpandDimsParameter\x12\x10\n\x04\x61xis\x18\x01 \x01(\x05:\x02-1\"\x90\x02\n\x11ProposalParameter\x12\x0e\n\x06stride\x18\x01 \x03(\x05\x12\r\n\x05ratio\x18\x02 \x03(\x02\x12\r\n\x05scale\x18\x03 \x03(\x02\x12\x1b\n\rpre_nms_top_n\x18\x04 \x01(\r:\x04\x36\x30\x30\x30\x12\x1b\n\x0epost_nms_top_n\x18\x05 \x01(\r:\x03\x33\x30\x30\x12\x17\n\nnms_thresh\x18\x06 \x01(\x02:\x03\x30.7\x12\x14\n\x08min_size\x18\x07 \x01(\r:\x02\x31\x36\x12\x14\n\tmin_level\x18\x08 \x01(\x05:\x01\x32\x12\x14\n\tmax_level\x18\t \x01(\x05:\x01\x35\x12\x1c\n\x0f\x63\x61nonical_scale\x18\n \x01(\x05:\x03\x32\x32\x34\x12\x1a\n\x0f\x63\x61nonical_level\x18\x0b \x01(\x05:\x01\x34\"\xa6\x01\n\x14\x42\x61tchRenormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12$\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x03\x30.9\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x30.001\x12\x10\n\x05r_max\x18\x04 \x01(\x02:\x01\x33\x12\x10\n\x05\x64_max\x18\x05 \x01(\x02:\x01\x35\x12\x16\n\x07t_delta\x18\x06 \x01(\x02:\x05\x30.001\"?\n\x14\x44\x65nseConcatParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x16\n\x0bgrowth_rate\x18\x02 \x01(\x05:\x01\x30\"N\n\x12\x46ocalLossParameter\x12\x13\n\x05\x61lpha\x18\x01 \x01(\x02:\x04\x30.25\x12\x10\n\x05gamma\x18\x02 \x01(\x02:\x01\x32\x12\x11\n\x06neg_id\x18\x03 \x01(\x05:\x01\x30\"\"\n\x0fGatherParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x30\"{\n\x12GroupNormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12$\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x03\x30.9\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x30.001\x12\x11\n\x05group\x18\x05 \x01(\r:\x02\x33\x32*\x1c\n\x05Phase\x12\t\n\x05TRAIN\x10\x00\x12\x08\n\x04TEST\x10\x01')
)
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
......@@ -40,8 +40,8 @@ _PHASE = _descriptor.EnumDescriptor(
],
containing_type=None,
options=None,
serialized_start=17663,
serialized_end=17691,
serialized_start=17642,
serialized_end=17670,
)
_sym_db.RegisterEnumDescriptor(_PHASE)
......@@ -5842,28 +5842,21 @@ _FOCALLOSSPARAMETER = _descriptor.Descriptor(
_descriptor.FieldDescriptor(
name='alpha', full_name='caffe.FocalLossParameter.alpha', index=0,
number=1, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0.5,
has_default_value=True, default_value=0.25,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='gamma', full_name='caffe.FocalLossParameter.gamma', index=1,
number=2, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='eps', full_name='caffe.FocalLossParameter.eps', index=2,
number=3, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=1e-010,
has_default_value=True, default_value=2,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='neg_id', full_name='caffe.FocalLossParameter.neg_id', index=3,
number=4, type=5, cpp_type=1, label=1,
has_default_value=True, default_value=-1,
name='neg_id', full_name='caffe.FocalLossParameter.neg_id', index=2,
number=3, type=5, cpp_type=1, label=1,
has_default_value=True, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
......@@ -5879,7 +5872,7 @@ _FOCALLOSSPARAMETER = _descriptor.Descriptor(
oneofs=[
],
serialized_start=17401,
serialized_end=17500,
serialized_end=17479,
)
......@@ -5908,8 +5901,8 @@ _GATHERPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=17502,
serialized_end=17536,
serialized_start=17481,
serialized_end=17515,
)
......@@ -5959,8 +5952,8 @@ _GROUPNORMPARAMETER = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=17538,
serialized_end=17661,
serialized_start=17517,
serialized_end=17640,
)
_BLOBPROTO.fields_by_name['shape'].message_type = _BLOBSHAPE
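The descriptor deltas above come from regenerating caffe_pb2 for the new FocalLossParameter: alpha now defaults to 0.25, gamma to 2, the eps field is dropped, and neg_id moves to field number 3 with default 0, so every later message shifts its serialized offsets down by the 21 removed bytes. For reference, these defaults are the standard focal-loss setting:
FL(p_t) = -\alpha_t \, (1 - p_t)^{\gamma} \log(p_t), \qquad \alpha = 0.25,\ \gamma = 2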
......
......@@ -42,7 +42,7 @@ find_modules()
setup(name = 'dragon',
version='0.2.2.6',
version='0.2.2.7',
description = 'Dragon: A Computation Graph Virtual Machine Based Deep Learning Framework',
url='https://github.com/seetaresearch/Dragon',
author='Ting Pan',
......
......@@ -229,11 +229,35 @@ void Operator<Context>::CleanResource() {
}
}
DEFINE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*);
DEFINE_REGISTRY(CUDAOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*);
DEFINE_REGISTRY(CUDNNOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*);
DEFINE_REGISTRY(GradientRegistry, GradientMakerBase, const OperatorDef&, const vector<string>&);
DEFINE_REGISTRY(NoGradientRegistry, GradientMakerBase, const OperatorDef&, const vector<string>&);
DEFINE_REGISTRY(
CPUOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
DEFINE_REGISTRY(
CUDAOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
DEFINE_REGISTRY(
CUDNNOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
DEFINE_REGISTRY(
GradientRegistry,
GradientMakerBase,
const OperatorDef&,
const vector<string>&);
DEFINE_REGISTRY(
NoGradientRegistry,
GradientMakerBase,
const OperatorDef&,
const vector<string>&);
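These registries are the name-to-factory tables used to instantiate operators and gradient makers from an OperatorDef. A minimal sketch of the pattern behind DEFINE_REGISTRY (illustrative only: SimpleRegistry is a hypothetical stand-in, and Dragon's real macro also generates the static registerer helpers):
#include <functional>
#include <map>
#include <string>
template <class Base, class... CreatorArgs>
class SimpleRegistry {
 public:
  using Creator = std::function<Base*(CreatorArgs...)>;
  // Bind a type name to a factory, typically at static-init time.
  void Register(const std::string& key, Creator creator) {
    creators_[key] = std::move(creator);
  }
  // Instantiate by name; unknown keys yield nullptr.
  Base* Create(const std::string& key, CreatorArgs... args) {
    auto it = creators_.find(key);
    return it == creators_.end() ? nullptr : it->second(args...);
  }
 private:
  std::map<std::string, Creator> creators_;
};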
#define INSTANTIATE_GET_SINGLE_ARGUMENT(T, fieldname) \
template <> T OperatorBase::Arg( \
......@@ -252,7 +276,6 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(string, s)
INSTANTIATE_GET_SINGLE_ARGUMENT(bool, b);
INSTANTIATE_GET_SINGLE_ARGUMENT(int64_t, i64);
#define INSTANTIATE_GET_REPEATED_ARGUMENT(T, fieldname) \
template<> vector<T> OperatorBase::Args<T>(const string& name) { \
if(args_.count(name) == 0) return vector<T>(); \
......
......@@ -42,16 +42,17 @@ void SparseSoftmaxCrossEntropyOp<Context>::SoftmaxRunFP16() {
template <class Context> template <typename Tx, typename Ty>
void SparseSoftmaxCrossEntropyOp<Context>::RunWithType() {
auto* prob_data = prob->template data<Tx, Context>();
auto* label_data = Input(1).template data<Ty, Context>();
auto* loss_data = losses.template mutable_data<Tx, Context>();
auto* valid_data = valid.template mutable_data<Tx, Context>();
auto* Pdata = prob->template data<Tx, Context>();
auto* Tdata = Input(1).template data<Ty, Context>();
auto* Idata = !ignores.count() ? nullptr :
ignores.template data<int, Context>();
auto* Ldata = losses.template mutable_data<Tx, Context>();
auto* Fdata = flags.template mutable_data<Tx, Context>();
kernel::SparseSoftmaxCrossEntropy<Tx, Ty, Context>(
Input(0).count(), Input(0).dim(axis),
outer_dim, inner_dim,
prob_data, label_data, loss_data,
valid_data, &ignore, &ctx());
outer_dim, Input(0).dim(axis), inner_dim,
Pdata, Tdata, Idata, ignores.count(),
Ldata, Fdata, &ctx());
if (normalization == "UNIT") {
Output(0)->ReshapeLike(losses);
......@@ -61,11 +62,12 @@ void SparseSoftmaxCrossEntropyOp<Context>::RunWithType() {
Tx normalizer;
if (normalization == "VALID")
normalizer = std::max(math::ASum<Tx, Context>(valid.count(), valid_data), (Tx)1.f);
normalizer = std::max(
math::ASum<Tx, Context>(flags.count(), Fdata), (Tx)1.f);
else if (normalization == "BATCH_SIZE") normalizer = Input(0).dim(0);
else if (normalization == "FULL") normalizer = outer_dim * inner_dim;
else if (normalization == "NONE") normalizer = 1;
Tx loss = math::ASum<Tx, Context>(losses.count(), loss_data);
Tx loss = math::ASum<Tx, Context>(losses.count(), Ldata);
Output(0)->Reshape({ 1 });
auto* Ydata = Output(0)->template mutable_data<Tx, Context>();
math::Set<Tx, Context>(1, loss / normalizer, Ydata);
......@@ -77,11 +79,12 @@ void SparseSoftmaxCrossEntropyOp<Context>::RunOnDevice() {
inner_dim = Input(0).count(axis + 1);
CHECK_EQ(outer_dim * inner_dim, Input(1).count())
<< "\nNumber of predictions must match the number of labels.";
valid.Reshape({ outer_dim * inner_dim });
losses.Reshape({ outer_dim * inner_dim });
flags.Reshape({ outer_dim * inner_dim });
prob = ws()->CreateTensor("/mnt/" + anchor() + "/softmax/prob");
if (XIsType(Input(0), float) || XIsType(Input(0), float16)) {
if (XIsType(Input(0), float) ||
XIsType(Input(0), float16)) {
if (XIsType(Input(0), float16)) SoftmaxRunFP16();
else SoftmaxRun();
if (XIsType(Input(1), float)) RunWithType<float, float>();
......@@ -98,33 +101,35 @@ OPERATOR_SCHEMA(SparseSoftmaxCrossEntropy).NumInputs(2).NumOutputs(1);
template <class Context> template <typename Tx, typename Ty>
void SparseSoftmaxCrossEntropyGradientOp<Context>::RunWithType() {
auto* label_data = Input(1).template data<Ty, Context>();
auto* prob_data = prob->template mutable_data<Tx, Context>();
auto* Pdata = prob->template mutable_data<Tx, Context>();
auto* Tdata = Input(1).template data<Ty, Context>();
auto* Idata = !ignores.count() ? nullptr :
ignores.template data<int, Context>();
auto* dXdata = Output(0)->template mutable_data<Tx, Context>();
auto* valid_data = valid.template mutable_data<Tx, Context>();
ctx().template Copy<Tx, Context, Context>(prob->count(), dXdata, prob_data);
auto* Fdata = flags.template mutable_data<Tx, Context>();
ctx().template Copy<Tx, Context, Context>(
prob->count(), dXdata, Pdata);
kernel::SparseSoftmaxCrossEntropyGrad<Tx, Ty, Context>(
Output(0)->count(), Output(0)->dim(axis),
outer_dim, inner_dim,
prob_data, label_data, valid_data,
&ignore, dXdata, &ctx());
outer_dim, Output(0)->dim(axis), inner_dim,
Pdata, Tdata, Idata, ignores.count(),
dXdata, Fdata, &ctx());
if (normalization == "UNIT") {
auto* dYdata = Input(-1).template data<Tx, Context>();
kernel::SumGrad<Tx, Context>(
Input(0).count() / Input(0).dim(axis),
Input(0).dim(axis), inner_dim,
1.0, dYdata, prob_data);
1.0, dYdata, Pdata);
math::Mul<Tx, Context>(
Output(0)->count(), prob_data, dXdata, dXdata);
Output(0)->count(), Pdata, dXdata, dXdata);
return;
}
Tx normalizer;
if (normalization == "VALID")
normalizer = std::max(
math::ASum<Tx, Context>(valid.count(), valid_data), (Tx)1.f);
math::ASum<Tx, Context>(flags.count(), Fdata), (Tx)1.f);
else if (normalization == "BATCH_SIZE") normalizer = Input(0).dim(0);
else if (normalization == "FULL") normalizer = outer_dim * inner_dim;
else if (normalization == "NONE") normalizer = 1;
......@@ -141,7 +146,7 @@ void SparseSoftmaxCrossEntropyGradientOp<Context>::RunOnDevice() {
outer_dim = prob->count(0, axis);
inner_dim = prob->count(axis + 1);
Output(0)->ReshapeLike(Input(0));
valid.Reshape({ outer_dim * inner_dim });
flags.Reshape({ outer_dim * inner_dim });
if (XIsType(Input(0), float) || XIsType(Input(0), float16)) {
if (XIsType(Input(1), float)) RunWithType<float, float>();
......
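For reference, the reduction implemented above (in both the forward and gradient passes) is, with \ell_i the per-position losses and f_i the per-position flags:
\text{loss} = \frac{\sum_i \ell_i}{Z},
\qquad
Z =
\begin{cases}
\max\left(\sum_i f_i,\ 1\right) & \texttt{VALID} \\
\text{Input(0).dim(0)} & \texttt{BATCH\_SIZE} \\
\text{outer\_dim} \times \text{inner\_dim} & \texttt{FULL} \\
1 & \texttt{NONE}
\end{cases}
UNIT skips the reduction entirely and returns the per-position losses.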
......@@ -9,31 +9,33 @@ namespace dragon {
template <class Context> template <typename T>
void SparseSoftmaxFocalLossOp<Context>::RunWithType() {
auto* prob_data = this->prob->template data<T, Context>();
auto* label_data = Input(1).template data<T, Context>();
auto* loss_data = this->losses.template mutable_data<T, Context>();
auto* valid_data = this->valid.template mutable_data<T, Context>();
auto* scale_data = scale->template mutable_data<T, Context>();
auto* Pdata = this->prob->template data<T, Context>();
auto* Tdata = Input(1).template data<T, Context>();
auto* Idata = !this->ignores.count() ? nullptr :
this->ignores.template data<int, Context>();
auto* Ldata = losses.template mutable_data<T, Context>();
auto* Fdata = flags.template mutable_data<T, Context>();
kernel::SparseSoftmaxFocalLoss<T, Context>(
Input(0).count(), Input(0).dim(axis), outer_dim, inner_dim,
outer_dim, Input(0).dim(axis), inner_dim,
pos_alpha, neg_alpha, gamma, neg_id,
prob_data, label_data, scale_data,
loss_data, valid_data, &this->ignore);
Pdata, Tdata, Idata, this->ignores.count(),
Ldata, Fdata, &ctx());
if (normalization == "UNIT") {
Output(0)->ReshapeLike(this->losses);
Output(0)->template Copy<Context, Context>(this->losses);
Output(0)->ReshapeLike(losses);
Output(0)->template Copy<Context, Context>(losses);
return;
}
T normalizer;
if (normalization == "VALID")
normalizer = std::max(math::ASum<T, Context>(this->valid.count(), valid_data), 1.f);
normalizer = std::max(
math::ASum<T, Context>(flags.count(), Fdata), 1.f);
else if (normalization == "BATCH_SIZE") normalizer = Input(0).dim(0);
else if (normalization == "FULL") normalizer = outer_dim * inner_dim;
else if (normalization == "NONE") normalizer = 1;
T loss = math::ASum<T, Context>(this->losses.count(), loss_data);
T loss = math::ASum<T, Context>(losses.count(), Ldata);
Output(0)->Reshape({ 1 });
auto* Ydata = Output(0)->template mutable_data<T, Context>();
math::Set<T, Context>(1, loss / normalizer, Ydata);
......@@ -45,13 +47,11 @@ void SparseSoftmaxFocalLossOp<Context>::RunOnDevice() {
inner_dim = Input(0).count(axis + 1);
CHECK_EQ(outer_dim * inner_dim, Input(1).count())
<< "\nNumber of predictions must match the number of labels.";
this->valid.Reshape({ outer_dim * inner_dim });
this->losses.Reshape({ outer_dim * inner_dim });
flags.Reshape({ outer_dim * inner_dim });
losses.Reshape({ outer_dim * inner_dim });
ws()->CreateTensor("/mnt/" + anchor() + "/softmax/prob");
this->SoftmaxRun();
this->prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax/prob");
scale = ws()->CreateTensor("/mnt/" + anchor() + "/focal/scale");
scale->ReshapeLike(*this->prob);
if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
......@@ -65,31 +65,33 @@ OPERATOR_SCHEMA(SparseSoftmaxFocalLoss).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T>
void SparseSoftmaxFocalLossGradientOp<Context>::RunWithType() {
auto* label_data = Input(1).template data<T, Context>();
auto* prob_data = this->prob->template mutable_data<T, Context>();
auto* Pdata = this->prob->template mutable_data<T, Context>();
auto* Tdata = Input(1).template data<T, Context>();
auto* Idata = !this->ignores.count() ? nullptr :
this->ignores.template data<int, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
auto* valid_data = this->valid.template mutable_data<T, Context>();
auto* scale_data = scale->template mutable_data<T, Context>();
auto* Fdata = flags.template mutable_data<T, Context>();
kernel::SparseSoftmaxFocalLossGrad<T, Context>(
Output(0)->count(), Output(0)->dim(axis), outer_dim, inner_dim,
gamma, neg_id, eps, scale_data, prob_data, label_data,
valid_data, &this->ignore, dXdata);
outer_dim, Output(0)->dim(axis), inner_dim,
pos_alpha, neg_alpha, gamma, neg_id,
Pdata, Tdata, Idata, this->ignores.count(),
dXdata, Fdata, &ctx());
if (normalization == "UNIT") {
auto* dYdata = Input(-1).template data<T, Context>();
kernel::SumGrad<T, Context>(
Input(0).count() / Input(0).dim(axis),
Input(0).dim(axis), inner_dim,
1.0, dYdata, prob_data);
1.0, dYdata, Pdata);
math::Mul<T, Context>(Output(0)->count(),
prob_data, dXdata, dXdata); return;
Pdata, dXdata, dXdata);
return;
}
T normalizer;
if (normalization == "VALID")
normalizer = std::max(
math::ASum<T, Context>(this->valid.count(), valid_data), 1.f);
math::ASum<T, Context>(flags.count(), Fdata), 1.f);
else if (normalization == "BATCH_SIZE") normalizer = Input(0).dim(0);
else if (normalization == "FULL") normalizer = outer_dim * inner_dim;
else if (normalization == "NONE") normalizer = 1;
......@@ -103,11 +105,10 @@ void SparseSoftmaxFocalLossGradientOp<Context>::RunWithType() {
template <class Context>
void SparseSoftmaxFocalLossGradientOp<Context>::RunOnDevice() {
this->prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax/prob");
scale = ws()->GetTensor("/mnt/" + anchor() + "/focal/scale");
outer_dim = this->prob->count(0, axis);
inner_dim = this->prob->count(axis + 1);
Output(0)->ReshapeLike(Input(0));
this->valid.Reshape({ outer_dim * inner_dim });
flags.Reshape({ outer_dim * inner_dim });
if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
......
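Two semantics are worth noting in the rewritten focal-loss op. First, the staged "/focal/scale" tensor is gone: the modulating factor (1 - p_t)^gamma is now recomputed inside the kernels, trading a little arithmetic for one fewer workspace tensor and one fewer pass over prob. Second, the weighting and the VALID count are class-dependent:
\alpha_t =
\begin{cases}
\text{pos\_alpha} & \text{label} > \text{neg\_id} \\
\text{neg\_alpha} & \text{otherwise}
\end{cases},
\qquad
f_i = \mathbb{1}[\text{label}_i > \text{neg\_id}]
so under VALID normalization only positive (foreground) samples are counted, unlike plain cross-entropy where every non-ignored position contributes.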
......@@ -3,12 +3,15 @@
#ifdef WITH_PYTHON
#ifdef WITH_PYTHON3
#define PyBytes_FromStringAndSize PyUnicode_FromStringAndSize
#define PyBytes_FromStringAndSize \
PyUnicode_FromStringAndSize
#endif
#define String(str) \
#define Bytes(str) \
PyBytes_FromStringAndSize(str, string(str).size())
#define CS2Bytes(cstr) Bytes(cstr.c_str())
namespace dragon {
template <class Context>
......@@ -17,6 +20,9 @@ RunOp<Context>::RunOp(const OperatorDef& def, Workspace* ws)
module(OperatorBase::Arg<string>("module", "")),
op(OperatorBase::Arg<string>("op", "")),
param_str((OperatorBase::Arg<string>("param_str", ""))) {
// optimization for all python ops
if (!AllowRun()) return;
this->do_sync_ = false;
// init interpreter & load module
Py_Initialize();
PyObject* py_module = PyImport_ImportModule(module.c_str());
......@@ -27,37 +33,38 @@ RunOp<Context>::RunOp(const OperatorDef& def, Workspace* ws)
<< " from module: " << module;
self = PyObject_CallObject(py_op, NULL);
// pass param string
PyObject_SetAttr(self, String("param_str"), String(param_str.c_str()));
PyObject_SetAttr(self, String("param_str_"), String(param_str.c_str()));
// build inputs and outputs for Python
// wrap inputs and outputs
inputs = PyList_New(InputSize());
for (int i = 0; i < InputSize(); i++)
PyList_SetItem(inputs, i, String(Input(i).name().c_str()));
PyList_SetItem(inputs, i, CS2Bytes(Input(i).name()));
outputs = PyList_New(OutputSize());
for (int i = 0; i < OutputSize(); i++)
PyList_SetItem(outputs, i, String(Output(i)->name().c_str()));
if (!AllowRun()) return;
PyList_SetItem(outputs, i, CS2Bytes(Output(i)->name()));
// backward compatibility: param_str
PyObject_SetAttr(self, Bytes("param_str"), CS2Bytes(param_str));
PyObject_SetAttr(self, Bytes("param_str_"), CS2Bytes(param_str));
// setup
if (PyObject_HasAttr(self, String("setup")))
// backward compatibility: self.setup(inputs, outputs)
if (PyObject_HasAttr(self, Bytes("setup"))) {
PyObject_CallMethod(self, "setup", "OO", inputs, outputs);
}
}
template <class Context>
void RunOp<Context>::RunOnDevice() {
// init phase
PyObject_SetAttr(self, String("phase"), String(phase().c_str()));
// reset phase
PyObject_SetAttr(self, Bytes("phase"), CS2Bytes(phase()));
// reshape
if (PyObject_HasAttr(self, String("reshape")))
// backward compatibility: reshape(inputs, outputs)
if (PyObject_HasAttr(self, Bytes("reshape"))) {
PyObject_CallMethod(self, "reshape", "OO", inputs, outputs);
}
// run
if (PyObject_HasAttr(self, String("forward"))) {
// overloaded run interfaces
if (PyObject_HasAttr(self, Bytes("forward"))) {
PyObject_CallMethod(self, "forward", "OO", inputs, outputs);
} else if (PyObject_HasAttr(self, String("run"))) {
} else if (PyObject_HasAttr(self, Bytes("run"))) {
PyObject_CallMethod(self, "run", "OO", inputs, outputs);
}
}
......@@ -72,18 +79,23 @@ NO_GRADIENT(Run);
template <class Context>
void TemplateGradientOp<Context>::RunOnDevice() {
// init phase
PyObject_SetAttr(this->self, String("phase"), String(phase().c_str()));
// reshape
if (PyObject_HasAttr(this->self, String("reshape")))
PyObject_CallMethod(this->self, "reshape", "OO", this->inputs, this->outputs);
// run
if (PyObject_HasAttr(this->self, String("backward"))) {
PyObject_CallMethod(this->self, "forward", "OO", this->inputs, this->outputs);
} else if (PyObject_HasAttr(this->self, String("grad"))) {
PyObject_CallMethod(this->self, "grad", "OO", this->inputs, this->outputs);
// reset phase
PyObject_SetAttr(this->self,
Bytes("phase"), CS2Bytes(phase()));
// backward compatibility: reshape(inputs, outputs)
if (PyObject_HasAttr(this->self, Bytes("reshape"))) {
PyObject_CallMethod(this->self, "reshape",
"OO", this->inputs, this->outputs);
}
// overloaded run interfaces
if (PyObject_HasAttr(this->self, Bytes("backward"))) {
PyObject_CallMethod(this->self, "backward",
"OO", this->inputs, this->outputs);
} else if (PyObject_HasAttr(this->self, Bytes("grad"))) {
PyObject_CallMethod(this->self, "grad",
"OO", this->inputs, this->outputs);
}
}
......
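The String to Bytes/CS2Bytes rename is a Python 3 compatibility fix: attribute and method names passed to PyObject_SetAttr and PyObject_HasAttr must be native str objects, which are unicode on Python 3 and byte strings on Python 2. A self-contained sketch of the same idea (NativeStr is a hypothetical helper, not part of this commit):
#include <Python.h>
#include <string>
// Build a name object that is a native 'str' on both major versions,
// mirroring what the Bytes/CS2Bytes macros above resolve to.
static PyObject* NativeStr(const std::string& s) {
#if PY_MAJOR_VERSION >= 3
  return PyUnicode_FromStringAndSize(s.c_str(), (Py_ssize_t)s.size());
#else
  return PyBytes_FromStringAndSize(s.c_str(), (Py_ssize_t)s.size());
#endif
}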
......@@ -235,7 +235,7 @@ void ConvOpBase<Context>::Reshape() {
weight_shape.push_back(conv_in_channels / group);
weight_shape.push_back(conv_out_channels);
}
bias_shape.assign(1, num_output);
bias_shape = { num_output };
// determine the bottom and top shape
bottom_shape = Input(0).dims();
......
......@@ -604,140 +604,137 @@ template <> void SoftmaxCrossEntropy<float, CPUContext>(
template <typename Tx, typename Ty>
void _SparseSoftmaxCrossEntropy(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const Tx* prob,
const Ty* labels,
Tx* loss,
Tx* valid,
Tensor* ignore) {
const int* ignores = ignore->count() > 0 ?
ignore->data<int, CPUContext>() : nullptr;
const int dim = count / outer_dim;
for (int i = 0; i < outer_dim; ++i) {
for (int j = 0; j < inner_dim; ++j) {
const int idx = i * inner_dim + j;
const int* ignores,
const int num_ignores,
Tx* losses,
Tx* flags) {
for (int oix = 0; oix < outer_dim; ++oix) {
for (int iix = 0; iix < inner_dim; ++iix) {
const int idx = oix * inner_dim + iix;
const int label = labels[idx];
int k;
for (k = 0; k < ignore->count(); ++k) {
for (k = 0; k < num_ignores; ++k) {
if (label == ignores[k]) {
loss[idx] = valid[idx] = 0;
losses[idx] = flags[idx] = 0;
break;
}
}
if (k == ignore->count()) {
Tx labeled_prob = prob[i * dim + label * inner_dim + j];
loss[idx] = -std::log(std::max(labeled_prob, FLT_MIN));
valid[idx] = 1;
if (k == num_ignores) {
const int t = (oix * axis_dim + label) * inner_dim + iix;
losses[idx] = -std::log(std::max(prob[t], FLT_MIN));
flags[idx] = 1;
}
}
}
}
template <> void SparseSoftmaxCrossEntropy<float, float, CPUContext>(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* prob,
const float* labels,
float* loss,
float* valid,
Tensor* ignore,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
CPUContext* ctx) {
_SparseSoftmaxCrossEntropy<float, float>(
count, classes, outer_dim, inner_dim,
prob, labels, loss, valid, ignore);
outer_dim, axis_dim, inner_dim,
prob, labels, ignores, num_ignores,
losses, flags);
}
template <> void SparseSoftmaxCrossEntropy<float, int64_t, CPUContext>(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* prob,
const int64_t* labels,
float* loss,
float* valid,
Tensor* ignore,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
CPUContext* ctx) {
_SparseSoftmaxCrossEntropy<float, int64_t>(
count, classes, outer_dim, inner_dim,
prob, labels, loss, valid, ignore);
outer_dim, axis_dim, inner_dim,
prob, labels, ignores, num_ignores,
losses, flags);
}
template <typename Tx, typename Ty>
void _SparseSoftmaxCrossEntropyGrad(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const Tx* prob,
const Ty* labels,
Tx* valid,
Tensor* ignore,
Tx* dx) {
int dim = count / outer_dim;
const int* ignores = ignore->count() > 0 ?
ignore->data <int, CPUContext>() : nullptr;
valid[0] = 0;
for (int i = 0; i < outer_dim; ++i) {
for (int j = 0; j < inner_dim; ++j) {
const int label = labels[i * inner_dim + j];
const int* ignores,
const int num_ignores,
Tx* dx,
Tx* flags) {
flags[0] = 0;
for (int oix = 0; oix < outer_dim; ++oix) {
for (int iix = 0; iix < inner_dim; ++iix) {
const int label = labels[oix * inner_dim + iix];
int k;
for (k = 0; k < ignore->count(); ++k)
for (k = 0; k < num_ignores; ++k)
if (label == ignores[k]) break;
if (k != ignore->count()) {
for (int c = 0; c < classes; ++c)
dx[i * dim + c * inner_dim + j] = 0;
if (k != num_ignores) {
for (int c = 0; c < axis_dim; ++c)
dx[(oix * axis_dim + c) * inner_dim + iix] = 0;
} else {
dx[i * dim + label * inner_dim + j] -= 1;
valid[0]++;
dx[(oix * axis_dim + label) * inner_dim + iix] -= 1;
flags[0]++;
}
}
}
}
template<> void SparseSoftmaxCrossEntropyGrad<float, float, CPUContext>(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* prob,
const float* labels,
float* valid,
Tensor* ignore,
const int* ignores,
const int num_ignores,
float* dx,
float* flags,
CPUContext* ctx) {
_SparseSoftmaxCrossEntropyGrad<float, float>(
count, classes, outer_dim, inner_dim,
prob, labels, valid, ignore, dx);
outer_dim, axis_dim, inner_dim,
prob, labels, ignores,
num_ignores, dx, flags);
}
template<> void SparseSoftmaxCrossEntropyGrad<float, int64_t, CPUContext>(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* prob,
const int64_t* labels,
float* valid,
Tensor* ignore,
const int* ignores,
const int num_ignores,
float* dx,
float* flags,
CPUContext* ctx) {
_SparseSoftmaxCrossEntropyGrad<float, int64_t>(
count, classes, outer_dim, inner_dim,
prob, labels, valid, ignore, dx);
outer_dim, axis_dim, inner_dim,
prob, labels, ignores,
num_ignores, dx, flags);
}
/******************** loss.sparse_softmax_focal_loss ********************/
template <> void SparseSoftmaxFocalLoss<float, CPUContext>(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float pos_alpha,
const float neg_alpha,
......@@ -745,84 +742,78 @@ template <> void SparseSoftmaxFocalLoss<float, CPUContext>(
const int neg_id,
const float* prob,
const float* labels,
float* scale,
float* loss,
float* valid,
Tensor* ignore) {
const int* ignores = ignore->count() > 0 ?
ignore->data<int, CPUContext>() : nullptr;
const int dim = count / outer_dim;
#ifdef WITH_OMP
#pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
for (int i = 0; i < count; ++i)
scale[i] = std::pow((1.0f - prob[i]), gamma);
for (int i = 0; i < outer_dim; ++i) {
for (int j = 0; j < inner_dim; ++j) {
const int idx = i * inner_dim + j;
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
CPUContext* ctx) {
for (int oix = 0; oix < outer_dim; ++oix) {
for (int iix = 0; iix < inner_dim; ++iix) {
const int idx = oix * inner_dim + iix;
const int label = labels[idx];
int k;
for (k = 0; k < ignore->count(); ++k) {
for (k = 0; k < num_ignores; ++k) {
if (label == ignores[k]) {
loss[idx] = valid[idx] = 0;
losses[idx] = flags[idx] = 0;
break;
}
}
if (k == ignore->count()) {
const int t_ = i * dim + label * inner_dim + j;
if (k == num_ignores) {
const int t = (oix * axis_dim + label) * inner_dim + iix;
float labeled_prob = std::max(prob[t], FLT_MIN);
scale[t_] = label > neg_id ?
pos_alpha * scale[t_] : neg_alpha * scale[t_];
loss[idx] = -scale[t_] * std::log(labeled_prob);
valid[idx] = label > neg_id ? 1 : 0;
float scale = std::pow((1.f - prob[t]), gamma);
scale = label > neg_id ?
pos_alpha * scale : neg_alpha * scale;
losses[idx] = -scale * std::log(labeled_prob);
flags[idx] = label > neg_id ? 1 : 0;
}
}
}
}
template<> void SparseSoftmaxFocalLossGrad<float, CPUContext>(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float pos_alpha,
const float neg_alpha,
const float gamma,
const int neg_id,
const float eps,
const float* scale,
const float* prob,
const float* labels,
float* valid,
Tensor* ignore,
float* dx) {
int dim = count / outer_dim;
const int* ignores = ignore->count() > 0 ?
ignore->data <int, CPUContext>() : nullptr;
valid[0] = 0;
for (int i = 0; i < outer_dim; ++i) {
for (int j = 0; j < inner_dim; ++j) {
const int label = labels[i * inner_dim + j];
const int* ignores,
const int num_ignores,
float* dx,
float* flags,
CPUContext* ctx) {
flags[0] = 0;
for (int oix = 0; oix < outer_dim; ++oix) {
for (int iix = 0; iix < inner_dim; ++iix) {
const int label = labels[oix * inner_dim + iix];
int k;
for (k = 0; k < ignore->count(); ++k)
for (k = 0; k < num_ignores; ++k)
if (label == ignores[k]) break;
if (k != ignore->count()) {
for (int c = 0; c < classes; ++c)
dx[i * dim + c * inner_dim + j] = 0;
if (k != num_ignores) {
for (int c = 0; c < axis_dim; ++c)
dx[(oix * axis_dim + c) * inner_dim + iix] = 0;
} else {
const int t_ = i * dim + label * inner_dim + j;
float grad = -gamma
* (scale[t_] / std::max((1.0f - prob[t_]), eps))
* std::log(std::max(prob[t_], FLT_MIN))
* prob[t_] + scale[t_];
for (int c = 0; c < classes; ++c) {
const int i_ = i * dim + c * inner_dim + j;
const int t = (oix * axis_dim + label) * inner_dim + iix;
float onemp = 1. - prob[t];
// unstable if gamma is 0
float grad = -gamma * pow(onemp, gamma - 1)
* log(std::max(prob[t], FLT_MIN))
* prob[t] + pow(onemp, gamma);
grad = label > neg_id ?
pos_alpha * grad : neg_alpha * grad;
for (int c = 0; c < axis_dim; ++c) {
const int i_ = (oix * axis_dim + c) * inner_dim + iix;
if (c == label) {
dx[i_] = grad * (prob[t_] - 1);
dx[i_] = grad * (prob[t] - 1);
} else {
dx[i_] = grad * prob[i_];
}
}
if (label > neg_id) valid[0]++;
if (label > neg_id) flags[0]++;
}
}
}
......
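A sanity check on the grad expression in the focal-loss backward pass above: with p = prob[t] the probability of the labeled class, the kernel implements the analytic derivative of -\alpha_t (1 - p)^{\gamma} \log p through the softmax,
\frac{\partial \mathcal{L}}{\partial z_c}
= \alpha_t \left[ (1 - p)^{\gamma} - \gamma\, p\, (1 - p)^{\gamma - 1} \log p \right] (p_c - \delta_{c,t}),
which the code factors as grad * (prob[t] - 1) when c == label and grad * prob[i_] otherwise. The "unstable if gamma is 0" comment refers to the (1 - p)^{gamma - 1} factor: for gamma < 1 it diverges as p approaches 1, and at gamma = 0 the product of 0 and infinity can yield NaN; the old implementation clamped 1 - p with eps, which this commit removes along with the parameter.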
......@@ -938,205 +938,186 @@ template <> void SoftmaxCrossEntropy<float, CUDAContext>(
template <typename Tx, typename Ty>
__global__ void _SparseSoftmaxCrossEntropy(
const int count,
const int axis_dim,
const int inner_dim,
const Tx* prob,
const Ty* labels,
Tx* loss,
const int classes,
const int inner_dim,
const int* ignores,
const int ignore_num,
Tx* valid) {
const int num_ignores,
Tx* losses,
Tx* flags) {
CUDA_KERNEL_LOOP(idx, count) {
const int o_idx = idx / inner_dim;
const int i_idx = idx % inner_dim;
const int label = labels[o_idx * inner_dim + i_idx];
const int oix = idx / inner_dim;
const int iix = idx % inner_dim;
const int label = labels[oix * inner_dim + iix];
int k;
for (k = 0; k < ignore_num; k++) {
for (k = 0; k < num_ignores; k++) {
if (label == ignores[k]) {
loss[idx] = valid[idx] = 0;
losses[idx] = flags[idx] = 0;
break;
}
}
if (k == ignore_num) {
loss[idx] = -log(
max(prob[(o_idx * classes + label)
* inner_dim + i_idx], FLT_MIN)
if (k == num_ignores) {
losses[idx] = -log(
max(prob[(oix * axis_dim + label)
* inner_dim + iix], FLT_MIN)
);
valid[idx] = 1;
flags[idx] = 1;
}
}
}
template <> void SparseSoftmaxCrossEntropy<float, float, CUDAContext>(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* prob,
const float* labels,
float* loss,
float* valid,
Tensor* ignore,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
CUDAContext* ctx) {
const int* ignores = ignore->count() > 0 ?
ignore->data<int, CUDAContext>() : nullptr;
const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxCrossEntropy<float, float>
<< <CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >(
num_preds, prob, labels, loss,
classes, inner_dim,
ignores, ignore->count(), valid);
num_preds, axis_dim, inner_dim,
prob, labels, ignores, num_ignores,
losses, flags);
}
template <> void SparseSoftmaxCrossEntropy<float, int64_t, CUDAContext>(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* prob,
const int64_t* labels,
float* loss,
float* valid,
Tensor* ignore,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
CUDAContext* ctx) {
const int* ignores = ignore->count() > 0 ?
ignore->data<int, CUDAContext>() : nullptr;
const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxCrossEntropy<float, int64_t>
<< <CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >(
num_preds, prob, labels, loss,
classes, inner_dim,
ignores, ignore->count(), valid);
num_preds, axis_dim, inner_dim,
prob, labels, ignores, num_ignores,
losses, flags);
}
template <typename Tx, typename Ty>
__global__ void _SparseSoftmaxCrossEntropyGrad(
const int count,
const int axis_dim,
const int inner_dim,
const Tx* prob,
const Ty* labels,
Tx* dx,
const int classes,
const int inner_dim,
const int* ignores,
const int ignore_num,
Tx* valid) {
const int num_ignores,
Tx* dx,
Tx* flags) {
CUDA_KERNEL_LOOP(idx, count) {
const int o_idx = idx / inner_dim;
const int i_idx = idx % inner_dim;
const int label = labels[o_idx * inner_dim + i_idx];
const int oix = idx / inner_dim;
const int iix = idx % inner_dim;
const int label = labels[oix * inner_dim + iix];
int k;
for (k = 0; k < ignore_num; k++)
for (k = 0; k < num_ignores; k++)
if (label == ignores[k]) break;
if (k != ignore_num) {
for (int c = 0; c < classes; c++)
dx[(o_idx * classes + c) * inner_dim + i_idx] = 0;
valid[idx] = 0;
if (k != num_ignores) {
for (int c = 0; c < axis_dim; c++)
dx[(oix * axis_dim + c) * inner_dim + iix] = 0;
flags[idx] = 0;
} else {
dx[(o_idx * classes + label) * inner_dim + i_idx] -= 1;
valid[idx] = 1;
dx[(oix * axis_dim + label) * inner_dim + iix] -= 1;
flags[idx] = 1;
}
}
}
template<> void SparseSoftmaxCrossEntropyGrad<float, float, CUDAContext>(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* prob,
const float* labels,
float* valid,
Tensor* ignore,
float* dXdata,
CUDAContext* ctx) {
const int* ignores = ignore->count() > 0 ?
ignore->data <int, CUDAContext >() : nullptr;
const int* ignores,
const int num_ignores,
float* dx,
float* flags,
CUDAContext* ctx) {
const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxCrossEntropyGrad<float, float>
<< <CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >(
num_preds, prob, labels, dXdata,
classes, inner_dim,
ignores, ignore->count(), valid);
num_preds, axis_dim, inner_dim,
prob, labels, ignores, num_ignores,
dx, flags);
}
template<> void SparseSoftmaxCrossEntropyGrad<float, int64_t, CUDAContext>(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float* prob,
const int64_t* labels,
float* valid,
Tensor* ignore,
float* dXdata,
const int* ignores,
const int num_ignores,
float* dx,
float* flags,
CUDAContext* ctx) {
const int* ignores = ignore->count() > 0 ?
ignore->data <int, CUDAContext >() : nullptr;
const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxCrossEntropyGrad<float, int64_t>
<< <CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >(
num_preds, prob, labels, dXdata,
classes, inner_dim,
ignores, ignore->count(), valid);
num_preds, axis_dim, inner_dim,
prob, labels, ignores, num_ignores,
dx, flags);
}
/******************** loss.sparse_softmax_focal_loss ********************/
template <typename T>
__global__ void _SparseSoftmaxFocalScale(
const int count,
const float gamma,
const T* prob,
T* scale) {
CUDA_KERNEL_LOOP(idx, count) {
scale[idx] = std::pow((1.0f - prob[idx]), gamma);
}
}
template <typename T>
__global__ void _SparseSoftmaxFocalLoss(
const int count,
const int axis_dim,
const int inner_dim,
const float pos_alpha,
const float neg_alpha,
const float gamma,
const int neg_id,
T* scale,
const T* prob,
const T* labels,
T* loss,
const int classes,
const int inner_dim,
const int* ignores,
const int ignore_num,
T* valid) {
const int num_ignores,
T* losses,
T* flags) {
CUDA_KERNEL_LOOP(idx, count) {
const int o_idx = idx / inner_dim;
const int i_idx = idx % inner_dim;
const int label = labels[o_idx * inner_dim + i_idx];
const int oix = idx / inner_dim;
const int iix = idx % inner_dim;
const int label = labels[oix * inner_dim + iix];
int k;
for (k = 0; k < ignore_num; k++) {
for (k = 0; k < num_ignores; k++) {
if (label == ignores[k]) {
loss[idx] = valid[idx] = 0;
losses[idx] = flags[idx] = 0;
break;
}
}
if (k == ignore_num) {
const int t_ = (o_idx * classes + label) * inner_dim + i_idx;
scale[t_] = label > neg_id ? pos_alpha * scale[t_] :
neg_alpha * scale[t_];
loss[idx] = -scale[t_] * std::log(max(prob[t_], FLT_MIN));
valid[idx] = label > neg_id ? 1 : 0;
if (k == num_ignores) {
const int t = (oix * axis_dim + label) * inner_dim + iix;
T scale = pow(1.f - prob[t], gamma);
scale = label > neg_id ?
pos_alpha * scale : neg_alpha * scale;
losses[idx] = -scale * std::log(max(prob[t], FLT_MIN));
flags[idx] = label > neg_id ? 1 : 0;
}
}
}
template <> void SparseSoftmaxFocalLoss<float, CUDAContext>(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float pos_alpha,
const float neg_alpha,
......@@ -1144,89 +1125,92 @@ template <> void SparseSoftmaxFocalLoss<float, CUDAContext>(
const int neg_id,
const float* prob,
const float* labels,
float* scale,
float* loss,
float* valid,
Tensor* ignore) {
const int* ignores = ignore->count() > 0 ?
ignore->data<int, CUDAContext>() : nullptr;
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
CUDAContext* ctx) {
const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxFocalScale<float>
<< <CUDA_BLOCKS(count), CUDA_THREADS >> >(
count, gamma, prob, scale);
_SparseSoftmaxFocalLoss<float>
<< <CUDA_BLOCKS(num_preds), CUDA_THREADS >> >(
num_preds, pos_alpha, neg_alpha, neg_id, scale,
prob, labels, loss, classes, inner_dim,
ignores, ignore->count(), valid);
<< <CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >(
num_preds, axis_dim, inner_dim,
pos_alpha, neg_alpha, gamma, neg_id,
prob, labels, ignores, num_ignores,
losses, flags);
}
template <typename T>
__global__ void _SparseSoftmaxFocalLossGrad(
const int count,
const int axis_dim,
const int inner_dim,
const float pos_alpha,
const float neg_alpha,
const float gamma,
const int neg_id,
const float eps,
const T* scale,
const T* prob,
const T* labels,
T* dx,
const int classes,
const int inner_dim,
const int* ignores,
const int ignore_num,
T* valid) {
const int num_ignores,
T* dx,
T* flags) {
CUDA_KERNEL_LOOP(idx, count) {
const int o_idx = idx / inner_dim;
const int i_idx = idx % inner_dim;
const int label = labels[o_idx * inner_dim + i_idx];
const int oix = idx / inner_dim;
const int iix = idx % inner_dim;
const int label = labels[oix * inner_dim + iix];
int k;
for (k = 0; k < ignore_num; k++)
for (k = 0; k < num_ignores; k++)
if (label == ignores[k]) break;
if (k != ignore_num) {
for (int c = 0; c < classes; c++)
dx[(o_idx * classes + c) * inner_dim + i_idx] = 0;
valid[idx] = 0;
if (k != num_ignores) {
for (int c = 0; c < axis_dim; c++)
dx[(oix * axis_dim + c) * inner_dim + iix] = 0;
flags[idx] = 0;
} else {
const int t_ = (o_idx * classes + label) * inner_dim + i_idx;
T grad = -gamma * (scale[t_] / max((1.0f - prob[t_]), eps))
* std::log(max(prob[t_], FLT_MIN))
* prob[t_] + scale[t_];
for (int c = 0; c < classes; c++) {
const int i_ = (o_idx * classes + c) * inner_dim + i_idx;
const int t = (oix * axis_dim + label) * inner_dim + iix;
T onemp = 1. - prob[t];
// unstable if gamma is 0
T grad = -gamma * pow(onemp, gamma - 1)
* log(max(prob[t], FLT_MIN))
* prob[t] + pow(onemp, gamma);
grad = label > neg_id ?
pos_alpha * grad : neg_alpha * grad;
for (int c = 0; c < axis_dim; c++) {
const int i = (oix * axis_dim + c) * inner_dim + iix;
if (c == label) {
dx[i_] = grad * (prob[t_] - 1);
dx[i] = grad * (prob[t] - 1);
} else {
dx[i_] = grad * prob[i_];
dx[i] = grad * prob[i];
}
}
valid[idx] = label > neg_id ? 1 : 0;
flags[idx] = label > neg_id ? 1 : 0;
}
}
}
template<> void SparseSoftmaxFocalLossGrad<float, CUDAContext>(
const int count,
const int classes,
const int outer_dim,
const int axis_dim,
const int inner_dim,
const float pos_alpha,
const float neg_alpha,
const float gamma,
const int neg_id,
const float eps,
const float* scale,
const float* prob,
const float* labels,
float* valid,
Tensor* ignore,
float* dXdata) {
const int* ignores = ignore->count() > 0 ?
ignore->data <int, CUDAContext >() : nullptr;
const int* ignores,
const int num_ignores,
float* dx,
float* flags,
CUDAContext* ctx) {
const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxFocalLossGrad<float>
<< <CUDA_BLOCKS(num_preds), CUDA_THREADS >> >(
num_preds, gamma, neg_id, eps, scale,
prob, labels, dXdata, classes, inner_dim,
ignores, ignore->count(), valid);
<< <CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >(
num_preds, axis_dim, inner_dim,
pos_alpha, neg_alpha, gamma, neg_id,
prob, labels, ignores, num_ignores,
dx, flags);
}
/******************** misc.astype ********************/
......