Commit 1d551431 by Ting PAN

Re-implement Softmax Focal Loss

1 parent 5dea1524
@@ -52,9 +52,9 @@ using Set = std::unordered_set<Value>;
 /*
  * Define the Kernel version.
  *
- * | Major(2) | Minor(2) | Patch(06) |
+ * | Major(2) | Minor(2) | Patch(07) |
  */
-#define DRAGON_VERSION 2206
+#define DRAGON_VERSION 2207
 /*
  * Define the default random seed.
......
@@ -90,8 +90,10 @@ class Operator : public OperatorBase {
 public:
     Operator(const OperatorDef& def, Workspace* ws)
         : OperatorBase(def, ws), ctx_(def.device_option()),
-          recomputing_aware_(OperatorBase::Arg<bool>(
-              "recomputing_aware", false)) {
+          allow_recompute_(OperatorBase::Arg<bool>(
+              "recomputing_aware", false)),
+          do_sync_(OperatorBase::Arg<bool>(
+              "do_sync", true)) {
         allow_run_ = true;
         allow_run_ &= _MPICheck();
         allow_run_ &= (!(OutputSize() == 1 &&
@@ -100,12 +102,12 @@ class Operator : public OperatorBase {
     virtual void Run() final {
         if (!allow_run_) return;
-        if (recomputing_aware_) MakeResource();
+        if (allow_recompute_) MakeResource();
         ctx().SwitchToDevice();
         MemorySwitch();
         RunOnDevice();
-        ctx().FinishDeviceCompution();
-        if (recomputing_aware_) CleanResource();
+        if (do_sync_) ctx().FinishDeviceCompution();
+        if (allow_recompute_) CleanResource();
     }

     virtual void ElimateCorruption();
@@ -126,7 +128,7 @@ class Operator : public OperatorBase {
 protected:
     Context ctx_;
-    bool allow_run_, recomputing_aware_;
+    bool allow_run_, allow_recompute_, do_sync_;

 private:
     bool _MPICheck() {
......
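The new `do_sync` argument lets a graph opt out of the per-op device synchronization, so consecutive kernels on one stream can be queued without blocking between ops. A minimal sketch of the new Run() control flow, written in Python purely for illustration (the attribute names mirror the C++ members above, not a real Dragon API):

    def run(op):
        # mirrors Operator<Context>::Run() after this commit
        if not op.allow_run:
            return
        if op.allow_recompute:
            op.make_resource()
        op.ctx.switch_to_device()
        op.memory_switch()
        op.run_on_device()
        if op.do_sync:  # new: synchronize only when requested
            op.ctx.finish_device_computation()
        if op.allow_recompute:
            op.clean_resource()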
@@ -24,11 +24,11 @@ class SparseSoftmaxCrossEntropyOp : public Operator<Context> {
           axis(OperatorBase::Arg<int>("axis", 1)),
           normalization(OperatorBase::Arg<string>(
               "normalization", "VALID")) {
-        vector<int> ignores = OperatorBase::Args<int>("ignore_labels");
-        if (ignores.size()) {
-            ignore.Reshape({ (TIndex)ignores.size() });
-            auto* Idata = ignore.mutable_data<int, CPUContext>();
-            for (int i = 0; i < ignores.size(); i++) Idata[i] = ignores[i];
+        auto xs = OperatorBase::Args<int>("ignore_labels");
+        if (xs.size()) {
+            ignores.Reshape({ (TIndex)xs.size() });
+            auto* Idata = ignores.mutable_data<int, CPUContext>();
+            for (int i = 0; i < xs.size(); i++) Idata[i] = xs[i];
         }
     }
     USE_OPERATOR_FUNCTIONS;
@@ -41,8 +41,7 @@ class SparseSoftmaxCrossEntropyOp : public Operator<Context> {
 protected:
     TIndex axis, outer_dim, inner_dim;
-    Tensor ignore, valid, losses;
-    Tensor* prob;
+    Tensor* prob, losses, flags, ignores;
     unique_ptr<OperatorBase> softmax_op;
     string normalization;
 };
@@ -55,11 +54,11 @@ class SparseSoftmaxCrossEntropyGradientOp : public Operator<Context> {
           axis(OperatorBase::Arg<int>("axis", 1)),
           normalization(OperatorBase::Arg<string>(
               "normalization", "VALID")) {
-        vector<int> ignores = OperatorBase::Args<int>("ignore_labels");
-        if (ignores.size()) {
-            ignore.Reshape({ (TIndex)ignores.size() });
-            auto* Idata = ignore.mutable_data<int, CPUContext>();
-            for (int i = 0; i < ignores.size(); i++) Idata[i] = ignores[i];
+        auto xs = OperatorBase::Args<int>("ignore_labels");
+        if (xs.size()) {
+            ignores.Reshape({ (TIndex)xs.size() });
+            auto* Idata = ignores.mutable_data<int, CPUContext>();
+            for (int i = 0; i < xs.size(); i++) Idata[i] = xs[i];
         }
     }
     USE_OPERATOR_FUNCTIONS;
@@ -69,8 +68,7 @@ class SparseSoftmaxCrossEntropyGradientOp : public Operator<Context> {
 protected:
     TIndex axis, outer_dim, inner_dim;
-    Tensor ignore, valid;
-    Tensor* prob;
+    Tensor* prob, ignores, flags;
     string normalization;
 };
......
@@ -17,18 +17,19 @@
 namespace dragon {

 template <class Context>
-class SparseSoftmaxFocalLossOp final : public SparseSoftmaxCrossEntropyOp<Context> {
+class SparseSoftmaxFocalLossOp final
+    : public SparseSoftmaxCrossEntropyOp<Context> {
 public:
     SparseSoftmaxFocalLossOp(const OperatorDef& def, Workspace* ws)
         : SparseSoftmaxCrossEntropyOp<Context>(def, ws),
           axis(OperatorBase::Arg<int>("axis", 1)),
           normalization(OperatorBase::Arg<string>(
               "normalization", "VALID")),
-          alpha(OperatorBase::Arg<float>("alpha", 0.5)),
-          gamma(OperatorBase::Arg<float>("gamma", 0.0)),
-          neg_id(OperatorBase::Arg<int>("neg_id", -1)) {
-        pos_alpha = alpha * 2.0;
-        neg_alpha = (1 - alpha) * 2.0;
+          alpha(OperatorBase::Arg<float>("alpha", 0.25f)),
+          gamma(OperatorBase::Arg<float>("gamma", 2.f)),
+          neg_id(OperatorBase::Arg<int>("neg_id", 0)) {
+        pos_alpha = alpha;
+        neg_alpha = 1.f - alpha;
     }
     USE_OPERATOR_FUNCTIONS;
@@ -36,35 +37,36 @@ class SparseSoftmaxFocalLossOp final : public SparseSoftmaxCrossEntropyOp<Context> {
     template <typename T> void RunWithType();

 protected:
-    float alpha, gamma;
-    int neg_id;
-    float pos_alpha, neg_alpha;
-    TIndex axis, outer_dim, inner_dim;
-    Tensor* scale;
+    float alpha, gamma, pos_alpha, neg_alpha;
+    TIndex axis, neg_id, outer_dim, inner_dim;
+    Tensor losses, flags;
     string normalization;
 };

 template <class Context>
-class SparseSoftmaxFocalLossGradientOp final : public SparseSoftmaxCrossEntropyGradientOp<Context> {
+class SparseSoftmaxFocalLossGradientOp final
+    : public SparseSoftmaxCrossEntropyGradientOp<Context> {
 public:
     SparseSoftmaxFocalLossGradientOp(const OperatorDef& def, Workspace* ws)
         : SparseSoftmaxCrossEntropyGradientOp<Context>(def, ws),
           axis(OperatorBase::Arg<int>("axis", 1)),
           normalization(OperatorBase::Arg<string>(
               "normalization", "VALID")),
-          gamma(OperatorBase::Arg<float>("gamma", 0.0)),
-          eps(OperatorBase::Arg<float>("eps", float(1e-10))),
-          neg_id(OperatorBase::Arg<int>("neg_id", -1)) {}
+          alpha(OperatorBase::Arg<float>("alpha", 0.25f)),
+          gamma(OperatorBase::Arg<float>("gamma", 2.f)),
+          neg_id(OperatorBase::Arg<int>("neg_id", 0)) {
+        pos_alpha = alpha;
+        neg_alpha = 1.f - alpha;
+    }
     USE_OPERATOR_FUNCTIONS;

     void RunOnDevice() override;
     template <typename T> void RunWithType();

 protected:
-    float gamma, eps;
-    int neg_id;
-    TIndex axis, outer_dim, inner_dim;
-    Tensor* scale;
+    float alpha, gamma, pos_alpha, neg_alpha;
+    TIndex axis, neg_id, outer_dim, inner_dim;
+    Tensor flags;
     string normalization;
 };
......
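With these defaults the operator matches the focal loss of [Lin et al., 2017] directly, rather than the earlier variant that folded a factor of 2 into pos_alpha/neg_alpha. Assuming labels greater than neg_id are the positive classes (which is what the neg_id argument suggests), the per-position loss is

    \mathrm{FL}(p_t) = -\,\alpha_t \, (1 - p_t)^{\gamma} \, \log(p_t),
    \qquad
    \alpha_t =
    \begin{cases}
        \alpha     & \text{if label} > \texttt{neg\_id} \\
        1 - \alpha & \text{otherwise}
    \end{cases}

with the new defaults \alpha = 0.25 and \gamma = 2, i.e. pos_alpha = 0.25 and neg_alpha = 0.75.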
@@ -289,37 +289,36 @@ void SoftmaxCrossEntropy(
 template <typename Tx, typename Ty, class Context>
 void SparseSoftmaxCrossEntropy(
-    const int count,
-    const int classes,
     const int outer_dim,
+    const int axis_dim,
     const int inner_dim,
     const Tx* prob,
     const Ty* labels,
-    Tx* loss,
-    Tx* valid,
-    Tensor* ignore,
+    const int* ignores,
+    const int num_ignores,
+    Tx* losses,
+    Tx* flags,
     Context* ctx);

 template <typename Tx, typename Ty, class Context>
 void SparseSoftmaxCrossEntropyGrad(
-    const int count,
-    const int classes,
     const int outer_dim,
+    const int axis_dim,
     const int inner_dim,
     const Tx* prob,
     const Ty* labels,
-    Tx* valid,
-    Tensor* ignore,
+    const int* ignores,
+    const int num_ignores,
     Tx* dx,
+    Tx* flags,
     Context* ctx);
 /******************** loss.sparse_softmax_focal_loss ********************/

 template <typename T, class Context>
 void SparseSoftmaxFocalLoss(
-    const int count,
-    const int classes,
     const int outer_dim,
+    const int axis_dim,
     const int inner_dim,
     const float pos_alpha,
     const float neg_alpha,
@@ -327,26 +326,28 @@ void SparseSoftmaxFocalLoss(
     const int neg_id,
     const T* prob,
     const T* labels,
-    T* scale,
-    T* loss,
-    T* valid,
-    Tensor* ignore);
+    const int* ignores,
+    const int num_ignores,
+    T* losses,
+    T* flags,
+    Context* ctx);

 template <typename T, class Context>
 void SparseSoftmaxFocalLossGrad(
-    const int count,
-    const int classes,
     const int outer_dim,
+    const int axis_dim,
     const int inner_dim,
+    const float pos_alpha,
+    const float neg_alpha,
     const float gamma,
     const int neg_id,
-    const float eps,
-    const T* scale,
     const T* prob,
     const T* labels,
-    T* valid,
-    Tensor* ignore,
-    T* dx);
+    const int* ignores,
+    const int num_ignores,
+    T* dx,
+    T* flags,
+    Context* ctx);

 /******************** misc.astype ********************/
......
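The kernels now take the ignore list as a raw pointer plus a count (`ignores`, `num_ignores`) instead of a `Tensor*`, write per-position `losses`, and fill a `flags` buffer that the op reduces to obtain the `VALID` normalizer; `axis_dim` is the size of the softmax axis. A rough NumPy sketch of the forward semantics, for illustration only; the flag convention (1 for positions that count toward normalization) is an assumption, not lifted from the kernels:

    import numpy as np

    def sparse_softmax_focal_loss(prob, labels, ignores,
                                  pos_alpha=0.25, neg_alpha=0.75,
                                  gamma=2.0, neg_id=0):
        # prob:   (outer_dim, axis_dim, inner_dim) softmax probabilities
        # labels: (outer_dim, inner_dim) integer class ids
        outer_dim, axis_dim, inner_dim = prob.shape
        losses = np.zeros((outer_dim, inner_dim), prob.dtype)
        flags = np.zeros((outer_dim, inner_dim), prob.dtype)
        for i in range(outer_dim):
            for j in range(inner_dim):
                label = int(labels[i, j])
                if label in ignores:
                    continue  # ignored: losses and flags stay 0
                p = prob[i, label, j]
                alpha = pos_alpha if label > neg_id else neg_alpha
                # the re-implementation drops the old eps argument
                losses[i, j] = -alpha * (1.0 - p) ** gamma * np.log(p)
                flags[i, j] = 1.0 if label > neg_id else 0.0
        return losses, flags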
@@ -227,6 +227,7 @@ PyMethodDef* GetAllMethods() {
     PYFUNC(HasTensorCC),
     PYFUNC(CreateTensorCC),
     PYFUNC(CreateFillerCC),
+    PYFUNC(GetFillerTypeCC),
     PYFUNC(RenameTensorCC),
     PYFUNC(TensorFromShapeCC),
     PYFUNC(TensorFromPyArrayCC),
......
@@ -56,12 +56,14 @@ class NumpyFetcher : public TensorFetcherBase {
     for (const auto dim : tensor.dims()) npy_dims.push_back(dim);
     int npy_type = TypeMetaToNPY(tensor.meta());
     if (npy_type == -1) {
-        string s = "The data type of Tensor(" + tensor.name() + ") is unknown. Have you solved it ?";
+        string s = "The data type of Tensor(" +
+            tensor.name() + ") is unknown. Have you solved it ?";
         PyErr_SetString(PyExc_RuntimeError, s.c_str());
         return nullptr;
     }
     // create an empty array with the right shape
-    PyObject* array = PyArray_SimpleNew(tensor.ndim(), npy_dims.data(), npy_type);
+    PyObject* array = PyArray_SimpleNew(
+        tensor.ndim(), npy_dims.data(), npy_type);
     // copy the tensor data to the numpy array
     if (tensor.memory_state() == MixedMemory::STATE_AT_CUDA) {
         CUDAContext::Memcpy<CPUContext, CUDAContext>(tensor.nbytes(),
......
@@ -52,6 +52,11 @@ inline PyObject* CreateFillerCC(PyObject* self, PyObject* args) {
     Py_RETURN_TRUE;
 }

+inline PyObject* GetFillerTypeCC(PyObject* self, PyObject* args) {
+    const auto* f = ws()->GetFiller(ParseName(self, args));
+    return String_AsPyUnicode(f->type());
+}
+
 inline PyObject* RenameTensorCC(PyObject* self, PyObject* args) {
     char* ori_name, *tar_name;
     if (!PyArg_ParseTuple(args, "ss", &ori_name, &tar_name)) {
......
@@ -44,6 +44,7 @@ __all__ = [
     'HasTensor',
     'CreateTensor',
     'CreateFiller',
+    'GetFillerType',
     'GetTensorName',
     'RenameTensor',
     'FeedTensor',
@@ -335,7 +336,7 @@ def CreateFiller(filler_def):
     Parameters
     ----------
     filler_def : dragon_pb2.TensorFiller
-        The
+        The filler.

     Returns
     -------
@@ -356,6 +357,31 @@ def CreateFiller(filler_def):
     CreateFillerCC(filler_def)


+def GetFillerType(tensor):
+    """Get the filler type of a specific tensor.
+
+    It is useful if you want to tag some tensors,
+    e.g. tag them with ``numpy``, and initialize them lazily.
+
+    Parameters
+    ----------
+    tensor : Tensor or str
+        The tensor to query.
+
+    Returns
+    -------
+    str
+        The filler type.
+
+    References
+    ----------
+    The wrapper of ``GetFillerTypeCC``.
+
+    """
+    return GetFillerTypeCC(_stringify_tensor(tensor))
+
+
 def GetTensorName(tensor):
     """Query the name represented in the current workspace.
......
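A possible use of the new wrapper, assuming a tensor named 'data' whose filler was registered earlier via CreateFiller (the name is hypothetical):

    import dragon.core.workspace as ws

    # Tensors tagged with a ``numpy`` filler can be left uninitialized
    # and materialized lazily once their values are actually needed.
    if ws.GetFillerType('data') == 'numpy':
        print('lazy tensor: feed it before running the graph')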
@@ -218,7 +218,7 @@ def L2Loss(inputs, normalization='BATCH_SIZE', **kwargs):
 def SparseSoftmaxFocalLoss(inputs, axis=1, normalization='VALID', ignore_labels=(),
-                           alpha=0.5, gamma=0.0, eps=1e-10, neg_id=-1, **kwargs):
+                           alpha=0.25, gamma=2.0, neg_id=0, **kwargs):
     """SoftmaxFocalLoss with sparse labels. `[Lin et al., 2017] <https://arxiv.org/abs/1708.02002>`_.

     Parameters
@@ -232,13 +232,11 @@ def SparseSoftmaxFocalLoss(inputs, axis=1, normalization='VALID', ignore_labels=(),
     ignore_labels : tuple or list
         The label ids to ignore. Default is ``empty``.
     alpha : float
-        The scale factor on the rare class. Default is ``0.5``.
+        The scale factor on the rare class. Default is ``0.25``.
     gamma : float
-        The exponential decay factor on the easy examples. Default is ``0.0``.
-    eps : float
-        The eps.
+        The exponential decay factor on the easy examples. Default is ``2.0``.
     neg_id : int
-        The negative id. Default is ``-1`` (without class balance).
+        The negative id. Default is ``0``.

     Returns
     -------
......
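Since alpha=0.25, gamma=2.0 and neg_id=0 are now the defaults, a RetinaNet-style call needs no extra arguments. A sketch, assuming `cls_score` and `labels` are existing symbolic Tensors and that the op is exposed through `dragon.ops` as elsewhere in this front end:

    import dragon.ops as ops

    # defaults now match Lin et al., 2017: alpha=0.25, gamma=2.0, neg_id=0
    loss = ops.SparseSoftmaxFocalLoss(
        [cls_score, labels], axis=1,
        normalization='VALID', ignore_labels=(-1,))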
@@ -14,7 +14,7 @@ from __future__ import division
 from __future__ import print_function

 version = '0.2.2'
-full_version = '0.2.2.5'
+full_version = '0.2.2.7'
 release = False

 if not release:
......
@@ -149,8 +149,6 @@ class SoftmaxWithFocalLossLayer(Layer):
         The scale on the rare class. Refer `FocalLossParameter.alpha`_.
     gamma : float
         The exponential decay. Refer `FocalLossParameter.gamma`_.
-    eps : float
-        The eps. Refer `FocalLossParameter.eps`_.
     neg_id : int
         The negative id. Refer `FocalLossParameter.neg_id`_.
     normalization : NormalizationMode
@@ -174,7 +172,6 @@ class SoftmaxWithFocalLossLayer(Layer):
                  'ignore_labels': [param.ignore_label] if param.HasField('ignore_label') else [],
                  'alpha': float(focal_loss_param.alpha),
                  'gamma': float(focal_loss_param.gamma),
-                 'eps': float(focal_loss_param.eps),
                  'neg_id': focal_loss_param.neg_id}

     def Setup(self, bottom):
......
@@ -1504,10 +1504,9 @@ message DenseConcatParameter {
 }

 message FocalLossParameter {
-  optional float alpha = 1 [default = 0.5];
-  optional float gamma = 2 [default = 0.0];
-  optional float eps = 3 [default = 1e-10];
-  optional int32 neg_id = 4 [default = -1];
+  optional float alpha = 1 [default = 0.25];
+  optional float gamma = 2 [default = 2.0];
+  optional int32 neg_id = 3 [default = 0];
 }

 message GatherParameter {
......
@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
 DESCRIPTOR = _descriptor.FileDescriptor(
   name='caffe.proto',
   package='caffe',
-  serialized_pb=_b('<old serialized descriptor, omitted>')
+  serialized_pb=_b('<regenerated serialized descriptor, omitted>')
[The serialized_pb byte strings of the generated caffe_pb2.py (several kilobytes of escaped bytes each) are omitted here; the regenerated descriptor only reflects the FocalLossParameter change above: alpha 0.5 -> 0.25, gamma 0.0 -> 2.0, eps removed, neg_id renumbered to field 3 with default 0.]
\x01(\x0b\x32\x16.caffe.FillerParameter\x12;\n\x06\x65ngine\x18\x0f \x01(\x0e\x32\".caffe.ConvolutionParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x10 \x01(\x05:\x01\x31\x12\x1e\n\x0f\x66orce_nd_im2col\x18\x11 \x01(\x08:\x05\x66\x61lse\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"0\n\rCropParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x32\x12\x0e\n\x06offset\x18\x02 \x03(\r\"\xa4\x02\n\rDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x31\n\x07\x62\x61\x63kend\x18\x08 \x01(\x0e\x32\x17.caffe.DataParameter.DB:\x07LEVELDB\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\"\n\x13\x66orce_encoded_color\x18\t \x01(\x08:\x05\x66\x61lse\x12\x13\n\x08prefetch\x18\n \x01(\r:\x01\x35\"\x1b\n\x02\x44\x42\x12\x0b\n\x07LEVELDB\x10\x00\x12\x08\n\x04LMDB\x10\x01\"I\n\x10\x44ropoutParameter\x12\x1a\n\rdropout_ratio\x18\x01 \x01(\x02:\x03\x30.5\x12\x19\n\x0bscale_train\x18\x02 \x01(\x08:\x04true\"\xa0\x01\n\x12\x44ummyDataParameter\x12+\n\x0b\x64\x61ta_filler\x18\x01 \x03(\x0b\x32\x16.caffe.FillerParameter\x12\x1f\n\x05shape\x18\x06 \x03(\x0b\x32\x10.caffe.BlobShape\x12\x0b\n\x03num\x18\x02 \x03(\r\x12\x10\n\x08\x63hannels\x18\x03 \x03(\r\x12\x0e\n\x06height\x18\x04 \x03(\r\x12\r\n\x05width\x18\x05 \x03(\r\"\xa5\x01\n\x10\x45ltwiseParameter\x12\x39\n\toperation\x18\x01 \x01(\x0e\x32!.caffe.EltwiseParameter.EltwiseOp:\x03SUM\x12\r\n\x05\x63oeff\x18\x02 \x03(\x02\x12\x1e\n\x10stable_prod_grad\x18\x03 \x01(\x08:\x04true\"\'\n\tEltwiseOp\x12\x08\n\x04PROD\x10\x00\x12\x07\n\x03SUM\x10\x01\x12\x07\n\x03MAX\x10\x02\" \n\x0c\x45LUParameter\x12\x10\n\x05\x61lpha\x18\x01 \x01(\x02:\x01\x31\"\xac\x01\n\x0e\x45mbedParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x11\n\tinput_dim\x18\x02 \x01(\r\x12\x17\n\tbias_term\x18\x03 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x04 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\"D\n\x0c\x45xpParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"9\n\x10\x46lattenParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x14\n\x08\x65nd_axis\x18\x02 \x01(\x05:\x02-1\"O\n\x11HDF5DataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 \x01(\r\x12\x16\n\x07shuffle\x18\x03 \x01(\x08:\x05\x66\x61lse\"(\n\x13HDF5OutputParameter\x12\x11\n\tfile_name\x18\x01 \x01(\t\"^\n\x12HingeLossParameter\x12\x30\n\x04norm\x18\x01 \x01(\x0e\x32\x1e.caffe.HingeLossParameter.Norm:\x02L1\"\x16\n\x04Norm\x12\x06\n\x02L1\x10\x01\x12\x06\n\x02L2\x10\x02\"\x97\x02\n\x12ImageDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x15\n\nbatch_size\x18\x04 \x01(\r:\x01\x31\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x16\n\x07shuffle\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x15\n\nnew_height\x18\t \x01(\r:\x01\x30\x12\x14\n\tnew_width\x18\n \x01(\r:\x01\x30\x12\x16\n\x08is_color\x18\x0b \x01(\x08:\x04true\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\x0c \x01(\t:\x00\"\'\n\x15InfogainLossParameter\x12\x0e\n\x06source\x18\x01 
\x01(\t\"\xcb\x01\n\x15InnerProductParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x04 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x0f\n\x04\x61xis\x18\x05 \x01(\x05:\x01\x31\x12\x18\n\ttranspose\x18\x06 \x01(\x08:\x05\x66\x61lse\"1\n\x0eInputParameter\x12\x1f\n\x05shape\x18\x01 \x03(\x0b\x32\x10.caffe.BlobShape\"D\n\x0cLogParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"\xb8\x02\n\x0cLRNParameter\x12\x15\n\nlocal_size\x18\x01 \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x02 \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x03 \x01(\x02:\x04\x30.75\x12\x44\n\x0bnorm_region\x18\x04 \x01(\x0e\x32\x1e.caffe.LRNParameter.NormRegion:\x0f\x41\x43ROSS_CHANNELS\x12\x0c\n\x01k\x18\x05 \x01(\x02:\x01\x31\x12\x33\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1a.caffe.LRNParameter.Engine:\x07\x44\x45\x46\x41ULT\"5\n\nNormRegion\x12\x13\n\x0f\x41\x43ROSS_CHANNELS\x10\x00\x12\x12\n\x0eWITHIN_CHANNEL\x10\x01\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\xbd\x01\n\x13MemoryDataParameter\x12\x12\n\nbatch_size\x18\x01 \x01(\r\x12\x10\n\x08\x63hannels\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\r\n\x05width\x18\x04 \x01(\r\x12;\n\x05\x64type\x18\x05 \x01(\x0e\x32#.caffe.MemoryDataParameter.DataType:\x07\x46LOAT32\"$\n\x08\x44\x61taType\x12\x0b\n\x07\x46LOAT32\x10\x00\x12\x0b\n\x07\x46LOAT16\x10\x01\"e\n\x0cMVNParameter\x12 \n\x12normalize_variance\x18\x01 \x01(\x08:\x04true\x12\x1e\n\x0f\x61\x63ross_channels\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x13\n\x03\x65ps\x18\x03 \x01(\x02:\x06\x31\x65-009\"5\n\x12ParameterParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\"\xa2\x03\n\x10PoolingParameter\x12\x35\n\x04pool\x18\x01 \x01(\x0e\x32\".caffe.PoolingParameter.PoolMethod:\x03MAX\x12\x0e\n\x03pad\x18\x04 \x01(\r:\x01\x30\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x13\n\x0bkernel_size\x18\x02 \x01(\r\x12\x10\n\x08kernel_h\x18\x05 \x01(\r\x12\x10\n\x08kernel_w\x18\x06 \x01(\r\x12\x11\n\x06stride\x18\x03 \x01(\r:\x01\x31\x12\x10\n\x08stride_h\x18\x07 \x01(\r\x12\x10\n\x08stride_w\x18\x08 \x01(\r\x12\x37\n\x06\x65ngine\x18\x0b \x01(\x0e\x32\x1e.caffe.PoolingParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x1d\n\x0eglobal_pooling\x18\x0c \x01(\x08:\x05\x66\x61lse\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"Y\n\x13ROIPoolingParameter\x12\x13\n\x08pooled_h\x18\x01 \x01(\r:\x01\x30\x12\x13\n\x08pooled_w\x18\x02 \x01(\r:\x01\x30\x12\x18\n\rspatial_scale\x18\x03 \x01(\x02:\x01\x31\"F\n\x0ePowerParameter\x12\x10\n\x05power\x18\x01 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"g\n\x0fPythonParameter\x12\x0e\n\x06module\x18\x01 \x01(\t\x12\r\n\x05layer\x18\x02 \x01(\t\x12\x13\n\tparam_str\x18\x03 \x01(\t:\x00\x12 \n\x11share_in_parallel\x18\x04 \x01(\x08:\x05\x66\x61lse\"\xad\x01\n\x12ReductionParameter\x12=\n\toperation\x18\x01 \x01(\x0e\x32%.caffe.ReductionParameter.ReductionOp:\x03SUM\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x10\n\x05\x63oeff\x18\x03 
\x01(\x02:\x01\x31\"5\n\x0bReductionOp\x12\x07\n\x03SUM\x10\x01\x12\x08\n\x04\x41SUM\x10\x02\x12\t\n\x05SUMSQ\x10\x03\x12\x08\n\x04MEAN\x10\x04\"\x8d\x01\n\rReLUParameter\x12\x19\n\x0enegative_slope\x18\x01 \x01(\x02:\x01\x30\x12\x34\n\x06\x65ngine\x18\x02 \x01(\x0e\x32\x1b.caffe.ReLUParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"Z\n\x10ReshapeParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x14\n\x08num_axes\x18\x03 \x01(\x05:\x02-1\"\xa5\x01\n\x0eScaleParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12&\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x18\n\tbias_term\x18\x04 \x01(\x08:\x05\x66\x61lse\x12+\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\"x\n\x10SigmoidParameter\x12\x37\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1e.caffe.SigmoidParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"L\n\x0eSliceParameter\x12\x0f\n\x04\x61xis\x18\x03 \x01(\x05:\x01\x31\x12\x13\n\x0bslice_point\x18\x02 \x03(\r\x12\x14\n\tslice_dim\x18\x01 \x01(\r:\x01\x31\"\x89\x01\n\x10SoftmaxParameter\x12\x37\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1e.caffe.SoftmaxParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"r\n\rTanHParameter\x12\x34\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1b.caffe.TanHParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"T\n\rTileParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\r\n\x05tiles\x18\x02 \x01(\x05\x12#\n\tmultiples\x18\x03 \x01(\x0b\x32\x10.caffe.BlobShape\"*\n\x12ThresholdParameter\x12\x14\n\tthreshold\x18\x01 \x01(\x02:\x01\x30\"\xc1\x02\n\x13WindowDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\x0c\x66g_threshold\x18\x07 \x01(\x02:\x03\x30.5\x12\x19\n\x0c\x62g_threshold\x18\x08 \x01(\x02:\x03\x30.5\x12\x19\n\x0b\x66g_fraction\x18\t \x01(\x02:\x04\x30.25\x12\x16\n\x0b\x63ontext_pad\x18\n \x01(\r:\x01\x30\x12\x17\n\tcrop_mode\x18\x0b \x01(\t:\x04warp\x12\x1b\n\x0c\x63\x61\x63he_images\x18\x0c \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\r \x01(\t:\x00\"\xeb\x01\n\x0cSPPParameter\x12\x16\n\x0epyramid_height\x18\x01 \x01(\r\x12\x31\n\x04pool\x18\x02 \x01(\x0e\x32\x1e.caffe.SPPParameter.PoolMethod:\x03MAX\x12\x33\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1a.caffe.SPPParameter.Engine:\x07\x44\x45\x46\x41ULT\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\xe0\x13\n\x10V1LayerParameter\x12\x0e\n\x06\x62ottom\x18\x02 \x03(\t\x12\x0b\n\x03top\x18\x03 \x03(\t\x12\x0c\n\x04name\x18\x04 \x01(\t\x12$\n\x07include\x18 \x03(\x0b\x32\x13.caffe.NetStateRule\x12$\n\x07\x65xclude\x18! 
\x03(\x0b\x32\x13.caffe.NetStateRule\x12/\n\x04type\x18\x05 \x01(\x0e\x32!.caffe.V1LayerParameter.LayerType\x12\x1f\n\x05\x62lobs\x18\x06 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x0e\n\x05param\x18\xe9\x07 \x03(\t\x12>\n\x0f\x62lob_share_mode\x18\xea\x07 \x03(\x0e\x32$.caffe.V1LayerParameter.DimCheckMode\x12\x10\n\x08\x62lobs_lr\x18\x07 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x08 \x03(\x02\x12\x13\n\x0bloss_weight\x18# \x03(\x02\x12\x30\n\x0e\x61\x63\x63uracy_param\x18\x1b \x01(\x0b\x32\x18.caffe.AccuracyParameter\x12,\n\x0c\x61rgmax_param\x18\x17 \x01(\x0b\x32\x16.caffe.ArgMaxParameter\x12,\n\x0c\x63oncat_param\x18\t \x01(\x0b\x32\x16.caffe.ConcatParameter\x12?\n\x16\x63ontrastive_loss_param\x18( \x01(\x0b\x32\x1f.caffe.ContrastiveLossParameter\x12\x36\n\x11\x63onvolution_param\x18\n \x01(\x0b\x32\x1b.caffe.ConvolutionParameter\x12(\n\ndata_param\x18\x0b \x01(\x0b\x32\x14.caffe.DataParameter\x12.\n\rdropout_param\x18\x0c \x01(\x0b\x32\x17.caffe.DropoutParameter\x12\x33\n\x10\x64ummy_data_param\x18\x1a \x01(\x0b\x32\x19.caffe.DummyDataParameter\x12.\n\reltwise_param\x18\x18 \x01(\x0b\x32\x17.caffe.EltwiseParameter\x12&\n\texp_param\x18) \x01(\x0b\x32\x13.caffe.ExpParameter\x12\x31\n\x0fhdf5_data_param\x18\r \x01(\x0b\x32\x18.caffe.HDF5DataParameter\x12\x35\n\x11hdf5_output_param\x18\x0e \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\x12\x33\n\x10hinge_loss_param\x18\x1d \x01(\x0b\x32\x19.caffe.HingeLossParameter\x12\x33\n\x10image_data_param\x18\x0f \x01(\x0b\x32\x19.caffe.ImageDataParameter\x12\x39\n\x13infogain_loss_param\x18\x10 \x01(\x0b\x32\x1c.caffe.InfogainLossParameter\x12\x39\n\x13inner_product_param\x18\x11 \x01(\x0b\x32\x1c.caffe.InnerProductParameter\x12&\n\tlrn_param\x18\x12 \x01(\x0b\x32\x13.caffe.LRNParameter\x12\x35\n\x11memory_data_param\x18\x16 \x01(\x0b\x32\x1a.caffe.MemoryDataParameter\x12&\n\tmvn_param\x18\" \x01(\x0b\x32\x13.caffe.MVNParameter\x12.\n\rpooling_param\x18\x13 \x01(\x0b\x32\x17.caffe.PoolingParameter\x12*\n\x0bpower_param\x18\x15 \x01(\x0b\x32\x15.caffe.PowerParameter\x12(\n\nrelu_param\x18\x1e \x01(\x0b\x32\x14.caffe.ReLUParameter\x12.\n\rsigmoid_param\x18& \x01(\x0b\x32\x17.caffe.SigmoidParameter\x12.\n\rsoftmax_param\x18\' \x01(\x0b\x32\x17.caffe.SoftmaxParameter\x12*\n\x0bslice_param\x18\x1f \x01(\x0b\x32\x15.caffe.SliceParameter\x12(\n\ntanh_param\x18% \x01(\x0b\x32\x14.caffe.TanHParameter\x12\x32\n\x0fthreshold_param\x18\x19 \x01(\x0b\x32\x19.caffe.ThresholdParameter\x12\x35\n\x11window_data_param\x18\x14 \x01(\x0b\x32\x1a.caffe.WindowDataParameter\x12\x37\n\x0ftransform_param\x18$ \x01(\x0b\x32\x1e.caffe.TransformationParameter\x12(\n\nloss_param\x18* \x01(\x0b\x32\x14.caffe.LossParameter\x12&\n\x05layer\x18\x01 \x01(\x0b\x32\x17.caffe.V0LayerParameter\"\xd8\x04\n\tLayerType\x12\x08\n\x04NONE\x10\x00\x12\n\n\x06\x41\x42SVAL\x10#\x12\x0c\n\x08\x41\x43\x43URACY\x10\x01\x12\n\n\x06\x41RGMAX\x10\x1e\x12\x08\n\x04\x42NLL\x10\x02\x12\n\n\x06\x43ONCAT\x10\x03\x12\x14\n\x10\x43ONTRASTIVE_LOSS\x10%\x12\x0f\n\x0b\x43ONVOLUTION\x10\x04\x12\x08\n\x04\x44\x41TA\x10\x05\x12\x11\n\rDECONVOLUTION\x10\'\x12\x0b\n\x07\x44ROPOUT\x10\x06\x12\x0e\n\nDUMMY_DATA\x10 
\x12\x12\n\x0e\x45UCLIDEAN_LOSS\x10\x07\x12\x0b\n\x07\x45LTWISE\x10\x19\x12\x07\n\x03\x45XP\x10&\x12\x0b\n\x07\x46LATTEN\x10\x08\x12\r\n\tHDF5_DATA\x10\t\x12\x0f\n\x0bHDF5_OUTPUT\x10\n\x12\x0e\n\nHINGE_LOSS\x10\x1c\x12\n\n\x06IM2COL\x10\x0b\x12\x0e\n\nIMAGE_DATA\x10\x0c\x12\x11\n\rINFOGAIN_LOSS\x10\r\x12\x11\n\rINNER_PRODUCT\x10\x0e\x12\x07\n\x03LRN\x10\x0f\x12\x0f\n\x0bMEMORY_DATA\x10\x1d\x12\x1d\n\x19MULTINOMIAL_LOGISTIC_LOSS\x10\x10\x12\x07\n\x03MVN\x10\"\x12\x0b\n\x07POOLING\x10\x11\x12\t\n\x05POWER\x10\x1a\x12\x08\n\x04RELU\x10\x12\x12\x0b\n\x07SIGMOID\x10\x13\x12\x1e\n\x1aSIGMOID_CROSS_ENTROPY_LOSS\x10\x1b\x12\x0b\n\x07SILENCE\x10$\x12\x0b\n\x07SOFTMAX\x10\x14\x12\x10\n\x0cSOFTMAX_LOSS\x10\x15\x12\t\n\x05SPLIT\x10\x16\x12\t\n\x05SLICE\x10!\x12\x08\n\x04TANH\x10\x17\x12\x0f\n\x0bWINDOW_DATA\x10\x18\x12\r\n\tTHRESHOLD\x10\x1f\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\xfd\x07\n\x10V0LayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x12\n\nnum_output\x18\x03 \x01(\r\x12\x16\n\x08\x62iasterm\x18\x04 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x06 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x0e\n\x03pad\x18\x07 \x01(\r:\x01\x30\x12\x12\n\nkernelsize\x18\x08 \x01(\r\x12\x10\n\x05group\x18\t \x01(\r:\x01\x31\x12\x11\n\x06stride\x18\n \x01(\r:\x01\x31\x12\x35\n\x04pool\x18\x0b \x01(\x0e\x32\".caffe.V0LayerParameter.PoolMethod:\x03MAX\x12\x1a\n\rdropout_ratio\x18\x0c \x01(\x02:\x03\x30.5\x12\x15\n\nlocal_size\x18\r \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x0e \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x0f \x01(\x02:\x04\x30.75\x12\x0c\n\x01k\x18\x16 \x01(\x02:\x01\x31\x12\x0e\n\x06source\x18\x10 \x01(\t\x12\x10\n\x05scale\x18\x11 \x01(\x02:\x01\x31\x12\x10\n\x08meanfile\x18\x12 \x01(\t\x12\x11\n\tbatchsize\x18\x13 \x01(\r\x12\x13\n\x08\x63ropsize\x18\x14 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x15 \x01(\x08:\x05\x66\x61lse\x12\x1f\n\x05\x62lobs\x18\x32 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x10\n\x08\x62lobs_lr\x18\x33 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x34 \x03(\x02\x12\x14\n\trand_skip\x18\x35 \x01(\r:\x01\x30\x12\x1d\n\x10\x64\x65t_fg_threshold\x18\x36 \x01(\x02:\x03\x30.5\x12\x1d\n\x10\x64\x65t_bg_threshold\x18\x37 \x01(\x02:\x03\x30.5\x12\x1d\n\x0f\x64\x65t_fg_fraction\x18\x38 \x01(\x02:\x04\x30.25\x12\x1a\n\x0f\x64\x65t_context_pad\x18: \x01(\r:\x01\x30\x12\x1b\n\rdet_crop_mode\x18; \x01(\t:\x04warp\x12\x12\n\x07new_num\x18< \x01(\x05:\x01\x30\x12\x17\n\x0cnew_channels\x18= \x01(\x05:\x01\x30\x12\x15\n\nnew_height\x18> \x01(\x05:\x01\x30\x12\x14\n\tnew_width\x18? 
\x01(\x05:\x01\x30\x12\x1d\n\x0eshuffle_images\x18@ \x01(\x08:\x05\x66\x61lse\x12\x15\n\nconcat_dim\x18\x41 \x01(\r:\x01\x31\x12\x36\n\x11hdf5_output_param\x18\xe9\x07 \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"W\n\x0ePReLUParameter\x12&\n\x06\x66iller\x18\x01 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x1d\n\x0e\x63hannel_shared\x18\x02 \x01(\x08:\x05\x66\x61lse\")\n\x15SmoothL1LossParameter\x12\x10\n\x05sigma\x18\x01 \x01(\x02:\x01\x31\"H\n\x0cMPIParameter\x12\x0f\n\x04root\x18\x01 \x01(\r:\x01\x30\x12\x12\n\x07\x63omm_id\x18\x02 \x01(\x04:\x01\x30\x12\x13\n\x08group_id\x18\x03 \x01(\x04:\x01\x30\"!\n\x10PermuteParameter\x12\r\n\x05order\x18\x01 \x03(\r\"\x93\x01\n\x12NormalizeParameter\x12\x1c\n\x0e\x61\x63ross_spatial\x18\x01 \x01(\x08:\x04true\x12,\n\x0cscale_filler\x18\x02 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x1c\n\x0e\x63hannel_shared\x18\x03 \x01(\x08:\x04true\x12\x13\n\x03\x65ps\x18\x04 \x01(\x02:\x06\x31\x65-010\"d\n\x11ParallelParameter\x12\x1d\n\x0emultiple_nodes\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x16\n\x07shuffle\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x18\n\tpartition\x18\x03 \x01(\x08:\x05\x66\x61lse\"R\n\x0fResizeParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\x12\x0e\n\x02\x66x\x18\x02 \x01(\x02:\x02-1\x12\x0e\n\x02\x66y\x18\x03 \x01(\x02:\x02-1\"\'\n\x13\x45xpandDimsParameter\x12\x10\n\x04\x61xis\x18\x01 \x01(\x05:\x02-1\"\x90\x02\n\x11ProposalParameter\x12\x0e\n\x06stride\x18\x01 \x03(\x05\x12\r\n\x05ratio\x18\x02 \x03(\x02\x12\r\n\x05scale\x18\x03 \x03(\x02\x12\x1b\n\rpre_nms_top_n\x18\x04 \x01(\r:\x04\x36\x30\x30\x30\x12\x1b\n\x0epost_nms_top_n\x18\x05 \x01(\r:\x03\x33\x30\x30\x12\x17\n\nnms_thresh\x18\x06 \x01(\x02:\x03\x30.7\x12\x14\n\x08min_size\x18\x07 \x01(\r:\x02\x31\x36\x12\x14\n\tmin_level\x18\x08 \x01(\x05:\x01\x32\x12\x14\n\tmax_level\x18\t \x01(\x05:\x01\x35\x12\x1c\n\x0f\x63\x61nonical_scale\x18\n \x01(\x05:\x03\x32\x32\x34\x12\x1a\n\x0f\x63\x61nonical_level\x18\x0b \x01(\x05:\x01\x34\"\xa6\x01\n\x14\x42\x61tchRenormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12$\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x03\x30.9\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x30.001\x12\x10\n\x05r_max\x18\x04 \x01(\x02:\x01\x33\x12\x10\n\x05\x64_max\x18\x05 \x01(\x02:\x01\x35\x12\x16\n\x07t_delta\x18\x06 \x01(\x02:\x05\x30.001\"?\n\x14\x44\x65nseConcatParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x16\n\x0bgrowth_rate\x18\x02 \x01(\x05:\x01\x30\"N\n\x12\x46ocalLossParameter\x12\x13\n\x05\x61lpha\x18\x01 \x01(\x02:\x04\x30.25\x12\x10\n\x05gamma\x18\x02 \x01(\x02:\x01\x32\x12\x11\n\x06neg_id\x18\x03 \x01(\x05:\x01\x30\"\"\n\x0fGatherParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x30\"{\n\x12GroupNormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12$\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x03\x30.9\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x30.001\x12\x11\n\x05group\x18\x05 \x01(\r:\x02\x33\x32*\x1c\n\x05Phase\x12\t\n\x05TRAIN\x10\x00\x12\x08\n\x04TEST\x10\x01')
) )
_sym_db.RegisterFileDescriptor(DESCRIPTOR) _sym_db.RegisterFileDescriptor(DESCRIPTOR)
...@@ -40,8 +40,8 @@ _PHASE = _descriptor.EnumDescriptor( ...@@ -40,8 +40,8 @@ _PHASE = _descriptor.EnumDescriptor(
], ],
containing_type=None, containing_type=None,
options=None, options=None,
serialized_start=17663, serialized_start=17642,
serialized_end=17691, serialized_end=17670,
) )
_sym_db.RegisterEnumDescriptor(_PHASE) _sym_db.RegisterEnumDescriptor(_PHASE)
...@@ -5842,28 +5842,21 @@ _FOCALLOSSPARAMETER = _descriptor.Descriptor( ...@@ -5842,28 +5842,21 @@ _FOCALLOSSPARAMETER = _descriptor.Descriptor(
_descriptor.FieldDescriptor( _descriptor.FieldDescriptor(
name='alpha', full_name='caffe.FocalLossParameter.alpha', index=0, name='alpha', full_name='caffe.FocalLossParameter.alpha', index=0,
number=1, type=2, cpp_type=6, label=1, number=1, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0.5, has_default_value=True, default_value=0.25,
message_type=None, enum_type=None, containing_type=None, message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None, is_extension=False, extension_scope=None,
options=None), options=None),
_descriptor.FieldDescriptor( _descriptor.FieldDescriptor(
name='gamma', full_name='caffe.FocalLossParameter.gamma', index=1, name='gamma', full_name='caffe.FocalLossParameter.gamma', index=1,
number=2, type=2, cpp_type=6, label=1, number=2, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=0, has_default_value=True, default_value=2,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None),
_descriptor.FieldDescriptor(
name='eps', full_name='caffe.FocalLossParameter.eps', index=2,
number=3, type=2, cpp_type=6, label=1,
has_default_value=True, default_value=1e-010,
message_type=None, enum_type=None, containing_type=None, message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None, is_extension=False, extension_scope=None,
options=None), options=None),
_descriptor.FieldDescriptor( _descriptor.FieldDescriptor(
name='neg_id', full_name='caffe.FocalLossParameter.neg_id', index=3, name='neg_id', full_name='caffe.FocalLossParameter.neg_id', index=2,
number=4, type=5, cpp_type=1, label=1, number=3, type=5, cpp_type=1, label=1,
has_default_value=True, default_value=-1, has_default_value=True, default_value=0,
message_type=None, enum_type=None, containing_type=None, message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None, is_extension=False, extension_scope=None,
options=None), options=None),
...@@ -5879,7 +5872,7 @@ _FOCALLOSSPARAMETER = _descriptor.Descriptor( ...@@ -5879,7 +5872,7 @@ _FOCALLOSSPARAMETER = _descriptor.Descriptor(
oneofs=[ oneofs=[
], ],
serialized_start=17401, serialized_start=17401,
serialized_end=17500, serialized_end=17479,
) )
...@@ -5908,8 +5901,8 @@ _GATHERPARAMETER = _descriptor.Descriptor( ...@@ -5908,8 +5901,8 @@ _GATHERPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=17502, serialized_start=17481,
serialized_end=17536, serialized_end=17515,
) )
...@@ -5959,8 +5952,8 @@ _GROUPNORMPARAMETER = _descriptor.Descriptor( ...@@ -5959,8 +5952,8 @@ _GROUPNORMPARAMETER = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[ oneofs=[
], ],
serialized_start=17538, serialized_start=17517,
serialized_end=17661, serialized_end=17640,
) )
_BLOBPROTO.fields_by_name['shape'].message_type = _BLOBSHAPE _BLOBPROTO.fields_by_name['shape'].message_type = _BLOBSHAPE
......
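The regenerated descriptors above pin down the new FocalLossParameter defaults: alpha moves from 0.5 to 0.25, gamma from 0 to 2, the eps field is dropped, and neg_id becomes field number 3 with default 0. A minimal sanity check against the regenerated module follows; the import path is a guess, adjust it to wherever caffe_pb2 is emitted in your tree:

    # Sanity-check the regenerated defaults (import path is an assumption).
    from dragon.vm.caffe.proto import caffe_pb2

    msg = caffe_pb2.FocalLossParameter()
    assert msg.alpha == 0.25    # was 0.5
    assert msg.gamma == 2.0     # was 0
    assert msg.neg_id == 0      # was -1, now field number 3
    # The former 'eps' field no longer exists in the descriptor.
    assert 'eps' not in msg.DESCRIPTOR.fields_by_name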
...@@ -42,7 +42,7 @@ find_modules() ...@@ -42,7 +42,7 @@ find_modules()
setup(name = 'dragon', setup(name = 'dragon',
version='0.2.2.6', version='0.2.2.7',
description = 'Dragon: A Computation Graph Virtual Machine Based Deep Learning Framework', description = 'Dragon: A Computation Graph Virtual Machine Based Deep Learning Framework',
url='https://github.com/seetaresearch/Dragon', url='https://github.com/seetaresearch/Dragon',
author='Ting Pan', author='Ting Pan',
......
...@@ -229,11 +229,35 @@ void Operator<Context>::CleanResource() { ...@@ -229,11 +229,35 @@ void Operator<Context>::CleanResource() {
} }
} }
DEFINE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*); DEFINE_REGISTRY(
DEFINE_REGISTRY(CUDAOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*); CPUOperatorRegistry,
DEFINE_REGISTRY(CUDNNOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*); OperatorBase,
DEFINE_REGISTRY(GradientRegistry, GradientMakerBase, const OperatorDef&, const vector<string>&); const OperatorDef&,
DEFINE_REGISTRY(NoGradientRegistry, GradientMakerBase, const OperatorDef&, const vector<string>&); Workspace*);
DEFINE_REGISTRY(
CUDAOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
DEFINE_REGISTRY(
CUDNNOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
DEFINE_REGISTRY(
GradientRegistry,
GradientMakerBase,
const OperatorDef&,
const vector<string>&);
DEFINE_REGISTRY(
NoGradientRegistry,
GradientMakerBase,
const OperatorDef&,
const vector<string>&);
#define INSTANTIATE_GET_SINGLE_ARGUMENT(T, fieldname) \ #define INSTANTIATE_GET_SINGLE_ARGUMENT(T, fieldname) \
template <> T OperatorBase::Arg( \ template <> T OperatorBase::Arg( \
...@@ -252,7 +276,6 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(string, s) ...@@ -252,7 +276,6 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(string, s)
INSTANTIATE_GET_SINGLE_ARGUMENT(bool, b); INSTANTIATE_GET_SINGLE_ARGUMENT(bool, b);
INSTANTIATE_GET_SINGLE_ARGUMENT(int64_t, i64); INSTANTIATE_GET_SINGLE_ARGUMENT(int64_t, i64);
#define INSTANTIATE_GET_REPEATED_ARGUMENT(T, fieldname) \ #define INSTANTIATE_GET_REPEATED_ARGUMENT(T, fieldname) \
template<> vector<T> OperatorBase::Args<T>(const string& name) { \ template<> vector<T> OperatorBase::Args<T>(const string& name) { \
if(args_.count(name) == 0) return vector<T>(); \ if(args_.count(name) == 0) return vector<T>(); \
......
...@@ -42,16 +42,17 @@ void SparseSoftmaxCrossEntropyOp<Context>::SoftmaxRunFP16() { ...@@ -42,16 +42,17 @@ void SparseSoftmaxCrossEntropyOp<Context>::SoftmaxRunFP16() {
template <class Context> template <typename Tx, typename Ty> template <class Context> template <typename Tx, typename Ty>
void SparseSoftmaxCrossEntropyOp<Context>::RunWithType() { void SparseSoftmaxCrossEntropyOp<Context>::RunWithType() {
auto* prob_data = prob->template data<Tx, Context>(); auto* Pdata = prob->template data<Tx, Context>();
auto* label_data = Input(1).template data<Ty, Context>(); auto* Tdata = Input(1).template data<Ty, Context>();
auto* loss_data = losses.template mutable_data<Tx, Context>(); auto* Idata = !ignores.count() ? nullptr :
auto* valid_data = valid.template mutable_data<Tx, Context>(); ignores.template data<int, Context>();
auto* Ldata = losses.template mutable_data<Tx, Context>();
auto* Fdata = flags.template mutable_data<Tx, Context>();
kernel::SparseSoftmaxCrossEntropy<Tx, Ty, Context>( kernel::SparseSoftmaxCrossEntropy<Tx, Ty, Context>(
Input(0).count(), Input(0).dim(axis), outer_dim, Input(0).dim(axis), inner_dim,
outer_dim, inner_dim, Pdata, Tdata, Idata, ignores.count(),
prob_data, label_data, loss_data, Ldata, Fdata, &ctx());
valid_data, &ignore, &ctx());
if (normalization == "UNIT") { if (normalization == "UNIT") {
Output(0)->ReshapeLike(losses); Output(0)->ReshapeLike(losses);
...@@ -61,11 +62,12 @@ void SparseSoftmaxCrossEntropyOp<Context>::RunWithType() { ...@@ -61,11 +62,12 @@ void SparseSoftmaxCrossEntropyOp<Context>::RunWithType() {
Tx normalizer; Tx normalizer;
if (normalization == "VALID") if (normalization == "VALID")
normalizer = std::max(math::ASum<Tx, Context>(valid.count(), valid_data), (Tx)1.f); normalizer = std::max(
math::ASum<Tx, Context>(flags.count(), Fdata), (Tx)1.f);
else if (normalization == "BATCH_SIZE") normalizer = Input(0).dim(0); else if (normalization == "BATCH_SIZE") normalizer = Input(0).dim(0);
else if (normalization == "FULL") normalizer = outer_dim * inner_dim; else if (normalization == "FULL") normalizer = outer_dim * inner_dim;
else if (normalization == "NONE") normalizer = 1; else if (normalization == "NONE") normalizer = 1;
Tx loss = math::ASum<Tx, Context>(losses.count(), loss_data); Tx loss = math::ASum<Tx, Context>(losses.count(), Ldata);
Output(0)->Reshape({ 1 }); Output(0)->Reshape({ 1 });
auto* Ydata = Output(0)->template mutable_data<Tx, Context>(); auto* Ydata = Output(0)->template mutable_data<Tx, Context>();
math::Set<Tx, Context>(1, loss / normalizer, Ydata); math::Set<Tx, Context>(1, loss / normalizer, Ydata);
...@@ -77,11 +79,12 @@ void SparseSoftmaxCrossEntropyOp<Context>::RunOnDevice() { ...@@ -77,11 +79,12 @@ void SparseSoftmaxCrossEntropyOp<Context>::RunOnDevice() {
inner_dim = Input(0).count(axis + 1); inner_dim = Input(0).count(axis + 1);
CHECK_EQ(outer_dim * inner_dim, Input(1).count()) CHECK_EQ(outer_dim * inner_dim, Input(1).count())
<< "\nNumber of predictions must match the number of labels."; << "\nNumber of predictions must match the number of labels.";
valid.Reshape({ outer_dim * inner_dim });
losses.Reshape({ outer_dim * inner_dim }); losses.Reshape({ outer_dim * inner_dim });
flags.Reshape({ outer_dim * inner_dim });
prob = ws()->CreateTensor("/mnt/" + anchor() + "/softmax/prob"); prob = ws()->CreateTensor("/mnt/" + anchor() + "/softmax/prob");
if (XIsType(Input(0), float) || XIsType(Input(0), float16)) { if (XIsType(Input(0), float) ||
XIsType(Input(0), float16)) {
if (XIsType(Input(0), float16)) SoftmaxRunFP16(); if (XIsType(Input(0), float16)) SoftmaxRunFP16();
else SoftmaxRun(); else SoftmaxRun();
if (XIsType(Input(1), float)) RunWithType<float, float>(); if (XIsType(Input(1), float)) RunWithType<float, float>();
...@@ -98,33 +101,35 @@ OPERATOR_SCHEMA(SparseSoftmaxCrossEntropy).NumInputs(2).NumOutputs(1); ...@@ -98,33 +101,35 @@ OPERATOR_SCHEMA(SparseSoftmaxCrossEntropy).NumInputs(2).NumOutputs(1);
template <class Context> template <typename Tx, typename Ty> template <class Context> template <typename Tx, typename Ty>
void SparseSoftmaxCrossEntropyGradientOp<Context>::RunWithType() { void SparseSoftmaxCrossEntropyGradientOp<Context>::RunWithType() {
auto* label_data = Input(1).template data<Ty, Context>(); auto* Pdata = prob->template mutable_data<Tx, Context>();
auto* prob_data = prob->template mutable_data<Tx, Context>(); auto* Tdata = Input(1).template data<Ty, Context>();
auto* Idata = !ignores.count() ? nullptr :
ignores.template data<int, Context>();
auto* dXdata = Output(0)->template mutable_data<Tx, Context>(); auto* dXdata = Output(0)->template mutable_data<Tx, Context>();
auto* valid_data = valid.template mutable_data<Tx, Context>(); auto* Fdata = flags.template mutable_data<Tx, Context>();
ctx().template Copy<Tx, Context, Context>(prob->count(), dXdata, prob_data); ctx().template Copy<Tx, Context, Context>(
prob->count(), dXdata, Pdata);
kernel::SparseSoftmaxCrossEntropyGrad<Tx, Ty, Context>( kernel::SparseSoftmaxCrossEntropyGrad<Tx, Ty, Context>(
Output(0)->count(), Output(0)->dim(axis), outer_dim, Output(0)->dim(axis), inner_dim,
outer_dim, inner_dim, Pdata, Tdata, Idata, ignores.count(),
prob_data, label_data, valid_data, dXdata, Fdata, &ctx());
&ignore, dXdata, &ctx());
if (normalization == "UNIT") { if (normalization == "UNIT") {
auto* dYdata = Input(-1).template data<Tx, Context>(); auto* dYdata = Input(-1).template data<Tx, Context>();
kernel::SumGrad<Tx, Context>( kernel::SumGrad<Tx, Context>(
Input(0).count() / Input(0).dim(axis), Input(0).count() / Input(0).dim(axis),
Input(0).dim(axis), inner_dim, Input(0).dim(axis), inner_dim,
1.0, dYdata, prob_data); 1.0, dYdata, Pdata);
math::Mul<Tx, Context>( math::Mul<Tx, Context>(
Output(0)->count(), prob_data, dXdata, dXdata); Output(0)->count(), Pdata, dXdata, dXdata);
return; return;
} }
Tx normalizer; Tx normalizer;
if (normalization == "VALID") if (normalization == "VALID")
normalizer = std::max( normalizer = std::max(
math::ASum<Tx, Context>(valid.count(), valid_data), (Tx)1.f); math::ASum<Tx, Context>(flags.count(), Fdata), (Tx)1.f);
else if (normalization == "BATCH_SIZE") normalizer = Input(0).dim(0); else if (normalization == "BATCH_SIZE") normalizer = Input(0).dim(0);
else if (normalization == "FULL") normalizer = outer_dim * inner_dim; else if (normalization == "FULL") normalizer = outer_dim * inner_dim;
else if (normalization == "NONE") normalizer = 1; else if (normalization == "NONE") normalizer = 1;
...@@ -141,7 +146,7 @@ void SparseSoftmaxCrossEntropyGradientOp<Context>::RunOnDevice() { ...@@ -141,7 +146,7 @@ void SparseSoftmaxCrossEntropyGradientOp<Context>::RunOnDevice() {
outer_dim = prob->count(0, axis); outer_dim = prob->count(0, axis);
inner_dim = prob->count(axis + 1); inner_dim = prob->count(axis + 1);
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
valid.Reshape({ outer_dim * inner_dim }); flags.Reshape({ outer_dim * inner_dim });
if (XIsType(Input(0), float) || XIsType(Input(0), float16)) { if (XIsType(Input(0), float) || XIsType(Input(0), float16)) {
if (XIsType(Input(1), float)) RunWithType<float, float>(); if (XIsType(Input(1), float)) RunWithType<float, float>();
......
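For orientation between the file sections: with the softmax output p laid out as (outer_dim, axis_dim, inner_dim) and sparse labels y, each position (o, i) whose label is not listed in ignore_labels contributes

$$\ell_{o,i} = -\log\big(\max(p_{o,\,y_{o,i},\,i},\ \texttt{FLT\_MIN})\big), \qquad f_{o,i} = 1,$$

and ignored positions write zero into both the losses and flags buffers. The scalar output is the sum of losses divided by a normalizer selected by the normalization argument ("UNIT" skips the reduction and returns the per-position losses unchanged):

$$N = \begin{cases} \max\big(\sum_{o,i} f_{o,i},\ 1\big) & \text{VALID} \\ \text{Input(0).dim(0)} & \text{BATCH\_SIZE} \\ \text{outer\_dim} \times \text{inner\_dim} & \text{FULL} \\ 1 & \text{NONE} \end{cases}$$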
...@@ -9,31 +9,33 @@ namespace dragon { ...@@ -9,31 +9,33 @@ namespace dragon {
template <class Context> template <typename T> template <class Context> template <typename T>
void SparseSoftmaxFocalLossOp<Context>::RunWithType() { void SparseSoftmaxFocalLossOp<Context>::RunWithType() {
auto* prob_data = this->prob->template data<T, Context>(); auto* Pdata = this->prob->template data<T, Context>();
auto* label_data = Input(1).template data<T, Context>(); auto* Tdata = Input(1).template data<T, Context>();
auto* loss_data = this->losses.template mutable_data<T, Context>(); auto* Idata = !this->ignores.count() ? nullptr :
auto* valid_data = this->valid.template mutable_data<T, Context>(); this->ignores.template data<int, Context>();
auto* scale_data = scale->template mutable_data<T, Context>(); auto* Ldata = losses.template mutable_data<T, Context>();
auto* Fdata = flags.template mutable_data<T, Context>();
kernel::SparseSoftmaxFocalLoss<T, Context>( kernel::SparseSoftmaxFocalLoss<T, Context>(
Input(0).count(), Input(0).dim(axis), outer_dim, inner_dim, outer_dim, Input(0).dim(axis), inner_dim,
pos_alpha, neg_alpha, gamma, neg_id, pos_alpha, neg_alpha, gamma, neg_id,
prob_data, label_data, scale_data, Pdata, Tdata, Idata, this->ignores.count(),
loss_data, valid_data, &this->ignore); Ldata, Fdata, &ctx());
if (normalization == "UNIT") { if (normalization == "UNIT") {
Output(0)->ReshapeLike(this->losses); Output(0)->ReshapeLike(losses);
Output(0)->template Copy<Context, Context>(this->losses); Output(0)->template Copy<Context, Context>(losses);
return; return;
} }
T normalizer; T normalizer;
if (normalization == "VALID") if (normalization == "VALID")
normalizer = std::max(math::ASum<T, Context>(this->valid.count(), valid_data), 1.f); normalizer = std::max(
math::ASum<T, Context>(flags.count(), Fdata), 1.f);
else if (normalization == "BATCH_SIZE") normalizer = Input(0).dim(0); else if (normalization == "BATCH_SIZE") normalizer = Input(0).dim(0);
else if (normalization == "FULL") normalizer = outer_dim * inner_dim; else if (normalization == "FULL") normalizer = outer_dim * inner_dim;
else if (normalization == "NONE") normalizer = 1; else if (normalization == "NONE") normalizer = 1;
T loss = math::ASum<T, Context>(this->losses.count(), loss_data); T loss = math::ASum<T, Context>(losses.count(), Ldata);
Output(0)->Reshape({ 1 }); Output(0)->Reshape({ 1 });
auto* Ydata = Output(0)->template mutable_data<T, Context>(); auto* Ydata = Output(0)->template mutable_data<T, Context>();
math::Set<T, Context>(1, loss / normalizer, Ydata); math::Set<T, Context>(1, loss / normalizer, Ydata);
...@@ -45,13 +47,11 @@ void SparseSoftmaxFocalLossOp<Context>::RunOnDevice() { ...@@ -45,13 +47,11 @@ void SparseSoftmaxFocalLossOp<Context>::RunOnDevice() {
inner_dim = Input(0).count(axis + 1); inner_dim = Input(0).count(axis + 1);
CHECK_EQ(outer_dim * inner_dim, Input(1).count()) CHECK_EQ(outer_dim * inner_dim, Input(1).count())
<< "\nNumber of predictions must match the number of labels."; << "\nNumber of predictions must match the number of labels.";
this->valid.Reshape({ outer_dim * inner_dim }); flags.Reshape({ outer_dim * inner_dim });
this->losses.Reshape({ outer_dim * inner_dim }); losses.Reshape({ outer_dim * inner_dim });
ws()->CreateTensor("/mnt/" + anchor() + "/softmax/prob"); ws()->CreateTensor("/mnt/" + anchor() + "/softmax/prob");
this->SoftmaxRun(); this->SoftmaxRun();
this->prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax/prob"); this->prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax/prob");
scale = ws()->CreateTensor("/mnt/" + anchor() + "/focal/scale");
scale->ReshapeLike(*this->prob);
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
...@@ -65,31 +65,33 @@ OPERATOR_SCHEMA(SparseSoftmaxFocalLoss).NumInputs(2).NumOutputs(1); ...@@ -65,31 +65,33 @@ OPERATOR_SCHEMA(SparseSoftmaxFocalLoss).NumInputs(2).NumOutputs(1);
template <class Context> template <typename T> template <class Context> template <typename T>
void SparseSoftmaxFocalLossGradientOp<Context>::RunWithType() { void SparseSoftmaxFocalLossGradientOp<Context>::RunWithType() {
auto* label_data = Input(1).template data<T, Context>(); auto* Pdata = this->prob->template mutable_data<T, Context>();
auto* prob_data = this->prob->template mutable_data<T, Context>(); auto* Tdata = Input(1).template data<T, Context>();
auto* Idata = !this->ignores.count() ? nullptr :
this->ignores.template data<int, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
auto* valid_data = this->valid.template mutable_data<T, Context>(); auto* Fdata = flags.template mutable_data<T, Context>();
auto* scale_data = scale->template mutable_data<T, Context>();
kernel::SparseSoftmaxFocalLossGrad<T, Context>( kernel::SparseSoftmaxFocalLossGrad<T, Context>(
Output(0)->count(), Output(0)->dim(axis), outer_dim, inner_dim, outer_dim, Output(0)->dim(axis), inner_dim,
gamma, neg_id, eps, scale_data, prob_data, label_data, pos_alpha, neg_alpha, gamma, neg_id,
valid_data, &this->ignore, dXdata); Pdata, Tdata, Idata, this->ignores.count(),
dXdata, Fdata, &ctx());
if (normalization == "UNIT") { if (normalization == "UNIT") {
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
kernel::SumGrad<T, Context>( kernel::SumGrad<T, Context>(
Input(0).count() / Input(0).dim(axis), Input(0).count() / Input(0).dim(axis),
Input(0).dim(axis), inner_dim, Input(0).dim(axis), inner_dim,
1.0, dYdata, prob_data); 1.0, dYdata, Pdata);
math::Mul<T, Context>(Output(0)->count(), math::Mul<T, Context>(Output(0)->count(),
prob_data, dXdata, dXdata); return; Pdata, dXdata, dXdata); return;
} }
T normalizer; T normalizer;
if (normalization == "VALID") if (normalization == "VALID")
normalizer = std::max( normalizer = std::max(
math::ASum<T, Context>(this->valid.count(), valid_data), 1.f); math::ASum<T, Context>(flags.count(), Fdata), 1.f);
else if (normalization == "BATCH_SIZE") normalizer = Input(0).dim(0); else if (normalization == "BATCH_SIZE") normalizer = Input(0).dim(0);
else if (normalization == "FULL") normalizer = outer_dim * inner_dim; else if (normalization == "FULL") normalizer = outer_dim * inner_dim;
else if (normalization == "NONE") normalizer = 1; else if (normalization == "NONE") normalizer = 1;
...@@ -103,11 +105,10 @@ void SparseSoftmaxFocalLossGradientOp<Context>::RunWithType() { ...@@ -103,11 +105,10 @@ void SparseSoftmaxFocalLossGradientOp<Context>::RunWithType() {
template <class Context> template <class Context>
void SparseSoftmaxFocalLossGradientOp<Context>::RunOnDevice() { void SparseSoftmaxFocalLossGradientOp<Context>::RunOnDevice() {
this->prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax/prob"); this->prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax/prob");
scale = ws()->GetTensor("/mnt/" + anchor() + "/focal/scale");
outer_dim = this->prob->count(0, axis); outer_dim = this->prob->count(0, axis);
inner_dim = this->prob->count(axis + 1); inner_dim = this->prob->count(axis + 1);
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
this->valid.Reshape({ outer_dim * inner_dim }); flags.Reshape({ outer_dim * inner_dim });
if (XIsType(Input(0), float)) RunWithType<float>(); if (XIsType(Input(0), float)) RunWithType<float>();
else LOG(FATAL) << DTypeHelper(Input(0), { "float32" }); else LOG(FATAL) << DTypeHelper(Input(0), { "float32" });
......
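The re-implemented focal loss follows Lin et al.'s formulation. Writing p_t for the softmax probability of the true class at a position, each valid position contributes

$$\mathrm{FL}(p_t) = -\,\alpha_t\,(1 - p_t)^{\gamma}\,\log(p_t),$$

with alpha_t chosen between pos_alpha and neg_alpha according to whether the label falls on the positive side of neg_id. How the single alpha parameter maps onto the pair is not visible in this diff; the conventional split, pos_alpha = alpha and neg_alpha = 1 - alpha with the new default alpha = 0.25, is an assumption. With gamma = 2, well-classified positions (p_t near 1) are sharply down-weighted, and since the (1 - p_t)^gamma factor can be evaluated inline, the separately materialized focal scale tensor ("/focal/scale") from the old implementation is removed from both the forward and gradient paths.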
...@@ -3,12 +3,15 @@ ...@@ -3,12 +3,15 @@
#ifdef WITH_PYTHON #ifdef WITH_PYTHON
#ifdef WITH_PYTHON3 #ifdef WITH_PYTHON3
#define PyBytes_FromStringAndSize PyUnicode_FromStringAndSize #define PyBytes_FromStringAndSize \
PyUnicode_FromStringAndSize
#endif #endif
#define String(str) \ #define Bytes(str) \
PyBytes_FromStringAndSize(str, string(str).size()) PyBytes_FromStringAndSize(str, string(str).size())
#define CS2Bytes(cstr) Bytes(cstr.c_str())
namespace dragon { namespace dragon {
template <class Context> template <class Context>
...@@ -17,6 +20,9 @@ RunOp<Context>::RunOp(const OperatorDef& def, Workspace* ws) ...@@ -17,6 +20,9 @@ RunOp<Context>::RunOp(const OperatorDef& def, Workspace* ws)
module(OperatorBase::Arg<string>("module", "")), module(OperatorBase::Arg<string>("module", "")),
op(OperatorBase::Arg<string>("op", "")), op(OperatorBase::Arg<string>("op", "")),
param_str((OperatorBase::Arg<string>("param_str", ""))) { param_str((OperatorBase::Arg<string>("param_str", ""))) {
// optimization for all python ops
if (!AllowRun()) return; this->do_sync_ = false;
// init interpreter & load module // init interpreter & load module
Py_Initialize(); Py_Initialize();
PyObject* py_module = PyImport_ImportModule(module.c_str()); PyObject* py_module = PyImport_ImportModule(module.c_str());
...@@ -27,37 +33,38 @@ RunOp<Context>::RunOp(const OperatorDef& def, Workspace* ws) ...@@ -27,37 +33,38 @@ RunOp<Context>::RunOp(const OperatorDef& def, Workspace* ws)
<< " from module: " << module; << " from module: " << module;
self = PyObject_CallObject(py_op, NULL); self = PyObject_CallObject(py_op, NULL);
// pass param string // wrap inputs and outputs
PyObject_SetAttr(self, String("param_str"), String(param_str.c_str()));
PyObject_SetAttr(self, String("param_str_"), String(param_str.c_str()));
// build inputs and outputs for Python
inputs = PyList_New(InputSize()); inputs = PyList_New(InputSize());
for (int i = 0; i < InputSize(); i++) for (int i = 0; i < InputSize(); i++)
PyList_SetItem(inputs, i, String(Input(i).name().c_str())); PyList_SetItem(inputs, i, CS2Bytes(Input(i).name()));
outputs = PyList_New(OutputSize()); outputs = PyList_New(OutputSize());
for (int i = 0; i < OutputSize(); i++) for (int i = 0; i < OutputSize(); i++)
PyList_SetItem(outputs, i, String(Output(i)->name().c_str())); PyList_SetItem(outputs, i, CS2Bytes(Output(i)->name()));
if (!AllowRun()) return;
// backward compatibility: param_str
PyObject_SetAttr(self, Bytes("param_str"), CS2Bytes(param_str));
PyObject_SetAttr(self, Bytes("param_str_"), CS2Bytes(param_str));
// setup // backward compatibility: self.setup(inputs, outputs)
if (PyObject_HasAttr(self, String("setup"))) if (PyObject_HasAttr(self, Bytes("setup"))) {
PyObject_CallMethod(self, "setup", "OO", inputs, outputs); PyObject_CallMethod(self, "setup", "OO", inputs, outputs);
}
} }
template <class Context> template <class Context>
void RunOp<Context>::RunOnDevice() { void RunOp<Context>::RunOnDevice() {
// init phase // reset phase
PyObject_SetAttr(self, String("phase"), String(phase().c_str())); PyObject_SetAttr(self, Bytes("phase"), CS2Bytes(phase()));
// reshape // backward compatibility: reshape(inputs, outputs)
if (PyObject_HasAttr(self, String("reshape"))) if (PyObject_HasAttr(self, Bytes("reshape"))) {
PyObject_CallMethod(self, "reshape", "OO", inputs, outputs); PyObject_CallMethod(self, "reshape", "OO", inputs, outputs);
}
// run // overloaded run interfaces
if (PyObject_HasAttr(self, String("forward"))) { if (PyObject_HasAttr(self, Bytes("forward"))) {
PyObject_CallMethod(self, "forward", "OO", inputs, outputs); PyObject_CallMethod(self, "forward", "OO", inputs, outputs);
} else if (PyObject_HasAttr(self, String("run"))) { } else if (PyObject_HasAttr(self, Bytes("run"))) {
PyObject_CallMethod(self, "run", "OO", inputs, outputs); PyObject_CallMethod(self, "run", "OO", inputs, outputs);
} }
} }
...@@ -72,18 +79,23 @@ NO_GRADIENT(Run); ...@@ -72,18 +79,23 @@ NO_GRADIENT(Run);
template <class Context> template <class Context>
void TemplateGradientOp<Context>::RunOnDevice() { void TemplateGradientOp<Context>::RunOnDevice() {
// init phase // reset phase
PyObject_SetAttr(this->self, String("phase"), String(phase().c_str())); PyObject_SetAttr(this->self,
Bytes("phase"), CS2Bytes(phase()));
// reshape
if (PyObject_HasAttr(this->self, String("reshape"))) // backward compatibility: reshape(inputs, outputs)
PyObject_CallMethod(this->self, "reshape", "OO", this->inputs, this->outputs); if (PyObject_HasAttr(this->self, Bytes("reshape"))) {
PyObject_CallMethod(this->self, "reshape",
// run "OO", this->inputs, this->outputs);
if (PyObject_HasAttr(this->self, String("backward"))) { }
PyObject_CallMethod(this->self, "forward", "OO", this->inputs, this->outputs);
} else if (PyObject_HasAttr(this->self, String("grad"))) { // overloaded run interfaces
PyObject_CallMethod(this->self, "grad", "OO", this->inputs, this->outputs); if (PyObject_HasAttr(this->self, Bytes("backward"))) {
PyObject_CallMethod(this->self, "forward",
"OO", this->inputs, this->outputs);
} else if (PyObject_HasAttr(this->self, Bytes("grad"))) {
PyObject_CallMethod(this->self, "grad",
"OO", this->inputs, this->outputs);
} }
} }
......
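Taken together, the calls above define the full duck-typed contract RunOp expects from a Python class: optional setup(inputs, outputs) and reshape(inputs, outputs), a forward(inputs, outputs) or run(inputs, outputs) entry point (backward or grad on the gradient path), plus param_str and phase attributes injected from the C++ side before the respective calls. A minimal sketch of a conforming class; the actual tensor access is left abstract since it goes through whatever workspace helpers the framework provides:

    class MyPythonOp(object):
        """Minimal sketch of a class runnable by RunOp."""

        def setup(self, inputs, outputs):
            # Called once after construction; C++ has already set
            # self.param_str from the 'param_str' operator argument.
            self.config = self.param_str

        def reshape(self, inputs, outputs):
            # Optional: called before every run; 'inputs' and 'outputs'
            # are lists of workspace tensor names, not arrays.
            pass

        def forward(self, inputs, outputs):
            # self.phase is reset to 'TRAIN' or 'TEST' before each run.
            # Fetch and feed tensors by name through the workspace here.
            pass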
...@@ -235,7 +235,7 @@ void ConvOpBase<Context>::Reshape() { ...@@ -235,7 +235,7 @@ void ConvOpBase<Context>::Reshape() {
weight_shape.push_back(conv_in_channels / group); weight_shape.push_back(conv_in_channels / group);
weight_shape.push_back(conv_out_channels); weight_shape.push_back(conv_out_channels);
} }
bias_shape.assign(1, num_output); bias_shape = { num_output };
// determine the bottom and top shape // determine the bottom and top shape
bottom_shape = Input(0).dims(); bottom_shape = Input(0).dims();
......
...@@ -604,140 +604,137 @@ template <> void SoftmaxCrossEntropy<float, CPUContext>( ...@@ -604,140 +604,137 @@ template <> void SoftmaxCrossEntropy<float, CPUContext>(
template <typename Tx, typename Ty> template <typename Tx, typename Ty>
void _SparseSoftmaxCrossEntropy( void _SparseSoftmaxCrossEntropy(
const int count,
const int classes,
const int outer_dim, const int outer_dim,
const int axis_dim,
const int inner_dim, const int inner_dim,
const Tx* prob, const Tx* prob,
const Ty* labels, const Ty* labels,
Tx* loss, const int* ignores,
Tx* valid, const int num_ignores,
Tensor* ignore) { Tx* losses,
const int* ignores = ignore->count() > 0 ? Tx* flags) {
ignore->data<int, CPUContext>() : nullptr; for (int oix = 0; oix < outer_dim; ++oix) {
const int dim = count / outer_dim; for (int iix = 0; iix < inner_dim; ++iix) {
for (int i = 0; i < outer_dim; ++i) { const int idx = oix * inner_dim + iix;
for (int j = 0; j < inner_dim; ++j) {
const int idx = i * inner_dim + j;
const int label = labels[idx]; const int label = labels[idx];
int k; int k;
for (k = 0; k < ignore->count(); ++k) { for (k = 0; k < num_ignores; ++k) {
if (label == ignores[k]) { if (label == ignores[k]) {
loss[idx] = valid[idx] = 0; losses[idx] = flags[idx] = 0;
break; break;
} }
} }
if (k == ignore->count()) { if (k == num_ignores) {
Tx labeled_prob = prob[i * dim + label * inner_dim + j]; const int t = (oix * axis_dim + label) * inner_dim + iix;
loss[idx] = -std::log(std::max(labeled_prob, FLT_MIN)); losses[idx] = -std::log(std::max(prob[t], FLT_MIN));
valid[idx] = 1; flags[idx] = 1;
} }
} }
} }
} }
template <> void SparseSoftmaxCrossEntropy<float, float, CPUContext>( template <> void SparseSoftmaxCrossEntropy<float, float, CPUContext>(
const int count,
const int classes,
const int outer_dim, const int outer_dim,
const int axis_dim,
const int inner_dim, const int inner_dim,
const float* prob, const float* prob,
const float* labels, const float* labels,
float* loss, const int* ignores,
float* valid, const int num_ignores,
Tensor* ignore, float* losses,
float* flags,
CPUContext* ctx) { CPUContext* ctx) {
_SparseSoftmaxCrossEntropy<float, float>( _SparseSoftmaxCrossEntropy<float, float>(
count, classes, outer_dim, inner_dim, outer_dim, axis_dim, inner_dim,
prob, labels, loss, valid, ignore); prob, labels, ignores, num_ignores,
losses, flags);
} }
template <> void SparseSoftmaxCrossEntropy<float, int64_t, CPUContext>( template <> void SparseSoftmaxCrossEntropy<float, int64_t, CPUContext>(
const int count,
const int classes,
const int outer_dim, const int outer_dim,
const int axis_dim,
const int inner_dim, const int inner_dim,
const float* prob, const float* prob,
const int64_t* labels, const int64_t* labels,
float* loss, const int* ignores,
float* valid, const int num_ignores,
Tensor* ignore, float* losses,
float* flags,
CPUContext* ctx) { CPUContext* ctx) {
_SparseSoftmaxCrossEntropy<float, int64_t>( _SparseSoftmaxCrossEntropy<float, int64_t>(
count, classes, outer_dim, inner_dim, outer_dim, axis_dim, inner_dim,
prob, labels, loss, valid, ignore); prob, labels, ignores, num_ignores,
losses, flags);
} }
template <typename Tx, typename Ty> template <typename Tx, typename Ty>
void _SparseSoftmaxCrossEntropyGrad( void _SparseSoftmaxCrossEntropyGrad(
const int count,
const int classes,
const int outer_dim, const int outer_dim,
const int axis_dim,
const int inner_dim, const int inner_dim,
const Tx* prob, const Tx* prob,
const Ty* labels, const Ty* labels,
Tx* valid, const int* ignores,
Tensor* ignore, const int num_ignores,
Tx* dx) { Tx* dx,
int dim = count / outer_dim; Tx* flags) {
const int* ignores = ignore->count() > 0 ? flags[0] = 0;
ignore->data <int, CPUContext>() : nullptr; for (int oix = 0; oix < outer_dim; ++oix) {
valid[0] = 0; for (int iix = 0; iix < inner_dim; ++iix) {
for (int i = 0; i < outer_dim; ++i) { const int label = labels[oix * inner_dim + iix];
for (int j = 0; j < inner_dim; ++j) {
const int label = labels[i * inner_dim + j];
int k; int k;
for (k = 0; k < ignore->count(); ++k) for (k = 0; k < num_ignores; ++k)
if (label == ignores[k]) break; if (label == ignores[k]) break;
if (k != ignore->count()) { if (k != num_ignores) {
for (int c = 0; c < classes; ++c) for (int c = 0; c < axis_dim; ++c)
dx[i * dim + c * inner_dim + j] = 0; dx[(oix * axis_dim + c) * inner_dim + iix] = 0;
} else { } else {
dx[i * dim + label * inner_dim + j] -= 1; dx[(oix * axis_dim + label) * inner_dim + iix] -= 1;
valid[0]++; flags[0]++;
} }
} }
} }
} }
template<> void SparseSoftmaxCrossEntropyGrad<float, float, CPUContext>( template<> void SparseSoftmaxCrossEntropyGrad<float, float, CPUContext>(
const int count,
const int classes,
const int outer_dim, const int outer_dim,
const int axis_dim,
const int inner_dim, const int inner_dim,
const float* prob, const float* prob,
const float* labels, const float* labels,
float* valid, const int* ignores,
Tensor* ignore, const int num_ignores,
float* dx, float* dx,
float* flags,
CPUContext* ctx) { CPUContext* ctx) {
_SparseSoftmaxCrossEntropyGrad<float, float>( _SparseSoftmaxCrossEntropyGrad<float, float>(
count, classes, outer_dim, inner_dim, outer_dim, axis_dim, inner_dim,
prob, labels, valid, ignore, dx); prob, labels, ignores,
num_ignores, dx, flags);
} }
template<> void SparseSoftmaxCrossEntropyGrad<float, int64_t, CPUContext>( template<> void SparseSoftmaxCrossEntropyGrad<float, int64_t, CPUContext>(
const int count,
const int classes,
const int outer_dim, const int outer_dim,
const int axis_dim,
const int inner_dim, const int inner_dim,
const float* prob, const float* prob,
const int64_t* labels, const int64_t* labels,
float* valid, const int* ignores,
Tensor* ignore, const int num_ignores,
float* dx, float* dx,
float* flags,
CPUContext* ctx) { CPUContext* ctx) {
_SparseSoftmaxCrossEntropyGrad<float, int64_t>( _SparseSoftmaxCrossEntropyGrad<float, int64_t>(
count, classes, outer_dim, inner_dim, outer_dim, axis_dim, inner_dim,
prob, labels, valid, ignore, dx); prob, labels, ignores,
num_ignores, dx, flags);
} }
/******************** loss.sparse_softmax_focal_loss ********************/ /******************** loss.sparse_softmax_focal_loss ********************/
template <> void SparseSoftmaxFocalLoss<float, CPUContext>( template <> void SparseSoftmaxFocalLoss<float, CPUContext>(
const int count,
const int classes,
const int outer_dim, const int outer_dim,
const int axis_dim,
const int inner_dim, const int inner_dim,
const float pos_alpha, const float pos_alpha,
const float neg_alpha, const float neg_alpha,
...@@ -745,84 +742,78 @@ template <> void SparseSoftmaxFocalLoss<float, CPUContext>( ...@@ -745,84 +742,78 @@ template <> void SparseSoftmaxFocalLoss<float, CPUContext>(
const int neg_id, const int neg_id,
const float* prob, const float* prob,
const float* labels, const float* labels,
float* scale, const int* ignores,
float* loss, const int num_ignores,
float* valid, float* losses,
Tensor* ignore) { float* flags,
const int* ignores = ignore->count() > 0 ? CPUContext* ctx) {
ignore->data<int, CPUContext>() : nullptr; for (int oix = 0; oix < outer_dim; ++oix) {
const int dim = count / outer_dim; for (int iix = 0; iix < inner_dim; ++iix) {
#ifdef WITH_OMP const int idx = oix * inner_dim + iix;
#pragma omp parallel for num_threads(GET_OMP_THREADS(count))
#endif
for (int i = 0; i < count; ++i)
scale[i] = std::pow((1.0f - prob[i]), gamma);
for (int i = 0; i < outer_dim; ++i) {
for (int j = 0; j < inner_dim; ++j) {
const int idx = i * inner_dim + j;
const int label = labels[idx]; const int label = labels[idx];
int k; int k;
for (k = 0; k < ignore->count(); ++k) { for (k = 0; k < num_ignores; ++k) {
if (label == ignores[k]) { if (label == ignores[k]) {
loss[idx] = valid[idx] = 0; losses[idx] = flags[idx] = 0;
break; break;
} }
} }
if (k == ignore->count()) { if (k == num_ignores) {
const int t_ = i * dim + label * inner_dim + j; const int t = (oix * axis_dim + label) * inner_dim + iix;
float labeled_prob = std::max(prob[t_], FLT_MIN); float labeled_prob = std::max(prob[t], FLT_MIN);
scale[t_] = label > neg_id ? float scale = std::pow((1.f - prob[t]), gamma);
pos_alpha * scale[t_] : neg_alpha * scale[t_]; scale = label > neg_id ?
loss[idx] = -scale[t_] * std::log(labeled_prob); pos_alpha * scale : neg_alpha * scale;
valid[idx] = label > neg_id ? 1 : 0; losses[idx] = -scale * std::log(labeled_prob);
flags[idx] = label > neg_id ? 1 : 0;
} }
} }
} }
} }
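The scale folded into the loop above is the focal term of Lin et al., FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t), which reduces to plain cross-entropy at gamma = 0. A toy comparison of the two (values are illustrative only):

// Focal loss for one well-classified positive: gamma = 2 sharply
// down-weights it relative to plain cross-entropy.
#include <cmath>
#include <cstdio>

int main() {
    const float pos_alpha = 0.25f, gamma = 2.f;
    const float p = 0.9f;  // probability assigned to the true class
    const float ce = -std::log(p);                                        // ~0.105
    const float fl = -pos_alpha * std::pow(1.f - p, gamma) * std::log(p); // ~0.00026
    std::printf("ce=%f fl=%f\n", ce, fl);
    return 0;
}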
template<> void SparseSoftmaxFocalLossGrad<float, CPUContext>( template<> void SparseSoftmaxFocalLossGrad<float, CPUContext>(
const int count,
const int classes,
const int outer_dim, const int outer_dim,
const int axis_dim,
const int inner_dim, const int inner_dim,
const float pos_alpha,
const float neg_alpha,
const float gamma, const float gamma,
const int neg_id, const int neg_id,
const float eps,
const float* scale,
const float* prob, const float* prob,
const float* labels, const float* labels,
float* valid, const int* ignores,
Tensor* ignore, const int num_ignores,
float* dx) { float* dx,
int dim = count / outer_dim; float* flags,
const int* ignores = ignore->count() > 0 ? CPUContext* ctx) {
ignore->data <int, CPUContext>() : nullptr; flags[0] = 0;
valid[0] = 0; for (int oix = 0; oix < outer_dim; ++oix) {
for (int i = 0; i < outer_dim; ++i) { for (int iix = 0; iix < inner_dim; ++iix) {
for (int j = 0; j < inner_dim; ++j) { const int label = labels[oix * inner_dim + iix];
const int label = labels[i * inner_dim + j];
int k; int k;
for (k = 0; k < ignore->count(); ++k) for (k = 0; k < num_ignores; ++k)
if (label == ignores[k]) break; if (label == ignores[k]) break;
if (k != ignore->count()) { if (k != num_ignores) {
for (int c = 0; c < classes; ++c) for (int c = 0; c < axis_dim; ++c)
dx[i * dim + c * inner_dim + j] = 0; dx[(oix * axis_dim + c) * inner_dim + iix] = 0;
} else { } else {
const int t_ = i * dim + label * inner_dim + j; const int t = (oix * axis_dim + label) * inner_dim + iix;
float grad = -gamma float onemp = 1.f - prob[t];
* (scale[t_] / std::max((1.0f - prob[t_]), eps)) // unstable if gamma is 0: 0 * pow(onemp, -1) can give NaN as onemp -> 0
* std::log(std::max(prob[t_], FLT_MIN)) float grad = -gamma * std::pow(onemp, gamma - 1)
* prob[t_] + scale[t_]; * std::log(std::max(prob[t], FLT_MIN))
for (int c = 0; c < classes; ++c) { * prob[t] + std::pow(onemp, gamma);
const int i_ = i * dim + c * inner_dim + j; grad = label > neg_id ?
pos_alpha * grad : neg_alpha * grad;
for (int c = 0; c < axis_dim; ++c) {
const int i_ = (oix * axis_dim + c) * inner_dim + iix;
if (c == label) { if (c == label) {
dx[i_] = grad * (prob[t_] - 1); dx[i_] = grad * (prob[t] - 1);
} else { } else {
dx[i_] = grad * prob[i_]; dx[i_] = grad * prob[i_];
} }
} }
if (label > neg_id) valid[0]++; if (label > neg_id) flags[0]++;
} }
} }
} }
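The closed form for grad follows from differentiating -(1 - p_t)^gamma * log(p_t) and composing with the softmax Jacobian dp_t/dz_c = p_t * ([c == label] - p_c). A finite-difference sanity check of the per-logit gradient as implemented above, with alpha folded in the way the kernel does (toy values, illustrative only):

// Checks the analytic focal-loss gradient against central differences for
// L(z) = -alpha * (1 - p_t)^gamma * log(p_t), p = softmax(z), t = label.
#include <cmath>
#include <cstdio>

int main() {
    const int C = 3, label = 2;
    const double alpha = 0.25, gamma = 2.0;
    double z[3] = { 0.3, -0.7, 1.1 };
    auto softmax = [&](const double* zz, double* p) {
        double m = 0;
        for (int c = 0; c < C; ++c) m += std::exp(zz[c]);
        for (int c = 0; c < C; ++c) p[c] = std::exp(zz[c]) / m;
    };
    auto loss = [&](const double* zz) {
        double p[3]; softmax(zz, p);
        return -alpha * std::pow(1 - p[label], gamma) * std::log(p[label]);
    };
    double p[3]; softmax(z, p);
    const double pt = p[label], onemp = 1 - pt;
    // same closed form as the kernel, with alpha folded in
    const double grad = alpha * (-gamma * std::pow(onemp, gamma - 1)
                                 * std::log(pt) * pt + std::pow(onemp, gamma));
    for (int c = 0; c < C; ++c) {
        double zp[3] = { z[0], z[1], z[2] }, zm[3] = { z[0], z[1], z[2] };
        const double h = 1e-6;
        zp[c] += h; zm[c] -= h;
        const double fd = (loss(zp) - loss(zm)) / (2 * h);
        const double analytic = (c == label) ? grad * (pt - 1) : grad * p[c];
        std::printf("c=%d fd=%.8f analytic=%.8f\n", c, fd, analytic);
    }
    return 0;
}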
......
...@@ -938,205 +938,186 @@ template <> void SoftmaxCrossEntropy<float, CUDAContext>( ...@@ -938,205 +938,186 @@ template <> void SoftmaxCrossEntropy<float, CUDAContext>(
template <typename Tx, typename Ty> template <typename Tx, typename Ty>
__global__ void _SparseSoftmaxCrossEntropy( __global__ void _SparseSoftmaxCrossEntropy(
const int count, const int count,
const int axis_dim,
const int inner_dim,
const Tx* prob, const Tx* prob,
const Ty* labels, const Ty* labels,
Tx* loss,
const int classes,
const int inner_dim,
const int* ignores, const int* ignores,
const int ignore_num, const int num_ignores,
Tx* valid) { Tx* losses,
Tx* flags) {
CUDA_KERNEL_LOOP(idx, count) { CUDA_KERNEL_LOOP(idx, count) {
const int o_idx = idx / inner_dim; const int oix = idx / inner_dim;
const int i_idx = idx % inner_dim; const int iix = idx % inner_dim;
const int label = labels[o_idx * inner_dim + i_idx]; const int label = labels[oix * inner_dim + iix];
int k; int k;
for (k = 0; k < ignore_num; k++) { for (k = 0; k < num_ignores; k++) {
if (label == ignores[k]) { if (label == ignores[k]) {
loss[idx] = valid[idx] = 0; losses[idx] = flags[idx] = 0;
break; break;
} }
} }
if (k == ignore_num) { if (k == num_ignores) {
loss[idx] = -log( losses[idx] = -log(
max(prob[(o_idx * classes + label) max(prob[(oix * axis_dim + label)
* inner_dim + i_idx], FLT_MIN) * inner_dim + iix], FLT_MIN)
); );
valid[idx] = 1; flags[idx] = 1;
} }
} }
} }
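CUDA_KERNEL_LOOP in the kernel above is the usual Caffe-style grid-stride loop; a sketch of how such a macro is typically defined (an assumption — the project's actual header may differ):

// Grid-stride loop: each thread walks the flat index space in steps of the
// total launched thread count, so the launch is correct for any n.
#define CUDA_KERNEL_LOOP(i, n) \
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
         i < (n); \
         i += blockDim.x * gridDim.x)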
template <> void SparseSoftmaxCrossEntropy<float, float, CUDAContext>( template <> void SparseSoftmaxCrossEntropy<float, float, CUDAContext>(
const int count,
const int classes,
const int outer_dim, const int outer_dim,
const int axis_dim,
const int inner_dim, const int inner_dim,
const float* prob, const float* prob,
const float* labels, const float* labels,
float* loss, const int* ignores,
float* valid, const int num_ignores,
Tensor* ignore, float* losses,
float* flags,
CUDAContext* ctx) { CUDAContext* ctx) {
const int* ignores = ignore->count() > 0 ?
ignore->data<int, CUDAContext>() : nullptr;
const int num_preds = outer_dim * inner_dim; const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxCrossEntropy<float, float> _SparseSoftmaxCrossEntropy<float, float>
<< <CUDA_BLOCKS(num_preds), CUDA_THREADS, << <CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >( 0, ctx->cuda_stream() >> >(
num_preds, prob, labels, loss, num_preds, axis_dim, inner_dim,
classes, inner_dim, prob, labels, ignores, num_ignores,
ignores, ignore->count(), valid); losses, flags);
} }
template <> void SparseSoftmaxCrossEntropy<float, int64_t, CUDAContext>( template <> void SparseSoftmaxCrossEntropy<float, int64_t, CUDAContext>(
const int count,
const int classes,
const int outer_dim, const int outer_dim,
const int axis_dim,
const int inner_dim, const int inner_dim,
const float* prob, const float* prob,
const int64_t* labels, const int64_t* labels,
float* loss, const int* ignores,
float* valid, const int num_ignores,
Tensor* ignore, float* losses,
float* flags,
CUDAContext* ctx) { CUDAContext* ctx) {
const int* ignores = ignore->count() > 0 ?
ignore->data<int, CUDAContext>() : nullptr;
const int num_preds = outer_dim * inner_dim; const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxCrossEntropy<float, int64_t> _SparseSoftmaxCrossEntropy<float, int64_t>
<< <CUDA_BLOCKS(num_preds), CUDA_THREADS, << <CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >( 0, ctx->cuda_stream() >> >(
num_preds, prob, labels, loss, num_preds, axis_dim, inner_dim,
classes, inner_dim, prob, labels, ignores, num_ignores,
ignores, ignore->count(), valid); losses, flags);
} }
template <typename Tx, typename Ty> template <typename Tx, typename Ty>
__global__ void _SparseSoftmaxCrossEntropyGrad( __global__ void _SparseSoftmaxCrossEntropyGrad(
const int count, const int count,
const int axis_dim,
const int inner_dim,
const Tx* prob, const Tx* prob,
const Ty* labels, const Ty* labels,
Tx* dx,
const int classes,
const int inner_dim,
const int* ignores, const int* ignores,
const int ignore_num, const int num_ignores,
Tx* valid) { Tx* dx,
Tx* flags) {
CUDA_KERNEL_LOOP(idx, count) { CUDA_KERNEL_LOOP(idx, count) {
const int o_idx = idx / inner_dim; const int oix = idx / inner_dim;
const int i_idx = idx % inner_dim; const int iix = idx % inner_dim;
const int label = labels[o_idx * inner_dim + i_idx]; const int label = labels[oix * inner_dim + iix];
int k; int k;
for (k = 0; k < ignore_num; k++) for (k = 0; k < num_ignores; k++)
if (label == ignores[k]) break; if (label == ignores[k]) break;
if (k != ignore_num) { if (k != num_ignores) {
for (int c = 0; c < classes; c++) for (int c = 0; c < axis_dim; c++)
dx[(o_idx * classes + c) * inner_dim + i_idx] = 0; dx[(oix * axis_dim + c) * inner_dim + iix] = 0;
valid[idx] = 0; flags[idx] = 0;
} else { } else {
dx[(o_idx * classes + label) * inner_dim + i_idx] -= 1; dx[(oix * axis_dim + label) * inner_dim + iix] -= 1;
valid[idx] = 1; flags[idx] = 1;
} }
} }
} }
template<> void SparseSoftmaxCrossEntropyGrad<float, float, CUDAContext>( template<> void SparseSoftmaxCrossEntropyGrad<float, float, CUDAContext>(
const int count,
const int classes,
const int outer_dim, const int outer_dim,
const int axis_dim,
const int inner_dim, const int inner_dim,
const float* prob, const float* prob,
const float* labels, const float* labels,
float* valid, const int* ignores,
Tensor* ignore, const int num_ignores,
float* dXdata, float* dx,
float* flags,
CUDAContext* ctx) { CUDAContext* ctx) {
const int* ignores = ignore->count() > 0 ?
ignore->data <int, CUDAContext >() : nullptr;
const int num_preds = outer_dim * inner_dim; const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxCrossEntropyGrad<float, float> _SparseSoftmaxCrossEntropyGrad<float, float>
<< <CUDA_BLOCKS(num_preds), CUDA_THREADS, << <CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >( 0, ctx->cuda_stream() >> >(
num_preds, prob, labels, dXdata, num_preds, axis_dim, inner_dim,
classes, inner_dim, prob, labels, ignores, num_ignores,
ignores, ignore->count(), valid); dx, flags);
} }
template<> void SparseSoftmaxCrossEntropyGrad<float, int64_t, CUDAContext>( template<> void SparseSoftmaxCrossEntropyGrad<float, int64_t, CUDAContext>(
const int count,
const int classes,
const int outer_dim, const int outer_dim,
const int axis_dim,
const int inner_dim, const int inner_dim,
const float* prob, const float* prob,
const int64_t* labels, const int64_t* labels,
float* valid, const int* ignores,
Tensor* ignore, const int num_ignores,
float* dXdata, float* dx,
float* flags,
CUDAContext* ctx) { CUDAContext* ctx) {
const int* ignores = ignore->count() > 0 ?
ignore->data <int, CUDAContext >() : nullptr;
const int num_preds = outer_dim * inner_dim; const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxCrossEntropyGrad<float, int64_t> _SparseSoftmaxCrossEntropyGrad<float, int64_t>
<< <CUDA_BLOCKS(num_preds), CUDA_THREADS, << <CUDA_BLOCKS(num_preds), CUDA_THREADS,
0, ctx->cuda_stream() >> >( 0, ctx->cuda_stream() >> >(
num_preds, prob, labels, dXdata, num_preds, axis_dim, inner_dim,
classes, inner_dim, prob, labels, ignores, num_ignores,
ignores, ignore->count(), valid); dx, flags);
} }
/******************** loss.sparse_softmax_focal_loss ********************/ /******************** loss.sparse_softmax_focal_loss ********************/
template <typename T> template <typename T>
__global__ void _SparseSoftmaxFocalScale(
const int count,
const float gamma,
const T* prob,
T* scale) {
CUDA_KERNEL_LOOP(idx, count) {
scale[idx] = std::pow((1.0f - prob[idx]), gamma);
}
}
template <typename T>
__global__ void _SparseSoftmaxFocalLoss( __global__ void _SparseSoftmaxFocalLoss(
const int count, const int count,
const int axis_dim,
const int inner_dim,
const float pos_alpha, const float pos_alpha,
const float neg_alpha, const float neg_alpha,
const float gamma,
const int neg_id, const int neg_id,
T* scale,
const T* prob, const T* prob,
const T* labels, const T* labels,
T* loss,
const int classes,
const int inner_dim,
const int* ignores, const int* ignores,
const int ignore_num, const int num_ignores,
T* valid) { T* losses,
T* flags) {
CUDA_KERNEL_LOOP(idx, count) { CUDA_KERNEL_LOOP(idx, count) {
const int o_idx = idx / inner_dim; const int oix = idx / inner_dim;
const int i_idx = idx % inner_dim; const int iix = idx % inner_dim;
const int label = labels[o_idx * inner_dim + i_idx]; const int label = labels[oix * inner_dim + iix];
int k; int k;
for (k = 0; k < ignore_num; k++) { for (k = 0; k < num_ignores; k++) {
if (label == ignores[k]) { if (label == ignores[k]) {
loss[idx] = valid[idx] = 0; losses[idx] = flags[idx] = 0;
break; break;
} }
} }
if (k == ignore_num) { if (k == num_ignores) {
const int t_ = (o_idx * classes + label) * inner_dim + i_idx; const int t = (oix * axis_dim + label) * inner_dim + iix;
scale[t_] = label > neg_id ? pos_alpha * scale[t_] : T scale = pow(1.f - prob[t], gamma);
neg_alpha * scale[t_]; scale = label > neg_id ?
loss[idx] = -scale[t_] * std::log(max(prob[t_], FLT_MIN)); pos_alpha * scale : neg_alpha * scale;
valid[idx] = label > neg_id ? 1 : 0; losses[idx] = -scale * std::log(max(prob[t], FLT_MIN));
flags[idx] = label > neg_id ? 1 : 0;
} }
} }
} }
template <> void SparseSoftmaxFocalLoss<float, CUDAContext>( template <> void SparseSoftmaxFocalLoss<float, CUDAContext>(
const int count,
const int classes,
const int outer_dim, const int outer_dim,
const int axis_dim,
const int inner_dim, const int inner_dim,
const float pos_alpha, const float pos_alpha,
const float neg_alpha, const float neg_alpha,
...@@ -1144,89 +1125,92 @@ template <> void SparseSoftmaxFocalLoss<float, CUDAContext>( ...@@ -1144,89 +1125,92 @@ template <> void SparseSoftmaxFocalLoss<float, CUDAContext>(
const int neg_id, const int neg_id,
const float* prob, const float* prob,
const float* labels, const float* labels,
float* scale, const int* ignores,
float* loss, const int num_ignores,
float* valid, float* losses,
Tensor* ignore) { float* flags,
const int* ignores = ignore->count() > 0 ? CUDAContext* ctx) {
ignore->data<int, CUDAContext>() : nullptr;
const int num_preds = outer_dim * inner_dim; const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxFocalScale<float>
<< <CUDA_BLOCKS(count), CUDA_THREADS >> >(
count, gamma, prob, scale);
_SparseSoftmaxFocalLoss<float> _SparseSoftmaxFocalLoss<float>
<< <CUDA_BLOCKS(num_preds), CUDA_THREADS >> >( << <CUDA_BLOCKS(num_preds), CUDA_THREADS,
num_preds, pos_alpha, neg_alpha, neg_id, scale, 0, ctx->cuda_stream() >> >(
prob, labels, loss, classes, inner_dim, num_preds, axis_dim, inner_dim,
ignores, ignore->count(), valid); pos_alpha, neg_alpha, gamma, neg_id,
prob, labels, ignores, num_ignores,
losses, flags);
} }
template <typename T> template <typename T>
__global__ void _SparseSoftmaxFocalLossGrad( __global__ void _SparseSoftmaxFocalLossGrad(
const int count, const int count,
const int axis_dim,
const int inner_dim,
const float pos_alpha,
const float neg_alpha,
const float gamma, const float gamma,
const int neg_id, const int neg_id,
const float eps,
const T* scale,
const T* prob, const T* prob,
const T* labels, const T* labels,
T* dx,
const int classes,
const int inner_dim,
const int* ignores, const int* ignores,
const int ignore_num, const int num_ignores,
T* valid) { T* dx,
T* flags) {
CUDA_KERNEL_LOOP(idx, count) { CUDA_KERNEL_LOOP(idx, count) {
const int o_idx = idx / inner_dim; const int oix = idx / inner_dim;
const int i_idx = idx % inner_dim; const int iix = idx % inner_dim;
const int label = labels[o_idx * inner_dim + i_idx]; const int label = labels[oix * inner_dim + iix];
int k; int k;
for (k = 0; k < ignore_num; k++) for (k = 0; k < num_ignores; k++)
if (label == ignores[k]) break; if (label == ignores[k]) break;
if (k != ignore_num) { if (k != num_ignores) {
for (int c = 0; c < classes; c++) for (int c = 0; c < axis_dim; c++)
dx[(o_idx * classes + c) * inner_dim + i_idx] = 0; dx[(oix * axis_dim + c) * inner_dim + iix] = 0;
valid[idx] = 0; flags[idx] = 0;
} else { } else {
const int t_ = (o_idx * classes + label) * inner_dim + i_idx; const int t = (oix * axis_dim + label) * inner_dim + iix;
T grad = -gamma * (scale[t_] / max((1.0f - prob[t_]), eps)) T onemp = 1.f - prob[t];
* std::log(max(prob[t_], FLT_MIN)) // unstable if gamma is 0: 0 * pow(onemp, -1) can give NaN as onemp -> 0
* prob[t_] + scale[t_]; T grad = -gamma * pow(onemp, gamma - 1)
for (int c = 0; c < classes; c++) { * log(max(prob[t], FLT_MIN))
const int i_ = (o_idx * classes + c) * inner_dim + i_idx; * prob[t] + pow(onemp, gamma);
grad = label > neg_id ?
pos_alpha * grad : neg_alpha * grad;
for (int c = 0; c < axis_dim; c++) {
const int i = (oix * axis_dim + c) * inner_dim + iix;
if (c == label) { if (c == label) {
dx[i_] = grad * (prob[t_] - 1); dx[i] = grad * (prob[t] - 1);
} else { } else {
dx[i_] = grad * prob[i_]; dx[i] = grad * prob[i];
} }
} }
valid[idx] = label > neg_id ? 1 : 0; flags[idx] = label > neg_id ? 1 : 0;
} }
} }
} }
template<> void SparseSoftmaxFocalLossGrad<float, CUDAContext>( template<> void SparseSoftmaxFocalLossGrad<float, CUDAContext>(
const int count,
const int classes,
const int outer_dim, const int outer_dim,
const int axis_dim,
const int inner_dim, const int inner_dim,
const float pos_alpha,
const float neg_alpha,
const float gamma, const float gamma,
const int neg_id, const int neg_id,
const float eps,
const float* scale,
const float* prob, const float* prob,
const float* labels, const float* labels,
float* valid, const int* ignores,
Tensor* ignore, const int num_ignores,
float* dXdata) { float* dx,
const int* ignores = ignore->count() > 0 ? float* flags,
ignore->data <int, CUDAContext >() : nullptr; CUDAContext* ctx) {
const int num_preds = outer_dim * inner_dim; const int num_preds = outer_dim * inner_dim;
_SparseSoftmaxFocalLossGrad<float> _SparseSoftmaxFocalLossGrad<float>
<< <CUDA_BLOCKS(num_preds), CUDA_THREADS >> >( << <CUDA_BLOCKS(num_preds), CUDA_THREADS,
num_preds, gamma, neg_id, eps, scale, 0, ctx->cuda_stream() >> >(
prob, labels, dXdata, classes, inner_dim, num_preds, axis_dim, inner_dim,
ignores, ignore->count(), valid); pos_alpha, neg_alpha, gamma, neg_id,
prob, labels, ignores, num_ignores,
dx, flags);
} }
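After the loss kernels run, flags holds one validity indicator per position (the CPU gradient paths instead accumulate a single count in flags[0]); under the "VALID" normalization declared by the operator, the summed losses are presumably divided by the number of contributing positions, along these lines (a sketch, not the operator's actual reduction code):

#include <algorithm>

// Hedged sketch of "VALID" normalization: mean loss over non-ignored
// positions, clamped to avoid division by zero when everything is ignored.
float NormalizedLoss(const float* losses, const float* flags, int num_preds) {
    float total = 0.f, valid = 0.f;
    for (int i = 0; i < num_preds; ++i) {
        total += losses[i];
        valid += flags[i];
    }
    return total / std::max(valid, 1.f);
}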
/******************** misc.astype ********************/ /******************** misc.astype ********************/
......