Commit 0ab14f30 by Ting PAN

Normalize the math notations in docstring

Summary:
This commit normalizes the inconsistent math notations in docstrings.
1 parent 2598f4dc
Showing with 489 additions and 579 deletions
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The base layer class."""
from __future__ import absolute_import
......
......@@ -8,8 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The implementation of the common layers."""
"""The common layers."""
from __future__ import absolute_import
from __future__ import division
......@@ -628,6 +627,10 @@ class Slice(Layer):
class Softmax(Layer):
r"""Apply the softmax function.
The **Softmax** function is defined as:
.. math:: \text{Softmax}(x_{i}) = \frac{\exp(x_{i})}{\sum_{j} \exp(x_{j})}
Examples:
```python
......
......@@ -8,8 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The implementation of the data layers."""
"""The data layers."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,8 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The implementation of loss layers."""
"""The loss layers."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,8 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The implementation of the neuron layers."""
"""The neuron layers."""
from __future__ import absolute_import
from __future__ import division
......@@ -66,7 +65,7 @@ class ELU(Layer):
\text{ELU}(x) =
\begin{cases}
x, & \text{ if } x \geq 0 \\
\alpha * (e^{x} - 1), & \text{ otherwise }
\alpha * (\exp(x) - 1), & \text{ otherwise }
\end{cases}
Examples:
......@@ -220,7 +219,7 @@ class Sigmoid(Layer):
The **Sigmoid** function is defined as:
.. math:: \text{Sigmoid}(x) = \frac{1}{1 + e^{-x}}
.. math:: \text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)}
Examples:
......@@ -246,7 +245,7 @@ class TanH(Layer):
The **Tanh** function is defined as:
.. math:: \text{Tanh}(x) = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}
.. math:: \text{Tanh}(x) = \frac{\exp(x) - \exp(-x)}{\exp(x) + \exp(-x)}
Examples:
......
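The `\exp`-based notations above are mathematically identical to the previous `e^{x}` forms; a quick NumPy check (illustrative only, not part of this commit) confirms the rewritten Sigmoid and Tanh definitions:

```python
import numpy as np

x = np.linspace(-3.0, 3.0, 7)

# Sigmoid(x) = 1 / (1 + exp(-x))
sigmoid = 1.0 / (1.0 + np.exp(-x))

# Tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
tanh = (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))

assert np.allclose(tanh, np.tanh(x))
assert np.allclose(sigmoid, 0.5 * (1.0 + np.tanh(0.5 * x)))
```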
......@@ -8,8 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The implementation of the vision layers."""
"""The vision layers."""
from __future__ import absolute_import
from __future__ import division
......@@ -184,8 +183,9 @@ class Pooling(Layer):
super(Pooling, self).__init__(layer_param)
param = layer_param.pooling_param
self.arguments = {
'data_format': 'NCHW',
'ceil_mode': True,
'mode': {0: 'MAX', 1: 'AVG'}[param.pool],
'data_format': 'NCHW',
'global_pooling': param.global_pooling,
}
if not param.HasField('kernel_h'):
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The base net class."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The solver to update parameters."""
from __future__ import absolute_import
......
......@@ -3,6 +3,12 @@ dragon.cuda
.. only:: html
Classes
-------
`class Stream <cuda/Stream.html>`_
: The CUDA stream wrapper.
Functions
---------
......@@ -10,10 +16,7 @@ dragon.cuda
: Return the index of current selected device.
`enable_cudnn(...) <cuda/enable_cudnn.html>`_
: Activate the CuDNN engine.
`enable_cudnn_benchmark(...) <cuda/enable_cudnn_benckmark.html>`_
: Activate the CuDNN benchmark.
: Enable the CuDNN library.
`get_device_capability(...) <cuda/get_device_capability.html>`_
: Return the capability of specified device.
......@@ -28,18 +31,18 @@ dragon.cuda
: Set the current device.
`synchronize(...) <cuda/synchronize.html>`_
: Synchronize the specified cuda stream.
: Synchronize a specified CUDA stream.
.. toctree::
:hidden:
cuda/current_device
cuda/enable_cudnn
cuda/enable_cudnn_benchmark
cuda/get_device_capability
cuda/is_available
cuda/set_default_device
cuda/set_device
cuda/Stream
cuda/synchronize
.. raw:: html
......
Stream
======
.. autoclass:: dragon.cuda.Stream
__init__
--------
.. automethod:: dragon.cuda.Stream.__init__
Properties
----------
ptr
###
.. autoattribute:: dragon.cuda.Stream.ptr
Methods
-------
synchronize
###########
.. automethod:: dragon.cuda.Stream.synchronize
.. raw:: html
<style>
h1:before {
content: "dragon.cuda.";
color: #103d3e;
}
</style>
enable_cudnn_benchmark
======================
.. autofunction:: dragon.cuda.enable_cudnn_benchmark
.. raw:: html
<style>
h1:before {
content: "dragon.cuda.";
color: #103d3e;
}
</style>
......@@ -139,7 +139,7 @@ dragon.math
: Compute the sum value of elements along the given axis.
`tanh(...) <math/tanh.html>`_
: Compute the tanh result of input.
: Compute the tanh of input.
.. toctree::
:hidden:
......
......@@ -100,7 +100,7 @@ vm.tensorflow.math
: Compute the reciprocal square root of input.
`sigmoid(...) <math/sigmoid.html>`_
: Apply the sigmoid function.
: Compute the sigmoid function.
`sign(...) <math/sign.html>`_
: Compute the sign indication of input.
......@@ -118,7 +118,7 @@ vm.tensorflow.math
: Compute the element-wise subtraction.
`tanh(...) <math/tanh.html>`_
: Apply the tanh function.
: Compute the tanh of input.
.. toctree::
:hidden:
......
......@@ -8,36 +8,29 @@ bool GraphGradientMaker::CheckGrad(
const Set<string>& targets,
vector<pair<string, int>>& gen_grads) {
if (NoGradientRegistry()->Has(op_def.type())) {
for (auto& input : op_def.input()) {
blacklist_set_.insert(input);
}
return true;
}
bool maybe_skip = false;
for (int i = 0; i < op_def.output_size(); ++i) {
const auto& output = op_def.output(i);
if (!inputs_to_grads_.count(output)) {
if (blacklist_set_.count(output)) return true;
maybe_skip = true;
if (targets.count(output)) {
// Consider to generate virtual gradient for targets
gen_grads.push_back({output, i});
inputs_to_grads_[output] = output + "_grad";
} else if (op_def.output_size() == 1) {
return true; // We can skip this op, obviously
}
}
}
// Pass check, even if missing some grads
return false;
return maybe_skip && gen_grads.empty();
}
void GraphGradientMaker::Make(
const vector<OperatorDef*>& op_defs,
const vector<string>& targets,
const vector<string>& input_grads,
GraphDef& backward_def) {
GraphDef& graph_def) {
Set<string> split_grads, targets_v2;
Map<string, int> inputs_count, grads_count;
Set<string> all_split_grads, targets_set;
Map<string, string> targets_to_grads;
// PLAY for the forward
for (auto* op_def : op_defs) {
......@@ -49,126 +42,118 @@ void GraphGradientMaker::Make(
input_in_outputs = true;
break;
}
// Avoid to count the duplicate input(i.e. the in-place output)
// Avoid counting the duplicate input (i.e. the in-place output)
if (!input_in_outputs) inputs_count[input]++;
}
}
// PLAY for the backward
for (int i = 0; i < targets.size(); ++i) {
// Set the gradient of targets
for (int i = 0; i < targets.size(); ++i) {
if (i < input_grads.size()) {
inputs_to_grads_[targets[i]] = input_grads[i];
}
targets_set.insert(targets[i]);
targets_v2.insert(targets[i]);
}
// PLAY for the backward
for (int op_idx = (int)op_defs.size() - 1; op_idx >= 0; --op_idx) {
// Collect inputs and outputs, generate raw gradient ops
const OperatorDef& op = *op_defs[op_idx];
const OperatorDef& op_def = *op_defs[op_idx];
// Generate def by registered gradient maker
vector<pair<string, int>> gen_grads;
bool is_skip = CheckGrad(op, targets_set, gen_grads);
vector<string> g_outputs;
for (const auto& output : op.output()) {
string g_output = "";
if (inputs_to_grads_.count(output) > 0) {
g_output = inputs_to_grads_[output];
}
g_outputs.push_back(g_output);
}
auto grad = MakeGradientForOp(op, g_outputs);
// Process the raw gradient ops
vector<OperatorDef> gather_ops;
for (auto& grad_op : grad.ops) {
// Set op name
if (!grad_op.has_name()) grad_op.set_name(GetOperatorName());
// Split and gather gradients for multi-used input
for (int i = 0; i < grad_op.output_size(); ++i) {
auto* output = grad_op.mutable_output(i);
int original_idx = -1;
for (int j = 0; j < grad.g_inputs.size(); ++j) {
if (grad_op.output(i) == grad.g_inputs[j]) {
original_idx = j;
}
}
// Ignore unused && in-placee GI
if (original_idx == -1) continue;
vector<string> grad_outputs;
bool is_skip = CheckGrad(op_def, targets_v2, gen_grads);
for (const auto& output : op_def.output()) {
string grad_output = "";
const auto& it = inputs_to_grads_.find(output);
if (it != inputs_to_grads_.end()) grad_output = it->second;
grad_outputs.push_back(grad_output);
}
auto pack = MakeGradientForOp(op_def, grad_outputs);
// Split and gather gradient for multi-used inputs
vector<OperatorDef> gather_defs;
for (auto& grad_def : pack.grad_defs) {
if (!grad_def.has_name()) {
grad_def.set_name(GetOperatorName());
}
for (int i = 0; i < grad_def.output_size(); ++i) {
const auto& grad_name = grad_def.output(i);
int original_index = -1;
for (int j = 0; j < pack.grad_inputs.size(); ++j) {
if (grad_name == pack.grad_inputs[j]) {
original_index = j;
}
}
if (original_index == -1) continue;
bool output_in_inputs = false;
for (const auto& input : grad_op.input()) {
if (grad_op.output(i) == input) {
for (const auto& name : grad_def.input()) {
if (grad_name == name) {
output_in_inputs = true;
break;
}
}
if (output_in_inputs) continue;
// Find a split branch
const auto& original_name = op.input(original_idx);
// Detect a split branch
const auto& original_name = op_def.input(original_index);
if (inputs_count[original_name] > 1) {
// Split
auto split_name =
*output + "_autosplit_" + str::to(grads_count[*output]++);
if (!is_skip) all_split_grads.insert(split_name);
// Gather
if (grads_count[*output] == inputs_count[original_name]) {
OperatorDef gather_op;
gather_op.set_name(GetOperatorName());
gather_op.set_type("GradientGather");
gather_op.add_output(*output);
if (grad_op.has_device_option()) {
gather_op.mutable_device_option()->CopyFrom(
grad_op.device_option());
auto grad_name_v2 =
grad_name + "_autosplit_" + str::to(grads_count[grad_name]++);
if (!is_skip) split_grads.insert(grad_name_v2);
if (grads_count[grad_name] == inputs_count[original_name]) {
auto gather_def = MakeOperatorDef(
"GradientGather",
GetOperatorName(),
vector<string>({}),
vector<string>({grad_name}));
if (grad_def.has_device_option()) {
gather_def.mutable_device_option()->CopyFrom(
grad_def.device_option());
}
for (int j = 0; j < grads_count[*output]; j++) {
auto key = *output + "_autosplit_" + str::to(j);
if (all_split_grads.count(key)) gather_op.add_input(key);
for (int j = 0; j < grads_count[grad_name]; j++) {
auto name = grad_name + "_autosplit_" + str::to(j);
if (split_grads.count(name)) gather_def.add_input(name);
}
gather_ops.push_back(gather_op);
gather_defs.push_back(gather_def);
}
*output = split_name;
*grad_def.mutable_output(i) = grad_name_v2;
}
}
}
// Now, append the required ops
// Add defs
if (!is_skip) {
// GradientGenerateOp
for (int i = 0; i < op_def.input_size(); ++i) {
inputs_to_grads_[op_def.input(i)] = pack.grad_inputs[i];
}
// Add def for ``GradientGenerateOp``
if (gen_grads.size() > 0) {
vector<string> op_inputs, op_outputs;
vector<string> inputs, outputs;
Argument arg_defaults;
arg_defaults.set_name("defaults");
for (auto& gen_grad : gen_grads) {
op_inputs.push_back(gen_grad.first);
op_outputs.emplace_back(gen_grad.first + "_grad");
arg_defaults.add_floats(grad.defaults[gen_grad.second]);
inputs.push_back(gen_grad.first);
outputs.emplace_back(gen_grad.first + "_grad");
arg_defaults.add_floats(pack.defaults[gen_grad.second]);
}
auto generate_op = MakeOperatorDef(
auto generate_def = MakeOperatorDef(
"GradientGenerate",
GetOperatorName(),
op_inputs,
op_outputs,
inputs,
outputs,
vector<Argument>({arg_defaults}));
if (op.has_device_option()) {
generate_op.mutable_device_option()->CopyFrom(op.device_option());
}
backward_def.add_op()->CopyFrom(generate_op);
}
// GradientOp
for (const auto& grad_op : grad.ops) {
backward_def.add_op()->CopyFrom(grad_op);
if (op_def.has_device_option()) {
generate_def.mutable_device_option()->CopyFrom(
op_def.device_option());
}
graph_def.add_op()->CopyFrom(generate_def);
}
// GradientGatherOp
for (const auto& gather_op : gather_ops) {
backward_def.add_op()->CopyFrom(gather_op);
// Add def for ``GenerateOp``
for (const auto& grad_def : pack.grad_defs) {
graph_def.add_op()->CopyFrom(grad_def);
}
// Done
if (!is_skip) {
for (int i = 0; i < op.input_size(); ++i) {
if (!grad.g_inputs[i].empty())
inputs_to_grads_[op.input(i)] = grad.g_inputs[i];
}
// Add def for ``GradientGatherOp``
for (const auto& gather_def : gather_defs) {
graph_def.add_op()->CopyFrom(gather_def);
}
}
}
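For reference, the split-and-gather bookkeeping above reduces to a simple naming scheme; the following is an illustrative Python mirror of the `_autosplit_`/`GradientGather` logic, not the actual C++ implementation (the counters mimic `inputs_count`/`grads_count`):

```python
from collections import defaultdict

inputs_count = {'x': 3}          # 'x' is consumed by three forward ops
grads_count = defaultdict(int)   # gradient branches emitted so far

def split_grad(grad_name, original_name):
    """Return a per-branch name; emit a gather once all branches exist."""
    name_v2 = '%s_autosplit_%d' % (grad_name, grads_count[grad_name])
    grads_count[grad_name] += 1
    if grads_count[grad_name] == inputs_count[original_name]:
        gather_inputs = ['%s_autosplit_%d' % (grad_name, i)
                         for i in range(grads_count[grad_name])]
        print('GradientGather:', gather_inputs, '->', grad_name)
    return name_v2

for _ in range(3):
    split_grad('x_grad', 'x')
# GradientGather: ['x_grad_autosplit_0', 'x_grad_autosplit_1', 'x_grad_autosplit_2'] -> x_grad
```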
......@@ -261,7 +246,6 @@ GraphDef GraphGradientMaker::Share(const GraphDef& input_def) {
auto* op = output_def.mutable_op(op_idx);
// Ignore the non-gradient ops
if (!str::find(op->type(), "Gradient")) continue;
// Check if output is an alias of input
vec32_t inplace_flags;
for (int i = 0; i < op->output_size(); ++i) {
......@@ -273,11 +257,9 @@ GraphDef GraphGradientMaker::Share(const GraphDef& input_def) {
}
inplace_flags.emplace_back(flag);
}
// Besides, we need to collect the dead buffers
// Reuse them when current operator is done
vector<string> dead_buffers;
// Rewrite input gradients
for (int i = 0; i < op->input_size(); ++i) {
const string& input = op->input(i);
......@@ -291,7 +273,6 @@ GraphDef GraphGradientMaker::Share(const GraphDef& input_def) {
*op->mutable_input(i) = new_input;
}
}
// Rewrite output gradients
for (int i = 0; i < op->output_size(); ++i) {
if (str::startswith(op->type(), "Python")) continue;
......@@ -313,7 +294,6 @@ GraphDef GraphGradientMaker::Share(const GraphDef& input_def) {
}
*op->mutable_output(i) = new_output;
}
// Update the pool
for (auto& buffer : dead_buffers) {
pool.emplace_back(buffer);
......
......@@ -19,14 +19,14 @@ namespace dragon {
class DRAGON_API GraphGradientMaker {
public:
/*! \brief Generate a backward graph from the forward ops */
/*! \brief Generate graph def from the op defs */
void Make(
const vector<OperatorDef*>& op_defs,
const vector<string>& targets,
const vector<string>& input_grads,
GraphDef& graph_def);
/*! \brief Rewrite a graph to share the intermediate grads */
/*! \brief Rewrite graph def to share the intermediate grads */
GraphDef Share(const GraphDef& input_def);
/*! \brief Add an empty gradient */
......@@ -45,7 +45,7 @@ class DRAGON_API GraphGradientMaker {
}
private:
/*! \brief Check the missing grads of backward procedure */
/*! \brief Check the missing grads */
bool CheckGrad(
const OperatorDef& op_def,
const Set<string>& targets,
......@@ -60,9 +60,6 @@ class DRAGON_API GraphGradientMaker {
/*! \brief The mapping from input to grad */
Map<string, string> inputs_to_grads_;
/*! \brief The non-gradient outputs */
Set<string> blacklist_set_;
/*! \brief The gradients should be retained */
Set<string> retained_grads_;
......
......@@ -202,33 +202,36 @@ OperatorBase* NewOperator(const OperatorDef& def, Workspace* ws) {
return TryCreateOperator(def.type(), mutable_def, ws);
}
Gradient MakeGradientForOp(
GradientPack MakeGradientForOp(
const OperatorDef& def,
const vector<string>& g_outputs) {
unique_ptr<GradientMakerBase> maker(
GradientRegistry()->Create(def.type(), def, g_outputs));
if (maker.get() == nullptr)
LOG(FATAL) << "Gradient maker for operator " << def.type()
<< "not implemented.";
Gradient grad = maker->Make();
const vector<string>& grad_outputs) {
CHECK(GradientRegistry()->Has(def.type()))
<< "\nNo GradientMaker registered for " << def.type() << "Op.";
OperatorDef reference_def(def);
// Set the cache key
unique_ptr<GradientMakerBase> maker(
GradientRegistry()->Create(def.type(), def, grad_outputs));
GradientPack pack = maker->Make();
// Copy cache key
if (reference_def.has_cache_key()) {
for (int i = 0; i < grad.ops.size(); ++i) {
grad.ops[i].set_cache_key(
for (int i = 0; i < pack.grad_defs.size(); ++i) {
pack.grad_defs[i].set_cache_key(
reference_def.cache_key() + "/grad" +
(i > 0 ? (":" + str::to(i)) : ""));
}
}
// Copy device option and arguments
if (maker->CopyDeviceOption() && def.has_device_option())
for (auto& grad_def : grad.ops)
if (maker->CopyDeviceOption() && def.has_device_option()) {
for (auto& grad_def : pack.grad_defs) {
grad_def.mutable_device_option()->CopyFrom(def.device_option());
}
}
// Copy arguments
if (maker->CopyArguments() && def.arg_size())
for (auto& grad_def : grad.ops)
if (maker->CopyArguments() && def.arg_size()) {
for (auto& grad_def : pack.grad_defs) {
grad_def.mutable_arg()->MergeFrom(reference_def.arg());
return grad;
}
}
return pack;
}
/* Operator Registry */
......
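The cache-key suffixing above follows a simple rule ("/grad" for the first grad def, "/grad:i" for the rest); a minimal Python mirror, using a hypothetical 'Conv2d/1' key:

```python
def grad_cache_key(cache_key, i):
    """Suffix rule: '/grad' for the first def, '/grad:i' for the rest."""
    return cache_key + '/grad' + (':%d' % i if i > 0 else '')

assert grad_cache_key('Conv2d/1', 0) == 'Conv2d/1/grad'
assert grad_cache_key('Conv2d/1', 2) == 'Conv2d/1/grad:2'
```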
......@@ -20,22 +20,22 @@
namespace dragon {
struct Gradient {
Gradient(
const vector<OperatorDef>& ops,
const vector<string>& g_inputs,
struct GradientPack {
GradientPack(
const vector<OperatorDef>& grad_defs,
const vector<string>& grad_inputs,
const vector<float>& defaults)
: ops(ops), g_inputs(g_inputs), defaults(defaults) {}
: grad_defs(grad_defs), grad_inputs(grad_inputs), defaults(defaults) {}
vector<OperatorDef> ops;
vector<string> g_inputs;
vector<OperatorDef> grad_defs;
vector<string> grad_inputs;
vector<float> defaults;
};
class GradientMakerBase {
public:
GradientMakerBase(const OperatorDef& def, const vector<string>& g_outputs)
: def(def), g_inputs_(def.input_size()), g_outputs_(g_outputs) {}
GradientMakerBase(const OperatorDef& def, const vector<string>& grad_outputs)
: def(def), grad_inputs_(def.input_size()), grad_outputs_(grad_outputs) {}
virtual ~GradientMakerBase() {}
......@@ -49,21 +49,23 @@ class GradientMakerBase {
return true;
}
virtual Gradient Make() {
virtual GradientPack Make() {
auto new_defs = MakeDef();
if (def.has_cache_key()) {
// Attach the handle to name if having cache key
for (size_t i = 0; i < new_defs.size(); i++)
for (size_t i = 0; i < new_defs.size(); i++) {
new_defs[i].set_name(def.name());
}
} else {
// Otherwise, just put it into the arguments
Argument arg;
arg.set_name("handle");
arg.set_s(def.name());
for (size_t i = 0; i < new_defs.size(); i++)
for (size_t i = 0; i < new_defs.size(); i++) {
new_defs[i].add_arg()->CopyFrom(arg);
}
return Gradient(new_defs, g_inputs_, defaults());
}
return GradientPack(new_defs, grad_inputs_, defaults());
};
virtual vector<OperatorDef> MakeDef() {
......@@ -84,26 +86,26 @@ class GradientMakerBase {
}
string GI(const int i) {
if (i >= int(g_inputs_.size())) return "";
g_inputs_[i] = def.input(i) + "_grad";
return g_inputs_[i];
if (i >= int(grad_inputs_.size())) return "";
grad_inputs_[i] = def.input(i) + "_grad";
return grad_inputs_[i];
}
const string GO(const int i) const {
return i < int(g_outputs_.size()) ? g_outputs_[i] : "";
return i < int(grad_outputs_.size()) ? grad_outputs_[i] : "";
}
virtual vector<float> defaults() {
return vector<float>(g_outputs_.size(), 1.f);
return vector<float>(grad_outputs_.size(), 1.f);
}
protected:
const OperatorDef& def;
vector<string> g_inputs_;
const vector<string>& g_outputs_;
vector<string> grad_inputs_;
const vector<string>& grad_outputs_;
};
DRAGON_API Gradient
DRAGON_API GradientPack
MakeGradientForOp(const OperatorDef& op_def, const vector<string>& g_outputs);
#define GRADIENT_MAKER_CTOR(name) \
......
......@@ -24,7 +24,7 @@ class Registry {
ObjType* Create(const SrcType& key, Args... args) {
CHECK(registry_.count(key))
<< "\nKey(" << key << ") has not registered yet.";
<< "\nKey(" << key << ") has not registered.";
return registry_[key](args...);
}
......
......@@ -23,19 +23,18 @@ namespace autograd {
void RegisterModule(py::module& m) {
m.def(
"CreateGradientDefs",
[](const string& forward_def, const vector<string>& g_outputs) {
"CreateGradientDef",
[](const string& def_str, const vector<string>& grad_outputs) {
OperatorDef def;
if (!def.ParseFromString(forward_def))
LOG(FATAL) << "Failed to parse the OperatorDef.";
if (!GradientRegistry()->Has(def.type()))
LOG(FATAL) << def.type() << "Op has no gradients.";
Gradient grad = MakeGradientForOp(def, g_outputs);
vector<py::bytes> grad_ops;
for (const auto& e : grad.ops)
grad_ops.push_back(e.SerializeAsString());
CHECK(def.ParseFromString(def_str))
<< "\nFailed to parse the OperatorDef.";
GradientPack pack = MakeGradientForOp(def, grad_outputs);
vector<py::bytes> grad_defs;
for (const auto& op_def : pack.grad_defs) {
grad_defs.push_back(op_def.SerializeAsString());
}
return std::tuple<vector<py::bytes>, vector<string>, vector<float>>(
grad_ops, grad.g_inputs, grad.defaults);
grad_defs, pack.grad_inputs, pack.defaults);
});
}
......
......@@ -96,16 +96,11 @@ void RegisterModule(py::module& m) {
});
/*! \brief Activate the CuDNN engine */
m.def("cudaEnableDNN", [](bool enabled) {
m.def("cudaEnableDNN", [](bool enabled, bool benchmark) {
#ifdef USE_CUDA
CUDAContext::object()->cudnn_enabled_ = enabled;
#endif
});
/*! \brief Activate the CuDNN benchmark */
m.def("cudaEnableDNNBenchmark", [](bool enabled) {
#ifdef USE_CUDA
CUDAContext::object()->cudnn_benchmark_ = enabled;
auto* cuda_object = CUDAContext::object();
cuda_object->cudnn_enabled_ = enabled;
cuda_object->cudnn_benchmark_ = benchmark;
#endif
});
......
......@@ -15,11 +15,11 @@ from __future__ import print_function as _print_function
from dragon.core.device.cuda import current_device
from dragon.core.device.cuda import enable_cudnn
from dragon.core.device.cuda import enable_cudnn_benchmark
from dragon.core.device.cuda import get_device_capability
from dragon.core.device.cuda import is_available
from dragon.core.device.cuda import set_default_device
from dragon.core.device.cuda import set_device
from dragon.core.device.cuda import Stream
from dragon.core.device.cuda import synchronize
__all__ = [_s for _s in dir() if not _s.startswith('_')]
......@@ -8,8 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""List the exported CXX API."""
"""List the exported C++ API."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define the options for autograph utilities."""
from __future__ import absolute_import
......
......@@ -12,7 +12,6 @@
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/eager/def_function.py>
#
# ------------------------------------------------------------
"""Utilities to define a graph function with decorator."""
from __future__ import absolute_import
......@@ -267,7 +266,7 @@ class FunctionGuard(object):
executables = [function_lib.create_function(inputs, outputs)]
for obj in dummies:
if isinstance(obj, optimizer.Optimizer):
executables.append(function_lib.create_function(updater=obj))
executables.append(function_lib.create_function(optimizer=obj))
self.inputs = inputs
self.outputs = returns
self.executables = executables
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Translate the graph abstraction to a python function."""
from __future__ import absolute_import
......@@ -288,7 +287,7 @@ class Function(object):
if len(kwargs) > 0 else self.callback(*args)
def create_function(inputs=None, outputs=None, givens=None, updater=None):
def create_function(inputs=None, outputs=None, givens=None, optimizer=None):
"""Create a callable graph from specified outputs.
Tensors that catch any operators can be used to create a graph:
......@@ -325,37 +324,36 @@ def create_function(inputs=None, outputs=None, givens=None, updater=None):
bar = dragon.create_function(outputs=y, givens={x: x2})
```
Specify ``updater`` to make a graph applying SGD updates:
Specify ``optimizer`` to make a graph applying parameter updates:
```python
x = dragon.Tensor('x', dtype='float32').set_value(1)
x_grad = dragon.Tensor('x_grad', dtype='float32').set_value(1)
# Define a updater to catch the operators
updater = dragon.updaters.SGD(base_lr=0.01)
updater.apply_gradients(values_and_grads=[(x, x_grad)])
optimizer = dragon.optimizers.SGD(base_lr=0.01)
optimizer.apply_gradients(values_and_grads=[(x, x_grad)])
# Compute x -= 0.01 * x_grad
train_step = dragon.create_function(updater=updater)
train_step = dragon.create_function(optimizer=optimizer)
train_step()
print(x.get_value())
print(x.get_value()) # 0.99
```
Parameters
----------
inputs : Sequence[dragon.Tensor], optional
The inputs to feed.
The input tensors.
outputs : Sequence[dragon.Tensor], optional
The outputs to fetch.
The output tensors.
givens : Dict[dragon.Tensor, dragon.Tensor], optional
The substitutions to apply.
updater : Updater, optional
The optional updater.
The optional substitutions.
optimizer : dragon.optimizers.Optimizer, optional
The optional optimizer.
Returns
-------
Function
callable
The callable function.
"""
return Function().create(inputs, outputs, givens, updater)
return Function().create(inputs, outputs, givens, optimizer)
......@@ -12,8 +12,7 @@
# <https://github.com/caffe2/caffe2/blob/master/caffe2/python/core.py>
#
# ------------------------------------------------------------
"""Python-implemented gradient maker."""
"""Simple gradient maker implementation."""
from __future__ import absolute_import
from __future__ import division
......@@ -28,47 +27,38 @@ from dragon.core.proto import dragon_pb2
class GradientMaker(object):
"""Make def for the gradient based on rules."""
"""The maker to generate grad defs to run backward."""
@classmethod
def gen_def(cls, op_def, g_outputs):
"""Generate the OperatorDef from forward op."""
grad_defs, g_inputs, defaults = backend.CreateGradientDefs(
op_def.SerializeToString(), g_outputs)
def gen_def(cls, op_def, grad_outputs):
"""Generate the grad def."""
grad_defs, grad_inputs, defaults = backend.CreateGradientDef(
op_def.SerializeToString(), grad_outputs)
for i, grad_def in enumerate(grad_defs):
new_def = dragon_pb2.OperatorDef()
new_def.ParseFromString(grad_def)
grad_defs[i] = new_def
return grad_defs, g_inputs, defaults
return grad_defs, grad_inputs, defaults
@classmethod
def check(cls, op_def, inputs_to_grads, blacklist, targets):
def check(cls, op_def, inputs_to_grads, targets):
"""Check if missing gradients. If missing, skip."""
if op_def.type in backend.NO_GRADIENT_OPERATORS:
for input in op_def.input:
blacklist.add(input)
return True, None
gen_grads = []
for idx, output in enumerate(op_def.output):
return True, []
gen_grads, maybe_skip = [], False
for i, output in enumerate(op_def.output):
if output not in inputs_to_grads:
if output in blacklist:
return True, gen_grads
maybe_skip = True
if output in targets:
# Consider to generate virtual gradient for targets.
gen_grads.append((output, idx))
gen_grads.append((output, i))
inputs_to_grads[output] = output + '_grad'
elif len(op_def.output) == 1:
# We can skip this op, obviously.
return True, gen_grads
# Pass, even if missing some grads.
return False, gen_grads
return maybe_skip and len(gen_grads) == 0, gen_grads
@classmethod
def make(cls, op_defs, targets, input_grads=None):
"""Make the backward op defs."""
"""Make the grad defs."""
inputs_to_grads = {} if input_grads is None else input_grads
inputs_count, grads_count = defaultdict(int), defaultdict(int)
all_split_grads, blacklist = set(), set()
# PLAY for the forward.
for op_def in op_defs:
......@@ -77,89 +67,71 @@ class GradientMaker(object):
outputs = [output for output in op_def.output]
for input in op_def.input:
if input not in outputs:
# Avoid to count the duplicate input,
# (i.e. the in-place output).
# Avoid counting the duplicate input (i.e. the in-place output).
inputs_count[input] += 1
# PLAY for the backward.
backward_defs = []
backward_defs, split_grads = [], set()
for op_def in op_defs[::-1]:
# Collect inputs and outputs.
is_skip, gen_grads = cls.check(
op_def=op_def,
inputs_to_grads=inputs_to_grads,
blacklist=blacklist,
targets=targets,
)
# Missing grads are represented as ``None``.
g_outputs = [inputs_to_grads.get(name, '') for name in op_def.output]
grad_defs, g_inputs, defaults = cls.gen_def(op_def, g_outputs)
# Generate def by registered gradient maker.
is_skip, gen_grads = cls.check(op_def, inputs_to_grads, targets)
grad_outputs = [inputs_to_grads.get(name, '') for name in op_def.output]
grad_defs, grad_inputs, defaults = cls.gen_def(op_def, grad_outputs)
# Append operators.
# Add defs.
if not is_skip:
# GradientGenerateOp
for input, grad_input in zip(op_def.input, grad_inputs):
inputs_to_grads[input] = grad_input
# Add def for ``GradientGenerateOp``
if len(gen_grads) > 0:
op_inputs, op_outputs, values = [], [], []
for item in gen_grads:
op_inputs.append(item[0])
op_outputs.append(item[0] + '_grad')
values.append(defaults[item[1]])
inputs, outputs, values = [], [], []
for name, i in gen_grads:
inputs.append(name)
outputs.append(name + '_grad')
values.append(defaults[i])
gen_op = proto_util.make_operator_def(
name=OpDef.get_name(),
op_type='GradientGenerate',
inputs=op_inputs,
outputs=op_outputs,
inputs=inputs,
outputs=outputs,
defaults=values,
)
if op_def.HasField('device_option'):
gen_op.device_option.CopyFrom(op_def.device_option)
device_option=op_def.device_option
if op_def.HasField('device_option') else None)
backward_defs.append(gen_op)
# GradientOp
# Add def for ``GradientOp``
for grad_def in grad_defs:
grad_def.name = OpDef.get_name()
backward_defs.append(grad_def)
# Split and gather grads for multi-used input.
# Split and gather gradient for multi-used inputs.
for grad_def in grad_defs:
for g_output_idx, g_output in enumerate(grad_def.output):
original_idx = -1
for g_input_idx, g_input in enumerate(g_inputs):
if g_output == g_input:
original_idx = g_input_idx
# Ignore un-used && in-placed GI(?).
if original_idx == -1:
for i, grad_name in enumerate(grad_def.output):
original_index = -1
for j, name in enumerate(grad_inputs):
if grad_name == name:
original_index = j
if original_index == -1 or grad_name in grad_def.input:
continue
if g_output in grad_def.input:
original_name = op_def.input[original_index]
if inputs_count[original_name] <= 1:
continue
# Found a split branch.
original_name = op_def.input[original_idx]
if inputs_count[original_name] > 1:
# Split.
split_name = g_output + '_autosplit_%d' % grads_count[g_output]
# Detect a split branch.
grad_name_v2 = grad_name + '_autosplit_%d' % grads_count[grad_name]
if not is_skip:
all_split_grads.add(split_name)
grads_count[g_output] += 1
# Gather.
if grads_count[g_output] == inputs_count[original_name]:
split_inputs = []
for idx in range(grads_count[g_output]):
if '%s_autosplit_%d' % (g_output, idx) in all_split_grads:
split_inputs.append('%s_autosplit_%d' % (g_output, idx))
gather_def = proto_util.make_operator_def(
split_grads.add(grad_name_v2)
grads_count[grad_name] += 1
if grads_count[grad_name] == inputs_count[original_name]:
gather_inputs = []
for j in range(grads_count[grad_name]):
if '%s_autosplit_%d' % (grad_name, j) in split_grads:
gather_inputs.append('%s_autosplit_%d' % (grad_name, j))
backward_defs.append(proto_util.make_operator_def(
name=OpDef.get_name(),
op_type='GradientGather',
inputs=split_inputs,
outputs=[g_output],
)
if grad_def.HasField('device_option'):
gather_def.device_option.CopyFrom(grad_def.device_option)
backward_defs.append(gather_def)
grad_def.output[g_output_idx] = split_name
# Done.
if not is_skip:
for name, grad in zip(op_def.input, g_inputs):
if grad != '':
inputs_to_grads[name] = grad
inputs=gather_inputs,
outputs=[grad_name],
device_option=grad_def.device_option
if grad_def.HasField('device_option') else None))
grad_def.output[i] = grad_name_v2
return backward_defs
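The rewritten skip rule (skip an op only when some gradient is missing and no virtual gradient can be generated for a target) can be isolated in a few lines; this is an illustrative mirror of `check()` that omits the `inputs_to_grads` bookkeeping:

```python
def should_skip(outputs, inputs_to_grads, targets):
    """Skip only if a grad is missing and no virtual grad can be generated."""
    gen_grads, maybe_skip = [], False
    for i, output in enumerate(outputs):
        if output not in inputs_to_grads:
            maybe_skip = True
            if output in targets:
                gen_grads.append((output, i))
    return maybe_skip and not gen_grads

# Grad available for the only output -> keep the op.
assert not should_skip(['y'], {'y': 'y_grad'}, targets=set())
# Grad missing and the output is not a target -> skip.
assert should_skip(['y'], {}, targets=set())
# Grad missing but the output is a target -> generate a virtual grad, keep.
assert not should_skip(['y'], {}, targets={'y'})
```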
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define the helper for creating symbolic operators."""
from __future__ import absolute_import
......@@ -87,14 +86,16 @@ class OpDef(object):
num_outputs = len(outputs)
# Construct Def.
op_idx, op_name = OpDef.get_index_and_name()
op_info._defs[op_idx] = proto_util.make_operator_def(
op_index, op_name = OpDef.get_index_and_name()
op_info.add_def(
op_index, proto_util.make_operator_def(
name=op_name,
op_type=op_type,
inputs=[input.id for input in inputs],
outputs=[output.id for output in outputs],
device_option=proto_util.get_default_device_option(),
**kwargs)
**kwargs
))
# Blend the op for outputs.
for output in outputs:
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define the describing spec for symbolic operators."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The graph executing tensor."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""CUDA utilities."""
from __future__ import absolute_import
......@@ -20,7 +19,17 @@ from dragon.core.framework import config
class Stream(backend.CudaStream):
"""The CUDA stream wrapper."""
def __init__(self, device_index):
"""Create a ``Stream``.
Parameters
----------
device_index : int, required
The device index of the stream.
"""
super(Stream, self).__init__(device_index)
@property
......@@ -36,7 +45,7 @@ class Stream(backend.CudaStream):
return super(Stream, self).ptr
def synchronize(self):
"""Synchronize the stream."""
"""Wait for the dispatched kernels to complete."""
self.Synchronize()
......@@ -52,38 +61,28 @@ def current_device():
return backend.cudaGetDevice()
def enable_cudnn(enabled=True):
"""Activate the CuDNN engine.
Parameters
----------
enabled : bool, optional, default=True
**True** to activate CuDNN.
"""
return backend.cudaEnableDNN(enabled)
def enable_cudnn_benchmark(enabled=True):
"""Activate the CuDNN benchmark.
def enable_cudnn(enabled=True, benchmark=False):
"""Enable the CuDNN library.
Parameters
----------
enabled : bool, optional, default=True
**True** to activate CuDNN benchmark.
**True** to enable the CuDNN library.
benchmark : bool, optional, default=False
**True** to select algorithms according to the benchmark.
"""
return backend.cudaEnableDNNBenchmark(enabled)
return backend.cudaEnableDNN(enabled, benchmark)
def get_device_capability(device_id=None):
def get_device_capability(device_index=None):
"""Return the capability of specified device.
If ``device_id`` is **None**, the current device will be selected.
If ``device_index`` is **None**, the current device will be selected.
Parameters
----------
device_id : int, optional
device_index : int, optional
The device index.
Returns
......@@ -92,8 +91,8 @@ def get_device_capability(device_id=None):
The major and minor number.
"""
device_id = device_id if device_id else -1
return backend.cudaGetDeviceCapability(device_id)
device_index = device_index if device_index else -1
return backend.cudaGetDeviceCapability(device_index)
def is_available():
......@@ -144,18 +143,18 @@ def set_device(device_index=0):
return backend.cudaSetDevice(device_index)
def synchronize(device_id=None, stream_id=0):
"""Synchronize the specified stream.
def synchronize(device_index=None, stream_index=0):
"""Synchronize a specified CUDA stream.
If ``device_id`` is **None**, the current device will be selected.
If ``device_index`` is **None**, the current device will be selected.
Parameters
----------
device_id : int, optional
device_index : int, optional
The device index.
stream_id : int, optional, default=0
stream_index : int, optional, default=0
The stream index.
"""
device_id = device_id if device_id else -1
return backend.cudaStreamSynchronize(device_id, stream_id)
device_index = device_index if device_index else -1
return backend.cudaStreamSynchronize(device_index, stream_index)
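A hedged usage sketch of the updated CUDA interface, assuming a CUDA-enabled build (the device and stream indices are illustrative):

```python
import dragon

# CuDNN and its benchmark mode are now enabled through a single call.
dragon.cuda.enable_cudnn(True, benchmark=True)

# ``device_id``/``stream_id`` become ``device_index``/``stream_index``.
dragon.cuda.synchronize(device_index=0, stream_index=0)

# The new ``Stream`` wrapper exposes ``ptr`` and ``synchronize()``.
stream = dragon.cuda.Stream(device_index=0)
stream.synchronize()
```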
......@@ -12,7 +12,6 @@
# <https://github.com/pytorch/pytorch/blob/master/torch/distributed/distributed_c10d.py>
#
# ------------------------------------------------------------
"""Distributed utilities equipped with Python."""
from __future__ import absolute_import
......
......@@ -12,7 +12,6 @@
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/eager/backprop.py>
#
# ------------------------------------------------------------
"""Do back-propagation from the executed operations."""
from __future__ import absolute_import
......@@ -115,9 +114,8 @@ class GradientTape(object):
# Check the pushed tape.
if self._tape is None:
raise RuntimeError(
'GradientTape.gradient can only be called '
'once on non-persistent tapes.'
)
'GradientTape.gradient(...) can only be called '
'once on non-persistent tapes.')
if self._recording:
if not self._persistent:
self._pop_tape()
......
......@@ -12,7 +12,6 @@
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/eager/context.py>
#
# ------------------------------------------------------------
"""State management for eager execution."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Execute tensor operations. """
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The eager executing tensor."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define the global configurations."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Structure to represent a device."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Constant mappings."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Utilities to fly an operator."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define some helpful protocol buffer makers here."""
from __future__ import absolute_import
......@@ -101,10 +100,9 @@ def make_operator_def(
arg=None,
**kwargs
):
op_def = dragon_pb2.OperatorDef()
op_def.type, op_def.name = op_type, name
op_def.input.extend([str(tensor) for tensor in inputs])
op_def.output.extend([str(tensor) for tensor in outputs])
op_def = dragon_pb2.OperatorDef(type=op_type, name=name)
op_def.input.extend(inputs)
op_def.output.extend(outputs)
if device_option is not None:
op_def.device_option.CopyFrom(device_option)
if 'random_seed' in kwargs:
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Structure to represent a tensor."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define the basic prototypes."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Generic interfaces of current default workspace."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Utilities for KPLRecord."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Utilities for TFRecord."""
from __future__ import absolute_import
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The activation ops."""
from __future__ import absolute_import
from __future__ import division
......@@ -30,7 +31,7 @@ def dropout(inputs, prob=0.5, scale=True, **kwargs):
The **Dropout** function is defined as:
.. math:: \text{Dropout}(x) = x * \text{Bernoulli}(p=1 - prob)
.. math:: \text{Dropout}(x) = x * (r \sim \mathcal{B}(1, 1 - \text{prob}))
Examples:
......@@ -85,10 +86,11 @@ def drop_block2d(
The **DropBlock** function is defined as:
.. math::
\text{DropBlock}(x) = x \cdot \text{Bernoulli}(\alpha\cdot\gamma) \\
\quad \\ \text{where}\quad \gamma =
\frac{keep\_prob}{block\_size^{n}}
\frac{feat\_size^{n}}{(feat\_size - block\_size + 1)^n}
\text{DropBlock}(x_{ijk}) =
x_{ijk} * (r_{ik} \sim \mathcal{B}(1, \alpha\gamma)) \\ \quad \\
\text{where}\quad \gamma =
\frac{\text{keep\_prob}}{\text{block\_size}^{n}}
\frac{\text{feat\_size}^{n}}{(\text{feat\_size} - \text{block\_size} + 1)^n}
Set the ``decrement`` to schedule ``keep_prob`` from **1.0**.
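Plugging illustrative values into the :math:`\gamma` formula as written above (the numbers are examples, not defaults from the code):

```python
keep_prob, block_size, feat_size, n = 0.9, 7, 28, 2
gamma = (keep_prob / block_size ** n) * \
        (feat_size ** n / (feat_size - block_size + 1) ** n)
print(round(gamma, 4))  # 0.0298
```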
......@@ -103,7 +105,7 @@ def drop_block2d(
keep_prob : Union[float, dragon.Tensor], optional, default=0.9
The keeping prob.
alpha : float, optional, default=1.
The scale factor to :math:`\gamma`.
The value to :math:`\gamma`.
decrement : float, optional, default=0.
The decrement value to ``keep_prob``.
data_format : {'NCHW', 'NHWC'}, optional
......@@ -141,7 +143,7 @@ def drop_path(inputs, prob=0.2, increment=0., **kwargs):
The **DropPath** function is defined as:
.. math:: \text{DropPath}(x) = x * \text{Bernoulli}(p=1 - prob)
.. math:: \text{DropPath}(x_{ij}) = x_{ij} * (r_{i} \sim \mathcal{B}(1, 1 - \text{prob}))
Set the ``increment`` to schedule ``prob`` from **0.0** after each run.
......@@ -187,7 +189,7 @@ def elu(inputs, alpha=1., **kwargs):
\text{ELU}(x) =
\begin{cases}
x, & \text{ if } x \geq 0 \\
\alpha * (e^{x} - 1), & \text{ otherwise }
\alpha * (\exp(x) - 1), & \text{ otherwise }
\end{cases}
Examples:
......@@ -273,7 +275,7 @@ def log_softmax(inputs, axis=-1, **kwargs):
The **LogSoftmax** function is defined as:
.. math:: \text{LogSoftmax}(x) = \log(\frac{e^{x_{i}}}{\sum e^{x_{j}}})
.. math:: \text{LogSoftmax}(x) = \log(\frac{\exp(x_{i})}{\sum \exp(x_{j})})
The argument ``axis`` could be negative:
......@@ -451,7 +453,7 @@ def selu(inputs, alpha=1.67326, gamma=1.0507, **kwargs):
\text{SELU}(x) = \gamma *
\begin{cases}
x, & \text{ if } x \geq 0 \\
\alpha * (e^{x} - 1), & \text{ otherwise }
\alpha * (\exp(x) - 1), & \text{ otherwise }
\end{cases}
Examples:
......@@ -496,7 +498,7 @@ def sigmoid(inputs, **kwargs):
The **Sigmoid** function is defined as:
.. math:: \text{Sigmoid}(x) = \frac{1}{1 + e^{-x}}
.. math:: \text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)}
Examples:
......@@ -533,7 +535,7 @@ def softmax(inputs, axis=-1, **kwargs):
The **Softmax** function is defined as:
.. math:: \text{Softmax}(x) = \frac{e^{x_{i}}}{\sum e^{x_{j}}}
.. math:: \text{Softmax}(x_{i}) = \frac{\exp(x_{i})}{\sum_{j} \exp(x_{j})}
The argument ``axis`` could be negative:
......@@ -569,11 +571,11 @@ def softmax(inputs, axis=-1, **kwargs):
@OpSchema.num_inputs(1)
def tanh(inputs, **kwargs):
r"""Apply the tanh function.
r"""Compute the tanh of input.
The **Tanh** function is defined as:
.. math:: \text{Tanh}(x) = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}
.. math:: \text{Tanh}(x) = \frac{\exp(x) - \exp(-x)}{\exp(x) + \exp(-x)}
Examples:
......
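The normalized Softmax and LogSoftmax notations can be sanity-checked numerically; an illustrative NumPy snippet independent of the framework:

```python
import numpy as np

x = np.array([1.0, 2.0, 3.0])

softmax = np.exp(x) / np.exp(x).sum()
log_softmax = np.log(softmax)

assert np.isclose(softmax.sum(), 1.0)
assert np.allclose(log_softmax, x - np.log(np.exp(x).sum()))
```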
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The activation ops library."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The array ops."""
from __future__ import absolute_import
from __future__ import division
......@@ -814,8 +815,8 @@ def moments(inputs, axis=None, keep_dims=False, **kwargs):
.. math::
\begin{cases}
\text{Mean}(x) = \frac{1}{n}\sum(x) \\
\text{Variance}(x) = \frac{1}{n}\sum(x - \text{Mean}(x))^{2}
\text{mean} = \frac{1}{n}\sum(\text{input}) \\
\text{variance} = \frac{1}{n}\sum(\text{input} - \text{mean})^{2}
\end{cases}
The argument ``axis`` could be negative or **None**:
......@@ -910,7 +911,7 @@ def multinomial(inputs, num_samples=1, eps=0., normalize=False, **kwargs):
def nonzero(inputs, **kwargs):
r"""Return the index of non-zero elements.
.. math:: \text{out} = \{i, \text{ if } \text{input}[i] \neq 0
.. math:: \text{out} = \{i\}, \text{ if } \text{input}[i] \neq 0
Parameters
----------
......@@ -958,7 +959,7 @@ def one_hot(inputs, depth, on_value=1, off_value=0, **kwargs):
Parameters
----------
inputs : dragon.Tensor
The tensor :math:`x`.
The input tensor.
depth : int
The depth of representation.
on_value : int, optional, default=1
......@@ -1497,18 +1498,18 @@ def where(inputs, **kwargs):
r"""Select the elements from two branches under the condition.
.. math::
y[i] =
\text{out}[i] =
\begin{cases}
a[i] & \text{ if } \text{cond}[i] \text{ is True } \\
b[i], & \text{ otherwise }
\text{input1}[i] & \text{ if } \text{condition}[i] \text{ is True } \\
\text{input2}[i], & \text{ otherwise }
\end{cases}
Return the indices of **True** elements, if only the ``cond`` is given.
Return the index of **True** elements, if only the ``condition`` is given.
Parameters
----------
inputs : Sequence[dragon.Tensor]
The tensor :math:`a`, :math:`b`, and :math:`\text{cond}`.
The input1, input2 and condition tensor.
Returns
-------
......@@ -1517,7 +1518,7 @@ def where(inputs, **kwargs):
See Also
--------
`dragon.nonzero(...)`_ : Return the indices of non-zero elements.
`dragon.nonzero(...)`_ : Return the index of non-zero elements.
"""
if types.is_tensor(inputs) or len(inputs) == 1:
......
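The reworded `where`/`nonzero` semantics follow the NumPy equivalents; an illustrative NumPy analogy (this is NumPy, not the dragon API):

```python
import numpy as np

condition = np.array([True, False, True])
input1 = np.array([1, 2, 3])
input2 = np.array([10, 20, 30])

# Select from input1 where the condition holds, else from input2.
print(np.where(condition, input1, input2))  # [ 1 20  3]

# With only the condition, return the index of the True (non-zero) elements.
print(np.where(condition))  # (array([0, 2]),)
```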
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The array ops library."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The control flow ops."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The control flow ops library."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The distributed ops."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The distributed ops library."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The framework ops."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The init ops."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The init ops library."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The loss ops."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The loss ops library."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The math ops library."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The metric ops."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The metric ops library."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The normalization ops."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The normalization ops library."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The rnn ops."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The rnn ops library."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Bind tensor methods executed eagerly."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Bind tensor methods executed symbolically."""
from __future__ import absolute_import
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The training ops library."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Utilities to simplify the exporting of operators."""
from __future__ import absolute_import
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The vision ops."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The vision ops library."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The Adam optimizers."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The optimizer to update parameters."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The RMSprop optimizers."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The SGD optimizers."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define the common used math functions."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Registry utilities."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define the common thread local structures."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Toolkit for manipulating the onnx api."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Utilities to a too simple ONNX exporting or importing."""
from __future__ import absolute_import
......
......@@ -8,7 +8,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Import the Keras API for TensorFlow."""
from __future__ import absolute_import
......
......@@ -32,7 +32,7 @@ def elu(x, alpha=1., **kwargs):
\text{ELU}(x) =
\begin{cases}
x, & \text{ if } x \geq 0 \\
\alpha * (e^{x} - 1), & \text{ otherwise }
\alpha * (\exp(x) - 1), & \text{ otherwise }
\end{cases}
Examples:
......@@ -45,7 +45,7 @@ def elu(x, alpha=1., **kwargs):
Parameters
----------
x : dragon.Tensor
The tensor :math:`x`.
The input tensor.
alpha : float, optional, default=1.
The value to :math:`\alpha`.
......@@ -63,7 +63,7 @@ def exponential(x):
The **Exponential** function is defined as:
.. math:: \text{out} = e^{x}
.. math:: \text{Exp}(x) = \exp(x)
Examples:
......@@ -75,7 +75,7 @@ def exponential(x):
Parameters
----------
x : dragon.Tensor
The tensor :math:`x`.
The input tensor.
Returns
-------
......@@ -139,7 +139,7 @@ def relu(x, alpha=0, max_value=None, **kwargs):
x : dragon.Tensor
The input tensor.
alpha : number, optional, default=0
The valve of :math:`\alpha`.
The value to :math:`\alpha`.
max_value : number, optional
The value to :math:`v_{max}`.
......@@ -161,7 +161,7 @@ def selu(x, **kwargs):
\text{SELU}(x) = 1.0507 *
\begin{cases}
x, & \text{ if } x \geq 0 \\
1.67326 * (e^{x} - 1), & \text{ otherwise }
1.67326 * (\exp(x) - 1), & \text{ otherwise }
\end{cases}
Examples:
......@@ -174,7 +174,7 @@ def selu(x, **kwargs):
Parameters
----------
x : dragon.Tensor
The tensor :math:`x`.
The input tensor.
Returns
-------
......@@ -190,7 +190,7 @@ def sigmoid(x, **kwargs):
The **Sigmoid** function is defined as:
.. math:: \text{Sigmoid}(x) = \frac{1}{1 + e^{-x}}
.. math:: \text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)}
Examples:
......@@ -218,7 +218,7 @@ def softmax(x, axis=-1, **kwargs):
The **Softmax** function is defined as:
.. math:: \text{Softmax}(x) = \frac{e^{x_{i}}}{\sum e^{x_{j}}}
.. math:: \text{Softmax}(x_{i}) = \frac{\exp(x_{i})}{\sum_{j} \exp(x_{j})}
Examples:
......@@ -230,7 +230,7 @@ def softmax(x, axis=-1, **kwargs):
Parameters
----------
x : dragon.Tensor
The tensor :math:`x`.
The input tensor.
axis : int, optional, default=-1
The axis to reduce.
......@@ -248,7 +248,7 @@ def tanh(x, **kwargs):
The **Tanh** function is defined as:
.. math:: \text{Tanh}(x) = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}
.. math:: \text{Tanh}(x) = \frac{\exp(x) - \exp(-x)}{\exp(x) + \exp(-x)}
Examples:
......@@ -281,5 +281,4 @@ def get(identifier):
else:
raise TypeError(
'Could not interpret activation function identifier: {}.'
.format(repr(identifier))
)
.format(repr(identifier)))
......@@ -31,7 +31,7 @@ class ELU(Layer):
\text{ELU}(x) =
\begin{cases}
x, & \text{ if } x \geq 0 \\
\alpha * (e^{x} - 1), & \text{ otherwise }
\alpha * (\exp(x) - 1), & \text{ otherwise }
\end{cases}
Examples:
......@@ -162,7 +162,7 @@ class SELU(Layer):
\text{SELU}(x) = 1.0507 *
\begin{cases}
x, & \text{ if } x \geq 0 \\
1.67326 * (e^{x} - 1), & \text{ otherwise }
1.67326 * (\exp(x) - 1), & \text{ otherwise }
\end{cases}
Examples:
......@@ -188,7 +188,7 @@ class Softmax(Layer):
The **Softmax** function is defined as:
.. math:: \text{Softmax}(x) = \frac{e^{x_{i}}}{\sum e^{x_{j}}}
.. math:: \text{Softmax}(x_{i}) = \frac{\exp(x_{i})}{\sum_{j} \exp(x_{j})}
Examples:
......
......@@ -47,9 +47,7 @@ class _Merge(Layer):
class Add(_Merge):
r"""The layer to add a sequence of inputs.
.. math:: \text{out} = \sum(x)
"""The layer to add a sequence of inputs.
Examples:
......@@ -75,7 +73,7 @@ class Add(_Merge):
class Concatenate(_Merge):
r"""The layer to concatenate a sequence of inputs.
"""The layer to concatenate a sequence of inputs.
Examples:
......@@ -103,7 +101,7 @@ class Concatenate(_Merge):
class Maximum(_Merge):
r"""The layer to compute the minimum of a sequence of inputs.
"""The layer to compute the minimum of a sequence of inputs.
Examples:
......@@ -126,7 +124,7 @@ class Maximum(_Merge):
class Minimum(_Merge):
r"""The layer to compute the minimum of a sequence of inputs.
"""The layer to compute the minimum of a sequence of inputs.
Examples:
......@@ -149,9 +147,7 @@ class Minimum(_Merge):
class Multiply(_Merge):
r"""The layer to multiply a sequence of inputs.
.. math:: \text{out} = \prod(x)
"""The layer to multiply a sequence of inputs.
Examples:
......@@ -174,9 +170,7 @@ class Multiply(_Merge):
class Subtract(_Merge):
r"""The layer to subtract two inputs.
.. math:: \text{out} = x - y
"""The layer to subtract two inputs.
Examples:
......
......@@ -12,7 +12,6 @@
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/regularizers.py>
#
# ------------------------------------------------------------
"""Built-in regularizers."""
from __future__ import absolute_import
......
......@@ -12,6 +12,7 @@
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/array_ops.py>
#
# ------------------------------------------------------------
"""The array ops."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -12,6 +12,7 @@
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/bitwise_ops.py>
#
# ------------------------------------------------------------
"""The bitwise ops."""
from __future__ import absolute_import
from __future__ import division
......@@ -23,7 +24,7 @@ from dragon.core.ops import math_ops
def bitwise_and(x, y, name=None):
r"""Compute the element-wise AND bitwise operation.
.. math:: \text{out} = x \mathbin{\&} y
.. math:: \text{out} = \text{input1} \mathbin{\&} \text{input2}
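A quick NumPy illustration of the element-wise AND (the OR, XOR, and invert ops below follow the same element-wise pattern):

```python
import numpy as np

a = np.array([0b1100, 0b1010], dtype=np.int32)
b = np.array([0b1010, 0b0110], dtype=np.int32)
print(a & b)  # [8 2]  i.e. [0b1000, 0b0010]
```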
Examples:
......@@ -37,9 +38,9 @@ def bitwise_and(x, y, name=None):
Parameters
----------
x : dragon.Tensor
The tensor :math:`x`.
The input1 tensor.
y : dragon.Tensor
The tensor :math:`y`.
The input2 tensor.
name : str, optional
        An optional name for the operation.
......@@ -55,7 +56,7 @@ def bitwise_and(x, y, name=None):
def bitwise_or(x, y, name=None):
r"""Compute the element-wise OR bitwise operation.
.. math:: \text{out} = x \mathbin{|} y
.. math:: \text{out} = \text{input1} \mathbin{|} \text{input2}
Examples:
......@@ -69,9 +70,9 @@ def bitwise_or(x, y, name=None):
Parameters
----------
x : dragon.Tensor
The tensor :math:`x`.
The input1 tensor.
y : dragon.Tensor
The tensor :math:`y`.
The input2 tensor.
name : str, optional
        An optional name for the operation.
......@@ -87,7 +88,7 @@ def bitwise_or(x, y, name=None):
def bitwise_xor(x, y, name=None):
r"""Compute the element-wise XOR bitwise operation.
.. math:: \text{out} = x \oplus y
.. math:: \text{out} = \text{input1} \oplus \text{input2}
Examples:
......@@ -101,9 +102,9 @@ def bitwise_xor(x, y, name=None):
Parameters
----------
x : dragon.Tensor
The tensor :math:`x`.
The input1 tensor.
y : dragon.Tensor
The tensor :math:`y`.
The input2 tensor.
name : str, optional
        An optional name for the operation.
......@@ -119,7 +120,7 @@ def bitwise_xor(x, y, name=None):
def invert(x, name=None):
r"""Invert each bit of input.
.. math:: \text{out} = \,\,\sim x
.. math:: \text{out} = \,\,\sim \text{input}
Examples:
......@@ -136,7 +137,7 @@ def invert(x, name=None):
Parameters
----------
x : dragon.Tensor
The tensor :math:`x`.
The input tensor.
name : str, optional
        An optional name for the operation.
......
......@@ -12,6 +12,7 @@
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/clip_ops.py>
#
# ------------------------------------------------------------
"""The clip ops."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Grad implementation."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -12,6 +12,7 @@
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/init_ops.py>
#
# ------------------------------------------------------------
"""The init ops."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -12,6 +12,7 @@
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/linalg_ops.py>
#
# ------------------------------------------------------------
"""The linalg ops."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -12,6 +12,7 @@
# <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/math_ops.py>
#
# ------------------------------------------------------------
"""The math ops."""
from __future__ import absolute_import
from __future__ import division
......@@ -25,7 +26,7 @@ from dragon.core.ops import math_ops
def abs(x, name=None):
r"""Compute the absolute value of input.
.. math:: \text{out} = \left| x \right|
.. math:: \text{out} = \left| \text{input} \right|
Examples:
......@@ -52,7 +53,7 @@ def abs(x, name=None):
def add(x, y, name=None):
r"""Compute the element-wise addition.
.. math:: \text{out} = x + y
.. math:: \text{out} = \text{input1} + \text{input2}
```python
x = tf.constant(1)
......@@ -64,9 +65,9 @@ def add(x, y, name=None):
Parameters
----------
x : dragon.Tensor
The input tensor.
The input1 tensor.
y : dragon.Tensor
The tensor :math:`y`.
The input2 tensor.
name : str, optional
        An optional name for the operation.
......@@ -82,7 +83,7 @@ def add(x, y, name=None):
def add_n(inputs, name=None):
r"""Compute the element-wise sum on a sequence of inputs.
.. math:: \text{out} = \sum(x)
.. math:: \text{out} = \sum(\text{input}_{i})
Examples:
......@@ -214,7 +215,7 @@ def cast(x, dtype, name=None):
def ceil(x, name=None):
r"""Compute the smallest integer not less than input.
.. math:: \text{out} = \lceil x \rceil
.. math:: \text{out} = \lceil \text{input} \rceil
Examples:
......@@ -242,7 +243,7 @@ def ceil(x, name=None):
def cos(x, name=None):
r"""Compute the cos of input.
.. math:: \text{out} = \cos(x)
.. math:: \text{out} = \cos(\text{input})
Examples:
......@@ -327,7 +328,7 @@ def cumsum(x, axis=0, exclusive=False, reverse=False, name=None):
def divide(x, y, name=None):
r"""Compute the element-wise division.
.. math:: \text{out} = x \div y
.. math:: \text{out} = \text{input1} \div \text{input2}
Examples:
......@@ -341,9 +342,9 @@ def divide(x, y, name=None):
Parameters
----------
x : dragon.Tensor
The input tensor.
The input1 tensor.
y : dragon.Tensor
The tensor :math:`y`.
The input2 tensor.
name : str, optional
        An optional name for the operation.
......@@ -359,7 +360,7 @@ def divide(x, y, name=None):
def equal(x, y, name=None):
r"""Compute the element-wise equal comparison.
.. math:: \text{out} = (x == y)
.. math:: \text{out} = (\text{input1} == \text{input2})
Examples:
......@@ -374,9 +375,9 @@ def equal(x, y, name=None):
Parameters
----------
x : dragon.Tensor
The input tensor.
The input1 tensor.
y : dragon.Tensor
The tensor :math:`y`.
The input2 tensor.
name : str, optional
        An optional name for the operation.
......@@ -392,7 +393,7 @@ def equal(x, y, name=None):
def exp(x, name=None):
r"""Compute the exponential of input.
.. math:: \text{out} = e^{x}
.. math:: \text{out} = \exp(\text{input})
Examples:
......@@ -420,7 +421,7 @@ def exp(x, name=None):
def floor(x, name=None):
r"""Compute the largest integer not greater than input.
.. math:: \text{out} = \lfloor x \rfloor
.. math:: \text{out} = \lfloor \text{input} \rfloor
Examples:
......@@ -448,7 +449,7 @@ def floor(x, name=None):
def greater(x, y, name=None):
r"""Compute the element-wise greater comparison.
.. math:: \text{out} = (x > y)
.. math:: \text{out} = (\text{input1} > \text{input2})
Examples:
......@@ -463,9 +464,9 @@ def greater(x, y, name=None):
Parameters
----------
x : dragon.Tensor
The input tensor.
The input1 tensor.
y : dragon.Tensor
The tensor :math:`y`.
The input2 tensor.
name : str, optional
        An optional name for the operation.
......@@ -481,7 +482,7 @@ def greater(x, y, name=None):
def greater_equal(x, y, name=None):
r"""Compute the element-wise greater-equal comparison.
.. math:: \text{out} = (x >= y)
.. math:: \text{out} = (\text{input1} >= \text{input2})
Examples:
......@@ -496,9 +497,9 @@ def greater_equal(x, y, name=None):
Parameters
----------
x : dragon.Tensor
The input tensor.
The input1 tensor.
y : dragon.Tensor
The tensor :math:`y`.
The input2 tensor.
name : str, optional
        An optional name for the operation.
......@@ -514,7 +515,7 @@ def greater_equal(x, y, name=None):
def is_inf(x, name=None):
r"""Check if the elements of input are infinite.
.. math:: \text{out} = \text{isinf}(x)
.. math:: \text{out} = \text{isinf}(\text{input})
Examples:
......@@ -542,7 +543,7 @@ def is_inf(x, name=None):
def is_nan(x, name=None):
r"""Check if the elements of input are NaN.
.. math:: \text{out} = \text{isnan}(x)
.. math:: \text{out} = \text{isnan}(\text{input})
Examples:
......@@ -570,7 +571,7 @@ def is_nan(x, name=None):
def less(x, y, name=None):
r"""Compute the element-wise less comparison.
.. math:: \text{out} = (x < y)
.. math:: \text{out} = (\text{input1} < \text{input2})
Examples:
......@@ -585,9 +586,9 @@ def less(x, y, name=None):
Parameters
----------
x : dragon.Tensor
The input tensor.
The input1 tensor.
y : dragon.Tensor
The tensor :math:`y`.
The input2 tensor.
name : str, optional
        An optional name for the operation.
......@@ -603,7 +604,7 @@ def less(x, y, name=None):
def less_equal(x, y, name=None):
r"""Compute the element-wise less-equal comparison.
.. math:: \text{out} = (x <= y)
.. math:: \text{out} = (\text{input1} <= \text{input2})
Examples:
......@@ -618,9 +619,9 @@ def less_equal(x, y, name=None):
Parameters
----------
x : dragon.Tensor
The input tensor.
The input1 tensor.
y : dragon.Tensor
The tensor :math:`y`.
The input2 tensor.
name : str, optional
        An optional name for the operation.
......@@ -636,7 +637,7 @@ def less_equal(x, y, name=None):
def log(x, name=None):
r"""Compute the logarithm of input.
.. math:: \text{out} = \log(x)
.. math:: \text{out} = \log(\text{input})
Examples:
......@@ -670,7 +671,7 @@ def matmul(
):
r"""Compute the matrix multiplication.
.. math:: \text{out} = a \times b
.. math:: y = a \times b
The rank of ``a`` and ``b`` should be equal and >= 2:
......@@ -725,7 +726,7 @@ def matmul(
def multiply(x, y, name=None):
r"""Compute the element-wise multiplication.
.. math:: \text{out} = x \times y
.. math:: \text{out} = \text{input1} \times \text{input2}
Examples:
......@@ -739,9 +740,9 @@ def multiply(x, y, name=None):
Parameters
----------
x : dragon.Tensor
The input tensor.
The input1 tensor.
y : dragon.Tensor
The tensor :math:`y`.
The input2 tensor.
name : str, optional
        An optional name for the operation.
......@@ -757,7 +758,7 @@ def multiply(x, y, name=None):
def negative(x, name=None):
r"""Compute the element-wise negative.
.. math:: \text{out} = -x
.. math:: \text{out} = -\text{input}
```python
x = tf.constant([-1, 0, 1])
......@@ -783,7 +784,7 @@ def negative(x, name=None):
def not_equal(x, y, name=None):
r"""Compute the element-wise not-equal comparison.
.. math:: \text{out} = (x != y)
.. math:: \text{out} = (\text{input1} != \text{input2})
Examples:
......@@ -798,9 +799,9 @@ def not_equal(x, y, name=None):
Parameters
----------
x : dragon.Tensor
The input tensor.
The input1 tensor.
y : dragon.Tensor
The tensor :math:`y`.
The input2 tensor.
name : str, optional
        An optional name for the operation.
......@@ -816,7 +817,7 @@ def not_equal(x, y, name=None):
def pow(x, y, name=None):
r"""Compute the power of input.
.. math:: \text{out} = x^{y}
.. math:: \text{out} = \text{input}^{\text{exponent}}
The two inputs should be broadcast to each other:
......@@ -830,9 +831,9 @@ def pow(x, y, name=None):
Parameters
----------
x : Union[dragon.Tensor, number]
The input tensor :math:`x`.
The input tensor.
y : Union[dragon.Tensor, number]
The input tensor :math:`y`.
The exponent tensor.
name : str, optional
        An optional name for the operation.
......@@ -897,7 +898,7 @@ def range(start, limit=None, delta=1, dtype='int64', name=None):
def reciprocal(x, name=None):
r"""Compute the reciprocal of input.
.. math:: \text{out} = \frac{1}{x}
.. math:: \text{out} = \frac{1}{\text{input}}
Examples:
......@@ -1099,7 +1100,7 @@ def reduce_sum(input_tensor, axis=None, keepdims=False, name=None):
def round(x, name=None):
r"""Compute the nearest integer of input.
.. math:: \text{out} = \lfloor x \rceil
.. math:: \text{out} = \lfloor \text{input} \rceil
Examples:
......@@ -1127,7 +1128,7 @@ def round(x, name=None):
def rsqrt(x, name=None):
r"""Compute the reciprocal square root of input.
.. math:: \text{out} = \frac{1}{\sqrt{x}}
.. math:: \text{out} = \frac{1}{\sqrt{\text{input}}}
Examples:
......@@ -1153,11 +1154,9 @@ def rsqrt(x, name=None):
def sigmoid(x, name=None, **kwargs):
r"""Apply the sigmoid function.
r"""Compute the sigmoid function.
The **Sigmoid** function is defined as:
.. math:: \text{Sigmoid}(x) = \frac{1}{1 + e^{-x}}
.. math:: \text{out} = \frac{1}{1 + \exp(-\text{input})}
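A minimal NumPy sketch of this formula (``sigmoid_reference`` is an illustrative name):

```python
import numpy as np

def sigmoid_reference(x):
    # 1 / (1 + exp(-x))
    x = np.asarray(x, dtype=np.float64)
    return 1.0 / (1.0 + np.exp(-x))

print(sigmoid_reference([-1.0, 0.0, 1.0]))  # [0.26894142 0.5        0.73105858]
```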
Examples:
......@@ -1186,11 +1185,11 @@ def sign(x, name=None):
r"""Compute the sign indication of input.
.. math::
\text{out}_{i} =
\text{out}[i] =
\begin{cases}
-1, & \text{ if } x_{i} < 0 \\
0, & \text{ if } x_{i} = 0 \\
1, & \text{ if } x_{i} > 0
-1, & \text{ if } \text{input}[i] < 0 \\
0, & \text{ if } \text{input}[i] = 0 \\
1, & \text{ if } \text{input}[i] > 0
\end{cases}
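The piecewise definition above is exactly what ``numpy.sign`` computes, which makes a handy reference:

```python
import numpy as np

x = np.array([-2.5, 0.0, 3.0])
print(np.sign(x))  # [-1.  0.  1.]
```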
Examples:
......@@ -1219,7 +1218,7 @@ def sign(x, name=None):
def sin(x, name=None):
r"""Compute the sin of input.
.. math:: \text{out} = \sin(x)
.. math:: \text{out} = \sin(\text{input})
Examples:
......@@ -1247,7 +1246,7 @@ def sin(x, name=None):
def sqrt(x, name=None):
r"""Compute the square root of input.
.. math:: \text{out} = \sqrt{x}
.. math:: \text{out} = \sqrt{\text{input}}
Examples:
......@@ -1275,7 +1274,7 @@ def sqrt(x, name=None):
def square(x, name=None):
r"""Compute the square of input.
.. math:: \text{out} = x^{2}
.. math:: \text{out} = \text{input}^{2}
Examples:
......@@ -1303,7 +1302,7 @@ def square(x, name=None):
def subtract(x, y, name=None):
r"""Compute the element-wise subtraction.
.. math:: \text{out} = x - y
.. math:: \text{out} = \text{input1} - \text{input2}
Examples:
......@@ -1317,9 +1316,9 @@ def subtract(x, y, name=None):
Parameters
----------
x : dragon.Tensor
The input tensor.
The input1 tensor.
y : dragon.Tensor
The tensor :math:`y`.
The input2 tensor.
name : str, optional
        An optional name for the operation.
......@@ -1333,11 +1332,10 @@ def subtract(x, y, name=None):
def tanh(x, name=None, **kwargs):
r"""Apply the tanh function.
The **Tanh** function is defined as:
r"""Compute the tanh of input.
.. math:: \text{Tanh}(x) = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}
.. math:: \text{out} = \frac{\exp(\text{input}) - \exp(-\text{input})}
{\exp(\text{input}) + \exp(-\text{input})}
Examples:
......
......@@ -9,6 +9,8 @@
#
# ------------------------------------------------------------
"""The nn components."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The nn ops implementation."""
from __future__ import absolute_import
from __future__ import division
......@@ -140,8 +141,8 @@ def moments(x, axes=None, keepdims=False, name=None):
.. math::
\begin{cases}
\text{Mean}(x) = \frac{1}{n}\sum(x) \\
\text{Variance}(x) = \frac{1}{n}\sum(x - \text{Mean}(x))^{2}
\text{mean} = \frac{1}{n}\sum(\text{input}) \\
\text{variance} = \frac{1}{n}\sum(\text{input} - \text{mean})^{2}
\end{cases}
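A minimal NumPy sketch of these two statistics (``moments_reference`` is an illustrative name; note the population variance, i.e. division by ``n`` rather than ``n - 1``):

```python
import numpy as np

def moments_reference(x, axes=None, keepdims=False):
    # mean = sum(x) / n, variance = sum((x - mean)**2) / n
    x = np.asarray(x, dtype=np.float64)
    mean = x.mean(axis=axes, keepdims=keepdims)
    variance = x.var(axis=axes, keepdims=keepdims)  # ddof=0 -> divide by n
    return mean, variance

mean, variance = moments_reference([1.0, 2.0, 3.0, 4.0])
print(float(mean), float(variance))  # 2.5 1.25
```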
The argument ``axis`` could be negative or **None**:
......@@ -164,7 +165,7 @@ def moments(x, axes=None, keepdims=False, name=None):
Parameters
----------
x : dragon.Tensor
The tensor :math:`x`.
The input tensor.
axes : Union[int, Sequence[int]], optional
The axis to reduce.
keepdims : bool, optional, default=False
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The nn ops."""
from __future__ import absolute_import
from __future__ import division
......@@ -147,17 +148,7 @@ def convolution(
name=None,
**kwargs
):
r"""Apply the n-dimension convolution.
The spatial output dimension is computed as:
.. math::
\begin{cases}
\text{DK}_{size} = dilation *
(\text{K}_{size} - 1) + 1 \\
\text{Dim}_{out} = (\text{Dim}_{in} +
2 * pad - \text{DK}_{size}) / stride + 1
\end{cases}
"""Apply the n-dimension convolution.
Parameters
----------
......@@ -223,17 +214,7 @@ def conv_transpose(
dilations=None,
name=None,
):
r"""Apply the n-dimension deconvolution.
The spatial output dimension is computed as:
.. math::
\begin{cases}
\text{DK}_{size} = dilation *
(\text{K}_{size} - 1) + 1 \\
\text{Dim}_{out} = (\text{Dim}_{in} - 1) *
stride + \text{DK}_{size} - 2 * pad
\end{cases}
"""Apply the n-dimension deconvolution.
Parameters
----------
......@@ -306,17 +287,7 @@ def conv2d(
dilations=None,
name=None,
):
r"""Apply the 2d convolution.
The spatial output dimension is computed as:
.. math::
\begin{cases}
\text{DK}_{size} = dilation *
(\text{K}_{size} - 1) + 1 \\
\text{Dim}_{out} = (\text{Dim}_{in} +
2 * pad - \text{DK}_{size}) / stride + 1
\end{cases}
"""Apply the 2d convolution.
Parameters
----------
......@@ -354,17 +325,7 @@ def conv2d_transpose(
dilations=None,
name=None,
):
r"""Apply the 2d deconvolution.
The spatial output dimension is computed as:
.. math::
\begin{cases}
\text{DK}_{size} = dilation *
(\text{K}_{size} - 1) + 1 \\
\text{Dim}_{out} = (\text{Dim}_{in} - 1) *
stride + \text{DK}_{size} - 2 * pad
\end{cases}
"""Apply the 2d deconvolution.
Parameters
----------
......@@ -403,19 +364,9 @@ def depthwise_conv2d(
dilations=None,
name=None,
):
r"""Apply the 2d depthwise convolution.
"""Apply the 2d depthwise convolution.
`[Chollet, 2016] <https://arxiv.org/abs/1610.02357>`_.
The spatial output dimension is computed as:
.. math::
\begin{cases}
\text{DK}_{size} = dilation *
(\text{K}_{size} - 1) + 1 \\
\text{Dim}_{out} = (\text{Dim}_{in} +
2 * pad - \text{DK}_{size}) / stride + 1
\end{cases}
Parameters
----------
input : dragon.Tensor
......@@ -485,7 +436,7 @@ def elu(features, alpha=1., name=None, **kwargs):
\text{ELU}(x) =
\begin{cases}
x, & \text{ if } x \geq 0 \\
\alpha * (e^{x} - 1), & \text{ otherwise }
\alpha * (\exp(x) - 1), & \text{ otherwise }
\end{cases}
Parameters
......@@ -598,7 +549,7 @@ def log_softmax(logits, axis=-1, name=None):
The **LogSoftmax** function is defined as:
.. math:: \text{LogSoftmax}(x) = \log(\frac{e^{x_{i}}}{\sum e^{x_{j}}})
.. math:: \text{LogSoftmax}(x) = \log(\frac{\exp(x_{i})}{\sum \exp(x_{j})})
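A minimal NumPy sketch of this definition (``log_softmax_reference`` is an illustrative name; the max-shift keeps the logarithm away from underflowed zeros):

```python
import numpy as np

def log_softmax_reference(x, axis=-1):
    # log(exp(x_i) / sum_j exp(x_j)) = (x_i - m) - log(sum_j exp(x_j - m))
    x = np.asarray(x, dtype=np.float64)
    shifted = x - x.max(axis=axis, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))

print(log_softmax_reference([1.0, 2.0, 3.0]))  # [-2.40760596 -1.40760596 -0.40760596]
```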
The argument ``axis`` could be negative:
......@@ -797,7 +748,7 @@ def selu(features, name=None, **kwargs):
\text{SELU}(x) = 1.0507 *
\begin{cases}
x, & \text{ if } x \geq 0 \\
1.67326 * (e^{x} - 1), & \text{ otherwise }
1.67326 * (\exp(x) - 1), & \text{ otherwise }
\end{cases}
Examples:
......@@ -836,7 +787,7 @@ def softmax(logits, axis=-1, name=None, **kwargs):
The **Softmax** function is defined as:
.. math:: \text{Softmax}(x) = \frac{e^{x_{i}}}{\sum e^{x_{j}}}
.. math:: \text{Softmax}(x_{i}) = \frac{\exp(x_{i})}{\sum_{j} \exp(x_{j})}
The argument ``axis`` could be negative:
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The random ops."""
from __future__ import absolute_import
from __future__ import division
......@@ -26,9 +27,7 @@ def random_normal(
):
r"""Return a tensor initialized from normal distribution.
The **Normal** distribution is defined as:
.. math:: X \sim N(\mu, \sigma)
.. math:: \text{out} \sim \mathcal{N}(\mu, \sigma)
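A quick NumPy analogue of drawing from :math:`\mathcal{N}(\mu, \sigma)` (the ``loc``/``scale``/``size`` values here are just illustrative):

```python
import numpy as np

rng = np.random.default_rng(0)
sample = rng.normal(loc=0.0, scale=1.0, size=(2, 3))  # mu = 0, sigma = 1
print(sample.shape)  # (2, 3)
```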
Parameters
----------
......@@ -65,9 +64,7 @@ def random_uniform(
):
r"""Return a tensor initialized from the uniform distribution.
The **Uniform** distribution is defined as:
.. math:: X \sim U(\alpha, \beta)
.. math:: \text{out} \sim \mathcal{U}(\alpha, \beta)
Parameters
----------
......@@ -104,10 +101,8 @@ def truncated_normal(
):
r"""Return a tensor initialized from the truncated normal distribution.
The **TruncatedNormal** distribution is defined as:
.. math::
X \sim TN(\mu, \sigma, \mu - 2\sigma, \mu + 2\sigma)
\text{out} \sim \mathcal{TN}(\mu, \sigma, \mu - 2\sigma, \mu + 2\sigma)
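A minimal rejection-sampling sketch of the truncation to :math:`[\mu - 2\sigma, \mu + 2\sigma]` (``truncated_normal_reference`` is an illustrative name; real implementations usually resample more cleverly):

```python
import numpy as np

def truncated_normal_reference(shape, mean=0.0, std=1.0, seed=0):
    # Redraw any value falling outside [mean - 2 * std, mean + 2 * std].
    rng = np.random.default_rng(seed)
    out = rng.normal(mean, std, size=shape)
    bad = np.abs(out - mean) > 2.0 * std
    while bad.any():
        out[bad] = rng.normal(mean, std, size=int(bad.sum()))
        bad = np.abs(out - mean) > 2.0 * std
    return out

x = truncated_normal_reference((4, 4))
print(bool(np.all(np.abs(x) <= 2.0)))  # True
```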
Parameters
----------
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The standard ops."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -8,6 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""The Variable class."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -9,7 +9,6 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Deep learning and Reinforcement learning library for Researchers and Engineers"""
from __future__ import absolute_import as _absolute_import
......