Commit 2c90589f by Ting PAN

Refactor vision operators

Summary:
This commit adds 1D and 3D support for vision operators
via a generalized ND implementation.
1 parent 60e5d25a
Showing with 1442 additions and 480 deletions
...@@ -84,7 +84,7 @@ class ArgMax(Layer): ...@@ -84,7 +84,7 @@ class ArgMax(Layer):
param = layer_param.argmax_param param = layer_param.argmax_param
if param.top_k != 1: if param.top_k != 1:
raise ValueError('Top-k argmax is not supported.') raise ValueError('Top-k argmax is not supported.')
self.arguments = {'axis': param.axis, 'keep_dims': True} self.arguments = {'axis': param.axis, 'keepdims': True}
def __call__(self, bottom): def __call__(self, bottom):
return array_ops.argmax(bottom, **self.arguments) return array_ops.argmax(bottom, **self.arguments)
......
...@@ -118,5 +118,8 @@ class Data(Layer): ...@@ -118,5 +118,8 @@ class Data(Layer):
'num_outputs': 2, 'num_outputs': 2,
} }
data, label = framework_ops.python_plugin([], **args) data, label = framework_ops.python_plugin([], **args)
data.shape = (self.data_args['batch_size'],
None, None, len(self.norm_args['mean']))
label.shape = (self.data_args['batch_size'], None)
data = array_ops.channel_normalize(data, **self.norm_args) data = array_ops.channel_normalize(data, **self.norm_args)
return data, label return data, label
...@@ -78,11 +78,12 @@ class Convolution(Layer): ...@@ -78,11 +78,12 @@ class Convolution(Layer):
def __call__(self, bottom): def __call__(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs] inputs = [bottom] + [blob['data'] for blob in self._blobs]
return vision_ops.conv2d(inputs, **self.arguments) conv_op = 'conv{}d'.format(len(bottom.shape) - 2)
return getattr(vision_ops, conv_op)(inputs, **self.arguments)
class Deconvolution(Convolution): class Deconvolution(Convolution):
r"""Apply the 2d deconvolution. r"""Apply the n-dimension deconvolution.
Examples: Examples:
...@@ -117,7 +118,8 @@ class Deconvolution(Convolution): ...@@ -117,7 +118,8 @@ class Deconvolution(Convolution):
def __call__(self, bottom): def __call__(self, bottom):
inputs = [bottom] + [blob['data'] for blob in self._blobs] inputs = [bottom] + [blob['data'] for blob in self._blobs]
return vision_ops.conv2d_transpose(inputs, **self.arguments) conv_op = 'conv{}d_transpose'.format(len(bottom.shape) - 2)
return getattr(vision_ops, conv_op)(inputs, **self.arguments)
class LRN(Layer): class LRN(Layer):
...@@ -186,7 +188,7 @@ class Pooling(Layer): ...@@ -186,7 +188,7 @@ class Pooling(Layer):
'ceil_mode': True, 'ceil_mode': True,
'mode': {0: 'MAX', 1: 'AVG'}[param.pool], 'mode': {0: 'MAX', 1: 'AVG'}[param.pool],
'data_format': 'NCHW', 'data_format': 'NCHW',
'global_pooling': param.global_pooling, 'global_pool': param.global_pooling,
} }
if not param.HasField('kernel_h'): if not param.HasField('kernel_h'):
self.arguments['kernel_shape'] = [param.kernel_size] self.arguments['kernel_shape'] = [param.kernel_size]
...@@ -202,7 +204,8 @@ class Pooling(Layer): ...@@ -202,7 +204,8 @@ class Pooling(Layer):
self.arguments['strides'] = [param.stride_h, param.stride_w] self.arguments['strides'] = [param.stride_h, param.stride_w]
def __call__(self, bottom): def __call__(self, bottom):
return vision_ops.pool2d(bottom, **self.arguments) pool_op = 'pool{}d'.format(len(bottom.shape) - 2)
return getattr(vision_ops, pool_op)(bottom, **self.arguments)
class ROIAlign(Layer): class ROIAlign(Layer):
......
...@@ -28,12 +28,30 @@ dragon.nn ...@@ -28,12 +28,30 @@ dragon.nn
`bias_add(...) <nn/bias_add.html>`_ `bias_add(...) <nn/bias_add.html>`_
: Add the bias across channels to input. : Add the bias across channels to input.
`conv(...) <nn/conv.html>`_
: Apply the n-dimension convolution.
`conv_transpose(...) <nn/conv_transpose.html>`_
: Apply the n-dimension deconvolution.
`conv1d(...) <nn/conv1d.html>`_
: Apply the 1d convolution.
`conv1d_transpose(...) <nn/conv1d_transpose.html>`_
: Apply the 1d deconvolution.
`conv2d(...) <nn/conv2d.html>`_ `conv2d(...) <nn/conv2d.html>`_
: Apply the 2d convolution. : Apply the 2d convolution.
`conv2d_transpose(...) <nn/conv2d_transpose.html>`_ `conv2d_transpose(...) <nn/conv2d_transpose.html>`_
: Apply the 2d deconvolution. : Apply the 2d deconvolution.
`conv3d(...) <nn/conv3d.html>`_
: Apply the 3d convolution.
`conv3d_transpose(...) <nn/conv3d_transpose.html>`_
: Apply the 3d deconvolution.
`depthwise_conv2d(...) <nn/depthwise_conv2d.html>`_ `depthwise_conv2d(...) <nn/depthwise_conv2d.html>`_
: Apply the 2d depthwise convolution. : Apply the 2d depthwise convolution.
...@@ -92,9 +110,15 @@ dragon.nn ...@@ -92,9 +110,15 @@ dragon.nn
: Apply the parametric rectified linear unit. : Apply the parametric rectified linear unit.
`[He et.al, 2015] <https://arxiv.org/abs/1502.01852>`_. `[He et.al, 2015] <https://arxiv.org/abs/1502.01852>`_.
`pool1d(...) <nn/pool1d.html>`_
: Apply the 1d pooling.
`pool2d(...) <nn/pool2d.html>`_ `pool2d(...) <nn/pool2d.html>`_
: Apply the 2d pooling. : Apply the 2d pooling.
`pool3d(...) <nn/pool3d.html>`_
: Apply the 3d pooling.
`relu(...) <nn/relu.html>`_ `relu(...) <nn/relu.html>`_
: Apply the rectified linear unit. : Apply the rectified linear unit.
`[Nair & Hinton, 2010] <http://www.csri.utoronto.ca/~hinton/absps/reluICML.pdf>`_. `[Nair & Hinton, 2010] <http://www.csri.utoronto.ca/~hinton/absps/reluICML.pdf>`_.
...@@ -129,8 +153,14 @@ dragon.nn ...@@ -129,8 +153,14 @@ dragon.nn
nn/RNN nn/RNN
nn/batch_norm nn/batch_norm
nn/bias_add nn/bias_add
nn/conv
nn/conv_transpose
nn/conv1d
nn/conv1d_transpose
nn/conv2d nn/conv2d
nn/conv2d_transpose nn/conv2d_transpose
nn/conv3d
nn/conv3d_transpose
nn/depthwise_conv2d nn/depthwise_conv2d
nn/depth_to_space nn/depth_to_space
nn/dropout nn/dropout
...@@ -146,7 +176,10 @@ dragon.nn ...@@ -146,7 +176,10 @@ dragon.nn
nn/leaky_relu nn/leaky_relu
nn/local_response_norm nn/local_response_norm
nn/log_softmax nn/log_softmax
nn/pool
nn/pool1d
nn/pool2d nn/pool2d
nn/pool3d
nn/prelu nn/prelu
nn/relu nn/relu
nn/relu6 nn/relu6
......
conv
====
.. autofunction:: dragon.nn.conv
.. raw:: html
<style>
h1:before {
content: "dragon.nn.";
color: #103d3e;
}
</style>
conv1d
======
.. autofunction:: dragon.nn.conv1d
.. raw:: html
<style>
h1:before {
content: "dragon.nn.";
color: #103d3e;
}
</style>
conv1d_transpose
================
.. autofunction:: dragon.nn.conv1d_transpose
.. raw:: html
<style>
h1:before {
content: "dragon.nn.";
color: #103d3e;
}
</style>
conv3d
======
.. autofunction:: dragon.nn.conv3d
.. raw:: html
<style>
h1:before {
content: "dragon.nn.";
color: #103d3e;
}
</style>
conv3d_transpose
================
.. autofunction:: dragon.nn.conv3d_transpose
.. raw:: html
<style>
h1:before {
content: "dragon.nn.";
color: #103d3e;
}
</style>
conv_transpose
==============
.. autofunction:: dragon.nn.conv_transpose
.. raw:: html
<style>
h1:before {
content: "dragon.nn.";
color: #103d3e;
}
</style>
pool
====
.. autofunction:: dragon.nn.pool
.. raw:: html
<style>
h1:before {
content: "dragon.nn.";
color: #103d3e;
}
</style>
pool1d
======
.. autofunction:: dragon.nn.pool1d
.. raw:: html
<style>
h1:before {
content: "dragon.nn.";
color: #103d3e;
}
</style>
pool3d
======
.. autofunction:: dragon.nn.pool3d
.. raw:: html
<style>
h1:before {
content: "dragon.nn.";
color: #103d3e;
}
</style>
...@@ -91,7 +91,7 @@ Name Supported Reference ...@@ -91,7 +91,7 @@ Name Supported Reference
`InstanceNormalization`_ |v| :func:`dragon.nn.instance_norm` `InstanceNormalization`_ |v| :func:`dragon.nn.instance_norm`
`IsInf`_ |v| :func:`dragon.math.is_inf` `IsInf`_ |v| :func:`dragon.math.is_inf`
`IsNaN`_ |v| :func:`dragon.math.is_nan` `IsNaN`_ |v| :func:`dragon.math.is_nan`
`LRN`_ |v| :func:`dragon.nn.lrn` `LRN`_ |v| :func:`dragon.nn.local_response_norm`
`LSTM`_ |v| :func:`dragon.nn.LSTM` `LSTM`_ |v| :func:`dragon.nn.LSTM`
`LeakyRelu`_ |v| :func:`dragon.nn.leaky_relu` `LeakyRelu`_ |v| :func:`dragon.nn.leaky_relu`
`Less`_ |v| :func:`dragon.math.less` `Less`_ |v| :func:`dragon.math.less`
...@@ -183,7 +183,7 @@ Name Supported Reference ...@@ -183,7 +183,7 @@ Name Supported Reference
`TopK`_ |v| :func:`dragon.math.top_k` `TopK`_ |v| :func:`dragon.math.top_k`
`Transpose`_ |v| :func:`dragon.transpose` `Transpose`_ |v| :func:`dragon.transpose`
`Unique`_ |v| :func:`dragon.unique` `Unique`_ |v| :func:`dragon.unique`
`Unsqueeze`_ |v| :func:`dragon.unsqueeze` `Unsqueeze`_ |v| :func:`dragon.expand_dims`
`Upsample`_ |v| :func:`dragon.vision.resize` `Upsample`_ |v| :func:`dragon.vision.resize`
`Where`_ |v| :func:`dragon.where` `Where`_ |v| :func:`dragon.where`
`Xor`_ |v| :func:`dragon.bitwise.bitwise_xor` `Xor`_ |v| :func:`dragon.bitwise.bitwise_xor`
......
...@@ -7,106 +7,172 @@ layers ...@@ -7,106 +7,172 @@ layers
------- -------
`class Add <layers/Add.html>`_ `class Add <layers/Add.html>`_
: The layer to add a sequence of inputs. : Layer to add a sequence of inputs.
`class AveragePooling1D <layers/AveragePooling1D.html>`_
: 1D average pooling layer.
`class AveragePooling2D <layers/AveragePooling2D.html>`_ `class AveragePooling2D <layers/AveragePooling2D.html>`_
: The average 2d pooling layer. : 2D average pooling layer.
`class AveragePooling3D <layers/AveragePooling3D.html>`_
: 3D average pooling layer.
`class BatchNormalization <layers/BatchNormalization.html>`_ `class BatchNormalization <layers/BatchNormalization.html>`_
: The batch normalization layer. : Batch normalization layer.
`[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_. `[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_.
`class Concatenate <layers/Concatenate.html>`_ `class Concatenate <layers/Concatenate.html>`_
: The layer to concatenate a sequence of inputs. : Layer to concatenate a sequence of inputs.
`class Conv1D <layers/Conv1D.html>`_
: 1D convolution layer.
`class Conv1DTranspose <layers/Conv1DTranspose.html>`_
: 1D deconvolution layer.
`class Conv2D <layers/Conv2D.html>`_ `class Conv2D <layers/Conv2D.html>`_
: The 2d convolution layer. : 2D convolution layer.
`class Conv2DTranspose <layers/Conv2DTranspose.html>`_ `class Conv2DTranspose <layers/Conv2DTranspose.html>`_
: The 2d deconvolution layer. : 2D deconvolution layer.
`class Conv3D <layers/Conv3D.html>`_
: 3D convolution layer.
`class Conv3DTranspose <layers/Conv3DTranspose.html>`_
: 3D deconvolution layer.
`class Dense <layers/Dense.html>`_ `class Dense <layers/Dense.html>`_
: The fully-connected layer. : Fully-connected layer.
`class DepthwiseConv2D <layers/DepthwiseConv2D.html>`_ `class DepthwiseConv2D <layers/DepthwiseConv2D.html>`_
: The 2d depthwise convolution layer. : 2D depthwise convolution layer.
`[Chollet, 2016] <https://arxiv.org/abs/1610.02357>`_. `[Chollet, 2016] <https://arxiv.org/abs/1610.02357>`_.
`class Dropout <layers/Dropout.html>`_ `class Dropout <layers/Dropout.html>`_
: The dropout layer. : Layer to apply the dropout function.
`[Srivastava et.al, 2014] <http://jmlr.org/papers/v15/srivastava14a.html>`_. `[Srivastava et.al, 2014] <http://jmlr.org/papers/v15/srivastava14a.html>`_.
`class ELU <layers/ELU.html>`_ `class ELU <layers/ELU.html>`_
: The layer to apply the exponential linear unit. : Layer to apply the exponential linear unit.
`[Clevert et.al, 2015] <https://arxiv.org/abs/1511.07289>`_. `[Clevert et.al, 2015] <https://arxiv.org/abs/1511.07289>`_.
`class Flatten <layers/Flatten.html>`_ `class Flatten <layers/Flatten.html>`_
: The layer to reshape input into a matrix. : Layer to reshape input into a matrix.
`class GlobalAveragePooling1D <layers/GlobalAveragePooling1D.html>`_
: 1D global average pooling layer.
`class GlobalAveragePooling2D <layers/GlobalAveragePooling2D.html>`_ `class GlobalAveragePooling2D <layers/GlobalAveragePooling2D.html>`_
: The global average 2d pooling layer. : 2D global average pooling layer.
`class GlobalAveragePooling3D <layers/GlobalAveragePooling3D.html>`_
: 3D global average pooling layer.
`class GlobalMaxPool1D <layers/GlobalMaxPool1D.html>`_
: 1D global max pooling layer.
`class GlobalMaxPool2D <layers/GlobalMaxPool2D.html>`_ `class GlobalMaxPool2D <layers/GlobalMaxPool2D.html>`_
: The global max 2d pooling layer. : 2D global max pooling layer.
`class GlobalMaxPool3D <layers/GlobalMaxPool3D.html>`_
: 3D global max pooling layer.
`class Layer <layers/Layer.html>`_ `class Layer <layers/Layer.html>`_
: The base class of layers. : The base class of layers.
`class LeakyReLU <layers/LeakyReLU.html>`_ `class LeakyReLU <layers/LeakyReLU.html>`_
: The layer to apply the leaky rectified linear unit. : Layer to apply the leaky rectified linear unit.
`class Maximum <layers/Maximum.html>`_ `class Maximum <layers/Maximum.html>`_
: The layer to compute the maximum of a sequence of inputs. : Layer to compute the maximum of a sequence of inputs.
`class MaxPool1D <layers/MaxPool1D.html>`_
: 1D max pooling layer.
`class MaxPool2D <layers/MaxPool2D.html>`_ `class MaxPool2D <layers/MaxPool2D.html>`_
: The max 2d pooling layer. : 2D max pooling layer.
`class MaxPool3D <layers/MaxPool3D.html>`_
: 3D max pooling layer.
`class Minimum <layers/Minimum.html>`_ `class Minimum <layers/Minimum.html>`_
: The layer to compute the minimum of a sequence of inputs. : Layer to compute the minimum of a sequence of inputs.
`class Multiply <layers/Multiply.html>`_ `class Multiply <layers/Multiply.html>`_
: The layer to multiply a sequence of inputs. : Layer to multiply a sequence of inputs.
`class Permute <layers/Permute.html>`_ `class Permute <layers/Permute.html>`_
: The layer to permute the dimensions of input. : Layer to permute the dimensions of input.
`class Reshape <layers/Reshape.html>`_ `class Reshape <layers/Reshape.html>`_
: The layer to change the dimensions of input. : Layer to change the dimensions of input.
`class ReLU <layers/ReLU.html>`_ `class ReLU <layers/ReLU.html>`_
: The layer to apply the rectified linear unit. : Layer to apply the rectified linear unit.
`[Nair & Hinton, 2010] <http://www.csri.utoronto.ca/~hinton/absps/reluICML.pdf>`_. `[Nair & Hinton, 2010] <http://www.csri.utoronto.ca/~hinton/absps/reluICML.pdf>`_.
`class SELU <layers/SELU.html>`_ `class SELU <layers/SELU.html>`_
: Apply the scaled exponential linear unit. : Layer to apply the scaled exponential linear unit.
`[Klambauer et.al, 2017] <https://arxiv.org/abs/1706.02515>`_. `[Klambauer et.al, 2017] <https://arxiv.org/abs/1706.02515>`_.
`class Softmax <layers/Softmax.html>`_ `class Softmax <layers/Softmax.html>`_
: The layer to apply the softmax function. : Layer to apply the softmax function.
`class Subtract <layers/Subtract.html>`_ `class Subtract <layers/Subtract.html>`_
: The layer to subtract two inputs. : Layer to subtract two inputs.
`class UpSampling1D <layers/UpSampling1D.html>`_
: 1D upsampling layer.
`class UpSampling2D <layers/UpSampling2D.html>`_
: 2D upsampling layer.
`class UpSampling3D <layers/UpSampling3D.html>`_
: 3D upsampling layer.
`class ZeroPadding1D <layers/ZeroPadding1D.html>`_
: 1D zero padding layer.
`class ZeroPadding2D <layers/ZeroPadding2D.html>`_
: 2D zero padding layer.
`class ZeroPadding3D <layers/ZeroPadding3D.html>`_
: 3D zero padding layer.
.. toctree:: .. toctree::
:hidden: :hidden:
layers/Add layers/Add
layers/AveragePooling1D
layers/AveragePooling2D layers/AveragePooling2D
layers/AveragePooling3D
layers/BatchNormalization layers/BatchNormalization
layers/Concatenate layers/Concatenate
layers/Conv1D
layers/Conv1DTranspose
layers/Conv2D layers/Conv2D
layers/Conv2DTranspose layers/Conv2DTranspose
layers/Conv3D
layers/Conv3DTranspose
layers/Dense layers/Dense
layers/DepthwiseConv2D layers/DepthwiseConv2D
layers/Dropout layers/Dropout
layers/ELU layers/ELU
layers/Flatten layers/Flatten
layers/GlobalAveragePooling1D
layers/GlobalAveragePooling2D layers/GlobalAveragePooling2D
layers/GlobalAveragePooling3D
layers/GlobalMaxPool1D
layers/GlobalMaxPool2D layers/GlobalMaxPool2D
layers/GlobalMaxPool3D
layers/Layer layers/Layer
layers/LeakyReLU layers/LeakyReLU
layers/Maximum layers/Maximum
layers/MaxPool1D
layers/MaxPool2D layers/MaxPool2D
layers/MaxPool3D
layers/Minimum layers/Minimum
layers/Multiply layers/Multiply
layers/Permute layers/Permute
...@@ -115,6 +181,12 @@ layers ...@@ -115,6 +181,12 @@ layers
layers/SELU layers/SELU
layers/Softmax layers/Softmax
layers/Subtract layers/Subtract
layers/UpSampling1D
layers/UpSampling2D
layers/UpSampling3D
layers/ZeroPadding1D
layers/ZeroPadding2D
layers/ZeroPadding3D
.. raw:: html .. raw:: html
......
AveragePooling1D
================
.. autoclass:: dragon.vm.tensorflow.keras.layers.AveragePooling1D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.AveragePooling1D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
AveragePooling3D
================
.. autoclass:: dragon.vm.tensorflow.keras.layers.AveragePooling3D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.AveragePooling3D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
Conv1D
======
.. autoclass:: dragon.vm.tensorflow.keras.layers.Conv1D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.Conv1D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
Conv1DTranspose
===============
.. autoclass:: dragon.vm.tensorflow.keras.layers.Conv1DTranspose
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.Conv1DTranspose.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
Conv3D
======
.. autoclass:: dragon.vm.tensorflow.keras.layers.Conv3D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.Conv3D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
Conv3DTranspose
===============
.. autoclass:: dragon.vm.tensorflow.keras.layers.Conv3DTranspose
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.Conv3DTranspose.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
GlobalAveragePooling1D
======================
.. autoclass:: dragon.vm.tensorflow.keras.layers.GlobalAveragePooling1D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.GlobalAveragePooling1D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
GlobalAveragePooling3D
======================
.. autoclass:: dragon.vm.tensorflow.keras.layers.GlobalAveragePooling3D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.GlobalAveragePooling3D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
GlobalMaxPool1D
===============
.. autoclass:: dragon.vm.tensorflow.keras.layers.GlobalMaxPooling1D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.GlobalMaxPooling1D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
GlobalMaxPool3D
===============
.. autoclass:: dragon.vm.tensorflow.keras.layers.GlobalMaxPooling3D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.GlobalMaxPooling3D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
MaxPool1D
=========
.. autoclass:: dragon.vm.tensorflow.keras.layers.MaxPooling1D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.MaxPooling1D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
MaxPool3D
=========
.. autoclass:: dragon.vm.tensorflow.keras.layers.MaxPooling3D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.MaxPooling3D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
UpSampling1D
============
.. autoclass:: dragon.vm.tensorflow.keras.layers.UpSampling1D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.UpSampling1D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
UpSampling2D
============
.. autoclass:: dragon.vm.tensorflow.keras.layers.UpSampling2D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.UpSampling2D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
UpSampling3D
============
.. autoclass:: dragon.vm.tensorflow.keras.layers.UpSampling3D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.UpSampling3D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
ZeroPadding1D
=============
.. autoclass:: dragon.vm.tensorflow.keras.layers.ZeroPadding1D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.ZeroPadding1D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
ZeroPadding2D
=============
.. autoclass:: dragon.vm.tensorflow.keras.layers.ZeroPadding2D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.ZeroPadding2D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
ZeroPadding3D
=============
.. autoclass:: dragon.vm.tensorflow.keras.layers.ZeroPadding3D
__init__
--------
.. automethod:: dragon.vm.tensorflow.keras.layers.ZeroPadding3D.__init__
.. raw:: html
<style>
h1:before {
content: "tf.keras.layers.";
color: #103d3e;
}
</style>
...@@ -9,19 +9,40 @@ vm.tensorflow.nn ...@@ -9,19 +9,40 @@ vm.tensorflow.nn
`avg_pool(...) <nn/avg_pool.html>`_ `avg_pool(...) <nn/avg_pool.html>`_
: Apply the n-dimension average pooling. : Apply the n-dimension average pooling.
`avg_pool1d(...) <nn/avg_pool1d.html>`_
: Apply the 1d average pooling.
`avg_pool2d(...) <nn/avg_pool2d.html>`_ `avg_pool2d(...) <nn/avg_pool2d.html>`_
: Apply the 2d average pooling. : Apply the 2d average pooling.
`avg_pool3d(...) <nn/avg_pool3d.html>`_
: Apply the 3d average pooling.
`batch_normalization(...) <nn/batch_normalization.html>`_ `batch_normalization(...) <nn/batch_normalization.html>`_
: Apply the batch normalization. : Apply the batch normalization.
`[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_. `[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_.
`bias_add(...) <nn/bias_add.html>`_
: Add the bias across channels to input.
`conv1d(...) <nn/conv1d.html>`_
: Apply the 1d convolution.
`conv1d_transpose(...) <nn/conv1d_transpose.html>`_
: Apply the 1d deconvolution.
`conv2d(...) <nn/conv2d.html>`_ `conv2d(...) <nn/conv2d.html>`_
: Apply the 2d convolution. : Apply the 2d convolution.
`conv2d_transpose(...) <nn/conv2d_transpose.html>`_ `conv2d_transpose(...) <nn/conv2d_transpose.html>`_
: Apply the 2d deconvolution. : Apply the 2d deconvolution.
`conv3d(...) <nn/conv3d.html>`_
: Apply the 3d convolution.
`conv3d_transpose(...) <nn/conv3d_transpose.html>`_
: Apply the 3d deconvolution.
`convolution(...) <nn/convolution.html>`_ `convolution(...) <nn/convolution.html>`_
: Apply the n-dimension convolution. : Apply the n-dimension convolution.
...@@ -56,9 +77,15 @@ vm.tensorflow.nn ...@@ -56,9 +77,15 @@ vm.tensorflow.nn
`max_pool(...) <nn/max_pool.html>`_ `max_pool(...) <nn/max_pool.html>`_
: Apply the n-dimension max pooling. : Apply the n-dimension max pooling.
`max_pool(...) <nn/max_pool.html>`_ `max_pool1d(...) <nn/max_pool1d.html>`_
: Apply the 1d max pooling.
`max_pool2d(...) <nn/max_pool2d.html>`_
: Apply the 2d max pooling. : Apply the 2d max pooling.
`max_pool3d(...) <nn/max_pool3d.html>`_
: Apply the 3d max pooling.
`moments(...) <nn/moments.html>`_ `moments(...) <nn/moments.html>`_
: Compute the mean and variance of input along the given axes. : Compute the mean and variance of input along the given axes.
...@@ -94,10 +121,17 @@ vm.tensorflow.nn ...@@ -94,10 +121,17 @@ vm.tensorflow.nn
:hidden: :hidden:
nn/avg_pool nn/avg_pool
nn/avg_pool1d
nn/avg_pool2d nn/avg_pool2d
nn/avg_pool3d
nn/batch_normalization nn/batch_normalization
nn/bias_add
nn/conv1d
nn/conv1d_transpose
nn/conv2d nn/conv2d
nn/conv2d_transpose nn/conv2d_transpose
nn/conv3d
nn/conv3d_transpose
nn/convolution nn/convolution
nn/conv_transpose nn/conv_transpose
nn/depthwise_conv2d nn/depthwise_conv2d
...@@ -108,7 +142,9 @@ vm.tensorflow.nn ...@@ -108,7 +142,9 @@ vm.tensorflow.nn
nn/local_response_normalization nn/local_response_normalization
nn/log_softmax nn/log_softmax
nn/max_pool nn/max_pool
nn/max_pool1d
nn/max_pool2d nn/max_pool2d
nn/max_pool3d
nn/moments nn/moments
nn/relu nn/relu
nn/relu6 nn/relu6
......
avg_pool1d
==========
.. autofunction:: dragon.vm.tensorflow.nn.avg_pool1d
.. raw:: html
<style>
h1:before {
content: "tf.nn.";
color: #103d3e;
}
</style>
avg_pool3d
==========
.. autofunction:: dragon.vm.tensorflow.nn.avg_pool3d
.. raw:: html
<style>
h1:before {
content: "tf.nn.";
color: #103d3e;
}
</style>
bias_add
========
.. autofunction:: dragon.vm.tensorflow.nn.bias_add
.. raw:: html
<style>
h1:before {
content: "tf.nn.";
color: #103d3e;
}
</style>
conv1d
======
.. autofunction:: dragon.vm.tensorflow.nn.conv1d
.. raw:: html
<style>
h1:before {
content: "tf.nn.";
color: #103d3e;
}
</style>
conv1d_transpose
================
.. autofunction:: dragon.vm.tensorflow.nn.conv1d_transpose
.. raw:: html
<style>
h1:before {
content: "tf.nn.";
color: #103d3e;
}
</style>
conv3d
======
.. autofunction:: dragon.vm.tensorflow.nn.conv3d
.. raw:: html
<style>
h1:before {
content: "tf.nn.";
color: #103d3e;
}
</style>
conv3d_transpose
================
.. autofunction:: dragon.vm.tensorflow.nn.conv3d_transpose
.. raw:: html
<style>
h1:before {
content: "tf.nn.";
color: #103d3e;
}
</style>
max_pool1d
==========
.. autofunction:: dragon.vm.tensorflow.nn.max_pool1d
.. raw:: html
<style>
h1:before {
content: "tf.nn.";
color: #103d3e;
}
</style>
max_pool3d
==========
.. autofunction:: dragon.vm.tensorflow.nn.max_pool3d
.. raw:: html
<style>
h1:before {
content: "tf.nn.";
color: #103d3e;
}
</style>
...@@ -7,51 +7,51 @@ vm.tensorlayer.layers ...@@ -7,51 +7,51 @@ vm.tensorlayer.layers
------- -------
`class BatchNorm <layers/BatchNorm.html>`_ `class BatchNorm <layers/BatchNorm.html>`_
: The layer to apply the batch normalization. : Batch normalization layer.
`[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_. `[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_.
`class Concat <layers/Concat.html>`_ `class Concat <layers/Concat.html>`_
: The layer to concat tensors according to the given axis. : Layer to concat tensors according to the given axis.
`class Conv2d <layers/Conv2d.html>`_ `class Conv2d <layers/Conv2d.html>`_
: The 2d convolution layer. : 2d convolution layer.
`class Dense <layers/Dense.html>`_ `class Dense <layers/Dense.html>`_
: The fully connected layer. : Fully connected layer.
`class Elementwise <layers/Elementwise.html>`_ `class Elementwise <layers/Elementwise.html>`_
: The layer to combine inputs by applying element-wise operation. : Layer to combine inputs by applying element-wise operation.
`class Flatten <layers/Flatten.html>`_ `class Flatten <layers/Flatten.html>`_
: The layer to reshape input into a matrix. : Layer to reshape input into a matrix.
`class GlobalMaxPool2d <layers/GlobalMaxPool2d.html>`_ `class GlobalMaxPool2d <layers/GlobalMaxPool2d.html>`_
: The global max 2d pooling layer. : 2d global max pooling layer.
`class GlobalMeanPool2d <layers/GlobalMeanPool2d.html>`_ `class GlobalMeanPool2d <layers/GlobalMeanPool2d.html>`_
: The global mean 2d pooling layer. : 2d global mean pooling layer.
`class MaxPool2d <layers/MaxPool2d.html>`_ `class MaxPool2d <layers/MaxPool2d.html>`_
: The max 2d pooling layer. : 2d max pooling layer.
`class MeanPool2d <layers/MeanPool2d.html>`_ `class MeanPool2d <layers/MeanPool2d.html>`_
: The mean 2d pooling layer. : 2d mean pooling layer.
`class Layer <layers/Layer.html>`_ `class Layer <layers/Layer.html>`_
: The base layer class. : The base layer class.
`class LayerList <layers/LayerList.html>`_ `class LayerList <layers/LayerList.html>`_
: The sequential layer to stack a group of layers. : Layer to stack a group of layers.
`class Relu <layers/Relu.html>`_ `class Relu <layers/Relu.html>`_
: The layer to apply the rectified linear unit. : Layer to apply the rectified linear unit.
`[Nair & Hinton, 2010] <http://www.csri.utoronto.ca/~hinton/absps/reluICML.pdf>`_. `[Nair & Hinton, 2010] <http://www.csri.utoronto.ca/~hinton/absps/reluICML.pdf>`_.
`class Reshape <layers/Reshape.html>`_ `class Reshape <layers/Reshape.html>`_
: The layer to change the dimensions of input. : Layer to change the dimensions of input.
`class Transpose <layers/Transpose.html>`_ `class Transpose <layers/Transpose.html>`_
: The layer to permute the dimensions of input. : Layer to permute the dimensions of input.
Functions Functions
--------- ---------
......
...@@ -164,7 +164,7 @@ vm.torch ...@@ -164,7 +164,7 @@ vm.torch
: Compute the minimum value of inputs. : Compute the minimum value of inputs.
`mm(...) <torch/mm.html>`_ `mm(...) <torch/mm.html>`_
: Perform a matrix multiplication. : Compute the matrix-matrix multiplication.
`mul(...) <torch/mul.html>`_ `mul(...) <torch/mul.html>`_
: Compute the element-wise multiplication. : Compute the element-wise multiplication.
......
...@@ -245,6 +245,14 @@ int\_ ...@@ -245,6 +245,14 @@ int\_
###### ######
.. automethod:: dragon.vm.torch.Tensor.int_ .. automethod:: dragon.vm.torch.Tensor.int_
isinf
#####
.. automethod:: dragon.vm.torch.Tensor.isinf
isnan
#####
.. automethod:: dragon.vm.torch.Tensor.isnan
is_floating_point is_floating_point
################# #################
.. automethod:: dragon.vm.torch.Tensor.is_floating_point .. automethod:: dragon.vm.torch.Tensor.is_floating_point
...@@ -281,6 +289,10 @@ max ...@@ -281,6 +289,10 @@ max
### ###
.. automethod:: dragon.vm.torch.Tensor.max .. automethod:: dragon.vm.torch.Tensor.max
maximum
#######
.. automethod:: dragon.vm.torch.Tensor.maximum
masked_select masked_select
############# #############
.. automethod:: dragon.vm.torch.Tensor.masked_select .. automethod:: dragon.vm.torch.Tensor.masked_select
...@@ -293,6 +305,14 @@ min ...@@ -293,6 +305,14 @@ min
### ###
.. automethod:: dragon.vm.torch.Tensor.min .. automethod:: dragon.vm.torch.Tensor.min
minimum
#######
.. automethod:: dragon.vm.torch.Tensor.minimum
mm
###
.. automethod:: dragon.vm.torch.Tensor.mm
mul mul
### ###
.. automethod:: dragon.vm.torch.Tensor.mul .. automethod:: dragon.vm.torch.Tensor.mul
...@@ -429,6 +449,10 @@ sort ...@@ -429,6 +449,10 @@ sort
#### ####
.. automethod:: dragon.vm.torch.Tensor.sort .. automethod:: dragon.vm.torch.Tensor.sort
split
#####
.. automethod:: dragon.vm.torch.Tensor.split
sqrt sqrt
#### ####
.. automethod:: dragon.vm.torch.Tensor.sqrt .. automethod:: dragon.vm.torch.Tensor.sqrt
...@@ -529,8 +553,15 @@ zero\_ ...@@ -529,8 +553,15 @@ zero\_
.. _torch.full(...): full.html .. _torch.full(...): full.html
.. _torch.ge(...): ge.html .. _torch.ge(...): ge.html
.. _torch.gt(...): gt.html .. _torch.gt(...): gt.html
.. _torch.isinf(...): isinf.html
.. _torch.isnan(...): isnan.html
.. _torch.le(...): le.html .. _torch.le(...): le.html
.. _torch.lt(...): lt.html .. _torch.lt(...): lt.html
.. _torch.max(...): max.html
.. _torch.maximum(...): maximum.html
.. _torch.min(...): min.html
.. _torch.minimum(...): minimum.html
.. _torch.mm(...): mm.html
.. _torch.mul(...): mul.html .. _torch.mul(...): mul.html
.. _torch.ne(...): ne.html .. _torch.ne(...): ne.html
.. _torch.neg(...): neg.html .. _torch.neg(...): neg.html
...@@ -544,6 +575,7 @@ zero\_ ...@@ -544,6 +575,7 @@ zero\_
.. _torch.sign(...): sign.html .. _torch.sign(...): sign.html
.. _torch.sin(...): sin.html .. _torch.sin(...): sin.html
.. _torch.sort(...): sort.html .. _torch.sort(...): sort.html
.. _torch.split(...): split.html
.. _torch.sqrt(...): sqrt.html .. _torch.sqrt(...): sqrt.html
.. _torch.squeeze(...): squeeze.html .. _torch.squeeze(...): squeeze.html
.. _torch.sub(...): sub.html .. _torch.sub(...): sub.html
......
...@@ -9,9 +9,15 @@ vm.torch.nn ...@@ -9,9 +9,15 @@ vm.torch.nn
`class AffineChannel <nn/AffineChannel.html>`_ `class AffineChannel <nn/AffineChannel.html>`_
: Apply affine transformation along the channels. : Apply affine transformation along the channels.
`class AvgPool1d <nn/AvgPool1d.html>`_
: Apply the 1d average pooling.
`class AvgPool2d <nn/AvgPool2d.html>`_ `class AvgPool2d <nn/AvgPool2d.html>`_
: Apply the 2d average pooling. : Apply the 2d average pooling.
`class AvgPool3d <nn/AvgPool3d.html>`_
: Apply the 3d average pooling.
`class BatchNorm1d <nn/BatchNorm1d.html>`_ `class BatchNorm1d <nn/BatchNorm1d.html>`_
: Apply the batch normalization over 2d input. : Apply the batch normalization over 2d input.
`[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_. `[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_.
...@@ -36,12 +42,24 @@ vm.torch.nn ...@@ -36,12 +42,24 @@ vm.torch.nn
`class ConstantPad2d <nn/ConstantPad2d.html>`_ `class ConstantPad2d <nn/ConstantPad2d.html>`_
: Pad input according to the last 3-dimensions with a constant. : Pad input according to the last 3-dimensions with a constant.
`class Conv1d <nn/Conv1d.html>`_
: Apply the 1d convolution.
`class Conv2d <nn/Conv2d.html>`_ `class Conv2d <nn/Conv2d.html>`_
: Apply the 2d convolution. : Apply the 2d convolution.
`class Conv3d <nn/Conv3d.html>`_
: Apply the 3d convolution.
`class ConvTranspose1d <nn/ConvTranspose1d.html>`_
: Apply the 1d deconvolution.
`class ConvTranspose2d <nn/ConvTranspose2d.html>`_ `class ConvTranspose2d <nn/ConvTranspose2d.html>`_
: Apply the 2d deconvolution. : Apply the 2d deconvolution.
`class ConvTranspose3d <nn/ConvTranspose3d.html>`_
: Apply the 3d deconvolution.
`class CrossEntropyLoss <nn/CrossEntropyLoss.html>`_ `class CrossEntropyLoss <nn/CrossEntropyLoss.html>`_
: Compute the softmax cross entropy with sparse labels. : Compute the softmax cross entropy with sparse labels.
...@@ -91,6 +109,9 @@ vm.torch.nn ...@@ -91,6 +109,9 @@ vm.torch.nn
: Apply the hard swish function. : Apply the hard swish function.
`[Howard et.al, 2019] <https://arxiv.org/abs/1905.02244>`_. `[Howard et.al, 2019] <https://arxiv.org/abs/1905.02244>`_.
`class Identity <nn/Identity.html>`_
: Apply the identity transformation.
`class KLDivLoss <nn/KLDivLoss.html>`_ `class KLDivLoss <nn/KLDivLoss.html>`_
: Compute the Kullback-Leibler divergence. : Compute the Kullback-Leibler divergence.
...@@ -118,8 +139,14 @@ vm.torch.nn ...@@ -118,8 +139,14 @@ vm.torch.nn
: Apply a long short-term memory (LSTM) cell. : Apply a long short-term memory (LSTM) cell.
`[Hochreiter & Schmidhuber, 1997] <https://doi.org/10.1162>`_. `[Hochreiter & Schmidhuber, 1997] <https://doi.org/10.1162>`_.
`class MaxPool1d <nn/MaxPool1d.html>`_
: Apply the 1d max pooling.
`class MaxPool2d <nn/MaxPool2d.html>`_ `class MaxPool2d <nn/MaxPool2d.html>`_
: Apply the 2d MaxPool2d pooling. : Apply the 2d max pooling.
`class MaxPool3d <nn/MaxPool3d.html>`_
: Apply the 3d max pooling.
`class Module <nn/Module.html>`_ `class Module <nn/Module.html>`_
: The base class of modules. : The base class of modules.
...@@ -212,7 +239,9 @@ vm.torch.nn ...@@ -212,7 +239,9 @@ vm.torch.nn
:hidden: :hidden:
nn/AffineChannel nn/AffineChannel
nn/AvgPool1d
nn/AvgPool2d nn/AvgPool2d
nn/AvgPool3d
nn/BatchNorm1d nn/BatchNorm1d
nn/BatchNorm2d nn/BatchNorm2d
nn/BatchNorm3d nn/BatchNorm3d
...@@ -220,8 +249,12 @@ vm.torch.nn ...@@ -220,8 +249,12 @@ vm.torch.nn
nn/ConstantPad1d nn/ConstantPad1d
nn/ConstantPad2d nn/ConstantPad2d
nn/ConstantPad3d nn/ConstantPad3d
nn/Conv1d
nn/Conv2d nn/Conv2d
nn/Conv3d
nn/ConvTranspose1d
nn/ConvTranspose2d nn/ConvTranspose2d
nn/ConvTranspose3d
nn/CrossEntropyLoss nn/CrossEntropyLoss
nn/CTCLoss nn/CTCLoss
nn/DepthwiseConv2d nn/DepthwiseConv2d
...@@ -235,6 +268,7 @@ vm.torch.nn ...@@ -235,6 +268,7 @@ vm.torch.nn
nn/GumbelSoftmax nn/GumbelSoftmax
nn/Hardsigmoid nn/Hardsigmoid
nn/Hardswish nn/Hardswish
nn/Identity
nn/KLDivLoss nn/KLDivLoss
nn/L1Loss nn/L1Loss
nn/LeakyReLU nn/LeakyReLU
...@@ -243,7 +277,9 @@ vm.torch.nn ...@@ -243,7 +277,9 @@ vm.torch.nn
nn/LogSoftmax nn/LogSoftmax
nn/LSTM nn/LSTM
nn/LSTMCell nn/LSTMCell
nn/MaxPool1d
nn/MaxPool2d nn/MaxPool2d
nn/MaxPool3d
nn/Module nn/Module
nn/MSELoss nn/MSELoss
nn/NLLLoss nn/NLLLoss
......
AvgPool1d
=========
.. autoclass:: dragon.vm.torch.nn.AvgPool1d
__init__
--------
.. automethod:: dragon.vm.torch.nn.AvgPool1d.__init__
.. _torch.nn.functional.avg_pool1d(...): functional/avg_pool1d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.";
color: #103d3e;
}
</style>
AvgPool3d
=========
.. autoclass:: dragon.vm.torch.nn.AvgPool3d
__init__
--------
.. automethod:: dragon.vm.torch.nn.AvgPool3d.__init__
.. _torch.nn.functional.avg_pool3d(...): functional/avg_pool3d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.";
color: #103d3e;
}
</style>
Conv1d
======
.. autoclass:: dragon.vm.torch.nn.Conv1d
__init__
--------
.. automethod:: dragon.vm.torch.nn.Conv1d.__init__
.. _torch.nn.functional.conv1d(...): functional/conv1d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.";
color: #103d3e;
}
</style>
Conv3d
======
.. autoclass:: dragon.vm.torch.nn.Conv3d
__init__
--------
.. automethod:: dragon.vm.torch.nn.Conv3d.__init__
.. _torch.nn.functional.conv3d(...): functional/conv3d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.";
color: #103d3e;
}
</style>
ConvTranspose1d
===============
.. autoclass:: dragon.vm.torch.nn.ConvTranspose1d
__init__
--------
.. automethod:: dragon.vm.torch.nn.ConvTranspose1d.__init__
.. _torch.nn.functional.conv_transpose1d(...): functional/conv_transpose1d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.";
color: #103d3e;
}
</style>
ConvTranspose3d
===============
.. autoclass:: dragon.vm.torch.nn.ConvTranspose3d
__init__
--------
.. automethod:: dragon.vm.torch.nn.ConvTranspose3d.__init__
.. _torch.nn.functional.conv_transpose3d(...): functional/conv_transpose3d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.";
color: #103d3e;
}
</style>
Identity
========
.. autoclass:: dragon.vm.torch.nn.Identity
__init__
--------
.. automethod:: dragon.vm.torch.nn.Identity.__init__
.. raw:: html
<style>
h1:before {
content: "torch.nn.";
color: #103d3e;
}
</style>
MaxPool1d
=========
.. autoclass:: dragon.vm.torch.nn.MaxPool1d
__init__
--------
.. automethod:: dragon.vm.torch.nn.MaxPool1d.__init__
.. _torch.nn.functional.max_pool1d(...): functional/max_pool1d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.";
color: #103d3e;
}
</style>
MaxPool3d
=========
.. autoclass:: dragon.vm.torch.nn.MaxPool3d
__init__
--------
.. automethod:: dragon.vm.torch.nn.MaxPool3d.__init__
.. _torch.nn.functional.max_pool3d(...): functional/max_pool3d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.";
color: #103d3e;
}
</style>
...@@ -7,7 +7,7 @@ __init__ ...@@ -7,7 +7,7 @@ __init__
-------- --------
.. automethod:: dragon.vm.torch.nn.SyncBatchNorm.__init__ .. automethod:: dragon.vm.torch.nn.SyncBatchNorm.__init__
.. _torch.nn.functional.batch_norm(...): functional/batch_norm.html .. _torch.nn.functional.sync_batch_norm(...): functional/sync_batch_norm.html
.. raw:: html .. raw:: html
......
...@@ -6,9 +6,15 @@ vm.torch.nn.functional ...@@ -6,9 +6,15 @@ vm.torch.nn.functional
Functions Functions
--------- ---------
`avg_pool1d(...) <functional/avg_pool1d.html>`_
: Apply the 1d average pooling to input.
`avg_pool2d(...) <functional/avg_pool2d.html>`_ `avg_pool2d(...) <functional/avg_pool2d.html>`_
: Apply the 2d average pooling to input. : Apply the 2d average pooling to input.
`avg_pool3d(...) <functional/avg_pool3d.html>`_
: Apply the 3d average pooling to input.
`batch_norm(...) <functional/batch_norm.html>`_ `batch_norm(...) <functional/batch_norm.html>`_
: Apply the batch normalization to input. : Apply the batch normalization to input.
`[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_. `[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_.
...@@ -16,12 +22,24 @@ vm.torch.nn.functional ...@@ -16,12 +22,24 @@ vm.torch.nn.functional
`binary_cross_entropy_with_logits(...) <functional/binary_cross_entropy_with_logits.html>`_ `binary_cross_entropy_with_logits(...) <functional/binary_cross_entropy_with_logits.html>`_
: Compute the sigmoid cross entropy with contiguous target. : Compute the sigmoid cross entropy with contiguous target.
`conv1d(...) <functional/conv1d.html>`_
: Apply 1d convolution to the input.
`conv2d(...) <functional/conv2d.html>`_ `conv2d(...) <functional/conv2d.html>`_
: Apply 2d convolution to the input. : Apply 2d convolution to the input.
`conv3d(...) <functional/conv3d.html>`_
: Apply 3d convolution to the input.
`conv_transpose1d(...) <functional/conv_transpose1d.html>`_
: Apply 1d deconvolution to the input.
`conv_transpose2d(...) <functional/conv_transpose2d.html>`_ `conv_transpose2d(...) <functional/conv_transpose2d.html>`_
: Apply 2d deconvolution to the input. : Apply 2d deconvolution to the input.
`conv_transpose3d(...) <functional/conv_transpose3d.html>`_
: Apply 3d deconvolution to the input.
`cross_entropy(...) <functional/cross_entropy.html>`_ `cross_entropy(...) <functional/cross_entropy.html>`_
: Compute the softmax cross entropy with sparse labels. : Compute the softmax cross entropy with sparse labels.
...@@ -82,9 +100,15 @@ vm.torch.nn.functional ...@@ -82,9 +100,15 @@ vm.torch.nn.functional
`interpolate(...) <functional/interpolate.html>`_ `interpolate(...) <functional/interpolate.html>`_
: Resize input via interpolating neighborhoods. : Resize input via interpolating neighborhoods.
`max_pool1d(...) <functional/max_pool1d.html>`_
: Apply the 1d max pooling to input.
`max_pool2d(...) <functional/max_pool2d.html>`_ `max_pool2d(...) <functional/max_pool2d.html>`_
: Apply the 2d max pooling to input. : Apply the 2d max pooling to input.
`max_pool3d(...) <functional/max_pool3d.html>`_
: Apply the 3d max pooling to input.
`mse_loss(...) <functional/mse_loss.html>`_ `mse_loss(...) <functional/mse_loss.html>`_
: Compute the element-wise squared error. : Compute the element-wise squared error.
...@@ -143,11 +167,17 @@ vm.torch.nn.functional ...@@ -143,11 +167,17 @@ vm.torch.nn.functional
.. toctree:: .. toctree::
:hidden: :hidden:
functional/avg_pool1d
functional/avg_pool2d functional/avg_pool2d
functional/avg_pool3d
functional/batch_norm functional/batch_norm
functional/binary_cross_entropy_with_logits functional/binary_cross_entropy_with_logits
functional/conv1d
functional/conv2d functional/conv2d
functional/conv3d
functional/conv_transpose1d
functional/conv_transpose2d functional/conv_transpose2d
functional/conv_transpose3d
functional/cross_entropy functional/cross_entropy
functional/ctc_loss functional/ctc_loss
functional/depthwise_conv2d functional/depthwise_conv2d
...@@ -165,7 +195,9 @@ vm.torch.nn.functional ...@@ -165,7 +195,9 @@ vm.torch.nn.functional
functional/local_response_norm functional/local_response_norm
functional/log_softmax functional/log_softmax
functional/interpolate functional/interpolate
functional/max_pool1d
functional/max_pool2d functional/max_pool2d
functional/max_pool3d
functional/mse_loss functional/mse_loss
functional/nll_loss functional/nll_loss
functional/normalize functional/normalize
......
avg_pool1d
==========
.. autofunction:: dragon.vm.torch.nn.functional.avg_pool1d
.. _torch.nn.AvgPool1d(...): ../AvgPool1d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.functional.";
color: #103d3e;
}
</style>
avg_pool3d
==========
.. autofunction:: dragon.vm.torch.nn.functional.avg_pool3d
.. _torch.nn.AvgPool3d(...): ../AvgPool3d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.functional.";
color: #103d3e;
}
</style>
conv1d
======
.. autofunction:: dragon.vm.torch.nn.functional.conv1d
.. _torch.nn.Conv1d(...): ../Conv1d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.functional.";
color: #103d3e;
}
</style>
conv3d
======
.. autofunction:: dragon.vm.torch.nn.functional.conv3d
.. _torch.nn.Conv3d(...): ../Conv3d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.functional.";
color: #103d3e;
}
</style>
conv_transpose1d
================
.. autofunction:: dragon.vm.torch.nn.functional.conv_transpose1d
.. _torch.nn.ConvTranspose1d(...): ../ConvTranspose1d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.functional.";
color: #103d3e;
}
</style>
conv_transpose3d
================
.. autofunction:: dragon.vm.torch.nn.functional.conv_transpose3d
.. _torch.nn.ConvTranspose3d(...): ../ConvTranspose3d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.functional.";
color: #103d3e;
}
</style>
max_pool1d
==========
.. autofunction:: dragon.vm.torch.nn.functional.max_pool1d
.. _torch.nn.MaxPool1d(...): ../MaxPool1d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.functional.";
color: #103d3e;
}
</style>
max_pool3d
==========
.. autofunction:: dragon.vm.torch.nn.functional.max_pool3d
.. _torch.nn.MaxPool3d(...): ../MaxPool3d.html
.. raw:: html
<style>
h1:before {
content: "torch.nn.functional.";
color: #103d3e;
}
</style>
...@@ -19,18 +19,13 @@ ...@@ -19,18 +19,13 @@
namespace dragon { namespace dragon {
typedef enum {
NCHW = 0,
NHWC = 1,
} StorageOrder;
/*! /*!
* \brief Memory to manage both the host and device data. * \brief Memory to manage both the host and device data.
*/ */
class DRAGON_API UnifiedMemory { class DRAGON_API UnifiedMemory {
public: public:
/*! /*!
* \brief The device-aware state for data mutation. * \brief The device-aware state for data consistency.
*/ */
enum State { enum State {
/*! \brief Initial state */ /*! \brief Initial state */
...@@ -60,10 +55,10 @@ class DRAGON_API UnifiedMemory { ...@@ -60,10 +55,10 @@ class DRAGON_API UnifiedMemory {
/*! \brief Switch to the given cuda device */ /*! \brief Switch to the given cuda device */
void SwitchToCUDADevice(int device); void SwitchToCUDADevice(int device);
/*! \brief Involve the state to CPUContext */ /*! \brief Involve the state to cpu */
void ToCPU(size_t size = 0); void ToCPU(size_t size = 0);
/*! \brief Involve the state to CUDAContext */ /*! \brief Involve the state to cuda */
void ToCUDA(size_t size = 0); void ToCUDA(size_t size = 0);
/*! \brief Return the memory state */ /*! \brief Return the memory state */
...@@ -86,7 +81,7 @@ class DRAGON_API UnifiedMemory { ...@@ -86,7 +81,7 @@ class DRAGON_API UnifiedMemory {
return size_t(0); return size_t(0);
} }
/*! \brief Return the number of memory chunks */ /*! \brief Return the number of batch chunks */
size_t num_chunks() const { size_t num_chunks() const {
return num_chunks_; return num_chunks_;
} }
...@@ -181,10 +176,10 @@ class DRAGON_API UnifiedMemory { ...@@ -181,10 +176,10 @@ class DRAGON_API UnifiedMemory {
/*! \brief The cnml data pointer */ /*! \brief The cnml data pointer */
void* cnml_ptr_ = nullptr; void* cnml_ptr_ = nullptr;
/*! \brief The binding cpu tensor for cnml */ /*! \brief The binding cnml cpu tensor */
cnmlCpuTensor_t cnml_cpu_tensor_ = nullptr; cnmlCpuTensor_t cnml_cpu_tensor_ = nullptr;
/*! \brief The binding mlu tensor for cnml */ /*! \brief The binding cnml mlu tensor */
cnmlTensor_t cnml_mlu_tensor_ = nullptr; cnmlTensor_t cnml_mlu_tensor_ = nullptr;
DISABLE_COPY_AND_ASSIGN(UnifiedMemory); DISABLE_COPY_AND_ASSIGN(UnifiedMemory);
......
...@@ -43,6 +43,12 @@ typedef struct { ...@@ -43,6 +43,12 @@ typedef struct {
#endif #endif
/*! \brief Order in which the values are laid out in memory */
typedef enum {
NCHW = 0,
NHWC = 1,
} StorageOrder;
/*! \brief Array that packs a fixed number of elements */ /*! \brief Array that packs a fixed number of elements */
template <typename T, int N> template <typename T, int N>
struct SimpleArray { struct SimpleArray {
......
# ---[ General sources # ---[ General sources
add_subdirectory(activation) add_subdirectory(activation)
add_subdirectory(array) add_subdirectory(array)
add_subdirectory(control_flow)
add_subdirectory(math) add_subdirectory(math)
add_subdirectory(normalization) add_subdirectory(normalization)
add_subdirectory(recurrent) add_subdirectory(recurrent)
......
...@@ -13,14 +13,14 @@ namespace { ...@@ -13,14 +13,14 @@ namespace {
template <typename T, int D> template <typename T, int D>
__global__ void _Transpose( __global__ void _Transpose(
const int nthreads, const int nthreads,
const int ndims, const int num_dims,
const SimpleArray<int, D> x_strides, const SimpleArray<int, D> x_strides,
const SimpleArray<int, D> y_dims, const SimpleArray<int, D> y_dims,
const T* x, const T* x,
T* y) { T* y) {
CUDA_1D_KERNEL_LOOP(yi, nthreads) { CUDA_1D_KERNEL_LOOP(yi, nthreads) {
int xi = 0, tmp = yi; int xi = 0, tmp = yi;
for (int d = ndims - 1; d >= 0; --d) { for (int d = num_dims - 1; d >= 0; --d) {
int r; int r;
FIXED_DIVISOR_DIV_MOD(y_dims.data[d], tmp, &tmp, &r); FIXED_DIVISOR_DIV_MOD(y_dims.data[d], tmp, &tmp, &r);
xi += r * x_strides.data[d]; xi += r * x_strides.data[d];
...@@ -32,14 +32,14 @@ __global__ void _Transpose( ...@@ -32,14 +32,14 @@ __global__ void _Transpose(
template <typename T, int D> template <typename T, int D>
__global__ void _TransposeGrad( __global__ void _TransposeGrad(
const int nthreads, const int nthreads,
const int ndims, const int num_dims,
const SimpleArray<int, D> x_strides, const SimpleArray<int, D> x_strides,
const SimpleArray<int, D> y_dims, const SimpleArray<int, D> y_dims,
const T* dy, const T* dy,
T* dx) { T* dx) {
CUDA_1D_KERNEL_LOOP(yi, nthreads) { CUDA_1D_KERNEL_LOOP(yi, nthreads) {
int xi = 0, tmp = yi; int xi = 0, tmp = yi;
for (int d = ndims - 1; d >= 0; --d) { for (int d = num_dims - 1; d >= 0; --d) {
int r; int r;
FIXED_DIVISOR_DIV_MOD(y_dims.data[d], tmp, &tmp, &r); FIXED_DIVISOR_DIV_MOD(y_dims.data[d], tmp, &tmp, &r);
xi += r * x_strides.data[d]; xi += r * x_strides.data[d];
......
# ---[ General sources
file(GLOB SOURCES *.cc)
set(MODULE_SOURCES ${MODULE_SOURCES} ${SOURCES})
# ---[ CUDA sources
if (USE_CUDA)
file(GLOB CUDA_SOURCES *.cu)
set(KERNEL_CUDA_SOURCES ${KERNEL_CUDA_SOURCES} ${CUDA_SOURCES})
endif()
# ---[ Submit to the parent scope
set(MODULE_SOURCES ${MODULE_SOURCES} PARENT_SCOPE)
set(KERNEL_CUDA_SOURCES ${KERNEL_CUDA_SOURCES} PARENT_SCOPE)
...@@ -23,7 +23,7 @@ _CosGrad<half>(const int nthreads, const half* dy, const half* x, half* dx) { ...@@ -23,7 +23,7 @@ _CosGrad<half>(const int nthreads, const half* dy, const half* x, half* dx) {
CUDA_1D_KERNEL_LOOP(i, nthreads) { CUDA_1D_KERNEL_LOOP(i, nthreads) {
dx[i] = __float2half(-__half2float(dy[i]) * sin(__half2float(x[i]))); dx[i] = __float2half(-__half2float(dy[i]) * sin(__half2float(x[i])));
} }
} } // CosGrad
template <> template <>
__global__ void _CosGrad<half2>( __global__ void _CosGrad<half2>(
...@@ -36,7 +36,7 @@ __global__ void _CosGrad<half2>( ...@@ -36,7 +36,7 @@ __global__ void _CosGrad<half2>(
const float2 grad = __half22float2(dy[i]); const float2 grad = __half22float2(dy[i]);
dx[i] = __floats2half2_rn(-grad.x * sin(val.x), -grad.y * sin(val.y)); dx[i] = __floats2half2_rn(-grad.x * sin(val.x), -grad.y * sin(val.y));
} }
} } // CosGrad
template <typename T> template <typename T>
__global__ void _SinGrad(const int nthreads, const T* dy, const T* x, T* dx) { __global__ void _SinGrad(const int nthreads, const T* dy, const T* x, T* dx) {
...@@ -51,7 +51,7 @@ _SinGrad<half>(const int nthreads, const half* dy, const half* x, half* dx) { ...@@ -51,7 +51,7 @@ _SinGrad<half>(const int nthreads, const half* dy, const half* x, half* dx) {
CUDA_1D_KERNEL_LOOP(i, nthreads) { CUDA_1D_KERNEL_LOOP(i, nthreads) {
dx[i] = __float2half(__half2float(dy[i]) * cos(__half2float(x[i]))); dx[i] = __float2half(__half2float(dy[i]) * cos(__half2float(x[i])));
} }
} } // SinGrad
template <> template <>
__global__ void _SinGrad<half2>( __global__ void _SinGrad<half2>(
...@@ -64,7 +64,7 @@ __global__ void _SinGrad<half2>( ...@@ -64,7 +64,7 @@ __global__ void _SinGrad<half2>(
const float2 grad = __half22float2(dy[i]); const float2 grad = __half22float2(dy[i]);
dx[i] = __floats2half2_rn(grad.x * cos(val.x), grad.y * cos(val.y)); dx[i] = __floats2half2_rn(grad.x * cos(val.x), grad.y * cos(val.y));
} }
} } // SinGrad
template <typename T> template <typename T>
__global__ void __global__ void
...@@ -84,7 +84,7 @@ __global__ void _ReciprocalGrad<half>( ...@@ -84,7 +84,7 @@ __global__ void _ReciprocalGrad<half>(
dx[i] = __float2half( dx[i] = __float2half(
-__half2float(dy[i]) * math::utils::Square(__half2float(y[i]))); -__half2float(dy[i]) * math::utils::Square(__half2float(y[i])));
} }
} } // ReciprocalGrad
template <> template <>
__global__ void _ReciprocalGrad<half2>( __global__ void _ReciprocalGrad<half2>(
...@@ -98,7 +98,7 @@ __global__ void _ReciprocalGrad<half2>( ...@@ -98,7 +98,7 @@ __global__ void _ReciprocalGrad<half2>(
dx[i] = dx[i] =
__floats2half2_rn(-grad.x * (val.x * val.x), -grad.y * (val.y * val.y)); __floats2half2_rn(-grad.x * (val.x * val.x), -grad.y * (val.y * val.y));
} }
} } // ReciprocalGrad
template <typename T> template <typename T>
__global__ void _RsqrtGrad(const int nthreads, const T* dy, const T* y, T* dx) { __global__ void _RsqrtGrad(const int nthreads, const T* dy, const T* y, T* dx) {
...@@ -114,7 +114,7 @@ _RsqrtGrad<half>(const int nthreads, const half* dy, const half* y, half* dx) { ...@@ -114,7 +114,7 @@ _RsqrtGrad<half>(const int nthreads, const half* dy, const half* y, half* dx) {
dx[i] = __float2half( dx[i] = __float2half(
-0.5f * __half2float(dy[i]) * math::utils::Cube(__half2float(y[i]))); -0.5f * __half2float(dy[i]) * math::utils::Cube(__half2float(y[i])));
} }
} } // ReciprocalGrad
template <> template <>
__global__ void _RsqrtGrad<half2>( __global__ void _RsqrtGrad<half2>(
...@@ -129,7 +129,7 @@ __global__ void _RsqrtGrad<half2>( ...@@ -129,7 +129,7 @@ __global__ void _RsqrtGrad<half2>(
-0.5f * grad.x * (val.x * val.x * val.x), -0.5f * grad.x * (val.x * val.x * val.x),
-0.5f * grad.y * (val.y * val.y * val.y)); -0.5f * grad.y * (val.y * val.y * val.y));
} }
} } // ReciprocalGrad
} // namespace } // namespace
......
...@@ -176,35 +176,35 @@ void _Moments( ...@@ -176,35 +176,35 @@ void _Moments(
/* ------------------- Launcher Separator ------------------- */ /* ------------------- Launcher Separator ------------------- */
#define DEFINE_KERNEL_LAUNCHER(T, ScalarT, AccT) \ #define DEFINE_KERNEL_LAUNCHER(T, AccT) \
template <> \ template <> \
void Moments<T, AccT, CUDAContext>( \ void Moments<T, AccT, CUDAContext>( \
const int num_dims, \ const int num_dims, \
const int* dims, \ const int* dims, \
const int num_axes, \ const int num_axes, \
const int* axes, \ const int* axes, \
const T* x, \ const T* x, \
AccT* mean, \ AccT* mean, \
AccT* var, \ AccT* var, \
CUDAContext* ctx) { \ CUDAContext* ctx) { \
_Moments( \ _Moments( \
num_dims, \ num_dims, \
dims, \ dims, \
num_axes, \ num_axes, \
axes, \ axes, \
reinterpret_cast<const ScalarT*>(x), \ reinterpret_cast<const math::ScalarType<T>::type*>(x), \
mean, \ mean, \
var, \ var, \
ctx); \ ctx); \
} }
DEFINE_KERNEL_LAUNCHER(int8_t, int8_t, float); DEFINE_KERNEL_LAUNCHER(int8_t, float);
DEFINE_KERNEL_LAUNCHER(uint8_t, uint8_t, float); DEFINE_KERNEL_LAUNCHER(uint8_t, float);
DEFINE_KERNEL_LAUNCHER(int, int, float); DEFINE_KERNEL_LAUNCHER(int, float);
DEFINE_KERNEL_LAUNCHER(int64_t, int64_t, double); DEFINE_KERNEL_LAUNCHER(int64_t, double);
DEFINE_KERNEL_LAUNCHER(float16, half, float); DEFINE_KERNEL_LAUNCHER(float16, float);
DEFINE_KERNEL_LAUNCHER(float, float, float); DEFINE_KERNEL_LAUNCHER(float, float);
DEFINE_KERNEL_LAUNCHER(double, double, double); DEFINE_KERNEL_LAUNCHER(double, double);
#undef DEFINE_KERNEL_LAUNCHER #undef DEFINE_KERNEL_LAUNCHER
} // namespace kernel } // namespace kernel
......
#include "dragon/core/memory.h"
#include "dragon/utils/device/common_eigen.h" #include "dragon/utils/device/common_eigen.h"
#include "dragon/utils/math_functions.h" #include "dragon/utils/math_functions.h"
#include "dragon/utils/op_kernels.h" #include "dragon/utils/op_kernels.h"
......
#include "dragon/core/memory.h"
#include "dragon/utils/device/common_eigen.h" #include "dragon/utils/device/common_eigen.h"
#include "dragon/utils/math_functions.h" #include "dragon/utils/math_functions.h"
#include "dragon/utils/op_kernels.h" #include "dragon/utils/op_kernels.h"
......
...@@ -147,7 +147,7 @@ __global__ void _L2NormalizeGrad( ...@@ -147,7 +147,7 @@ __global__ void _L2NormalizeGrad(
/* ------------------- Launcher Separator ------------------- */ /* ------------------- Launcher Separator ------------------- */
#define DEFINE_KERNEL_LAUNCHER(name, T, ScalarT, AccT) \ #define DEFINE_KERNEL_LAUNCHER(name, T, AccT) \
template <> \ template <> \
void name<T, CUDAContext>( \ void name<T, CUDAContext>( \
const int outer_dim, \ const int outer_dim, \
...@@ -159,18 +159,18 @@ __global__ void _L2NormalizeGrad( ...@@ -159,18 +159,18 @@ __global__ void _L2NormalizeGrad(
T* y, \ T* y, \
CUDAContext* ctx) { \ CUDAContext* ctx) { \
const auto nblocks = outer_dim * inner_dim; \ const auto nblocks = outer_dim * inner_dim; \
_##name<ScalarT, AccT> \ _##name<math::ScalarType<T>::type, AccT> \
<<<CUDA_2D_BLOCKS(nblocks), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \ <<<CUDA_2D_BLOCKS(nblocks), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \
nblocks, \ nblocks, \
inner_dim, \ inner_dim, \
reduce_dim, \ reduce_dim, \
AccT(normalizer), \ AccT(normalizer), \
AccT(epsilon), \ AccT(epsilon), \
reinterpret_cast<const ScalarT*>(x), \ reinterpret_cast<const math::ScalarType<T>::type*>(x), \
reinterpret_cast<ScalarT*>(y)); \ reinterpret_cast<math::ScalarType<T>::type*>(y)); \
} }
#define DEFINE_GRAD_KERNEL_LAUNCHER(name, T, ScalarT, AccT) \ #define DEFINE_GRAD_KERNEL_LAUNCHER(name, T, AccT) \
template <> \ template <> \
void name<T, CUDAContext>( \ void name<T, CUDAContext>( \
const int outer_dim, \ const int outer_dim, \
...@@ -183,30 +183,30 @@ __global__ void _L2NormalizeGrad( ...@@ -183,30 +183,30 @@ __global__ void _L2NormalizeGrad(
T* dx, \ T* dx, \
CUDAContext* ctx) { \ CUDAContext* ctx) { \
const auto nblocks = outer_dim * inner_dim; \ const auto nblocks = outer_dim * inner_dim; \
_##name<ScalarT, AccT> \ _##name<math::ScalarType<T>::type, AccT> \
<<<CUDA_2D_BLOCKS(nblocks), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \ <<<CUDA_2D_BLOCKS(nblocks), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \
nblocks, \ nblocks, \
inner_dim, \ inner_dim, \
reduce_dim, \ reduce_dim, \
AccT(normalizer), \ AccT(normalizer), \
AccT(epsilon), \ AccT(epsilon), \
reinterpret_cast<const ScalarT*>(dy), \ reinterpret_cast<const math::ScalarType<T>::type*>(dy), \
reinterpret_cast<const ScalarT*>(x), \ reinterpret_cast<const math::ScalarType<T>::type*>(x), \
reinterpret_cast<ScalarT*>(dx)); \ reinterpret_cast<math::ScalarType<T>::type*>(dx)); \
} }
DEFINE_KERNEL_LAUNCHER(L1Normalize, float16, half, float); DEFINE_KERNEL_LAUNCHER(L1Normalize, float16, float);
DEFINE_KERNEL_LAUNCHER(L1Normalize, float, float, float); DEFINE_KERNEL_LAUNCHER(L1Normalize, float, float);
DEFINE_KERNEL_LAUNCHER(L1Normalize, double, double, double); DEFINE_KERNEL_LAUNCHER(L1Normalize, double, double);
DEFINE_KERNEL_LAUNCHER(L2Normalize, float16, half, float); DEFINE_KERNEL_LAUNCHER(L2Normalize, float16, float);
DEFINE_KERNEL_LAUNCHER(L2Normalize, float, float, float); DEFINE_KERNEL_LAUNCHER(L2Normalize, float, float);
DEFINE_KERNEL_LAUNCHER(L2Normalize, double, double, double); DEFINE_KERNEL_LAUNCHER(L2Normalize, double, double);
DEFINE_GRAD_KERNEL_LAUNCHER(L1NormalizeGrad, float16, half, float); DEFINE_GRAD_KERNEL_LAUNCHER(L1NormalizeGrad, float16, float);
DEFINE_GRAD_KERNEL_LAUNCHER(L1NormalizeGrad, float, float, float); DEFINE_GRAD_KERNEL_LAUNCHER(L1NormalizeGrad, float, float);
DEFINE_GRAD_KERNEL_LAUNCHER(L1NormalizeGrad, double, double, double); DEFINE_GRAD_KERNEL_LAUNCHER(L1NormalizeGrad, double, double);
DEFINE_GRAD_KERNEL_LAUNCHER(L2NormalizeGrad, float16, half, float); DEFINE_GRAD_KERNEL_LAUNCHER(L2NormalizeGrad, float16, float);
DEFINE_GRAD_KERNEL_LAUNCHER(L2NormalizeGrad, float, float, float); DEFINE_GRAD_KERNEL_LAUNCHER(L2NormalizeGrad, float, float);
DEFINE_GRAD_KERNEL_LAUNCHER(L2NormalizeGrad, double, double, double); DEFINE_GRAD_KERNEL_LAUNCHER(L2NormalizeGrad, double, double);
#undef DEFINE_KERNEL_LAUNCHER #undef DEFINE_KERNEL_LAUNCHER
#undef DEFINE_GRAD_KERNEL_LAUNCHER #undef DEFINE_GRAD_KERNEL_LAUNCHER
......
...@@ -10,15 +10,18 @@ namespace kernel { ...@@ -10,15 +10,18 @@ namespace kernel {
namespace { namespace {
#if __CUDA_ARCH__ >= 350
#define LDG(x, i) __ldg(x + i)
#else
#define LDG(x, i) x[i]
#endif
template <typename T> template <typename T>
__global__ void __global__ void
_BiasAdd(const int nthreads, const int axis_dim, const T* x, const T* b, T* y) { _BiasAdd(const int nthreads, const int axis_dim, const T* x, const T* b, T* y) {
auto Plus = math::PlusFunctor<T>();
CUDA_1D_KERNEL_LOOP(i, nthreads) { CUDA_1D_KERNEL_LOOP(i, nthreads) {
#if __CUDA_ARCH__ >= 350 y[i] = Plus(x[i], LDG(b, i % axis_dim));
y[i] = math::PlusFunctor<T>()(x[i], __ldg(b + i % axis_dim));
#else
y[i] = math::PlusFunctor<T>()(x[i], b[i % axis_dim]);
#endif
} }
} }
...@@ -30,12 +33,9 @@ __global__ void _BiasAdd( ...@@ -30,12 +33,9 @@ __global__ void _BiasAdd(
const T* x, const T* x,
const T* b, const T* b,
T* y) { T* y) {
auto Plus = math::PlusFunctor<T>();
CUDA_1D_KERNEL_LOOP(i, nthreads) { CUDA_1D_KERNEL_LOOP(i, nthreads) {
#if __CUDA_ARCH__ >= 350 y[i] = Plus(x[i], LDG(b, (i / inner_dim) % axis_dim));
y[i] = math::PlusFunctor<T>()(x[i], __ldg(b + (i / inner_dim) % axis_dim));
#else
y[i] = math::PlusFunctor<T>()(x[i], b[(i / inner_dim) % axis_dim]);
#endif
} }
} }
...@@ -43,64 +43,48 @@ __global__ void _BiasAdd( ...@@ -43,64 +43,48 @@ __global__ void _BiasAdd(
/* ------------------- Launcher Separator ------------------- */ /* ------------------- Launcher Separator ------------------- */
template <> #define DEFINE_KERNEL_LAUNCHER(T) \
void BiasAdd<float16, CUDAContext>( template <> \
const int outer_dim, void BiasAdd<T, CUDAContext>( \
const int inner_dim, const int outer_dim, \
const int axis_dim, const int inner_dim, \
const float16* x, const int axis_dim, \
const float16* b, const T* x, \
float16* y, const T* b, \
CUDAContext* ctx) { T* y, \
const auto nthreads = outer_dim * axis_dim * inner_dim; CUDAContext* ctx) { \
if (inner_dim == 1) { const auto nthreads = outer_dim * axis_dim * inner_dim; \
_BiasAdd<<<CUDA_BLOCKS(nthreads), CUDA_THREADS, 0, ctx->cuda_stream()>>>( if (inner_dim == 1) { \
nthreads, _BiasAdd<<< \
axis_dim, CUDA_BLOCKS(nthreads), \
reinterpret_cast<const half*>(x), CUDA_THREADS, \
reinterpret_cast<const half*>(b), 0, \
reinterpret_cast<half*>(y)); ctx->cuda_stream()>>>( \
} else { nthreads, \
_BiasAdd<<<CUDA_BLOCKS(nthreads), CUDA_THREADS, 0, ctx->cuda_stream()>>>( axis_dim, \
nthreads, reinterpret_cast<const math::ScalarType<T>::type*>(x), \
inner_dim, reinterpret_cast<const math::ScalarType<T>::type*>(b), \
axis_dim, reinterpret_cast<math::ScalarType<T>::type*>(y)); \
reinterpret_cast<const half*>(x), } else { \
reinterpret_cast<const half*>(b), _BiasAdd<<< \
reinterpret_cast<half*>(y)); CUDA_BLOCKS(nthreads), \
} CUDA_THREADS, \
} 0, \
ctx->cuda_stream()>>>( \
#define DEFINE_KERNEL_LAUNCHER(T) \ nthreads, \
template <> \ inner_dim, \
void BiasAdd<T, CUDAContext>( \ axis_dim, \
const int outer_dim, \ reinterpret_cast<const math::ScalarType<T>::type*>(x), \
const int inner_dim, \ reinterpret_cast<const math::ScalarType<T>::type*>(b), \
const int axis_dim, \ reinterpret_cast<math::ScalarType<T>::type*>(y)); \
const T* x, \ } \
const T* b, \
T* y, \
CUDAContext* ctx) { \
const auto nthreads = outer_dim * axis_dim * inner_dim; \
if (inner_dim == 1) { \
_BiasAdd<<< \
CUDA_BLOCKS(nthreads), \
CUDA_THREADS, \
0, \
ctx->cuda_stream()>>>(nthreads, axis_dim, x, b, y); \
} else { \
_BiasAdd<<< \
CUDA_BLOCKS(nthreads), \
CUDA_THREADS, \
0, \
ctx->cuda_stream()>>>(nthreads, inner_dim, axis_dim, x, b, y); \
} \
} }
DEFINE_KERNEL_LAUNCHER(int8_t); DEFINE_KERNEL_LAUNCHER(int8_t);
DEFINE_KERNEL_LAUNCHER(uint8_t); DEFINE_KERNEL_LAUNCHER(uint8_t);
DEFINE_KERNEL_LAUNCHER(int); DEFINE_KERNEL_LAUNCHER(int);
DEFINE_KERNEL_LAUNCHER(int64_t); DEFINE_KERNEL_LAUNCHER(int64_t);
DEFINE_KERNEL_LAUNCHER(float16);
DEFINE_KERNEL_LAUNCHER(float); DEFINE_KERNEL_LAUNCHER(float);
DEFINE_KERNEL_LAUNCHER(double); DEFINE_KERNEL_LAUNCHER(double);
#undef DEFINE_KERNEL_LAUNCHER #undef DEFINE_KERNEL_LAUNCHER
......
#include "dragon/utils/math_functions.h"
#include "dragon/utils/op_kernels.h" #include "dragon/utils/op_kernels.h"
namespace dragon { namespace dragon {
...@@ -23,40 +24,36 @@ void _DepthwiseConv2dNCHW( ...@@ -23,40 +24,36 @@ void _DepthwiseConv2dNCHW(
const int dilation_h, const int dilation_h,
const int dilation_w, const int dilation_w,
const T* x, const T* x,
const T* w, const T* filter,
T* y) { T* y) {
T sum_val;
int ih, iw, xi, wi;
int yc_offset, xc_start, yc_start;
int ih_start, yh_start, iw_start;
for (int n = 0; n < N; ++n) { for (int n = 0; n < N; ++n) {
for (int c = 0; c < C; ++c) { for (int c = 0; c < C; ++c) {
yc_offset = n * C + c; const int base_offset = n * C + c;
xc_start = yc_offset * H * W; const int x_offset = base_offset * H * W;
yc_start = yc_offset * out_h; const int y_offset = base_offset * out_h * out_w;
for (int oh = 0; oh < out_h; ++oh) { for (int h_out = 0; h_out < out_h; ++h_out) {
ih_start = oh * stride_h - pad_h; const int hstart = h_out * stride_h - pad_h;
yh_start = (yc_start + oh) * out_w; for (int w_out = 0; w_out < out_w; ++w_out) {
for (int ow = 0; ow < out_w; ++ow) { T val = T(0);
sum_val = T(0); int fi = c * kernel_h * kernel_w;
wi = c * kernel_h * kernel_w; const int wstart = w_out * stride_w - pad_w;
iw_start = ow * stride_w - pad_w; for (int h_k = 0; h_k < kernel_h; ++h_k) {
for (int kh = 0; kh < kernel_h; ++kh) { for (int w_k = 0; w_k < kernel_w; ++w_k) {
for (int kw = 0; kw < kernel_w; ++kw) { const int h = hstart + h_k * dilation_h;
ih = ih_start + kh * dilation_h; const int w = wstart + w_k * dilation_w;
iw = iw_start + kw * dilation_w; if (math::utils::IsAGeZeroAndALtB(h, H) &&
if (ih >= 0 && ih < H && iw >= 0 && iw < W) { math::utils::IsAGeZeroAndALtB(w, W)) {
xi = xc_start + ih * W + iw; const int xi = x_offset + h * W + w;
sum_val += x[xi] * w[wi]; val += x[xi] * filter[fi];
} }
++wi; ++fi;
} // End kw } // End w_k
} // End kh } // End h_k
y[yh_start + ow] = sum_val; y[y_offset + h_out * out_w + w_out] = val;
} // End ow } // End w_out
} // End oh } // End h_out
} } // End c
} // End c && n } // End n
} }
template <typename T> template <typename T>
...@@ -76,40 +73,34 @@ void _DepthwiseConv2dNHWC( ...@@ -76,40 +73,34 @@ void _DepthwiseConv2dNHWC(
const int dilation_h, const int dilation_h,
const int dilation_w, const int dilation_w,
const T* x, const T* x,
const T* w, const T* filter,
T* y) { T* y) {
T sum_val;
int ih, iw, xi, wi;
int xn_start, yn_start;
int ih_start, yh_start;
int iw_start, yw_start;
for (int n = 0; n < N; ++n) { for (int n = 0; n < N; ++n) {
xn_start = n * H; const int x_offset = n * H * W * C;
yn_start = n * out_h; const int y_offset = n * out_h * out_w * C;
for (int oh = 0; oh < out_h; ++oh) { for (int h_out = 0; h_out < out_h; ++h_out) {
ih_start = oh * stride_h - pad_h; const int hstart = h_out * stride_h - pad_h;
yh_start = (yn_start + oh) * out_w; for (int w_out = 0; w_out < out_w; ++w_out) {
for (int ow = 0; ow < out_w; ++ow) { const int wstart = w_out * stride_w - pad_w;
iw_start = ow * stride_w - pad_w;
yw_start = (yh_start + ow) * C;
for (int c = 0; c < C; ++c) { for (int c = 0; c < C; ++c) {
sum_val = T(0); T val = T(0);
wi = c * kernel_h * kernel_w; int fi = c * kernel_h * kernel_w;
for (int kh = 0; kh < kernel_h; ++kh) { for (int h_k = 0; h_k < kernel_h; ++h_k) {
for (int kw = 0; kw < kernel_w; ++kw) { for (int w_k = 0; w_k < kernel_w; ++w_k) {
ih = ih_start + kh * dilation_h; const int h = hstart + h_k * dilation_h;
iw = iw_start + kw * dilation_w; const int w = wstart + w_k * dilation_w;
if (ih >= 0 && ih < H && iw >= 0 && iw < W) { if (math::utils::IsAGeZeroAndALtB(h, H) &&
xi = ((xn_start + ih) * W + iw) * C + c; math::utils::IsAGeZeroAndALtB(w, W)) {
sum_val += x[xi] * w[wi]; const int xi = x_offset + (h * W + w) * C + c;
val += x[xi] * filter[fi];
} }
++wi; ++fi;
} // End kw } // End w_k
} // End kh } // End h_k
y[yw_start + c] = sum_val; y[y_offset + ((h_out * out_w) + w_out) * C + c] = val;
} // End c } // End c
} // End ow } // End w_out
} // End oh } // End h_out
} // End n } // End n
} }
...@@ -144,7 +135,7 @@ void DepthwiseConv2d<float16, CPUContext>( ...@@ -144,7 +135,7 @@ void DepthwiseConv2d<float16, CPUContext>(
const int dilation_w, const int dilation_w,
const string& data_format, const string& data_format,
const float16* x, const float16* x,
const float16* w, const float16* filter,
float16* y, float16* y,
CPUContext* ctx) { CPUContext* ctx) {
CPU_FP16_NOT_SUPPORTED; CPU_FP16_NOT_SUPPORTED;
...@@ -168,7 +159,7 @@ void DepthwiseConv2d<float, CPUContext>( ...@@ -168,7 +159,7 @@ void DepthwiseConv2d<float, CPUContext>(
const int dilation_w, const int dilation_w,
const string& data_format, const string& data_format,
const float* x, const float* x,
const float* w, const float* filter,
float* y, float* y,
CPUContext* ctx) { CPUContext* ctx) {
DISPATCH_DATA_KERNEL( DISPATCH_DATA_KERNEL(
...@@ -188,7 +179,7 @@ void DepthwiseConv2d<float, CPUContext>( ...@@ -188,7 +179,7 @@ void DepthwiseConv2d<float, CPUContext>(
dilation_h, dilation_h,
dilation_w, dilation_w,
x, x,
w, filter,
y); y);
} }
...@@ -211,7 +202,7 @@ void DepthwiseConv2d<float, CPUContext>( ...@@ -211,7 +202,7 @@ void DepthwiseConv2d<float, CPUContext>(
const int dilation_w, \ const int dilation_w, \
const string& data_format, \ const string& data_format, \
const T* dy, \ const T* dy, \
const T* w, \ const T* filter, \
T* dx, \ T* dx, \
CPUContext* ctx) { \ CPUContext* ctx) { \
NOT_IMPLEMENTED; \ NOT_IMPLEMENTED; \
...@@ -235,7 +226,7 @@ void DepthwiseConv2d<float, CPUContext>( ...@@ -235,7 +226,7 @@ void DepthwiseConv2d<float, CPUContext>(
const string& data_format, \ const string& data_format, \
const T* dy, \ const T* dy, \
const T* x, \ const T* x, \
T* dw, \ T* dfilter, \
CPUContext* ctx) { \ CPUContext* ctx) { \
NOT_IMPLEMENTED; \ NOT_IMPLEMENTED; \
} }
......
...@@ -71,13 +71,13 @@ void _RoiAlign( ...@@ -71,13 +71,13 @@ void _RoiAlign(
continue; continue;
} }
const float roi_start_w = roi[1] * spatial_scale; const float roi_wstart = roi[1] * spatial_scale;
const float roi_start_h = roi[2] * spatial_scale; const float roi_hstart = roi[2] * spatial_scale;
const float roi_end_w = roi[3] * spatial_scale; const float roi_wend = roi[3] * spatial_scale;
const float roi_end_h = roi[4] * spatial_scale; const float roi_hend = roi[4] * spatial_scale;
const float roi_w = std::max(roi_end_w - roi_start_w, 1.f); const float roi_w = std::max(roi_wend - roi_wstart, 1.f);
const float roi_h = std::max(roi_end_h - roi_start_h, 1.f); const float roi_h = std::max(roi_hend - roi_hstart, 1.f);
const float bin_h = roi_h / (float)out_h; const float bin_h = roi_h / (float)out_h;
const float bin_w = roi_w / (float)out_w; const float bin_w = roi_w / (float)out_w;
...@@ -94,10 +94,10 @@ void _RoiAlign( ...@@ -94,10 +94,10 @@ void _RoiAlign(
for (int c = 0; c < C; ++c) { for (int c = 0; c < C; ++c) {
yi = 0; yi = 0;
for (int oh = 0; oh < out_h; ++oh) { for (int h_out = 0; h_out < out_h; ++h_out) {
hstart = roi_start_h + oh * bin_h; hstart = roi_hstart + h_out * bin_h;
for (int ow = 0; ow < out_w; ++ow) { for (int w_out = 0; w_out < out_w; ++w_out) {
wstart = roi_start_w + ow * bin_w; wstart = roi_wstart + w_out * bin_w;
val = T(0); val = T(0);
for (int i = 0; i < grid_h; ++i) { for (int i = 0; i < grid_h; ++i) {
h = hstart + (i + .5f) * bin_h / (float)grid_h; h = hstart + (i + .5f) * bin_h / (float)grid_h;
...@@ -108,7 +108,7 @@ void _RoiAlign( ...@@ -108,7 +108,7 @@ void _RoiAlign(
} // End i } // End i
offset_y[yi++] = val / num_grids; offset_y[yi++] = val / num_grids;
} }
} // End oh && ow } // End h_out && w_out
offset_x += x_inner_dim; offset_x += x_inner_dim;
offset_y += y_inner_dim; offset_y += y_inner_dim;
} // End c } // End c
......
#ifdef USE_CUDA #ifdef USE_CUDA
#include "dragon/core/context_cuda.h" #include "dragon/core/context_cuda.h"
#include "dragon/utils/conversions.h" #include "dragon/utils/math_functions.h"
#include "dragon/utils/op_kernels.h" #include "dragon/utils/op_kernels.h"
namespace dragon { namespace dragon {
...@@ -54,6 +54,7 @@ _RoiAlignIntp(const int H, const int W, float h, float w, const T* x) { ...@@ -54,6 +54,7 @@ _RoiAlignIntp(const int H, const int W, float h, float w, const T* x) {
return t + (b - t) * v; return t + (b - t) * v;
} }
template <typename T>
__device__ void _RoiAlignIntpParam( __device__ void _RoiAlignIntpParam(
const int H, const int H,
const int W, const int W,
...@@ -63,8 +64,8 @@ __device__ void _RoiAlignIntpParam( ...@@ -63,8 +64,8 @@ __device__ void _RoiAlignIntpParam(
int& bi, int& bi,
int& li, int& li,
int& ri, int& ri,
float& v, T& v,
float& u) { T& u) {
if (h < -1.f || h > H || w < -1.f || w > W) { if (h < -1.f || h > H || w < -1.f || w > W) {
li = ri = ti = bi = -1; li = ri = ti = bi = -1;
return; return;
...@@ -108,8 +109,8 @@ __global__ void _RoiAlign( ...@@ -108,8 +109,8 @@ __global__ void _RoiAlign(
const float* rois, const float* rois,
T* y) { T* y) {
CUDA_1D_KERNEL_LOOP(yi, nthreads) { CUDA_1D_KERNEL_LOOP(yi, nthreads) {
const int ow = yi % out_w; const int w_out = yi % out_w;
const int oh = (yi / out_w) % out_h; const int h_out = (yi / out_w) % out_h;
const int c = (yi / out_w / out_h) % C; const int c = (yi / out_w / out_h) % C;
const int n = yi / out_w / out_h / C; const int n = yi / out_w / out_h / C;
...@@ -121,26 +122,25 @@ __global__ void _RoiAlign( ...@@ -121,26 +122,25 @@ __global__ void _RoiAlign(
continue; continue;
} }
const float roi_start_w = roi[1] * spatial_scale; const float roi_wstart = roi[1] * spatial_scale;
const float roi_start_h = roi[2] * spatial_scale; const float roi_hstart = roi[2] * spatial_scale;
const float roi_end_w = roi[3] * spatial_scale; const float roi_wend = roi[3] * spatial_scale;
const float roi_end_h = roi[4] * spatial_scale; const float roi_hend = roi[4] * spatial_scale;
const float roi_w = max(roi_end_w - roi_start_w, 1.f); const float roi_w = max(roi_wend - roi_wstart, 1.f);
const float roi_h = max(roi_end_h - roi_start_h, 1.f); const float roi_h = max(roi_hend - roi_hstart, 1.f);
const float bin_h = roi_h / (float)out_h; const float bin_h = roi_h / (float)out_h;
const float bin_w = roi_w / (float)out_w; const float bin_w = roi_w / (float)out_w;
const float hstart = roi_hstart + h_out * bin_h;
const float wstart = roi_wstart + w_out * bin_w;
const int grid_h = const int grid_h =
sampling_ratio > 0 ? sampling_ratio : ceil(roi_h / out_h); sampling_ratio > 0 ? sampling_ratio : ceil(roi_h / out_h);
const int grid_w = const int grid_w =
sampling_ratio > 0 ? sampling_ratio : ceil(roi_w / out_w); sampling_ratio > 0 ? sampling_ratio : ceil(roi_w / out_w);
const float hstart = roi_start_h + oh * bin_h;
const float wstart = roi_start_w + ow * bin_w;
const T* offset_x = x + (batch_ind * C + c) * H * W; const T* offset_x = x + (batch_ind * C + c) * H * W;
AccT val = AccT(0); AccT val = AccT(0);
for (int i = 0; i < grid_h; i++) { for (int i = 0; i < grid_h; i++) {
const float h = hstart + (i + .5f) * bin_h / grid_h; const float h = hstart + (i + .5f) * bin_h / grid_h;
...@@ -149,7 +149,6 @@ __global__ void _RoiAlign( ...@@ -149,7 +149,6 @@ __global__ void _RoiAlign(
val += _RoiAlignIntp(H, W, h, w, offset_x); val += _RoiAlignIntp(H, W, h, w, offset_x);
} }
} }
y[yi] = convert::To<T>(val / AccT(grid_h * grid_w)); y[yi] = convert::To<T>(val / AccT(grid_h * grid_w));
} }
} }
...@@ -168,8 +167,8 @@ __global__ void _RoiAlignGrad( ...@@ -168,8 +167,8 @@ __global__ void _RoiAlignGrad(
const float* rois, const float* rois,
AccT* dx) { AccT* dx) {
CUDA_1D_KERNEL_LOOP(yi, nthreads) { CUDA_1D_KERNEL_LOOP(yi, nthreads) {
const int ow = yi % out_w; const int w_out = yi % out_w;
const int oh = (yi / out_w) % out_h; const int h_out = (yi / out_w) % out_h;
const int c = (yi / out_w / out_h) % C; const int c = (yi / out_w / out_h) % C;
const int n = yi / out_w / out_h / C; const int n = yi / out_w / out_h / C;
...@@ -178,24 +177,23 @@ __global__ void _RoiAlignGrad( ...@@ -178,24 +177,23 @@ __global__ void _RoiAlignGrad(
if (batch_ind < 0) continue; if (batch_ind < 0) continue;
const float roi_start_w = roi[1] * spatial_scale; const float roi_wstart = roi[1] * spatial_scale;
const float roi_start_h = roi[2] * spatial_scale; const float roi_hstart = roi[2] * spatial_scale;
const float roi_end_w = roi[3] * spatial_scale; const float roi_wend = roi[3] * spatial_scale;
const float roi_end_h = roi[4] * spatial_scale; const float roi_hend = roi[4] * spatial_scale;
const float roi_w = max(roi_end_w - roi_start_w, 1.f); const float roi_w = max(roi_wend - roi_wstart, 1.f);
const float roi_h = max(roi_end_h - roi_start_h, 1.f); const float roi_h = max(roi_hend - roi_hstart, 1.f);
const float bin_h = roi_h / (float)out_h; const float bin_h = roi_h / (float)out_h;
const float bin_w = roi_w / (float)out_w; const float bin_w = roi_w / (float)out_w;
const float hstart = roi_hstart + h_out * bin_h;
const float wstart = roi_wstart + w_out * bin_w;
const int grid_h = const int grid_h =
sampling_ratio > 0 ? sampling_ratio : ceil(roi_h / out_h); sampling_ratio > 0 ? sampling_ratio : ceil(roi_h / out_h);
const int grid_w = const int grid_w =
sampling_ratio > 0 ? sampling_ratio : ceil(roi_w / out_w); sampling_ratio > 0 ? sampling_ratio : ceil(roi_w / out_w);
const float hstart = roi_start_h + oh * bin_h;
const float wstart = roi_start_w + ow * bin_w;
const float dyi = convert::To<float>(dy[yi]) / float(grid_h * grid_w); const float dyi = convert::To<float>(dy[yi]) / float(grid_h * grid_w);
float* offset_dx = dx + (batch_ind * C + c) * H * W; float* offset_dx = dx + (batch_ind * C + c) * H * W;
...@@ -209,10 +207,10 @@ __global__ void _RoiAlignGrad( ...@@ -209,10 +207,10 @@ __global__ void _RoiAlignGrad(
if (li >= 0 && ri >= 0 && ti >= 0 && bi >= 0) { if (li >= 0 && ri >= 0 && ti >= 0 && bi >= 0) {
const float db = dyi * v; const float db = dyi * v;
const float dt = dyi * (1.f - v); const float dt = dyi * (1.f - v);
atomicAdd(offset_dx + ti * W + li, (1.f - u) * dt); math::utils::AtomicAdd(offset_dx + ti * W + li, (1.f - u) * dt);
atomicAdd(offset_dx + ti * W + ri, u * dt); math::utils::AtomicAdd(offset_dx + ti * W + ri, u * dt);
atomicAdd(offset_dx + bi * W + li, (1.f - u) * db); math::utils::AtomicAdd(offset_dx + bi * W + li, (1.f - u) * db);
atomicAdd(offset_dx + bi * W + ri, u * db); math::utils::AtomicAdd(offset_dx + bi * W + ri, u * db);
} }
} // End i } // End i
} // End j } // End j
...@@ -225,9 +223,9 @@ __global__ void _RoiAlignGrad( ...@@ -225,9 +223,9 @@ __global__ void _RoiAlignGrad(
/* ------------------- Launcher Separator ------------------- */ /* ------------------- Launcher Separator ------------------- */
#define DEFINE_KERNEL_LAUNCHER(T, ScalarT) \ #define DEFINE_KERNEL_LAUNCHER(name, InputT, OutputT) \
template <> \ template <> \
void RoiAlign<T, CUDAContext>( \ void name<InputT, CUDAContext>( \
const int C, \ const int C, \
const int H, \ const int H, \
const int W, \ const int W, \
...@@ -236,12 +234,12 @@ __global__ void _RoiAlignGrad( ...@@ -236,12 +234,12 @@ __global__ void _RoiAlignGrad(
const int num_rois, \ const int num_rois, \
const float spatial_scale, \ const float spatial_scale, \
const int sampling_ratio, \ const int sampling_ratio, \
const T* x, \ const InputT* x, \
const float* rois, \ const float* rois, \
T* y, \ OutputT* y, \
CUDAContext* ctx) { \ CUDAContext* ctx) { \
auto nthreads = num_rois * C * out_h * out_w; \ auto nthreads = num_rois * C * out_h * out_w; \
_RoiAlign<ScalarT, float> \ _##name<math::ScalarType<InputT>::type, float> \
<<<CUDA_BLOCKS(nthreads), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \ <<<CUDA_BLOCKS(nthreads), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \
nthreads, \ nthreads, \
C, \ C, \
...@@ -251,53 +249,18 @@ __global__ void _RoiAlignGrad( ...@@ -251,53 +249,18 @@ __global__ void _RoiAlignGrad(
out_w, \ out_w, \
spatial_scale, \ spatial_scale, \
sampling_ratio, \ sampling_ratio, \
reinterpret_cast<const ScalarT*>(x), \ reinterpret_cast<const math::ScalarType<InputT>::type*>(x), \
rois, \ rois, \
reinterpret_cast<ScalarT*>(y)); \ reinterpret_cast<math::ScalarType<OutputT>::type*>(y)); \
}
#define DEFINE_GRAD_KERNEL_LAUNCHER(T, ScalarT) \
template <> \
void RoiAlignGrad<T, CUDAContext>( \
const int C, \
const int H, \
const int W, \
const int out_h, \
const int out_w, \
const int num_rois, \
const float spatial_scale, \
const int sampling_ratio, \
const T* dy, \
const float* rois, \
float* dx, \
CUDAContext* ctx) { \
auto nthreads = num_rois * C * out_h * out_w; \
_RoiAlignGrad<<< \
CUDA_BLOCKS(nthreads), \
CUDA_THREADS, \
0, \
ctx->cuda_stream()>>>( \
nthreads, \
C, \
H, \
W, \
out_h, \
out_w, \
spatial_scale, \
sampling_ratio, \
reinterpret_cast<const ScalarT*>(dy), \
rois, \
dx); \
} }
DEFINE_KERNEL_LAUNCHER(float16, half); DEFINE_KERNEL_LAUNCHER(RoiAlign, float16, float16);
DEFINE_KERNEL_LAUNCHER(float, float); DEFINE_KERNEL_LAUNCHER(RoiAlign, float, float);
DEFINE_KERNEL_LAUNCHER(double, double); DEFINE_KERNEL_LAUNCHER(RoiAlign, double, double);
DEFINE_GRAD_KERNEL_LAUNCHER(float16, half); DEFINE_KERNEL_LAUNCHER(RoiAlignGrad, float16, float); // RoiAlignGrad
DEFINE_GRAD_KERNEL_LAUNCHER(float, float); DEFINE_KERNEL_LAUNCHER(RoiAlignGrad, float, float); // RoiAlignGrad
DEFINE_GRAD_KERNEL_LAUNCHER(double, double); DEFINE_KERNEL_LAUNCHER(RoiAlignGrad, double, float); // RoiAlignGrad
#undef DEFINE_KERNEL_LAUNCHER #undef DEFINE_KERNEL_LAUNCHER
#undef DEFINE_GRAD_KERNEL_LAUNCHER
} // namespace kernel } // namespace kernel
......
...@@ -37,51 +37,51 @@ void _RoiPool( ...@@ -37,51 +37,51 @@ void _RoiPool(
continue; continue;
} }
const int roi_start_w = std::round(roi[1] * spatial_scale); const int roi_wstart = std::round(roi[1] * spatial_scale);
const int roi_start_h = std::round(roi[2] * spatial_scale); const int roi_hstart = std::round(roi[2] * spatial_scale);
const int roi_end_w = std::round(roi[3] * spatial_scale); const int roi_wend = std::round(roi[3] * spatial_scale);
const int roi_end_h = std::round(roi[4] * spatial_scale); const int roi_hend = std::round(roi[4] * spatial_scale);
const int roi_w = std::max(roi_end_w - roi_start_w + 1, 1); const int roi_w = std::max(roi_wend - roi_wstart + 1, 1);
const int roi_h = std::max(roi_end_h - roi_start_h + 1, 1); const int roi_h = std::max(roi_hend - roi_hstart + 1, 1);
const float bin_h = (float)roi_h / (float)out_h; const float bin_h = (float)roi_h / (float)out_h;
const float bin_w = (float)roi_w / (float)out_w; const float bin_w = (float)roi_w / (float)out_w;
T val; T val;
bool empty; bool empty;
int xi, maxi, yi; int xi, yi, mask_val;
int hstart, wstart, hend, wend; int hstart, wstart, hend, wend;
const T* offset_x = x + batch_ind * x_cols; const T* offset_x = x + batch_ind * x_cols;
for (int c = 0; c < C; ++c) { for (int c = 0; c < C; ++c) {
yi = 0; yi = 0;
for (int oh = 0; oh < out_h; ++oh) { for (int h_out = 0; h_out < out_h; ++h_out) {
hstart = (int)(bin_h * oh); hstart = (int)(bin_h * h_out);
hstart = std::min(std::max(hstart + roi_start_h, 0), H); hstart = std::min(std::max(hstart + roi_hstart, 0), H);
hend = (int)ceil(bin_h * (oh + 1)); hend = (int)ceil(bin_h * (h_out + 1));
hend = std::min(std::max(hend + roi_start_h, 0), H); hend = std::min(std::max(hend + roi_hstart, 0), H);
empty = hend == hstart; empty = hend == hstart;
for (int ow = 0; ow < out_w; ++ow) { for (int w_out = 0; w_out < out_w; ++w_out) {
wstart = (int)(bin_w * ow); wstart = (int)(bin_w * w_out);
wstart = std::min(std::max(wstart + roi_start_w, 0), W); wstart = std::min(std::max(wstart + roi_wstart, 0), W);
wend = (int)ceil(bin_w * (ow + 1)); wend = (int)ceil(bin_w * (w_out + 1));
wend = std::min(std::max(wend + roi_start_w, 0), W); wend = std::min(std::max(wend + roi_wstart, 0), W);
empty = empty || (wend == wstart); empty = empty || (wend == wstart);
maxi = empty ? -1 : 0; mask_val = empty ? -1 : 0;
val = empty ? T(0) : offset_x[0]; val = empty ? T(0) : offset_x[0];
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
xi = h * W + w; xi = h * W + w;
if (offset_x[xi] > offset_y[yi]) { if (offset_x[xi] > offset_y[yi]) {
maxi = xi; mask_val = xi;
val = offset_x[xi]; val = offset_x[xi];
} }
} // End w } // End w
} // End h } // End h
offset_y[yi] = val; offset_y[yi] = val;
offset_mask[yi++] = maxi; offset_mask[yi++] = mask_val;
} }
} // End oh && ow } // End h_out && w_out
offset_x += x_inner_dim; offset_x += x_inner_dim;
offset_y += y_inner_dim; offset_y += y_inner_dim;
offset_mask += y_inner_dim; offset_mask += y_inner_dim;
...@@ -141,7 +141,7 @@ void RoiPool<float16, CPUContext>( ...@@ -141,7 +141,7 @@ void RoiPool<float16, CPUContext>(
const float spatial_scale, \ const float spatial_scale, \
const T* dy, \ const T* dy, \
const float* rois, \ const float* rois, \
const int* mask, \ int* mask, \
float* dx, \ float* dx, \
CPUContext* ctx) { \ CPUContext* ctx) { \
NOT_IMPLEMENTED; \ NOT_IMPLEMENTED; \
......
...@@ -31,8 +31,8 @@ __global__ void _RoiPool( ...@@ -31,8 +31,8 @@ __global__ void _RoiPool(
T* y) { T* y) {
auto Greater = math::GreaterFunctor<T>(); auto Greater = math::GreaterFunctor<T>();
CUDA_1D_KERNEL_LOOP(yi, nthreads) { CUDA_1D_KERNEL_LOOP(yi, nthreads) {
const int ow = yi % out_w; const int w_out = yi % out_w;
const int oh = (yi / out_w) % out_h; const int h_out = (yi / out_w) % out_h;
const int c = (yi / out_w / out_h) % C; const int c = (yi / out_w / out_h) % C;
const int n = yi / out_w / out_h / C; const int n = yi / out_w / out_h / C;
...@@ -45,25 +45,25 @@ __global__ void _RoiPool( ...@@ -45,25 +45,25 @@ __global__ void _RoiPool(
continue; continue;
} }
const int roi_start_w = round(roi[1] * spatial_scale); const int roi_wstart = round(roi[1] * spatial_scale);
const int roi_start_h = round(roi[2] * spatial_scale); const int roi_hstart = round(roi[2] * spatial_scale);
const int roi_end_w = round(roi[3] * spatial_scale); const int roi_wend = round(roi[3] * spatial_scale);
const int roi_end_h = round(roi[4] * spatial_scale); const int roi_hend = round(roi[4] * spatial_scale);
const int roi_w = max(roi_end_w - roi_start_w + 1, 1); const int roi_w = max(roi_wend - roi_wstart + 1, 1);
const int roi_h = max(roi_end_h - roi_start_h + 1, 1); const int roi_h = max(roi_hend - roi_hstart + 1, 1);
const float bin_h = (float)roi_h / (float)out_h; const float bin_h = (float)roi_h / (float)out_h;
const float bin_w = (float)roi_w / (float)out_w; const float bin_w = (float)roi_w / (float)out_w;
int hstart = floor(bin_h * oh); int hstart = floor(bin_h * h_out);
int wstart = floor(bin_w * ow); int wstart = floor(bin_w * w_out);
int hend = ceil(bin_h * (oh + 1)); int hend = ceil(bin_h * (h_out + 1));
int wend = ceil(bin_w * (ow + 1)); int wend = ceil(bin_w * (w_out + 1));
hstart = min(max(hstart + roi_start_h, 0), H); hstart = min(max(hstart + roi_hstart, 0), H);
hend = min(max(hend + roi_start_h, 0), H); hend = min(max(hend + roi_hstart, 0), H);
wstart = min(max(wstart + roi_start_w, 0), W); wstart = min(max(wstart + roi_wstart, 0), W);
wend = min(max(wend + roi_start_w, 0), W); wend = min(max(wend + roi_wstart, 0), W);
const bool empty = (hend <= hstart) || (wend <= wstart); const bool empty = (hend <= hstart) || (wend <= wstart);
int max_idx = empty ? -1 : 0; int max_idx = empty ? -1 : 0;
...@@ -106,7 +106,8 @@ __global__ void _RoiPoolGrad( ...@@ -106,7 +106,8 @@ __global__ void _RoiPoolGrad(
AccT* offset_dx = dx + (batch_ind * C + c) * H * W; AccT* offset_dx = dx + (batch_ind * C + c) * H * W;
if (LDG(mask, yi) != -1) { if (LDG(mask, yi) != -1) {
atomicAdd(offset_dx + LDG(mask, yi), convert::To<AccT>(dy[yi])); math::utils::AtomicAdd(
offset_dx + LDG(mask, yi), convert::To<AccT>(dy[yi]));
} }
} }
} }
...@@ -117,78 +118,43 @@ __global__ void _RoiPoolGrad( ...@@ -117,78 +118,43 @@ __global__ void _RoiPoolGrad(
/* ------------------- Launcher Separator ------------------- */ /* ------------------- Launcher Separator ------------------- */
#define DEFINE_KERNEL_LAUNCHER(T, ScalarT) \ #define DEFINE_KERNEL_LAUNCHER(name, InputT, OutputT) \
template <> \ template <> \
void RoiPool<T, CUDAContext>( \ void name<InputT, CUDAContext>( \
const int C, \ const int C, \
const int H, \ const int H, \
const int W, \ const int W, \
const int out_h, \ const int out_h, \
const int out_w, \ const int out_w, \
const int num_rois, \ const int num_rois, \
const float spatial_scale, \ const float spatial_scale, \
const T* x, \ const InputT* x, \
const float* rois, \ const float* rois, \
int* mask, \ int* mask, \
T* y, \ OutputT* y, \
CUDAContext* ctx) { \ CUDAContext* ctx) { \
auto nthreads = num_rois * C * out_h * out_w; \ auto nthreads = num_rois * C * out_h * out_w; \
_RoiPool<<<CUDA_BLOCKS(nthreads), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \ _##name<<<CUDA_BLOCKS(nthreads), CUDA_THREADS, 0, ctx->cuda_stream()>>>( \
nthreads, \ nthreads, \
C, \ C, \
H, \ H, \
W, \ W, \
out_h, \ out_h, \
out_w, \ out_w, \
spatial_scale, \ spatial_scale, \
reinterpret_cast<const ScalarT*>(x), \ reinterpret_cast<const math::ScalarType<InputT>::type*>(x), \
rois, \ rois, \
mask, \ mask, \
reinterpret_cast<ScalarT*>(y)); \ reinterpret_cast<math::ScalarType<OutputT>::type*>(y)); \
} }
#define DEFINE_GRAD_KERNEL_LAUNCHER(T, ScalarT) \ DEFINE_KERNEL_LAUNCHER(RoiPool, float16, float16);
template <> \ DEFINE_KERNEL_LAUNCHER(RoiPool, float, float);
void RoiPoolGrad<T, CUDAContext>( \ DEFINE_KERNEL_LAUNCHER(RoiPool, double, double);
const int C, \ DEFINE_KERNEL_LAUNCHER(RoiPoolGrad, float16, float); // RoiPoolGrad
const int H, \ DEFINE_KERNEL_LAUNCHER(RoiPoolGrad, float, float); // RoiPoolGrad
const int W, \ DEFINE_KERNEL_LAUNCHER(RoiPoolGrad, double, float); // RoiPoolGrad
const int out_h, \
const int out_w, \
const int num_rois, \
const float spatial_scale, \
const T* dy, \
const float* rois, \
const int* mask, \
float* dx, \
CUDAContext* ctx) { \
auto nthreads = num_rois * C * out_h * out_w; \
_RoiPoolGrad<<< \
CUDA_BLOCKS(nthreads), \
CUDA_THREADS, \
0, \
ctx->cuda_stream()>>>( \
nthreads, \
C, \
H, \
W, \
out_h, \
out_w, \
spatial_scale, \
reinterpret_cast<const ScalarT*>(dy), \
rois, \
mask, \
dx); \
}
DEFINE_KERNEL_LAUNCHER(float16, half);
DEFINE_KERNEL_LAUNCHER(float, float);
DEFINE_KERNEL_LAUNCHER(double, double);
DEFINE_GRAD_KERNEL_LAUNCHER(float16, half);
DEFINE_GRAD_KERNEL_LAUNCHER(float, float);
DEFINE_GRAD_KERNEL_LAUNCHER(double, double);
#undef DEFINE_KERNEL_LAUNCHER #undef DEFINE_KERNEL_LAUNCHER
#undef DEFINE_GRAD_KERNEL_LAUNCHER
} // namespace kernel } // namespace kernel
......
...@@ -63,15 +63,15 @@ if (USE_OPENMP) ...@@ -63,15 +63,15 @@ if (USE_OPENMP)
endif() endif()
if (USE_CUDA) if (USE_CUDA)
if (USE_SHARED_LIBS) if (USE_SHARED_LIBS)
target_link_libraries_v2(dragon cudart) target_link_libraries_v2(dragonrt cudart)
target_link_libraries_v2(dragon cublas) target_link_libraries_v2(dragonrt cublas)
target_link_libraries_v2(dragon curand) target_link_libraries_v2(dragonrt curand)
else() else()
target_link_libraries_v2(dragon cudart_static) target_link_libraries_v2(dragonrt cudart_static)
target_link_libraries_v2(dragon cublas_static) target_link_libraries_v2(dragonrt cublas_static)
target_link_libraries_v2(dragon curand_static) target_link_libraries_v2(dragonrt curand_static)
if (CUDA_VERSION VERSION_GREATER "10.0") if (CUDA_VERSION VERSION_GREATER "10.0")
target_link_libraries_v2(dragon cublasLt_static) target_link_libraries_v2(dragonrt cublasLt_static)
endif() endif()
endif() endif()
endif() endif()
......
...@@ -197,9 +197,7 @@ const Map<string, Map<string, string>>& ONNXBackend::get_node_renamed_attrs() ...@@ -197,9 +197,7 @@ const Map<string, Map<string, string>>& ONNXBackend::get_node_renamed_attrs()
} }
const Map<string, string>& ONNXBackend::get_renamed_attrs() const { const Map<string, string>& ONNXBackend::get_renamed_attrs() const {
const static Map<string, string> kRenamedAttrs{ const static Map<string, string> kRenamedAttrs;
{"keepdims", "keep_dims"},
};
return kRenamedAttrs; return kRenamedAttrs;
} }
......
...@@ -138,17 +138,12 @@ ONNXImporterReturns ONNXBackend::ConvPoolImporter( ...@@ -138,17 +138,12 @@ ONNXImporterReturns ONNXBackend::ConvPoolImporter(
attributes.AddRewrittenAttribute("mode")->set_s("AVG"); attributes.AddRewrittenAttribute("mode")->set_s("AVG");
} else if (onnx_op_type == "GlobalMaxPool") { } else if (onnx_op_type == "GlobalMaxPool") {
attributes.AddRewrittenAttribute("mode")->set_s("MAX"); attributes.AddRewrittenAttribute("mode")->set_s("MAX");
attributes.AddRewrittenAttribute("global_pooling")->set_i(1); attributes.AddRewrittenAttribute("global_pool")->set_i(1);
} else if (onnx_op_type == "GlobalAveragePool") { } else if (onnx_op_type == "GlobalAveragePool") {
attributes.AddRewrittenAttribute("mode")->set_s("AVG"); attributes.AddRewrittenAttribute("mode")->set_s("AVG");
attributes.AddRewrittenAttribute("global_pooling")->set_i(1); attributes.AddRewrittenAttribute("global_pool")->set_i(1);
} }
auto returns = GenericImporter(onnx_node, ctx); return GenericImporter(onnx_node, ctx);
// Determine the op type
OperatorDef* op_def = returns.GetOp(0);
auto ks = attributes.get<ONNX_INTS>("kernel_shape");
*(op_def->mutable_type()) += (str::to(ks.size() > 0 ? ks.size() : 2) + "d");
return returns;
} }
ONNXImporterReturns ONNXBackend::GenericImporter( ONNXImporterReturns ONNXBackend::GenericImporter(
...@@ -251,7 +246,11 @@ ONNXImporterReturns ONNXBackend::ResizeImporter( ...@@ -251,7 +246,11 @@ ONNXImporterReturns ONNXBackend::ResizeImporter(
auto node = NodeProto(onnx_node->node); auto node = NodeProto(onnx_node->node);
auto onnx_node_v2 = ONNXNode(node); auto onnx_node_v2 = ONNXNode(node);
auto& attributes = onnx_node_v2.attributes; auto& attributes = onnx_node_v2.attributes;
auto coord_mode = attributes.get<string>("coordinate_transformation_mode");
attributes.remove("coordinate_transformation_mode"); attributes.remove("coordinate_transformation_mode");
if (coord_mode == "align_corners") {
attributes.AddRewrittenAttribute("align_corners")->set_i(1);
}
if (ctx.opset_version() >= 9) { if (ctx.opset_version() >= 9) {
node.mutable_input()->Clear(); node.mutable_input()->Clear();
node.add_input(onnx_node->node.input(0)); node.add_input(onnx_node->node.input(0));
......
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!