Commit 96f7277e by Ting PAN

Add Cambricon's CNML Context

1 parent 5cd0761b
Showing with 2334 additions and 418 deletions
------------------------------------------------------------------------
The list of the most significant changes made to Dragon over time.
Dragon 0.2.2.12 (20181120)
DRAGON_VERSION == 2212
Changes (w.r.t. Dragon 0.2.2.11):
Preview Features:
- Added Cambricon's CNML context.
- Added support for the Int8 (char) Tensor.
- Removed the CUDA device id query from pointers.
- Added ``DropBlock2dOp``.
- Added ``MaximumOp``, ``MinimumOp``, ``NLLLossOp``.
- Added CuDNN support for ``BiasAddOp``.
- Optimized memory usage of ``DropoutOp``.
- Replaced ``thread_local`` with a platform-specific TLS solution.
- Changed the default norm eps from 1e-3 to 1e-5,
affected: ``BatchNorm``, ``BatchRenorm``, ``GroupNorm``, ``InstanceNorm``, ``L2Norm``.
- Enforced CUDA FP16 support (i.e., removed ``WITH_CUDA_FP16``).
- [PyTorch] Added ``torch.one_hot``.
- [PyTorch] Added ``torch.log``, ``Tensor.log``, ``torch.exp`` and ``Tensor.exp``.
- [PyTorch] Added ``torch.minimum``, ``torch.maximum``,
``torch.clamp``, ``Tensor.clamp``, ``Tensor.clamp_``.
- [PyTorch] Added ``nn.ELU`` and ``nn.SELU``.
- [PyTorch] Added ``nn.GroupNorm``.
- [PyTorch] Added ``nn.NLLLoss``, ``nn.BCEWithLogitsLoss``,
``nn.L1Loss``, ``nn.MSELoss``, ``nn.SmoothL1Loss``.
- [PyTorch] Added ``nn.DropBlock2d``.
- [PyTorch] Added ``train`` and ``eval`` modes for Module,
affected: ``nn.BatchNorm``, ``nn.Dropout``.
- [PyTorch] Deprecated ``size_average`` and ``reduce`` in
``nn.Loss``; added ``reduction`` instead.
- [PyTorch] ``torch.save`` can now save both ``torch.Tensor`` and other picklable values.
- [PyCaffe] Added ``DropBlockLayer``.
Bugs fixed:
- Fixed the uncomputed output in ``BiasAddGradientOp``.
- Fixed the incorrect gradients of ``ClipGradientOp``.
- Fixed incorrect results from ``math::Inv`` under ``CPUContext``.
- Fixed an issue where the default device was used when initializing NCCL.
- Removed the strict shape check in ``SmoothL1Op``.
- Fixed incorrect CXX API exports under Win32.
- [PyTorch] Fixed an issue where multiple ``GradientGather`` ops were triggered by one Operator.
- [PyTorch] Fixed the schema check for in-place fundamental ops.
- [PyTorch] Fixed the missing shape and dtype after ``Tensor.copy_``.
- [PyTorch] Fixed an issue where ``Tensor.fill_`` and ``Tensor.zero_``
would change the data type of a non-empty Tensor.
- [PyTorch] Fixed the Python 2 integer check.
------------------------------------------------------------------------
\ No newline at end of file
......@@ -8,10 +8,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
unzip \
ssh \
vim \
libtbb-dev \
libsdl2-dev \
libnuma-dev \
libprotobuf-dev \
protobuf-compiler \
libopencv-dev \
libopenblas-dev \
libboost-all-dev \
python3-pip \
python3-dev \
python3-pyqt4 \
......@@ -40,3 +44,5 @@ RUN git clone https://github.com/seetaresearch/Dragon.git && \
wget http://dragon.seetatech.com/download/docker/ubuntu-16.04-cpu-openblas/CMakeLists.txt && \
mkdir build && cd build && cmake .. && make install -j8 && cd .. && rm -rf build && \
cd python && python3 setup.py install
RUN rm /usr/bin/python && ln -s /usr/bin/python3 /usr/bin/python && ln -s /usr/bin/pip3 /usr/bin/pip
\ No newline at end of file
......@@ -9,10 +9,14 @@ RUN rm /etc/apt/sources.list.d/cuda.list && rm /etc/apt/sources.list.d/nvidia-ml
unzip \
ssh \
vim \
libtbb-dev \
libsdl2-dev \
libnuma-dev \
libprotobuf-dev \
protobuf-compiler \
libopencv-dev \
libopenblas-dev \
libboost-all-dev \
libnccl2 \
libnccl-dev \
python3-pip \
......@@ -43,3 +47,5 @@ RUN git clone https://github.com/seetaresearch/Dragon.git && \
wget http://dragon.seetatech.com/download/docker/ubuntu-16.04-cuda9.0-cudnn7/CMakeLists.txt && \
mkdir build && cd build && cmake .. && make install -j8 && cd .. && rm -rf build && \
cd python && python3 setup.py install
RUN rm /usr/bin/python && ln -s /usr/bin/python3 /usr/bin/python && ln -s /usr/bin/pip3 /usr/bin/pip
\ No newline at end of file
......@@ -17,7 +17,6 @@ option(WITH_SSE "Set ON to use SSE 4.1" ON)
option(WITH_MPI "Set ON to use MPI" OFF)
option(WITH_MPI_CUDA "Set ON to use MPI-CUDA" OFF)
option(WITH_MPI_NCCL "Set ON to use MPI-NCCL" OFF)
option(WITH_CUDA_FP16 "Set ON to use FP16" ON)
# Set your 3rdparty
set(3RDPARTY_DIR ${PROJECT_SOURCE_DIR}/../3rdparty)
......@@ -163,10 +162,6 @@ if (WITH_MPI_NCCL)
ADD_DEFINITIONS(-DWITH_MPI_NCCL)
message(STATUS "Use MPI-NCCL [Optional]")
endif()
if (WITH_CUDA_FP16)
ADD_DEFINITIONS(-DWITH_CUDA_FP16)
message(STATUS "Use CUDA FP16 [Optional]")
endif()
# ---[ Flags
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${CUDA_ARCH}")
......
......@@ -13,6 +13,7 @@
#define DRAGON_CORE_COMMON_H_
#include <ctime>
#include <random>
#include <climits>
#include <memory>
#include <string>
......@@ -49,25 +50,35 @@ using Map = std::unordered_map<Key, Value>;
template <typename Value>
using Set = std::unordered_set<Value>;
/*
* Define the Kernel version.
*
* | Major(2) | Minor(2) | Patch(11) |
*/
#define DRAGON_VERSION 2211
/* * * * * * * * * * * * * * * * * * * * *
* *
* Kernel Version *
* *
* Major(2) | Minor(2) | Patch(12) *
* *
* * * * * * * * * * * * * * * * * * * * */
#define DRAGON_VERSION 2212
/* * * * * * * * * * * * * * * * * * * * *
* *
* Default Random Seed *
* *
* * * * * * * * * * * * * * * * * * * * */
/*
* Define the default random seed.
*/
#define DEFAULT_RNG_SEED 3
/*
* Define the common marcos.
*/
#ifdef _MSC_VER
#if _MSC_VER < 1900
#define thread_local __declspec(thread)
#endif
/* * * * * * * * * * * * * * * * * * * * *
* *
* Macros *
* *
* * * * * * * * * * * * * * * * * * * * */
// Avoid using "thread_local" on VS2013 or older Xcode
#if defined(__clang__) || defined(__GNUC__)
#define TLS_OBJECT __thread
#else
#define TLS_OBJECT __declspec(thread)
#endif
#define CONCATENATE_IMPL(s1, s2) s1##s2
......
......@@ -12,15 +12,8 @@
#ifndef DRAGON_CORE_CONTEXT_H_
#define DRAGON_CORE_CONTEXT_H_
#include <random>
#include <ctime>
#include "core/common.h"
#ifdef WITH_CUDA
#include "utils/cuda_device.h"
#endif
namespace dragon {
class CPUContext {
......@@ -45,7 +38,7 @@ class CPUContext {
#else
data = malloc(nbytes);
#endif
CHECK(data) << "Malloc mem: " << nbytes << " bytes failed.";
CHECK(data) << "\nMalloc mem: " << nbytes << " bytes failed.";
return data;
}
......
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#ifndef DRAGON_CORE_CONTEXT_CNML_H_
#define DRAGON_CORE_CONTEXT_CNML_H_
/* CAMBRICON's CNRT & CNML Environment */
#include "core/common.h"
struct cnrtStream;
struct cnmlCpuTensor;
struct cnmlTensor;
struct cnmlFusionOp;
typedef struct cnrtStream* cnrtStream_t;
typedef struct cnmlCpuTensor* cnmlCpuTensor_t;
typedef struct cnmlTensor* cnmlTensor_t;
typedef struct cnmlFusionOp* cnmlFusionOp_t;
namespace dragon {
class CNRTObject;
class CNMLContext {
public:
CNMLContext(const DeviceOption& option)
: device_id_(option.device_id()),
random_seed_(option.has_random_seed() ?
option.random_seed() : DEFAULT_RNG_SEED) {
CHECK_EQ(option.device_type(), CNML);
}
CNMLContext(const int device_id = 0)
: device_id_(device_id),
random_seed_(DEFAULT_RNG_SEED) {}
void SwitchToDevice(int stream_id);
inline void SwitchToDevice() { SwitchToDevice(1); }
void FinishDeviceCompution();
static void* New(size_t nbytes);
static void Memset(
size_t nbytes,
void* ptr);
inline void MemsetAsync(
size_t nbytes,
void* ptr) {
Memset(nbytes, ptr);
}
template<class DstContext, class SrcContext>
static void Memcpy(
size_t nbytes,
void* dst,
const void* src);
template<class DstContext, class SrcContext>
inline void MemcpyAsync(
size_t nbytes,
void* dst,
const void* src) {
Memcpy<DstContext, SrcContext>(nbytes, dst, src);
}
static void Delete(void* data);
inline int device_id() const { return device_id_; }
inline void set_stream_id(int stream_id) { stream_id_ = stream_id; }
inline cnrtStream_t cnrt_stream() {
return cnrt_stream(device_id_, stream_id_);
}
static cnrtStream_t cnrt_stream(
int device_id,
int stream_id);
static std::mutex& mutex() { static std::mutex m; return m; }
static thread_local CNRTObject cnrt_object_;
private:
int device_id_, stream_id_ = 1, random_seed_;
unique_ptr<std::mt19937> rand_generator_;
};
} // namespace dragon
#endif // DRAGON_CORE_CONTEXT_CNML_H_
\ No newline at end of file
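As a reading aid (not part of the commit): a minimal sketch of how the
allocator/copy API declared above would be driven, assuming a working CNRT
runtime; device setup and error handling are omitted.

    #include <vector>
    #include "core/context.h"
    #include "core/context_cnml.h"

    void CopyFromMLU() {
        // Allocate 4KB on the MLU, stage it back to the host, then free it.
        void* mlu_data = dragon::CNMLContext::New(4096);
        std::vector<char> host(4096);
        dragon::CNMLContext::Memcpy<dragon::CPUContext, dragon::CNMLContext>(
            4096, host.data(), mlu_data);  // (nbytes, dst, src)
        dragon::CNMLContext::Delete(mlu_data);
    }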
......@@ -12,8 +12,9 @@
#ifndef DRAGON_CORE_CONTEXT_CUDA_H_
#define DRAGON_CORE_CONTEXT_CUDA_H_
/* NVIDIA's CUDA Environment */
#include "core/common.h"
#include "core/context.h"
#include "utils/cuda_device.h"
#include "utils/cudnn_device.h"
......@@ -52,13 +53,13 @@ class CUDAObject {
}
// follow the caffe2,
// each device takes a group of non-bl0cking streams
// each device takes a group of non-blocking streams
// the stream 0 is reserved for default stream,
// as some computations really require it,
// e.g. cublas.asum() and mixed cpu/cuda operations
// besides, some calls, such as cudnn.conv() and cudnn.rnn(),
// produce wrong results if running them on non-blocking streams
// note that caffe2 also use default streams (within CuDNNState)
// note that caffe2 also uses default streams (within CuDNNState)
cudaStream_t GetStream(int device_id, int stream_id) {
vector<cudaStream_t>& dev_streams = cuda_streams[device_id];
if (dev_streams.size() <= (unsigned)stream_id)
......@@ -140,7 +141,7 @@ class CUDAContext {
inline static void* New(size_t nbytes) {
void* data;
cudaMalloc(&data, nbytes);
CHECK(data) << "Malloc cuda mem: "
CHECK(data) << "\nMalloc cuda mem: "
<< nbytes << " bytes failed.";
return data;
}
......@@ -199,11 +200,11 @@ class CUDAContext {
static cudaStream_t cuda_stream(
int device_id,
int stream_id) {
return cuda_object_.GetStream(device_id, stream_id);
return cuda_object()->GetStream(device_id, stream_id);
}
cublasHandle_t cublas_handle() {
return cuda_object_.GetCuBLASHandle(device_id_, stream_id_);
return cuda_object()->GetCuBLASHandle(device_id_, stream_id_);
}
inline std::mt19937* rand_generator() {
......@@ -227,13 +228,17 @@ class CUDAContext {
#ifdef WITH_CUDNN
cudnnHandle_t cudnn_handle() {
return cuda_object_.GetCuDNNHandle(device_id_, stream_id_);
return cuda_object()->GetCuDNNHandle(device_id_, stream_id_);
}
#endif
static std::mutex& mutex() { static std::mutex m; return m; }
static thread_local CUDAObject cuda_object_;
static CUDAObject* cuda_object() {
static TLS_OBJECT CUDAObject* cuda_object_;
if (!cuda_object_) cuda_object_ = new CUDAObject();
return cuda_object_;
}
private:
int device_id_, stream_id_ = 1, random_seed_;
......
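The cuda_object() accessor above shows the new TLS pattern: because
__thread and __declspec(thread) support neither constructors nor
destructors, the old thread_local member becomes a lazily-new'd,
deliberately leaked per-thread pointer. A self-contained sketch of the
same idiom, with a hypothetical payload type:

    #include <vector>
    #include "core/common.h"  // for TLS_OBJECT

    struct PerThreadScratch { std::vector<char> buffer; };

    PerThreadScratch* per_thread_scratch() {
        // Zero-initialized per thread; each thread news its own instance
        // on first use and never frees it.
        static TLS_OBJECT PerThreadScratch* scratch_;
        if (!scratch_) scratch_ = new PerThreadScratch();
        return scratch_;
    }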
......@@ -48,10 +48,10 @@ class GraphBase {
Workspace* ws_;
};
class Graph final : public GraphBase {
class Graph : public GraphBase {
public:
Graph(const GraphDef& meta_graph, Workspace* ws);
~Graph() { for (auto* op : ops_) delete op; }
virtual ~Graph() { for (auto* op : ops_) delete op; }
bool Create(
const GraphDef& optimized_graph,
......@@ -73,7 +73,7 @@ class Graph final : public GraphBase {
inline Workspace* ws() const { return ws_; }
private:
protected:
void ForwardShareDyeing(string u, string ancestor);
void ForwardPruneDyeing(
string u,
......@@ -98,6 +98,9 @@ DECLARE_REGISTRY(
const GraphDef&,
Workspace*);
#define REGISTER_GRAPH(name, ...) \
REGISTER_CLASS(GraphRegistry, name, __VA_ARGS__)
} // namespace dragon
#endif // DRAGON_CORE_GRAPH_H_
\ No newline at end of file
......@@ -12,30 +12,49 @@
#ifndef DRAGON_CORE_MIXEDMEM_H_
#define DRAGON_CORE_MIXEDMEM_H_
#include "context.h"
#include "context_cuda.h"
#include "core/context.h"
#include "core/context_cuda.h"
#include "core/context_cnml.h"
namespace dragon {
typedef enum {
NCHW,
NHWC,
} DataOrder;
class MixedMemory {
public:
enum State {
typedef enum {
UNINITIALIZED,
STATE_AT_CPU,
STATE_AT_CUDA,
STATE_AT_CNML,
SWITCHED,
SYNCED };
SYNCED,
} State;
MixedMemory() : cpu_ptr_(nullptr), cuda_ptr_(nullptr) {}
MixedMemory() : cpu_ptr_(nullptr),
cuda_ptr_(nullptr), cnml_ptr_(nullptr) {}
MixedMemory(const TypeMeta& meta, const size_t nbytes)
: meta_(meta), nbytes_(nbytes),
cpu_ptr_(nullptr), cuda_ptr_(nullptr) {}
: meta_(meta), nbytes_(nbytes), cpu_ptr_(nullptr),
cuda_ptr_(nullptr), cnml_ptr_(nullptr) {}
~MixedMemory();
const void* cpu_data();
const void* cuda_data();
const void* cnml_data();
void* mutable_cpu_data();
void* mutable_cuda_data();
void* mutable_cnml_data();
void* malloc_cnml_data();
void fetch_cnml_data(void** data);
cnmlCpuTensor_t& cnml_cpu_tensor();
cnmlTensor_t& cnml_mlu_tensor();
void set_cpu_data(void* cpu_ptr, size_t nbytes);
void SwitchToDevice();
......@@ -43,23 +62,35 @@ class MixedMemory {
inline size_t nbytes() const { return nbytes_; }
inline void* cpu_ptr() { state_ = STATE_AT_CPU; return cpu_ptr_; }
inline void* cuda_ptr() { state_ = STATE_AT_CUDA; return cuda_ptr_; }
inline size_t nchunks() const { return nchunks_; }
void set_nchunks(size_t nchunks) { nchunks_ = nchunks; }
inline State state() const { return state_; }
inline DataOrder order() const { return order_; }
inline void set_order(DataOrder order) { order_ = order; }
const Map<string, string> info() const;
void ToCUDA();
void ToCPU();
void ToCUDA();
private:
void* cpu_ptr_, *cuda_ptr_;
bool own_cpu_ptr_ = true;
State state_ = UNINITIALIZED;
size_t nbytes_ = 0;
TypeMeta meta_;
size_t nbytes_ = 0, nchunks_ = 1;
DataOrder order_ = NCHW;
State state_ = UNINITIALIZED;
void* cpu_ptr_, *cuda_ptr_, *cnml_ptr_;
int own_cpu_ptr_ = 1, ptr_device_ = 0;
/* For CAMBRICON's CNML Environment */
cnmlCpuTensor_t cnml_cpu_tensor_ = nullptr;
cnmlTensor_t cnml_mlu_tensor_ = nullptr;
};
} // namespace dragon
#endif
\ No newline at end of file
#endif // DRAGON_CORE_MIXEDMEM_H_
\ No newline at end of file
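A usage sketch of the MixedMemory state machine declared above, assuming a
CUDA build (the CNML path is symmetric via mutable_cnml_data() and
cnml_data()); the transitions are inferred from the accessors and may
differ in detail from the implementation.

    #include "core/mixedmem.h"

    void RoundTrip() {
        dragon::MixedMemory mem(
            dragon::TypeMeta::Make<float>(), 4 * sizeof(float));
        // First host write: allocates CPU memory, state -> STATE_AT_CPU.
        float* host = static_cast<float*>(mem.mutable_cpu_data());
        host[0] = 1.f;
        // First device read: copies host -> device and marks both in sync.
        const void* dev = mem.cuda_data();
        // A later mutable device access would invalidate the host copy.
        (void)dev;
    }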
......@@ -44,7 +44,9 @@ class OperatorBase {
const string& anchor);
inline void SwitchToPhase(const string& phase) { phase_ = phase; }
virtual void Run(int stream_id = 1) { NOT_IMPLEMENTED; }
virtual void Fusion(void* graph) { NOT_IMPLEMENTED; }
inline const string& name() const { return def_.name(); }
inline const string& type() const { return def_.type(); }
......@@ -186,12 +188,22 @@ DECLARE_REGISTRY(
const OperatorDef&,
Workspace*);
/* NVIDIA's Accelerated Library - CUDNN */
DECLARE_REGISTRY(
CUDNNOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
/* CAMBRICON's Accelerated Library - CNML */
DECLARE_REGISTRY(
CNMLOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
#define TENSOR_FILL_WITH_TYPE(tensor, shape, type) \
if (tensor.count() == 0) { \
CHECK(ws()->GetFiller(tensor.name())) \
......@@ -310,6 +322,9 @@ DECLARE_REGISTRY(
#define INSTANTIATE_CUDNN_OPERATOR(name) \
template class CuDNN##name##Op<CUDAContext>;
#define INSTANTIATE_CNML_OPERATOR(name) \
template class CnML##name##Op<CNMLContext>;
#define REGISTER_CPU_OPERATOR(name, ...) \
REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
......@@ -319,6 +334,9 @@ DECLARE_REGISTRY(
#define REGISTER_CUDNN_OPERATOR(name, ...) \
REGISTER_CLASS(CUDNNOperatorRegistry, name, __VA_ARGS__)
#define REGISTER_CNML_OPERATOR(name, ...) \
REGISTER_CLASS(CNMLOperatorRegistry, name, __VA_ARGS__)
#define DEPLOY_CPU(name) \
REGISTER_CPU_OPERATOR(name, name##Op<CPUContext>); \
INSTANTIATE_OPERATOR(name, CPUContext);
......@@ -336,6 +354,10 @@ DECLARE_REGISTRY(
REGISTER_CUDNN_OPERATOR(name, CuDNN##name##Op<CUDAContext>); \
INSTANTIATE_CUDNN_OPERATOR(name);
#define DEPLOY_CNML(name) \
REGISTER_CNML_OPERATOR(name, CnML##name##Op<CNMLContext>); \
INSTANTIATE_CNML_OPERATOR(name);
} // namespace dragon
#endif // DRAGON_CORE_OPERATOR_H_
\ No newline at end of file
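To show how the new registry plumbs together (illustrative only; the CNML
operators actually ported by this commit live under src/): a hypothetical
CnMLReluOp and its deployment. DEPLOY_CNML(Relu) expands to a registration
of CnMLReluOp<CNMLContext> under the key "Relu" plus an explicit template
instantiation.

    template <class Context>
    class CnMLReluOp final : public Operator<Context> {
     public:
        USE_SIMPLE_CTOR_DTOR(CnMLReluOp);
        USE_OPERATOR_FUNCTIONS;
        // Would build a cnmlFusionOp once, then enqueue it per run.
        void RunOnDevice() override { /* ... */ }
    };

    DEPLOY_CNML(Relu);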
......@@ -10,7 +10,7 @@
// ------------------------------------------------------------
#ifndef DRAGON_CORE_TENSOR_H_
#define DRAONG_CORE_TENSOR_H_
#define DRAGON_CORE_TENSOR_H_
#include "core/common.h"
#include "core/mixedmem.h"
......@@ -103,16 +103,20 @@ class Tensor {
return offset;
}
inline string DimString() const {
if (ndim() == 0) return "(0,)";
static inline string DimString(
const vector<TIndex>& dims) {
if (dims.size() == 0) return "(0,)";
std::stringstream ss;
ss << "(";
for (int i = 0; i < ndim() - 1; i++) ss << dim(i) << ",";
if (ndim() == 1) ss << dim(0) << ",)";
else ss << dim(ndim() - 1) << ")";
for (int i = 0; i < dims.size() - 1; i++)
ss << dims[i] << ",";
if (dims.size() == 1) ss << dims[0] << ",)";
else ss << dims.back() << ")";
return ss.str();
}
inline string DimString() const { return DimString(dims_); }
inline bool is_corrupted() const { return is_corrupted_; }
inline void Corrupt() { is_corrupted_ = true; }
......@@ -156,9 +160,12 @@ class Tensor {
} else if (TypeMeta::Id<Context>() ==
TypeMeta::Id<CUDAContext>()) {
*data_ptr = mem->mutable_cuda_data();
} else if (TypeMeta::Id<Context>() ==
TypeMeta::Id<CNMLContext>()) {
*data_ptr = mem->mutable_cnml_data();
} else {
LOG(FATAL) << "Unknown memory type. "
<< "Only CPU or CUDA is supported.";
LOG(FATAL) << "Unknown memory type.\n"
<< "Only CPU, CUDA and CNML are supported.";
}
}
}
......@@ -173,9 +180,12 @@ class Tensor {
} else if (TypeMeta::Id<Context>() ==
TypeMeta::Id<CUDAContext>()) {
return mem->cuda_data();
} else if (TypeMeta::Id<Context>() ==
TypeMeta::Id<CNMLContext>()) {
return mem->cnml_data();
} else {
LOG(FATAL) << "Unknown memory type. "
<< "Only CPU or CUDA are supported.";
LOG(FATAL) << "Unknown memory type.\n"
<< "Only CPU, CUDA, and CNML are supported.";
return nullptr;
}
}
......@@ -295,4 +305,4 @@ class Tensor {
} // namespace dragon
#endif // DRAONG_CORE_TENSOR_H_
\ No newline at end of file
#endif // DRAGON_CORE_TENSOR_H_
\ No newline at end of file
......@@ -18,6 +18,9 @@
namespace dragon {
typedef char int8;
typedef unsigned char uint8;
#ifdef _MSC_VER
typedef struct __declspec(align(2)) {
......@@ -49,8 +52,8 @@ inline const TypeMeta& TypeStringToMeta(
{ "int64", TypeMeta::Make<int64_t>() },
{ "float64", TypeMeta::Make<double>() },
{ "float16", TypeMeta::Make<float16>() },
{ "uint8", TypeMeta::Make<uint8_t>() },
{ "int8", TypeMeta::Make<char>() },
{ "uint8", TypeMeta::Make<uint8>() },
{ "int8", TypeMeta::Make<int8>() },
};
static TypeMeta unknown_type;
return s2m_type_map.count(str_type) ?
......@@ -66,8 +69,8 @@ inline const std::string TypeMetaToString(
{ TypeMeta::Id<int64_t>(), "int64" },
{ TypeMeta::Id<double>(), "float64", },
{ TypeMeta::Id<float16>(), "float16" },
{ TypeMeta::Id<uint8_t>(), "uint8" },
{ TypeMeta::Id<char>(), "int8" }
{ TypeMeta::Id<uint8>(), "uint8" },
{ TypeMeta::Id<int8>(), "int8" }
};
return m2s_type_map.count(meta.id()) ?
m2s_type_map[meta.id()] : "unknown";
......
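A quick round-trip over the remapped aliases above (illustrative; CHECK
macros from utils/logging.h): "int8" now resolves through the int8 typedef
rather than a bare char.

    void CheckInt8Alias() {
        const dragon::TypeMeta& meta = dragon::TypeStringToMeta("int8");
        CHECK_EQ(meta.id(), dragon::TypeMeta::Id<dragon::int8>());
        CHECK_EQ(dragon::TypeMetaToString(meta), "int8");
    }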
......@@ -47,8 +47,8 @@ class Workspace {
recompute_flag->Reshape({ 1 });
recompute_flag->mutable_data<bool, CPUContext>()[0] = false;
for (int i = 0; i < WORKSPACE_MAX_CORRUPTED_SIZE; i++) {
string name = "/opt/mirror_stage/buffer_" +
dragon_cast<string, int>(i);
string name = "/opt/mirror_stage/buffer_"
+ std::to_string(i);
Tensor* buffer = CreateTensor(name);
head->mutable_data<string, CPUContext>()[i] = "";
}
......@@ -277,7 +277,8 @@ class Workspace {
inline bool SetProxy(
const string& key,
const string& proxy) {
if (proxy_map_.count(key))
if (key == proxy) return false;
if (proxy_map_.count(key) > 0)
return proxy_map_[key] == proxy;
proxy_map_[key] = proxy;
return true;
......
......@@ -23,7 +23,7 @@ class DropoutOp final : public Operator<Context> {
DropoutOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
use_scale(OperatorBase::Arg<bool>("scale", true)) {
GET_ARGUMENT_WITH_DESC(float, prob, 0.5);
GET_ARGUMENT_WITH_DESC(float, prob, 0.5f);
SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
}
USE_OPERATOR_FUNCTIONS;
......@@ -42,7 +42,7 @@ class DropoutGradientOp final : public Operator<Context> {
DropoutGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
use_scale(OperatorBase::Arg<bool>("scale", true)) {
GET_ARGUMENT_WITH_DESC(float, prob, 0.5);
GET_ARGUMENT_WITH_DESC(float, prob, 0.5f);
SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
}
USE_OPERATOR_FUNCTIONS;
......@@ -53,7 +53,6 @@ class DropoutGradientOp final : public Operator<Context> {
protected:
DECLARE_ARGUMENT_WITH_DESC(float, prob);
bool use_scale;
Tensor* mask;
};
DEFINE_ARGUMENT_WITH_DESC(float, DropoutOp, prob);
......@@ -70,7 +69,7 @@ public:
: Operator<Context>(def, ws), states_initialized(false),
use_scale(OperatorBase::Arg<bool>("scale", true)),
random_seed(DEFAULT_RNG_SEED) {
GET_ARGUMENT_WITH_DESC(float, prob, 0.5);
GET_ARGUMENT_WITH_DESC(float, prob, 0.5f);
SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
CUDNN_CHECK(cudnnCreateDropoutDescriptor(&dropout_desc));
......@@ -101,7 +100,7 @@ public:
: Operator<Context>(def, ws), states_initialized(false),
use_scale(OperatorBase::Arg<bool>("scale", true)),
random_seed(DEFAULT_RNG_SEED) {
GET_ARGUMENT_WITH_DESC(float, prob, 0.5);
GET_ARGUMENT_WITH_DESC(float, prob, 0.5f);
SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
CUDNN_CHECK(cudnnCreateDropoutDescriptor(&dropout_desc));
......
......@@ -21,7 +21,7 @@ class ReluOp : public Operator<Context> {
public:
ReluOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
slope(OperatorBase::Arg<float>("slope", 0.0)) {}
slope(OperatorBase::Arg<float>("slope", 0.f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
......@@ -36,7 +36,7 @@ class ReluGradientOp : public Operator<Context> {
public:
ReluGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
slope(OperatorBase::Arg<float>("slope", 0.0)) {}
slope(OperatorBase::Arg<float>("slope", 0.f)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
......
......@@ -48,8 +48,6 @@ class SoftmaxGradientOp final : public Operator<Context> {
#ifdef WITH_CUDNN
#include "utils/cudnn_device.h"
template <class Context>
class CuDNNSoftmaxOp final : public Operator<Context> {
public:
......@@ -70,8 +68,7 @@ class CuDNNSoftmaxOp final : public Operator<Context> {
template <typename T> void RunWithType();
protected:
int axis;
TIndex outer_dim, inner_dim;
TIndex axis, outer_dim, inner_dim;
cudnnTensorDescriptor_t input_desc, output_desc;
};
......@@ -95,8 +92,7 @@ class CuDNNSoftmaxGradientOp final : public Operator<Context> {
template <typename T> void RunWithType();
protected:
int axis;
TIndex outer_dim, inner_dim;
TIndex axis, outer_dim, inner_dim;
cudnnTensorDescriptor_t input_desc, output_desc;
};
......
......@@ -55,7 +55,7 @@ class AffineGradientOp final : public Operator<Context> {
#ifdef WITH_CUDNN
#include "utils/cudnn_device.h"
#if CUDNN_VERSION_MIN(6, 0, 0)
template <class Context>
class CuDNNAffineOpBase : public Operator<Context> {
......@@ -152,6 +152,8 @@ protected:
Tensor sum_result;
};
#endif
#endif // WITH_CUDNN
} // namespace dragon
......
......@@ -36,11 +36,17 @@ class ClipOp final : public Operator<Context> {
template <class Context>
class ClipGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(ClipGradientOp);
ClipGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
low(OperatorBase::Arg<float>("low", -FLT_MAX)),
high(OperatorBase::Arg<float>("high", FLT_MAX)) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
float low, high;
};
} // namespace dragon
......
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#ifndef DRAGON_OPERATORS_ARITHMETIC_MAXIMUM_OP_H_
#define DRAGON_OPERATORS_ARITHMETIC_MAXIMUM_OP_H_
#include "core/operator.h"
namespace dragon {
template <class Context>
class MaximumOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(MaximumOp);
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void EltwiseRunWithType();
template <typename T> void BroadcastRunWithType();
};
template <class Context>
class MaximumGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(MaximumGradientOp);
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void EltwiseRunWithType();
template <typename T> void BroadcastRunWithType();
};
} // namespace dragon
#endif // DRAGON_OPERATORS_ARITHMETIC_MAXIMUM_OP_H_
\ No newline at end of file
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#ifndef DRAGON_OPERATORS_ARITHMETIC_MINIMUM_OP_H_
#define DRAGON_OPERATORS_ARITHMETIC_MINIMUM_OP_H_
#include "core/operator.h"
namespace dragon {
template <class Context>
class MinimumOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(MinimumOp);
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void EltwiseRunWithType();
template <typename T> void BroadcastRunWithType();
};
template <class Context>
class MinimumGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(MinimumGradientOp);
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void EltwiseRunWithType();
template <typename T> void BroadcastRunWithType();
};
} // namespace dragon
#endif // DRAGON_OPERATORS_ARITHMETIC_MINIMUM_OP_H_
\ No newline at end of file
......@@ -43,8 +43,6 @@ public:
#if CUDNN_VERSION_MIN(7, 0, 0)
#include "utils/cudnn_device.h"
template <class Context>
class CuDNNCTCLossOp final : public Operator<Context> {
public:
......
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#ifndef DRAGON_OPERATORS_LOSS_NLL_LOSS_OP_H_
#define DRAGON_OPERATORS_LOSS_NLL_LOSS_OP_H_
#include "core/operator.h"
namespace dragon {
template <class Context>
class NLLLossOp : public Operator<Context> {
public:
NLLLossOp(
const OperatorDef& def,
Workspace* ws)
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", 1)),
normalization(OperatorBase::Arg<string>(
"normalization", "VALID")) {
auto xs = OperatorBase::Args<int>("ignore_labels");
if (xs.size()) {
ignores.Reshape({ (TIndex)xs.size() });
auto* Idata = ignores.mutable_data<int, CPUContext>();
for (int i = 0; i < xs.size(); i++) Idata[i] = xs[i];
}
}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename Tx, typename Ty> void RunWithType();
protected:
TIndex axis, outer_dim, inner_dim;
Tensor losses, flags, ignores;
string normalization;
};
template <class Context>
class NLLLossGradientOp : public Operator<Context> {
public:
NLLLossGradientOp(
const OperatorDef& def,
Workspace* ws)
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", 1)),
normalization(OperatorBase::Arg<string>(
"normalization", "VALID")) {
auto xs = OperatorBase::Args<int>("ignore_labels");
if (xs.size()) {
ignores.Reshape({ (TIndex)xs.size() });
auto* Idata = ignores.mutable_data<int, CPUContext>();
for (int i = 0; i < xs.size(); i++) Idata[i] = xs[i];
}
}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename Tx, typename Ty> void RunWithType();
protected:
TIndex axis, outer_dim, inner_dim;
Tensor ignores, flags;
string normalization;
};
} // namespace dragon
#endif // DRAGON_OPERATORS_LOSS_NLL_LOSS_OP_H_
\ No newline at end of file
......@@ -22,7 +22,8 @@ class InitializeOp : public Operator<Context> {
public:
InitializeOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
shape_desc(OperatorBase::Arg<string>("shape", "")) {
shape_desc(OperatorBase::Arg<string>("shape", "")),
dtype(OperatorBase::Arg<string>("dtype", "float32")) {
GET_ARGUMENTS_WITH_DESC(int, dims);
}
USE_OPERATOR_FUNCTIONS;
......@@ -32,19 +33,29 @@ class InitializeOp : public Operator<Context> {
protected:
DECLARE_ARGUMENTS_WITH_DESC(int, dims);
string shape_desc;
string shape_desc, dtype;
TensorFiller filler;
};
template <class Context>
class FillOp final : public InitializeOp<Context> {
class FillOp final : public Operator<Context> {
public:
FillOp(const OperatorDef& def, Workspace* ws)
: InitializeOp<Context>(def, ws) {
this->filler.set_type("constant");
this->filler.set_value(OperatorBase::Arg<float>("value", 0.0));
: Operator<Context>(def, ws),
shape_desc(OperatorBase::Arg<string>("shape", "")),
dtype(OperatorBase::Arg<string>("dtype", "float32")),
value(OperatorBase::Arg<float>("value", 0.0)) {
GET_ARGUMENTS_WITH_DESC(int, dims);
}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
DECLARE_ARGUMENTS_WITH_DESC(int, dims);
string shape_desc, dtype;
float value;
};
template <class Context>
......@@ -130,6 +141,7 @@ public:
};
DEFINE_ARGUMENTS_WITH_DESC(int, InitializeOp, dims);
DEFINE_ARGUMENTS_WITH_DESC(int, FillOp, dims);
} // namespace
......
......@@ -25,7 +25,7 @@ class BatchNormOp final : public Operator<Context> {
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)),
momentum(OperatorBase::Arg<float>("momentum", 0.9f)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)),
eps(OperatorBase::Arg<float>("eps", 1e-5f)),
use_stats(OperatorBase::Arg<int>("use_stats", -1)),
mode(OperatorBase::Arg<string>("mode", "DEFAULT")) {
if (axis != -1)
......@@ -81,7 +81,7 @@ class FusedBatchNormOp : public Operator<Context> {
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)),
momentum(OperatorBase::Arg<float>("momentum", 0.9f)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)),
eps(OperatorBase::Arg<float>("eps", 1e-5f)),
use_stats(OperatorBase::Arg<int>("use_stats", -1)) {}
USE_OPERATOR_FUNCTIONS;
......@@ -105,7 +105,7 @@ class FusedBatchNormGradientOp : public Operator<Context> {
FusedBatchNormGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)),
eps(OperatorBase::Arg<float>("eps", 1e-5f)),
use_stats(OperatorBase::Arg<int>("use_stats", -1)) {}
USE_OPERATOR_FUNCTIONS;
......@@ -127,14 +127,13 @@ class FusedBatchNormGradientOp : public Operator<Context> {
#if CUDNN_VERSION_MIN(5, 0, 0)
#include "utils/cudnn_device.h"
template <class Context>
class CuDNNBatchNormOp final : public FusedBatchNormOp<Context> {
class CuDNNBatchNormOp final
: public FusedBatchNormOp<Context> {
public:
CuDNNBatchNormOp(const OperatorDef& def, Workspace* ws)
: FusedBatchNormOp<Context>(def, ws),
eps64(OperatorBase::Arg<float>("eps", 1e-3f)) {
eps64(OperatorBase::Arg<float>("eps", 1e-5f)) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&output_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bn_desc));
......@@ -167,11 +166,12 @@ class CuDNNBatchNormOp final : public FusedBatchNormOp<Context> {
};
template <class Context>
class CuDNNBatchNormGradientOp final : public FusedBatchNormGradientOp<Context> {
class CuDNNBatchNormGradientOp final
: public FusedBatchNormGradientOp<Context> {
public:
CuDNNBatchNormGradientOp(const OperatorDef& def, Workspace* ws)
: FusedBatchNormGradientOp<Context>(def, ws),
eps64(OperatorBase::Arg<float>("eps", 1e-3f)) {
eps64(OperatorBase::Arg<float>("eps", 1e-5f)) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&output_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bn_desc));
......
......@@ -23,7 +23,7 @@ class BatchRenormOp final : public Operator<Context> {
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)),
momentum(OperatorBase::Arg<float>("momentum", 0.9f)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)),
eps(OperatorBase::Arg<float>("eps", 1e-5f)),
r_max(OperatorBase::Arg<float>("r_max", 3.f)),
d_max(OperatorBase::Arg<float>("d_max", 5.f)),
t_delta(OperatorBase::Arg<float>("t_delta", 1.f)),
......
......@@ -23,7 +23,7 @@ class GroupNormOp final : public Operator<Context> {
: Operator<Context>(def, ws),
group(OperatorBase::Arg<int>("group", 32)),
axis(OperatorBase::Arg<int>("axis", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)) {
eps(OperatorBase::Arg<float>("eps", 1e-5f)) {
if (axis != -1)
CHECK_EQ(axis, 1)
<< "\nThe axis can only be set to 1.";
......@@ -73,7 +73,7 @@ class FusedGroupNormOp final : public Operator<Context> {
: Operator<Context>(def, ws),
group(OperatorBase::Arg<int>("group", 32)),
axis(OperatorBase::Arg<int>("axis", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)) {}
eps(OperatorBase::Arg<float>("eps", 1e-5f)) {}
USE_OPERATOR_FUNCTIONS;
void Setup();
......
......@@ -22,9 +22,10 @@ class InstanceNormOp final : public Operator<Context> {
InstanceNormOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)) {
eps(OperatorBase::Arg<float>("eps", 1e-5f)) {
if (axis != -1)
CHECK_EQ(axis, 1) << "\nThe axis can only be set to 1.";
CHECK_EQ(axis, 1)
<< "\nThe axis can only be set to 1.";
}
USE_OPERATOR_FUNCTIONS;
......@@ -47,7 +48,8 @@ class InstanceNormGradientOp final : public Operator<Context> {
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", -1)) {
if (axis != -1)
CHECK_EQ(axis, 1) << "\nThe axis can only be set to 1.";
CHECK_EQ(axis, 1)
<< "\nThe axis can only be set to 1.";
}
USE_OPERATOR_FUNCTIONS;
......
......@@ -23,7 +23,7 @@ class L2NormOp final : public Operator<Context> {
: Operator<Context>(def, ws),
axis(OperatorBase::Arg<int>("axis", 0)),
num_axes(OperatorBase::Arg<int>("num_axes", -1)),
eps(OperatorBase::Arg<float>("eps", 1e-3f)),
eps(OperatorBase::Arg<float>("eps", 1e-5f)),
mode(OperatorBase::Arg<string>("mode", "SUM")) {}
USE_OPERATOR_FUNCTIONS;
......
......@@ -20,8 +20,6 @@ namespace dragon {
#if CUDNN_VERSION_MIN(5, 0, 0)
#include "utils/cudnn_device.h"
class cudnnTensorDescriptors {
public:
cudnnTensorDescriptors(const int num_descs) {
......
......@@ -21,7 +21,8 @@ class BiasAddOp final : public Operator<Context> {
public:
BiasAddOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
data_format(OperatorBase::Arg<string>("data_format", "NCHW")) {}
data_format(OperatorBase::Arg<string>(
"data_format", "NCHW")) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
......@@ -37,7 +38,8 @@ class BiasAddGradientOp final : public Operator<Context> {
public:
BiasAddGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
data_format(OperatorBase::Arg<string>("data_format", "NCHW")) {}
data_format(OperatorBase::Arg<string>(
"data_format", "NCHW")) {}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
......@@ -48,6 +50,62 @@ class BiasAddGradientOp final : public Operator<Context> {
string data_format;
};
#ifdef WITH_CUDNN
template <class Context>
class CuDNNBiasAddOp final : public Operator<Context> {
public:
CuDNNBiasAddOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
data_format(OperatorBase::Arg<string>(
"data_format", "NCHW")) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bias_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&output_desc));
}
USE_OPERATOR_FUNCTIONS;
~CuDNNBiasAddOp() {
CUDNN_CHECK(cudnnDestroyTensorDescriptor(bias_desc));
CUDNN_CHECK(cudnnDestroyTensorDescriptor(output_desc));
}
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
TIndex outer_dim, dim, inner_dim;
string data_format;
cudnnTensorDescriptor_t bias_desc, output_desc;
};
template <class Context>
class CuDNNBiasAddGradientOp final : public Operator<Context> {
public:
CuDNNBiasAddGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
data_format(OperatorBase::Arg<string>(
"data_format", "NCHW")) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(&input_desc));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bias_desc));
}
USE_OPERATOR_FUNCTIONS;
~CuDNNBiasAddGradientOp() {
CUDNN_CHECK(cudnnDestroyTensorDescriptor(input_desc));
CUDNN_CHECK(cudnnDestroyTensorDescriptor(bias_desc));
}
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
TIndex outer_dim, dim, inner_dim;
string data_format;
cudnnTensorDescriptor_t input_desc, bias_desc;
};
#endif // WITH_CUDNN
} // namespace dragon
#endif // DRAGON_OPERATORS_VISION_BIAS_ADD_OP_H_
\ No newline at end of file
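A plausible forward pass for CuDNNBiasAddOp, sketched against the public
cuDNN API (cudnnAddTensor accumulates a broadcastable bias into the output
with the given scaling factors); the shipped implementation lives in
src/operators/vision/bias_add_op.cc and may differ in detail.

    template <class Context> template <typename T>
    void CuDNNBiasAddOp<Context>::RunWithType() {
        // Broadcast the bias as (1, C, 1, 1) against the output, which is
        // assumed to have been pre-filled with the input.
        cudnnSetTensor4dDesc<T>(&bias_desc, data_format,
            std::vector<int64_t>({ 1, dim, 1, 1 }));
        cudnnSetTensorDesc<T>(&output_desc, Output(0));
        CUDNN_CHECK(cudnnAddTensor(ctx()->cudnn_handle(),
            CUDNNType<T>::one, bias_desc,
                Input(1).template data<T, Context>(),
            CUDNNType<T>::one, output_desc,
                Output(0)->template mutable_data<T, Context>()));
    }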
......@@ -50,8 +50,6 @@ class Conv2dGradientOp : public Conv2dOp<Context> {
#ifdef WITH_CUDNN
#include "utils/cudnn_device.h"
template <class Context>
class CuDNNConv2dOp final : public Conv2dOp<Context> {
public:
......@@ -97,7 +95,7 @@ class CuDNNConv2dOp final : public Conv2dOp<Context> {
cudnnConvolutionDescriptor_t conv_desc;
cudnnFilterDescriptor_t filter_desc;
size_t fwd_data_size;
TIndex bias_offset, cudnn_group;
TIndex cudnn_group;
vector<TIndex> input_dims;
bool enable_tensor_core;
};
......@@ -148,7 +146,7 @@ class CuDNNConv2dGradientOp final : public Conv2dGradientOp<Context> {
cudnnConvolutionDescriptor_t conv_desc;
cudnnFilterDescriptor_t filter_desc;
size_t bwd_filter_size, bwd_data_size;
TIndex bias_offset, cudnn_group;
TIndex cudnn_group;
vector<TIndex> input_dims;
bool enable_tensor_core;
};
......
......@@ -84,6 +84,7 @@ class ConvOpBase : public Operator<Context> {
ctx());
} else LOG(FATAL) << "ConvNd has not been implemented yet";
}
template <typename T> void Col2Im(const T* col, T* im) {
if (Input(0).ndim() == 4) {
kernel::Col2Im2d<T, Context>(conv_in_channels,
......
......@@ -54,8 +54,6 @@ class Conv2dTransposeGradientOp : public Conv2dTransposeOp<Context> {
#ifdef WITH_CUDNN
#include "utils/cudnn_device.h"
template <class Context>
class CuDNNConv2dTransposeOp final : public Conv2dTransposeOp<Context> {
public:
......@@ -100,7 +98,7 @@ class CuDNNConv2dTransposeOp final : public Conv2dTransposeOp<Context> {
cudnnConvolutionDescriptor_t conv_desc;
cudnnFilterDescriptor_t filter_desc;
size_t fwd_data_size;
TIndex bias_offset, cudnn_group;
TIndex cudnn_group;
vector<TIndex> input_dims;
bool enable_tensor_core;
};
......@@ -150,7 +148,7 @@ public:
cudnnConvolutionDescriptor_t conv_desc;
cudnnFilterDescriptor_t filter_desc;
size_t bwd_filter_size, bwd_data_size;
TIndex bias_offset, cudnn_group;
TIndex cudnn_group;
vector<TIndex> input_dims;
bool enable_tensor_core;
};
......
// ------------------------------------------------------------
// Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
//
// Licensed under the BSD 2-Clause License.
// You should have received a copy of the BSD 2-Clause License
// along with the software. If not, See,
//
// <https://opensource.org/licenses/BSD-2-Clause>
//
// ------------------------------------------------------------
#ifndef DRAGON_OPERATORS_VISION_DROP_BLOCK_OP_H_
#define DRAGON_OPERATORS_VISION_DROP_BLOCK_OP_H_
#include "core/operator.h"
#include "utils/math_functions.h"
namespace dragon {
template <class Context>
class DropBlock2dOp final : public Operator<Context> {
public:
DropBlock2dOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws),
block_size(OperatorBase::Arg<int>("block_size", 7)),
alpha(OperatorBase::Arg<float>("alpha", 1.f)),
decrement(OperatorBase::Arg<float>("decrement", 0.f)),
data_format(OperatorBase::Arg<string>("data_format", "NCHW")) {
GET_ARGUMENT_WITH_DESC(float, keep_prob, 0.9f);
SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void RunWithType();
protected:
DECLARE_ARGUMENT_WITH_DESC(float, keep_prob);
TIndex block_size, seed_h, seed_w;
TIndex n, c, h, w;
float alpha, decrement, apply_prob = 1., gamma;
string data_format;
vector<TIndex> seed_dims;
};
template <class Context>
class DropBlock2dGradientOp final : public Operator<Context> {
public:
DropBlock2dGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws) {
SwitchToPhase(OperatorBase::Arg<string>("phase", ""));
}
USE_OPERATOR_FUNCTIONS;
void RunOnDevice() override;
template <typename T> void RunWithType();
};
DEFINE_ARGUMENT_WITH_DESC(float, DropBlock2dOp, keep_prob);
} // namespace dragon
#endif // DRAGON_OPERATORS_VISION_DROP_BLOCK_OP_H_
\ No newline at end of file
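For reference, the seeding rate gamma used by DropBlock (Ghiasi et al.,
2018) scales the drop rate by the ratio of the feature area to the valid
seed area, divided by the block area; a sketch of that computation (the
operator's actual RunWithType may organize it differently):

    inline float ComputeGamma(
        float keep_prob, int block_size, int feat_h, int feat_w) {
        const float block_area = float(block_size * block_size);
        const float feat_area = float(feat_h * feat_w);
        const float seed_area = float((feat_h - block_size + 1) *
                                      (feat_w - block_size + 1));
        // gamma = (1 - keep_prob) / block_size^2 * feat_area / seed_area
        return (1.f - keep_prob) / block_area * feat_area / seed_area;
    }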
......@@ -16,7 +16,10 @@
namespace dragon {
enum LRNMode { ACROSS_CHANNELS, WITHIN_CHANNEL };
typedef enum {
ACROSS_CHANNELS,
WITHIN_CHANNEL,
} LRNMode;
template <class Context>
class LRNOp : public Operator<Context> {
......@@ -82,8 +85,6 @@ class LRNGradientOp : public Operator<Context> {
#ifdef WITH_CUDNN
#include "utils/cudnn_device.h"
template <class Context>
class CuDNNLRNOp final : public LRNOp<Context> {
public:
......
......@@ -73,7 +73,7 @@ inline void LoadCaffeModel(
const string& layer_name = layer.name();
string prefix = layer_name + "/param:";
for (int j = 0; j < layer.blobs_size(); j++) {
string tensor_name = prefix + dragon_cast<string, int>(j);
string tensor_name = prefix + std::to_string(j);
if (!ws->HasTensor(tensor_name))
LOG(WARNING) << "Tensor(" << tensor_name << ") "
<< "does not exist in any Graphs, skip.";
......@@ -114,7 +114,7 @@ inline void SavaCaffeModel(
int layer_idx = -1;
for (int i = 0; i < tensors.size(); i++) {
if (tensors[i]->count() <= 0) continue;
vector<string> splits = SplitString(
vector<string> splits = str::split(
tensors[i]->name(), "/param:");
if (layer_hash.count(splits[0]) == 0) {
layer_hash[splits[0]] = ++layer_idx;
......
......@@ -28,6 +28,10 @@ template<> inline int dragon_cast<int, float>(float val) {
return static_cast<int>(val);
}
template<> inline int64_t dragon_cast<int64_t, float>(float val) {
return static_cast<int64_t>(val);
}
template<> inline float dragon_cast<float, float>(float val) {
return val;
}
......@@ -127,7 +131,7 @@ template<> inline float32 dragon_cast<float32, float>(float val) {
return dragon_cast<float32, float16>(t);
}
#ifdef WITH_CUDA_FP16
#ifdef WITH_CUDA
template<> inline half dragon_cast<half, float>(float val) {
#if CUDA_VERSION_MIN(9, 0, 0)
......@@ -165,7 +169,7 @@ template<> inline half2 dragon_cast<half2, float16>(float16 val) {
}
#endif // WITH_CUDA_FP16
#endif // WITH_CUDA
} // namespace dragon
......
......@@ -101,16 +101,10 @@ inline int CUDA_NUM_DEVICES() {
return count;
}
inline int CUDA_DEVICE() {
int gpu_id;
cudaGetDevice(&gpu_id);
return gpu_id;
}
inline int CUDA_DEVICE(const void* ptr) {
cudaPointerAttributes attr;
CUDA_CHECK(cudaPointerGetAttributes(&attr, ptr));
return attr.device;
inline int CUDA_GET_DEVICE() {
int device_id;
cudaGetDevice(&device_id);
return device_id;
}
struct CUDADeviceProps {
......@@ -132,7 +126,7 @@ inline const cudaDeviceProp& GetDeviceProperty(
}
inline bool CUDA_TRUE_FP16_AVAILABLE() {
int device = CUDA_DEVICE();
int device = CUDA_GET_DEVICE();
auto& prop = GetDeviceProperty(device);
return prop.major >= 6;
}
......@@ -141,7 +135,7 @@ inline bool TENSOR_CORE_AVAILABLE() {
#if CUDA_VERSION < 9000
return false;
#else
int device = CUDA_DEVICE();
int device = CUDA_GET_DEVICE();
auto& prop = GetDeviceProperty(device);
return prop.major >= 7;
#endif
......@@ -149,23 +143,16 @@ inline bool TENSOR_CORE_AVAILABLE() {
class DeviceGuard {
public:
DeviceGuard(int newDevice)
: previous_(CUDA_DEVICE()) {
if (previous_ != newDevice)
CUDA_CHECK(cudaSetDevice(newDevice));
DeviceGuard(int new_id) : prev_id(CUDA_GET_DEVICE()) {
if (prev_id != new_id) CUDA_CHECK(cudaSetDevice(new_id));
}
~DeviceGuard() {
CUDA_CHECK(cudaSetDevice(previous_));
}
~DeviceGuard() { CUDA_CHECK(cudaSetDevice(prev_id)); }
private:
int previous_;
int prev_id;
};
#define CUDA_FP16_NOT_COMPILED \
LOG(FATAL) << "CUDA-FP16 was not compiled."
#else
#define CUDA_NOT_COMPILED \
......
......@@ -55,7 +55,6 @@ template<> class CUDNNType<double> {
typedef double BNParamType;
};
#ifdef WITH_CUDA_FP16
template<> class CUDNNType<float16> {
public:
static const cudnnDataType_t type = CUDNN_DATA_HALF;
......@@ -63,37 +62,63 @@ template<> class CUDNNType<float16> {
static const void *one, *zero;
typedef float BNParamType;
};
#endif
template <typename T>
void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, Tensor* tensor);
void cudnnSetTensorDesc(
cudnnTensorDescriptor_t* desc,
Tensor* tensor);
template <typename T>
void cudnnSetTensor4dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, Tensor* tensor);
void cudnnSetTensor4dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
Tensor* tensor);
template <typename T>
void cudnnSetTensor5dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, Tensor* tensor);
void cudnnSetTensor5dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
Tensor* tensor);
template <typename T>
void cudnnSetTensor3dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, Tensor* tensor);
void cudnnSetTensor3dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
Tensor* tensor);
template <typename T>
void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, const std::vector<int64_t>& dims);
void cudnnSetTensorDesc(
cudnnTensorDescriptor_t* desc,
const std::vector<int64_t>& dims);
template <typename T>
void cudnnSetTensor4dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, const std::vector<int64_t>& dims);
void cudnnSetTensor4dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
const std::vector<int64_t>& dims);
template <typename T>
void cudnnSetTensor4dDescWithGroup(cudnnTensorDescriptor_t* desc, const string& data_format, const std::vector<int64_t>& dims, const int64_t group);
void cudnnSetTensor4dDescWithGroup(
cudnnTensorDescriptor_t* desc,
const string& data_format,
const std::vector<int64_t>& dims,
const int64_t group);
template <typename T>
void cudnnSetTensor5dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, const std::vector<int64_t>& dims);
void cudnnSetTensor5dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
const std::vector<int64_t>& dims);
template <typename T>
void cudnnSetTensor3dDesc(cudnnTensorDescriptor_t* desc, const string& data_format, const std::vector<int64_t>& dims);
void cudnnSetTensor3dDesc(
cudnnTensorDescriptor_t* desc,
const string& data_format,
const std::vector<int64_t>& dims);
template <typename T>
void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc,
void cudnnSetTensorDesc(
cudnnTensorDescriptor_t* desc,
const std::vector<int64_t>& dims,
const std::vector<int64_t>& strides);
......
......@@ -69,7 +69,7 @@ template <typename T, class Context>
void RandomBernoulli(
const int n,
const float p,
uint32_t* x,
T* x,
Context* ctx);
/******************** Level-1 ********************/
......
......@@ -25,21 +25,21 @@ typedef int64_t TIndex;
template <typename T, class Context>
void Dropout(
const int count,
T prob,
T scale,
float prob,
float scale,
const T* x,
uint32_t* mask,
uint32_t* mask32,
uint8_t* mask8,
T* y,
Context* ctx);
template <typename T, class Context>
void DropoutGrad(
template <typename Tx, typename Tm, class Context>
void ApplyMask(
const int count,
T prob,
T scale,
const T* dy,
const uint32_t* mask,
T* dx,
const float scale,
const Tx* x,
const Tm* mask,
Tx* y,
Context* ctx);
/******************** activation.elu ********************/
......@@ -234,10 +234,95 @@ void Clip(
const float low,
const float high,
const T* x,
T* mask,
T* y,
Context* ctx);
template <typename T, class Context>
void ClipGrad(
const int count,
const float low,
const float high,
const T* x,
const T* dy,
T* dx,
Context* ctx);
/******************** arithmetic.maximum ********************/
template <typename T, class Context>
void MaximumE(
const int count,
const T* x1,
const T* x2,
T* y,
Context* ctx);
template <typename T, class Context>
void MaximumB(
const int count,
const T* x1,
const T x2,
T* y,
Context* ctx);
template <typename T, class Context>
void MaximumEGrad(
const int count,
const T* x1,
const T* x2,
const T* dy,
T* dx1,
T* dx2,
Context* ctx);
template <typename T, class Context>
void MaximumBGrad(
const int count,
const T* x1,
const T x2,
const T* dy,
T* dx1,
/* T* dx2, */
Context* ctx);
/******************** arithmetic.minimum ********************/
template <typename T, class Context>
void MinimumE(
const int count,
const T* x1,
const T* x2,
T* y,
Context* ctx);
template <typename T, class Context>
void MinimumB(
const int count,
const T* x1,
const T x2,
T* y,
Context* ctx);
template <typename T, class Context>
void MinimumEGrad(
const int count,
const T* x1,
const T* x2,
const T* dy,
T* dx1,
T* dx2,
Context* ctx);
template <typename T, class Context>
void MinimumBGrad(
const int count,
const T* x1,
const T x2,
const T* dy,
T* dx1,
/* T* dx2, */
Context* ctx);
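A CPU reference for the eltwise variants declared above (the registered
CPUContext/CUDAContext specializations live in src/utils/); the gradient
routes dy to whichever input won the forward comparison, and Minimum
mirrors this with the comparison flipped.

    template <typename T>
    void MaximumE_Ref(const int count, const T* x1, const T* x2, T* y) {
        for (int i = 0; i < count; ++i)
            y[i] = x1[i] > x2[i] ? x1[i] : x2[i];
    }

    template <typename T>
    void MaximumEGrad_Ref(const int count, const T* x1, const T* x2,
                          const T* dy, T* dx1, T* dx2) {
        for (int i = 0; i < count; ++i) {
            const bool take1 = x1[i] > x2[i];
            dx1[i] = take1 ? dy[i] : T(0);
            dx2[i] = take1 ? T(0) : dy[i];
        }
    }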
/******************** control_flow.compare ********************/
template <typename T, class Context>
......@@ -257,6 +342,34 @@ void AbsGrad(
T* dx,
Context* ctx);
/******************** loss.nll_loss ********************/
template <typename Tx, typename Ty, class Context>
void NLLLoss(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const Tx* log_prob,
const Ty* labels,
const int* ignores,
const int num_ignores,
float* losses,
float* flags,
Context* ctx);
template <typename Tx, typename Ty, class Context>
void NLLLossGrad(
const int outer_dim,
const int axis_dim,
const int inner_dim,
const Tx* prob,
const Ty* labels,
const int* ignores,
const int num_ignores,
Tx* dx,
float* flags,
Context* ctx);
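A CPU reference for the NLLLoss semantics declared above, assuming labels
index the axis dimension of a log-probability tensor shaped
(outer_dim, axis_dim, inner_dim); ignored labels contribute neither loss
nor normalization (flags = 0). Illustrative only.

    template <typename Tx, typename Ty>
    void NLLLoss_Ref(const int outer_dim, const int axis_dim,
                     const int inner_dim, const Tx* log_prob,
                     const Ty* labels, const int* ignores,
                     const int num_ignores, float* losses, float* flags) {
        for (int o = 0; o < outer_dim; ++o)
        for (int i = 0; i < inner_dim; ++i) {
            const int idx = o * inner_dim + i;
            const int label = (int)labels[idx];
            bool ignored = false;
            for (int k = 0; k < num_ignores; ++k)
                if (label == ignores[k]) { ignored = true; break; }
            if (ignored) { losses[idx] = flags[idx] = 0.f; continue; }
            losses[idx] = -(float)log_prob[
                (o * axis_dim + label) * inner_dim + i];
            flags[idx] = 1.f;
        }
    }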
/******************** loss.sigmoid_cross_entropy ********************/
template <typename T, class Context>
......@@ -902,6 +1015,23 @@ void Col2Im2d(
T* im,
Context* ctx);
/******************** vision.drop_block ********************/
template <class Context>
void DropBlock2d(
const int N,
const int C,
const int H,
const int W,
const int seed_h,
const int seed_w,
const int block_size,
const float gamma,
const string& data_format,
uint32_t* seed,
int* mask,
Context* ctx);
/******************** vision.nn_resize ********************/
template <typename T, class Context>
......
......@@ -111,7 +111,7 @@ void Axpby(
const T beta,
T* y);
} // namespace ssd
} // namespace sse
} // namespace dragon
......
......@@ -18,11 +18,11 @@
#include <iostream>
#include <cstdlib>
#include "utils/cast.h"
namespace dragon {
inline std::vector<std::string> SplitString(
namespace str {
inline std::vector<std::string> split(
const std::string& str,
const std::string& c) {
std::vector<std::string> ret;
......@@ -36,17 +36,7 @@ inline std::vector<std::string> SplitString(
return ret;
}
#define DEFINE_NUMBER2STRING(T) \
template<> inline std::string dragon_cast<std::string, T>(T val) { \
std::stringstream ss; ss << val; return ss.str(); \
}
DEFINE_NUMBER2STRING(int);
DEFINE_NUMBER2STRING(unsigned long long);
template<> inline int dragon_cast<int, std::string>(std::string val) {
return atoi(val.c_str());
}
} // namespace str
} // namespace dragon
......
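The helper now lives in a nested namespace; for example, splitting a
parameter name the way the caffemodel loader above does:

    void SplitExample() {
        // Splits into { "conv1", "0" }, as the loader's splits[0] expects.
        auto parts = dragon::str::split("conv1/param:0", "/param:");
    }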
......@@ -2,6 +2,7 @@ message(STATUS "Found CXX Module: ${CMAKE_CURRENT_LIST_DIR}")
FILE(GLOB_RECURSE MODULE_FILES *.h *.hpp *.c *.cpp *.cu *.cc)
FILE(GLOB_RECURSE SRC_FILES ../../src/*.c ../../src/*.cpp ../../src/*.cu ../../src/*.cc)
LIST(REMOVE_ITEM SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/../../src/operators/misc/python_op.cc)
# ---[ Target
if (WITH_CUDA)
......@@ -36,7 +37,9 @@ if(WIN32)
TARGET_LINK_LIBRARIES(${PROJECT_NAME}_cxx shlwapi.lib)
endif()
SET_TARGET_PROPERTIES(${PROJECT_NAME}_cxx PROPERTIES OUTPUT_NAME dragon_cxx)
SET_TARGET_PROPERTIES(${PROJECT_NAME}_cxx PROPERTIES OUTPUT_NAME dragon)
SET_TARGET_PROPERTIES(${PROJECT_NAME}_cxx PROPERTIES DEFINE_SYMBOL DRAGON_CXX_EXPORTS)
# ---[ Install
INSTALL(TARGETS ${PROJECT_NAME}_cxx DESTINATION ${PROJECT_BINARY_DIR}/../lib)
\ No newline at end of file
INSTALL(TARGETS ${PROJECT_NAME}_cxx DESTINATION ${PROJECT_BINARY_DIR}/../api/lib)
FILE(INSTALL dragon.h DESTINATION ${PROJECT_BINARY_DIR}/../api/include)
\ No newline at end of file
#include "dragon.h"
#include "core/common.h"
#include "utils/logging.h"
namespace dragon {
......
......@@ -12,6 +12,12 @@
namespace dragon {
/* * * * * * * * * * * * * * * * * * * * *
* *
* Workspace *
* *
* * * * * * * * * * * * * * * * * * * * */
Map<string, unique_ptr < Workspace > > g_workspaces;
Map<string, vector<string> > sub_workspaces;
std::mutex g_mutex;
......@@ -29,7 +35,8 @@ Workspace* CreateWorkspace(const std::string& name){
Workspace* ResetWorkspace(const std::string& name) {
std::unique_lock<std::mutex> lock(g_mutex);
CHECK(g_workspaces.count(name))
<< "\nWorkspace(" << name << ") does not exist, can not be reset.";
<< "\nWorkspace(" << name << ") does not exist."
<< "\nCan not be reset.";
LOG(INFO) << "Reset the Workspace(" << name << ").";
g_workspaces[name].reset(new Workspace(name));
for (auto& sub_workspace : sub_workspaces[name]) {
......@@ -43,7 +50,8 @@ Workspace* ResetWorkspace(const std::string& name) {
void ReleaseWorkspace(const std::string& name) {
std::unique_lock<std::mutex> lock(g_mutex);
CHECK(g_workspaces.count(name))
<< "\nWorkspace(" << name << ") does not exist, can not be released.";
<< "\nWorkspace(" << name << ") does not exist."
<< "\nCan not be released.";
LOG(INFO) << "Release the Workspace(" << name << ").";
g_workspaces[name].reset();
g_workspaces.erase(name);
......@@ -61,6 +69,12 @@ void MoveWorkspace(
<< "into the Workspace(" << target_ws->name() << ").";
}
/* * * * * * * * * * * * * * * * * * * * *
* *
* Graph *
* *
* * * * * * * * * * * * * * * * * * * * */
std::string CreateGraph(
const std::string& graph_file,
Workspace* ws) {
......@@ -102,6 +116,19 @@ std::string CreateGraph(
return meta_graph.name();
}
void RunGraph(
const std::string& graph_name,
Workspace* ws,
const int stream_id) {
ws->RunGraph(graph_name, "", "", stream_id);
}
/* * * * * * * * * * * * * * * * * * * * *
* *
* Tensor *
* *
* * * * * * * * * * * * * * * * * * * * */
void CreateTensor(
const std::string& name,
Workspace* ws) {
......@@ -109,6 +136,32 @@ void CreateTensor(
}
template <typename T>
T* FetchTensor(
const std::string& name,
vector<TIndex>& shape,
Workspace* ws){
if (!ws->HasTensor(name)){
LOG(FATAL) << "Tensor(" << name << ")"
<< " doesn't exist, try create it before.";
}
Tensor* tensor = ws->GetTensor(name);
if (tensor->meta().id() == 0){
LOG(FATAL) << "Tensor(" << name << ")"
<< " has not been computed yet";
}
shape = tensor->dims();
void* data = malloc(tensor->nbytes());
if (tensor->memory_state() == MixedMemory::STATE_AT_CUDA) {
CUDAContext::Memcpy<CPUContext, CUDAContext>(
tensor->nbytes(), data, tensor->raw_data<CUDAContext>());
} else {
CPUContext::Memcpy<CPUContext, CPUContext>(
tensor->nbytes(), data, tensor->raw_data<CPUContext>());
}
return static_cast<T*>(data);
}
template <typename T>
void FeedTensor(
const std::string& name,
const vector<TIndex>& shape,
......@@ -135,6 +188,12 @@ void FeedTensor(
}
}
/* * * * * * * * * * * * * * * * * * * * *
* *
* I / O *
* *
* * * * * * * * * * * * * * * * * * * * */
void TransplantCaffeModel(
const std::string& input_model,
const std::string& output_model) {
......@@ -146,7 +205,7 @@ void TransplantCaffeModel(
const string& layer_name = layer.name();
string prefix = layer_name + "/param:";
for (int j = 0; j < layer.blobs_size(); j++) {
string tensor_name = prefix + dragon_cast<string, int>(j);
string tensor_name = prefix + std::to_string(j);
BlobProto blob = layer.blobs(j);
TensorProto* proto = protos.add_protos();
proto->set_data_type(TensorProto_DataType_FLOAT);
......@@ -218,7 +277,7 @@ void LoadCaffemodel(
const string& layer_name = layer.name();
string prefix = scope + layer_name + "/param:";
for (int j = 0; j < layer.blobs_size(); j++){
string tensor_name = prefix + dragon_cast<string, int>(j);
string tensor_name = prefix + std::to_string(j);
if (!ws->HasTensor(tensor_name))
ws->CreateTensor(tensor_name);
BlobProto blob = layer.blobs(j);
......@@ -248,63 +307,54 @@ void LoadCaffemodel(
}
}
/* * * * * * * * * * * * * * * * * * * * *
* *
* Config *
* *
* * * * * * * * * * * * * * * * * * * * */
void SetLogLevel(const std::string& level) {
SetLogDestination(StrToLogSeverity(level));
}
/* * * * * * * * * * * * * * * * * * * * *
* *
* Template *
* *
* * * * * * * * * * * * * * * * * * * * */
template DRAGON_API float* FetchTensor<float>(
const std::string&,
std::vector<TIndex>&,
Workspace*);
template DRAGON_API float16* FetchTensor<float16>(
const std::string&,
std::vector<TIndex>&,
Workspace*);
template DRAGON_API void FeedTensor<float>(
const std::string&,
const std::vector<TIndex>&,
const float*,
const Device&,
Workspace*);
template DRAGON_API void FeedTensor<float16>(
const std::string&,
const std::vector<TIndex>&,
const float16*,
const Device&,
Workspace*);
template DRAGON_API void FeedTensor<int>(
const std::string&,
const std::vector<TIndex>&,
const int*,
const Device&,
Workspace*);
template DRAGON_API void FeedTensor<uint8_t>(
const std::string&,
const std::vector<TIndex>&,
const uint8_t*,
......
......@@ -16,10 +16,28 @@
#include <cstdint>
#include <vector>
#ifdef _MSC_VER
#ifdef DRAGON_CXX_EXPORTS
#define DRAGON_API __declspec(dllexport)
#else
#define DRAGON_API __declspec(dllimport)
#endif
#else
#define DRAGON_API
#endif
/* * * * * * * * * * * * * * * * * * * * *
* *
* Internal Headers *
* *
* * * * * * * * * * * * * * * * * * * * */
#ifdef DRAGON_CXX_EXPORTS
#include "core/types.h"
#else
namespace dragon {
struct float16;
}
#endif
namespace dragon {
......@@ -28,72 +46,102 @@ typedef int64_t TIndex;
class Workspace;
class DRAGON_API Device {
public:
Device();
explicit Device(std::string device_type);
Device(std::string device_type, int device_id);
const int& device_type() const { return device_type_; }
const int device_id() const { return device_id_; }
private:
int device_type_, device_id_;
};
/* * * * * * * * * * * * * * * * * * * * *
* *
* Workspace *
* *
* * * * * * * * * * * * * * * * * * * * */
DRAGON_API Workspace* CreateWorkspace(const std::string& name);
DRAGON_API Workspace* ResetWorkspace(const std::string& name);
DRAGON_API void ReleaseWorkspace(const std::string& name);
DRAGON_API void MoveWorkspace(Workspace* main, Workspace* sub);
/* * * * * * * * * * * * * * * * * * * * *
* *
* Graph *
* *
* * * * * * * * * * * * * * * * * * * * */
DRAGON_API std::string CreateGraph(
const std::string& graph_file,
Workspace* ws);
DRAGON_API std::string CreateGraph(
const std::string& graph_file,
const Device& device,
Workspace* ws);
DRAGON_API void RunGraph(
const std::string& graph_name,
Workspace* ws,
const int stream_id = 1);
/* * * * * * * * * * * * * * * * * * * * *
* *
* Tensor *
* *
* * * * * * * * * * * * * * * * * * * * */
DRAGON_API void CreateTensor(
const std::string& name,
Workspace* ws);
template <typename T>
DRAGON_API T* FetchTensor(
const std::string& name,
std::vector<TIndex>& shape,
Workspace* ws);
template <typename T>
DRAGON_API void FeedTensor(
const std::string& name,
const std::vector<TIndex>& shape,
const T* data,
const Device& device,
Workspace* ws);
/* * * * * * * * * * * * * * * * * * * * *
* *
* I / O *
* *
* * * * * * * * * * * * * * * * * * * * */
DRAGON_API void LoadCaffemodel(
const std::string& model_file,
Workspace* ws);
DRAGON_API void TransplantCaffeModel(
const std::string& input_model,
const std::string& output_model);
DRAGON_API void LoadDragonmodel(
const std::string& model_file,
Workspace* ws);
/* * * * * * * * * * * * * * * * * * * * *
* *
* Config *
* *
* * * * * * * * * * * * * * * * * * * * */
DRAGON_API void SetLogLevel(const std::string& level);
} // namespace dragon
......
......@@ -19,7 +19,8 @@ Workspace* ws() { return g_workspace; }
TypeId CTypeToFetcher(TypeId type) {
static Map<TypeId,TypeId> c_type_map {
{ TypeMeta::Id<int8>(), TypeMeta::Id<NumpyFetcher>() },
{ TypeMeta::Id<uint8>(), TypeMeta::Id<NumpyFetcher>() },
{ TypeMeta::Id<int>(), TypeMeta::Id<NumpyFetcher>() },
{ TypeMeta::Id<int64_t>(), TypeMeta::Id<NumpyFetcher>() },
{ TypeMeta::Id<float>(), TypeMeta::Id<NumpyFetcher>() },
......@@ -197,6 +198,11 @@ inline PyObject* FeedTensorCC(PyObject* self, PyObject* args) {
}
}
inline PyObject* OnModuleExitCC(PyObject* self, PyObject* args) {
g_workspaces.clear();
Py_RETURN_TRUE;
}
#define PYFUNC(name) {#name, name, METH_VARARGS, ""}
#define PYENDFUNC {nullptr, nullptr, 0, nullptr}
......@@ -255,6 +261,7 @@ PyMethodDef* GetAllMethods() {
PYFUNC(SnapshotCC),
/**** Config ****/
PYFUNC(SetLogLevelCC),
PYFUNC(OnModuleExitCC),
PYENDFUNC,
};
return g_python_methods;
......@@ -272,9 +279,11 @@ void common_init() {
}
#ifdef WITH_PYTHON3
static struct PyModuleDef libdragon = {
PyModuleDef_HEAD_INIT,
"libdragon", "", -1,
GetAllMethods()
};
PyMODINIT_FUNC PyInit_libdragon(void) {
PyObject* module = PyModule_Create(&libdragon);
......@@ -285,7 +294,8 @@ PyMODINIT_FUNC PyInit_libdragon(void) {
#else // WITH_PYTHON2
PyMODINIT_FUNC initlibdragon(void) {
PyObject* moudle = Py_InitModule("libdragon", GetAllMethods());
PyObject* module = Py_InitModule(
"libdragon", GetAllMethods());
if (module == nullptr) return;
common_init();
}
......
......@@ -31,7 +31,8 @@ class TensorFetcherBase {
class TensorFeederBase {
public:
virtual ~TensorFeederBase() {}
virtual PyObject* Feed(
const DeviceOption& option,
PyArrayObject* array,
Tensor* tensor) = 0;
};
......@@ -61,7 +62,7 @@ class NumpyFetcher : public TensorFetcherBase {
PyErr_SetString(PyExc_RuntimeError, s.c_str());
return nullptr;
}
// create an empty array with the same shape
PyObject* array = PyArray_SimpleNew(
tensor.ndim(), npy_dims.data(), npy_type);
// copy the tensor data to the numpy array
......@@ -88,7 +89,8 @@ class StringFetcher : public TensorFetcherBase {
class NumpyFeeder : public TensorFeederBase {
public:
PyObject* Feed(
const DeviceOption& option,
PyArrayObject* original_array,
Tensor* tensor) override {
PyArrayObject* array = PyArray_GETCONTIGUOUS(original_array);
......@@ -100,7 +102,6 @@ class NumpyFeeder : public TensorFeederBase {
if (meta.id() != tensor->meta().id() && tensor->meta().id() != 0)
LOG(WARNING) << "Feed Tensor(" << tensor->name() << ")"
<< " with different data type from original one.";
int ndim = PyArray_NDIM(array);
npy_intp* npy_dims = PyArray_DIMS(array);
vector<TIndex> dims;
......@@ -110,16 +111,16 @@ class NumpyFeeder : public TensorFeederBase {
#ifdef WITH_CUDA
CUDAContext context(option);
context.SwitchToDevice();
auto* data = tensor->raw_mutable_data<CUDAContext>(meta);
context.Memcpy<CUDAContext, CPUContext>(tensor->nbytes(),
data, static_cast<void*>(PyArray_DATA(array)));
#else
LOG(FATAL) << "CUDA was not compiled.";
#endif
} else {
auto* data = tensor->raw_mutable_data<CPUContext>(meta);
CPUContext::Memcpy<CPUContext, CPUContext>(tensor->nbytes(),
data, static_cast<void*>(PyArray_DATA(array)));
}
Py_XDECREF(array);
Py_RETURN_TRUE;
......
......@@ -25,4 +25,4 @@ inline PyObject* IsCUDADriverSufficientCC(PyObject* self, PyObject* args) {
#endif
}
#endif // DRAGON_PYTHON_PY_CUDA_H_
\ No newline at end of file
......@@ -94,7 +94,6 @@ PyObject* TensorFromShapeCC(PyObject* self, PyObject* args) {
if (meta.id() != tensor->meta().id() && tensor->meta().id() != 0)
LOG(WARNING) << "Set Tensor(" << tensor->name() << ")"
<< " with different data type from original one.";
int ndim = PyList_Size(shape);
CHECK_GT(ndim, 0)
<< "\nThe len of shape should be greater than 1. Got " << ndim << ".";
......@@ -112,9 +111,9 @@ PyObject* TensorFromShapeCC(PyObject* self, PyObject* args) {
if (dev_opt.device_type() == CUDA) {
CUDAContext ctx(dev_opt);
ctx.SwitchToDevice();
tensor->raw_mutable_data<CUDAContext>(meta);
} else {
tensor->raw_mutable_data<CPUContext>(meta);
}
Py_RETURN_TRUE;
}
......@@ -173,19 +172,19 @@ PyObject* TensorFromTensorCC(PyObject* self, PyObject* args) {
Tensor* srcT = ws()->GetTensor(src_name);
Tensor* dstT = ws()->CreateTensor(dst_name);
dstT->ReshapeLike(*srcT);
const TypeMeta& meta = srcT->meta();
if (dst_ctx.device_type() == DeviceType::CUDA) {
if (src_ctx.device_type() == DeviceType::CUDA) {
// CUDA <- CUDA
CUDAContext::Memcpy<CUDAContext, CUDAContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CUDAContext>(meta),
srcT->raw_data<CUDAContext>());
} else {
// CUDA <- CPU
CUDAContext::Memcpy<CUDAContext, CUDAContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CUDAContext>(meta),
srcT->raw_data<CPUContext>());
}
} else {
......@@ -193,13 +192,13 @@ PyObject* TensorFromTensorCC(PyObject* self, PyObject* args) {
// CPU <- CUDA
CUDAContext::Memcpy<CUDAContext, CUDAContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CPUContext>(meta),
srcT->raw_data<CUDAContext>());
} else {
// CPU <- CPU
CUDAContext::Memcpy<CUDAContext, CUDAContext>(
srcT->nbytes(),
dstT->raw_mutable_data<CPUContext>(meta),
srcT->raw_data<CPUContext>());
}
}
......
......@@ -23,8 +23,8 @@ inline const int TypeMetaToNPY(const TypeMeta& meta) {
{ TypeMeta::Id<int64_t>(), NPY_INT64 },
{ TypeMeta::Id<double>(), NPY_FLOAT64 },
{ TypeMeta::Id<float16>(), NPY_FLOAT16 },
{ TypeMeta::Id<uint8>(), NPY_UINT8 },
{ TypeMeta::Id<int8>(), NPY_INT8 }
};
return m2npy_type_map.count(meta.id()) ? m2npy_type_map[meta.id()] : -1;
}
......@@ -36,11 +36,12 @@ inline const TypeMeta& TypeNPYToMeta(int npy_type) {
{ NPY_INT64, TypeMeta::Make<int64_t>() },
{ NPY_FLOAT64, TypeMeta::Make<double>() },
{ NPY_FLOAT16, TypeMeta::Make<float16>() },
{ NPY_UINT8, TypeMeta::Make<uint8>() },
{ NPY_INT8, TypeMeta::Make<int8>() },
};
static TypeMeta unknown_type;
return npy2m_type_map.count(npy_type) ?
npy2m_type_map[npy_type] : unknown_type;
}
#endif // DRAGON_PYTHON_PY_TYPES_H_
\ No newline at end of file
......@@ -26,11 +26,11 @@ option = {}
REGISTERED_OPERATORS = set(s for s in RegisteredOperatorsCC())
NO_GRADIENT_OPERATORS = set(s for s in NoGradientOperatorsCC())
# The current device, 'CPU', 'CUDA' or 'CNML'
option['device'] = 'CPU'
# The device id
option['device_id'] = 0
# Whether to use cuDNN if possible
option['use_cudnn'] = False
......@@ -44,6 +44,9 @@ option['debug_mode'] = False
# Whether to share grads
option['share_grads'] = True
# Optional graph type
option['graph_type'] = ''
# Whether to log the meta graphs
option['log_meta_graph'] = False
......@@ -84,7 +87,7 @@ def IsCUDADriverSufficient():
def EnableCUDA(gpu_id=0, use_cudnn=True):
"""Enable CUDA mode globally.
"""Enable NVIDIA's CUDA mode globally.
Parameters
----------
......@@ -100,9 +103,28 @@ def EnableCUDA(gpu_id=0, use_cudnn=True):
"""
global option
option['device'] = 'CUDA'
option['device_id'] = gpu_id
option['use_cudnn'] = use_cudnn
def EnableCNML(mlu_id=0):
"""Enable Cambricon's CNML mode globally.
Parameters
----------
mlu_id : int
The id of MLU to use.
Returns
-------
None
"""
global option
option['device'] = 'CNML'
option['device_id'] = mlu_id
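For reference, a minimal usage sketch of the two device switches above (both funnel into the shared ``option['device_id']``; a build with the matching backend is assumed):

    import dragon.config as config

    # Target NVIDIA GPU 0, preferring cuDNN kernels where available.
    config.EnableCUDA(gpu_id=0, use_cudnn=True)

    # Or target Cambricon MLU 0 instead (requires a CNML build).
    config.EnableCNML(mlu_id=0)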
# TODO(PhyscalX): please do not use @setter
# TODO(PhyscalX): it seems that it can't change the global value
......@@ -133,7 +155,6 @@ def GetRandomSeed():
The global random seed.
"""
return option['random_seed']
......@@ -151,7 +172,7 @@ def SetGPU(id):
"""
global option
option['device_id'] = id
def GetGPU():
......@@ -163,8 +184,7 @@ def GetGPU():
The global id of GPU.
"""
return option['device_id']
def SetDebugMode(enabled=True):
......@@ -186,6 +206,25 @@ def SetDebugMode(enabled=True):
option['debug_mode'] = enabled
def SetGraphType(graph_type=''):
"""Set the graph type.
If empty, the default DAG graph will be used.
Parameters
----------
graph_type : str
The graph type.
Returns
-------
None
"""
global option
option['graph_type'] = graph_type
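A quick sketch of the new option; an empty string keeps the default DAG graph, and the value is later copied into ``meta_graph.graph_type`` during graph optimization:

    import dragon.config as config

    config.SetGraphType('')  # '' selects the default DAG graph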
def LogMetaGraph(enabled=True):
"""Enable to log meta graph globally.
......
......@@ -737,7 +737,7 @@ class Tensor(object):
Parameters
----------
new_value : number, list or numpy.ndarray
The values to set.
Returns
......
......@@ -325,5 +325,7 @@ def GetTensorInfo(tensor, stream=1):
info['mem'].append('CPU'); info['device_id'] = 0
if 'CUDA' in info:
info['mem'].append('CUDA'); info['device_id'] = int(info['CUDA'])
if 'CNML' in info:
info['mem'].append('CNML'); info['device_id'] = int(info['CNML'])
info['init'] = len(info['mem']) > 0
return info
\ No newline at end of file
......@@ -439,7 +439,7 @@ def FetchTensor(tensor):
Returns
-------
ndarray
The values copied from the backend.
References
......@@ -457,7 +457,7 @@ def FeedTensor(tensor, array, force_cpu=False, dtype=None):
----------
tensor : Tensor or str
The tensor to feed.
ndarray : number, list or ndarray
The values to feed.
force_cpu : boolean
Whether force to feed to cpu context.
......@@ -488,25 +488,23 @@ def FeedTensor(tensor, array, force_cpu=False, dtype=None):
"""
name = tensor.name if hasattr(tensor, 'name') else str(tensor)
dev = None
if force_cpu is True:
dev = utils.MakeDeviceOption(0, 0)
else:
from dragon.core.scope import _DEVICE_SCOPE
if _DEVICE_SCOPE != '':
supports = {'/cpu': 0, '/gpu': 1, '/mlu': 2}
dev = pb.DeviceOption()
dev.device_type = supports[_DEVICE_SCOPE.split(':')[0]]
dev.device_id = int(_DEVICE_SCOPE.split(':')[1])
else:
from dragon.config import option
if option['device'] == 'CUDA':
dev = utils.MakeDeviceOption(1, option['device_id'])
else:
dev = utils.MakeDeviceOption(0, 0)
if not isinstance(array, np.ndarray):
auto_data_type = np.float32 if dtype is None else dtype
else:
auto_data_type = array.dtype if dtype is None else dtype
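Given the resolution order above (``force_cpu`` first, then the device scope, then the global option), a small usage sketch of the updated ``FeedTensor``:

    import numpy as np
    import dragon.core.workspace as ws

    # A python scalar defaults to float32 unless ``dtype`` overrides it.
    ws.FeedTensor('x', 2.5, force_cpu=True)

    # An ndarray keeps its own data type by default.
    ws.FeedTensor('y', np.ones((2, 3), dtype='int32'))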
......@@ -573,8 +571,8 @@ def RunGraph(graph_name, inputs=(), outputs=[], stage=None, return_outputs=True)
Returns
-------
None, ndarray or list of ndarray
The outputs, formatted as ndarray.
See Also
--------
......
......@@ -42,6 +42,7 @@ List Brief
`BilinearResize`_ Resize the image with Bi-linear method.
`BiasAdd`_ Add the bias across channels to a ``NCHW`` or ``NHWC`` input.
`DenseConcat`_ Memory-efficient concatenation for DenseNet. `[Huang et.al, 2017] <http://arxiv.org/abs/1608.06993>`_.
`DropBlock2d`_ Randomly drop the outputs according to the spatial blocks. `[Ghiasi et.al, 2018] <https://arxiv.org/abs/1810.12890>`_.
=================== ======================================================================
Recurrent
......@@ -76,6 +77,7 @@ Loss
============================= ======================================================================
List Brief
============================= ======================================================================
`NLLLoss`_ Negative log-likelihood loss with sparse labels.
`SparseSoftmaxCrossEntropy`_ SoftmaxCrossEntropy with sparse labels.
`SigmoidCrossEntropy`_ SigmoidCrossEntropy.
`SoftmaxCrossEntropy`_ SoftmaxCrossEntropy with dense(one-hot) labels.
......@@ -102,6 +104,8 @@ List Brief
`Exp`_ Calculate the exponential of input.
`Square`_ Calculate the square of input.
`Sqrt`_ Calculate the sqrt of input.
`Maximum`_ Return the max value of the two given inputs.
`Minimum`_ Return the min value of the two given inputs.
`Clip`_ Clip the input to be between lower and higher bounds.
`Matmul`_ Matrix Multiplication.
`InnerProduct`_ InnerProduct Function.
......@@ -215,6 +219,7 @@ List Brief
.. _BilinearResize: operators/vision.html#dragon.operators.vision.BilinearResize
.. _BiasAdd: operators/vision.html#dragon.operators.vision.BiasAdd
.. _DenseConcat: operators/vision.html#dragon.operators.vision.DenseConcat
.. _DropBlock2d: operators/vision.html#dragon.operators.vision.DropBlock2d
.. _RNN: operators/recurrent.html#dragon.operators.recurrent.RNN
.. _LSTM: operators/recurrent.html#dragon.operators.recurrent.LSTM
......@@ -231,6 +236,7 @@ List Brief
.. _Softmax: operators/activation.html#dragon.operators.activation.Softmax
.. _Dropout: operators/activation.html#dragon.operators.activation.Dropout
.. _NLLLoss: operators/loss.html#dragon.operators.loss.NLLLoss
.. _SparseSoftmaxCrossEntropy: operators/loss.html#dragon.operators.loss.SparseSoftmaxCrossEntropy
.. _SigmoidCrossEntropy: operators/loss.html#dragon.operators.loss.SigmoidCrossEntropy
.. _SoftmaxCrossEntropy: operators/loss.html#dragon.operators.loss.SoftmaxCrossEntropy
......@@ -246,6 +252,8 @@ List Brief
.. _Mul: operators/arithmetic.html#dragon.operators.arithmetic.Mul
.. _Div: operators/arithmetic.html#dragon.operators.arithmetic.Div
.. _Clip: operators/arithmetic.html#dragon.operators.arithmetic.Clip
.. _Maximum: operators/arithmetic.html#dragon.operators.arithmetic.Maximum
.. _Minimum: operators/arithmetic.html#dragon.operators.arithmetic.Minimum
.. _Pow: operators/arithmetic.html#dragon.operators.arithmetic.Pow
.. _Log: operators/arithmetic.html#dragon.operators.arithmetic.Log
.. _Exp: operators/arithmetic.html#dragon.operators.arithmetic.Exp
......
......@@ -32,6 +32,7 @@ List Brief
`LRNLayer`_ The implementation of ``LRNLayer``.
`NNResizeLayer`_ The implementation of ``NNResizeLayer``.
`BilinearResizeLayer`_ The implementation of ``BilinearResizeLayer``.
`DropBlockLayer`_ The implementation of ``DropBlockLayer``.
====================== =============================================================================
......@@ -160,6 +161,7 @@ API Reference
.. _LRNLayer: #dragon.vm.caffe.layers.vision.LRNLayer
.. _NNResizeLayer: #dragon.vm.caffe.layers.vision.NNResizeLayer
.. _BilinearResizeLayer: #dragon.vm.caffe.layers.vision.BilinearResizeLayer
.. _DropBlockLayer: #dragon.vm.caffe.layers.vision.DropBlockLayer
.. _ReLULayer: #dragon.vm.caffe.layers.neuron.ReLULayer
.. _PReLULayer: #dragon.vm.caffe.layers.neuron.PReLULayer
......
......@@ -15,6 +15,7 @@ from __future__ import print_function
import sys
import logging
import atexit
try:
from dragon.libdragon import *
......@@ -22,3 +23,5 @@ except ImportError as e:
logging.critical(
'Cannot import dragon. Error: {0}'.format(str(e)))
sys.exit(1)
atexit.register(OnModuleExitCC)
\ No newline at end of file
......@@ -101,7 +101,8 @@ class DataTransformer(Process):
im = im.reshape((datum.height, datum.width, datum.channels))
# random scale
random_scale = npr.uniform() * (
self._max_random_scale - self._min_random_scale) \
+ self._min_random_scale
if random_scale != 1.0:
if sys.version_info >= (3, 0):
......@@ -110,7 +111,9 @@ class DataTransformer(Process):
else:
# opencv-python2 has a long-standing bug that leads to
# duplicate CUDA handles created at gpu:0
new_shape = (
int(np.ceil(im.shape[1] * random_scale)),
int(np.ceil(im.shape[0] * random_scale)))
im = PIL.Image.fromarray(im)
im = im.resize(new_shape, PIL.Image.BILINEAR)
im = np.array(im)
......
......@@ -9,10 +9,12 @@
#
# ------------------------------------------------------------
import numpy as np
from dragon.core.tensor import Tensor
INT_MAX = 2147483647
def CheckInputs(inputs, *args):
def Verify(inputs, min_num, max_num):
# type checking
......@@ -44,6 +46,17 @@ def ParseArguments(locals):
return dict(__all__, **kwargs)
def WrapConstants(constants, dtype='float32'):
if not isinstance(constants, Tensor):
if not isinstance(constants, np.ndarray):
constants = np.array(constants, dtype=dtype)
tensor = Tensor()
tensor.set_value(constants)
tensor.shape = constants.shape
constants = tensor
return constants
def AddArgumentWithDesc(arguments, property, name, as_target=True):
if isinstance(property, Tensor):
if as_target:
......
......@@ -115,6 +115,70 @@ def Div(inputs, **kwargs):
return output
def Maximum(inputs, **kwargs):
"""Return the max value of given two inputs.
Parameters
----------
inputs : list
The input tensors, A and B.
Returns
-------
Tensor
The output tensor.
"""
inputs[0] = WrapConstants(inputs[0], dtype='float32')
inputs[1] = WrapConstants(inputs[1], dtype='float32')
CheckInputs(inputs, 2)
arguments = ParseArguments(locals())
output = Tensor.CreateOperator(nout=1, op_type='Maximum', **arguments)
if inputs[0].shape is not None and \
inputs[1].shape is not None:
output.shape = inputs[0].shape[:]
if output.shape != inputs[1].shape and \
len(output.shape) < len(inputs[1].shape):
output.shape = inputs[1].shape
return output
def Minimum(inputs, **kwargs):
"""Return the min value of given two inputs.
Parameters
----------
inputs : list
The input tensors, A and B.
Returns
-------
Tensor
The output tensor.
"""
inputs[0] = WrapConstants(inputs[0], dtype='float32')
inputs[1] = WrapConstants(inputs[1], dtype='float32')
CheckInputs(inputs, 2)
arguments = ParseArguments(locals())
output = Tensor.CreateOperator(nout=1, op_type='Minimum', **arguments)
if inputs[0].shape is not None and \
inputs[1].shape is not None:
output.shape = inputs[0].shape[:]
if output.shape != inputs[1].shape and \
len(output.shape) < len(inputs[1].shape):
output.shape = inputs[1].shape
return output
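Both ops accept python scalars on either side, since ``WrapConstants`` above lifts non-tensor inputs to float32 tensors first. A sketch:

    import dragon.ops as ops
    from dragon.core.tensor import Tensor

    a = Tensor('a', dtype='float32').Variable()
    relu_like = ops.Maximum([a, 0.0])  # element-wise max against a constant
    capped = ops.Minimum([a, 1.0])     # element-wise min against a constant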
def Clip(inputs, low=None, high=None, **kwargs):
"""Clip the input to be between lower and higher bounds.
......
......@@ -36,15 +36,19 @@ def _wrap_output_shape(output, shape):
return output
def Fill(shape, value=0, dtype='float32', **kwargs):
"""Return a Tensor with specific value filled.
If ``dtype`` is None, the values will be filled as ``float32``.
Parameters
----------
shape : list, tuple or Tensor
The output shape.
value : basic numerical type
The value to fill.
dtype : str
The optional data type.
Returns
-------
......
......@@ -19,6 +19,46 @@ from . import *
from .activation import Softmax
def NLLLoss(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwargs):
"""Negative likelihood loss with sparse labels.
Parameters
----------
inputs : list of Tensor
The inputs, represent [input, sparse_labels].
axis : int
The axis of softmax function.
normalization : str
The normalization, ``UNIT``, ``FULL``, ``VALID``, ``BATCH_SIZE`` or ``NONE``.
ignore_labels : tuple or list
The label id to ignore. Default is ``empty``.
Returns
-------
Tensor
The loss.
Notes
-----
Setting the normalization to ``UNIT`` will return unreduced losses.
"""
CheckInputs(inputs, 2)
arguments = ParseArguments(locals())
output = Tensor.CreateOperator(nout=1, op_type='NLLLoss', **arguments)
if inputs[0].shape is not None:
if normalization != 'UNIT': output.shape = [1]
elif all(dim is not None for dim in inputs[0].shape):
outer_dim = int(np.prod(inputs[0].shape[0 : axis]))
inner_dim = int(np.prod(inputs[0].shape[axis + 1 :]))
output.shape = [outer_dim * inner_dim]
else: output.shape = [None]
return output
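A sketch of calling the op directly; ``log_prob`` is assumed to follow the torch convention of ``(N, C)`` log-probabilities, with ``label`` holding ``(N,)`` class ids:

    import dragon.ops as ops
    from dragon.core.tensor import Tensor

    log_prob = Tensor('log_prob', dtype='float32').Variable()
    label = Tensor('label', dtype='int64').Variable()

    # 'VALID' averages over non-ignored labels; 'UNIT' keeps unreduced losses.
    loss = ops.NLLLoss([log_prob, label], axis=1, normalization='VALID')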
def SparseSoftmaxCrossEntropy(inputs, axis=1, normalization='VALID', ignore_labels=(), **kwargs):
"""SoftmaxCrossEntropy with sparse labels.
......
......@@ -16,8 +16,10 @@ from __future__ import print_function
from . import *
def BatchNorm(
inputs, axis=-1, momentum=0.9, eps=1e-5,
use_stats=-1, mode='DEFAULT', **kwargs
):
"""Batch Normalization. `[Ioffe & Szegedy, 2015] <https://arxiv.org/abs/1502.03167>`_.
It follows the implementation of `Caffe`_, that scale procedure is moved to `ops.Scale(*args, **kwargs)`_.
......@@ -70,9 +72,11 @@ def BatchNorm(inputs, axis=-1, momentum=0.9, eps=1e-3,
return output
def BatchRenorm(
inputs, axis=-1, momentum=0.9, eps=1e-5,
r_max=3.0, d_max=5.0, t_delta=0.001,
use_stats=-1, mode='DEFAULT', **kwargs
):
"""Batch Renormalization. `[Ioffe, 2017] <https://arxiv.org/abs/1702.03275>`_.
It follows the implementation of `Caffe`_, that scale procedure is moved to `ops.Scale(*args, **kwargs)`_.
......@@ -131,7 +135,10 @@ def BatchRenorm(inputs, axis=-1, momentum=0.9, eps=1e-3,
return output
def FusedBatchNorm(
inputs, axis=-1, momentum=0.9, eps=1e-5,
use_stats=-1, **kwargs
):
"""Batch Normalization, with scale procedure after normalization.
Parameters
......@@ -170,7 +177,7 @@ def FusedBatchNorm(inputs, axis=-1, momentum=0.9, eps=1e-3, use_stats=-1, **kwar
return output
def GroupNorm(inputs, group=32, axis=-1, eps=1e-5, **kwargs):
"""Group Normalization. `[Wu & He, 2018] <https://arxiv.org/abs/1803.08494>`_.
Parameters
......@@ -203,7 +210,7 @@ def GroupNorm(inputs, group=32, axis=-1, eps=1e-3, **kwargs):
return output
def FusedGroupNorm(inputs, group=32, axis=-1, eps=1e-5, **kwargs):
"""Group Normalization, with scale procedure after normalization.
Parameters
......@@ -236,7 +243,7 @@ def FusedGroupNorm(inputs, group=32, axis=-1, eps=1e-3, **kwargs):
return output
def InstanceNorm(inputs, axis=-1, eps=1e-5, **kwargs):
"""Instance Normalization. `[Ulyanov et.al, 2016] <https://arxiv.org/abs/1607.08022>`_
Parameters
......
......@@ -630,3 +630,44 @@ def DenseConcat(inputs, growth_rate=0, axis=1, **kwargs):
output.shape[axis] += inputs[i].shape[axis]
return output
def DropBlock2d(inputs, block_size=7, keep_prob=0.9,
alpha=1., decrement=0., data_format='NCHW', **kwargs):
"""Randomly drop the outputs according to the spatial blocks. `[Ghiasi et.al, 2018] <https://arxiv.org/abs/1810.12890>`_.
Set the ``decrement`` to schedule ``keep_prob`` for each iteration.
Set the ``alpha`` to decrease ``gamma`` for different stages.
Parameters
----------
inputs : Tensor
The input tensor.
block_size : int
The size of dropping block.
keep_prob : float or Tensor
The prob of keeping. Default is ``0.9``.
alpha : float
The scale factor to gamma.
decrement : float
The decrement to keep prob.
data_format : str
The data format, ``NCHW`` or ``NHWC``.
Returns
-------
Tensor
The output tensor.
"""
CheckInputs(inputs, 1)
arguments = ParseArguments(locals())
arguments = AddArgumentWithDesc(arguments, keep_prob, 'keep_prob', as_target=False)
output = Tensor.CreateOperator(nout=1, op_type='DropBlock2d', **arguments)
if inputs.shape is not None:
output.shape = inputs.shape[:]
return output
\ No newline at end of file
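The ``decrement`` schedule is linear: with ``keep_prob=0.9`` and ``decrement=1e-6``, the effective keep prob after k iterations is ``0.9 - k * 1e-6`` (how the backend bounds it below is not shown here). A call sketch:

    import dragon.ops as ops
    from dragon.core.tensor import Tensor

    x = Tensor('x', dtype='float32').Variable()

    # Drop 7x7 spatial blocks; keep_prob decays by ``decrement`` per iteration.
    y = ops.DropBlock2d(x, block_size=7, keep_prob=0.9,
                        alpha=1.0, decrement=1e-6, data_format='NCHW')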
......@@ -51,6 +51,7 @@ NNResize = vision.NNResize
BilinearResize = vision.BilinearResize
BiasAdd = vision.BiasAdd
DenseConcat = vision.DenseConcat
DropBlock2d = vision.DropBlock2d
# recurrent
LSTMCell = recurrent.LSTMCell
......@@ -70,6 +71,7 @@ Softmax = act.Softmax
Dropout = act.Dropout
# loss
NLLLoss = loss.NLLLoss
SparseSoftmaxCrossEntropy = loss.SparseSoftmaxCrossEntropy
SigmoidCrossEntropy = loss.SigmoidCrossEntropy
SoftmaxCrossEntropy = loss.SoftmaxCrossEntropy
......@@ -85,6 +87,8 @@ Add = math.Add
Sub = math.Sub
Mul = math.Mul
Div = math.Div
Maximum = math.Maximum
Minimum = math.Minimum
Clip = math.Clip
Matmul = math.Matmul
Pow = math.Pow
......
......@@ -35,7 +35,11 @@ message Argument {
repeated string strings=7;
}
enum DeviceType {
CPU = 0;
CUDA = 1;
CNML = 2;
}
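From python, the new enum value is exposed on the generated module (a sketch; field names follow the ``DeviceOption`` message below):

    import dragon.protos.dragon_pb2 as pb

    opt = pb.DeviceOption()
    opt.device_type = pb.CNML  # CPU = 0, CUDA = 1, CNML = 2
    opt.device_id = 0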
message DeviceOption {
optional DeviceType device_type = 1 [default = CPU];
......
......@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='dragon.proto',
package='dragon',
serialized_pb=_b('\n\x0c\x64ragon.proto\x12\x06\x64ragon\"\xfe\x01\n\x0bTensorProto\x12\x0c\n\x04\x64ims\x18\x01 \x03(\x05\x12\x36\n\tdata_type\x18\x02 \x01(\x0e\x32\x1c.dragon.TensorProto.DataType:\x05\x46LOAT\x12\x16\n\nfloat_data\x18\x03 \x03(\x02\x42\x02\x10\x01\x12\x16\n\nint32_data\x18\x04 \x03(\x05\x42\x02\x10\x01\x12\x11\n\tbyte_data\x18\x05 \x01(\x0c\x12\x13\n\x0bstring_data\x18\x06 \x03(\x0c\x12\x0c\n\x04name\x18\x07 \x01(\t\"C\n\x08\x44\x61taType\x12\t\n\x05\x46LOAT\x10\x01\x12\t\n\x05INT32\x10\x02\x12\x08\n\x04\x42YTE\x10\x03\x12\n\n\x06STRING\x10\x04\x12\x0b\n\x07\x46LOAT16\x10\x0c\"3\n\x0cTensorProtos\x12#\n\x06protos\x18\x01 \x03(\x0b\x32\x13.dragon.TensorProto\"\x80\x01\n\x08\x41rgument\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\t\n\x01\x66\x18\x02 \x01(\x02\x12\t\n\x01i\x18\x03 \x01(\x05\x12\x0b\n\x03i64\x18\t \x01(\x03\x12\t\n\x01s\x18\x04 \x01(\t\x12\t\n\x01\x62\x18\x08 \x01(\x08\x12\x0e\n\x06\x66loats\x18\x05 \x03(\x02\x12\x0c\n\x04ints\x18\x06 \x03(\x05\x12\x0f\n\x07strings\x18\x07 \x03(\t\"z\n\x0c\x44\x65viceOption\x12,\n\x0b\x64\x65vice_type\x18\x01 \x01(\x0e\x32\x12.dragon.DeviceType:\x03\x43PU\x12\x14\n\tdevice_id\x18\x02 \x01(\x05:\x01\x30\x12\x16\n\x0brandom_seed\x18\x03 \x01(\r:\x01\x33\x12\x0e\n\x06\x65ngine\x18\x04 \x01(\t\"\x94\x01\n\x0bOperatorDef\x12\r\n\x05input\x18\x01 \x03(\t\x12\x0e\n\x06output\x18\x02 \x03(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x0c\n\x04type\x18\x04 \x01(\t\x12\x1d\n\x03\x61rg\x18\x05 \x03(\x0b\x32\x10.dragon.Argument\x12+\n\rdevice_option\x18\x06 \x01(\x0b\x32\x14.dragon.DeviceOption\"=\n\x0eGradientTarget\x12\x0c\n\x04\x63ost\x18\x01 \x01(\t\x12\x0b\n\x03wrt\x18\x02 \x01(\t\x12\x10\n\x08\x65xternal\x18\x03 \x01(\t\"Y\n\x0cUpdateTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06tensor\x18\x03 \x03(\t\x12\x1d\n\x03\x61rg\x18\x04 \x03(\x0b\x32\x10.dragon.Argument\"\x94\x02\n\x0cTensorFiller\x12\x0e\n\x06tensor\x18\x01 \x01(\t\x12\x16\n\x04type\x18\x02 \x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03low\x18\x04 \x01(\x02:\x01\x30\x12\x0f\n\x04high\x18\x05 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x06 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x07 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x08 \x01(\x02:\x01\x33\x12@\n\rvariance_norm\x18\t \x01(\x0e\x32!.dragon.TensorFiller.VarianceNorm:\x06\x46\x41N_IN\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x46\x41N_AVG\x10\x02\"\xfb\x01\n\x08GraphDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1f\n\x02op\x18\x02 \x03(\x0b\x32\x13.dragon.OperatorDef\x12\x12\n\ngraph_type\x18\x03 \x01(\t\x12+\n\rdevice_option\x18\x05 \x01(\x0b\x32\x14.dragon.DeviceOption\x12\x1d\n\x03\x61rg\x18\x06 \x03(\x0b\x32\x10.dragon.Argument\x12\x0e\n\x06target\x18\x07 \x03(\t\x12(\n\x08g_target\x18\x08 \x03(\x0b\x32\x16.dragon.GradientTarget\x12&\n\x08u_target\x18\t \x03(\x0b\x32\x14.dragon.UpdateTarget*)\n\nDeviceType\x12\x07\n\x03\x43PU\x10\x00\x12\x08\n\x04\x43UDA\x10\x01\x12\x08\n\x04\x43NML\x10\x02')
)
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
......@@ -38,21 +38,21 @@ _DEVICETYPE = _descriptor.EnumDescriptor(
options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='CNML', index=2, number=2,
options=None,
type=None),
],
containing_type=None,
options=None,
serialized_start=1427,
serialized_end=1468,
)
_sym_db.RegisterEnumDescriptor(_DEVICETYPE)
DeviceType = enum_type_wrapper.EnumTypeWrapper(_DEVICETYPE)
CPU = 0
CUDA = 1
CNML = 2
_TENSORPROTO_DATATYPE = _descriptor.EnumDescriptor(
......
......@@ -14,7 +14,6 @@ from __future__ import division
from __future__ import print_function
import pprint
import dragon.core.workspace as ws
from dragon.core.tensor import Tensor
......@@ -43,7 +42,7 @@ class BaseUpdater(object):
self._defaults = {
'scale_gradient': scale_gradient,
'clip_gradient': clip_gradient,
'l2_decay': l2_decay,
}
self._param_group = []
self._slot = slot
......@@ -77,7 +76,7 @@ class BaseUpdater(object):
defaults = self.__dict__.get('_defaults')
if item in defaults:
if self._registered:
return ws.FetchTensor(self._slot + '/' + item)
else: return defaults[item]
return self.__dict__[item]
......@@ -85,9 +84,8 @@ class BaseUpdater(object):
defaults = self.__dict__.get('_defaults')
if defaults is not None and key in defaults:
if self._registered:
ws.FeedTensor(self._slot + '/' + key, value,
dtype='float32', force_cpu=True)
else:
self._defaults[key] = value
else:
......@@ -96,8 +94,8 @@ class BaseUpdater(object):
def register_in_workspace(self):
if not self._registered:
for k, v in self._defaults.items():
ws.FeedTensor(self._slot + "/" + k, v,
dtype='float32', force_cpu=True)
self._registered = True
if self._verbose:
from dragon.config import logger
......
......@@ -14,7 +14,7 @@ from __future__ import division
from __future__ import print_function
version = '0.2.2'
full_version = '0.2.2.13'
release = False
if not release:
......
......@@ -19,7 +19,8 @@ from .vision import ConvolutionLayer, \
ROIPoolingLayer, \
ROIAlignLayer, \
NNResizeLayer, \
BilinearResizeLayer, \
DropBlockLayer
from .neuron import ReLULayer, \
PReLULayer, \
......
......@@ -446,10 +446,13 @@ class InstanceNormLayer(Layer):
The implementation of ``InstanceNormLayer``.
Introduced by `[Ulyanov et.al, 2016] <https://arxiv.org/abs/1607.08022>`_
"""
def __init__(self, LayerParameter):
super(InstanceNormLayer, self).__init__(LayerParameter)
param = LayerParameter.instance_norm_param
self._param = {'eps': param.eps,
'axis': 1}
def Setup(self, bottom):
super(InstanceNormLayer, self).Setup(bottom)
......
......@@ -250,7 +250,7 @@ class NNResizeLayer(Layer):
Parameters
----------
shape : caffe_pb2.BlobShape
The output shape. Refer `ResizeParameter.shape`_.
fx : float
The scale factor of height. Refer `ResizeParameter.fx`_.
......@@ -283,7 +283,7 @@ class BilinearResizeLayer(Layer):
Parameters
----------
shape : caffe_pb2.BlobShape
The output shape. Refer `ResizeParameter.shape`_.
fx : float
The scale factor of height. Refer `ResizeParameter.fx`_.
......@@ -309,3 +309,33 @@ class BilinearResizeLayer(Layer):
raise ValueError('The second bottom should be provided to determine the shape.')
self._param['shape_like'] = bottom[1]
return ops.BilinearResize(input, **self._param)
class DropBlockLayer(Layer):
"""The implementation of ``DropBlock2dLayer``.
Parameters
----------
block_size : int
The size of dropping block. Refer ``DropBlockParameter.block_size``.
keep_prob : float
The prob of keeping. Refer ``DropBlockParameter.keep_prob``.
alpha : float
The scale factor to gamma. Refer ``DropBlockParameter.alpha``.
decrement : float
The decrement to keep prob. Refer ``DropBlockParameter.decrement``.
"""
def __init__(self, LayerParameter):
super(DropBlockLayer, self).__init__(LayerParameter)
param = LayerParameter.drop_block_param
self._param = {'block_size': param.block_size,
'keep_prob': param.keep_prob,
'alpha': param.alpha,
'decrement': param.decrement,
'data_format': 'NCHW'}
def Setup(self, bottom):
super(DropBlockLayer, self).Setup(bottom)
input = bottom[0] if isinstance(bottom, list) else bottom
return ops.DropBlock2d(input, **self._param)
\ No newline at end of file
......@@ -424,7 +424,9 @@ message LayerParameter {
optional DenseConcatParameter dense_concat_param = 163;
optional FocalLossParameter focal_loss_param = 164;
optional GatherParameter gather_param = 165;
optional InstanceNormParameter instance_norm_param = 166;
optional GroupNormParameter group_norm_param = 167;
optional DropBlockParameter drop_block_param = 168;
}
// Message that stores parameters used to apply transformation
......@@ -537,7 +539,7 @@ message BatchNormParameter {
optional float moving_average_fraction = 2 [default = 0.9];
// Small value to add to the variance estimate so that we don't divide by
// zero.
optional float eps = 3 [default = 1e-5];
}
message BiasParameter {
......@@ -595,7 +597,7 @@ message ConvolutionParameter {
repeated uint32 stride = 6; // The stride; defaults to 1
// Factor used to dilate the kernel, (implicitly) zero-filling the resulting
// holes. (Kernel dilation is sometimes referred to by its use in the
// algorithme à trous from Holschneider et al. 1987.)
repeated uint32 dilation = 18; // The dilation; defaults to 1
// For 2D convolution only, the *_h and *_w versions may also be used to
......@@ -1456,7 +1458,7 @@ message NormalizeParameter {
// Whether or not scale parameters are shared across channels.
optional bool channel_shared = 3 [default = true];
// Epsilon for not dividing by zero while normalizing variance
optional float eps = 4 [default = 1e-5];
}
message ParallelParameter {
......@@ -1492,7 +1494,7 @@ message ProposalParameter {
message BatchRenormParameter {
optional bool use_global_stats = 1;
optional float moving_average_fraction = 2 [default = 0.9];
optional float eps = 3 [default = 1e-5];
optional float r_max = 4 [default = 3.0];
optional float d_max = 5 [default = 5.0];
optional float t_delta = 6 [default = 0.001];
......@@ -1513,17 +1515,18 @@ message GatherParameter {
optional int32 axis = 1 [default = 0];
}
message InstanceNormParameter {
optional float eps = 1 [default = 1e-5];
}
message GroupNormParameter {
optional float eps = 1 [default = 1e-5];
optional int32 group = 2 [default = 32]; // The group size
}
message DropBlockParameter {
optional int32 block_size = 1 [default = 7];
optional float keep_prob = 2 [default = 0.9];
optional float alpha = 3 [default = 1.0];
optional float decrement = 4 [default = 0.0];
}
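A sketch of filling the new message from python; the generated-module path is an assumption based on the caffe vm layout:

    from dragon.vm.caffe.proto import caffe_pb2  # assumed import path

    layer = caffe_pb2.LayerParameter()
    layer.drop_block_param.block_size = 7
    layer.drop_block_param.keep_prob = 0.9
    layer.drop_block_param.alpha = 1.0
    layer.drop_block_param.decrement = 0.0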
......@@ -24,7 +24,7 @@ def convert_to_tensor(value, dtype=None, name=None, **kwargs):
Parameters
----------
value : number, list or numpy.ndarray
The value to convert.
dtype : Dtype or None
The data type. If ``None``, inferred from the type of `value`.
......
......@@ -15,6 +15,7 @@ import numpy as np
import dragon.core.mpi as mpi
import dragon.core.workspace as ws
import dragon.protos.dragon_pb2 as pb
from dragon.core.utils import MakeArgument
from dragon.core.gradient_maker import GraphGradientMaker
from dragon.core.scope import GetOperatorName, GetTensorName
......@@ -156,6 +157,7 @@ def GraphDef_Opt(meta_graph):
OX = 3 if option['share_grads'] else 2
if option['debug_mode']: OX = 1
meta_graph.arg.add().CopyFrom(MakeArgument('optimization_level', OX))
meta_graph.graph_type = option['graph_type']
def GraphDef_Device(meta_graph):
......@@ -181,11 +183,12 @@ def GraphDef_Device(meta_graph):
"""
from dragon.config import option
if option['device'] != 'None':
supports = {'CPU': 0, 'CUDA': 1, 'CNML': 2}
device_option = pb.DeviceOption()
device_option.device_type = supports[option['device']]
device_option.device_id = option['device_id']
device_option.random_seed = option['random_seed']
if option['device'] == 'CUDA':
if option['use_cudnn']: device_option.engine = 'CUDNN'
meta_graph.device_option.CopyFrom(device_option)
......@@ -217,16 +220,16 @@ def function(inputs=None, outputs=None, givens=None, updater=None):
Examples
--------
>>> x = Tensor('x', dtype='float32').Variable()
>>> y = x * 2
>>> f = function(outputs=y)
>>> x.set_value(np.ones((2, 3)))
>>> print(f())
>>> [[ 2. 2. 2.]
[ 2. 2. 2.]]
>>> f = function(inputs=x, outputs=y)
>>> print(f(np.ones((2, 3))))
>>> [[ 2. 2. 2.]
[ 2. 2. 2.]]
......@@ -339,13 +342,15 @@ def eval(self, feed_dict=None):
raise TypeError('The key of feed_dict should be a Tensor.')
if key.shape is not None:
if len(key.shape) != len(value.shape):
raise RuntimeError(
'The Tensor({}) was limited to {} dimensions, \
while feeding a value with {} dimensions.'.format(
key.name, len(key.shape), len(value.shape)))
for i in range(len(key.shape)):
if key.shape[i] is None: continue
if key.shape[i] != value.shape[i]:
raise RuntimeError(
'The shape of Tensor({}) was limited as ('.format(key.name) +
','.join([str(dim) for dim in key.shape]) + '), ' +
'while feeding a value with (' + ','.join([str(dim) for dim in value.shape]) + ').')
return self._eval_func(*feed_dict.values())
......
......@@ -20,7 +20,7 @@ def shared(value, name=None, **kwargs):
Parameters
----------
value : number, list or numpy.ndarray
The numerical values.
name : str
The name of tensor.
......
......@@ -42,6 +42,7 @@ class Module(object):
self._buffers = OrderedDict()
self._persistent_key = self._op = None
self._ctx = ('CPU', 0)
self.training = True
def __getattr__(self, item):
if '_parameters' in self.__dict__:
......@@ -363,3 +364,12 @@ class Module(object):
def run(self, inputs, outputs, auto_grad=True):
meta = ('PERSISTENT', self.persistent_key, self.op)
return RunOperator(inputs, outputs, meta, auto_grad=auto_grad)
def train(self, mode=True):
self.training = mode
for module in self.children():
module.train(mode)
return self
def eval(self):
return self.train(False)
\ No newline at end of file
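A sketch of the new mode switch; ``train``/``eval`` recurse into children, which is what ``nn.Dropout`` and the ``nn.BatchNorm*`` modules now key off instead of ``requires_grad``:

    import dragon.vm.torch.nn as nn

    m = nn.Sequential(
        nn.Linear(128, 64),
        nn.Dropout(0.5),
    )
    m.train()  # training=True on every child; Dropout is active
    m.eval()   # training=False; Dropout becomes an identity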
......@@ -10,20 +10,35 @@
# ------------------------------------------------------------
"""We move the Module & Parameter to ``torch`` instead of ``torch.nn``,
as they will be reused by ``torch.ops``.
"""
from dragon.vm.torch.module import Module
from dragon.vm.torch.tensor import Parameter
from .modules.conv import Conv2d, ConvTranspose2d
from .modules.pooling import MaxPool2d, AvgPool2d
from .modules.activation import (
ReLU, LeakyReLU, ELU, SELU,
Sigmoid, Softmax,
)
from .modules.linear import Linear
from .modules.loss import (
BCEWithLogitsLoss,
NLLLoss, CrossEntropyLoss,
L1Loss, MSELoss, SmoothL1Loss,
)
from .modules.container import Container, Sequential, ModuleList
from .modules.batchnorm import BatchNorm1d, BatchNorm2d, BatchNorm3d
from .modules.groupnorm import GroupNorm1d, GroupNorm2d, GroupNorm3d
from .modules.affine import Affine
from .modules.dropout import Dropout, Dropout2d, Dropout3d
from .modules.dropblock import DropBlock2d
from .modules.rnn import RNNBase, RNN, LSTM, GRU
from . import init
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/pytorch/pytorch/blob/master/torch/nn/functional.py>
#
# ------------------------------------------------------------
import warnings
class _Reduction:
@staticmethod
def get_enum(reduction):
if reduction == 'none':
return 0
if reduction == 'elementwise_mean':
return 1
if reduction == 'sum':
return 2
raise ValueError(reduction + " is not a valid value for reduction")
# In order to support previous versions, accept boolean size_average and reduce
# and convert them into the new constants for now
# We use these functions in torch/legacy as well, in which case we'll silence the warning
@staticmethod
def legacy_get_string(size_average, reduce, emit_warning=True):
warning = "size_average and reduce args will be deprecated, please use reduction='{}' instead."
if size_average is None:
size_average = True
if reduce is None:
reduce = True
if size_average and reduce:
ret = 'elementwise_mean'
elif reduce:
ret = 'sum'
else:
ret = 'none'
if emit_warning:
warnings.warn(warning.format(ret))
return ret
@staticmethod
def legacy_get_enum(size_average, reduce, emit_warning=True):
return _Reduction.get_enum(_Reduction.legacy_get_string(size_average, reduce, emit_warning))
\ No newline at end of file
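The legacy mapping above is: both flags truthy (or unset) -> 'elementwise_mean'; ``reduce`` alone -> 'sum'; otherwise -> 'none'. A quick check sketch:

    from dragon.vm.torch.nn.functional import _Reduction

    assert _Reduction.legacy_get_string(None, None, emit_warning=False) == 'elementwise_mean'
    assert _Reduction.legacy_get_string(False, True, emit_warning=False) == 'sum'
    assert _Reduction.legacy_get_string(True, False, emit_warning=False) == 'none'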
......@@ -55,6 +55,47 @@ class LeakyReLU(Module):
return self.run(inputs, outputs)
class ELU(Module):
def __init__(self, alpha=1.0, inplace=False):
super(ELU, self).__init__()
self.alpha = alpha
self._inplace = inplace
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'Elu',
'n_inputs': 1, 'n_outputs': 1,
'arguments': {
'alpha': self.alpha,
}
}
def forward(self, x):
inputs = [x]; self.unify_devices(inputs)
outputs = [x if self._inplace else self.register_output(x.dtype)]
return self.run(inputs, outputs)
class SELU(Module):
def __init__(self, inplace=False):
super(SELU, self).__init__()
self._inplace = inplace
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'SElu',
'n_inputs': 1, 'n_outputs': 1,
'arguments': {}
}
def forward(self, x):
inputs = [x]; self.unify_devices(inputs)
outputs = [x if self._inplace else self.register_output(x.dtype)]
return self.run(inputs, outputs)
class Sigmoid(Module):
def __init__(self, inplace=False):
super(Sigmoid, self).__init__()
......
......@@ -102,7 +102,7 @@ class _BatchNorm(Module):
inputs = [input] + self.inputs
self.unify_devices(inputs)
outputs = [self.register_output(input.dtype)]
phase = 'TRAIN' if self.training else 'TEST'
# Normalize the input by using batch stats ALWAYS
# Note that the update of the moving average is meaningless
# (we cannot remove it; ask NVIDIA and cuDNN why :-)
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.nn import Module
class DropBlock2d(Module):
def __init__(self, block_size=7, kp=0.9,
alpha=1., decrement=0., inplace=False):
super(DropBlock2d, self).__init__()
self.kp = kp
self.block_size = block_size
self.alpha = alpha
self.decrement = decrement
self.inplace = inplace
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'DropBlock2d',
'n_inputs': 1, 'n_outputs': 1,
'arguments': {
'block_size': self.block_size,
'keep_prob': self.kp,
'alpha': self.alpha,
'decrement': self.decrement,
'data_format': 'NCHW',
'phase': 'TRAIN',
}
}
def forward(self, input):
if not self.training: return input
inputs = [input]
self.unify_devices(inputs)
outputs = [input if self.inplace else self.register_output(input.dtype)]
return self.run(inputs, outputs)
\ No newline at end of file
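A usage sketch; in ``eval()`` mode the forward above returns its input untouched:

    import dragon.vm.torch.nn as nn

    drop = nn.DropBlock2d(block_size=7, kp=0.9, decrement=1e-6)
    drop.train()  # blocks are dropped; keep_prob decays each iteration
    drop.eval()   # identity: forward() returns the input unchanged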
......@@ -34,7 +34,7 @@ class Dropout(Module):
}
def forward(self, input):
if not self.training: return input
inputs = [input]
self.unify_devices(inputs)
outputs = [input if self.inplace else self.register_output(input.dtype)]
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.tensor import Tensor
from dragon.vm.torch.nn import Module, Parameter
from dragon.vm.torch.ops.creation import zeros, ones
from dragon.vm.torch.module import RunOperator
class _GroupNorm(Module):
def __init__(self, num_features, group=32,
eps=1e-5, affine=True):
super(_GroupNorm, self).__init__()
self.num_features = num_features
self.group = group
self.eps = eps
self.affine = affine
if self.affine:
self.weight = Parameter(Tensor(num_features))
self.bias = Parameter(Tensor(num_features))
else:
self.weight = self.bias = None
self.inputs = [self.weight, self.bias] if self.affine else []
self.reset_parameters()
self.register_op()
def reset_parameters(self):
if self.affine:
self.weight.data.uniform_()
self.bias.data.zero_()
def register_op(self):
self.op_meta = {
'op_type': 'FusedGroupNorm' if self.affine else 'GroupNorm',
'n_inputs': 3 if self.affine else 1, 'n_outputs': 1,
'arguments': {
'group': self.group,
'axis': 1, # Data format: NCHW
'eps': self.eps,
}
}
def forward(self, input):
inputs = [input] + self.inputs
self.unify_devices(inputs)
outputs = [self.register_output(input.dtype)]
return self.run(inputs, outputs)
class GroupNorm1d(_GroupNorm):
"""Dragon does not use separate backend functions."""
pass
class GroupNorm2d(_GroupNorm):
"""Dragon does not use separate backend functions."""
pass
class GroupNorm3d(_GroupNorm):
"""Dragon does not use separate backend functions."""
pass
\ No newline at end of file
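The three aliases all share ``_GroupNorm``; a construction sketch:

    import dragon.vm.torch.nn as nn

    # 32 groups over 64 channels; affine=True adds the fused weight/bias pair.
    gn = nn.GroupNorm2d(num_features=64, group=32, eps=1e-5, affine=True)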
......@@ -18,50 +18,176 @@ from __future__ import division
from __future__ import print_function
from dragon.vm.torch.nn import Module
from dragon.vm.torch.nn.functional import _Reduction
class _Loss(Module):
def __init__(self, size_average=None, reduce=None, reduction='elementwise_mean'):
super(_Loss, self).__init__()
if size_average is not None or reduce is not None:
self.reduction = _Reduction.legacy_get_string(size_average, reduce)
else:
self.reduction = reduction
class _WeightedLoss(_Loss):
def __init__(self, weight=None, size_average=None, reduce=None, reduction='elementwise_mean'):
super(_WeightedLoss, self).__init__(size_average, reduce, reduction)
self.weight = weight
# TODO(PhyscalX): Dragon will support it later :).
if weight is not None:
raise NotImplementedError('WeightedLoss has not been implemented yet.')
class NLLLoss(_WeightedLoss):
def __init__(self, weight=None, size_average=None, ignore_index=-100,
reduce=None, reduction='elementwise_mean'):
super(NLLLoss, self).__init__(weight, size_average, reduce, reduction)
self.ignore_index = ignore_index
self.normalization = {
'elementwise_mean': 'VALID',
'sum': 'None',
'none': 'UNIT'}[self.reduction]
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'NLLLoss',
'n_inputs': 2, 'n_outputs': 1,
'arguments': {
'axis': 1,
'normalization': self.normalization,
'ignore_labels': () if self.ignore_index < 0 else (self.ignore_index,),
}
}
def forward(self, input, target):
inputs = [input, target]; self.unify_devices(inputs)
outputs = [self.register_output(input.dtype)]
return self.run(inputs, outputs)
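A usage sketch: log-probabilities are expected along axis 1 and the targets are sparse class indices (``torch`` is the ``dragon.vm.torch`` alias, as above; integer label handling is assumed):

    criterion = NLLLoss(ignore_index=-100)   # reduction='elementwise_mean' by default
    log_probs = torch.randn(8, 10)           # e.g. the output of a log-softmax
    labels = torch.zeros(8)                  # class indices, here all zero
    loss = criterion(log_probs, labels)      # averaged over the valid labels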
class BCEWithLogitsLoss(_WeightedLoss):
def __init__(self, weight=None, size_average=None, reduce=None,
reduction='elementwise_mean', pos_weight=None):
super(BCEWithLogitsLoss, self).__init__(weight, size_average, reduce, reduction)
if pos_weight is not None:
raise NotImplementedError('Positive weight has not been implemented yet.')
self.normalization = {
'elementwise_mean': 'VALID',
'sum': 'NONE',
'none': 'UNIT'}[self.reduction]
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'SigmoidCrossEntropy',
'n_inputs': 2, 'n_outputs': 1,
'arguments': {
'normalization': self.normalization,
}
}
def forward(self, input, target):
inputs = [input, target]; self.unify_devices(inputs)
outputs = [self.register_output(input.dtype)]
return self.run(inputs, outputs)
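As the ``SigmoidCrossEntropy`` op type suggests, the sigmoid is fused into the loss, so raw logits are passed in; targets are {0, 1} values of the same shape. A sketch:

    criterion = BCEWithLogitsLoss(reduction='sum')
    logits = torch.randn(4, 3)
    targets = torch.zeros(4, 3)           # binary targets in {0, 1}
    loss = criterion(logits, targets)     # 'sum' skips the averaging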
class CrossEntropyLoss(_WeightedLoss):
def __init__(self, weight=None, size_average=True, ignore_index=-100, reduce=True):
super(CrossEntropyLoss, self).__init__(weight, size_average)
def __init__(self, weight=None, size_average=None, ignore_index=-100,
reduce=None, reduction='elementwise_mean'):
super(CrossEntropyLoss, self).__init__(weight, size_average, reduce, reduction)
self.ignore_index = ignore_index
self.reduce = reduce
self.normalization = {
'elementwise_mean': 'VALID',
'sum': 'NONE',
'none': 'UNIT'}[self.reduction]
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'SparseSoftmaxCrossEntropy' if self.reduce else 'SoftmaxCrossEntropy',
'op_type': 'SparseSoftmaxCrossEntropy',
'n_inputs': 2, 'n_outputs': 1,
'arguments': {
'axis': 1,
'normalization': 'VALID' if self.size_average else 'NONE',
'normalization': self.normalization,
'ignore_labels': () if self.ignore_index < 0 else (self.ignore_index,),
}
}
def forward(self, input, target):
_assert_no_grad(target)
inputs = [input, target]; self.unify_devices(inputs)
outputs = [self.register_output(input.dtype)]
return self.run(inputs, outputs)
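The rewrite always emits ``SparseSoftmaxCrossEntropy``: the softmax is fused in and the targets stay sparse indices rather than one-hot vectors. A sketch:

    criterion = CrossEntropyLoss(ignore_index=255)  # label 255 is masked out
    scores = torch.randn(8, 10)                     # raw, un-normalized logits
    labels = torch.zeros(8)                         # sparse class indices
    loss = criterion(scores, labels)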
class L1Loss(_Loss):
def __init__(self, size_average=None, reduce=None, reduction='elementwise_mean'):
super(L1Loss, self).__init__(size_average, reduce, reduction)
self.normalization = {
'elementwise_mean': 'BATCH_SIZE',
'sum': 'NONE'}[self.reduction]
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'L1Loss',
'n_inputs': 2, 'n_outputs': 1,
'arguments': {
'normalization': self.normalization,
}
}
def forward(self, input, target):
inputs = [input, target]; self.unify_devices(inputs)
outputs = [self.register_output(input.dtype)]
return self.run(inputs, outputs)
class MSELoss(_Loss):
def __init__(self, size_average=None, reduce=None, reduction='elementwise_mean'):
super(MSELoss, self).__init__(size_average, reduce, reduction)
self.normalization = {
'elementwise_mean': 'BATCH_SIZE',
'sum': 'NONE'}[self.reduction]
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'L2Loss',
'n_inputs': 2, 'n_outputs': 1,
'arguments': {
'normalization': self.normalization,
}
}
def forward(self, input, target):
inputs = [input, target]; self.unify_devices(inputs)
outputs = [self.register_output(input.dtype)]
return self.run(inputs, outputs)
class SmoothL1Loss(_Loss):
def __init__(self, size_average=None, beta=1.0,
reduce=None, reduction='elementwise_mean'):
super(SmoothL1Loss, self).__init__(size_average, reduce, reduction)
self.normalization = {
'elementwise_mean': 'BATCH_SIZE',
'sum': 'NONE'}[self.reduction]
self.beta = beta
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'SmoothL1Loss',
'n_inputs': 2, 'n_outputs': 1,
'arguments': {
'beta': self.beta,
'normalization': self.normalization,
}
}
def forward(self, input, target):
inputs = [input, target]; self.unify_devices(inputs)
outputs = [self.register_output(input.dtype)]
return self.run(inputs, outputs)
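Note that the three regression losses above map 'elementwise_mean' onto Dragon's BATCH_SIZE normalization (divide by N rather than by the full element count), and that only 'elementwise_mean' and 'sum' are accepted; 'none' would raise a KeyError in these lookup tables. A sketch:

    l1 = L1Loss()                      # -> normalization 'BATCH_SIZE'
    mse = MSELoss(reduction='sum')     # -> no averaging at all
    huber = SmoothL1Loss(beta=1.0)     # beta is the L1/L2 switch point
    pred, truth = torch.randn(4, 3), torch.zeros(4, 3)
    losses = [f(pred, truth) for f in (l1, mse, huber)]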
......@@ -11,11 +11,12 @@
from .creation import (
zeros, zeros_like, ones, ones_like,
rand, randn
one_hot, rand, randn,
)
from .arithmetic import (
add, sub, mul, div,
add, sub, mul, div, log, exp,
maximum, minimum, clamp,
)
from .ndarray import (
......
......@@ -16,14 +16,15 @@ from __future__ import print_function
from dragon.vm.torch.tensor import Tensor
from dragon.vm.torch.ops.primitive import MakeContext, WrapScalar
from dragon.vm.torch.ops.factory import get_module
from dragon.vm.torch.ops.modules.arithmetic import Fundamental
from dragon.vm.torch.ops.modules.arithmetic import (
Fundamental, Log, Exp,
Maximum, Minimum, Clamp,
)
def _fundamental(input, value, op='Add', out=None):
if not isinstance(value, Tensor):
if not isinstance(value, (int, float)):
raise TypeError('Type of value should be numerical, got {}.'
.format(type(value)))
value = WrapScalar(value, input._dtype, input._ctx)
ctx = MakeContext(inputs=[input, value])
key = 'torch/ops/{}/{}:{}'.format(op.lower(), ctx[0].lower(), ctx[1])
......@@ -33,17 +34,63 @@ def _fundamental(input, value, op='Add', out=None):
def _rfundamental(input, value, op='RAdd', out=None):
if not isinstance(value, Tensor):
if not isinstance(value, (int, float)):
raise TypeError('Type of value should be numerical, got {}.'
.format(type(value)))
value = WrapScalar(value, input._dtype, input._ctx)
ctx = MakeContext(inputs=[input, value])
key = 'torch/ops/{}/{}:{}'.format(op.lower(), ctx[0].lower(), ctx[1])
module = get_module(Fundamental, key, ctx, op_type=op)
return module.forward(value, input, out)
def _maximum(input, other, out=None):
if not isinstance(input, Tensor):
input = WrapScalar(input, 'float32', other._ctx)
dtype = other._dtype
elif not isinstance(other, Tensor):
other = WrapScalar(other, 'float32', input._ctx)
dtype = input._dtype
else: dtype = input._dtype
ctx = MakeContext(inputs=[input])
key = 'torch/ops/maximum/{}:{}'.format(ctx[0].lower(), ctx[1])
module = get_module(Maximum, key, ctx)
return module.forward(input, other, out, dtype)
def _minimum(input, other, out=None):
if not isinstance(input, Tensor):
input = WrapScalar(input, 'float32', other._ctx)
dtype = other._dtype
elif not isinstance(other, Tensor):
other = WrapScalar(other, 'float32', input._ctx)
dtype = input._dtype
else: dtype = input._dtype
ctx = MakeContext(inputs=[input])
key = 'torch/ops/minimum/{}:{}'.format(ctx[0].lower(), ctx[1])
module = get_module(Minimum, key, ctx)
return module.forward(input, other, out, dtype)
def _clamp(input, min=None, max=None, out=None):
ctx = MakeContext(inputs=[input])
key = 'torch/ops/clamp/{}:{}/min:{}/max:{}'.format(
ctx[0].lower(), ctx[1], min, max)
module = get_module(Clamp, key, ctx, min=min, max=max)
return module.forward(input, out)
def _exp(input, out=None):
ctx = MakeContext(inputs=[input])
key = 'torch/ops/exp/{}:{}'.format(ctx[0].lower(), ctx[1])
module = get_module(Exp, key, ctx)
return module.forward(input, out)
def _log(input, out=None):
ctx = MakeContext(inputs=[input])
key = 'torch/ops/log/{}:{}'.format(ctx[0].lower(), ctx[1])
module = get_module(Log, key, ctx)
return module.forward(input, out)
def add(input, value, out=None):
"""Add the ``input`` and ``value`` into the output tensor.
......@@ -126,3 +173,106 @@ def div(input, value, out=None):
"""
return _fundamental(input, value, out=out, op='Div')
def maximum(input, other, out=None):
"""Return the max value of given two tensors.
Parameters
----------
input : vm.torch.Tensor
The input tensor.
other : vm.torch.Tensor
The tensor to compare with.
out : vm.torch.Tensor or None
The output tensor.
Returns
-------
vm.torch.Tensor
The output tensor.
"""
return _maximum(input, other, out)
def minimum(input, other, out=None):
"""Return the min value of given two tensors.
Parameters
----------
input : vm.torch.Tensor
The input tensor.
other : vm.torch.Tensor
The tensor to compare with.
out : vm.torch.Tensor or None
The output tensor.
Returns
-------
vm.torch.Tensor
The output tensor.
"""
return _minimum(input, other, out)
def clamp(input, min=None, max=None, out=None):
"""Clamp all elements into the range [min, max].
Parameters
----------
input : vm.torch.Tensor
The input tensor.
min : numerical or None
The min value.
max : numerical or None
The max value.
out : vm.torch.Tensor or None
The output tensor.
Returns
-------
vm.torch.Tensor
The output tensor.
"""
return _clamp(input, min, max, out)
def log(input, out=None):
"""Compute the natural logarithm of input.
Parameters
----------
input : vm.torch.Tensor
The input tensor.
out : vm.torch.Tensor or None
The output tensor.
Returns
-------
vm.torch.Tensor
The output tensor.
"""
return _log(input, out)
def exp(input, out=None):
"""Compute the exponential of input.
Parameters
----------
input : vm.torch.Tensor
The input tensor.
out : vm.torch.Tensor or None
The output tensor.
Returns
-------
vm.torch.Tensor
The output tensor.
"""
return _exp(input, out)
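Taken together, the new functional ops accept tensor/tensor or tensor/scalar operands; per ``_maximum``/``_minimum`` above, a scalar operand is wrapped as a float32 constant on the other operand's device. A sketch:

    x = torch.randn(3, 3)
    y = torch.maximum(x, 0.0)                   # element-wise max against a scalar
    z = torch.minimum(x, y)                     # element-wise min of two tensors
    w = torch.clamp(x, min=-1.0, max=1.0)       # either bound may be left as None
    p = torch.exp(torch.log(torch.ones(3, 3)))  # identity on positive inputs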
......@@ -21,12 +21,18 @@ from dragon.vm.torch.execute_engine import RunOperator
from dragon.vm.torch.ops.factory import get_module
from dragon.vm.torch.autograd.grad_mode import no_grad
from dragon.vm.torch.ops.primitive import MakeContext
from dragon.vm.torch.ops.arithmetic import _fundamental, _rfundamental
from dragon.vm.torch.ops.arithmetic import (
_fundamental, _rfundamental, _log, _exp,
_clamp,
)
from dragon.vm.torch.ops.ndarray import (
reshape, squeeze, unsqueeze,
_permute, _repeat, _crop,
_fill, _reduce, _arg_reduce,
)
from dragon.vm.torch.ops.modules.dtype import AsType
......@@ -53,9 +59,14 @@ def copy_(self, src, non_blocking=False):
The ``self`` tensor.
"""
# Copy memory
FromTensor(
src, CTX_TO_DEVICE_OPTION[tuple(src._ctx)],
self.name, CTX_TO_DEVICE_OPTION[tuple(self._ctx)])
self._dtype = src._dtype
# Transfer the static shape if necessary
self._static_shape = src.size() \
if self._static_shape else None
return self
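With this change ``copy_`` also carries over the source dtype, and the static shape when the destination has one, matching the changelog fix for the shape/dtype loss after ``Tensor.copy_``. A sketch:

    src = torch.ones(2, 2)
    dst = torch.zeros(2, 2)
    dst.copy_(src)   # dst now reports src's dtype as well as its values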
......@@ -295,6 +306,76 @@ def rdiv(self, value):
return _rfundamental(self, value, op='RDiv')
def clamp(self, min=None, max=None):
"""Return a tensor that all elements are clamped into the range [min, max].
Parameters
----------
min : numerical or None
The min value.
max : numerical or None
The max value.
Returns
-------
vm.torch.Tensor
The output tensor.
"""
return _clamp(self, min, max)
def clamp_(self, min=None, max=None):
"""Clamp all elements are clamped into the range [min, max].
Parameters
----------
min : numerical or None
The min value.
max : numerical or None
The max value.
Returns
-------
vm.torch.Tensor
The output tensor.
"""
return _clamp(self, min, max, self)
def log(self):
"""Compute the natural logarithm of this tensor.
Parameters
----------
None
Returns
-------
vm.torch.Tensor
The log tensor.
"""
return _log(self)
def exp(self):
"""Compute the exponential of this tensor.
Parameters
----------
None
Returns
-------
vm.torch.Tensor
The exp tensor.
"""
return _exp(self)
Tensor.add = add
Tensor.add_ = add_
Tensor.__radd__ = radd
......@@ -308,6 +389,10 @@ Tensor.div = div
Tensor.div_ = div_
Tensor.__rdiv__ = rdiv
Tensor.__rtruediv__ = rdiv
Tensor.clamp = clamp
Tensor.clamp_ = clamp_
Tensor.log = log
Tensor.exp = exp
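With these registrations the same ops are reachable as tensor methods. A sketch:

    x = torch.rand(3)
    x.clamp_(0.0, 1.0)      # in-place, returns x itself
    y = x.add(1.0).log()    # chained out-of-place ops
    z = y.exp()             # recovers x + 1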
##############################################
......@@ -387,16 +472,12 @@ def _unsqueeze_(self, dim=None):
def view(self, *args):
if self._static_shape:
raise RuntimeError('Cannot view a leaf variable; it owns static sizes.')
return reshape(self, shape=args)
def view_as(self, other):
if not isinstance(other, Tensor):
raise ValueError('The other should be a torch tensor.')
if self._static_shape:
raise RuntimeError('Cannot view a leaf variable; it owns static sizes.')
return reshape(self, shape=None, shape_like=other)
......
......@@ -13,14 +13,20 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.ops.primitive import MakeContext, CanonicalAxis
from dragon.vm.torch.ops.factory import get_module
from dragon.vm.torch.tensor import LeafTensor
from dragon.vm.torch.execute_engine import RunOperator
from dragon.vm.torch.ops.primitive import MakeContext
from dragon.vm.torch.ops.factory import get_module
from dragon.vm.torch.ops.modules.creation import OneHot
__all__ = [
'zeros', 'zeros_like', 'ones', 'ones_like',
'rand', 'randn',
'one_hot', 'rand', 'randn',
]
......@@ -180,3 +186,26 @@ def randn(*sizes, **kwargs):
inputs = []; outputs = [out]; ctx = MakeContext(inputs, outputs)
meta = ('ONCE', 'RandomNormal', ctx)
return RunOperator(inputs, outputs, meta, **arguments)
def one_hot(input, depth):
"""Return a ont hot tensor according to given input.
Parameters
----------
input : vm.torch.Tensor
The input tensor.
depth : int
The depth, i.e. the number of classes.
Returns
-------
vm.torch.FloatTensor
The output tensor.
"""
ctx = MakeContext(inputs=[input])
key = 'torch/ops/one_hot/{}:{}/depth:{}'.format(
ctx[0].lower(), ctx[1], depth)
module = get_module(OneHot, key, ctx, depth=depth)
return module.forward(input)
\ No newline at end of file
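A sketch of the new creation op; the input is assumed to hold integral class indices in [0, depth):

    labels = torch.zeros(4)                   # four indices, here all class 0
    table = torch.one_hot(labels, depth=10)   # shape (4, 10); 1.0 at each index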
......@@ -38,3 +38,125 @@ class Fundamental(BaseModule):
inputs = [x1, x2]; self.unify_devices(inputs)
outputs = [y] if y else [self.register_output(x1.dtype)]
return self.run(inputs, outputs)
class Maximum(BaseModule):
def __init__(self, key, ctx, **kwargs):
super(Maximum, self).__init__(key, ctx, **kwargs)
self.register_arguments()
self.register_op()
def register_arguments(self):
"""No arguments for minimum op."""
pass
def register_op(self):
self.op_meta = {
'op_type': 'Maximum',
'n_inputs': 2, 'n_outputs': 1,
'arguments': {}
}
def forward(self, x1, x2, y, dtype):
inputs = [x1, x2]; self.unify_devices(inputs)
outputs = [y] if y else [self.register_output(dtype)]
return self.run(inputs, outputs)
class Minimum(BaseModule):
def __init__(self, key, ctx, **kwargs):
super(Minimum, self).__init__(key, ctx, **kwargs)
self.register_arguments()
self.register_op()
def register_arguments(self):
"""No arguments for minimum op."""
pass
def register_op(self):
self.op_meta = {
'op_type': 'Minimum',
'n_inputs': 2, 'n_outputs': 1,
'arguments': {}
}
def forward(self, x1, x2, y, dtype):
inputs = [x1, x2]; self.unify_devices(inputs)
outputs = [y] if y else [self.register_output(dtype)]
return self.run(inputs, outputs)
class Clamp(BaseModule):
def __init__(self, key, ctx, **kwargs):
super(Clamp, self).__init__(key, ctx, **kwargs)
self.min = kwargs.get('min', None)
self.max = kwargs.get('max', None)
if self.min is not None: self.min = float(self.min)
if self.max is not None: self.max = float(self.max)
self.register_arguments()
self.register_op()
def register_arguments(self):
"""No arguments clamp op."""
pass
def register_op(self):
self.op_meta = {
'op_type': 'Clip',
'n_inputs': 1, 'n_outputs': 1,
'arguments': {
'low': self.min,
'high': self.max,
}
}
def forward(self, x, y):
inputs = [x]; self.unify_devices(inputs)
outputs = [y] if y else [self.register_output(x.dtype)]
return self.run(inputs, outputs)
class Log(BaseModule):
def __init__(self, key, ctx, **kwargs):
super(Log, self).__init__(key, ctx, **kwargs)
self.register_arguments()
self.register_op()
def register_arguments(self):
"""No arguments for Log op."""
pass
def register_op(self):
self.op_meta = {
'op_type': 'Log',
'n_inputs': 1, 'n_outputs': 1,
'arguments': {}
}
def forward(self, x, y):
inputs = [x]; self.unify_devices(inputs)
outputs = [y] if y else [self.register_output(x.dtype)]
return self.run(inputs, outputs)
class Exp(BaseModule):
def __init__(self, key, ctx, **kwargs):
super(Exp, self).__init__(key, ctx, **kwargs)
self.register_arguments()
self.register_op()
def register_arguments(self):
"""No arguments for Log op."""
pass
def register_op(self):
self.op_meta = {
'op_type': 'Exp',
'n_inputs': 1, 'n_outputs': 1,
'arguments': {}
}
def forward(self, x, y):
inputs = [x]; self.unify_devices(inputs)
outputs = [y] if y else [self.register_output(x.dtype)]
return self.run(inputs, outputs)
\ No newline at end of file
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.ops.modules.base import BaseModule
class OneHot(BaseModule):
def __init__(self, key, ctx, **kwargs):
super(OneHot, self).__init__(key, ctx, **kwargs)
self.depth = kwargs.get('depth', 1)
self.register_arguments()
self.register_op()
def register_arguments(self):
"""No Arguments for concat op."""
pass
def register_op(self):
self.op_meta = {
'op_type': 'OneHot',
'n_inputs': 1, 'n_outputs': 1,
'arguments': {
'depth': self.depth,
}
}
def forward(self, x):
inputs = [x]; self.unify_devices(inputs)
outputs = [self.register_output(x.dtype)]
return self.run(inputs, outputs)
\ No newline at end of file
......@@ -22,6 +22,7 @@ class Fill(BaseModule):
super(Fill, self).__init__(key, ctx, **kwargs)
self.len_shape = kwargs.get('len_shape', 0)
self.value = kwargs.get('value', 0.0)
self.dtype = kwargs.get('dtype', 'float32')
self.register_arguments()
self.register_op()
......@@ -34,6 +35,7 @@ class Fill(BaseModule):
'op_type': 'Fill',
'n_inputs': 0, 'n_outputs': 1,
'arguments': {
'dtype': self.dtype,
'value': float(self.value),
'dims_desc': [d for d in self.shape] if len(self.shape) > 0 else None,
}
......
......@@ -62,9 +62,10 @@ def _repeat(input, times):
def _fill(input, shape, value):
ctx = MakeContext(inputs=[input]); len_shape = len(shape)
key = 'torch/ops/fill/{}:{}/ndims:#{}/value:{}'.format(
ctx[0].lower(), ctx[1], len_shape, value)
module = get_module(Fill, key, ctx, len_shape=len_shape, value=value)
key = 'torch/ops/fill/{}:{}/dtype:{}/ndims:#{}/value:{}'.format(
ctx[0].lower(), ctx[1], input._dtype, len_shape, value)
module = get_module(Fill, key, ctx, len_shape=len_shape,
value=value, dtype=input._dtype)
return module.forward(input, shape)
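Threading ``input._dtype`` through to the Fill op, and into the module cache key, is what fixes the changelog bug where ``Tensor.fill_``/``Tensor.zero_`` silently turned a non-empty tensor into float32. A sketch, with ``.type('int8')`` as an assumed cast via the ``AsType`` module imported above:

    x = torch.ones(2, 2).type('int8')   # hypothetically an int8 tensor now
    x.fill_(1)                          # stays int8 instead of becoming float32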
......
......@@ -35,7 +35,7 @@ def _update(param, grad, op_type, slot,
lr_mult=1.0, decay_mult=1.0):
ctx = MakeContext(inputs=[param])
key = 'torch/ops/{}/{}:{}/{}/{}'.format(op_type.lower(),
ctx[0].lower(),ctx[1], slot, param.name)
ctx[0].lower(), ctx[1], slot, param.name)
module = get_module(Update, key, ctx, op_type=op_type,
lr_mult=lr_mult, decay_mult=decay_mult, slot=slot)
return module.forward(param, grad)
\ No newline at end of file
......@@ -72,10 +72,9 @@ class Optimizer(object):
param_temp = group['slot'] + '/{}'
for k, v in group.items():
if k in self._mutable_parameters:
# convert all defaults to float32 for convenience
dg.workspace.FeedTensor(param_temp.format(
self._mutable_parameters[k]),
np.array([v], dtype=np.float32))
self._mutable_parameters[k]), v,
dtype='float32', force_cpu=True)
def _run_update_ops(self, group):
"""Generate & Run UpdateOps.
......@@ -107,10 +106,12 @@ class Optimizer(object):
# Run regular update ops
for p, g in zip(params, grads):
_update(p, g, op_type=self._update_type,
_update(p, g,
op_type=self._update_type,
slot=group['slot'],
lr_mult=group.get('lr_mult', 1.0),
decay_mult=group.get('decay_mult', 1.0))
decay_mult=group.get('decay_mult', 1.0)
)
def zero_grad(self):
"""Set all gradients to zeros.
......
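End to end, the hyper-parameters fed above are consumed by the per-group update ops. A sketch modeled on the PyTorch API this vm mirrors (``model``, ``criterion`` and the ``optim.SGD``/``backward`` entry points are assumptions, not shown in this diff):

    opt = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
    opt.zero_grad()
    loss = criterion(model(x), y)
    loss.backward()
    opt.step()   # feeds lr etc. as float32 CPU tensors, then runs the update ops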