Commit 5d518b6c by Ting PAN

io refactoring

1 parent 31e02b2b
Showing with 310 additions and 237 deletions
...@@ -24,7 +24,7 @@ set(3RDPARTY_DIR ${PROJECT_SOURCE_DIR}/../3rdparty) ...@@ -24,7 +24,7 @@ set(3RDPARTY_DIR ${PROJECT_SOURCE_DIR}/../3rdparty)
set(PYTHON_DIR /usr/include/python2.7) # prefer set(PYTHON_DIR /usr/include/python2.7) # prefer
#set(PYTHON_DIR /usr/include/python3.x) # optional, set specific version #set(PYTHON_DIR /usr/include/python3.x) # optional, set specific version
#set(ANACONDA_DIR /xxx/anaconda) # optional, set specific version below if using py3 #set(ANACONDA_DIR /xxx/anaconda) # optional, set specific version below if using py3
set(NUMPY_DIR /xxx/numpy) # require root folder of numpy package set(NUMPY_DIR /xxx/numpy) # require, root folder of numpy package
# set CUDA compiling architecture # set CUDA compiling architecture
set(CUDA_ARCH -gencode arch=compute_20,code=sm_20 set(CUDA_ARCH -gencode arch=compute_20,code=sm_20
......
...@@ -52,7 +52,7 @@ class CPUContext{ ...@@ -52,7 +52,7 @@ class CPUContext{
inline static void Delete(void* data) { free(data); } inline static void Delete(void* data) { free(data); }
template<typename T, class DstContext, class SrcContext> template<typename T, class DstContext, class SrcContext>
inline static void Copy(int n, T* dst, const T* src){ inline static void Copy(int n, T* dst, const T* src) {
if (dst == src) return; if (dst == src) return;
// only the basic types(e.g. int/float) can memcpy correctly // only the basic types(e.g. int/float) can memcpy correctly
if (std::is_fundamental<T>::value) if (std::is_fundamental<T>::value)
......
...@@ -119,7 +119,7 @@ class CUDAContext { ...@@ -119,7 +119,7 @@ class CUDAContext {
inline static void Delete(void* data) { cudaFree(data); } inline static void Delete(void* data) { cudaFree(data); }
template<typename T, class DstContext, class SrcContext> template<typename T, class DstContext, class SrcContext>
static void Copy(int n, T* dst, const T* src){ static void Copy(int n, T* dst, const T* src) {
if (dst == src) return; if (dst == src) return;
Memcpy<SrcContext, DstContext>(n * sizeof(T), (void*)dst, (const void*)src); Memcpy<SrcContext, DstContext>(n * sizeof(T), (void*)dst, (const void*)src);
} }
...@@ -148,7 +148,7 @@ class CUDAContext { ...@@ -148,7 +148,7 @@ class CUDAContext {
} }
#ifdef WITH_CUDNN #ifdef WITH_CUDNN
cudnnHandle_t cudnn_handle(){ cudnnHandle_t cudnn_handle() {
auto& handle = cuda_object_.cudnn_handle[gpu_id_]; auto& handle = cuda_object_.cudnn_handle[gpu_id_];
if (handle) { if (handle) {
return handle; return handle;
......
...@@ -77,7 +77,7 @@ class Tensor { ...@@ -77,7 +77,7 @@ class Tensor {
inline TIndex offset(const vector<TIndex>& vec) { inline TIndex offset(const vector<TIndex>& vec) {
CHECK_LE(vec.size(), ndim()); CHECK_LE(vec.size(), ndim());
TIndex offset = 0; TIndex offset = 0;
for (int i = 0; i < ndim(); i++){ for (int i = 0; i < ndim(); i++) {
offset = offset * dim(i); offset = offset * dim(i);
if (vec.size() > i) offset += vec[i]; if (vec.size() > i) offset += vec[i];
} }
...@@ -130,7 +130,7 @@ class Tensor { ...@@ -130,7 +130,7 @@ class Tensor {
} }
template <class Context> template <class Context>
void* raw_mutable_data(const TypeMeta& meta){ void* raw_mutable_data(const TypeMeta& meta) {
void* data_ptr; void* data_ptr;
active_data_ptr<Context>(&data_ptr); active_data_ptr<Context>(&data_ptr);
if (meta_ == meta && data_ptr) { if (meta_ == meta && data_ptr) {
......
...@@ -75,20 +75,20 @@ class TypeMeta { ...@@ -75,20 +75,20 @@ class TypeMeta {
bool Match() const { return (id_ == Id<T>()); } bool Match() const { return (id_ == Id<T>()); }
template <typename T> template <typename T>
static void Ctor(void* ptr, size_t n){ static void Ctor(void* ptr, size_t n) {
T* typed_ptr = static_cast<T*>(ptr); T* typed_ptr = static_cast<T*>(ptr);
for (unsigned int i = 0; i < n; i++) new(typed_ptr + i) T; for (unsigned int i = 0; i < n; i++) new(typed_ptr + i) T;
} }
template <typename T> template <typename T>
static void Copy(const void* src, void* dst, size_t n){ static void Copy(const void* src, void* dst, size_t n) {
const T* typed_src = static_cast<const T*>(src); const T* typed_src = static_cast<const T*>(src);
T* typed_dst = static_cast<T*>(dst); T* typed_dst = static_cast<T*>(dst);
for (unsigned int i = 0; i < n; i++) typed_dst[i] = typed_src[i]; for (unsigned int i = 0; i < n; i++) typed_dst[i] = typed_src[i];
} }
template <typename T> template <typename T>
static void Dtor(void* ptr, size_t n){ static void Dtor(void* ptr, size_t n) {
T* typed_ptr = static_cast<T*>(ptr); T* typed_ptr = static_cast<T*>(ptr);
for (unsigned int i = 0; i < n; i++) typed_ptr[i].~T(); for (unsigned int i = 0; i < n; i++) typed_ptr[i].~T();
} }
......
...@@ -44,7 +44,7 @@ class Workspace{ ...@@ -44,7 +44,7 @@ class Workspace{
return tensor_map_.count(query) > 0; return tensor_map_.count(query) > 0;
} }
inline Tensor* CreateTensor(const string& name){ inline Tensor* CreateTensor(const string& name) {
string query = GetTensorName(name); string query = GetTensorName(name);
if (!HasTensor(query)) if (!HasTensor(query))
tensor_map_[query] = unique_ptr<Tensor>(new Tensor(query)); tensor_map_[query] = unique_ptr<Tensor>(new Tensor(query));
...@@ -143,7 +143,7 @@ class Workspace{ ...@@ -143,7 +143,7 @@ class Workspace{
return graph_map_[graph_name]->Run(include, exclude); return graph_map_[graph_name]->Run(include, exclude);
} }
inline vector<string> GetGraphs(){ inline vector<string> GetGraphs() {
vector<string> names; vector<string> names;
for (auto& it : graph_map_) names.push_back(it.first); for (auto& it : graph_map_) names.push_back(it.first);
return names; return names;
......
...@@ -25,9 +25,9 @@ class AccuracyOp final: public Operator<Context> { ...@@ -25,9 +25,9 @@ class AccuracyOp final: public Operator<Context> {
public: public:
AccuracyOp(const OperatorDef& op_def, Workspace* ws) AccuracyOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws), : Operator<Context>(op_def, ws),
top_k(OperatorBase::GetSingleArg<int>("top_k", 1)){ top_k(OperatorBase::GetSingleArg<int>("top_k", 1)) {
vector<int> args = OperatorBase::GetRepeatedArg<int>("ignore_labels"); vector<int> args = OperatorBase::GetRepeatedArg<int>("ignore_labels");
if (args.size()){ if (args.size()) {
ignore_labels.Reshape(vector<TIndex>(1, args.size())); ignore_labels.Reshape(vector<TIndex>(1, args.size()));
int* ignore_data = ignore_labels.mutable_data<int, CPUContext>(); int* ignore_data = ignore_labels.mutable_data<int, CPUContext>();
for (int i = 0; i < args.size(); i++) ignore_data[i] = args[i]; for (int i = 0; i < args.size(); i++) ignore_data[i] = args[i];
......
...@@ -39,7 +39,7 @@ class ROIAlignGradientOp : public Operator<Context> { ...@@ -39,7 +39,7 @@ class ROIAlignGradientOp : public Operator<Context> {
: Operator<Context>(op_def, ws), : Operator<Context>(op_def, ws),
pool_h(OperatorBase::GetSingleArg<int>("pool_h", 0)), pool_h(OperatorBase::GetSingleArg<int>("pool_h", 0)),
pool_w(OperatorBase::GetSingleArg<int>("pool_w", 0)), pool_w(OperatorBase::GetSingleArg<int>("pool_w", 0)),
spatial_scale(OperatorBase::GetSingleArg<float>("spatial_scale", 1.0)){ spatial_scale(OperatorBase::GetSingleArg<float>("spatial_scale", 1.0)) {
CHECK_GT(pool_h, 0) << "\npool_h must > 0"; CHECK_GT(pool_h, 0) << "\npool_h must > 0";
CHECK_GT(pool_w, 0) << "\npool_w must > 0"; CHECK_GT(pool_w, 0) << "\npool_w must > 0";
} }
......
...@@ -54,7 +54,7 @@ inline void LoadCaffeModel(string file, string scope, Workspace* ws) { ...@@ -54,7 +54,7 @@ inline void LoadCaffeModel(string file, string scope, Workspace* ws) {
ReadProtoFromBinaryFile(file.c_str(), &net_param); ReadProtoFromBinaryFile(file.c_str(), &net_param);
LOG(INFO) << "Restore From Model @: " << file << "......"; LOG(INFO) << "Restore From Model @: " << file << "......";
LOG(INFO) << "Model Format: CaffeModel"; LOG(INFO) << "Model Format: CaffeModel";
for (int i = 0; i < net_param.layer_size(); i++){ for (int i = 0; i < net_param.layer_size(); i++) {
const LayerParameter& layer = net_param.layer(i); const LayerParameter& layer = net_param.layer(i);
const string& layer_name = layer.name(); const string& layer_name = layer.name();
string prefix = scope + layer_name + "@param"; string prefix = scope + layer_name + "@param";
......
...@@ -22,7 +22,7 @@ inline std::vector<std::string> SplitString(const std::string& str, ...@@ -22,7 +22,7 @@ inline std::vector<std::string> SplitString(const std::string& str,
std::vector<std::string> ret; std::vector<std::string> ret;
std::string temp(str); std::string temp(str);
size_t pos; size_t pos;
while (pos = temp.find(c), pos != std::string::npos){ while (pos = temp.find(c), pos != std::string::npos) {
ret.push_back(temp.substr(0, pos)); ret.push_back(temp.substr(0, pos));
temp.erase(0, pos + 1); temp.erase(0, pos + 1);
} }
......
...@@ -31,7 +31,7 @@ const TypeMeta& NumpyTypeToDragon(int numpy_type) { ...@@ -31,7 +31,7 @@ const TypeMeta& NumpyTypeToDragon(int numpy_type) {
{ NPY_FLOAT16, TypeMeta::Make<float16>() }, { NPY_FLOAT16, TypeMeta::Make<float16>() },
{ NPY_UINT8, TypeMeta::Make<uint8_t>() }}; { NPY_UINT8, TypeMeta::Make<uint8_t>() }};
static TypeMeta unknown_type; // id = 0 static TypeMeta unknown_type;
return dragon_type_map.count(numpy_type) ? dragon_type_map[numpy_type] : unknown_type; return dragon_type_map.count(numpy_type) ? dragon_type_map[numpy_type] : unknown_type;
} }
...@@ -50,7 +50,7 @@ REGISTER_TENSOR_FETCHER(TypeMeta::Id<NumpyFetcher>(), NumpyFetcher); ...@@ -50,7 +50,7 @@ REGISTER_TENSOR_FETCHER(TypeMeta::Id<NumpyFetcher>(), NumpyFetcher);
REGISTER_TENSOR_FETCHER(TypeMeta::Id<StringFetcher>(), StringFetcher); REGISTER_TENSOR_FETCHER(TypeMeta::Id<StringFetcher>(), StringFetcher);
REGISTER_TENSOR_FEEDER(TypeMeta::Id<NumpyFeeder>(), NumpyFeeder); REGISTER_TENSOR_FEEDER(TypeMeta::Id<NumpyFeeder>(), NumpyFeeder);
extern "C"{ extern "C" {
PyObject* RegisteredOperatorsCC(PyObject* self, PyObject* args) { PyObject* RegisteredOperatorsCC(PyObject* self, PyObject* args) {
set<string> all_keys; set<string> all_keys;
...@@ -123,7 +123,7 @@ bool SwitchWorkspaceInternal(const string& name, const bool create_if_missing) { ...@@ -123,7 +123,7 @@ bool SwitchWorkspaceInternal(const string& name, const bool create_if_missing) {
} else if (create_if_missing) { } else if (create_if_missing) {
unique_ptr<Workspace> new_workspace(new Workspace()); unique_ptr<Workspace> new_workspace(new Workspace());
g_workspace = new_workspace.get(); g_workspace = new_workspace.get();
g_workspaces[name] = std::move(new_workspace); // ??? g_workspaces[name] = std::move(new_workspace);
g_current_workspace = name; g_current_workspace = name;
return true; return true;
} else { } else {
......
...@@ -33,7 +33,7 @@ inline PyObject* StdStringToPyBytes(const std::string& str) { ...@@ -33,7 +33,7 @@ inline PyObject* StdStringToPyBytes(const std::string& str) {
return PyBytes_FromStringAndSize(str.c_str(), str.size()); return PyBytes_FromStringAndSize(str.c_str(), str.size());
} }
template <typename T> template <typename T>
inline void MakeStringInternal(std::stringstream& ss, const T& t){ ss << t; } inline void MakeStringInternal(std::stringstream& ss, const T& t) { ss << t; }
template <typename T,typename ... Args> template <typename T,typename ... Args>
inline void MakeStringInternal(std::stringstream& ss, const T& t, const Args& ... args) { inline void MakeStringInternal(std::stringstream& ss, const T& t, const Args& ... args) {
...@@ -124,7 +124,7 @@ class NumpyFeeder : public TensorFeederBase { ...@@ -124,7 +124,7 @@ class NumpyFeeder : public TensorFeederBase {
Tensor* tensor) override { Tensor* tensor) override {
PyArrayObject* array = PyArray_GETCONTIGUOUS(original_array); PyArrayObject* array = PyArray_GETCONTIGUOUS(original_array);
const TypeMeta& meta = NumpyTypeToDragon(PyArray_TYPE(array)); const TypeMeta& meta = NumpyTypeToDragon(PyArray_TYPE(array));
if (meta.id() == 0){ if (meta.id() == 0) {
PyErr_SetString(PyExc_TypeError, "numpy data type is not supported."); PyErr_SetString(PyExc_TypeError, "numpy data type is not supported.");
return nullptr; return nullptr;
} }
......
...@@ -63,11 +63,11 @@ inline PyObject* MPICreateGroupCC(PyObject* self, PyObject* args) { ...@@ -63,11 +63,11 @@ inline PyObject* MPICreateGroupCC(PyObject* self, PyObject* args) {
// check inclue ranks // check inclue ranks
int size = PyList_Size(incl); int size = PyList_Size(incl);
if (size > 0){ if (size > 0) {
all_ranks.clear(); all_ranks.clear();
unique_ptr<int> incl_ranks(new int[size]); unique_ptr<int> incl_ranks(new int[size]);
int* ranks = incl_ranks.get(); int* ranks = incl_ranks.get();
for (int i = 0; i < size; i++){ for (int i = 0; i < size; i++) {
ranks[i] = _PyInt_AsInt(PyList_GetItem(incl, i)); ranks[i] = _PyInt_AsInt(PyList_GetItem(incl, i));
all_ranks.insert(ranks[i]); all_ranks.insert(ranks[i]);
} }
...@@ -81,7 +81,7 @@ inline PyObject* MPICreateGroupCC(PyObject* self, PyObject* args) { ...@@ -81,7 +81,7 @@ inline PyObject* MPICreateGroupCC(PyObject* self, PyObject* args) {
all_ranks.clear(); Set<int> tmp; all_ranks.clear(); Set<int> tmp;
unique_ptr<int> excl_ranks(new int[size]); unique_ptr<int> excl_ranks(new int[size]);
int* ranks = excl_ranks.get(); int* ranks = excl_ranks.get();
for (int i = 0; i < size; i++){ for (int i = 0; i < size; i++) {
ranks[i] = _PyInt_AsInt(PyList_GetItem(excl, i)); ranks[i] = _PyInt_AsInt(PyList_GetItem(excl, i));
tmp.insert(ranks[i]); tmp.insert(ranks[i]);
} }
...@@ -97,7 +97,7 @@ inline PyObject* MPICreateGroupCC(PyObject* self, PyObject* args) { ...@@ -97,7 +97,7 @@ inline PyObject* MPICreateGroupCC(PyObject* self, PyObject* args) {
if (local_comm != MPI_COMM_NULL) { if (local_comm != MPI_COMM_NULL) {
int world_rank, local_size; int world_rank, local_size;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
if (world_rank == local_root){ if (world_rank == local_root) {
MPI_Comm_size(local_comm, &local_size); MPI_Comm_size(local_comm, &local_size);
std::stringstream ss; std::stringstream ss;
ss << "Rank[" << world_rank << "]: " ss << "Rank[" << world_rank << "]: "
......
# --------------------------------------------------------
# Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
from .data_batch import DataBatch
# -------------------------------------------------------- # --------------------------------------------------------
# Caffe for Dragon # Dragon
# Copyright(c) 2017 SeetaTech # Copyright(c) 2017 SeetaTech
# Written by Ting Pan # Written by Ting Pan
# -------------------------------------------------------- # --------------------------------------------------------
...@@ -10,7 +10,7 @@ from six.moves import range as xrange ...@@ -10,7 +10,7 @@ from six.moves import range as xrange
from dragon.config import logger from dragon.config import logger
from .__init__ import GetProperty from .utils import GetProperty
class BlobFetcher(Process): class BlobFetcher(Process):
def __init__(self, **kwargs): def __init__(self, **kwargs):
...@@ -30,16 +30,16 @@ class BlobFetcher(Process): ...@@ -30,16 +30,16 @@ class BlobFetcher(Process):
atexit.register(cleanup) atexit.register(cleanup)
def im_list_to_blob(self): def im_list_to_blob(self):
datum = self.Q_in.get() # (h, w, BGR) datum = self.Q_in.get()
im = datum[0]; h, w, c = im.shape im_blob = []
im_blob = np.zeros((self._batch_size, h, w, c), dtype=np.float32)
label_blob = np.zeros((self._batch_size, len(datum[1])), dtype=np.float32) \ label_blob = np.zeros((self._batch_size, len(datum[1])), dtype=np.float32) \
if len(datum) > 1 else None if len(datum) > 1 else None
for i in xrange(0, self._batch_size): for i in xrange(0, self._batch_size):
im_blob[i, 0:h, 0:w, :] = datum[0] im_blob.append(datum[0])
if label_blob is not None: label_blob[i, :] = datum[1] if label_blob is not None: label_blob[i, :] = datum[1]
if i != self._batch_size - 1: datum = self.Q_in.get() if i != self._batch_size - 1: datum = self.Q_in.get()
channel_swap = (0, 3, 1, 2) channel_swap = (0, 3, 1, 2)
im_blob = np.array(im_blob, dtype=np.float32)
im_blob = im_blob.transpose(channel_swap) im_blob = im_blob.transpose(channel_swap)
return (im_blob, label_blob) return (im_blob, label_blob)
......
# -------------------------------------------------------- # --------------------------------------------------------
# Caffe for Dragon # Dragon
# Copyright(c) 2017 SeetaTech # Copyright(c) 2017 SeetaTech
# Written by Ting Pan # Written by Ting Pan
# -------------------------------------------------------- # --------------------------------------------------------
import sys
import time import time
import pprint import pprint
from multiprocessing import Queue from multiprocessing import Queue
if sys.version_info >= (3,0):
from queue import Queue as Queue2
else:
from Queue import Queue as Queue2
import threading
from six.moves import range as xrange from six.moves import range as xrange
import dragon.core.mpi as mpi import dragon.core.mpi as mpi
...@@ -16,10 +22,11 @@ from .data_reader import DataReader ...@@ -16,10 +22,11 @@ from .data_reader import DataReader
from .data_transformer import DataTransformer from .data_transformer import DataTransformer
from .blob_fetcher import BlobFetcher from .blob_fetcher import BlobFetcher
from .__init__ import GetProperty from .utils import GetProperty
class DataBatch(object): class DataBatch(threading.Thread):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(DataBatch, self).__init__()
"""DataBatch use Triple-Buffering to speed up""" """DataBatch use Triple-Buffering to speed up"""
...@@ -35,10 +42,10 @@ class DataBatch(object): ...@@ -35,10 +42,10 @@ class DataBatch(object):
kwargs['group_size'] = group_size kwargs['group_size'] = group_size
# configuration # configuration
self._prefetch = GetProperty(kwargs, 'prefetch', 40) self._prefetch = GetProperty(kwargs, 'prefetch', 5)
self._num_readers = GetProperty(kwargs, 'num_readers', 1) self._num_readers = GetProperty(kwargs, 'num_readers', 1)
self._num_transformers = GetProperty(kwargs, 'num_transformers', -1) self._num_transformers = GetProperty(kwargs, 'num_transformers', -1)
self._num_fetchers = GetProperty(kwargs, 'num_fetchers', 3) self._num_fetchers = GetProperty(kwargs, 'num_fetchers', 1)
# default policy # default policy
if self._num_transformers == -1: if self._num_transformers == -1:
...@@ -60,6 +67,7 @@ class DataBatch(object): ...@@ -60,6 +67,7 @@ class DataBatch(object):
self.Q_level_1 = Queue(self._prefetch * self._num_readers * self._batch_size) self.Q_level_1 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q_level_2 = Queue(self._prefetch * self._num_readers * self._batch_size) self.Q_level_2 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q_level_3 = Queue(self._prefetch * self._num_readers) self.Q_level_3 = Queue(self._prefetch * self._num_readers)
self.Q_level_4 = Queue2(self._prefetch * self._num_readers)
# init readers # init readers
self._readers = [] self._readers = []
...@@ -102,11 +110,16 @@ class DataBatch(object): ...@@ -102,11 +110,16 @@ class DataBatch(object):
self._fetchers.append(fetcher) self._fetchers.append(fetcher)
time.sleep(0.1) time.sleep(0.1)
self.daemon = True
self.start()
#self.echo() #self.echo()
@property def run(self):
def blobs(self): while True:
return self.Q_level_3.get() self.Q_level_4.put(self.Q_level_3.get())
def get(self):
return self.Q_level_4.get()
def echo(self): def echo(self):
logger.info('---------------------------------------------------------') logger.info('---------------------------------------------------------')
......
# -------------------------------------------------------- # --------------------------------------------------------
# Caffe for Dragon # Dragon
# Copyright(c) 2017 SeetaTech # Copyright(c) 2017 SeetaTech
# Written by Ting Pan # Written by Ting Pan
# -------------------------------------------------------- # --------------------------------------------------------
...@@ -12,8 +12,7 @@ import dragon.config as config ...@@ -12,8 +12,7 @@ import dragon.config as config
from dragon.config import logger from dragon.config import logger
from dragon.tools.db import LMDB from dragon.tools.db import LMDB
from .__init__ import GetProperty from .utils import GetProperty
class DataReader(Process): class DataReader(Process):
def __init__(self, **kwargs): def __init__(self, **kwargs):
......
# -------------------------------------------------------- # --------------------------------------------------------
# Caffe for Dragon # Dragon
# Copyright(c) 2017 SeetaTech # Copyright(c) 2017 SeetaTech
# Written by Ting Pan # Written by Ting Pan
# -------------------------------------------------------- # --------------------------------------------------------
...@@ -12,7 +12,7 @@ import dragon.config as config ...@@ -12,7 +12,7 @@ import dragon.config as config
from dragon.config import logger from dragon.config import logger
import dragon.vm.caffe.proto.caffe_pb2 as pb import dragon.vm.caffe.proto.caffe_pb2 as pb
from .__init__ import GetProperty from .utils import GetProperty
try: try:
import cv2 import cv2
...@@ -131,5 +131,3 @@ class DataTransformer(Process): ...@@ -131,5 +131,3 @@ class DataTransformer(Process):
while True: while True:
serialized = self.Q_in.get() serialized = self.Q_in.get()
self.Q_out.put(self.transform_image_label(serialized)) self.Q_out.put(self.transform_image_label(serialized))
\ No newline at end of file
# -------------------------------------------------------- # --------------------------------------------------------
# Caffe for Dragon # Dragon
# Copyright(c) 2017 SeetaTech # Copyright(c) 2017 SeetaTech
# Written by Ting Pan # Written by Ting Pan
# -------------------------------------------------------- # --------------------------------------------------------
......
# -------------------------------------------------------- # --------------------------------------------------------
# Caffe for Dragon # Dragon
# Copyright(c) 2017 SeetaTech # Copyright(c) 2017 SeetaTech
# Written by Ting Pan # Written by Ting Pan
# -------------------------------------------------------- # --------------------------------------------------------
import dragon.vm.caffe as caffe
import dragon.core.workspace as ws import dragon.core.workspace as ws
from .minibatch import DataBatch from dragon.io.data_batch import DataBatch
class DataLayer(caffe.Layer): class MiniBatchOp(object):
def setup(self, bottom, top):
def setup(self, inputs, outputs):
kwargs = eval(self.param_str) kwargs = eval(self.param_str)
self._data_batch = DataBatch(**kwargs) self._data_batch = DataBatch(**kwargs)
def forward(self, bottom, top): def run(self, inputs, outputs):
blobs = self._data_batch.blobs blobs = self._data_batch.get()
for idx, blob in enumerate(blobs): for idx, blob in enumerate(blobs):
ws.FeedTensor(top[idx], blob) ws.FeedTensor(outputs[idx], blob)
\ No newline at end of file \ No newline at end of file
...@@ -10,7 +10,6 @@ from dragon.operators.utils import Run ...@@ -10,7 +10,6 @@ from dragon.operators.utils import Run
def LMDBData(**kwargs): def LMDBData(**kwargs):
""" """
:param kwargs: a dict of imagenet data param
:param --> mean_value: a list of mean values for channles [B-G-R] :param --> mean_value: a list of mean values for channles [B-G-R]
:param --> source: a str of the images root directory :param --> source: a str of the images root directory
:param --> imageset: a str of text file contains image name / label :param --> imageset: a str of text file contains image name / label
...@@ -30,8 +29,8 @@ def LMDBData(**kwargs): ...@@ -30,8 +29,8 @@ def LMDBData(**kwargs):
args = locals(); kwargs = args['kwargs'] args = locals(); kwargs = args['kwargs']
del args['kwargs']; kwargs = dict(args, **kwargs) del args['kwargs']; kwargs = dict(args, **kwargs)
kwargs['module'] = 'dragon.vm.caffe.io.data_layer' kwargs['module'] = 'dragon.operators.custom.minibatch'
kwargs['op'] = 'DataLayer' kwargs['op'] = 'MiniBatchOp'
return Run([], param_str=str(kwargs), nout=2, **kwargs) return Run([], param_str=str(kwargs), nout=2, **kwargs)
......
# --------------------------------------------------------
# Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
""" Generate LMDB from images """
import os
import sys
import time
import shutil
import argparse
import cv2
try:
import numpy as np
except: pass
from dragon.tools.db import LMDB
from dragon.vm.caffe.proto import caffe_pb2
def resize_image(im, resize):
    """Resize an image so that its shorter edge becomes ``resize`` pixels.

    The longer edge is scaled by the same ratio, truncated to an integer.

    :param im: image array whose ``shape`` starts with (height, width)
    :param resize: target length of the shorter edge, in pixels
    :return: whatever ``cv2.resize`` returns for the computed size
    """
    height, width = im.shape[0], im.shape[1]
    # cv2.resize expects the target size as integer (width, height).
    # Floor division keeps both entries integral on Python 3, where ``/``
    # would yield floats, and matches the Python 2 result of the old code.
    if height > width:
        newsize = (resize, height * resize // width)
    else:
        newsize = (width * resize // height, resize)
    return cv2.resize(im, newsize)
def make_db(args):
    """Build an LMDB of JPEG-encoded images from an image list file.

    Every non-blank line of ``args.list`` must hold an image path (relative
    to ``args.root``) followed by an integer label. Each image is optionally
    resized and zero-padded, encoded as JPEG and stored as a serialized
    ``caffe_pb2.Datum`` under a zero-filled running index. The keys ``size``
    and ``zfill`` are written last, and the list file is copied into the
    database directory as ``image_list.txt``.

    :param args: parsed command-line namespace providing ``root``, ``list``,
        ``database``, ``zfill``, ``resize``, ``pad``, ``quality`` and
        ``shuffle``
    :raises ValueError: if the list file is missing, the database directory
        already exists, or a record lacks a path/label pair
    :raises IOError: if an image cannot be read
    """
    if os.path.isfile(args.list) is False:
        raise ValueError('the path of image list is invalid.')
    if os.path.isdir(args.database) is True:
        raise ValueError('the database is already exist or invalid.')

    print('start time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(args.database, mode='w')

    count = 0
    zfill_flag = '{0:0%d}' % (args.zfill)
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), args.quality]

    start_time = time.time()

    # Read the list once inside a context manager so the handle is closed,
    # and drop blank lines so they neither inflate the total nor break the
    # path/label split below.
    with open(args.list, 'r') as input_file:
        records = [line for line in input_file if line.strip()]
    total_line = len(records)

    if args.shuffle:
        import random
        random.shuffle(records)

    for record in records:
        count += 1
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()

        fields = record.split()
        if len(fields) < 2:
            raise ValueError('invalid record (expected "<path> <label>"): {0!r}'
                             .format(record))
        path = fields[0]
        label = fields[1]

        image_path = os.path.join(args.root, path)
        img = cv2.imread(image_path)
        # cv2.imread signals failure by returning None rather than raising.
        if img is None:
            raise IOError('failed to read image: {0}'.format(image_path))

        if args.resize > 0:
            img = resize_image(img, args.resize)
        if args.pad > 0:
            pad_img = np.zeros((img.shape[0] + 2 * args.pad,
                                img.shape[1] + 2 * args.pad, 3), dtype=img.dtype)
            pad_img[args.pad : args.pad + img.shape[0],
                    args.pad : args.pad + img.shape[1], :] = img
            img = pad_img
        result, imgencode = cv2.imencode('.jpg', img, encode_param)

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = True
        # tobytes() is the supported spelling of the deprecated tostring().
        datum.data = imgencode.tobytes()
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
    db.put('size', str(count))
    db.put('zfill', str(args.zfill))
    db.commit()
    db.close()

    shutil.copy(args.list, args.database + '/image_list.txt')
    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
    # Divide by float constants so the size is not truncated on Python 2.
    print('The size of database is {0} MB.'.
          format(os.path.getsize(args.database + '/data.mdb') / 1000.0 / 1000.0))
def _str2bool(text):
    """Convert a command-line token such as 'True' or 'false' to a bool."""
    lowered = text.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays False, as it was under the former ``type=bool``.
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {0!r}'.format(text))


def parse_args():
    """Parse the command-line options of the LMDB creation script.

    Prints the help text and exits with status 1 when fewer than three
    command-line tokens follow the program name.

    :return: the ``argparse`` namespace with ``root``, ``list``, ``database``,
        ``zfill``, ``resize``, ``pad``, ``quality`` and ``shuffle``
    """
    parser = argparse.ArgumentParser(description='Create LMDB from images for classification.')
    parser.add_argument('--root', help='the root folder of raw images')
    parser.add_argument('--list', help='the filepath of image list')
    parser.add_argument('--database', help='the filepath of database')
    parser.add_argument('--zfill', type=int, default=8, help='zfill for the key of database')
    parser.add_argument('--resize', type=int, default=0, help='resize the shorter edge of image to the newsize')
    parser.add_argument('--pad', type=int, default=0, help='zero-pad the image')
    parser.add_argument('--quality', type=int, default=95, help='JPEG quality for encoding, 1-100')
    # ``type=bool`` would turn every non-empty string, including 'False',
    # into True; parse the text explicitly instead.
    parser.add_argument('--shuffle', type=_str2bool, default=True,
                        help='randomize the order in list file (True/False)')

    if len(sys.argv) < 4:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args
# Script entry point: parse the command line, then build the database.
if __name__ == '__main__':
args = parse_args()
make_db(args)
\ No newline at end of file
# --------------------------------------------------------
# Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
from google.protobuf.message import Message
from protos import dragon_pb2 as pb
import numpy as np
def MakeArgument(key, value):
    """Build a ``pb.Argument`` named ``key`` that carries ``value``.

    Scalars are dispatched on their exact type: float -> ``f``, int -> ``i``,
    ``np.int64`` -> ``i64``, text -> ``s``, bool -> ``b``. A protobuf
    ``Message`` is stored serialized in ``s``. Any other value is treated as
    a sequence and stored in ``floats``, ``ints`` or ``strings`` (messages
    are serialized into ``strings``).

    :param key: the argument name
    :param value: a scalar, a protobuf message, or a homogeneous sequence
    :return: the populated ``pb.Argument``
    :raises ValueError: if the value type is not supported
    """
    argument = pb.Argument()
    argument.name = key

    # ``unicode`` only exists on Python 2. type(u'') is ``unicode`` there and
    # ``str`` on Python 3, so the text checks below work on both.
    text_type = type(u'')
    value_type = type(value)

    # Exact type checks are deliberate: bool is a subclass of int, so
    # isinstance() would send booleans down the integer branch.
    if value_type is float: argument.f = value
    elif value_type is int: argument.i = value
    elif value_type is np.int64: argument.i64 = int(value)
    elif value_type is str or value_type is text_type: argument.s = value
    elif value_type is bool: argument.b = value
    elif isinstance(value, Message): argument.s = value.SerializeToString()
    else:
        # Everything else must be iterable; a non-iterable value (e.g. None)
        # is reported as an unknown type rather than leaking a TypeError.
        try:
            items = list(value)
        except TypeError:
            items = None
        if items is None:
            raise ValueError('unknown argument type: key={} value={} value type={}' \
                .format(key, value, type(value)))
        if all(type(v) is float for v in items): argument.floats.extend(items)
        elif all(type(v) is int for v in items): argument.ints.extend(items)
        elif all(type(v) is str or type(v) is text_type for v in items):
            argument.strings.extend(items)
        elif all(isinstance(v, Message) for v in items):
            argument.strings.extend([v.SerializeToString() for v in items])
        else: raise ValueError('unknown argument type: key={} value={} value type={}' \
            .format(key, value, type(value)))
    return argument
def MakeOperatorDef(op_type, inputs, outputs, name='',
                    device_option=None, arg=None, engine=None, **kwargs):
    """Assemble a ``pb.OperatorDef`` from its type, tensors and arguments.

    :param op_type: value stored in the ``type`` field
    :param inputs: iterable of input tensors; each is stored as ``str(t)``
    :param outputs: iterable of output tensors; each is stored as ``str(t)``
    :param name: value stored in the ``name`` field
    :param device_option: optional message copied into ``device_option``
    :param arg: optional iterable of ready-made arguments to append
    :param engine: optional value stored in the ``engine`` field
    :param kwargs: extra arguments; ``random_seed`` goes to the device
        option, every other non-None entry is converted by ``MakeArgument``
    :return: the populated ``pb.OperatorDef``
    """
    op_def = pb.OperatorDef()
    op_def.type = op_type
    op_def.name = name
    op_def.input.extend([str(item) for item in inputs])
    op_def.output.extend([str(item) for item in outputs])

    if device_option is not None:
        op_def.device_option.CopyFrom(device_option)
    if engine is not None:
        op_def.engine = engine
    # Must follow the CopyFrom above, which would otherwise overwrite the seed.
    if 'random_seed' in kwargs:
        op_def.device_option.random_seed = kwargs.pop('random_seed')
    if arg is not None:
        op_def.arg.extend(arg)

    for arg_name, arg_value in kwargs.items():
        if arg_value is not None:
            op_def.arg.add().CopyFrom(MakeArgument(arg_name, arg_value))
    return op_def
def MakeDeviceOption(device_type, gpu_id, rng_seed = None):
    """Create a ``pb.DeviceOption`` for the given device type and GPU id.

    :param device_type: value stored in the ``device_type`` field
    :param gpu_id: value stored in the ``gpu_id`` field
    :param rng_seed: optional value for ``random_seed``; left unset when None
    :return: the populated ``pb.DeviceOption``
    """
    device_option = pb.DeviceOption()
    device_option.device_type, device_option.gpu_id = device_type, gpu_id
    if rng_seed is None:
        return device_option
    device_option.random_seed = rng_seed
    return device_option
# fix the python stdout
class Unbuffered(object):
    """Stream proxy that flushes the wrapped stream after every write.

    Attributes not defined here are looked up on the wrapped stream, so the
    proxy can stand in for it.
    """

    def __init__(self, stream):
        # The underlying stream that receives all output.
        self.stream = stream

    def write(self, data):
        """Write ``data`` to the wrapped stream and flush immediately."""
        self.stream.write(data)
        self.stream.flush()

    def writelines(self, lines):
        """Write every item of ``lines`` and flush immediately.

        Without this override the call would be forwarded by ``__getattr__``
        to the wrapped stream and skip the flush.
        """
        self.stream.writelines(lines)
        self.stream.flush()

    def __getattr__(self, attr):
        # Only reached for names not found on the proxy itself.
        return getattr(self.stream, attr)
# Module-level side effect: replace sys.stdout with the flushing wrapper when
# this module is imported, so Python output is flushed on every write
# (intended for MPI runs where C++ and Python both print).
import sys
sys.stdout = Unbuffered(sys.stdout)
\ No newline at end of file
...@@ -221,7 +221,7 @@ message SolverParameter { ...@@ -221,7 +221,7 @@ message SolverParameter {
// RMSProp decay value // RMSProp decay value
// MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
optional float rms_decay = 38; optional float rms_decay = 38 [default = 0.99];
// If true, print information about the state of the net that may help with // If true, print information about the state of the net that may help with
// debugging learning problems. // debugging learning problems.
...@@ -676,7 +676,7 @@ message DataParameter { ...@@ -676,7 +676,7 @@ message DataParameter {
optional bool force_encoded_color = 9 [default = false]; optional bool force_encoded_color = 9 [default = false];
// Prefetch queue (Number of batches to prefetch to host memory, increase if // Prefetch queue (Number of batches to prefetch to host memory, increase if
// data access bandwidth varies). // data access bandwidth varies).
optional uint32 prefetch = 10 [default = 40]; optional uint32 prefetch = 10 [default = 5];
} }
message DropoutParameter { message DropoutParameter {
......
...@@ -10,7 +10,7 @@ CUDAObject CUDAContext::cuda_object_; ...@@ -10,7 +10,7 @@ CUDAObject CUDAContext::cuda_object_;
// cpu <- gpu // cpu <- gpu
template<> void CPUContext::Memcpy<CPUContext, CUDAContext>( template<> void CPUContext::Memcpy<CPUContext, CUDAContext>(
size_t nbytes, void* dst, const void* src){ size_t nbytes, void* dst, const void* src) {
#ifdef WITH_CUDA #ifdef WITH_CUDA
CUDAContext ctx(POINTER_DEVICE(src)); CUDAContext ctx(POINTER_DEVICE(src));
ctx.Memcpy<CPUContext, CUDAContext>(nbytes, dst, src); ctx.Memcpy<CPUContext, CUDAContext>(nbytes, dst, src);
...@@ -21,7 +21,7 @@ template<> void CPUContext::Memcpy<CPUContext, CUDAContext>( ...@@ -21,7 +21,7 @@ template<> void CPUContext::Memcpy<CPUContext, CUDAContext>(
// gpu <- cpu // gpu <- cpu
template<> void CPUContext::Memcpy<CUDAContext, CPUContext>( template<> void CPUContext::Memcpy<CUDAContext, CPUContext>(
size_t nbytes, void* dst, const void* src){ size_t nbytes, void* dst, const void* src) {
#ifdef WITH_CUDA #ifdef WITH_CUDA
CUDAContext ctx(POINTER_DEVICE(dst)); CUDAContext ctx(POINTER_DEVICE(dst));
ctx.Memcpy<CUDAContext, CPUContext>(nbytes, dst, src); ctx.Memcpy<CUDAContext, CPUContext>(nbytes, dst, src);
......
...@@ -151,13 +151,13 @@ GraphDef Graph::Prune(const GraphDef& graph_def) { ...@@ -151,13 +151,13 @@ GraphDef Graph::Prune(const GraphDef& graph_def) {
OperatorDef op_def; OperatorDef op_def;
op_def.CopyFrom(graph_def.op(it)); op_def.CopyFrom(graph_def.op(it));
// handle inputs // handle inputs
for (int i = 0; i < graph_def.op(it).input_size(); i++){ for (int i = 0; i < graph_def.op(it).input_size(); i++) {
string input = graph_def.op(it).input(i); string input = graph_def.op(it).input(i);
if (!colored_[input] || !outputs.count(input)) if (!colored_[input] || !outputs.count(input))
*op_def.mutable_input(i) = "ignore"; *op_def.mutable_input(i) = "ignore";
} }
// handle outputs // handle outputs
for (int i = 0; i < graph_def.op(it).output_size(); i++){ for (int i = 0; i < graph_def.op(it).output_size(); i++) {
string output = graph_def.op(it).output(i); string output = graph_def.op(it).output(i);
if (!colored_[output]) *op_def.mutable_output(i) = "ignore"; if (!colored_[output]) *op_def.mutable_output(i) = "ignore";
else outputs.insert(op_def.output(i)); else outputs.insert(op_def.output(i));
......
...@@ -23,7 +23,7 @@ CheckTuple GraphGradientMaker::CheckMissingGrad(OperatorDef* forward_op) { ...@@ -23,7 +23,7 @@ CheckTuple GraphGradientMaker::CheckMissingGrad(OperatorDef* forward_op) {
inputs_to_grads_[output] = g_output; inputs_to_grads_[output] = g_output;
// consider generate virtual grad // consider generate virtual grad
else if (targets_set_.count(output) && g_output != "ignore"){ else if (targets_set_.count(output) && g_output != "ignore") {
gen_grads.push_back({ output, idx }); gen_grads.push_back({ output, idx });
inputs_to_grads_[output] = g_output; inputs_to_grads_[output] = g_output;
} }
...@@ -50,7 +50,7 @@ GraphDef GraphGradientMaker::Make() { ...@@ -50,7 +50,7 @@ GraphDef GraphGradientMaker::Make() {
Set<string> all_split_grads; Set<string> all_split_grads;
// PLAY for the forward // PLAY for the forward
for (auto& op : forward_def_.op()){ for (auto& op : forward_def_.op()) {
if (NoGradientRegistry()->Has(op.type())) continue; if (NoGradientRegistry()->Has(op.type())) continue;
for (auto& input : op.input()) inputs_count[input]++; for (auto& input : op.input()) inputs_count[input]++;
} }
...@@ -73,17 +73,17 @@ GraphDef GraphGradientMaker::Make() { ...@@ -73,17 +73,17 @@ GraphDef GraphGradientMaker::Make() {
Gradient grad = MakeGradientForOp(*op, g_outputs); Gradient grad = MakeGradientForOp(*op, g_outputs);
// replace terms // replace terms
for (auto& g_op : grad.ops){ for (auto& g_op : grad.ops) {
g_op.set_name(GetOperatorName()); g_op.set_name(GetOperatorName());
for (int i = 0; i < g_op.input_size(); i++){ for (int i = 0; i < g_op.input_size(); i++) {
string* input = g_op.mutable_input(i); string* input = g_op.mutable_input(i);
if (terms_.count(*input)) *input = terms_[*input]; if (terms_.count(*input)) *input = terms_[*input];
} }
for (int i = 0; i < g_op.output_size(); i++){ for (int i = 0; i < g_op.output_size(); i++) {
string* output = g_op.mutable_output(i); string* output = g_op.mutable_output(i);
if (terms_.count(*output)) *output = terms_[*output]; if (terms_.count(*output)) *output = terms_[*output];
} }
for (int i = 0; i < grad.g_inputs.size(); i++){ for (int i = 0; i < grad.g_inputs.size(); i++) {
if (terms_.count(grad.g_inputs[i])) if (terms_.count(grad.g_inputs[i]))
grad.g_inputs[i] = terms_[grad.g_inputs[i]]; grad.g_inputs[i] = terms_[grad.g_inputs[i]];
} }
...@@ -106,14 +106,14 @@ GraphDef GraphGradientMaker::Make() { ...@@ -106,14 +106,14 @@ GraphDef GraphGradientMaker::Make() {
string split_name = *output + "_autosplit_" + str(grads_count[*output]++); string split_name = *output + "_autosplit_" + str(grads_count[*output]++);
if (!is_skip) all_split_grads.insert(split_name); if (!is_skip) all_split_grads.insert(split_name);
// gather // gather
if (grads_count[*output] == inputs_count[original_name]){ if (grads_count[*output] == inputs_count[original_name]) {
gather_op = new OperatorDef(); gather_op = new OperatorDef();
gather_op->set_name(GetOperatorName()); gather_op->set_name(GetOperatorName());
gather_op->set_type("GradientGather"); gather_op->set_type("GradientGather");
gather_op->add_output(*output); gather_op->add_output(*output);
if (g_op.has_device_option()) if (g_op.has_device_option())
gather_op->mutable_device_option()->CopyFrom(g_op.device_option()); gather_op->mutable_device_option()->CopyFrom(g_op.device_option());
for (int j = 0; j < grads_count[*output]; j++){ for (int j = 0; j < grads_count[*output]; j++) {
string key = *output + "_autosplit_" + str(j); string key = *output + "_autosplit_" + str(j);
if (all_split_grads.count(key)) gather_op->add_input(key); if (all_split_grads.count(key)) gather_op->add_input(key);
} }
......
...@@ -66,7 +66,7 @@ DEFINE_REGISTRY(GradientRegistry, GradientMakerBase, const OperatorDef&, const v ...@@ -66,7 +66,7 @@ DEFINE_REGISTRY(GradientRegistry, GradientMakerBase, const OperatorDef&, const v
DEFINE_REGISTRY(NoGradientRegistry, GradientMakerBase, const OperatorDef&, const vector<string>&); DEFINE_REGISTRY(NoGradientRegistry, GradientMakerBase, const OperatorDef&, const vector<string>&);
#define INSTANTIATE_GET_SINGLE_ARGUMENT(T, fieldname) \ #define INSTANTIATE_GET_SINGLE_ARGUMENT(T, fieldname) \
template <> T OperatorBase::GetSingleArg(const string& name, const T& default_value){ \ template <> T OperatorBase::GetSingleArg(const string& name, const T& default_value) { \
if(args_.count(name) == 0) { \ if(args_.count(name) == 0) { \
return default_value; \ return default_value; \
} \ } \
...@@ -82,7 +82,7 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(int64_t, i64); ...@@ -82,7 +82,7 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(int64_t, i64);
#define INSTANTIATE_GET_REPEATED_ARGUMENT(T, fieldname) \ #define INSTANTIATE_GET_REPEATED_ARGUMENT(T, fieldname) \
template<> vector<T> OperatorBase::GetRepeatedArg<T>(const string& name){ \ template<> vector<T> OperatorBase::GetRepeatedArg<T>(const string& name) { \
if(args_.count(name) == 0) return vector<T>(); \ if(args_.count(name) == 0) return vector<T>(); \
vector<T> values; \ vector<T> values; \
for(const auto& v : args_[name]->fieldname()) values.push_back(v); \ for(const auto& v : args_[name]->fieldname()) values.push_back(v); \
......
...@@ -17,7 +17,7 @@ bool OpSchema::Verify(const OperatorDef& def) const { ...@@ -17,7 +17,7 @@ bool OpSchema::Verify(const OperatorDef& def) const {
} }
for (int in = 0; in < def.input_size(); in++) { for (int in = 0; in < def.input_size(); in++) {
if (def.input(in) == "ignore") continue; if (def.input(in) == "ignore") continue;
for (int out = 0; out < def.output_size(); out++){ for (int out = 0; out < def.output_size(); out++) {
if (def.output(out) == "ignore") continue; if (def.output(out) == "ignore") continue;
if (def.input(in) == def.output(out) && (!CheckInplace(in, out))) if (def.input(in) == def.output(out) && (!CheckInplace(in, out)))
LOG(FATAL) << "[" << def.name() << "] input(" LOG(FATAL) << "[" << def.name() << "] input("
......
...@@ -71,10 +71,9 @@ void DropoutGradientOp<Context>::RunOnDevice() { ...@@ -71,10 +71,9 @@ void DropoutGradientOp<Context>::RunOnDevice() {
template <class Context> template <class Context>
void DropoutGradientOp<Context>::ClearAfterRun() { void DropoutGradientOp<Context>::ClearAfterRun() {
ws()->ReleaseBuffer(mask); ws()->ReleaseBuffer(mask, true);
} }
DEPLOY_CPU(DropoutGradient); DEPLOY_CPU(DropoutGradient);
#ifdef WITH_CUDA #ifdef WITH_CUDA
DEPLOY_CUDA(DropoutGradient); DEPLOY_CUDA(DropoutGradient);
......
...@@ -125,7 +125,7 @@ void DotGradientOp<Context>::GemvRunWithType() { ...@@ -125,7 +125,7 @@ void DotGradientOp<Context>::GemvRunWithType() {
} }
template <class Context> template <class Context>
void DotGradientOp<Context>::RunOnDevice(){ void DotGradientOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0)); output(0)->ReshapeLike(input(0));
output(1)->ReshapeLike(input(1)); output(1)->ReshapeLike(input(1));
......
...@@ -65,7 +65,7 @@ void EltwiseGradientOp<Context>::SumRunWithType() { ...@@ -65,7 +65,7 @@ void EltwiseGradientOp<Context>::SumRunWithType() {
auto* dYdata = input(-1).template data<T, Context>(); auto* dYdata = input(-1).template data<T, Context>();
TIndex count = input(-1).count(); TIndex count = input(-1).count();
for (int i = 0; i < OutputSize(); i++){ for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() == "ignore") continue; if (output(i)->name() == "ignore") continue;
auto* dXdata = output(i)->template mutable_data<T, Context>(); auto* dXdata = output(i)->template mutable_data<T, Context>();
if (coeffs[i] == float(1)) { if (coeffs[i] == float(1)) {
......
...@@ -8,7 +8,7 @@ template <class Context> template <typename T> ...@@ -8,7 +8,7 @@ template <class Context> template <typename T>
void GramMatrixOp<Context>::RunWithType() { void GramMatrixOp<Context>::RunWithType() {
auto* Xdata = input(0).template data<T, Context>(); auto* Xdata = input(0).template data<T, Context>();
auto* Ydata = output(0)->template mutable_data<T, Context>(); auto* Ydata = output(0)->template mutable_data<T, Context>();
for (int i = 0; i < outer_dim; i++){ for (int i = 0; i < outer_dim; i++) {
math::Gemm<T, Context>(CblasNoTrans, CblasTrans, math::Gemm<T, Context>(CblasNoTrans, CblasTrans,
dim, dim, inner_dim, 1.0, Xdata, Xdata, 0.0, Ydata); dim, dim, inner_dim, 1.0, Xdata, Xdata, 0.0, Ydata);
Xdata += x_offset; Xdata += x_offset;
...@@ -40,7 +40,7 @@ void GramMatrixGradientOp<Context>::RunWithType() { ...@@ -40,7 +40,7 @@ void GramMatrixGradientOp<Context>::RunWithType() {
auto* dYdata = input(-1).template data<T, Context>(); auto* dYdata = input(-1).template data<T, Context>();
auto* Xdata = input(0).template data<T, Context>(); auto* Xdata = input(0).template data<T, Context>();
auto* dXdata = output(0)->template mutable_data<T, Context>(); auto* dXdata = output(0)->template mutable_data<T, Context>();
for (int i = 0; i < outer_dim; i++){ for (int i = 0; i < outer_dim; i++) {
math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans, math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
dim, inner_dim, dim, 2.0, dYdata, Xdata, 0.0, dXdata); dim, inner_dim, dim, 2.0, dYdata, Xdata, 0.0, dXdata);
dYdata += y_offset; dYdata += y_offset;
...@@ -49,7 +49,7 @@ void GramMatrixGradientOp<Context>::RunWithType() { ...@@ -49,7 +49,7 @@ void GramMatrixGradientOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void GramMatrixGradientOp<Context>::RunOnDevice(){ void GramMatrixGradientOp<Context>::RunOnDevice() {
outer_dim = input(0).count(0, axis); outer_dim = input(0).count(0, axis);
dim = input(0).dim(axis); dim = input(0).dim(axis);
inner_dim = input(0).count(axis + 1); inner_dim = input(0).count(axis + 1);
......
...@@ -81,7 +81,7 @@ void MatmulGradientOp<Context>::RunWithType() { ...@@ -81,7 +81,7 @@ void MatmulGradientOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void MatmulGradientOp<Context>::RunOnDevice(){ void MatmulGradientOp<Context>::RunOnDevice() {
CHECK(input(0).ndim() == input(1).ndim()) CHECK(input(0).ndim() == input(1).ndim())
<< "both matrices must have the same number of dimensions."; << "both matrices must have the same number of dimensions.";
CHECK_GE(input(0).ndim(), 2) CHECK_GE(input(0).ndim(), 2)
......
...@@ -9,7 +9,7 @@ void PowOp<Context>::RunWithType() { ...@@ -9,7 +9,7 @@ void PowOp<Context>::RunWithType() {
TIndex count = input(0).count(); TIndex count = input(0).count();
auto* Ydata = output(0)->template mutable_data<T, Context>(); auto* Ydata = output(0)->template mutable_data<T, Context>();
if (power_scale == float(0)){ if (power_scale == float(0)) {
float value = (power == float(0)) ? float(1) : pow(shift, power); float value = (power == float(0)) ? float(1) : pow(shift, power);
math::Set<T, Context>(count, dragon_cast<T, float>(value), Ydata); math::Set<T, Context>(count, dragon_cast<T, float>(value), Ydata);
return; return;
......
...@@ -12,7 +12,7 @@ void SquareOp<Context>::RunWithType() { ...@@ -12,7 +12,7 @@ void SquareOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void SquareOp<Context>::RunOnDevice(){ void SquareOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0)); output(0)->ReshapeLike(input(0));
if (input(0).template IsType<float>()) RunWithType<float>(); if (input(0).template IsType<float>()) RunWithType<float>();
......
...@@ -44,7 +44,7 @@ void SubOp<Context>::BroadcastRunWithType(int type) { ...@@ -44,7 +44,7 @@ void SubOp<Context>::BroadcastRunWithType(int type) {
} }
template <class Context> template <class Context>
void SubOp<Context>::RunOnDevice(){ void SubOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0)); output(0)->ReshapeLike(input(0));
if (input(0).dims() == input(1).dims()) { if (input(0).dims() == input(1).dims()) {
......
...@@ -7,7 +7,7 @@ namespace dragon { ...@@ -7,7 +7,7 @@ namespace dragon {
template <class Context> template <typename T> template <class Context> template <typename T>
void ConcatOp<Context>::RunWithType() { void ConcatOp<Context>::RunWithType() {
auto* Ydata = output(0)->template mutable_data<T, Context>(); auto* Ydata = output(0)->template mutable_data<T, Context>();
for (int i = 0; i < nin; i++){ for (int i = 0; i < nin; i++) {
auto* Xdata = input(i).template data<T, Context>(); auto* Xdata = input(i).template data<T, Context>();
TIndex count = input(i).count(); TIndex count = input(i).count();
x_concat_dim = input(i).dim(axis); x_concat_dim = input(i).dim(axis);
...@@ -25,12 +25,12 @@ void ConcatOp<Context>::RunWithType() { ...@@ -25,12 +25,12 @@ void ConcatOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void ConcatOp<Context>::RunOnDevice(){ void ConcatOp<Context>::RunOnDevice() {
concat_dims = input(0).dims(); concat_dims = input(0).dims();
for (int i = 1; i < nin; i++) { for (int i = 1; i < nin; i++) {
CHECK_EQ(concat_dims.size(), input(i).ndim()) CHECK_EQ(concat_dims.size(), input(i).ndim())
<< "\nall inputs must have the same ndim."; << "\nall inputs must have the same ndim.";
for (int j = 0; j < concat_dims.size(); j++){ for (int j = 0; j < concat_dims.size(); j++) {
if (j == axis) continue; if (j == axis) continue;
CHECK_EQ(concat_dims[j], input(i).dim(j)) CHECK_EQ(concat_dims[j], input(i).dim(j))
<< "\nall inputs must have the same dims" << "\nall inputs must have the same dims"
...@@ -82,7 +82,7 @@ void ConcatGradientOp<Context>::RunWithType() { ...@@ -82,7 +82,7 @@ void ConcatGradientOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void ConcatGradientOp<Context>::RunOnDevice(){ void ConcatGradientOp<Context>::RunOnDevice() {
if (input(-1).name() == "ignore") return; if (input(-1).name() == "ignore") return;
concat_dims = input(-1).dims(); concat_dims = input(-1).dims();
y_concat_dim = concat_dims[axis]; y_concat_dim = concat_dims[axis];
......
...@@ -27,7 +27,7 @@ OPERATOR_SCHEMA(Flatten).NumInputs(1).NumOutputs(1); ...@@ -27,7 +27,7 @@ OPERATOR_SCHEMA(Flatten).NumInputs(1).NumOutputs(1);
template <class Context> template <class Context>
void FlattenGradientOp<Context>::RunOnDevice(){ void FlattenGradientOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0)); output(0)->ReshapeLike(input(0));
output(0)->Share(input(1)); output(0)->Share(input(1));
} }
......
...@@ -78,7 +78,7 @@ void ReduceGradientOp<Context>::SumRunWithType() { ...@@ -78,7 +78,7 @@ void ReduceGradientOp<Context>::SumRunWithType() {
template <class Context> template <typename T> template <class Context> template <typename T>
void ReduceGradientOp<Context>::MeanRunWithType() { void ReduceGradientOp<Context>::MeanRunWithType() {
auto* dXdata = output(0)->template mutable_data<T, Context>(); auto* dXdata = output(0)->template mutable_data<T, Context>();
if (axis == -1){ if (axis == -1) {
auto* dYdata = input(-1).template data<T, CPUContext>(); auto* dYdata = input(-1).template data<T, CPUContext>();
math::Set<T, Context>(output(0)->count(), dYdata[0] / input(0).count(), dXdata); math::Set<T, Context>(output(0)->count(), dYdata[0] / input(0).count(), dXdata);
} else { } else {
......
...@@ -150,13 +150,11 @@ void ScanOp<Context>::UnrollTemplate() { ...@@ -150,13 +150,11 @@ void ScanOp<Context>::UnrollTemplate() {
template <class Context> template <class Context>
void ScanOp<Context>::RunOnDevice() { void ScanOp<Context>::RunOnDevice() {
// unroll
UnrollTemplate(); UnrollTemplate();
if (!graphs.count(nsteps)) if (!graphs.count(nsteps)) {
graphs[nsteps].reset(new Graph(new_def, ws())); graphs[nsteps].reset(new Graph(new_def, ws()));
}
cur_graph = graphs[nsteps].get(); cur_graph = graphs[nsteps].get();
// forward
cur_graph->Run("", ""); cur_graph->Run("", "");
} }
...@@ -201,13 +199,11 @@ void ScanGradientOp<Context>::MakeGradientOps() { ...@@ -201,13 +199,11 @@ void ScanGradientOp<Context>::MakeGradientOps() {
template <class Context> template <class Context>
void ScanGradientOp<Context>::RunOnDevice() { void ScanGradientOp<Context>::RunOnDevice() {
// make graph
MakeGradientOps(); MakeGradientOps();
if (!graphs.count(nsteps)) if (!graphs.count(nsteps)) {
graphs[nsteps].reset(new Graph(new_def, ws())); graphs[nsteps].reset(new Graph(new_def, ws()));
}
cur_graph = graphs[nsteps].get(); cur_graph = graphs[nsteps].get();
// backward
cur_graph->Run("Gradient", ""); cur_graph->Run("Gradient", "");
} }
......
...@@ -7,7 +7,7 @@ namespace dragon { ...@@ -7,7 +7,7 @@ namespace dragon {
template <class Context> template <typename T> template <class Context> template <typename T>
void SliceOp<Context>::RunWithType() { void SliceOp<Context>::RunWithType() {
auto* Xdata = input(0).template data<T, Context>(); auto* Xdata = input(0).template data<T, Context>();
for (int i = 0; i < nout; i++){ for (int i = 0; i < nout; i++) {
auto* Ydata = output(i)->template mutable_data<T, Context>(); auto* Ydata = output(i)->template mutable_data<T, Context>();
TIndex count = output(i)->count(); TIndex count = output(i)->count();
kernel::Slice<T, Context>(count, outer_dim, inner_dim, kernel::Slice<T, Context>(count, outer_dim, inner_dim,
...@@ -46,7 +46,7 @@ OPERATOR_SCHEMA(Slice).NumInputs(1).NumOutputs(1, INT_MAX); ...@@ -46,7 +46,7 @@ OPERATOR_SCHEMA(Slice).NumInputs(1).NumOutputs(1, INT_MAX);
template <class Context> template <typename T> template <class Context> template <typename T>
void SliceGradientOp<Context>::RunWithType() { void SliceGradientOp<Context>::RunWithType() {
auto* dXdata = output(0)->template mutable_data<T, Context>(); auto* dXdata = output(0)->template mutable_data<T, Context>();
for (int i = 0; i < nout; i++){ for (int i = 0; i < nout; i++) {
if (input(i + 1).name() == "ignore") continue; if (input(i + 1).name() == "ignore") continue;
auto* dYdata = input(i + 1).template data<T, Context>(); auto* dYdata = input(i + 1).template data<T, Context>();
TIndex count = input(i + 1).count(); TIndex count = input(i + 1).count();
......
...@@ -64,7 +64,7 @@ void AccuracyOp<Context>::RunWithType() { ...@@ -64,7 +64,7 @@ void AccuracyOp<Context>::RunWithType() {
} // end outer_num } // end outer_num
output(0)->template mutable_data<T, CPUContext>()[0] = acc / count; output(0)->template mutable_data<T, CPUContext>()[0] = acc / count;
if (OutputSize() > 1){ if (OutputSize() > 1) {
auto* acc_per_class = output(1)->template mutable_data<T, CPUContext>(); auto* acc_per_class = output(1)->template mutable_data<T, CPUContext>();
for (int i = 0; i < classes; i++) for (int i = 0; i < classes; i++)
acc_per_class[i] = num_per_class[i] == 0 ? 0 : acc_per_class[i] / acc_per_class[i]; acc_per_class[i] = num_per_class[i] == 0 ? 0 : acc_per_class[i] / acc_per_class[i];
......
...@@ -28,7 +28,7 @@ void L1LossOp<Context>::RunWithType() { ...@@ -28,7 +28,7 @@ void L1LossOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void L1LossOp<Context>::RunOnDevice(){ void L1LossOp<Context>::RunOnDevice() {
CHECK_EQ(input(0).count(), input(1).count()); CHECK_EQ(input(0).count(), input(1).count());
output(0)->Reshape(vector<TIndex>(1, 1)); output(0)->Reshape(vector<TIndex>(1, 1));
diff = ws()->CreateTensor("_t_" + anchor() + "_l1_loss_diff"); diff = ws()->CreateTensor("_t_" + anchor() + "_l1_loss_diff");
...@@ -53,7 +53,7 @@ void L1LossGradientOp<Context>::RunWithType() { ...@@ -53,7 +53,7 @@ void L1LossGradientOp<Context>::RunWithType() {
else if (normalization == "FULL") normalizer = input(0).count(); else if (normalization == "FULL") normalizer = input(0).count();
else if (normalization == "NONE") normalizer = 1; else if (normalization == "NONE") normalizer = 1;
alpha = alpha / normalizer; alpha = alpha / normalizer;
for (int i = 0; i < 2; i++){ for (int i = 0; i < 2; i++) {
if (output(i)->name() == "ignore") continue; if (output(i)->name() == "ignore") continue;
output(i)->ReshapeLike(input(i)); output(i)->ReshapeLike(input(i));
auto* dXdata = output(i)->template mutable_data<T, Context>(); auto* dXdata = output(i)->template mutable_data<T, Context>();
......
...@@ -11,7 +11,7 @@ void L2LossOp<Context>::RunWithType() { ...@@ -11,7 +11,7 @@ void L2LossOp<Context>::RunWithType() {
auto* diff_data = diff->template mutable_data<T, Context>(); auto* diff_data = diff->template mutable_data<T, Context>();
auto* Ydata = output(0)->template mutable_data<T, CPUContext>(); auto* Ydata = output(0)->template mutable_data<T, CPUContext>();
math::Sub<T, Context>(diff->count(), X0data, X1data, diff_data); math::Sub<T, Context>(diff->count(), X0data, X1data, diff_data);
if (InputSize() > 2){ if (InputSize() > 2) {
CHECK_EQ(input(0).count(), input(2).count()); CHECK_EQ(input(0).count(), input(2).count());
auto* Wdata = input(2).template data<T, Context>(); auto* Wdata = input(2).template data<T, Context>();
math::Mul<T, Context>(diff->count(), Wdata, diff_data, diff_data); math::Mul<T, Context>(diff->count(), Wdata, diff_data, diff_data);
......
...@@ -14,12 +14,12 @@ void SmoothL1LossOp<Context>::RunWithType() { ...@@ -14,12 +14,12 @@ void SmoothL1LossOp<Context>::RunWithType() {
auto* Ydata = output(0)->template mutable_data<T, CPUContext>(); auto* Ydata = output(0)->template mutable_data<T, CPUContext>();
math::Sub<T, Context>(diff->count(), X0data, X1data, diff_data); math::Sub<T, Context>(diff->count(), X0data, X1data, diff_data);
if (InputSize() > 2){ if (InputSize() > 2) {
auto* inside_w_data = input(2).template data<T, Context>(); auto* inside_w_data = input(2).template data<T, Context>();
math::Mul<T, Context>(diff->count(), inside_w_data, diff_data, diff_data); math::Mul<T, Context>(diff->count(), inside_w_data, diff_data, diff_data);
} }
kernel::SmoothL1<T, Context>(diff->count(), sigma2, diff_data, error_data); kernel::SmoothL1<T, Context>(diff->count(), sigma2, diff_data, error_data);
if (InputSize() > 3){ if (InputSize() > 3) {
auto* outside_w_data = input(3).template data<T, Context>(); auto* outside_w_data = input(3).template data<T, Context>();
math::Mul<T, Context>(diff->count(), outside_w_data, error_data, error_data); math::Mul<T, Context>(diff->count(), outside_w_data, error_data, error_data);
} }
......
...@@ -26,14 +26,14 @@ void MPIBroadcastOp<Context>::RunWithType() { ...@@ -26,14 +26,14 @@ void MPIBroadcastOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void MPIBroadcastOp<Context>::RunOnDevice(){ void MPIBroadcastOp<Context>::RunOnDevice() {
CHECK(this->comm != MPI_COMM_NULL) CHECK(this->comm != MPI_COMM_NULL)
<< "\nMPIBroadcastOp, name: " << name() << "\nMPIBroadcastOp, name: " << name()
<< ", does not belong to any group, can't run."; << ", does not belong to any group, can't run.";
size_t ndim[1]; size_t ndim[1];
TIndex* dims = nullptr; TIndex* dims = nullptr;
if (this->comm_rank == this->comm_root){ if (this->comm_rank == this->comm_root) {
ndim[0] = input(0).ndim(); ndim[0] = input(0).ndim();
dims = new TIndex[ndim[0]]; dims = new TIndex[ndim[0]];
for (int i = 0; i < input(0).ndim(); i++) for (int i = 0; i < input(0).ndim(); i++)
...@@ -90,7 +90,7 @@ void MPIBroadcastGradientOp<Context>::RunWithType() { ...@@ -90,7 +90,7 @@ void MPIBroadcastGradientOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void MPIBroadcastGradientOp<Context>::RunOnDevice(){ void MPIBroadcastGradientOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(-1)); output(0)->ReshapeLike(input(-1));
if (input(0).template IsType<float>()) RunWithType<float>(); if (input(0).template IsType<float>()) RunWithType<float>();
......
...@@ -51,7 +51,7 @@ template <class Context> template <typename T> ...@@ -51,7 +51,7 @@ template <class Context> template <typename T>
void MPIGatherGradientOp<Context>::RunWithType() { void MPIGatherGradientOp<Context>::RunWithType() {
if (this->comm_rank == this->comm_root) { if (this->comm_rank == this->comm_root) {
output(0)->Share(input(this->comm_rank + 1)); output(0)->Share(input(this->comm_rank + 1));
for (int i = 0; i < this->comm_size; i++){ for (int i = 0; i < this->comm_size; i++) {
if (i == this->comm_root) continue; if (i == this->comm_root) continue;
#ifdef WITH_CUDA_AWARE #ifdef WITH_CUDA_AWARE
auto* dYdata = input(this->comm_rank + 1).template data<T, Context>(); auto* dYdata = input(this->comm_rank + 1).template data<T, Context>();
......
...@@ -60,7 +60,7 @@ void InstanceNormOp<Context>::RunWithType() { ...@@ -60,7 +60,7 @@ void InstanceNormOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void InstanceNormOp<Context>::RunOnDevice(){ void InstanceNormOp<Context>::RunOnDevice() {
num = input(0).dim(0); channels = input(0).dim(1); num = input(0).dim(0); channels = input(0).dim(1);
spatial_dim = input(0).count(2); nbychans = num * channels; spatial_dim = input(0).count(2); nbychans = num * channels;
vector<TIndex> dims({ num, channels }); vector<TIndex> dims({ num, channels });
......
...@@ -109,7 +109,7 @@ void L2NormGradientOp<Context>::RunWithType() { ...@@ -109,7 +109,7 @@ void L2NormGradientOp<Context>::RunWithType() {
auto* Bdata = buffer->template mutable_data<T, Context>(); auto* Bdata = buffer->template mutable_data<T, Context>();
auto* BInnerdata = buffer_inner->template mutable_data<T, Context>(); auto* BInnerdata = buffer_inner->template mutable_data<T, Context>();
for (int n = 0; n < outer_dim; n++){ for (int n = 0; n < outer_dim; n++) {
if (across_inner) { if (across_inner) {
Ndata = norm->template data<T, CPUContext>(); Ndata = norm->template data<T, CPUContext>();
T sum_of_x_mul_dy = math::Dot<T, Context>(buffer->count(), Xdata, dYdata); T sum_of_x_mul_dy = math::Dot<T, Context>(buffer->count(), Xdata, dYdata);
......
...@@ -21,7 +21,7 @@ void AsyncUpdateOp<Context>::UpdateTimestamp(int tag) { ...@@ -21,7 +21,7 @@ void AsyncUpdateOp<Context>::UpdateTimestamp(int tag) {
} }
template <class Context> template <class Context>
int AsyncUpdateOp<Context>::GetDelay(int tag){ int AsyncUpdateOp<Context>::GetDelay(int tag) {
Tensor* t = ws()->GetTensor("_t_" + this->domain + "async_timestamp"); Tensor* t = ws()->GetTensor("_t_" + this->domain + "async_timestamp");
int* global_timestamp = t->template mutable_data<int, CPUContext>(); int* global_timestamp = t->template mutable_data<int, CPUContext>();
return global_timestamp[tag] - local_timestamp[tag] + 1; return global_timestamp[tag] - local_timestamp[tag] + 1;
...@@ -71,7 +71,7 @@ AsyncUpdateOp<Context>::AsyncUpdateOp(const OperatorDef& op_def, Workspace* ws) ...@@ -71,7 +71,7 @@ AsyncUpdateOp<Context>::AsyncUpdateOp(const OperatorDef& op_def, Workspace* ws)
template <class Context> template <typename T> template <class Context> template <typename T>
void AsyncUpdateOp<Context>::RootRunWithType() { void AsyncUpdateOp<Context>::RootRunWithType() {
for (int i = 0; i < InputSize(); i++){ for (int i = 0; i < InputSize(); i++) {
auto* dXdata = input(i).template mutable_data<T, Context>(); auto* dXdata = input(i).template mutable_data<T, Context>();
auto* Xdata = output(i)->template mutable_data<T, Context>(); auto* Xdata = output(i)->template mutable_data<T, Context>();
...@@ -88,7 +88,7 @@ void AsyncUpdateOp<Context>::RootRunWithType() { ...@@ -88,7 +88,7 @@ void AsyncUpdateOp<Context>::RootRunWithType() {
} }
template <class Context> template <class Context>
void AsyncUpdateOp<Context>::RunOnDevice(){ void AsyncUpdateOp<Context>::RunOnDevice() {
if (this->comm_rank != this->comm_root) return; if (this->comm_rank != this->comm_root) return;
if (input(0).template IsType<float>()) { if (input(0).template IsType<float>()) {
......
...@@ -98,7 +98,7 @@ void UpdateOpBase<Context>::ReduceRunWithType() { ...@@ -98,7 +98,7 @@ void UpdateOpBase<Context>::ReduceRunWithType() {
} }
// ave-normalize // ave-normalize
if (comm_size > 1){ if (comm_size > 1) {
#ifdef WITH_CUDA_AWARE #ifdef WITH_CUDA_AWARE
math::Scal<T, Context>(count, T(1.0 / comm_size), dXdata); math::Scal<T, Context>(count, T(1.0 / comm_size), dXdata);
#else #else
...@@ -112,7 +112,7 @@ template <class Context> template <typename T> ...@@ -112,7 +112,7 @@ template <class Context> template <typename T>
void UpdateOpBase<Context>::PreprocessRunWithType() { void UpdateOpBase<Context>::PreprocessRunWithType() {
// scale // scale
scale_factor = param("scale_gradient"); scale_factor = param("scale_gradient");
if (scale_factor != 1){ if (scale_factor != 1) {
auto* dXdata = input(0).template mutable_data<T, Context>(); auto* dXdata = input(0).template mutable_data<T, Context>();
math::Scal<T, Context>(input(0).count(), scale_factor, dXdata); math::Scal<T, Context>(input(0).count(), scale_factor, dXdata);
} }
...@@ -128,11 +128,11 @@ void UpdateOpBase<Context>::PreprocessRunWithType() { ...@@ -128,11 +128,11 @@ void UpdateOpBase<Context>::PreprocessRunWithType() {
} }
} }
// decay // decay
l2_decay = param("l2_decay"); l2_decay = param("l2_decay") * decay_mult;
if (l2_decay > 0){ if (l2_decay > 0) {
auto* dXdata = input(0).template mutable_data<T, Context>(); auto* dXdata = input(0).template mutable_data<T, Context>();
auto* Xdata = output(0)->template data<T, Context>(); auto* Xdata = output(0)->template data<T, Context>();
math::Axpy<T, Context>(input(0).count(), l2_decay * decay_mult, Xdata, dXdata); math::Axpy<T, Context>(input(0).count(), l2_decay, Xdata, dXdata);
} }
} }
......
...@@ -12,7 +12,7 @@ void CompareOp<Context>::EqualRunWithType() { ...@@ -12,7 +12,7 @@ void CompareOp<Context>::EqualRunWithType() {
} }
template <class Context> template <class Context>
void CompareOp<Context>::RunOnDevice(){ void CompareOp<Context>::RunOnDevice() {
CHECK_EQ(input(0).count(), input(1).count()) CHECK_EQ(input(0).count(), input(1).count())
<< "both conditioned tensor must have same elements."; << "both conditioned tensor must have same elements.";
output(0)->ReshapeLike(input(0)); output(0)->ReshapeLike(input(0));
......
...@@ -6,7 +6,7 @@ namespace dragon { ...@@ -6,7 +6,7 @@ namespace dragon {
template <class Context> template <typename T> template <class Context> template <typename T>
void GradientGenerateOp<Context>::RunWithType() { void GradientGenerateOp<Context>::RunWithType() {
for (int i = 0; i < OutputSize(); i++){ for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() == "ignore") continue; if (output(i)->name() == "ignore") continue;
output(i)->ReshapeLike(input(i)); output(i)->ReshapeLike(input(i));
auto* dXdata = output(0)->template mutable_data<T, Context>(); auto* dXdata = output(0)->template mutable_data<T, Context>();
......
...@@ -7,7 +7,7 @@ namespace dragon { ...@@ -7,7 +7,7 @@ namespace dragon {
template <class Context> template <class Context>
void ConvOp<Context>::ComputeOutputShape() { void ConvOp<Context>::ComputeOutputShape() {
this->output_shape.clear(); this->output_shape.clear();
for (int i = 0; i < this->num_spatial_axes; i++){ for (int i = 0; i < this->num_spatial_axes; i++) {
const int input_dim = this->bottom_shape[this->channel_axis + i + 1]; const int input_dim = this->bottom_shape[this->channel_axis + i + 1];
const int dilated_kernel = this->dilation[i] * (this->kernel_size[i] - 1) + 1; const int dilated_kernel = this->dilation[i] * (this->kernel_size[i] - 1) + 1;
const int output_dim = (input_dim + 2 * this->pad[i] - dilated_kernel) / this->stride[i] + 1; const int output_dim = (input_dim + 2 * this->pad[i] - dilated_kernel) / this->stride[i] + 1;
...@@ -43,7 +43,7 @@ void ConvOp<Context>::RunWithType() { ...@@ -43,7 +43,7 @@ void ConvOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void ConvOp<Context>::RunOnDevice(){ void ConvOp<Context>::RunOnDevice() {
Reshape(); Reshape();
if (input(0).template IsType<float>()) RunWithType<float>(); if (input(0).template IsType<float>()) RunWithType<float>();
...@@ -71,7 +71,7 @@ void ConvGradientOp<Context>::RunWithType() { ...@@ -71,7 +71,7 @@ void ConvGradientOp<Context>::RunWithType() {
Db(dYdata + n * this->y_offset, dBdata); Db(dYdata + n * this->y_offset, dBdata);
} }
for (int n = 0; n < input(2).dim(0); n++){ for (int n = 0; n < input(2).dim(0); n++) {
if (output(1)->name() != "ignore") { if (output(1)->name() != "ignore") {
auto* Xdata = input(0).template data<T, Context>(); auto* Xdata = input(0).template data<T, Context>();
auto* dWdata = output(1)->template mutable_data<T, Context>(); auto* dWdata = output(1)->template mutable_data<T, Context>();
......
...@@ -161,7 +161,7 @@ void ConvOpBase<Context>::GradientReshape() { ...@@ -161,7 +161,7 @@ void ConvOpBase<Context>::GradientReshape() {
// compute input shape // compute input shape
input_shape.clear(); input_shape.clear();
for (int i = 0; i < num_spatial_axes; i++){ for (int i = 0; i < num_spatial_axes; i++) {
if (ReverseDimensions()) { if (ReverseDimensions()) {
input_shape.push_back(input(2).dim(channel_axis + i + 1)); input_shape.push_back(input(2).dim(channel_axis + i + 1));
} else { } else {
...@@ -181,7 +181,7 @@ void ConvOpBase<Context>::GradientReshape() { ...@@ -181,7 +181,7 @@ void ConvOpBase<Context>::GradientReshape() {
// compute col buffer shape // compute col buffer shape
col_buffer_shape.clear(); col_buffer_shape.clear();
col_buffer_shape.push_back(kernel_dim * group); col_buffer_shape.push_back(kernel_dim * group);
for (int i = 0; i < num_spatial_axes; i++){ for (int i = 0; i < num_spatial_axes; i++) {
if (ReverseDimensions()) { if (ReverseDimensions()) {
col_buffer_shape.push_back(bottom_shape[channel_axis + i + 1]); col_buffer_shape.push_back(bottom_shape[channel_axis + i + 1]);
} else { } else {
......
...@@ -228,13 +228,13 @@ void CuDNNConvGradientOp<Context>::RunWithType() { ...@@ -228,13 +228,13 @@ void CuDNNConvGradientOp<Context>::RunWithType() {
const T* dYdata = input(2).template data<T, Context>(); const T* dYdata = input(2).template data<T, Context>();
for (int g = 0; g < this->group; g++) { for (int g = 0; g < this->group; g++) {
if (output(2)->name() != "ignore"){ if (output(2)->name() != "ignore") {
T* dBdata = output(2)->template mutable_data<T, Context>(); T* dBdata = output(2)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardBias(handle[g], CUDNN_CHECK(cudnnConvolutionBackwardBias(handle[g],
CUDNNType<T>::one, input_desc, dYdata + this->y_offset * g, CUDNNType<T>::one, input_desc, dYdata + this->y_offset * g,
CUDNNType<T>::one, bias_desc, dBdata + bias_offset * g)); CUDNNType<T>::one, bias_desc, dBdata + bias_offset * g));
} }
if (output(1)->name() != "ignore"){ if (output(1)->name() != "ignore") {
auto* Xdata = input(0).template data<T, Context>(); auto* Xdata = input(0).template data<T, Context>();
auto* dWdata = output(1)->template mutable_data<T, Context>(); auto* dWdata = output(1)->template mutable_data<T, Context>();
auto* workspace = buffer2->mutable_data<char, Context>(); auto* workspace = buffer2->mutable_data<char, Context>();
...@@ -246,7 +246,7 @@ void CuDNNConvGradientOp<Context>::RunWithType() { ...@@ -246,7 +246,7 @@ void CuDNNConvGradientOp<Context>::RunWithType() {
workspace + g * workspace_bwd_filter_size, workspace_bwd_filter_size, workspace + g * workspace_bwd_filter_size, workspace_bwd_filter_size,
CUDNNType<T>::one, filter_desc, dWdata + this->weight_offset * g)); CUDNNType<T>::one, filter_desc, dWdata + this->weight_offset * g));
} }
if (output(0)->name() != "ignore"){ if (output(0)->name() != "ignore") {
auto* Wdata = input(1).template data<T, Context>(); auto* Wdata = input(1).template data<T, Context>();
auto* dXdata = output(0)->template mutable_data<T, Context>(); auto* dXdata = output(0)->template mutable_data<T, Context>();
auto* workspace = buffer1->mutable_data<char, Context>(); auto* workspace = buffer1->mutable_data<char, Context>();
......
...@@ -54,7 +54,7 @@ void CuDNNLRNGradientOp<Context>::RunWithType() { ...@@ -54,7 +54,7 @@ void CuDNNLRNGradientOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void CuDNNLRNGradientOp<Context>::RunOnDevice(){ void CuDNNLRNGradientOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0)); output(0)->ReshapeLike(input(0));
if (this->mode == ACROSS_CHANNELS) { if (this->mode == ACROSS_CHANNELS) {
......
...@@ -43,7 +43,7 @@ void DeConvOp<Context>::RunWithType() { ...@@ -43,7 +43,7 @@ void DeConvOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void DeConvOp<Context>::RunOnDevice(){ void DeConvOp<Context>::RunOnDevice() {
Reshape(); Reshape();
if (input(0).template IsType<float>()) RunWithType<float>(); if (input(0).template IsType<float>()) RunWithType<float>();
......
...@@ -98,7 +98,7 @@ void LRNOp<Context>::ProdRunWithType() { ...@@ -98,7 +98,7 @@ void LRNOp<Context>::ProdRunWithType() {
} }
template <class Context> template <class Context>
void LRNOp<Context>::RunOnDevice(){ void LRNOp<Context>::RunOnDevice() {
if (mode == ACROSS_CHANNELS) { if (mode == ACROSS_CHANNELS) {
if (input(0).template IsType<float>()) { if (input(0).template IsType<float>()) {
AcrossRunWithType<float>(); AcrossRunWithType<float>();
...@@ -223,7 +223,7 @@ void LRNGradientOp<Context>::SplitRunWithType() { ...@@ -223,7 +223,7 @@ void LRNGradientOp<Context>::SplitRunWithType() {
} }
template <class Context> template <class Context>
void LRNGradientOp<Context>::RunOnDevice(){ void LRNGradientOp<Context>::RunOnDevice() {
if (mode == ACROSS_CHANNELS) { if (mode == ACROSS_CHANNELS) {
if (input(0).template IsType<float>()) { if (input(0).template IsType<float>()) {
AcrossRunWithType<float>(); AcrossRunWithType<float>();
......
...@@ -16,7 +16,7 @@ void NNResizeOp<Context>::RunWithType() { ...@@ -16,7 +16,7 @@ void NNResizeOp<Context>::RunWithType() {
template <class Context> template <class Context>
void NNResizeOp<Context>::RunOnDevice() { void NNResizeOp<Context>::RunOnDevice() {
dims = input(0).dims(); dims = input(0).dims();
if (dsize.size() == 0){ if (dsize.size() == 0) {
CHECK(fy != -1.0 && fx != -1.0); CHECK(fy != -1.0 && fx != -1.0);
dims[2] = int(dims[2] * fy); dims[2] = int(dims[2] * fy);
dims[3] = int(dims[3] * fx); dims[3] = int(dims[3] * fx);
...@@ -47,7 +47,7 @@ void NNResizeGradientOp<Context>::RunWithType() { ...@@ -47,7 +47,7 @@ void NNResizeGradientOp<Context>::RunWithType() {
} }
template <class Context> template <class Context>
void NNResizeGradientOp<Context>::RunOnDevice(){ void NNResizeGradientOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0)); output(0)->ReshapeLike(input(0));
if (input(0).template IsType<float>()) return RunWithType<float>(); if (input(0).template IsType<float>()) return RunWithType<float>();
......
...@@ -37,7 +37,7 @@ void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, const vector<TIndex>& dim ...@@ -37,7 +37,7 @@ void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, const vector<TIndex>& dim
int* dimA = new int[ndim]; int* dimA = new int[ndim];
int* strideA = new int[ndim]; int* strideA = new int[ndim];
TIndex stride = 1; TIndex stride = 1;
for (int i = ndim - 1; i >= 0; i--){ for (int i = ndim - 1; i >= 0; i--) {
strideA[i] = stride; strideA[i] = stride;
dimA[i] = dims[i]; dimA[i] = dims[i];
stride *= dimA[i]; stride *= dimA[i];
...@@ -55,7 +55,7 @@ void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, ...@@ -55,7 +55,7 @@ void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc,
int ndim = (int)dims.size(); int ndim = (int)dims.size();
int* dimA = new int[ndim]; int* dimA = new int[ndim];
int* strideA = new int[ndim]; int* strideA = new int[ndim];
for (int i = ndim - 1; i >= 0; i--){ for (int i = ndim - 1; i >= 0; i--) {
strideA[i] = strides[i]; strideA[i] = strides[i];
dimA[i] = dims[i]; dimA[i] = dims[i];
} }
...@@ -66,10 +66,10 @@ void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, ...@@ -66,10 +66,10 @@ void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc,
template <typename T> template <typename T>
void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, Tensor* tensor) { void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, Tensor* tensor) {
// CUDNN only support ndim from 3 to 8 // cuDNN requires ndim from 3 to 8
// we fake a reshaped dims to pass check // we fake a reshaped dims to pass check
vector<TIndex> fake_dims(tensor->dims()); vector<TIndex> fake_dims(tensor->dims());
if (fake_dims.size() < 3 || fake_dims.size() > 8){ if (fake_dims.size() < 3 || fake_dims.size() > 8) {
fake_dims.assign({ 1, 1 }); fake_dims.assign({ 1, 1 });
fake_dims.push_back(tensor->count()); fake_dims.push_back(tensor->count());
} }
......
...@@ -142,7 +142,7 @@ template <> void Add<float, CPUContext>(const int n, ...@@ -142,7 +142,7 @@ template <> void Add<float, CPUContext>(const int n,
template <> void Sub<float, CPUContext>(const int n, template <> void Sub<float, CPUContext>(const int n,
const float* a, const float* a,
const float* b, const float* b,
float* y){ float* y) {
#ifdef WITH_SSE #ifdef WITH_SSE
sse::Sub<float>(n, a, b, y); sse::Sub<float>(n, a, b, y);
#else // naive implement #else // naive implement
......
...@@ -98,7 +98,7 @@ template<> void Softmax<float, CPUContext>(const int count, ...@@ -98,7 +98,7 @@ template<> void Softmax<float, CPUContext>(const int count,
const int dim = count / outer_dim; const int dim = count / outer_dim;
for (int i = 0; i < outer_dim; ++i) { for (int i = 0; i < outer_dim; ++i) {
context->Copy<float, CPUContext, CPUContext>(inner_dim, scale, x + i*dim); context->Copy<float, CPUContext, CPUContext>(inner_dim, scale, x + i*dim);
for (int j = 0; j < classes; ++j){ for (int j = 0; j < classes; ++j) {
for (int k = 0; k < inner_dim; k++) for (int k = 0; k < inner_dim; k++)
scale[k] = std::max(scale[k], x[i * dim + j * inner_dim + k]); scale[k] = std::max(scale[k], x[i * dim + j * inner_dim + k]);
} }
...@@ -1362,7 +1362,7 @@ template<> void ROIPooling<float, CPUContext>(const float spatial_scale, ...@@ -1362,7 +1362,7 @@ template<> void ROIPooling<float, CPUContext>(const float spatial_scale,
Ydata[pool_idx] = Idata[idx]; Ydata[pool_idx] = Idata[idx];
Mdata[pool_idx] = idx; Mdata[pool_idx] = idx;
} }
} //end w } // end w
} // end h } // end h
} // end pw } // end pw
} // end ph } // end ph
...@@ -1373,7 +1373,7 @@ template<> void ROIPooling<float, CPUContext>(const float spatial_scale, ...@@ -1373,7 +1373,7 @@ template<> void ROIPooling<float, CPUContext>(const float spatial_scale,
} // end c } // end c
// offset roi region // offset roi region
Rdata += roi->offset(1); Rdata += roi->offset(1);
} //end n } // end n
} }
template<> void ROIPoolingGrad<float, CPUContext>(const float spatial_scale, template<> void ROIPoolingGrad<float, CPUContext>(const float spatial_scale,
......
...@@ -130,7 +130,7 @@ __global__ void _ReluGrad(const int count, ...@@ -130,7 +130,7 @@ __global__ void _ReluGrad(const int count,
const T* y, const T* y,
const float slope, const float slope,
T* dx) { T* dx) {
CUDA_KERNEL_LOOP(i, count){ CUDA_KERNEL_LOOP(i, count) {
dx[i] = dy[i] * ((y[i] > 0) + slope * (y[i] <= 0)); dx[i] = dy[i] * ((y[i] > 0) + slope * (y[i] <= 0));
} }
} }
...@@ -912,7 +912,7 @@ __global__ void _Sum(const int count, ...@@ -912,7 +912,7 @@ __global__ void _Sum(const int count,
template<> void Sum<float, CUDAContext>( template<> void Sum<float, CUDAContext>(
const int count, const int axis_dim, const int count, const int axis_dim,
const int inner_dim, const float* x, float* y){ const int inner_dim, const float* x, float* y) {
_Sum<float> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count, _Sum<float> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count,
axis_dim, axis_dim,
inner_dim, inner_dim,
...@@ -954,7 +954,7 @@ template<> void SumGrad<float, CUDAContext>(const int count, ...@@ -954,7 +954,7 @@ template<> void SumGrad<float, CUDAContext>(const int count,
template <typename T> template <typename T>
__global__ void _Slice(const int count, const int outer_dim, const int inner_dim, __global__ void _Slice(const int count, const int outer_dim, const int inner_dim,
const int x_slice_dim, const int y_slice_dim, const int slice_offset, const T* x, T* y){ const int x_slice_dim, const int y_slice_dim, const int slice_offset, const T* x, T* y) {
CUDA_KERNEL_LOOP(idx, count) { CUDA_KERNEL_LOOP(idx, count) {
const int tmp = y_slice_dim * inner_dim; const int tmp = y_slice_dim * inner_dim;
const int outer_idx = idx / tmp; const int outer_idx = idx / tmp;
......
...@@ -9,20 +9,20 @@ namespace dragon { ...@@ -9,20 +9,20 @@ namespace dragon {
namespace sse { namespace sse {
template<> void Set(const int n, const float alpha, float* x){ template<> void Set(const int n, const float alpha, float* x) {
__m128 scalar = SSE_FP32_SCALAR(alpha); __m128 scalar = SSE_FP32_SCALAR(alpha);
SSE_LOOP1(i, n) SSE_FP32_STORE(x + i, scalar); SSE_LOOP1(i, n) SSE_FP32_STORE(x + i, scalar);
SSE_LOOP2(i, n) x[i] = alpha; SSE_LOOP2(i, n) x[i] = alpha;
} }
template<> void Set(const int n, const int alpha, int* x){ template<> void Set(const int n, const int alpha, int* x) {
__m128i scalar = SSE_INT32_SCALAR(alpha); __m128i scalar = SSE_INT32_SCALAR(alpha);
__m128i* x1 = reinterpret_cast<__m128i*>(x); __m128i* x1 = reinterpret_cast<__m128i*>(x);
SSE_LOOP1(i, n) SSE_INT128_STORE(x1++, scalar); SSE_LOOP1(i, n) SSE_INT128_STORE(x1++, scalar);
SSE_LOOP2(i, n) x[i] = alpha; SSE_LOOP2(i, n) x[i] = alpha;
} }
template<> void Add(const int n, const float* a, const float* b, float* y){ template<> void Add(const int n, const float* a, const float* b, float* y) {
__m128 x1, y1, z1; __m128 x1, y1, z1;
SSE_LOOP1(i, n) { SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(a + i); x1 = SSE_FP32_LOAD(a + i);
...@@ -33,7 +33,7 @@ namespace sse { ...@@ -33,7 +33,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] = a[i] + b[i]; SSE_LOOP2(i, n) y[i] = a[i] + b[i];
} }
template<> void Sub(const int n, const float* a, const float* b, float* y){ template<> void Sub(const int n, const float* a, const float* b, float* y) {
__m128 x1, y1, z1; __m128 x1, y1, z1;
SSE_LOOP1(i, n) { SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(a + i); x1 = SSE_FP32_LOAD(a + i);
...@@ -44,7 +44,7 @@ namespace sse { ...@@ -44,7 +44,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] = a[i] - b[i]; SSE_LOOP2(i, n) y[i] = a[i] - b[i];
} }
template<> void Mul(const int n, const float* a, const float* b, float* y){ template<> void Mul(const int n, const float* a, const float* b, float* y) {
__m128 x1, y1, z1; __m128 x1, y1, z1;
SSE_LOOP1(i, n) { SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(a + i); x1 = SSE_FP32_LOAD(a + i);
...@@ -55,7 +55,7 @@ namespace sse { ...@@ -55,7 +55,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] = a[i] * b[i]; SSE_LOOP2(i, n) y[i] = a[i] * b[i];
} }
template<> void Div(const int n, const float* a, const float* b, float* y){ template<> void Div(const int n, const float* a, const float* b, float* y) {
__m128 x1, y1, z1; __m128 x1, y1, z1;
SSE_LOOP1(i, n) { SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(a + i); x1 = SSE_FP32_LOAD(a + i);
...@@ -66,7 +66,7 @@ namespace sse { ...@@ -66,7 +66,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] = a[i] / b[i]; SSE_LOOP2(i, n) y[i] = a[i] / b[i];
} }
template<> void Scal(const int n, const float alpha, float* y){ template<> void Scal(const int n, const float alpha, float* y) {
__m128 y1, scalar = SSE_FP32_SCALAR(alpha); __m128 y1, scalar = SSE_FP32_SCALAR(alpha);
SSE_LOOP1(i, n) { SSE_LOOP1(i, n) {
y1 = SSE_FP32_LOAD(y + i); y1 = SSE_FP32_LOAD(y + i);
...@@ -76,7 +76,7 @@ namespace sse { ...@@ -76,7 +76,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] *= alpha; SSE_LOOP2(i, n) y[i] *= alpha;
} }
template<> void Scale(const int n, const float alpha, const float* x, float* y){ template<> void Scale(const int n, const float alpha, const float* x, float* y) {
__m128 x1, scalar = SSE_FP32_SCALAR(alpha); __m128 x1, scalar = SSE_FP32_SCALAR(alpha);
SSE_LOOP1(i, n) { SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(x + i); x1 = SSE_FP32_LOAD(x + i);
...@@ -87,7 +87,7 @@ namespace sse { ...@@ -87,7 +87,7 @@ namespace sse {
} }
template<> void Axpy(const int n, float alpha, const float* x, float *y){ template<> void Axpy(const int n, float alpha, const float* x, float *y) {
__m128 x1, y1, scalar = SSE_FP32_SCALAR(alpha); __m128 x1, y1, scalar = SSE_FP32_SCALAR(alpha);
SSE_LOOP1(i, n) { SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(x + i); x1 = SSE_FP32_LOAD(x + i);
...@@ -100,7 +100,7 @@ namespace sse { ...@@ -100,7 +100,7 @@ namespace sse {
} }
template<> void Axpby(const int n, float alpha, const float* x, template<> void Axpby(const int n, float alpha, const float* x,
const float beta, float *y){ const float beta, float *y) {
__m128 x1, y1, z1; __m128 x1, y1, z1;
__m128 scalar1 = SSE_FP32_SCALAR(alpha); __m128 scalar1 = SSE_FP32_SCALAR(alpha);
__m128 scalar2 = SSE_FP32_SCALAR(beta); __m128 scalar2 = SSE_FP32_SCALAR(beta);
...@@ -115,7 +115,7 @@ namespace sse { ...@@ -115,7 +115,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] = alpha * x[i] + beta* y[i]; SSE_LOOP2(i, n) y[i] = alpha * x[i] + beta* y[i];
} }
template<> float ASum(const int n, const float *x){ template<> float ASum(const int n, const float *x) {
__m128 x1, sum = SSE_FP32_ZERO; __m128 x1, sum = SSE_FP32_ZERO;
SSE_LOOP1(i, n) { SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(x + i); x1 = SSE_FP32_LOAD(x + i);
...@@ -128,7 +128,7 @@ namespace sse { ...@@ -128,7 +128,7 @@ namespace sse {
return ret; return ret;
} }
template<> void AddScalar(const int n, const float alpha, float* y){ template<> void AddScalar(const int n, const float alpha, float* y) {
__m128 y1, scalar = SSE_FP32_SCALAR(alpha); __m128 y1, scalar = SSE_FP32_SCALAR(alpha);
SSE_LOOP1(i, n) { SSE_LOOP1(i, n) {
y1 = SSE_FP32_LOAD(y + i); y1 = SSE_FP32_LOAD(y + i);
...@@ -138,7 +138,7 @@ namespace sse { ...@@ -138,7 +138,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] += alpha; SSE_LOOP2(i, n) y[i] += alpha;
} }
template<> void MulScalar(const int n, const float alpha, float* y){ template<> void MulScalar(const int n, const float alpha, float* y) {
__m128 y1, scalar = SSE_FP32_SCALAR(alpha); __m128 y1, scalar = SSE_FP32_SCALAR(alpha);
SSE_LOOP1(i, n) { SSE_LOOP1(i, n) {
y1 = SSE_FP32_LOAD(y + i); y1 = SSE_FP32_LOAD(y + i);
...@@ -148,7 +148,7 @@ namespace sse { ...@@ -148,7 +148,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] *= alpha; SSE_LOOP2(i, n) y[i] *= alpha;
} }
template <> float Dot(const int n, const float* a, const float* b){ template <> float Dot(const int n, const float* a, const float* b) {
__m128 x1, y1, sum = SSE_FP32_ZERO; __m128 x1, y1, sum = SSE_FP32_ZERO;
SSE_LOOP1(i, n) { SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(a + i); x1 = SSE_FP32_LOAD(a + i);
......
...@@ -11,6 +11,7 @@ import sys ...@@ -11,6 +11,7 @@ import sys
import time import time
import shutil import shutil
import tarfile import tarfile
import numpy as np
from six.moves import range as xrange from six.moves import range as xrange
import cv2 import cv2
...@@ -78,7 +79,7 @@ def extract_images(): ...@@ -78,7 +79,7 @@ def extract_images():
f.write(item) f.write(item)
def make_db(image_path, label_path, database_path): def make_db(image_path, label_path, database_path, pad=0):
if os.path.isfile(label_path) is False: if os.path.isfile(label_path) is False:
raise ValueError('input path is empty or wrong.') raise ValueError('input path is empty or wrong.')
if os.path.isdir(database_path) is True: if os.path.isdir(database_path) is True:
...@@ -111,6 +112,12 @@ def make_db(image_path, label_path, database_path): ...@@ -111,6 +112,12 @@ def make_db(image_path, label_path, database_path):
label = record[1] label = record[1]
img = cv2.imread(os.path.join(image_path ,path)) img = cv2.imread(os.path.join(image_path ,path))
if pad > 0:
pad_img = np.zeros((img.shape[0] + 2 * pad,
img.shape[1] + 2 * pad, 3), dtype=np.uint8)
pad_img[pad : pad + img.shape[0],
pad : pad + img.shape[1], :] = img
img = pad_img
result, imgencode = cv2.imencode('.jpg', img, encode_param) result, imgencode = cv2.imencode('.jpg', img, encode_param)
datum = caffe_pb2.Datum() datum = caffe_pb2.Datum()
......
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!