Commit 5d518b6c by Ting PAN

io refactoring

1 parent 31e02b2b
Showing with 310 additions and 237 deletions
......@@ -24,7 +24,7 @@ set(3RDPARTY_DIR ${PROJECT_SOURCE_DIR}/../3rdparty)
set(PYTHON_DIR /usr/include/python2.7) # prefer
#set(PYTHON_DIR /usr/include/python3.x) # optional, set specific version
#set(ANACONDA_DIR /xxx/anaconda) # optional, set specific version below if using py3
set(NUMPY_DIR /xxx/numpy) # require root folder of numpy package
set(NUMPY_DIR /xxx/numpy) # require, root folder of numpy package
# set CUDA compiling architecture
set(CUDA_ARCH -gencode arch=compute_20,code=sm_20
......
......@@ -52,7 +52,7 @@ class CPUContext{
inline static void Delete(void* data) { free(data); }
template<typename T, class DstContext, class SrcContext>
inline static void Copy(int n, T* dst, const T* src){
inline static void Copy(int n, T* dst, const T* src) {
if (dst == src) return;
// only the basic types(e.g. int/float) can memcpy correctly
if (std::is_fundamental<T>::value)
......
......@@ -119,7 +119,7 @@ class CUDAContext {
inline static void Delete(void* data) { cudaFree(data); }
template<typename T, class DstContext, class SrcContext>
static void Copy(int n, T* dst, const T* src){
static void Copy(int n, T* dst, const T* src) {
if (dst == src) return;
Memcpy<SrcContext, DstContext>(n * sizeof(T), (void*)dst, (const void*)src);
}
......@@ -148,7 +148,7 @@ class CUDAContext {
}
#ifdef WITH_CUDNN
cudnnHandle_t cudnn_handle(){
cudnnHandle_t cudnn_handle() {
auto& handle = cuda_object_.cudnn_handle[gpu_id_];
if (handle) {
return handle;
......
......@@ -77,7 +77,7 @@ class Tensor {
inline TIndex offset(const vector<TIndex>& vec) {
CHECK_LE(vec.size(), ndim());
TIndex offset = 0;
for (int i = 0; i < ndim(); i++){
for (int i = 0; i < ndim(); i++) {
offset = offset * dim(i);
if (vec.size() > i) offset += vec[i];
}
......@@ -130,7 +130,7 @@ class Tensor {
}
template <class Context>
void* raw_mutable_data(const TypeMeta& meta){
void* raw_mutable_data(const TypeMeta& meta) {
void* data_ptr;
active_data_ptr<Context>(&data_ptr);
if (meta_ == meta && data_ptr) {
......
......@@ -75,20 +75,20 @@ class TypeMeta {
bool Match() const { return (id_ == Id<T>()); }
template <typename T>
static void Ctor(void* ptr, size_t n){
static void Ctor(void* ptr, size_t n) {
T* typed_ptr = static_cast<T*>(ptr);
for (unsigned int i = 0; i < n; i++) new(typed_ptr + i) T;
}
template <typename T>
static void Copy(const void* src, void* dst, size_t n){
static void Copy(const void* src, void* dst, size_t n) {
const T* typed_src = static_cast<const T*>(src);
T* typed_dst = static_cast<T*>(dst);
for (unsigned int i = 0; i < n; i++) typed_dst[i] = typed_src[i];
}
template <typename T>
static void Dtor(void* ptr, size_t n){
static void Dtor(void* ptr, size_t n) {
T* typed_ptr = static_cast<T*>(ptr);
for (unsigned int i = 0; i < n; i++) typed_ptr[i].~T();
}
......
......@@ -44,7 +44,7 @@ class Workspace{
return tensor_map_.count(query) > 0;
}
inline Tensor* CreateTensor(const string& name){
inline Tensor* CreateTensor(const string& name) {
string query = GetTensorName(name);
if (!HasTensor(query))
tensor_map_[query] = unique_ptr<Tensor>(new Tensor(query));
......@@ -143,7 +143,7 @@ class Workspace{
return graph_map_[graph_name]->Run(include, exclude);
}
inline vector<string> GetGraphs(){
inline vector<string> GetGraphs() {
vector<string> names;
for (auto& it : graph_map_) names.push_back(it.first);
return names;
......
......@@ -25,9 +25,9 @@ class AccuracyOp final: public Operator<Context> {
public:
AccuracyOp(const OperatorDef& op_def, Workspace* ws)
: Operator<Context>(op_def, ws),
top_k(OperatorBase::GetSingleArg<int>("top_k", 1)){
top_k(OperatorBase::GetSingleArg<int>("top_k", 1)) {
vector<int> args = OperatorBase::GetRepeatedArg<int>("ignore_labels");
if (args.size()){
if (args.size()) {
ignore_labels.Reshape(vector<TIndex>(1, args.size()));
int* ignore_data = ignore_labels.mutable_data<int, CPUContext>();
for (int i = 0; i < args.size(); i++) ignore_data[i] = args[i];
......
......@@ -39,7 +39,7 @@ class ROIAlignGradientOp : public Operator<Context> {
: Operator<Context>(op_def, ws),
pool_h(OperatorBase::GetSingleArg<int>("pool_h", 0)),
pool_w(OperatorBase::GetSingleArg<int>("pool_w", 0)),
spatial_scale(OperatorBase::GetSingleArg<float>("spatial_scale", 1.0)){
spatial_scale(OperatorBase::GetSingleArg<float>("spatial_scale", 1.0)) {
CHECK_GT(pool_h, 0) << "\npool_h must > 0";
CHECK_GT(pool_w, 0) << "\npool_w must > 0";
}
......
......@@ -54,7 +54,7 @@ inline void LoadCaffeModel(string file, string scope, Workspace* ws) {
ReadProtoFromBinaryFile(file.c_str(), &net_param);
LOG(INFO) << "Restore From Model @: " << file << "......";
LOG(INFO) << "Model Format: CaffeModel";
for (int i = 0; i < net_param.layer_size(); i++){
for (int i = 0; i < net_param.layer_size(); i++) {
const LayerParameter& layer = net_param.layer(i);
const string& layer_name = layer.name();
string prefix = scope + layer_name + "@param";
......
......@@ -22,7 +22,7 @@ inline std::vector<std::string> SplitString(const std::string& str,
std::vector<std::string> ret;
std::string temp(str);
size_t pos;
while (pos = temp.find(c), pos != std::string::npos){
while (pos = temp.find(c), pos != std::string::npos) {
ret.push_back(temp.substr(0, pos));
temp.erase(0, pos + 1);
}
......
......@@ -31,7 +31,7 @@ const TypeMeta& NumpyTypeToDragon(int numpy_type) {
{ NPY_FLOAT16, TypeMeta::Make<float16>() },
{ NPY_UINT8, TypeMeta::Make<uint8_t>() }};
static TypeMeta unknown_type; // id = 0
static TypeMeta unknown_type;
return dragon_type_map.count(numpy_type) ? dragon_type_map[numpy_type] : unknown_type;
}
......@@ -50,7 +50,7 @@ REGISTER_TENSOR_FETCHER(TypeMeta::Id<NumpyFetcher>(), NumpyFetcher);
REGISTER_TENSOR_FETCHER(TypeMeta::Id<StringFetcher>(), StringFetcher);
REGISTER_TENSOR_FEEDER(TypeMeta::Id<NumpyFeeder>(), NumpyFeeder);
extern "C"{
extern "C" {
PyObject* RegisteredOperatorsCC(PyObject* self, PyObject* args) {
set<string> all_keys;
......@@ -123,7 +123,7 @@ bool SwitchWorkspaceInternal(const string& name, const bool create_if_missing) {
} else if (create_if_missing) {
unique_ptr<Workspace> new_workspace(new Workspace());
g_workspace = new_workspace.get();
g_workspaces[name] = std::move(new_workspace); // ???
g_workspaces[name] = std::move(new_workspace);
g_current_workspace = name;
return true;
} else {
......
......@@ -33,7 +33,7 @@ inline PyObject* StdStringToPyBytes(const std::string& str) {
return PyBytes_FromStringAndSize(str.c_str(), str.size());
}
template <typename T>
inline void MakeStringInternal(std::stringstream& ss, const T& t){ ss << t; }
inline void MakeStringInternal(std::stringstream& ss, const T& t) { ss << t; }
template <typename T,typename ... Args>
inline void MakeStringInternal(std::stringstream& ss, const T& t, const Args& ... args) {
......@@ -124,7 +124,7 @@ class NumpyFeeder : public TensorFeederBase {
Tensor* tensor) override {
PyArrayObject* array = PyArray_GETCONTIGUOUS(original_array);
const TypeMeta& meta = NumpyTypeToDragon(PyArray_TYPE(array));
if (meta.id() == 0){
if (meta.id() == 0) {
PyErr_SetString(PyExc_TypeError, "numpy data type is not supported.");
return nullptr;
}
......
......@@ -63,11 +63,11 @@ inline PyObject* MPICreateGroupCC(PyObject* self, PyObject* args) {
// check include ranks
int size = PyList_Size(incl);
if (size > 0){
if (size > 0) {
all_ranks.clear();
unique_ptr<int> incl_ranks(new int[size]);
int* ranks = incl_ranks.get();
for (int i = 0; i < size; i++){
for (int i = 0; i < size; i++) {
ranks[i] = _PyInt_AsInt(PyList_GetItem(incl, i));
all_ranks.insert(ranks[i]);
}
......@@ -81,7 +81,7 @@ inline PyObject* MPICreateGroupCC(PyObject* self, PyObject* args) {
all_ranks.clear(); Set<int> tmp;
unique_ptr<int> excl_ranks(new int[size]);
int* ranks = excl_ranks.get();
for (int i = 0; i < size; i++){
for (int i = 0; i < size; i++) {
ranks[i] = _PyInt_AsInt(PyList_GetItem(excl, i));
tmp.insert(ranks[i]);
}
......@@ -97,7 +97,7 @@ inline PyObject* MPICreateGroupCC(PyObject* self, PyObject* args) {
if (local_comm != MPI_COMM_NULL) {
int world_rank, local_size;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
if (world_rank == local_root){
if (world_rank == local_root) {
MPI_Comm_size(local_comm, &local_size);
std::stringstream ss;
ss << "Rank[" << world_rank << "]: "
......
# --------------------------------------------------------
# Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
from .data_batch import DataBatch
# --------------------------------------------------------
# Caffe for Dragon
# Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
......@@ -10,7 +10,7 @@ from six.moves import range as xrange
from dragon.config import logger
from .__init__ import GetProperty
from .utils import GetProperty
class BlobFetcher(Process):
def __init__(self, **kwargs):
......@@ -30,16 +30,16 @@ class BlobFetcher(Process):
atexit.register(cleanup)
def im_list_to_blob(self):
datum = self.Q_in.get() # (h, w, BGR)
im = datum[0]; h, w, c = im.shape
im_blob = np.zeros((self._batch_size, h, w, c), dtype=np.float32)
datum = self.Q_in.get()
im_blob = []
label_blob = np.zeros((self._batch_size, len(datum[1])), dtype=np.float32) \
if len(datum) > 1 else None
for i in xrange(0, self._batch_size):
im_blob[i, 0:h, 0:w, :] = datum[0]
im_blob.append(datum[0])
if label_blob is not None: label_blob[i, :] = datum[1]
if i != self._batch_size - 1: datum = self.Q_in.get()
channel_swap = (0, 3, 1, 2)
im_blob = np.array(im_blob, dtype=np.float32)
im_blob = im_blob.transpose(channel_swap)
return (im_blob, label_blob)
......
# --------------------------------------------------------
# Caffe for Dragon
# Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
import sys
import time
import pprint
from multiprocessing import Queue
if sys.version_info >= (3,0):
from queue import Queue as Queue2
else:
from Queue import Queue as Queue2
import threading
from six.moves import range as xrange
import dragon.core.mpi as mpi
......@@ -16,10 +22,11 @@ from .data_reader import DataReader
from .data_transformer import DataTransformer
from .blob_fetcher import BlobFetcher
from .__init__ import GetProperty
from .utils import GetProperty
class DataBatch(object):
class DataBatch(threading.Thread):
def __init__(self, **kwargs):
super(DataBatch, self).__init__()
"""DataBatch use Triple-Buffering to speed up"""
......@@ -35,10 +42,10 @@ class DataBatch(object):
kwargs['group_size'] = group_size
# configuration
self._prefetch = GetProperty(kwargs, 'prefetch', 40)
self._prefetch = GetProperty(kwargs, 'prefetch', 5)
self._num_readers = GetProperty(kwargs, 'num_readers', 1)
self._num_transformers = GetProperty(kwargs, 'num_transformers', -1)
self._num_fetchers = GetProperty(kwargs, 'num_fetchers', 3)
self._num_fetchers = GetProperty(kwargs, 'num_fetchers', 1)
# default policy
if self._num_transformers == -1:
......@@ -60,6 +67,7 @@ class DataBatch(object):
self.Q_level_1 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q_level_2 = Queue(self._prefetch * self._num_readers * self._batch_size)
self.Q_level_3 = Queue(self._prefetch * self._num_readers)
self.Q_level_4 = Queue2(self._prefetch * self._num_readers)
# init readers
self._readers = []
......@@ -102,11 +110,16 @@ class DataBatch(object):
self._fetchers.append(fetcher)
time.sleep(0.1)
self.daemon = True
self.start()
#self.echo()
@property
def blobs(self):
return self.Q_level_3.get()
def run(self):
while True:
self.Q_level_4.put(self.Q_level_3.get())
def get(self):
return self.Q_level_4.get()
def echo(self):
logger.info('---------------------------------------------------------')
......
# --------------------------------------------------------
# Caffe for Dragon
# Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
......@@ -12,8 +12,7 @@ import dragon.config as config
from dragon.config import logger
from dragon.tools.db import LMDB
from .__init__ import GetProperty
from .utils import GetProperty
class DataReader(Process):
def __init__(self, **kwargs):
......
# --------------------------------------------------------
# Caffe for Dragon
# Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
......@@ -12,7 +12,7 @@ import dragon.config as config
from dragon.config import logger
import dragon.vm.caffe.proto.caffe_pb2 as pb
from .__init__ import GetProperty
from .utils import GetProperty
try:
import cv2
......@@ -131,5 +131,3 @@ class DataTransformer(Process):
while True:
serialized = self.Q_in.get()
self.Q_out.put(self.transform_image_label(serialized))
\ No newline at end of file
# --------------------------------------------------------
# Caffe for Dragon
# Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
......
# --------------------------------------------------------
# Caffe for Dragon
# Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
import dragon.vm.caffe as caffe
import dragon.core.workspace as ws
from .minibatch import DataBatch
from dragon.io.data_batch import DataBatch
class DataLayer(caffe.Layer):
def setup(self, bottom, top):
class MiniBatchOp(object):
def setup(self, inputs, outputs):
kwargs = eval(self.param_str)
self._data_batch = DataBatch(**kwargs)
def forward(self, bottom, top):
blobs = self._data_batch.blobs
def run(self, inputs, outputs):
blobs = self._data_batch.get()
for idx, blob in enumerate(blobs):
ws.FeedTensor(top[idx], blob)
\ No newline at end of file
ws.FeedTensor(outputs[idx], blob)
\ No newline at end of file
......@@ -10,7 +10,6 @@ from dragon.operators.utils import Run
def LMDBData(**kwargs):
"""
:param kwargs: a dict of imagenet data param
:param --> mean_value: a list of mean values for channels [B-G-R]
:param --> source: a str of the images root directory
:param --> imageset: a str of text file contains image name / label
......@@ -30,8 +29,8 @@ def LMDBData(**kwargs):
args = locals(); kwargs = args['kwargs']
del args['kwargs']; kwargs = dict(args, **kwargs)
kwargs['module'] = 'dragon.vm.caffe.io.data_layer'
kwargs['op'] = 'DataLayer'
kwargs['module'] = 'dragon.operators.custom.minibatch'
kwargs['op'] = 'MiniBatchOp'
return Run([], param_str=str(kwargs), nout=2, **kwargs)
......
# --------------------------------------------------------
# Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
""" Generate LMDB from images """
import os
import sys
import time
import shutil
import argparse
import cv2
try:
import numpy as np
except: pass
from dragon.tools.db import LMDB
from dragon.vm.caffe.proto import caffe_pb2
def resize_image(im, resize):
    """Resize ``im`` so that its shorter edge becomes ``resize`` pixels.

    The aspect ratio is kept; the longer edge is scaled by the same factor
    and rounded down to a whole pixel.

    :param im: image array whose ``shape[0]`` is the height and ``shape[1]`` the width
    :param resize: target length (in pixels) of the shorter edge
    :return: the array returned by ``cv2.resize``
    """
    height, width = im.shape[0], im.shape[1]
    # cv2.resize takes the target size as (width, height) and requires integers,
    # so use floor division: plain ``/`` produces floats on Python 3.
    if height > width:
        # width is the shorter edge
        newsize = (resize, height * resize // width)
    else:
        # height is the shorter edge (or the image is square)
        newsize = (width * resize // height, resize)
    return cv2.resize(im, newsize)
def make_db(args):
    """Pack the images named in ``args.list`` into a new LMDB at ``args.database``.

    Every line of the list file is split on whitespace; the first field is the
    image path relative to ``args.root`` and the second is its integer label.
    Each image is optionally resized (``args.resize``) and zero-padded
    (``args.pad``), JPEG-encoded with quality ``args.quality`` and stored as a
    serialized ``Datum`` under a zero-padded running index. Two extra records,
    ``size`` and ``zfill``, are written at the end, and the list file is copied
    into the database folder as ``image_list.txt``.

    :param args: the namespace produced by ``parse_args``
    :raises ValueError: if the list file is missing, the database folder already
        exists, or an image cannot be read
    """
    if os.path.isfile(args.list) is False:
        raise ValueError('the path of image list is invalid.')
    if os.path.isdir(args.database) is True:
        raise ValueError('the database is already exist or invalid.')

    print('start time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(args.database, mode='w')

    # Read the list once inside a ``with`` block so the handle is always closed;
    # the record count doubles as the total used for progress reporting.
    with open(args.list, 'r') as input_file:
        records = input_file.readlines()
    total_line = len(records)
    if args.shuffle:
        import random
        random.shuffle(records)

    count = 0
    zfill_flag = '{0:0%d}' % (args.zfill)
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), args.quality]
    start_time = time.time()

    for record in records:
        count += 1
        if count % 10000 == 0:
            # report progress and flush the pending records
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()

        record = record.split()
        path = record[0]
        label = record[1]

        image_path = os.path.join(args.root, path)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising
            raise ValueError('failed to read image: {0}'.format(image_path))
        if args.resize > 0:
            img = resize_image(img, args.resize)
        if args.pad > 0:
            # surround the image with a zero border of args.pad pixels
            pad_img = np.zeros((img.shape[0] + 2 * args.pad,
                                img.shape[1] + 2 * args.pad, 3), dtype=img.dtype)
            pad_img[args.pad : args.pad + img.shape[0],
                    args.pad : args.pad + img.shape[1], :] = img
            img = pad_img
        result, imgencode = cv2.imencode('.jpg', img, encode_param)

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = True
        # tobytes() is the supported spelling of the deprecated tostring()
        datum.data = imgencode.tobytes()
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
    db.put('size', str(count))
    db.put('zfill', str(args.zfill))
    db.commit()
    db.close()

    shutil.copy(args.list, args.database + '/image_list.txt')
    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(end_time - start_time))
    # divide by floats so the size is not truncated to whole MB on Python 2
    print('The size of database is {0} MB.'.
          format(os.path.getsize(args.database + '/data.mdb') / 1000.0 / 1000.0))
def _str2bool(text):
    """Convert a command-line token such as ``True`` / ``false`` / ``0`` to a bool."""
    lowered = text.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    # the empty string stays False, as it was under ``bool('')``
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {0!r}'.format(text))


def parse_args():
    """Parse the command line of the LMDB creation script.

    Prints the help text and exits with status 1 when fewer than three
    command-line tokens follow the program name.

    :return: the ``argparse.Namespace`` holding root, list, database, zfill,
        resize, pad, quality and shuffle
    """
    parser = argparse.ArgumentParser(description='Create LMDB from images for classification.')
    parser.add_argument('--root', help='the root folder of raw images')
    parser.add_argument('--list', help='the filepath of image list')
    parser.add_argument('--database', help='the filepath of database')
    parser.add_argument('--zfill', type=int, default=8, help='zfill for the key of database')
    parser.add_argument('--resize', type=int, default=0, help='resize the shorter edge of image to the newsize')
    parser.add_argument('--pad', type=int, default=0, help='zero-pad the image')
    parser.add_argument('--quality', type=int, default=95, help='JPEG quality for encoding, 1-100')
    # ``type=bool`` would map every non-empty string (including "False") to True,
    # so the value is parsed explicitly instead.
    parser.add_argument('--shuffle', type=_str2bool, default=True, help='randomize the order in list file True')

    if len(sys.argv) < 4:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args
if __name__ == '__main__':
args = parse_args()
make_db(args)
\ No newline at end of file
# --------------------------------------------------------
# Dragon
# Copyright(c) 2017 SeetaTech
# Written by Ting Pan
# --------------------------------------------------------
from google.protobuf.message import Message
from protos import dragon_pb2 as pb
import numpy as np
def MakeArgument(key, value):
    """Wrap a Python value into a ``pb.Argument`` named ``key``.

    The field that receives the value is chosen from the exact type of ``value``:
    float -> ``f``, int -> ``i``, ``np.int64`` -> ``i64``, text -> ``s``,
    bool -> ``b``, protobuf ``Message`` -> ``s`` (serialized). For any other
    value the elements are inspected: all floats -> ``floats``, all ints ->
    ``ints``, all text -> ``strings``, all Messages -> ``strings`` (serialized).

    :param key: the argument name
    :param value: a scalar, a protobuf Message, or an iterable of one of these
    :return: the populated ``pb.Argument``
    :raises ValueError: if the elements of ``value`` match none of the known types
    """
    # ``unicode`` only exists on Python 2; referencing it directly raises
    # NameError on Python 3 for every value that is not float/int/int64/str.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    argument = pb.Argument()
    argument.name = key
    if type(value) is float: argument.f = value
    elif type(value) is int: argument.i = value
    elif type(value) is np.int64: argument.i64 = int(value)
    elif type(value) is str: argument.s = value
    elif type(value) is text_type: argument.s = value
    elif type(value) is bool: argument.b = value
    elif isinstance(value, Message): argument.s = value.SerializeToString()
    elif all(type(v) is float for v in value): argument.floats.extend(value)
    elif all(type(v) is int for v in value): argument.ints.extend(value)
    elif all(type(v) is str for v in value): argument.strings.extend(value)
    elif all(type(v) is text_type or type(v) is str for v in value): argument.strings.extend(value)
    elif all(isinstance(v, Message) for v in value):
        argument.strings.extend([v.SerializeToString() for v in value])
    else: raise ValueError('unknown argument type: key={} value={} value type={}' \
                           .format(key, value, type(value)))
    return argument
def MakeOperatorDef(op_type, inputs, outputs, name='',
                    device_option=None, arg=None, engine=None, **kwargs):
    """Assemble a ``pb.OperatorDef`` from its type, tensors and arguments.

    :param op_type: value stored in the ``type`` field
    :param inputs: iterable of input tensors; each is converted with ``str``
    :param outputs: iterable of output tensors; each is converted with ``str``
    :param name: value stored in the ``name`` field
    :param device_option: optional message copied into ``device_option``
    :param arg: optional iterable of ready-made arguments appended to ``arg``
    :param engine: optional value stored in the ``engine`` field
    :param kwargs: extra arguments; ``random_seed`` is written to
        ``device_option.random_seed``, every other non-None entry is converted
        with ``MakeArgument``
    :return: the populated ``pb.OperatorDef``
    """
    op_def = pb.OperatorDef()
    op_def.type = op_type
    op_def.name = name
    op_def.input.extend(list(map(str, inputs)))
    op_def.output.extend(list(map(str, outputs)))

    if device_option is not None:
        op_def.device_option.CopyFrom(device_option)
    if engine is not None:
        op_def.engine = engine

    # random_seed goes onto device_option and is removed from kwargs so it is
    # not emitted a second time as a regular argument below
    if 'random_seed' in kwargs:
        op_def.device_option.random_seed = kwargs.pop('random_seed')

    if arg is not None:
        op_def.arg.extend(arg)

    for arg_name, arg_value in kwargs.items():
        if arg_value is not None:
            op_def.arg.add().CopyFrom(MakeArgument(arg_name, arg_value))
    return op_def
def MakeDeviceOption(device_type, gpu_id, rng_seed = None):
    """Build a ``pb.DeviceOption``.

    :param device_type: value stored in ``device_type``
    :param gpu_id: value stored in ``gpu_id``
    :param rng_seed: optional value stored in ``random_seed``; left unset when None
    :return: the populated ``pb.DeviceOption``
    """
    device_opt = pb.DeviceOption()
    device_opt.device_type = device_type
    device_opt.gpu_id = gpu_id
    if rng_seed is None:
        return device_opt
    device_opt.random_seed = rng_seed
    return device_opt
# fix the python stdout
class Unbuffered(object):
    """Stream wrapper that flushes the wrapped stream after every write.

    Any attribute not defined here is looked up on the wrapped stream, so the
    wrapper can stand in for the stream itself.
    """

    def __init__(self, stream):
        # the underlying file-like object that receives all output
        self.stream = stream

    def write(self, data):
        """Write ``data``, flush, and return whatever the stream's ``write`` returned."""
        written = self.stream.write(data)
        self.stream.flush()
        return written

    def writelines(self, lines):
        """Write every item of ``lines`` and flush once afterwards."""
        # defined explicitly: via __getattr__ this call would skip the flush
        self.stream.writelines(lines)
        self.stream.flush()

    def __getattr__(self, attr):
        # __getattr__ only runs when normal lookup fails; if ``stream`` itself is
        # missing (e.g. an instance created without __init__), bail out instead
        # of recursing forever through self.stream
        if attr == 'stream':
            raise AttributeError(attr)
        return getattr(self.stream, attr)
# clear the stdout buffer for mpi(c++ & python)
import sys
sys.stdout = Unbuffered(sys.stdout)
\ No newline at end of file
......@@ -221,7 +221,7 @@ message SolverParameter {
// RMSProp decay value
// MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
optional float rms_decay = 38;
optional float rms_decay = 38 [default = 0.99];
// If true, print information about the state of the net that may help with
// debugging learning problems.
......@@ -676,7 +676,7 @@ message DataParameter {
optional bool force_encoded_color = 9 [default = false];
// Prefetch queue (Number of batches to prefetch to host memory, increase if
// data access bandwidth varies).
optional uint32 prefetch = 10 [default = 40];
optional uint32 prefetch = 10 [default = 5];
}
message DropoutParameter {
......
......@@ -10,7 +10,7 @@ CUDAObject CUDAContext::cuda_object_;
// cpu <- gpu
template<> void CPUContext::Memcpy<CPUContext, CUDAContext>(
size_t nbytes, void* dst, const void* src){
size_t nbytes, void* dst, const void* src) {
#ifdef WITH_CUDA
CUDAContext ctx(POINTER_DEVICE(src));
ctx.Memcpy<CPUContext, CUDAContext>(nbytes, dst, src);
......@@ -21,7 +21,7 @@ template<> void CPUContext::Memcpy<CPUContext, CUDAContext>(
// gpu <- cpu
template<> void CPUContext::Memcpy<CUDAContext, CPUContext>(
size_t nbytes, void* dst, const void* src){
size_t nbytes, void* dst, const void* src) {
#ifdef WITH_CUDA
CUDAContext ctx(POINTER_DEVICE(dst));
ctx.Memcpy<CUDAContext, CPUContext>(nbytes, dst, src);
......
......@@ -151,13 +151,13 @@ GraphDef Graph::Prune(const GraphDef& graph_def) {
OperatorDef op_def;
op_def.CopyFrom(graph_def.op(it));
// handle inputs
for (int i = 0; i < graph_def.op(it).input_size(); i++){
for (int i = 0; i < graph_def.op(it).input_size(); i++) {
string input = graph_def.op(it).input(i);
if (!colored_[input] || !outputs.count(input))
*op_def.mutable_input(i) = "ignore";
}
// handle outputs
for (int i = 0; i < graph_def.op(it).output_size(); i++){
for (int i = 0; i < graph_def.op(it).output_size(); i++) {
string output = graph_def.op(it).output(i);
if (!colored_[output]) *op_def.mutable_output(i) = "ignore";
else outputs.insert(op_def.output(i));
......
......@@ -23,7 +23,7 @@ CheckTuple GraphGradientMaker::CheckMissingGrad(OperatorDef* forward_op) {
inputs_to_grads_[output] = g_output;
// consider generate virtual grad
else if (targets_set_.count(output) && g_output != "ignore"){
else if (targets_set_.count(output) && g_output != "ignore") {
gen_grads.push_back({ output, idx });
inputs_to_grads_[output] = g_output;
}
......@@ -50,7 +50,7 @@ GraphDef GraphGradientMaker::Make() {
Set<string> all_split_grads;
// PLAY for the forward
for (auto& op : forward_def_.op()){
for (auto& op : forward_def_.op()) {
if (NoGradientRegistry()->Has(op.type())) continue;
for (auto& input : op.input()) inputs_count[input]++;
}
......@@ -73,17 +73,17 @@ GraphDef GraphGradientMaker::Make() {
Gradient grad = MakeGradientForOp(*op, g_outputs);
// replace terms
for (auto& g_op : grad.ops){
for (auto& g_op : grad.ops) {
g_op.set_name(GetOperatorName());
for (int i = 0; i < g_op.input_size(); i++){
for (int i = 0; i < g_op.input_size(); i++) {
string* input = g_op.mutable_input(i);
if (terms_.count(*input)) *input = terms_[*input];
}
for (int i = 0; i < g_op.output_size(); i++){
for (int i = 0; i < g_op.output_size(); i++) {
string* output = g_op.mutable_output(i);
if (terms_.count(*output)) *output = terms_[*output];
}
for (int i = 0; i < grad.g_inputs.size(); i++){
for (int i = 0; i < grad.g_inputs.size(); i++) {
if (terms_.count(grad.g_inputs[i]))
grad.g_inputs[i] = terms_[grad.g_inputs[i]];
}
......@@ -106,14 +106,14 @@ GraphDef GraphGradientMaker::Make() {
string split_name = *output + "_autosplit_" + str(grads_count[*output]++);
if (!is_skip) all_split_grads.insert(split_name);
// gather
if (grads_count[*output] == inputs_count[original_name]){
if (grads_count[*output] == inputs_count[original_name]) {
gather_op = new OperatorDef();
gather_op->set_name(GetOperatorName());
gather_op->set_type("GradientGather");
gather_op->add_output(*output);
if (g_op.has_device_option())
gather_op->mutable_device_option()->CopyFrom(g_op.device_option());
for (int j = 0; j < grads_count[*output]; j++){
for (int j = 0; j < grads_count[*output]; j++) {
string key = *output + "_autosplit_" + str(j);
if (all_split_grads.count(key)) gather_op->add_input(key);
}
......
......@@ -66,7 +66,7 @@ DEFINE_REGISTRY(GradientRegistry, GradientMakerBase, const OperatorDef&, const v
DEFINE_REGISTRY(NoGradientRegistry, GradientMakerBase, const OperatorDef&, const vector<string>&);
#define INSTANTIATE_GET_SINGLE_ARGUMENT(T, fieldname) \
template <> T OperatorBase::GetSingleArg(const string& name, const T& default_value){ \
template <> T OperatorBase::GetSingleArg(const string& name, const T& default_value) { \
if(args_.count(name) == 0) { \
return default_value; \
} \
......@@ -82,7 +82,7 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(int64_t, i64);
#define INSTANTIATE_GET_REPEATED_ARGUMENT(T, fieldname) \
template<> vector<T> OperatorBase::GetRepeatedArg<T>(const string& name){ \
template<> vector<T> OperatorBase::GetRepeatedArg<T>(const string& name) { \
if(args_.count(name) == 0) return vector<T>(); \
vector<T> values; \
for(const auto& v : args_[name]->fieldname()) values.push_back(v); \
......
......@@ -17,7 +17,7 @@ bool OpSchema::Verify(const OperatorDef& def) const {
}
for (int in = 0; in < def.input_size(); in++) {
if (def.input(in) == "ignore") continue;
for (int out = 0; out < def.output_size(); out++){
for (int out = 0; out < def.output_size(); out++) {
if (def.output(out) == "ignore") continue;
if (def.input(in) == def.output(out) && (!CheckInplace(in, out)))
LOG(FATAL) << "[" << def.name() << "] input("
......
......@@ -71,10 +71,9 @@ void DropoutGradientOp<Context>::RunOnDevice() {
template <class Context>
void DropoutGradientOp<Context>::ClearAfterRun() {
ws()->ReleaseBuffer(mask);
ws()->ReleaseBuffer(mask, true);
}
DEPLOY_CPU(DropoutGradient);
#ifdef WITH_CUDA
DEPLOY_CUDA(DropoutGradient);
......
......@@ -125,7 +125,7 @@ void DotGradientOp<Context>::GemvRunWithType() {
}
template <class Context>
void DotGradientOp<Context>::RunOnDevice(){
void DotGradientOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0));
output(1)->ReshapeLike(input(1));
......
......@@ -65,7 +65,7 @@ void EltwiseGradientOp<Context>::SumRunWithType() {
auto* dYdata = input(-1).template data<T, Context>();
TIndex count = input(-1).count();
for (int i = 0; i < OutputSize(); i++){
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() == "ignore") continue;
auto* dXdata = output(i)->template mutable_data<T, Context>();
if (coeffs[i] == float(1)) {
......
......@@ -8,7 +8,7 @@ template <class Context> template <typename T>
void GramMatrixOp<Context>::RunWithType() {
auto* Xdata = input(0).template data<T, Context>();
auto* Ydata = output(0)->template mutable_data<T, Context>();
for (int i = 0; i < outer_dim; i++){
for (int i = 0; i < outer_dim; i++) {
math::Gemm<T, Context>(CblasNoTrans, CblasTrans,
dim, dim, inner_dim, 1.0, Xdata, Xdata, 0.0, Ydata);
Xdata += x_offset;
......@@ -40,7 +40,7 @@ void GramMatrixGradientOp<Context>::RunWithType() {
auto* dYdata = input(-1).template data<T, Context>();
auto* Xdata = input(0).template data<T, Context>();
auto* dXdata = output(0)->template mutable_data<T, Context>();
for (int i = 0; i < outer_dim; i++){
for (int i = 0; i < outer_dim; i++) {
math::Gemm<T, Context>(CblasNoTrans, CblasNoTrans,
dim, inner_dim, dim, 2.0, dYdata, Xdata, 0.0, dXdata);
dYdata += y_offset;
......@@ -49,7 +49,7 @@ void GramMatrixGradientOp<Context>::RunWithType() {
}
template <class Context>
void GramMatrixGradientOp<Context>::RunOnDevice(){
void GramMatrixGradientOp<Context>::RunOnDevice() {
outer_dim = input(0).count(0, axis);
dim = input(0).dim(axis);
inner_dim = input(0).count(axis + 1);
......
......@@ -81,7 +81,7 @@ void MatmulGradientOp<Context>::RunWithType() {
}
template <class Context>
void MatmulGradientOp<Context>::RunOnDevice(){
void MatmulGradientOp<Context>::RunOnDevice() {
CHECK(input(0).ndim() == input(1).ndim())
<< "both matrices must have the same number of dimensions.";
CHECK_GE(input(0).ndim(), 2)
......
......@@ -9,7 +9,7 @@ void PowOp<Context>::RunWithType() {
TIndex count = input(0).count();
auto* Ydata = output(0)->template mutable_data<T, Context>();
if (power_scale == float(0)){
if (power_scale == float(0)) {
float value = (power == float(0)) ? float(1) : pow(shift, power);
math::Set<T, Context>(count, dragon_cast<T, float>(value), Ydata);
return;
......
......@@ -12,7 +12,7 @@ void SquareOp<Context>::RunWithType() {
}
template <class Context>
void SquareOp<Context>::RunOnDevice(){
void SquareOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0));
if (input(0).template IsType<float>()) RunWithType<float>();
......
......@@ -44,7 +44,7 @@ void SubOp<Context>::BroadcastRunWithType(int type) {
}
template <class Context>
void SubOp<Context>::RunOnDevice(){
void SubOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0));
if (input(0).dims() == input(1).dims()) {
......
......@@ -7,7 +7,7 @@ namespace dragon {
template <class Context> template <typename T>
void ConcatOp<Context>::RunWithType() {
auto* Ydata = output(0)->template mutable_data<T, Context>();
for (int i = 0; i < nin; i++){
for (int i = 0; i < nin; i++) {
auto* Xdata = input(i).template data<T, Context>();
TIndex count = input(i).count();
x_concat_dim = input(i).dim(axis);
......@@ -25,12 +25,12 @@ void ConcatOp<Context>::RunWithType() {
}
template <class Context>
void ConcatOp<Context>::RunOnDevice(){
void ConcatOp<Context>::RunOnDevice() {
concat_dims = input(0).dims();
for (int i = 1; i < nin; i++) {
CHECK_EQ(concat_dims.size(), input(i).ndim())
<< "\nall inputs must have the same ndim.";
for (int j = 0; j < concat_dims.size(); j++){
for (int j = 0; j < concat_dims.size(); j++) {
if (j == axis) continue;
CHECK_EQ(concat_dims[j], input(i).dim(j))
<< "\nall inputs must have the same dims"
......@@ -82,7 +82,7 @@ void ConcatGradientOp<Context>::RunWithType() {
}
template <class Context>
void ConcatGradientOp<Context>::RunOnDevice(){
void ConcatGradientOp<Context>::RunOnDevice() {
if (input(-1).name() == "ignore") return;
concat_dims = input(-1).dims();
y_concat_dim = concat_dims[axis];
......
......@@ -27,7 +27,7 @@ OPERATOR_SCHEMA(Flatten).NumInputs(1).NumOutputs(1);
template <class Context>
void FlattenGradientOp<Context>::RunOnDevice(){
void FlattenGradientOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0));
output(0)->Share(input(1));
}
......
......@@ -78,7 +78,7 @@ void ReduceGradientOp<Context>::SumRunWithType() {
template <class Context> template <typename T>
void ReduceGradientOp<Context>::MeanRunWithType() {
auto* dXdata = output(0)->template mutable_data<T, Context>();
if (axis == -1){
if (axis == -1) {
auto* dYdata = input(-1).template data<T, CPUContext>();
math::Set<T, Context>(output(0)->count(), dYdata[0] / input(0).count(), dXdata);
} else {
......
......@@ -150,13 +150,11 @@ void ScanOp<Context>::UnrollTemplate() {
template <class Context>
void ScanOp<Context>::RunOnDevice() {
// unroll
UnrollTemplate();
if (!graphs.count(nsteps))
if (!graphs.count(nsteps)) {
graphs[nsteps].reset(new Graph(new_def, ws()));
}
cur_graph = graphs[nsteps].get();
// forward
cur_graph->Run("", "");
}
......@@ -201,13 +199,11 @@ void ScanGradientOp<Context>::MakeGradientOps() {
template <class Context>
void ScanGradientOp<Context>::RunOnDevice() {
// make graph
MakeGradientOps();
if (!graphs.count(nsteps))
if (!graphs.count(nsteps)) {
graphs[nsteps].reset(new Graph(new_def, ws()));
}
cur_graph = graphs[nsteps].get();
// backward
cur_graph->Run("Gradient", "");
}
......
......@@ -7,7 +7,7 @@ namespace dragon {
template <class Context> template <typename T>
void SliceOp<Context>::RunWithType() {
auto* Xdata = input(0).template data<T, Context>();
for (int i = 0; i < nout; i++){
for (int i = 0; i < nout; i++) {
auto* Ydata = output(i)->template mutable_data<T, Context>();
TIndex count = output(i)->count();
kernel::Slice<T, Context>(count, outer_dim, inner_dim,
......@@ -46,7 +46,7 @@ OPERATOR_SCHEMA(Slice).NumInputs(1).NumOutputs(1, INT_MAX);
template <class Context> template <typename T>
void SliceGradientOp<Context>::RunWithType() {
auto* dXdata = output(0)->template mutable_data<T, Context>();
for (int i = 0; i < nout; i++){
for (int i = 0; i < nout; i++) {
if (input(i + 1).name() == "ignore") continue;
auto* dYdata = input(i + 1).template data<T, Context>();
TIndex count = input(i + 1).count();
......
......@@ -64,7 +64,7 @@ void AccuracyOp<Context>::RunWithType() {
} // end outer_num
output(0)->template mutable_data<T, CPUContext>()[0] = acc / count;
if (OutputSize() > 1){
if (OutputSize() > 1) {
auto* acc_per_class = output(1)->template mutable_data<T, CPUContext>();
for (int i = 0; i < classes; i++)
acc_per_class[i] = num_per_class[i] == 0 ? 0 : acc_per_class[i] / acc_per_class[i];
......
......@@ -28,7 +28,7 @@ void L1LossOp<Context>::RunWithType() {
}
template <class Context>
void L1LossOp<Context>::RunOnDevice(){
void L1LossOp<Context>::RunOnDevice() {
CHECK_EQ(input(0).count(), input(1).count());
output(0)->Reshape(vector<TIndex>(1, 1));
diff = ws()->CreateTensor("_t_" + anchor() + "_l1_loss_diff");
......@@ -53,7 +53,7 @@ void L1LossGradientOp<Context>::RunWithType() {
else if (normalization == "FULL") normalizer = input(0).count();
else if (normalization == "NONE") normalizer = 1;
alpha = alpha / normalizer;
for (int i = 0; i < 2; i++){
for (int i = 0; i < 2; i++) {
if (output(i)->name() == "ignore") continue;
output(i)->ReshapeLike(input(i));
auto* dXdata = output(i)->template mutable_data<T, Context>();
......
......@@ -11,7 +11,7 @@ void L2LossOp<Context>::RunWithType() {
auto* diff_data = diff->template mutable_data<T, Context>();
auto* Ydata = output(0)->template mutable_data<T, CPUContext>();
math::Sub<T, Context>(diff->count(), X0data, X1data, diff_data);
if (InputSize() > 2){
if (InputSize() > 2) {
CHECK_EQ(input(0).count(), input(2).count());
auto* Wdata = input(2).template data<T, Context>();
math::Mul<T, Context>(diff->count(), Wdata, diff_data, diff_data);
......
......@@ -14,12 +14,12 @@ void SmoothL1LossOp<Context>::RunWithType() {
auto* Ydata = output(0)->template mutable_data<T, CPUContext>();
math::Sub<T, Context>(diff->count(), X0data, X1data, diff_data);
if (InputSize() > 2){
if (InputSize() > 2) {
auto* inside_w_data = input(2).template data<T, Context>();
math::Mul<T, Context>(diff->count(), inside_w_data, diff_data, diff_data);
}
kernel::SmoothL1<T, Context>(diff->count(), sigma2, diff_data, error_data);
if (InputSize() > 3){
if (InputSize() > 3) {
auto* outside_w_data = input(3).template data<T, Context>();
math::Mul<T, Context>(diff->count(), outside_w_data, error_data, error_data);
}
......
......@@ -26,14 +26,14 @@ void MPIBroadcastOp<Context>::RunWithType() {
}
template <class Context>
void MPIBroadcastOp<Context>::RunOnDevice(){
void MPIBroadcastOp<Context>::RunOnDevice() {
CHECK(this->comm != MPI_COMM_NULL)
<< "\nMPIBroadcastOp, name: " << name()
<< ", does not belong to any group, can't run.";
size_t ndim[1];
TIndex* dims = nullptr;
if (this->comm_rank == this->comm_root){
if (this->comm_rank == this->comm_root) {
ndim[0] = input(0).ndim();
dims = new TIndex[ndim[0]];
for (int i = 0; i < input(0).ndim(); i++)
......@@ -90,7 +90,7 @@ void MPIBroadcastGradientOp<Context>::RunWithType() {
}
template <class Context>
void MPIBroadcastGradientOp<Context>::RunOnDevice(){
void MPIBroadcastGradientOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(-1));
if (input(0).template IsType<float>()) RunWithType<float>();
......
......@@ -51,7 +51,7 @@ template <class Context> template <typename T>
void MPIGatherGradientOp<Context>::RunWithType() {
if (this->comm_rank == this->comm_root) {
output(0)->Share(input(this->comm_rank + 1));
for (int i = 0; i < this->comm_size; i++){
for (int i = 0; i < this->comm_size; i++) {
if (i == this->comm_root) continue;
#ifdef WITH_CUDA_AWARE
auto* dYdata = input(this->comm_rank + 1).template data<T, Context>();
......
......@@ -60,7 +60,7 @@ void InstanceNormOp<Context>::RunWithType() {
}
template <class Context>
void InstanceNormOp<Context>::RunOnDevice(){
void InstanceNormOp<Context>::RunOnDevice() {
num = input(0).dim(0); channels = input(0).dim(1);
spatial_dim = input(0).count(2); nbychans = num * channels;
vector<TIndex> dims({ num, channels });
......
......@@ -109,7 +109,7 @@ void L2NormGradientOp<Context>::RunWithType() {
auto* Bdata = buffer->template mutable_data<T, Context>();
auto* BInnerdata = buffer_inner->template mutable_data<T, Context>();
for (int n = 0; n < outer_dim; n++){
for (int n = 0; n < outer_dim; n++) {
if (across_inner) {
Ndata = norm->template data<T, CPUContext>();
T sum_of_x_mul_dy = math::Dot<T, Context>(buffer->count(), Xdata, dYdata);
......
......@@ -21,7 +21,7 @@ void AsyncUpdateOp<Context>::UpdateTimestamp(int tag) {
}
template <class Context>
int AsyncUpdateOp<Context>::GetDelay(int tag){
int AsyncUpdateOp<Context>::GetDelay(int tag) {
Tensor* t = ws()->GetTensor("_t_" + this->domain + "async_timestamp");
int* global_timestamp = t->template mutable_data<int, CPUContext>();
return global_timestamp[tag] - local_timestamp[tag] + 1;
......@@ -71,7 +71,7 @@ AsyncUpdateOp<Context>::AsyncUpdateOp(const OperatorDef& op_def, Workspace* ws)
template <class Context> template <typename T>
void AsyncUpdateOp<Context>::RootRunWithType() {
for (int i = 0; i < InputSize(); i++){
for (int i = 0; i < InputSize(); i++) {
auto* dXdata = input(i).template mutable_data<T, Context>();
auto* Xdata = output(i)->template mutable_data<T, Context>();
......@@ -88,7 +88,7 @@ void AsyncUpdateOp<Context>::RootRunWithType() {
}
template <class Context>
void AsyncUpdateOp<Context>::RunOnDevice(){
void AsyncUpdateOp<Context>::RunOnDevice() {
if (this->comm_rank != this->comm_root) return;
if (input(0).template IsType<float>()) {
......
......@@ -98,7 +98,7 @@ void UpdateOpBase<Context>::ReduceRunWithType() {
}
// ave-normalize
if (comm_size > 1){
if (comm_size > 1) {
#ifdef WITH_CUDA_AWARE
math::Scal<T, Context>(count, T(1.0 / comm_size), dXdata);
#else
......@@ -112,7 +112,7 @@ template <class Context> template <typename T>
void UpdateOpBase<Context>::PreprocessRunWithType() {
// scale
scale_factor = param("scale_gradient");
if (scale_factor != 1){
if (scale_factor != 1) {
auto* dXdata = input(0).template mutable_data<T, Context>();
math::Scal<T, Context>(input(0).count(), scale_factor, dXdata);
}
......@@ -128,11 +128,11 @@ void UpdateOpBase<Context>::PreprocessRunWithType() {
}
}
// decay
l2_decay = param("l2_decay");
if (l2_decay > 0){
l2_decay = param("l2_decay") * decay_mult;
if (l2_decay > 0) {
auto* dXdata = input(0).template mutable_data<T, Context>();
auto* Xdata = output(0)->template data<T, Context>();
math::Axpy<T, Context>(input(0).count(), l2_decay * decay_mult, Xdata, dXdata);
math::Axpy<T, Context>(input(0).count(), l2_decay, Xdata, dXdata);
}
}
......
......@@ -12,7 +12,7 @@ void CompareOp<Context>::EqualRunWithType() {
}
template <class Context>
void CompareOp<Context>::RunOnDevice(){
void CompareOp<Context>::RunOnDevice() {
CHECK_EQ(input(0).count(), input(1).count())
<< "both conditioned tensor must have same elements.";
output(0)->ReshapeLike(input(0));
......
......@@ -6,7 +6,7 @@ namespace dragon {
template <class Context> template <typename T>
void GradientGenerateOp<Context>::RunWithType() {
for (int i = 0; i < OutputSize(); i++){
for (int i = 0; i < OutputSize(); i++) {
if (output(i)->name() == "ignore") continue;
output(i)->ReshapeLike(input(i));
auto* dXdata = output(0)->template mutable_data<T, Context>();
......
......@@ -7,7 +7,7 @@ namespace dragon {
template <class Context>
void ConvOp<Context>::ComputeOutputShape() {
this->output_shape.clear();
for (int i = 0; i < this->num_spatial_axes; i++){
for (int i = 0; i < this->num_spatial_axes; i++) {
const int input_dim = this->bottom_shape[this->channel_axis + i + 1];
const int dilated_kernel = this->dilation[i] * (this->kernel_size[i] - 1) + 1;
const int output_dim = (input_dim + 2 * this->pad[i] - dilated_kernel) / this->stride[i] + 1;
......@@ -43,7 +43,7 @@ void ConvOp<Context>::RunWithType() {
}
template <class Context>
void ConvOp<Context>::RunOnDevice(){
void ConvOp<Context>::RunOnDevice() {
Reshape();
if (input(0).template IsType<float>()) RunWithType<float>();
......@@ -71,7 +71,7 @@ void ConvGradientOp<Context>::RunWithType() {
Db(dYdata + n * this->y_offset, dBdata);
}
for (int n = 0; n < input(2).dim(0); n++){
for (int n = 0; n < input(2).dim(0); n++) {
if (output(1)->name() != "ignore") {
auto* Xdata = input(0).template data<T, Context>();
auto* dWdata = output(1)->template mutable_data<T, Context>();
......
......@@ -161,7 +161,7 @@ void ConvOpBase<Context>::GradientReshape() {
// compute input shape
input_shape.clear();
for (int i = 0; i < num_spatial_axes; i++){
for (int i = 0; i < num_spatial_axes; i++) {
if (ReverseDimensions()) {
input_shape.push_back(input(2).dim(channel_axis + i + 1));
} else {
......@@ -181,7 +181,7 @@ void ConvOpBase<Context>::GradientReshape() {
// compute col buffer shape
col_buffer_shape.clear();
col_buffer_shape.push_back(kernel_dim * group);
for (int i = 0; i < num_spatial_axes; i++){
for (int i = 0; i < num_spatial_axes; i++) {
if (ReverseDimensions()) {
col_buffer_shape.push_back(bottom_shape[channel_axis + i + 1]);
} else {
......
......@@ -228,13 +228,13 @@ void CuDNNConvGradientOp<Context>::RunWithType() {
const T* dYdata = input(2).template data<T, Context>();
for (int g = 0; g < this->group; g++) {
if (output(2)->name() != "ignore"){
if (output(2)->name() != "ignore") {
T* dBdata = output(2)->template mutable_data<T, Context>();
CUDNN_CHECK(cudnnConvolutionBackwardBias(handle[g],
CUDNNType<T>::one, input_desc, dYdata + this->y_offset * g,
CUDNNType<T>::one, bias_desc, dBdata + bias_offset * g));
}
if (output(1)->name() != "ignore"){
if (output(1)->name() != "ignore") {
auto* Xdata = input(0).template data<T, Context>();
auto* dWdata = output(1)->template mutable_data<T, Context>();
auto* workspace = buffer2->mutable_data<char, Context>();
......@@ -246,7 +246,7 @@ void CuDNNConvGradientOp<Context>::RunWithType() {
workspace + g * workspace_bwd_filter_size, workspace_bwd_filter_size,
CUDNNType<T>::one, filter_desc, dWdata + this->weight_offset * g));
}
if (output(0)->name() != "ignore"){
if (output(0)->name() != "ignore") {
auto* Wdata = input(1).template data<T, Context>();
auto* dXdata = output(0)->template mutable_data<T, Context>();
auto* workspace = buffer1->mutable_data<char, Context>();
......
......@@ -54,7 +54,7 @@ void CuDNNLRNGradientOp<Context>::RunWithType() {
}
template <class Context>
void CuDNNLRNGradientOp<Context>::RunOnDevice(){
void CuDNNLRNGradientOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0));
if (this->mode == ACROSS_CHANNELS) {
......
......@@ -43,7 +43,7 @@ void DeConvOp<Context>::RunWithType() {
}
template <class Context>
void DeConvOp<Context>::RunOnDevice(){
void DeConvOp<Context>::RunOnDevice() {
Reshape();
if (input(0).template IsType<float>()) RunWithType<float>();
......
......@@ -98,7 +98,7 @@ void LRNOp<Context>::ProdRunWithType() {
}
template <class Context>
void LRNOp<Context>::RunOnDevice(){
void LRNOp<Context>::RunOnDevice() {
if (mode == ACROSS_CHANNELS) {
if (input(0).template IsType<float>()) {
AcrossRunWithType<float>();
......@@ -223,7 +223,7 @@ void LRNGradientOp<Context>::SplitRunWithType() {
}
template <class Context>
void LRNGradientOp<Context>::RunOnDevice(){
void LRNGradientOp<Context>::RunOnDevice() {
if (mode == ACROSS_CHANNELS) {
if (input(0).template IsType<float>()) {
AcrossRunWithType<float>();
......
......@@ -16,7 +16,7 @@ void NNResizeOp<Context>::RunWithType() {
template <class Context>
void NNResizeOp<Context>::RunOnDevice() {
dims = input(0).dims();
if (dsize.size() == 0){
if (dsize.size() == 0) {
CHECK(fy != -1.0 && fx != -1.0);
dims[2] = int(dims[2] * fy);
dims[3] = int(dims[3] * fx);
......@@ -47,7 +47,7 @@ void NNResizeGradientOp<Context>::RunWithType() {
}
template <class Context>
void NNResizeGradientOp<Context>::RunOnDevice(){
void NNResizeGradientOp<Context>::RunOnDevice() {
output(0)->ReshapeLike(input(0));
if (input(0).template IsType<float>()) return RunWithType<float>();
......
......@@ -37,7 +37,7 @@ void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, const vector<TIndex>& dim
int* dimA = new int[ndim];
int* strideA = new int[ndim];
TIndex stride = 1;
for (int i = ndim - 1; i >= 0; i--){
for (int i = ndim - 1; i >= 0; i--) {
strideA[i] = stride;
dimA[i] = dims[i];
stride *= dimA[i];
......@@ -55,7 +55,7 @@ void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc,
int ndim = (int)dims.size();
int* dimA = new int[ndim];
int* strideA = new int[ndim];
for (int i = ndim - 1; i >= 0; i--){
for (int i = ndim - 1; i >= 0; i--) {
strideA[i] = strides[i];
dimA[i] = dims[i];
}
......@@ -66,10 +66,10 @@ void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc,
template <typename T>
void cudnnSetTensorDesc(cudnnTensorDescriptor_t* desc, Tensor* tensor) {
// CUDNN only support ndim from 3 to 8
// cuDNN requires ndim from 3 to 8
// we fake a reshaped dims to pass check
vector<TIndex> fake_dims(tensor->dims());
if (fake_dims.size() < 3 || fake_dims.size() > 8){
if (fake_dims.size() < 3 || fake_dims.size() > 8) {
fake_dims.assign({ 1, 1 });
fake_dims.push_back(tensor->count());
}
......
......@@ -142,7 +142,7 @@ template <> void Add<float, CPUContext>(const int n,
template <> void Sub<float, CPUContext>(const int n,
const float* a,
const float* b,
float* y){
float* y) {
#ifdef WITH_SSE
sse::Sub<float>(n, a, b, y);
#else // naive implement
......
......@@ -98,7 +98,7 @@ template<> void Softmax<float, CPUContext>(const int count,
const int dim = count / outer_dim;
for (int i = 0; i < outer_dim; ++i) {
context->Copy<float, CPUContext, CPUContext>(inner_dim, scale, x + i*dim);
for (int j = 0; j < classes; ++j){
for (int j = 0; j < classes; ++j) {
for (int k = 0; k < inner_dim; k++)
scale[k] = std::max(scale[k], x[i * dim + j * inner_dim + k]);
}
......@@ -1362,7 +1362,7 @@ template<> void ROIPooling<float, CPUContext>(const float spatial_scale,
Ydata[pool_idx] = Idata[idx];
Mdata[pool_idx] = idx;
}
} //end w
} // end w
} // end h
} // end pw
} // end ph
......@@ -1373,7 +1373,7 @@ template<> void ROIPooling<float, CPUContext>(const float spatial_scale,
} // end c
// offset roi region
Rdata += roi->offset(1);
} //end n
} // end n
}
template<> void ROIPoolingGrad<float, CPUContext>(const float spatial_scale,
......
......@@ -130,7 +130,7 @@ __global__ void _ReluGrad(const int count,
const T* y,
const float slope,
T* dx) {
CUDA_KERNEL_LOOP(i, count){
CUDA_KERNEL_LOOP(i, count) {
dx[i] = dy[i] * ((y[i] > 0) + slope * (y[i] <= 0));
}
}
......@@ -912,7 +912,7 @@ __global__ void _Sum(const int count,
template<> void Sum<float, CUDAContext>(
const int count, const int axis_dim,
const int inner_dim, const float* x, float* y){
const int inner_dim, const float* x, float* y) {
_Sum<float> << <GET_BLOCKS(count), CUDA_NUM_THREADS >> >(count,
axis_dim,
inner_dim,
......@@ -954,7 +954,7 @@ template<> void SumGrad<float, CUDAContext>(const int count,
template <typename T>
__global__ void _Slice(const int count, const int outer_dim, const int inner_dim,
const int x_slice_dim, const int y_slice_dim, const int slice_offset, const T* x, T* y){
const int x_slice_dim, const int y_slice_dim, const int slice_offset, const T* x, T* y) {
CUDA_KERNEL_LOOP(idx, count) {
const int tmp = y_slice_dim * inner_dim;
const int outer_idx = idx / tmp;
......
......@@ -9,20 +9,20 @@ namespace dragon {
namespace sse {
template<> void Set(const int n, const float alpha, float* x){
template<> void Set(const int n, const float alpha, float* x) {
__m128 scalar = SSE_FP32_SCALAR(alpha);
SSE_LOOP1(i, n) SSE_FP32_STORE(x + i, scalar);
SSE_LOOP2(i, n) x[i] = alpha;
}
template<> void Set(const int n, const int alpha, int* x){
template<> void Set(const int n, const int alpha, int* x) {
__m128i scalar = SSE_INT32_SCALAR(alpha);
__m128i* x1 = reinterpret_cast<__m128i*>(x);
SSE_LOOP1(i, n) SSE_INT128_STORE(x1++, scalar);
SSE_LOOP2(i, n) x[i] = alpha;
}
template<> void Add(const int n, const float* a, const float* b, float* y){
template<> void Add(const int n, const float* a, const float* b, float* y) {
__m128 x1, y1, z1;
SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(a + i);
......@@ -33,7 +33,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] = a[i] + b[i];
}
template<> void Sub(const int n, const float* a, const float* b, float* y){
template<> void Sub(const int n, const float* a, const float* b, float* y) {
__m128 x1, y1, z1;
SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(a + i);
......@@ -44,7 +44,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] = a[i] - b[i];
}
template<> void Mul(const int n, const float* a, const float* b, float* y){
template<> void Mul(const int n, const float* a, const float* b, float* y) {
__m128 x1, y1, z1;
SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(a + i);
......@@ -55,7 +55,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] = a[i] * b[i];
}
template<> void Div(const int n, const float* a, const float* b, float* y){
template<> void Div(const int n, const float* a, const float* b, float* y) {
__m128 x1, y1, z1;
SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(a + i);
......@@ -66,7 +66,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] = a[i] / b[i];
}
template<> void Scal(const int n, const float alpha, float* y){
template<> void Scal(const int n, const float alpha, float* y) {
__m128 y1, scalar = SSE_FP32_SCALAR(alpha);
SSE_LOOP1(i, n) {
y1 = SSE_FP32_LOAD(y + i);
......@@ -76,7 +76,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] *= alpha;
}
template<> void Scale(const int n, const float alpha, const float* x, float* y){
template<> void Scale(const int n, const float alpha, const float* x, float* y) {
__m128 x1, scalar = SSE_FP32_SCALAR(alpha);
SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(x + i);
......@@ -87,7 +87,7 @@ namespace sse {
}
template<> void Axpy(const int n, float alpha, const float* x, float *y){
template<> void Axpy(const int n, float alpha, const float* x, float *y) {
__m128 x1, y1, scalar = SSE_FP32_SCALAR(alpha);
SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(x + i);
......@@ -100,7 +100,7 @@ namespace sse {
}
template<> void Axpby(const int n, float alpha, const float* x,
const float beta, float *y){
const float beta, float *y) {
__m128 x1, y1, z1;
__m128 scalar1 = SSE_FP32_SCALAR(alpha);
__m128 scalar2 = SSE_FP32_SCALAR(beta);
......@@ -115,7 +115,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] = alpha * x[i] + beta* y[i];
}
template<> float ASum(const int n, const float *x){
template<> float ASum(const int n, const float *x) {
__m128 x1, sum = SSE_FP32_ZERO;
SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(x + i);
......@@ -128,7 +128,7 @@ namespace sse {
return ret;
}
template<> void AddScalar(const int n, const float alpha, float* y){
template<> void AddScalar(const int n, const float alpha, float* y) {
__m128 y1, scalar = SSE_FP32_SCALAR(alpha);
SSE_LOOP1(i, n) {
y1 = SSE_FP32_LOAD(y + i);
......@@ -138,7 +138,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] += alpha;
}
template<> void MulScalar(const int n, const float alpha, float* y){
template<> void MulScalar(const int n, const float alpha, float* y) {
__m128 y1, scalar = SSE_FP32_SCALAR(alpha);
SSE_LOOP1(i, n) {
y1 = SSE_FP32_LOAD(y + i);
......@@ -148,7 +148,7 @@ namespace sse {
SSE_LOOP2(i, n) y[i] *= alpha;
}
template <> float Dot(const int n, const float* a, const float* b){
template <> float Dot(const int n, const float* a, const float* b) {
__m128 x1, y1, sum = SSE_FP32_ZERO;
SSE_LOOP1(i, n) {
x1 = SSE_FP32_LOAD(a + i);
......
......@@ -11,6 +11,7 @@ import sys
import time
import shutil
import tarfile
import numpy as np
from six.moves import range as xrange
import cv2
......@@ -78,7 +79,7 @@ def extract_images():
f.write(item)
def make_db(image_path, label_path, database_path):
def make_db(image_path, label_path, database_path, pad=0):
if os.path.isfile(label_path) is False:
raise ValueError('input path is empty or wrong.')
if os.path.isdir(database_path) is True:
......@@ -111,6 +112,12 @@ def make_db(image_path, label_path, database_path):
label = record[1]
img = cv2.imread(os.path.join(image_path ,path))
if pad > 0:
pad_img = np.zeros((img.shape[0] + 2 * pad,
img.shape[1] + 2 * pad, 3), dtype=np.uint8)
pad_img[pad : pad + img.shape[0],
pad : pad + img.shape[1], :] = img
img = pad_img
result, imgencode = cv2.imencode('.jpg', img, encode_param)
datum = caffe_pb2.Datum()
......
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!