Commit ba81b99b by Ting PAN

Simplify Installation

1 parent 72f3c4ba
Showing with 494 additions and 203 deletions
# - Find the NumPy libraries
# This module finds if NumPy is installed, and sets the following variables
# indicating where it is.
#
# TODO: Update to provide the libraries and paths for linking npymath lib.
#
# NUMPY_FOUND - was NumPy found
# NUMPY_VERSION - the version of NumPy found as a string
# NUMPY_VERSION_MAJOR - the major version number of NumPy
# NUMPY_VERSION_MINOR - the minor version number of NumPy
# NUMPY_VERSION_PATCH - the patch version number of NumPy
# NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601
# NUMPY_INCLUDE_DIR - path to the NumPy include files
# Start from a clean slate so a failed probe cannot leave stale results behind.
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
if(PYTHONINTERP_FOUND)
  # Ask the interpreter itself: line 1 of the output is numpy.__version__,
  # line 2 is numpy.get_include().
  execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
    "import numpy as n; print(n.__version__); print(n.get_include());"
    RESULT_VARIABLE __result
    OUTPUT_VARIABLE __output
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  # Compare numerically: a regex test for "0" would also accept any exit code
  # that merely contains a zero digit (10, -1073741819, ...).
  if(__result EQUAL 0)
    # Escape literal semicolons, then turn line breaks into list separators.
    # The inputs are quoted so that the value is not split on ';' before the
    # escaping has a chance to run.
    string(REGEX REPLACE ";" "\\\\;" __values "${__output}")
    string(REGEX REPLACE "\r?\n" ";" __values "${__values}")
    # Only read the two fields when both lines are present; otherwise leave
    # the results unset and fall through to the diagnostic below.
    list(LENGTH __values __count)
    if(__count GREATER 1)
      list(GET __values 0 NUMPY_VERSION)
      list(GET __values 1 NUMPY_INCLUDE_DIR)
    endif()
    # The '+' sits inside each group so that a multi-digit component such as
    # the "16" in 1.16.4 is captured whole rather than as its last digit.
    string(REGEX MATCH "^([0-9]+)\\.([0-9]+)\\.([0-9]+)" __ver_check "${NUMPY_VERSION}")
    if(NOT "${__ver_check}" STREQUAL "")
      set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1})
      set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2})
      set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3})
      # e.g. 1.6.1 -> 10601
      math(EXPR NUMPY_VERSION_DECIMAL
        "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
      # Normalise directory separators to '/'.
      string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR "${NUMPY_INCLUDE_DIR}")
    else()
      unset(NUMPY_VERSION)
      unset(NUMPY_INCLUDE_DIR)
      message(STATUS "Requested NumPy version and include path, but got instead:\n${__output}\n")
    endif()
  endif()
else()
  # Without an interpreter there is nothing to query; stop configuring.
  message("Can not find Python interpretator.")
  message(FATAL_ERROR "Do you set PYTHON_EXECUTABLE correctly?")
endif()
# Derive the found flag from the two required result variables.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NumPy REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION
  VERSION_VAR NUMPY_VERSION)
if(NUMPY_FOUND)
  message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})")
endif()
\ No newline at end of file
# - Find python libraries
# This module finds the libraries corresponding to the Python interpreter
# FindPythonInterp provides.
# This code sets the following variables:
#
# PYTHONLIBS_FOUND - have the Python libs been found
# PYTHON_PREFIX - path to the Python installation
# PYTHON_LIBRARIES - path to the python library
# PYTHON_INCLUDE_DIRS - path to where Python.h is found
# PYTHON_MODULE_EXTENSION - lib extension, e.g. '.so' or '.pyd'
# PYTHON_MODULE_PREFIX - lib name prefix: usually an empty string
# PYTHON_SITE_PACKAGES - path to installation site-packages
# PYTHON_IS_DEBUG - whether the Python interpreter is a debug build
#
# Thanks to talljimbo for the patch adding the 'LDVERSION' config
# variable usage.
#=============================================================================
# Copyright 2001-2009 Kitware, Inc.
# Copyright 2012 Continuum Analytics, Inc.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# * Neither the names of Kitware, Inc., the Insight Software Consortium,
# nor the names of their contributors may be used to endorse or promote
# products derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#=============================================================================
# Nothing to do when this module has already produced its results. The module
# extension is tested as well because it is only set by `LibsNew`, not by the
# plain `Libs` module.
if(PYTHONLIBS_FOUND)
  if(PYTHON_MODULE_EXTENSION)
    return()
  endif()
endif()
# The libraries are located through the interpreter, so find that first and
# forward the REQUIRED flag only when this module was itself required.
set(_PYTHON_INTERP_REQUIRED "")
if(PythonLibsNew_FIND_REQUIRED)
  set(_PYTHON_INTERP_REQUIRED REQUIRED)
endif()
find_package(PythonInterp ${PythonLibsNew_FIND_VERSION} ${_PYTHON_INTERP_REQUIRED})
unset(_PYTHON_INTERP_REQUIRED)
# No interpreter means no way to query it: report "not found" and stop.
if(NOT PYTHONINTERP_FOUND)
  set(PYTHONLIBS_FOUND FALSE)
  return()
endif()
# According to http://stackoverflow.com/questions/646518/python-how-to-detect-debug-interpreter
# testing whether sys has the gettotalrefcount function is a reliable, cross-platform
# way to detect a CPython debug interpreter.
#
# The library suffix is from the config var LDVERSION sometimes, otherwise
# VERSION. VERSION will typically be like "2.7" on unix, and "27" on windows.
#
# The module extension is read from EXT_SUFFIX first and from the older SO
# config var only as a fallback for interpreters that do not define EXT_SUFFIX.
#
# The script prints exactly one value per line, in the order the list(GET)
# calls below read them. Its lines must stay unindented: they are Python source.
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
  "from distutils import sysconfig as s;import sys;import struct;
print('.'.join(str(v) for v in sys.version_info));
print(sys.prefix);
print(s.get_python_inc(plat_specific=True));
print(s.get_python_lib(plat_specific=True));
print(s.get_config_var('EXT_SUFFIX') or s.get_config_var('SO'));
print(hasattr(sys, 'gettotalrefcount')+0);
print(struct.calcsize('@P'));
print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
print(s.get_config_var('LIBDIR') or '');
print(s.get_config_var('MULTIARCH') or '');
"
  RESULT_VARIABLE _PYTHON_SUCCESS
  OUTPUT_VARIABLE _PYTHON_VALUES
  ERROR_VARIABLE _PYTHON_ERROR_VALUE)
# Compare the exit status numerically: a regex test for "0" would treat any
# status that merely contains a zero digit as success.
if(NOT _PYTHON_SUCCESS EQUAL 0)
  if(PythonLibsNew_FIND_REQUIRED)
    message(FATAL_ERROR
      "Python config failure:\n${_PYTHON_ERROR_VALUE}")
  endif()
  set(PYTHONLIBS_FOUND FALSE)
  return()
endif()
# Convert the process output into a list: escape literal semicolons first,
# then make every newline a list separator. The inputs are quoted so the
# value is not split on ';' before it has been escaped.
string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES "${_PYTHON_VALUES}")
string(REGEX REPLACE "\n" ";" _PYTHON_VALUES "${_PYTHON_VALUES}")
list(GET _PYTHON_VALUES 0 _PYTHON_VERSION_LIST)
list(GET _PYTHON_VALUES 1 PYTHON_PREFIX)
list(GET _PYTHON_VALUES 2 PYTHON_INCLUDE_DIR)
list(GET _PYTHON_VALUES 3 PYTHON_SITE_PACKAGES)
list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION)
list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG)
list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P)
list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX)
list(GET _PYTHON_VALUES 8 PYTHON_LIBDIR)
list(GET _PYTHON_VALUES 9 PYTHON_MULTIARCH)
# The interpreter and the chosen compiler must agree on pointer width.
# The comparison is skipped entirely while CMAKE_SIZEOF_VOID_P is undefined.
if(CMAKE_SIZEOF_VOID_P)
  if(NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}")
    # A mismatch is fatal only when the module was required; otherwise it is
    # reported as "not found".
    if(PythonLibsNew_FIND_REQUIRED)
      math(EXPR _PYTHON_BITS "${PYTHON_SIZEOF_VOID_P} * 8")
      math(EXPR _CMAKE_BITS "${CMAKE_SIZEOF_VOID_P} * 8")
      message(FATAL_ERROR
        "Python config failure: Python is ${_PYTHON_BITS}-bit, "
        "chosen compiler is ${_CMAKE_BITS}-bit")
    endif()
    set(PYTHONLIBS_FOUND FALSE)
    return()
  endif()
endif()
# The built-in FindPython didn't always give the version numbers, so split
# the dotted version reported by the interpreter into its components.
# Every input below is quoted so that an empty value still reaches string()
# as an argument instead of vanishing from the argument list.
string(REGEX REPLACE "\\." ";" _PYTHON_VERSION_LIST "${_PYTHON_VERSION_LIST}")
list(GET _PYTHON_VERSION_LIST 0 PYTHON_VERSION_MAJOR)
list(GET _PYTHON_VERSION_LIST 1 PYTHON_VERSION_MINOR)
list(GET _PYTHON_VERSION_LIST 2 PYTHON_VERSION_PATCH)
# Make sure all directory separators are '/'
string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX "${PYTHON_PREFIX}")
string(REGEX REPLACE "\\\\" "/" PYTHON_INCLUDE_DIR "${PYTHON_INCLUDE_DIR}")
string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES "${PYTHON_SITE_PACKAGES}")
if(NOT CMAKE_HOST_WIN32)
  # Search only the directories the interpreter reported: the multiarch
  # subdirectory first (when there is one), then LIBDIR itself.
  set(_PYTHON_LIBS_SEARCH "")
  if(PYTHON_MULTIARCH)
    list(APPEND _PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}/${PYTHON_MULTIARCH}")
  endif()
  list(APPEND _PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}")
  # Probably this needs to be more involved. It would be nice if the config
  # information the python interpreter itself gave us were more complete.
  find_library(PYTHON_LIBRARY
    NAMES "python${PYTHON_LIBRARY_SUFFIX}"
    PATHS ${_PYTHON_LIBS_SEARCH}
    NO_DEFAULT_PATH)
  # Last resort: hand the bare library name to the linker and let it
  # figure out the path.
  if(NOT PYTHON_LIBRARY)
    set(PYTHON_LIBRARY python${PYTHON_LIBRARY_SUFFIX})
  endif()
else()
  # First guess: the import library sits under <prefix>/libs.
  set(PYTHON_LIBRARY
    "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
  if(NOT EXISTS "${PYTHON_LIBRARY}")
    # When run in a venv, PYTHON_PREFIX points to it, but the libraries
    # remain in the original python installation. They may be found
    # relative to PYTHON_INCLUDE_DIR.
    get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY)
    set(PYTHON_LIBRARY
      "${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
    # Raise an error if the python libs are still not found.
    if(NOT EXISTS "${PYTHON_LIBRARY}")
      message(FATAL_ERROR "Python libraries not found")
    endif()
  endif()
endif()
mark_as_advanced(
  PYTHON_LIBRARY
  PYTHON_INCLUDE_DIR
)
# We use PYTHON_INCLUDE_DIR, PYTHON_LIBRARY and PYTHON_DEBUG_LIBRARY for the
# cache entries because they are meant to specify the location of a single
# library. We now set the variables listed by the documentation for this
# module.
set(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}")
set(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
set(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}")
# find_package_message() lives in FindPackageMessage; include it explicitly
# rather than relying on another module having pulled it in.
include(FindPackageMessage)
# The last argument is the "details" key that decides whether the message is
# printed again. It combines the interpreter path with PYTHON_VERSION_STRING
# (expected to be set by FindPythonInterp) so that a version change re-prints it.
find_package_message(PYTHON
  "Found PythonLibs: ${PYTHON_LIBRARY}"
  "${PYTHON_EXECUTABLE}${PYTHON_VERSION_STRING}")
set(PYTHONLIBS_FOUND TRUE)
# ---------------- Welcom To Use Dragon ---------------- # ---------------- Welcom To Use Dragon ----------------
PROJECT(dragon) project(dragon)
CMAKE_MINIMUM_REQUIRED(VERSION 3.0.0) cmake_minimum_required(VERSION 3.0.0)
# ---------------- Welcom To Use Dragon ---------------- # ---------------- Welcom To Use Dragon ----------------
# ---------------- User Config ---------------- # ---------------- User Config ----------------
# set optional libraries # Set optional libraries
option(WITH_PYTHON3 "Set ON to use PYTHON3 otherwise PYTHON2" OFF) option(WITH_PYTHON "Set ON to use PYTHON" ON)
option(WITH_CUDA "Set ON to use CUDA" ON) option(WITH_CUDA "Set ON to use CUDA" ON)
option(WITH_CUDNN "Set ON to use CUDNN" OFF) option(WITH_CUDNN "Set ON to use CUDNN" ON)
option(WITH_BLAS "Set ON to use BLAS" OFF) option(WITH_BLAS "Set ON to use BLAS" ON)
option(WITH_OMP "Set ON to use OpenMP" ON) option(WITH_OMP "Set ON to use OpenMP" OFF)
option(WITH_SSE "Set ON to use SSE 4.1" ON) option(WITH_SSE "Set ON to use SSE 4.1" ON)
option(WITH_MPI "Set ON to use MPI" OFF) option(WITH_MPI "Set ON to use MPI" OFF)
option(WITH_MPI_CUDA "Set ON to use MPI-CUDA" OFF) option(WITH_MPI_CUDA "Set ON to use MPI-CUDA" OFF)
option(WITH_MPI_NCCL "Set ON to use MPI-NCCL" OFF) option(WITH_MPI_NCCL "Set ON to use MPI-NCCL" OFF)
option(WITH_CUDA_FP16 "Set ON to use FP16" ON) option(WITH_CUDA_FP16 "Set ON to use FP16" ON)
# set your 3rdparty # Set your 3rdparty
set(3RDPARTY_DIR ${PROJECT_SOURCE_DIR}/../3rdparty) set(3RDPARTY_DIR ${PROJECT_SOURCE_DIR}/../3rdparty)
# set your python environment # set your python "interpreter" if necessary
set(PYTHON_INCLUDE_DIR /usr/include/python2.7) # preferred # if not, a default interpreter will be used
#set(PYTHON_INCLUDE_DIR /usr/include/python3.x) # optional, set specific version # here, provide several examples:
#set(ANACONDA_ROOT_DIR /xxx/anaconda) # optional, preset for 2.7, 3.5, and 3.6 # set(PYTHON_EXECUTABLE /usr/bin/python) # Linux, OS
set(NUMPY_ROOT_DIR /xxx/numpy) # required # set(PYTHON_EXECUTABLE /X/anaconda/bin/python) # Linux, Anaconda
# set(PYTHON_EXECUTABLE X:/Anaconda/python) # Win, Anaconda
# set CUDA compiling architecture # Set CUDA compiling architecture
set(CUDA_ARCH -gencode arch=compute_30,code=sm_30 set(CUDA_ARCH -gencode arch=compute_30,code=sm_30
-gencode arch=compute_35,code=sm_35 -gencode arch=compute_35,code=sm_35
-gencode arch=compute_50,code=sm_50 -gencode arch=compute_50,code=sm_50
-gencode arch=compute_60,code=sm_60) -gencode arch=compute_60,code=sm_60)
# Set CUDNN Libs if necessary (Linux Only)
set(CUDNN_LIBRARIES /usr/local/cuda/lib64)
# ---------------- User Config ---------------- # ---------------- User Config ----------------
...@@ -61,8 +65,12 @@ set(CUDA_ARCH -gencode arch=compute_30,code=sm_30 ...@@ -61,8 +65,12 @@ set(CUDA_ARCH -gencode arch=compute_30,code=sm_30
# ---[ Dependencies # ---[ Dependencies
if (WITH_PYTHON)
include(${PROJECT_SOURCE_DIR}/../CMake/FindPythonLibs.cmake)
include(${PROJECT_SOURCE_DIR}/../CMake/FindNumPy.cmake)
endif()
if (WITH_CUDA) if (WITH_CUDA)
FIND_PACKAGE(CUDA REQUIRED) find_package(CUDA REQUIRED)
endif() endif()
set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD 11)
...@@ -74,35 +82,42 @@ set(CMAKE_BUILD_TYPE Release CACHE STRING "set build type to release") ...@@ -74,35 +82,42 @@ set(CMAKE_BUILD_TYPE Release CACHE STRING "set build type to release")
set(CMAKE_CONFIGURATION_TYPES Release CACHE STRING "set build type to release" FORCE) set(CMAKE_CONFIGURATION_TYPES Release CACHE STRING "set build type to release" FORCE)
# ---[ Includes # ---[ Includes
set(INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
include_directories(${INCLUDE_DIR})
include_directories(${3RDPARTY_DIR}/include) include_directories(${3RDPARTY_DIR}/include)
include_directories(${3RDPARTY_DIR}/include/mpi) include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${CUDA_INCLUDE_DIRS})
include_directories(${PROJECT_SOURCE_DIR}/src) include_directories(${PROJECT_SOURCE_DIR}/src)
include_directories(${PYTHON_INCLUDE_DIR}) if (WITH_PYTHON)
include_directories(${ANACONDA_ROOT_DIR}/include) include_directories(${PYTHON_INCLUDE_DIRS})
include_directories(${ANACONDA_ROOT_DIR}/include/python2.7) include_directories(${NUMPY_INCLUDE_DIR})
include_directories(${ANACONDA_ROOT_DIR}/include/python3.5) endif()
include_directories(${ANACONDA_ROOT_DIR}/include/python3.6) if (WITH_CUDA)
include_directories(${NUMPY_ROOT_DIR}/core/include) include_directories(${CUDA_INCLUDE_DIRS})
include_directories(${NUMPY_ROOT_DIR}/include) endif()
include_directories(${NUMPY_ROOT_DIR}) if (WITH_MPI)
include_directories(${3RDPARTY_DIR}/include/mpi)
endif()
# ---[ libs # ---[ libs
set(3RDPARTY_LIBS ${3RDPARTY_DIR}/lib) set(3RDPARTY_LIBS ${3RDPARTY_DIR}/lib)
set(UINX_CUDNN_LIBS /usr/local/cuda/lib64)
link_directories(${3RDPARTY_LIBS}) link_directories(${3RDPARTY_LIBS})
link_directories(${UINX_CUDNN_LIBS}) link_directories(${CUDNN_LIBRARIES})
link_directories(${PYTHON_LIBRARIES})
# ---[ Install # ---[ Install
set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR} CACHE STRING "set install prefix" FORCE) set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR} CACHE STRING "set install prefix" FORCE)
set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${3RDPARTY_LIBS}) set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${3RDPARTY_LIBS})
# ---[ defines # ---[ defines
if (WITH_PYTHON3) if (WITH_PYTHON)
ADD_DEFINITIONS(-DWITH_PYTHON)
if (${PYTHON_VERSION_MAJOR} STREQUAL "2")
message(STATUS "Use Python2 [Optional]")
elseif (${PYTHON_VERSION_MAJOR} STREQUAL "3")
message(STATUS "Use Python3 [Optional]")
ADD_DEFINITIONS(-DWITH_PYTHON3) ADD_DEFINITIONS(-DWITH_PYTHON3)
message(STATUS "Use PYTHON3 [Optional]") else()
message("Invalid version of Python(Detected ${PYTHON_VERSION_STRING})")
message(FATAL_ERROR "Do you set PYTHON_EXECUTABLE correctly?")
endif()
endif() endif()
if (WITH_CUDA) if (WITH_CUDA)
ADD_DEFINITIONS(-DWITH_CUDA) ADD_DEFINITIONS(-DWITH_CUDA)
...@@ -133,6 +148,7 @@ if (WITH_SSE) ...@@ -133,6 +148,7 @@ if (WITH_SSE)
endif() endif()
if (WITH_MPI) if (WITH_MPI)
ADD_DEFINITIONS(-DWITH_MPI) ADD_DEFINITIONS(-DWITH_MPI)
#set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${3RDPARTY_LIBS}/../openmpi/install/lib)
message(STATUS "Use MPI [Optional]") message(STATUS "Use MPI [Optional]")
endif() endif()
if (WITH_MPI_CUDA) if (WITH_MPI_CUDA)
......
...@@ -110,8 +110,8 @@ class Operator : public OperatorBase { ...@@ -110,8 +110,8 @@ class Operator : public OperatorBase {
virtual void RunOnDevice() = 0; virtual void RunOnDevice() = 0;
inline Context& ctx() { return ctx_; } inline Context& ctx() { return ctx_; }
inline string anchor() { return GetSingleArg("anchor", name()); } inline string Anchor() { return GetSingleArg("anchor", name()); }
inline bool allow_run() { return allow_run_; } inline bool AllowRun() { return allow_run_; }
protected: protected:
Context ctx_; Context ctx_;
...@@ -155,7 +155,8 @@ OperatorBase* CreateOperator(const OperatorDef& op_def, Workspace* ws); ...@@ -155,7 +155,8 @@ OperatorBase* CreateOperator(const OperatorDef& op_def, Workspace* ws);
#define USE_OPERATOR_FUNCTIONS(context) \ #define USE_OPERATOR_FUNCTIONS(context) \
USE_OPERATOR_BASE_FUNCTIONS; \ USE_OPERATOR_BASE_FUNCTIONS; \
using Operator<context>::ctx; \ using Operator<context>::ctx; \
using Operator<context>::anchor using Operator<context>::Anchor; \
using Operator<context>::AllowRun
DECLARE_REGISTRY(CPUOperatorRegistry, OperatorBase,const OperatorDef&, Workspace*); DECLARE_REGISTRY(CPUOperatorRegistry, OperatorBase,const OperatorDef&, Workspace*);
DECLARE_REGISTRY(CUDAOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*); DECLARE_REGISTRY(CUDAOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*);
......
...@@ -25,7 +25,7 @@ class SoftmaxCrossEntropyOp final : public Operator<Context> { ...@@ -25,7 +25,7 @@ class SoftmaxCrossEntropyOp final : public Operator<Context> {
normalization(OperatorBase::GetSingleArg<string>("normalization", "FULL")) { normalization(OperatorBase::GetSingleArg<string>("normalization", "FULL")) {
OperatorDef softmax_def = MakeOperatorDef("Softmax", "", OperatorDef softmax_def = MakeOperatorDef("Softmax", "",
vector<string>({ Input(0).name() }), vector<string>({ Input(0).name() }),
vector<string>({ "/mnt/" + anchor() + "/softmax_prob" })); vector<string>({ "/mnt/" + Anchor() + "/softmax/prob" }));
softmax_def.add_arg()->CopyFrom(this->arg("axis")); softmax_def.add_arg()->CopyFrom(this->arg("axis"));
if (op_def.has_device_option()) if (op_def.has_device_option())
softmax_def.mutable_device_option()->CopyFrom(op_def.device_option()); softmax_def.mutable_device_option()->CopyFrom(op_def.device_option());
......
...@@ -31,7 +31,7 @@ class SparseSoftmaxCrossEntropyOp : public Operator<Context> { ...@@ -31,7 +31,7 @@ class SparseSoftmaxCrossEntropyOp : public Operator<Context> {
} }
OperatorDef softmax_def = MakeOperatorDef("Softmax", "", OperatorDef softmax_def = MakeOperatorDef("Softmax", "",
vector<string>({ Input(0).name() }), vector<string>({ Input(0).name() }),
vector<string>({ "/mnt/" + anchor() + "/softmax_prob" })); vector<string>({ "/mnt/" + Anchor() + "/softmax/prob" }));
softmax_def.add_arg()->CopyFrom(this->arg("axis")); softmax_def.add_arg()->CopyFrom(this->arg("axis"));
if (op_def.has_device_option()) if (op_def.has_device_option())
softmax_def.mutable_device_option()->CopyFrom(op_def.device_option()); softmax_def.mutable_device_option()->CopyFrom(op_def.device_option());
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
#ifndef DRAGON_OPERATORS_MISC_PYTHON_OP_H_ #ifndef DRAGON_OPERATORS_MISC_PYTHON_OP_H_
#define DRAGON_OPERATORS_MISC_PYTHON_OP_H_ #define DRAGON_OPERATORS_MISC_PYTHON_OP_H_
#ifdef WITH_PYTHON
#include <Python.h> #include <Python.h>
#include "core/operator.h" #include "core/operator.h"
...@@ -53,4 +55,6 @@ public: ...@@ -53,4 +55,6 @@ public:
} // namespace dragon } // namespace dragon
#endif // WITH_PYTHON
#endif // DRAGON_OPERATORS_MISC_PYTHON_OP_H_ #endif // DRAGON_OPERATORS_MISC_PYTHON_OP_H_
\ No newline at end of file
...@@ -33,8 +33,7 @@ class AdamUpdateOp final : public UpdateOpBase<Context> { ...@@ -33,8 +33,7 @@ class AdamUpdateOp final : public UpdateOpBase<Context> {
protected: protected:
float lr, beta1, beta2, eps, coeff; float lr, beta1, beta2, eps, coeff;
int t; int t;
unique_ptr<Tensor> m, v; Tensor* m, *v, *tmp;
Tensor temp;
}; };
} // namespace dragon } // namespace dragon
......
...@@ -29,8 +29,7 @@ class NesterovUpdateOp final : public UpdateOpBase<Context> { ...@@ -29,8 +29,7 @@ class NesterovUpdateOp final : public UpdateOpBase<Context> {
protected: protected:
float lr, momentum; float lr, momentum;
unique_ptr<Tensor> history; Tensor* h, *tmp;
Tensor temp;
}; };
} // namespace dragon } // namespace dragon
......
...@@ -30,8 +30,7 @@ class RMSPropUpdateOp final : public UpdateOpBase<Context> { ...@@ -30,8 +30,7 @@ class RMSPropUpdateOp final : public UpdateOpBase<Context> {
protected: protected:
float lr, decay, eps; float lr, decay, eps;
unique_ptr<Tensor> history; Tensor* h, *tmp;
Tensor temp;
}; };
} // namespace dragon } // namespace dragon
......
...@@ -29,7 +29,7 @@ class SGDUpdateOp final : public UpdateOpBase<Context> { ...@@ -29,7 +29,7 @@ class SGDUpdateOp final : public UpdateOpBase<Context> {
protected: protected:
float lr, momentum; float lr, momentum;
unique_ptr<Tensor> history; Tensor* h;
}; };
......
...@@ -27,6 +27,7 @@ class UpdateOpBase : public Operator<Context> { ...@@ -27,6 +27,7 @@ class UpdateOpBase : public Operator<Context> {
USE_OPERATOR_FUNCTIONS(Context); USE_OPERATOR_FUNCTIONS(Context);
float Param(const string& name) const; float Param(const string& name) const;
string Slot();
void RunOnDevice() override; void RunOnDevice() override;
template <typename T> void PreprocessRunWithType(); template <typename T> void PreprocessRunWithType();
...@@ -40,7 +41,8 @@ class UpdateOpBase : public Operator<Context> { ...@@ -40,7 +41,8 @@ class UpdateOpBase : public Operator<Context> {
}; };
#define USE_UPDATER_FUNCTIONS(context) \ #define USE_UPDATER_FUNCTIONS(context) \
using UpdateOpBase<context>::Param using UpdateOpBase<context>::Param; \
using UpdateOpBase<context>::Slot
} // namespace dragon } // namespace dragon
......
...@@ -2,8 +2,6 @@ message(STATUS "Found CC Module: ${CMAKE_CURRENT_LIST_DIR}") ...@@ -2,8 +2,6 @@ message(STATUS "Found CC Module: ${CMAKE_CURRENT_LIST_DIR}")
FILE(GLOB_RECURSE MODULE_FILES *.h *.hpp *.c *.cpp *.cu *.cc) FILE(GLOB_RECURSE MODULE_FILES *.h *.hpp *.c *.cpp *.cu *.cc)
FILE(GLOB_RECURSE SRC_FILES ../../src/*.c ../../src/*.cpp ../../src/*.cu ../../src/*.cc) FILE(GLOB_RECURSE SRC_FILES ../../src/*.c ../../src/*.cpp ../../src/*.cu ../../src/*.cc)
FILE(GLOB_RECURSE REMOVE_FILES ../../src/python*)
LIST(REMOVE_ITEM SRC_FILES ${REMOVE_FILES})
# ---[ complier # ---[ complier
if (WITH_CUDA) if (WITH_CUDA)
......
message(STATUS "Found Python Module: ${CMAKE_CURRENT_LIST_DIR}") message(STATUS "Found Python Module: ${CMAKE_CURRENT_LIST_DIR}")
if (NOT WITH_PYTHON)
message(FATAL_ERROR "Set WITH_PYTHON as ON for this module.")
endif()
FILE(GLOB_RECURSE MODULE_FILES *.h *.hpp *.c *.cpp *.cu *.cc) FILE(GLOB_RECURSE MODULE_FILES *.h *.hpp *.c *.cpp *.cu *.cc)
FILE(GLOB_RECURSE SRC_FILES ../../src/*.c ../../src/*.cpp ../../src/*.cu ../../src/*.cc) FILE(GLOB_RECURSE SRC_FILES ../../src/*.c ../../src/*.cpp ../../src/*.cu ../../src/*.cc)
......
...@@ -29,6 +29,8 @@ Installation - Linux (Normal, CPU) ...@@ -29,6 +29,8 @@ Installation - Linux (Normal, CPU)
**$** Setup C++ Development Environment **$** Setup C++ Development Environment
.. code-block:: shell
sudo apt-get install libprotobuf-dev sudo apt-get install libprotobuf-dev
sudo apt-get install protobuf-compiler sudo apt-get install protobuf-compiler
sudo apt-get install libopenblas-dev sudo apt-get install libopenblas-dev
...@@ -42,9 +44,9 @@ Installation - Linux (Normal, CPU) ...@@ -42,9 +44,9 @@ Installation - Linux (Normal, CPU)
**Step 3:** Configure ``DRAGON_ROOT/CMakeLists.txt`` **Step 3:** Configure ``DRAGON_ROOT/CMakeLists.txt``
**$** Select optional libraries [``PYTHON3`` / ``BLAS`` / ``SSE``] **$** Select optional libraries [``PYTHON`` / ``BLAS`` / ``SSE``]
**$** Set ``PYTHON_INCLUDE_DIR`` / ``ANACONDA_ROOT_DIR`` and ``NUMPY_ROOT_DIR`` **$** (Optional) Set ``PYTHON_EXECUTABLE`` if you want to use specific Python (e.g. Anaconda)
**Step 4:** Compile Dragon **Step 4:** Compile Dragon
...@@ -101,6 +103,8 @@ Installation - Linux (Normal, GPU) ...@@ -101,6 +103,8 @@ Installation - Linux (Normal, GPU)
**$** Setup C++ Development Environment **$** Setup C++ Development Environment
.. code-block:: shell
sudo apt-get install libprotobuf-dev sudo apt-get install libprotobuf-dev
sudo apt-get install protobuf-compiler sudo apt-get install protobuf-compiler
sudo apt-get install libopenblas-dev sudo apt-get install libopenblas-dev
...@@ -114,13 +118,13 @@ Installation - Linux (Normal, GPU) ...@@ -114,13 +118,13 @@ Installation - Linux (Normal, GPU)
**Step 4:** Configure ``DRAGON_ROOT/CMakeLists.txt`` **Step 4:** Configure ``DRAGON_ROOT/CMakeLists.txt``
**$** Select optional libraries [``PYTHON3`` / ``CUDA`` / ``CUDNN`` / ``BLAS`` / ``SSE``] **$** Select optional libraries [``PYTHON`` / ``CUDA`` / ``CUDNN`` / ``BLAS`` / ``SSE``]
**$** Set ``PYTHON_INCLUDE_DIR`` / ``ANACONDA_ROOT_DIR`` and ``NUMPY_ROOT_DIR`` **$** (Optional) Set ``PYTHON_EXECUTABLE`` if you want to use specific Python (e.g. Anaconda)
**$** Set CUDA compiling architectures if necessary **$** (Optional) Set ``CUDA_ARCH`` for different architectures, see `ComputeCapability`_
**$** GCC version(4.8+, 5.0-) should add ``-std=c++11`` to ``CUDA_NVCC_FLAGS``, if ``nullptr`` is not found **$** GCC version(4.8+, 5.0-) should add ``-std=c++11`` to ``CUDA_NVCC_FLAGS``, if ``nullptr`` is not found.
**Step 5:** Compile Dragon **Step 5:** Compile Dragon
...@@ -159,7 +163,7 @@ Installation - Linux (Normal, GPU) ...@@ -159,7 +163,7 @@ Installation - Linux (Normal, GPU)
Installation - Linux (Distributed, CPU) Installation - Linux (Distributed, CPU)
--------------------------------------- ---------------------------------------
**Step 1:** Download ``3rdparty.zip`` and unzip it under ``REPO_ROOT`` **Step 1:** Download ``3rdparty.zip`` and unzip it under the ``REPO_ROOT``
`3rdparty_linux_x64.zip <http://dragon.seetatech.com/download/3rdparty_linux_x64_dist_cpu.zip>`_ (OpenMPI) `3rdparty_linux_x64.zip <http://dragon.seetatech.com/download/3rdparty_linux_x64_dist_cpu.zip>`_ (OpenMPI)
...@@ -175,6 +179,8 @@ Installation - Linux (Distributed, CPU) ...@@ -175,6 +179,8 @@ Installation - Linux (Distributed, CPU)
**$** Setup C++ Development Environment **$** Setup C++ Development Environment
.. code-block:: shell
sudo apt-get install libprotobuf-dev sudo apt-get install libprotobuf-dev
sudo apt-get install protobuf-compiler sudo apt-get install protobuf-compiler
sudo apt-get install libopenblas-dev sudo apt-get install libopenblas-dev
...@@ -188,11 +194,11 @@ Installation - Linux (Distributed, CPU) ...@@ -188,11 +194,11 @@ Installation - Linux (Distributed, CPU)
**Step 4:** Configure ``DRAGON_ROOT/CMakeLists.txt`` **Step 4:** Configure ``DRAGON_ROOT/CMakeLists.txt``
**$** Select optional libraries [``PYTHON3`` / ``BLAS`` / ``SSE`` / ``MPI``] **$** Select optional libraries [``PYTHON`` / ``BLAS`` / ``SSE`` / ``MPI``]
**$** Set ``3RDPARTY_DIR`` (Recommend to Keep Default) **$** (Optional) Set ``3RDPARTY_DIR`` (Recommend to Keep Default)
**$** Set ``PYTHON_INCLUDE_DIR`` / ``ANACONDA_ROOT_DIR`` and ``NUMPY_ROOT_DIR`` **$** (Optional) Set ``PYTHON_EXECUTABLE`` if you want to use specific Python (e.g. Anaconda)
**Step 5:** Setup MPI **Step 5:** Setup MPI
...@@ -247,7 +253,7 @@ Installation - Linux (Distributed, GPU) ...@@ -247,7 +253,7 @@ Installation - Linux (Distributed, GPU)
**$** (Optional) Download and install `NCCL`_ **$** (Optional) Download and install `NCCL`_
**Step 2:** Download ``3rdparty.zip`` and unzip it under ``REPO_ROOT`` **Step 2:** Download ``3rdparty.zip`` and unzip it under the ``REPO_ROOT``
`3rdparty_linux_x64.zip <http://dragon.seetatech.com/download/3rdparty_linux_x64_dist_gpu.zip>`_ (OpenMPI) `3rdparty_linux_x64.zip <http://dragon.seetatech.com/download/3rdparty_linux_x64_dist_gpu.zip>`_ (OpenMPI)
...@@ -263,6 +269,8 @@ Installation - Linux (Distributed, GPU) ...@@ -263,6 +269,8 @@ Installation - Linux (Distributed, GPU)
**$** Setup C++ Development Environment **$** Setup C++ Development Environment
.. code-block:: shell
sudo apt-get install libprotobuf-dev sudo apt-get install libprotobuf-dev
sudo apt-get install protobuf-compiler sudo apt-get install protobuf-compiler
sudo apt-get install libopenblas-dev sudo apt-get install libopenblas-dev
...@@ -276,15 +284,15 @@ Installation - Linux (Distributed, GPU) ...@@ -276,15 +284,15 @@ Installation - Linux (Distributed, GPU)
**Step 5:** Configure ``DRAGON_ROOT/CMakeLists.txt`` **Step 5:** Configure ``DRAGON_ROOT/CMakeLists.txt``
**$** Select optional libraries [``PYTHON3`` / ``CUDA`` / ``CUDNN`` / ``BLAS`` / ``SSE`` / ``MPI``] **$** Select optional libraries [``PYTHON`` / ``CUDA`` / ``CUDNN`` / ``BLAS`` / ``SSE`` / ``MPI``]
**$** Set ``3RDPARTY_DIR`` (Recommend to Keep Default) **$** (Optional) Set ``3RDPARTY_DIR`` (Recommend to Keep Default)
**$** Set ``PYTHON_INCLUDE_DIR`` / ``ANACONDA_ROOT_DIR`` and ``NUMPY_ROOT_DIR`` **$** (Optional) Set ``PYTHON_EXECUTABLE`` if you want to use specific Python (e.g. Anaconda)
**$** Set CUDA compiling architectures if necessary **$** (Optional) Set ``CUDA_ARCH`` for different architectures, see `ComputeCapability`_
**$** GCC version(4.8+, 5.0-) should add ``-std=c++11`` to ``CUDA_NVCC_FLAGS``, if ``nullptr`` is not found **$** GCC version(4.8+, 5.0-) should add ``-std=c++11`` to ``CUDA_NVCC_FLAGS``, if ``nullptr`` is not found.
**$** OpenMPI can take ``NCCL`` and our ``CUDA-AWARE`` communications at the same time. **$** OpenMPI can take ``NCCL`` and our ``CUDA-AWARE`` communications at the same time.
...@@ -332,14 +340,12 @@ Installation - Linux (Distributed, GPU) ...@@ -332,14 +340,12 @@ Installation - Linux (Distributed, GPU)
Installation - Windows (Normal, CPU) Installation - Windows (Normal, CPU)
------------------------------------ ------------------------------------
**Step 1:** Download ``3rdparty.zip`` and unzip it under ``REPO_ROOT`` **Step 1:** Download ``3rdparty.zip`` and unzip it under the ``REPO_ROOT``
`3rdparty_vc12_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc12_x64_cpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2013) `3rdparty_vc12_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc12_x64_cpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2013)
`3rdparty_vc14_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc14_x64_cpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2015) `3rdparty_vc14_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc14_x64_cpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2015)
**$** You must copy ``python27/35/36.lib`` to ``REPO_ROOT/3rdparty/lib``, it depends on the version of Python
**Step 2:** Install Python Requirements **Step 2:** Install Python Requirements
.. code-block:: shell .. code-block:: shell
...@@ -349,11 +355,11 @@ Installation - Windows (Normal, CPU) ...@@ -349,11 +355,11 @@ Installation - Windows (Normal, CPU)
**Step 3:** Configure ``DRAGON_ROOT/CMakeLists.txt`` **Step 3:** Configure ``DRAGON_ROOT/CMakeLists.txt``
**$** Select optional libraries [``PYTHON3`` / ``BLAS`` / ``SSE``] **$** Select optional libraries [``PYTHON`` / ``BLAS`` / ``SSE``]
**$** Set ``3RDPARTY_DIR`` (Recommend to Keep Default) **$** (Optional) Set ``3RDPARTY_DIR`` (Recommend to Keep Default)
**$** Set ``PYTHON_INCLUDE_DIR`` / ``ANACONDA_ROOT_DIR`` and ``NUMPY_ROOT_DIR`` **$** (Optional) Set ``PYTHON_EXECUTABLE`` if you want to use specific Python (e.g. Anaconda)
**Step 4:** Set Environment Variables **Step 4:** Set Environment Variables
...@@ -367,9 +373,9 @@ Add ``REPO_ROOT/3rdparty/bin`` to system environment variables ...@@ -367,9 +373,9 @@ Add ``REPO_ROOT/3rdparty/bin`` to system environment variables
**$** Install `CMake-GUI <https://cmake.org>`_ **$** Install `CMake-GUI <https://cmake.org>`_
**$** Make ``build`` directory under ``DRAGON_ROOT`` **$** Make ``build`` directory under the ``DRAGON_ROOT``
**$** Configure and generate MSVC project in ``DRAGON_ROOT/build`` **$** Configure and generate MSVC project in the ``DRAGON_ROOT/build``
**$** Open ``DRAGON_ROOT/build/Dragon.sln`` **$** Open ``DRAGON_ROOT/build/Dragon.sln``
...@@ -400,15 +406,13 @@ Installation - Windows (Normal, GPU) ...@@ -400,15 +406,13 @@ Installation - Windows (Normal, GPU)
**$** (Optional) Download and install `CUDNN`_ **$** (Optional) Download and install `CUDNN`_
**Step 2:** Download ``3rdparty.zip`` and unzip it under ``REPO_ROOT`` **Step 2:** Download ``3rdparty.zip`` and unzip it under the ``REPO_ROOT``
`3rdparty_vc12_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc12_x64_gpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2013) `3rdparty_vc12_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc12_x64_gpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2013)
`3rdparty_vc14_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc14_x64_gpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2015) `3rdparty_vc14_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc14_x64_gpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2015)
**$** You must copy ``python27/35/36.lib`` to ``REPO_ROOT/3rdparty/lib``; which file to copy depends on your Python version **$** We recommend installing ``cuDNN`` into ``REPO_ROOT/3rdparty``
**$** We recommend installing ``cuDNN`` into ``REPO_ROOT/3rdparty``
**Step 3:** Install Python Requirements **Step 3:** Install Python Requirements
...@@ -419,13 +423,13 @@ Installation - Windows (Normal, GPU) ...@@ -419,13 +423,13 @@ Installation - Windows (Normal, GPU)
**Step 4:** Configure ``DRAGON_ROOT/CMakeLists.txt`` **Step 4:** Configure ``DRAGON_ROOT/CMakeLists.txt``
**$** Select optional libraries [``PYTHON3`` / ``CUDA`` / ``CUDNN`` / ``BLAS`` / ``SSE``] **$** Select optional libraries [``PYTHON`` / ``CUDA`` / ``CUDNN`` / ``BLAS`` / ``SSE``]
**$** Set ``3RDPARTY_DIR`` (Recommended to keep the default) **$** (Optional) Set ``3RDPARTY_DIR`` (Recommended to keep the default)
**$** Set ``PYTHON_INCLUDE_DIR`` / ``ANACONDA_ROOT_DIR`` and ``NUMPY_ROOT_DIR`` **$** (Optional) Set ``PYTHON_EXECUTABLE`` if you want to use a specific Python (e.g. Anaconda)
**$** Set CUDA compiling architectures if necessary **$** (Optional) Set ``CUDA_ARCH`` for different architectures, see `ComputeCapability`_
**Step 5:** Set Environment Variables **Step 5:** Set Environment Variables
...@@ -439,9 +443,9 @@ Add ``REPO_ROOT/3rdparty/bin`` to system environment variables ...@@ -439,9 +443,9 @@ Add ``REPO_ROOT/3rdparty/bin`` to system environment variables
**$** Install `CMake-GUI <https://cmake.org>`_ **$** Install `CMake-GUI <https://cmake.org>`_
**$** Make ``build`` directory under ``DRAGON_ROOT`` **$** Make ``build`` directory under the ``DRAGON_ROOT``
**$** Configure and generate MSVC project in ``DRAGON_ROOT/build`` **$** Configure and generate MSVC project in the ``DRAGON_ROOT/build``
**$** Open ``DRAGON_ROOT/build/Dragon.sln`` **$** Open ``DRAGON_ROOT/build/Dragon.sln``
...@@ -466,14 +470,12 @@ Add ``REPO_ROOT/3rdparty/bin`` to system environment variables ...@@ -466,14 +470,12 @@ Add ``REPO_ROOT/3rdparty/bin`` to system environment variables
Installation - Windows (Distributed, CPU) Installation - Windows (Distributed, CPU)
----------------------------------------- -----------------------------------------
**Step 1:** Download ``3rdparty.zip`` and unzip it under ``REPO_ROOT`` **Step 1:** Download ``3rdparty.zip`` and unzip it under the ``REPO_ROOT``
`3rdparty_vc12_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc12_x64_dist_cpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2013 / Microsoft MPI) `3rdparty_vc12_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc12_x64_dist_cpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2013 / Microsoft MPI)
`3rdparty_vc14_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc14_x64_dist_cpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2015 / Microsoft MPI) `3rdparty_vc14_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc14_x64_dist_cpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2015 / Microsoft MPI)
**$** You must copy ``python27/35/36.lib`` to ``REPO_ROOT/3rdparty/lib``; which file to copy depends on your Python version
**Step 2:** Install Python Requirements **Step 2:** Install Python Requirements
.. code-block:: shell .. code-block:: shell
...@@ -483,11 +485,11 @@ Installation - Windows (Distributed, CPU) ...@@ -483,11 +485,11 @@ Installation - Windows (Distributed, CPU)
**Step 3:** Configure ``DRAGON_ROOT/CMakeLists.txt`` **Step 3:** Configure ``DRAGON_ROOT/CMakeLists.txt``
**$** Select optional libraries [``PYTHON3`` / ``BLAS`` / ``SSE`` / ``MPI``] **$** Select optional libraries [``PYTHON`` / ``BLAS`` / ``SSE`` / ``MPI``]
**$** Set ``3RDPARTY_DIR`` (Recommended to keep the default) **$** (Optional) Set ``3RDPARTY_DIR`` (Recommended to keep the default)
**$** Set ``PYTHON_INCLUDE_DIR`` / ``ANACONDA_ROOT_DIR`` and ``NUMPY_ROOT_DIR`` **$** (Optional) Set ``PYTHON_EXECUTABLE`` if you want to use a specific Python (e.g. Anaconda)
**Step 4:** Set Environment Variables **Step 4:** Set Environment Variables
...@@ -501,9 +503,9 @@ Add ``DRAGON_ROOT/3rdparty/bin`` to system environment variables ...@@ -501,9 +503,9 @@ Add ``DRAGON_ROOT/3rdparty/bin`` to system environment variables
**$** Install `CMake-GUI <https://cmake.org>`_ **$** Install `CMake-GUI <https://cmake.org>`_
**$** Make ``build`` directory under ``DRAGON_ROOT`` **$** Make ``build`` directory under the ``DRAGON_ROOT``
**$** Configure and generate MSVC project in ``DRAGON_ROOT/build`` **$** Configure and generate MSVC project in the ``DRAGON_ROOT/build``
**$** Open ``DRAGON_ROOT/build/Dragon.sln`` **$** Open ``DRAGON_ROOT/build/Dragon.sln``
...@@ -534,15 +536,13 @@ Installation - Windows (Distributed, GPU) ...@@ -534,15 +536,13 @@ Installation - Windows (Distributed, GPU)
**$** (Optional) Download and install `CUDNN`_ **$** (Optional) Download and install `CUDNN`_
**Step 2:** Download ``3rdparty.zip`` and unzip it under ``REPO_ROOT`` **Step 2:** Download ``3rdparty.zip`` and unzip it under the ``REPO_ROOT``
`3rdparty_vc12_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc12_x64_dist_gpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2013 / Microsoft MPI) `3rdparty_vc12_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc12_x64_dist_gpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2013 / Microsoft MPI)
`3rdparty_vc14_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc14_x64_dist_gpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2015 / Microsoft MPI) `3rdparty_vc14_x64.zip <http://dragon.seetatech.com/download/3rdparty_vc14_x64_dist_gpu.zip>`_ (OpenBLAS / Google Protobuf 2.6 For VS2015 / Microsoft MPI)
**$** You must copy ``python27/35/36.lib`` to ``REPO_ROOT/3rdparty/lib``; which file to copy depends on your Python version **$** We recommend installing ``cuDNN`` into ``REPO_ROOT/3rdparty``
**$** We recommend installing ``cuDNN`` into ``REPO_ROOT/3rdparty``
**Step 3:** Install Python Requirements **Step 3:** Install Python Requirements
...@@ -553,13 +553,13 @@ Installation - Windows (Distributed, GPU) ...@@ -553,13 +553,13 @@ Installation - Windows (Distributed, GPU)
**Step 4:** Configure ``DRAGON_ROOT/CMakeLists.txt`` **Step 4:** Configure ``DRAGON_ROOT/CMakeLists.txt``
**$** Select optional libraries [``PYTHON3`` / ``CUDA`` / ``CUDNN`` / ``BLAS`` / ``SSE`` / ``MPI``] **$** Select optional libraries [``PYTHON`` / ``CUDA`` / ``CUDNN`` / ``BLAS`` / ``SSE`` / ``MPI``]
**$** Set ``3RDPARTY_DIR`` (Recommended to keep the default) **$** (Optional) Set ``3RDPARTY_DIR`` (Recommended to keep the default)
**$** Set ``PYTHON_INCLUDE_DIR`` / ``ANACONDA_ROOT_DIR`` and ``NUMPY_ROOT_DIR`` **$** (Optional) Set ``PYTHON_EXECUTABLE`` if you want to use a specific Python (e.g. Anaconda)
**$** Set CUDA compiling architectures if necessary **$** (Optional) Set ``CUDA_ARCH`` for different architectures, see `ComputeCapability`_
**Step 5:** Set Environment Variables **Step 5:** Set Environment Variables
...@@ -573,9 +573,9 @@ Add ``REPO_ROOT/3rdparty/bin`` to system environment variables ...@@ -573,9 +573,9 @@ Add ``REPO_ROOT/3rdparty/bin`` to system environment variables
**$** Install `CMake-GUI <https://cmake.org>`_ **$** Install `CMake-GUI <https://cmake.org>`_
**$** Make ``build`` directory under ``DRAGON_ROOT`` **$** Make ``build`` directory under the ``DRAGON_ROOT``
**$** Configure and generate MSVC project in ``DRAGON_ROOT/build`` **$** Configure and generate MSVC project in the ``DRAGON_ROOT/build``
**$** Open ``DRAGON_ROOT/build/Dragon.sln`` **$** Open ``DRAGON_ROOT/build/Dragon.sln``
...@@ -600,4 +600,5 @@ Add ``REPO_ROOT/3rdparty/bin`` to system environment variables ...@@ -600,4 +600,5 @@ Add ``REPO_ROOT/3rdparty/bin`` to system environment variables
.. _CUDA: https://developer.nvidia.com/cuda-toolkit .. _CUDA: https://developer.nvidia.com/cuda-toolkit
.. _CUDNN: https://developer.nvidia.com/cudnn .. _CUDNN: https://developer.nvidia.com/cudnn
.. _NCCL: https://developer.nvidia.com/nccl .. _NCCL: https://developer.nvidia.com/nccl
.. _ComputeCapability: https://developer.nvidia.com/cuda-gpus
.. _Dragon: https://github.com/neopenx/Dragon .. _Dragon: https://github.com/neopenx/Dragon
...@@ -33,7 +33,7 @@ class BaseUpdater(object): ...@@ -33,7 +33,7 @@ class BaseUpdater(object):
scale_gradient : float scale_gradient : float
The scale factor of gradients. The scale factor of gradients.
clip_gradient : float clip_gradient : float
The clip factor of gradients. The clip factor of gradients. \
l2_decay : float l2_decay : float
The l2 decay factor. Default is ``-1.0`` (Disabled). The l2 decay factor. Default is ``-1.0`` (Disabled).
slot : str slot : str
......
...@@ -36,9 +36,9 @@ find_packages('dragon') ...@@ -36,9 +36,9 @@ find_packages('dragon')
find_modules() find_modules()
setup(name = 'dragon', setup(name = 'dragon',
version='0.2.1.15', version='0.2.1.16',
description = 'Dragon: A Computation Graph Virtual Machine Based Deep Learning Framework', description = 'Dragon: A Computation Graph Virtual Machine Based Deep Learning Framework',
url='https://github.com/neopenx/Dragon', url='https://github.com/seetaresearch/Dragon',
author='Ting Pan', author='Ting Pan',
license='BSD 2-Clause', license='BSD 2-Clause',
packages=packages, packages=packages,
......
...@@ -27,7 +27,7 @@ void DropoutOp<Context>::RunWithType() { ...@@ -27,7 +27,7 @@ void DropoutOp<Context>::RunWithType() {
template <class Context> template <class Context>
void DropoutOp<Context>::RunOnDevice() { void DropoutOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
mask = ws()->CreateTensor("/mnt/" + anchor() + "/dropout_mask"); mask = ws()->CreateTensor("/mnt/" + Anchor() + "/dropout/mask");
mask->ReshapeLike(Input(0)); mask->ReshapeLike(Input(0));
if (Input(0).template IsType<float>()) RunWithType<float>(); if (Input(0).template IsType<float>()) RunWithType<float>();
...@@ -42,7 +42,7 @@ OPERATOR_SCHEMA(Dropout).NumInputs(1).NumOutputs(1).Inplace({ { 0, 0 } }); ...@@ -42,7 +42,7 @@ OPERATOR_SCHEMA(Dropout).NumInputs(1).NumOutputs(1).Inplace({ { 0, 0 } });
template <class Context> template <typename T> template <class Context> template <typename T>
void DropoutGradientOp<Context>::RunWithType() { void DropoutGradientOp<Context>::RunWithType() {
mask = ws()->GetTensor("/mnt/" + anchor() + "/dropout_mask"); mask = ws()->GetTensor("/mnt/" + Anchor() + "/dropout/mask");
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
auto* Mdata = mask->template data<uint32_t, Context>(); auto* Mdata = mask->template data<uint32_t, Context>();
......
...@@ -16,7 +16,7 @@ void ClipOp<Context>::RunWithType() { ...@@ -16,7 +16,7 @@ void ClipOp<Context>::RunWithType() {
template <class Context> template <class Context>
void ClipOp<Context>::RunOnDevice() { void ClipOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
mask = ws()->CreateTensor("/mnt/" + anchor() + "/clip_mask"); mask = ws()->CreateTensor("/mnt/" + Anchor() + "/clip/mask");
mask->ReshapeLike(Input(0)); mask->ReshapeLike(Input(0));
if (Input(0).template IsType<float>()) return RunWithType<float>(); if (Input(0).template IsType<float>()) return RunWithType<float>();
else LOG(FATAL) << "Unsupported input types."; else LOG(FATAL) << "Unsupported input types.";
...@@ -38,7 +38,7 @@ void ClipGradientOp<Context>::RunWithType() { ...@@ -38,7 +38,7 @@ void ClipGradientOp<Context>::RunWithType() {
template <class Context> template <class Context>
void ClipGradientOp<Context>::RunOnDevice() { void ClipGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
mask = ws()->GetTensor("/mnt/" + anchor() + "/clip_mask"); mask = ws()->GetTensor("/mnt/" + Anchor() + "/clip/mask");
if (Input(0).template IsType<float>()) return RunWithType<float>(); if (Input(0).template IsType<float>()) return RunWithType<float>();
else LOG(FATAL) << "Unsupported input types."; else LOG(FATAL) << "Unsupported input types.";
} }
......
...@@ -142,7 +142,7 @@ void ScanOp<Context>::UnrollTemplate() { ...@@ -142,7 +142,7 @@ void ScanOp<Context>::UnrollTemplate() {
new_def.add_target(Output(i)->name()); new_def.add_target(Output(i)->name());
} }
// upload // upload
Tensor* string_tensor = ws()->CreateTensor("/mnt/" + anchor() + "/raw_ops"); Tensor* string_tensor = ws()->CreateTensor("/mnt/" + Anchor() + "/raw_ops");
string_tensor->Reshape(vector<TIndex>(1, 1)); string_tensor->Reshape(vector<TIndex>(1, 1));
string* data = string_tensor->mutable_data <string, CPUContext>(); string* data = string_tensor->mutable_data <string, CPUContext>();
data[0] = new_def.SerializeAsString(); data[0] = new_def.SerializeAsString();
...@@ -171,7 +171,7 @@ void ScanGradientOp<Context>::MakeGradientOps() { ...@@ -171,7 +171,7 @@ void ScanGradientOp<Context>::MakeGradientOps() {
else if (step_type == "Default") nsteps = Input(0).dim(axis); else if (step_type == "Default") nsteps = Input(0).dim(axis);
if (graphs.count(nsteps)) return; if (graphs.count(nsteps)) return;
Tensor* ops = ws()->GetTensor("/mnt/" + anchor() + "/raw_ops"); Tensor* ops = ws()->GetTensor("/mnt/" + Anchor() + "/raw_ops");
forward_def.ParseFromString(ops->data<string, CPUContext>()[0]); forward_def.ParseFromString(ops->data<string, CPUContext>()[0]);
vector<string> targets; vector<string> targets;
for (auto& target : forward_def.target()) targets.push_back(target); for (auto& target : forward_def.target()) targets.push_back(target);
......
...@@ -31,7 +31,7 @@ template <class Context> ...@@ -31,7 +31,7 @@ template <class Context>
void L1LossOp<Context>::RunOnDevice() { void L1LossOp<Context>::RunOnDevice() {
CHECK_EQ(Input(0).count(), Input(1).count()); CHECK_EQ(Input(0).count(), Input(1).count());
Output(0)->Reshape(vector<TIndex>(1, 1)); Output(0)->Reshape(vector<TIndex>(1, 1));
diff = ws()->CreateTensor("/mnt/" + anchor() + "/l1_loss_diff"); diff = ws()->CreateTensor("/mnt/" + Anchor() + "/l1_loss/diff");
diff->ReshapeLike(Input(0)); diff->ReshapeLike(Input(0));
if (Input(0).template IsType<float>()) RunWithType<float>(); if (Input(0).template IsType<float>()) RunWithType<float>();
...@@ -67,7 +67,7 @@ void L1LossGradientOp<Context>::RunWithType() { ...@@ -67,7 +67,7 @@ void L1LossGradientOp<Context>::RunWithType() {
template <class Context> template <class Context>
void L1LossGradientOp<Context>::RunOnDevice() { void L1LossGradientOp<Context>::RunOnDevice() {
diff = ws()->GetTensor("/mnt/" + anchor() + "/l1_loss_diff"); diff = ws()->GetTensor("/mnt/" + Anchor() + "/l1_loss/diff");
if (Input(0).template IsType<float>()) RunWithType<float>(); if (Input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "Unsupported input types."; else LOG(FATAL) << "Unsupported input types.";
......
...@@ -29,7 +29,7 @@ template <class Context> ...@@ -29,7 +29,7 @@ template <class Context>
void L2LossOp<Context>::RunOnDevice() { void L2LossOp<Context>::RunOnDevice() {
CHECK_EQ(Input(0).count(), Input(1).count()); CHECK_EQ(Input(0).count(), Input(1).count());
Output(0)->Reshape(vector<TIndex>(1, 1)); Output(0)->Reshape(vector<TIndex>(1, 1));
diff = ws()->CreateTensor("/mnt/" + anchor() + "/l2_loss_diff"); diff = ws()->CreateTensor("/mnt/" + Anchor() + "/l2_loss/diff");
diff->ReshapeLike(Input(0)); diff->ReshapeLike(Input(0));
if (Input(0).template IsType<float>()) RunWithType<float>(); if (Input(0).template IsType<float>()) RunWithType<float>();
...@@ -64,7 +64,7 @@ void L2LossGradientOp<Context>::RunWithType() { ...@@ -64,7 +64,7 @@ void L2LossGradientOp<Context>::RunWithType() {
template <class Context> template <class Context>
void L2LossGradientOp<Context>::RunOnDevice() { void L2LossGradientOp<Context>::RunOnDevice() {
diff = ws()->GetTensor("/mnt/" + anchor() + "/l2_loss_diff"); diff = ws()->GetTensor("/mnt/" + Anchor() + "/l2_loss/diff");
if (Input(0).template IsType<float>()) RunWithType<float>(); if (Input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "Unsupported input types."; else LOG(FATAL) << "Unsupported input types.";
......
...@@ -39,7 +39,7 @@ void SmoothL1LossOp<Context>::RunOnDevice() { ...@@ -39,7 +39,7 @@ void SmoothL1LossOp<Context>::RunOnDevice() {
if (InputSize() > 3) CHECK(Input(0).dims() == Input(3).dims()); if (InputSize() > 3) CHECK(Input(0).dims() == Input(3).dims());
Output(0)->Reshape(vector<TIndex>(1, 1)); Output(0)->Reshape(vector<TIndex>(1, 1));
diff = ws()->CreateTensor("/mnt/" + anchor() + "/smoothl1_loss_diff"); diff = ws()->CreateTensor("/mnt/" + Anchor() + "/smoothl1_loss/diff");
error = ws()->CreateTensor("/share/smoothl1_loss_error"); error = ws()->CreateTensor("/share/smoothl1_loss_error");
diff->ReshapeLike(Input(0)); diff->ReshapeLike(Input(0));
error->ReshapeLike(Input(0)); error->ReshapeLike(Input(0));
...@@ -86,7 +86,7 @@ void SmoothL1LossGradientOp<Context>::RunWithType() { ...@@ -86,7 +86,7 @@ void SmoothL1LossGradientOp<Context>::RunWithType() {
template <class Context> template <class Context>
void SmoothL1LossGradientOp<Context>::RunOnDevice() { void SmoothL1LossGradientOp<Context>::RunOnDevice() {
diff = ws()->GetTensor("/mnt/" + anchor() + "/smoothl1_loss_diff"); diff = ws()->GetTensor("/mnt/" + Anchor() + "/smoothl1_loss/diff");
if (Input(0).template IsType<float>()) RunWithType<float>(); if (Input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "Unsupported input types."; else LOG(FATAL) << "Unsupported input types.";
......
...@@ -43,7 +43,7 @@ void SoftmaxCrossEntropyOp<Context>::RunOnDevice() { ...@@ -43,7 +43,7 @@ void SoftmaxCrossEntropyOp<Context>::RunOnDevice() {
<< "\nNumber of predictions must match the number of labels."; << "\nNumber of predictions must match the number of labels.";
losses.ReshapeLike(Input(0)); losses.ReshapeLike(Input(0));
softmax_op->Run(); softmax_op->Run();
prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax_prob"); prob = ws()->GetTensor("/mnt/" + Anchor() + "/softmax/prob");
if (Input(0).template IsType<float>()) RunWithType<float>(); if (Input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "Unsupported input types."; else LOG(FATAL) << "Unsupported input types.";
...@@ -85,7 +85,7 @@ void SoftmaxCrossEntropyGradientOp<Context>::RunWithType() { ...@@ -85,7 +85,7 @@ void SoftmaxCrossEntropyGradientOp<Context>::RunWithType() {
template <class Context> template <class Context>
void SoftmaxCrossEntropyGradientOp<Context>::RunOnDevice() { void SoftmaxCrossEntropyGradientOp<Context>::RunOnDevice() {
prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax_prob"); prob = ws()->GetTensor("/mnt/" + Anchor() + "/softmax/prob");
outer_dim = prob->count(0, axis); outer_dim = prob->count(0, axis);
inner_dim = prob->count(axis + 1); inner_dim = prob->count(axis + 1);
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
......
...@@ -51,7 +51,7 @@ void SparseSoftmaxCrossEntropyOp<Context>::RunOnDevice() { ...@@ -51,7 +51,7 @@ void SparseSoftmaxCrossEntropyOp<Context>::RunOnDevice() {
valid.Reshape(vector<TIndex>(1, outer_dim * inner_dim)); valid.Reshape(vector<TIndex>(1, outer_dim * inner_dim));
losses.Reshape(vector<TIndex>(1, outer_dim * inner_dim)); losses.Reshape(vector<TIndex>(1, outer_dim * inner_dim));
softmax_op->Run(); softmax_op->Run();
prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax_prob"); prob = ws()->GetTensor("/mnt/" + Anchor() + "/softmax/prob");
if (Input(0).template IsType<float>()) RunWithType<float>(); if (Input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "Unsupported input types."; else LOG(FATAL) << "Unsupported input types.";
...@@ -100,7 +100,7 @@ void SparseSoftmaxCrossEntropyGradientOp<Context>::RunWithType() { ...@@ -100,7 +100,7 @@ void SparseSoftmaxCrossEntropyGradientOp<Context>::RunWithType() {
template <class Context> template <class Context>
void SparseSoftmaxCrossEntropyGradientOp<Context>::RunOnDevice() { void SparseSoftmaxCrossEntropyGradientOp<Context>::RunOnDevice() {
prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax_prob"); prob = ws()->GetTensor("/mnt/" + Anchor() + "/softmax/prob");
outer_dim = prob->count(0, axis); outer_dim = prob->count(0, axis);
inner_dim = prob->count(axis + 1); inner_dim = prob->count(axis + 1);
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
......
...@@ -57,8 +57,8 @@ void SparseSoftmaxFocalLossOp<Context>::RunOnDevice() { ...@@ -57,8 +57,8 @@ void SparseSoftmaxFocalLossOp<Context>::RunOnDevice() {
this->valid.Reshape(vector<TIndex>(1, outer_dim * inner_dim)); this->valid.Reshape(vector<TIndex>(1, outer_dim * inner_dim));
this->losses.Reshape(vector<TIndex>(1, outer_dim * inner_dim)); this->losses.Reshape(vector<TIndex>(1, outer_dim * inner_dim));
this->softmax_op->Run(); this->softmax_op->Run();
this->prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax_prob"); this->prob = ws()->GetTensor("/mnt/" + Anchor() + "/softmax/prob");
scale = ws()->CreateTensor("/mnt/" + anchor() + "/focal_scale"); scale = ws()->CreateTensor("/mnt/" + Anchor() + "/focal/scale");
scale->ReshapeLike(*this->prob); scale->ReshapeLike(*this->prob);
if (Input(0).template IsType<float>()) RunWithType<float>(); if (Input(0).template IsType<float>()) RunWithType<float>();
...@@ -116,8 +116,8 @@ void SparseSoftmaxFocalLossGradientOp<Context>::RunWithType() { ...@@ -116,8 +116,8 @@ void SparseSoftmaxFocalLossGradientOp<Context>::RunWithType() {
template <class Context> template <class Context>
void SparseSoftmaxFocalLossGradientOp<Context>::RunOnDevice() { void SparseSoftmaxFocalLossGradientOp<Context>::RunOnDevice() {
this->prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax_prob"); this->prob = ws()->GetTensor("/mnt/" + Anchor() + "/softmax/prob");
scale = ws()->GetTensor("/mnt/" + anchor() + "/focal_scale"); scale = ws()->GetTensor("/mnt/" + Anchor() + "/focal/scale");
outer_dim = this->prob->count(0, axis); outer_dim = this->prob->count(0, axis);
inner_dim = this->prob->count(axis + 1); inner_dim = this->prob->count(axis + 1);
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
......
...@@ -61,7 +61,12 @@ OPERATOR_SCHEMA(GradientGather).NumOutputs(1); ...@@ -61,7 +61,12 @@ OPERATOR_SCHEMA(GradientGather).NumOutputs(1);
NO_GRADIENT(GradientGather); NO_GRADIENT(GradientGather);
template <class Context> template <class Context>
void StopGradientOp<Context>::RunOnDevice() {} void StopGradientOp<Context>::RunOnDevice() {
if (Output(0)->name() != Input(0).name()) {
Output(0)->ReshapeLike(Input(0));
Output(0)->Share(Input(0));
}
}
DEPLOY_CPU(StopGradient); DEPLOY_CPU(StopGradient);
#ifdef WITH_CUDA #ifdef WITH_CUDA
......
#include "operators/misc/python_op.h" #include "operators/misc/python_op.h"
#ifdef WITH_PYTHON
#ifdef WITH_PYTHON3 #ifdef WITH_PYTHON3
#define PyBytes_FromStringAndSize PyUnicode_FromStringAndSize #define PyBytes_FromStringAndSize PyUnicode_FromStringAndSize
#endif #endif
...@@ -36,7 +38,7 @@ RunOp<Context>::RunOp(const OperatorDef& op_def, Workspace* ws) ...@@ -36,7 +38,7 @@ RunOp<Context>::RunOp(const OperatorDef& op_def, Workspace* ws)
outputs = PyList_New(OutputSize()); outputs = PyList_New(OutputSize());
for (int i = 0; i < OutputSize(); i++) for (int i = 0; i < OutputSize(); i++)
PyList_SetItem(outputs, i, String(Output(i)->name().c_str())); PyList_SetItem(outputs, i, String(Output(i)->name().c_str()));
if (!this->allow_run()) return; if (!AllowRun()) return;
// setup // setup
if (PyObject_HasAttr(self, String("setup"))) if (PyObject_HasAttr(self, String("setup")))
...@@ -111,3 +113,5 @@ class GetTemplateGradient final : public GradientMakerBase { ...@@ -111,3 +113,5 @@ class GetTemplateGradient final : public GradientMakerBase {
REGISTER_GRADIENT(Template, GetTemplateGradient); REGISTER_GRADIENT(Template, GetTemplateGradient);
} // namespace dragon } // namespace dragon
#endif // WITH_PYTHON
\ No newline at end of file
...@@ -32,7 +32,7 @@ void RandomPickOp<Context>::RunOnDevice() { ...@@ -32,7 +32,7 @@ void RandomPickOp<Context>::RunOnDevice() {
inner_dim = Input(0).count(axis + 1); inner_dim = Input(0).count(axis + 1);
Output(0)->Reshape(output_dims); Output(0)->Reshape(output_dims);
pick_indices = ws()->CreateTensor("/mnt/" + anchor() + "/pick_indices"); pick_indices = ws()->CreateTensor("/mnt/" + Anchor() + "/pick/indices");
pick_indices->Reshape(vector<TIndex>(1, max_samples)); pick_indices->Reshape(vector<TIndex>(1, max_samples));
if (Input(0).template IsType<float>()) RunWithType<float>(); if (Input(0).template IsType<float>()) RunWithType<float>();
...@@ -65,7 +65,7 @@ void RandomPickGradientOp<Context>::RunWithType() { ...@@ -65,7 +65,7 @@ void RandomPickGradientOp<Context>::RunWithType() {
template <class Context> template <class Context>
void RandomPickGradientOp<Context>::RunOnDevice() { void RandomPickGradientOp<Context>::RunOnDevice() {
pick_indices = ws()->GetTensor("/mnt/" + anchor() + "/pick_indices"); pick_indices = ws()->GetTensor("/mnt/" + Anchor() + "/pick/indices");
x_slice_dim = Input(0).dim(axis); x_slice_dim = Input(0).dim(axis);
y_slice_dim = pick_indices->count(); y_slice_dim = pick_indices->count();
......
...@@ -27,9 +27,9 @@ void TransposeOp<Context>::RunOnDevice() { ...@@ -27,9 +27,9 @@ void TransposeOp<Context>::RunOnDevice() {
<< "\nbut Tensor(" << Input(0).name() << ")'s dims are " << "\nbut Tensor(" << Input(0).name() << ")'s dims are "
<< Input(0).dim_string(); << Input(0).dim_string();
vector<TIndex> output_dims; vector<TIndex> output_dims;
order = ws()->CreateTensor("/mnt/" + anchor() + "/transpose_order"); order = ws()->CreateTensor("/mnt/" + Anchor() + "/transpose/order");
old_steps = ws()->CreateTensor("/mnt/" + anchor() + "/transpose_old_steps"); old_steps = ws()->CreateTensor("/mnt/" + Anchor() + "/transpose/old_steps");
new_steps = ws()->CreateTensor("/mnt/" + anchor() + "/transpose_new_steps"); new_steps = ws()->CreateTensor("/mnt/" + Anchor() + "/transpose/new_steps");
order->Reshape(vector<TIndex>(1, perms.size())); order->Reshape(vector<TIndex>(1, perms.size()));
old_steps->Reshape(vector<TIndex>(1, perms.size())); old_steps->Reshape(vector<TIndex>(1, perms.size()));
new_steps->Reshape(vector<TIndex>(1, perms.size())); new_steps->Reshape(vector<TIndex>(1, perms.size()));
...@@ -76,9 +76,9 @@ void TransposeGradientOp<Context>::RunWithType() { ...@@ -76,9 +76,9 @@ void TransposeGradientOp<Context>::RunWithType() {
template <class Context> template <class Context>
void TransposeGradientOp<Context>::RunOnDevice() { void TransposeGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
order = ws()->GetTensor("/mnt/" + anchor() + "/transpose_order"); order = ws()->GetTensor("/mnt/" + Anchor() + "/transpose/order");
old_steps = ws()->GetTensor("/mnt/" + anchor() + "/transpose_old_steps"); old_steps = ws()->GetTensor("/mnt/" + Anchor() + "/transpose/old_steps");
new_steps = ws()->GetTensor("/mnt/" + anchor() + "/transpose_new_steps"); new_steps = ws()->GetTensor("/mnt/" + Anchor() + "/transpose/new_steps");
if (Input(0).template IsType<float>()) RunWithType<float>(); if (Input(0).template IsType<float>()) RunWithType<float>();
#ifdef WITH_CUDA_FP16 #ifdef WITH_CUDA_FP16
......
...@@ -204,7 +204,7 @@ void BatchNormOp<Context>::Setup() { ...@@ -204,7 +204,7 @@ void BatchNormOp<Context>::Setup() {
NS = N * S; NS = N * S;
// make resource // make resource
var = ws()->CreateTensor("/mnt/" + anchor() + "/bn_var"); var = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/var");
stddev = ws()->GetBuffer(); stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0)); stddev->ReshapeLike(Input(0));
...@@ -377,7 +377,7 @@ void BatchNormGradientOp<Context>::Setup() { ...@@ -377,7 +377,7 @@ void BatchNormGradientOp<Context>::Setup() {
NS = N * S; NS = N * S;
// make resource // make resource
var = ws()->GetTensor("/mnt/" + anchor() + "/bn_var"); var = ws()->GetTensor("/mnt/" + Anchor() + "/bn/var");
stddev = ws()->GetBuffer(); stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0)); stddev->ReshapeLike(Input(0));
......
...@@ -279,9 +279,9 @@ void BatchRenormOp<Context>::Setup() { ...@@ -279,9 +279,9 @@ void BatchRenormOp<Context>::Setup() {
NS = N * S; NS = N * S;
// make resource // make resource
var = ws()->CreateTensor("/mnt/" + anchor() + "/bn_var"); var = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/var");
r = ws()->CreateTensor("/mnt/" + anchor() + "/bn_r"); r = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/r");
x_norm = ws()->CreateTensor("/mnt/" + anchor() + "/bn_x_norm"); x_norm = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/x_norm");
stddev = ws()->GetBuffer(); stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0)); stddev->ReshapeLike(Input(0));
...@@ -471,9 +471,9 @@ void BatchRenormGradientOp<Context>::Setup() { ...@@ -471,9 +471,9 @@ void BatchRenormGradientOp<Context>::Setup() {
NS = N * S; NS = N * S;
// make resource // make resource
var = ws()->GetTensor("/mnt/" + anchor() + "/bn_var"); var = ws()->GetTensor("/mnt/" + Anchor() + "/bn/var");
r = ws()->GetTensor("/mnt/" + anchor() + "/bn_r"); r = ws()->GetTensor("/mnt/" + Anchor() + "/bn/r");
x_norm = ws()->GetTensor("/mnt/" + anchor() + "/bn_x_norm"); x_norm = ws()->GetTensor("/mnt/" + Anchor() + "/bn/x_norm");
stddev = ws()->GetBuffer(); stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0)); stddev->ReshapeLike(Input(0));
......
...@@ -116,8 +116,8 @@ void CuDNNBatchNormOp<Context>::Setup() { ...@@ -116,8 +116,8 @@ void CuDNNBatchNormOp<Context>::Setup() {
C = Input(0).dim(channel_axis); C = Input(0).dim(channel_axis);
// make resource // make resource
mean = ws()->CreateTensor("/mnt/" + anchor() + "/bn_mean"); mean = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/mean");
var = ws()->CreateTensor("/mnt/" + anchor() + "/bn_var"); var = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/var");
// reshape // reshape
mean->Reshape(vector<TIndex>(1, C)); mean->Reshape(vector<TIndex>(1, C));
...@@ -160,8 +160,8 @@ void CuDNNBatchNormGradientOp<Context>::Setup() { ...@@ -160,8 +160,8 @@ void CuDNNBatchNormGradientOp<Context>::Setup() {
NS = N * S; NS = N * S;
// make resource // make resource
mean = ws()->GetTensor("/mnt/" + anchor() + "/bn_mean"); mean = ws()->GetTensor("/mnt/" + Anchor() + "/bn/mean");
var = ws()->GetTensor("/mnt/" + anchor() + "/bn_var"); var = ws()->GetTensor("/mnt/" + Anchor() + "/bn/var");
// reshape // reshape
mean->Reshape(vector<TIndex>(1, C)); mean->Reshape(vector<TIndex>(1, C));
......
...@@ -246,9 +246,9 @@ void FusedBatchNormOp<Context>::Setup() { ...@@ -246,9 +246,9 @@ void FusedBatchNormOp<Context>::Setup() {
NS = N * S; NS = N * S;
// make resource // make resource
mean = ws()->CreateTensor("/mnt/" + anchor() + "/bn_mean"); mean = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/mean");
var = ws()->CreateTensor("/mnt/" + anchor() + "/bn_var"); var = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/var");
x_norm = ws()->CreateTensor("/mnt/" + anchor() + "/bn_x_norm"); x_norm = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/x_norm");
stddev = ws()->GetBuffer(); stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0)); stddev->ReshapeLike(Input(0));
...@@ -506,9 +506,9 @@ void FusedBatchNormGradientOp<Context>::Setup() { ...@@ -506,9 +506,9 @@ void FusedBatchNormGradientOp<Context>::Setup() {
NS = N * S; NS = N * S;
// make resource // make resource
mean = ws()->GetTensor("/mnt/" + anchor() + "/bn_mean"); mean = ws()->GetTensor("/mnt/" + Anchor() + "/bn/mean");
var = ws()->GetTensor("/mnt/" + anchor() + "/bn_var"); var = ws()->GetTensor("/mnt/" + Anchor() + "/bn/var");
x_norm = ws()->GetTensor("/mnt/" + anchor() + "/bn_x_norm"); x_norm = ws()->GetTensor("/mnt/" + Anchor() + "/bn/x_norm");
stddev = ws()->GetBuffer(); stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0)); stddev->ReshapeLike(Input(0));
......
...@@ -227,9 +227,9 @@ void FusedGroupNormOp<Context>::Setup() { ...@@ -227,9 +227,9 @@ void FusedGroupNormOp<Context>::Setup() {
NS = N * S; NS = N * S;
// make resource // make resource
mean = ws()->CreateTensor("/mnt/" + anchor() + "/gn_mean"); mean = ws()->CreateTensor("/mnt/" + Anchor() + "/gn/mean");
var = ws()->CreateTensor("/mnt/" + anchor() + "/gn_var"); var = ws()->CreateTensor("/mnt/" + Anchor() + "/gn/var");
x_norm = ws()->CreateTensor("/mnt/" + anchor() + "/gn_x_norm"); x_norm = ws()->CreateTensor("/mnt/" + Anchor() + "/gn/x_norm");
stddev = ws()->GetBuffer(); stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0)); stddev->ReshapeLike(Input(0));
...@@ -467,9 +467,9 @@ void FusedGroupNormGradientOp<Context>::Setup() { ...@@ -467,9 +467,9 @@ void FusedGroupNormGradientOp<Context>::Setup() {
NS = N * S; NS = N * S;
// make resource // make resource
mean = ws()->GetTensor("/mnt/" + anchor() + "/gn_mean"); mean = ws()->GetTensor("/mnt/" + Anchor() + "/gn/mean");
var = ws()->GetTensor("/mnt/" + anchor() + "/gn_var"); var = ws()->GetTensor("/mnt/" + Anchor() + "/gn/var");
x_norm = ws()->GetTensor("/mnt/" + anchor() + "/gn_x_norm"); x_norm = ws()->GetTensor("/mnt/" + Anchor() + "/gn/x_norm");
stddev = ws()->GetBuffer(); stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0)); stddev->ReshapeLike(Input(0));
......
...@@ -185,7 +185,7 @@ void GroupNormOp<Context>::Setup() { ...@@ -185,7 +185,7 @@ void GroupNormOp<Context>::Setup() {
NS = N * S; NS = N * S;
// make resource // make resource
var = ws()->CreateTensor("/mnt/" + anchor() + "/gn_var"); var = ws()->CreateTensor("/mnt/" + Anchor() + "/gn/var");
stddev = ws()->GetBuffer(); stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0)); stddev->ReshapeLike(Input(0));
...@@ -337,7 +337,7 @@ void GroupNormGradientOp<Context>::Setup() { ...@@ -337,7 +337,7 @@ void GroupNormGradientOp<Context>::Setup() {
NS = N * S; NS = N * S;
// make resource // make resource
var = ws()->GetTensor("/mnt/" + anchor() + "/gn_var"); var = ws()->GetTensor("/mnt/" + Anchor() + "/gn/var");
stddev = ws()->GetBuffer(); stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0)); stddev->ReshapeLike(Input(0));
......
...@@ -110,7 +110,7 @@ void InstanceNormOp<Context>::Setup() { ...@@ -110,7 +110,7 @@ void InstanceNormOp<Context>::Setup() {
CS = C * S; CS = C * S;
// make resource // make resource
var = ws()->CreateTensor("/mnt/" + anchor() + "/ins_norm_var"); var = ws()->CreateTensor("/mnt/" + Anchor() + "/ins_norm/var");
stddev = ws()->GetBuffer(); stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0)); stddev->ReshapeLike(Input(0));
...@@ -243,7 +243,7 @@ void InstanceNormGradientOp<Context>::Setup() { ...@@ -243,7 +243,7 @@ void InstanceNormGradientOp<Context>::Setup() {
CS = C * S; CS = C * S;
// make resource // make resource
var = ws()->GetTensor("/mnt/" + anchor() + "/ins_norm_var"); var = ws()->GetTensor("/mnt/" + Anchor() + "/ins_norm/var");
stddev = ws()->GetBuffer(); stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0)); stddev->ReshapeLike(Input(0));
......
...@@ -15,7 +15,7 @@ void L2NormOp<Context>::RunWithType() { ...@@ -15,7 +15,7 @@ void L2NormOp<Context>::RunWithType() {
buffer->Reshape(dims); buffer->Reshape(dims);
// normalize by inner_dim independently if not across it // normalize by inner_dim independently if not across it
norm = ws()->CreateTensor("/mnt/" + anchor() + "/l2norm_normalizer"); norm = ws()->CreateTensor("/mnt/" + Anchor() + "/l2norm/normalizer");
dims = Input(0).dims(); dims = Input(0).dims();
for (int i = axis; i < end_axis; i++) dims[i] = 1; for (int i = axis; i < end_axis; i++) dims[i] = 1;
norm->Reshape(dims); norm->Reshape(dims);
...@@ -96,7 +96,7 @@ void L2NormGradientOp<Context>::RunWithType() { ...@@ -96,7 +96,7 @@ void L2NormGradientOp<Context>::RunWithType() {
INIT_MULTIPLIER(multiplier, dim); INIT_MULTIPLIER(multiplier, dim);
// normalize by inner_dim independently if not across it // normalize by inner_dim independently if not across it
norm = ws()->GetTensor("/mnt/" + anchor() + "/l2norm_normalizer"); norm = ws()->GetTensor("/mnt/" + Anchor() + "/l2norm/normalizer");
buffer = ws()->GetBuffer(); buffer = ws()->GetBuffer();
vector<TIndex> dims = Input(0).dims(); vector<TIndex> dims = Input(0).dims();
for (int i = 0; i < axis; i++) dims[i] = 1; for (int i = 0; i < axis; i++) dims[i] = 1;
......
#include "operators/update/adam_update_op.h" #include "operators/update/adam_update_op.h"
#include "core/workspace.h"
#include "utils/op_kernel.h" #include "utils/op_kernel.h"
namespace dragon { namespace dragon {
template <class Context> template <class Context>
void AdamUpdateOp<Context>::ComputeRunWithFloat() { void AdamUpdateOp<Context>::ComputeRunWithFloat() {
if (!m.get()) { m = ws()->CreateTensor("/mnt/" + Slot() + "/adam/m");
m.reset(new Tensor()); m->ReshapeLike(Input(0)); v = ws()->CreateTensor("/mnt/" + Slot() + "/adam/v");
v.reset(new Tensor()); v->ReshapeLike(Input(0)); tmp = ws()->CreateTensor("/mnt/" + Slot() + "/adam/tmp");
} m->ReshapeLike(Input(0));
v->ReshapeLike(Input(0));
t++; t++;
coeff = sqrt(1. - pow(beta2, t)) / (1. - pow(beta1, t)); coeff = sqrt(1. - pow(beta2, t)) / (1. - pow(beta1, t));
lr = Param("base_lr") * coeff * this->lr_mult; lr = Param("base_lr") * coeff * this->lr_mult;
kernel::AdamUpdate<float, Context>(&Input(0), kernel::AdamUpdate<float, Context>(&Input(0),
m.get(), m, v, tmp,
v.get(),
&temp,
beta1, beta1,
beta2, beta2,
eps, eps,
......
#include "operators/update/nesterov_update_op.h" #include "operators/update/nesterov_update_op.h"
#include "core/workspace.h"
#include "utils/math_functions.h" #include "utils/math_functions.h"
#include "utils/op_kernel.h" #include "utils/op_kernel.h"
...@@ -6,17 +7,17 @@ namespace dragon { ...@@ -6,17 +7,17 @@ namespace dragon {
template <class Context> template <class Context>
void NesterovUpdateOp<Context>::ComputeRunWithFloat() { void NesterovUpdateOp<Context>::ComputeRunWithFloat() {
if (!history.get()) { h = ws()->CreateTensor("/mnt/" + Slot() + "/nesterov/h");
history.reset(new Tensor()); tmp = ws()->CreateTensor("/mnt/" + Slot() + "/nesterov/tmp");
history->ReshapeLike(Input(0)); h->ReshapeLike(Input(0));
}
lr = Param("base_lr") * this->lr_mult; lr = Param("base_lr") * this->lr_mult;
auto* dXdata = Input(0).template mutable_data<float, Context>(); auto* dXdata = Input(0).template mutable_data<float, Context>();
auto* Hdata = history->template mutable_data<float, Context>(); auto* Hdata = h->template mutable_data<float, Context>();
kernel::NesterovUpdate<float, Context>(Input(0).count(), kernel::NesterovUpdate<float, Context>(Input(0).count(),
dXdata, dXdata,
Hdata, Hdata,
&temp, tmp,
momentum, momentum,
lr, lr,
&ctx()); &ctx());
......
...@@ -6,19 +6,17 @@ namespace dragon { ...@@ -6,19 +6,17 @@ namespace dragon {
template <class Context> template <class Context>
void RMSPropUpdateOp<Context>::ComputeRunWithFloat() { void RMSPropUpdateOp<Context>::ComputeRunWithFloat() {
if (!history.get()) { h = ws()->CreateTensor("/mnt/" + Slot() + "/rmsprop/h");
string slot = OperatorBase::GetSingleArg<string>("slot", ""); tmp = ws()->CreateTensor("/mnt/" + Slot() + "/rmsprop/tmp");
if (slot.empty()) history.reset(new Tensor()); h->ReshapeLike(Input(0));
else history.reset(ws()->CreateTensor("/mnt/" + name() + "/history"));
history->ReshapeLike(Input(0));
}
lr = Param("base_lr") * this->lr_mult; lr = Param("base_lr") * this->lr_mult;
auto* dXdata = Input(0).template mutable_data<float, Context>(); auto* dXdata = Input(0).template mutable_data<float, Context>();
auto* Hdata = history->template mutable_data<float, Context>(); auto* Hdata = h->template mutable_data<float, Context>();
kernel::RMSPropUpdate<float, Context>(Input(0).count(), kernel::RMSPropUpdate<float, Context>(Input(0).count(),
dXdata, dXdata,
Hdata, Hdata,
&temp, tmp,
decay, decay,
eps, eps,
lr); lr);
......
#include "operators/update/sgd_update_op.h" #include "operators/update/sgd_update_op.h"
#include "core/workspace.h"
#include "utils/math_functions.h" #include "utils/math_functions.h"
namespace dragon { namespace dragon {
template <class Context> template <class Context>
void SGDUpdateOp<Context>::ComputeRunWithFloat() { void SGDUpdateOp<Context>::ComputeRunWithFloat() {
if (!history.get()) { h = ws()->CreateTensor("/mnt/" + Slot() + "/sgd/h");
history.reset(new Tensor()); h->ReshapeLike(Input(0));
history->ReshapeLike(Input(0));
}
lr = Param("base_lr") * this->lr_mult; lr = Param("base_lr") * this->lr_mult;
auto* dXdata = Input(0).template mutable_data<float, Context>(); auto* dXdata = Input(0).template mutable_data<float, Context>();
auto* Hdata = history->template mutable_data<float, Context>(); auto* Hdata = h->template mutable_data<float, Context>();
math::Axpby<float, Context>(history->count(), lr, dXdata, momentum, Hdata); math::Axpby<float, Context>(h->count(), lr, dXdata, momentum, Hdata);
ctx().template Copy<float, Context, Context>(history->count(), dXdata, Hdata); ctx().template Copy<float, Context, Context>(h->count(), dXdata, Hdata);
} }
DEPLOY_CPU(SGDUpdate); DEPLOY_CPU(SGDUpdate);
......
...@@ -10,6 +10,12 @@ float UpdateOpBase<Context>::Param(const string& name) const { ...@@ -10,6 +10,12 @@ float UpdateOpBase<Context>::Param(const string& name) const {
->template mutable_data<float, CPUContext>()[0]; ->template mutable_data<float, CPUContext>()[0];
} }
// Returns the key used to name this update op's state tensors.
// Reads the string argument "slot"; when that value is empty the operator's
// own name() is returned instead. The update ops in this change build their
// workspace tensor names as "/mnt/" + Slot() + "/<optimizer>/<tensor>".
template <class Context>
string UpdateOpBase<Context>::Slot() {
// "" is the fallback passed to GetSingleArg — presumably what it yields when
// no "slot" argument is set; confirm against OperatorBase.
const string slot = OperatorBase::GetSingleArg<string>("slot", "");
return slot.empty() ? name() : slot;
}
template <class Context> template <typename T> template <class Context> template <typename T>
void UpdateOpBase<Context>::PreprocessRunWithType() { void UpdateOpBase<Context>::PreprocessRunWithType() {
// scale // scale
......
...@@ -15,18 +15,18 @@ void LRNOp<Context>::AcrossRunWithType() { ...@@ -15,18 +15,18 @@ void LRNOp<Context>::AcrossRunWithType() {
template <class Context> template <typename T> template <class Context> template <typename T>
void LRNOp<Context>::SplitRunWithType() { void LRNOp<Context>::SplitRunWithType() {
sqr_in = ws()->CreateTensor("/mnt/" + anchor() + "/sqr_in"); sqr_in = ws()->CreateTensor("/mnt/" + Anchor() + "/sqr/in");
sqr_in->ReshapeLike(Input(0)); sqr_in->ReshapeLike(Input(0));
sqr_in->Share(Input(0)); sqr_in->Share(Input(0));
prod_in = ws()->CreateTensor("/mnt/" + anchor() + "/prod_in"); prod_in = ws()->CreateTensor("/mnt/" + Anchor() + "/prod/in");
prod_in->ReshapeLike(Input(0)); prod_in->ReshapeLike(Input(0));
prod_in->Share(Input(0)); prod_in->Share(Input(0));
} }
template <class Context> template <typename T> template <class Context> template <typename T>
void LRNOp<Context>::SquareRunWithType() { void LRNOp<Context>::SquareRunWithType() {
sqr_out = ws()->CreateTensor("/mnt/" + anchor() + "/sqr_out"); sqr_out = ws()->CreateTensor("/mnt/" + Anchor() + "/sqr/out");
if (!sqr_op) { if (!sqr_op) {
Argument power; Argument power;
power.set_name("power"); power.set_f(2.0); power.set_name("power"); power.set_f(2.0);
...@@ -43,7 +43,7 @@ void LRNOp<Context>::SquareRunWithType() { ...@@ -43,7 +43,7 @@ void LRNOp<Context>::SquareRunWithType() {
template <class Context> template <typename T> template <class Context> template <typename T>
void LRNOp<Context>::PoolRunWithType() { void LRNOp<Context>::PoolRunWithType() {
pool_out = ws()->CreateTensor("/mnt/" + anchor() + "/pool_out"); pool_out = ws()->CreateTensor("/mnt/" + Anchor() + "/pool/out");
if (!pool_op) { if (!pool_op) {
Argument ks, s, p, m, df; Argument ks, s, p, m, df;
ks.set_name("kernel_size"); ks.add_ints(local_size); ks.set_name("kernel_size"); ks.add_ints(local_size);
...@@ -64,7 +64,7 @@ void LRNOp<Context>::PoolRunWithType() { ...@@ -64,7 +64,7 @@ void LRNOp<Context>::PoolRunWithType() {
template <class Context> template <typename T> template <class Context> template <typename T>
void LRNOp<Context>::PowRunWithType() { void LRNOp<Context>::PowRunWithType() {
pow_out = ws()->CreateTensor("/mnt/" + anchor() + "/pow_out"); pow_out = ws()->CreateTensor("/mnt/" + Anchor() + "/pow/out");
if (!pow_op) { if (!pow_op) {
Argument scale, shift, power; Argument scale, shift, power;
scale.set_name("scale"); scale.set_f(alpha); scale.set_name("scale"); scale.set_f(alpha);
...@@ -131,8 +131,8 @@ void LRNGradientOp<Context>::AcrossRunWithType() { ...@@ -131,8 +131,8 @@ void LRNGradientOp<Context>::AcrossRunWithType() {
template <class Context> template <typename T> template <class Context> template <typename T>
void LRNGradientOp<Context>::ProdRunWithType() { void LRNGradientOp<Context>::ProdRunWithType() {
prod_in = ws()->GetTensor("/mnt/" + anchor() + "/prod_in"); prod_in = ws()->GetTensor("/mnt/" + Anchor() + "/prod/in");
pow_out = ws()->GetTensor("/mnt/" + anchor() + "/pow_out"); pow_out = ws()->GetTensor("/mnt/" + Anchor() + "/pow/out");
if (!prod_op) { if (!prod_op) {
Argument operation; Argument operation;
operation.set_name("operation"); operation.set_s("PROD"); operation.set_name("operation"); operation.set_s("PROD");
...@@ -152,7 +152,7 @@ void LRNGradientOp<Context>::ProdRunWithType() { ...@@ -152,7 +152,7 @@ void LRNGradientOp<Context>::ProdRunWithType() {
template <class Context> template <typename T> template <class Context> template <typename T>
void LRNGradientOp<Context>::PowRunWithType() { void LRNGradientOp<Context>::PowRunWithType() {
pool_out = ws()->GetTensor("/mnt/" + anchor() + "/pool_out"); pool_out = ws()->GetTensor("/mnt/" + Anchor() + "/pool/out");
if (!pow_op) { if (!pow_op) {
Argument scale, shift, power; Argument scale, shift, power;
scale.set_name("scale"); scale.set_f(alpha); scale.set_name("scale"); scale.set_f(alpha);
...@@ -173,7 +173,7 @@ void LRNGradientOp<Context>::PowRunWithType() { ...@@ -173,7 +173,7 @@ void LRNGradientOp<Context>::PowRunWithType() {
template <class Context> template <typename T> template <class Context> template <typename T>
void LRNGradientOp<Context>::PoolRunWithType() { void LRNGradientOp<Context>::PoolRunWithType() {
sqr_out = ws()->GetTensor("/mnt/" + anchor() + "/sqr_out"); sqr_out = ws()->GetTensor("/mnt/" + Anchor() + "/sqr/out");
if (!pool_op) { if (!pool_op) {
Argument ks, s, p, m, df; Argument ks, s, p, m, df;
ks.set_name("kernel_size"); ks.add_ints(local_size); ks.set_name("kernel_size"); ks.add_ints(local_size);
...@@ -196,7 +196,7 @@ void LRNGradientOp<Context>::PoolRunWithType() { ...@@ -196,7 +196,7 @@ void LRNGradientOp<Context>::PoolRunWithType() {
template <class Context> template <typename T> template <class Context> template <typename T>
void LRNGradientOp<Context>::SquareRunWithType() { void LRNGradientOp<Context>::SquareRunWithType() {
sqr_in = ws()->GetTensor("/mnt/" + anchor() + "/sqr_in"); sqr_in = ws()->GetTensor("/mnt/" + Anchor() + "/sqr/in");
if (!sqr_op) { if (!sqr_op) {
Argument power; Argument power;
power.set_name("power"); power.set_f(2.0); power.set_name("power"); power.set_f(2.0);
......
...@@ -7,7 +7,7 @@ namespace dragon { ...@@ -7,7 +7,7 @@ namespace dragon {
template <class Context> template <typename T> template <class Context> template <typename T>
void Pooling2dOp<Context>::MAXRunWithType() { void Pooling2dOp<Context>::MAXRunWithType() {
mask = ws()->CreateTensor("/mnt/" + anchor() + "/max_pool_mask"); mask = ws()->CreateTensor("/mnt/" + Anchor() + "/max_pool/mask");
mask->ReshapeLike(*Output(0)); mask->ReshapeLike(*Output(0));
auto* Xdata = Input(0).template data<T, Context>(); auto* Xdata = Input(0).template data<T, Context>();
...@@ -122,7 +122,7 @@ OPERATOR_SCHEMA(Pooling2d).NumInputs(1).NumOutputs(1); ...@@ -122,7 +122,7 @@ OPERATOR_SCHEMA(Pooling2d).NumInputs(1).NumOutputs(1);
template <class Context> template <typename T> template <class Context> template <typename T>
void Pooling2dGradientOp<Context>::MAXRunWithType() { void Pooling2dGradientOp<Context>::MAXRunWithType() {
mask = ws()->GetTensor("/mnt/" + anchor() + "/max_pool_mask"); mask = ws()->GetTensor("/mnt/" + Anchor() + "/max_pool/mask");
auto* dYdata = Input(-1).template data<T, Context>(); auto* dYdata = Input(-1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>(); auto* dXdata = Output(0)->template mutable_data<T, Context>();
......
...@@ -17,7 +17,7 @@ void ROIPoolingOp<Context>::RunWithType() { ...@@ -17,7 +17,7 @@ void ROIPoolingOp<Context>::RunWithType() {
template <class Context> template <class Context>
void ROIPoolingOp<Context>::RunOnDevice() { void ROIPoolingOp<Context>::RunOnDevice() {
mask = ws()->CreateTensor("/mnt/" + anchor() + "/roi_pool_mask"); mask = ws()->CreateTensor("/mnt/" + Anchor() + "/roi_pool/mask");
vector<TIndex> dims({Input(1).dim(0), Input(0).dim(1), pool_h, pool_w}); vector<TIndex> dims({Input(1).dim(0), Input(0).dim(1), pool_h, pool_w});
Output(0)->Reshape(dims); Output(0)->Reshape(dims);
...@@ -45,7 +45,7 @@ void ROIPoolingGradientOp<Context>::RunWithType() { ...@@ -45,7 +45,7 @@ void ROIPoolingGradientOp<Context>::RunWithType() {
template <class Context> template <class Context>
void ROIPoolingGradientOp<Context>::RunOnDevice() { void ROIPoolingGradientOp<Context>::RunOnDevice() {
mask = ws()->GetTensor("/mnt/" + anchor() + "/roi_pool_mask"); mask = ws()->GetTensor("/mnt/" + Anchor() + "/roi_pool/mask");
Output(0)->ReshapeLike(Input(0)); Output(0)->ReshapeLike(Input(0));
......
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!