Commit ba81b99b by Ting PAN

Simplify Installation

1 parent 72f3c4ba
Showing with 456 additions and 166 deletions
# - Find the NumPy libraries
# This module finds if NumPy is installed, and sets the following variables
# indicating where it is.
#
# TODO: Update to provide the libraries and paths for linking npymath lib.
#
# NUMPY_FOUND - was NumPy found
# NUMPY_VERSION - the version of NumPy found as a string
# NUMPY_VERSION_MAJOR - the major version number of NumPy
# NUMPY_VERSION_MINOR - the minor version number of NumPy
# NUMPY_VERSION_PATCH - the patch version number of NumPy
# NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601
# NUMPY_INCLUDE_DIR - path to the NumPy include files
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
if(PYTHONINTERP_FOUND)
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import numpy as n; print(n.__version__); print(n.get_include());"
RESULT_VARIABLE __result
OUTPUT_VARIABLE __output
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(__result MATCHES 0)
string(REGEX REPLACE ";" "\\\\;" __values ${__output})
string(REGEX REPLACE "\r?\n" ";" __values ${__values})
list(GET __values 0 NUMPY_VERSION)
list(GET __values 1 NUMPY_INCLUDE_DIR)
string(REGEX MATCH "^([0-9])+\\.([0-9])+\\.([0-9])+" __ver_check "${NUMPY_VERSION}")
if(NOT "${__ver_check}" STREQUAL "")
set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1})
set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2})
set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3})
math(EXPR NUMPY_VERSION_DECIMAL
"(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR})
else()
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
message(STATUS "Requested NumPy version and include path, but got instead:\n${__output}\n")
endif()
endif()
else()
message("Can not find Python interpretator.")
message(FATAL_ERROR "Do you set PYTHON_EXECUTABLE correctly?")
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NumPy REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION
VERSION_VAR NUMPY_VERSION)
if(NUMPY_FOUND)
message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})")
endif()
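A minimal usage sketch for the module above (illustrative only, not part of this commit; it assumes FindNumPy.cmake is included by path and that FindPythonInterp has already located a working interpreter):

# Usage sketch (hypothetical project snippet)
find_package(PythonInterp REQUIRED)                     # sets PYTHON_EXECUTABLE used by the query above
include(${CMAKE_CURRENT_LIST_DIR}/FindNumPy.cmake)      # defines NUMPY_FOUND, NUMPY_VERSION, NUMPY_INCLUDE_DIR
if(NUMPY_FOUND)
  include_directories(${NUMPY_INCLUDE_DIR})             # makes numpy/arrayobject.h visible to C/C++ sources
endif()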
# - Find python libraries
# This module finds the libraries corresponding to the Python interpreter
# FindPythonInterp provides.
# This code sets the following variables:
#
# PYTHONLIBS_FOUND - have the Python libs been found
# PYTHON_PREFIX - path to the Python installation
# PYTHON_LIBRARIES - path to the python library
# PYTHON_INCLUDE_DIRS - path to where Python.h is found
# PYTHON_MODULE_EXTENSION - lib extension, e.g. '.so' or '.pyd'
# PYTHON_MODULE_PREFIX - lib name prefix: usually an empty string
# PYTHON_SITE_PACKAGES - path to installation site-packages
# PYTHON_IS_DEBUG - whether the Python interpreter is a debug build
#
# Thanks to talljimbo for the patch adding the 'LDVERSION' config
# variable usage.
#=============================================================================
# Copyright 2001-2009 Kitware, Inc.
# Copyright 2012 Continuum Analytics, Inc.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# * Neither the names of Kitware, Inc., the Insight Software Consortium,
# nor the names of their contributors may be used to endorse or promote
# products derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#=============================================================================
# Checking for the extension makes sure that `LibsNew` was found and not just `Libs`.
if(PYTHONLIBS_FOUND AND PYTHON_MODULE_EXTENSION)
return()
endif()
# Use the Python interpreter to find the libs.
if(PythonLibsNew_FIND_REQUIRED)
find_package(PythonInterp ${PythonLibsNew_FIND_VERSION} REQUIRED)
else()
find_package(PythonInterp ${PythonLibsNew_FIND_VERSION})
endif()
if(NOT PYTHONINTERP_FOUND)
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# According to http://stackoverflow.com/questions/646518/python-how-to-detect-debug-interpreter
# testing whether sys has the gettotalrefcount function is a reliable, cross-platform
# way to detect a CPython debug interpreter.
#
# The library suffix is from the config var LDVERSION sometimes, otherwise
# VERSION. VERSION will typically be like "2.7" on unix, and "27" on windows.
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"from distutils import sysconfig as s;import sys;import struct;
print('.'.join(str(v) for v in sys.version_info));
print(sys.prefix);
print(s.get_python_inc(plat_specific=True));
print(s.get_python_lib(plat_specific=True));
print(s.get_config_var('SO'));
print(hasattr(sys, 'gettotalrefcount')+0);
print(struct.calcsize('@P'));
print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
print(s.get_config_var('LIBDIR') or '');
print(s.get_config_var('MULTIARCH') or '');
"
RESULT_VARIABLE _PYTHON_SUCCESS
OUTPUT_VARIABLE _PYTHON_VALUES
ERROR_VARIABLE _PYTHON_ERROR_VALUE)
if(NOT _PYTHON_SUCCESS MATCHES 0)
if(PythonLibsNew_FIND_REQUIRED)
message(FATAL_ERROR
"Python config failure:\n${_PYTHON_ERROR_VALUE}")
endif()
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# Convert the process output into a list
string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES})
string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
list(GET _PYTHON_VALUES 0 _PYTHON_VERSION_LIST)
list(GET _PYTHON_VALUES 1 PYTHON_PREFIX)
list(GET _PYTHON_VALUES 2 PYTHON_INCLUDE_DIR)
list(GET _PYTHON_VALUES 3 PYTHON_SITE_PACKAGES)
list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION)
list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG)
list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P)
list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX)
list(GET _PYTHON_VALUES 8 PYTHON_LIBDIR)
list(GET _PYTHON_VALUES 9 PYTHON_MULTIARCH)
# Make sure the Python interpreter has the same pointer size as the chosen compiler
# Skip if CMAKE_SIZEOF_VOID_P is not defined
if(CMAKE_SIZEOF_VOID_P AND (NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}"))
if(PythonLibsNew_FIND_REQUIRED)
math(EXPR _PYTHON_BITS "${PYTHON_SIZEOF_VOID_P} * 8")
math(EXPR _CMAKE_BITS "${CMAKE_SIZEOF_VOID_P} * 8")
message(FATAL_ERROR
"Python config failure: Python is ${_PYTHON_BITS}-bit, "
"chosen compiler is ${_CMAKE_BITS}-bit")
endif()
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# The built-in FindPython didn't always give the version numbers
string(REGEX REPLACE "\\." ";" _PYTHON_VERSION_LIST ${_PYTHON_VERSION_LIST})
list(GET _PYTHON_VERSION_LIST 0 PYTHON_VERSION_MAJOR)
list(GET _PYTHON_VERSION_LIST 1 PYTHON_VERSION_MINOR)
list(GET _PYTHON_VERSION_LIST 2 PYTHON_VERSION_PATCH)
# Make sure all directory separators are '/'
string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})
string(REGEX REPLACE "\\\\" "/" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDE_DIR})
string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES ${PYTHON_SITE_PACKAGES})
if(CMAKE_HOST_WIN32)
set(PYTHON_LIBRARY
"${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
# When run in a venv, PYTHON_PREFIX points to the venv, but the libraries remain in the
# original Python installation. They may be found relative to PYTHON_INCLUDE_DIR.
if(NOT EXISTS "${PYTHON_LIBRARY}")
get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY)
set(PYTHON_LIBRARY
"${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
endif()
# raise an error if the python libs are still not found.
if(NOT EXISTS "${PYTHON_LIBRARY}")
message(FATAL_ERROR "Python libraries not found")
endif()
else()
if(PYTHON_MULTIARCH)
set(_PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}/${PYTHON_MULTIARCH}" "${PYTHON_LIBDIR}")
else()
set(_PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}")
endif()
#message(STATUS "Searching for Python libs in ${_PYTHON_LIBS_SEARCH}")
# Probably this needs to be more involved. It would be nice if the config
# information the python interpreter itself gave us were more complete.
find_library(PYTHON_LIBRARY
NAMES "python${PYTHON_LIBRARY_SUFFIX}"
PATHS ${_PYTHON_LIBS_SEARCH}
NO_DEFAULT_PATH)
# If all else fails, just set the name/version and let the linker figure out the path.
if(NOT PYTHON_LIBRARY)
set(PYTHON_LIBRARY python${PYTHON_LIBRARY_SUFFIX})
endif()
endif()
MARK_AS_ADVANCED(
PYTHON_LIBRARY
PYTHON_INCLUDE_DIR
)
# We use PYTHON_INCLUDE_DIR, PYTHON_LIBRARY and PYTHON_DEBUG_LIBRARY for the
# cache entries because they are meant to specify the location of a single
# library. We now set the variables listed by the documentation for this
# module.
SET(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}")
SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
SET(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}")
find_package_message(PYTHON
"Found PythonLibs: ${PYTHON_LIBRARY}"
"${PYTHON_EXECUTABLE}${PYTHON_VERSION}")
set(PYTHONLIBS_FOUND TRUE)
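A hedged usage sketch for this module (the target and source names below are hypothetical; in this commit the module is consumed by the top-level CMakeLists.txt shown next):

# Usage sketch (hypothetical extension-module target)
include(${PROJECT_SOURCE_DIR}/../CMake/FindPythonLibs.cmake)   # defines PYTHON_INCLUDE_DIRS, PYTHON_LIBRARIES, PYTHON_MODULE_EXTENSION
include_directories(${PYTHON_INCLUDE_DIRS})
add_library(example_pymodule MODULE example_module.cc)         # hypothetical source file
set_target_properties(example_pymodule PROPERTIES
                      PREFIX "" SUFFIX "${PYTHON_MODULE_EXTENSION}")
target_link_libraries(example_pymodule ${PYTHON_LIBRARIES})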
# ---------------- Welcome To Use Dragon ----------------
PROJECT(dragon)
CMAKE_MINIMUM_REQUIRED(VERSION 3.0.0)
project(dragon)
cmake_minimum_required(VERSION 3.0.0)
# ---------------- Welcome To Use Dragon ----------------
# ---------------- User Config ----------------
# set optional libraries
option(WITH_PYTHON3 "Set ON to use PYTHON3 otherwise PYTHON2" OFF)
# Set optional libraries
option(WITH_PYTHON "Set ON to use PYTHON" ON)
option(WITH_CUDA "Set ON to use CUDA" ON)
option(WITH_CUDNN "Set ON to use CUDNN" OFF)
option(WITH_BLAS "Set ON to use BLAS" OFF)
option(WITH_OMP "Set ON to use OpenMP" ON)
option(WITH_CUDNN "Set ON to use CUDNN" ON)
option(WITH_BLAS "Set ON to use BLAS" ON)
option(WITH_OMP "Set ON to use OpenMP" OFF)
option(WITH_SSE "Set ON to use SSE 4.1" ON)
option(WITH_MPI "Set ON to use MPI" OFF)
option(WITH_MPI_CUDA "Set ON to use MPI-CUDA" OFF)
option(WITH_MPI_NCCL "Set ON to use MPI-NCCL" OFF)
option(WITH_CUDA_FP16 "Set ON to use FP16" ON)
# set your 3rdparty
# Set your 3rdparty
set(3RDPARTY_DIR ${PROJECT_SOURCE_DIR}/../3rdparty)
# set your python environment
set(PYTHON_INCLUDE_DIR /usr/include/python2.7) # preferred
#set(PYTHON_INCLUDE_DIR /usr/include/python3.x) # optional, set specific version
#set(ANACONDA_ROOT_DIR /xxx/anaconda) # optional, preset for 2.7, 3.5, and 3.6
set(NUMPY_ROOT_DIR /xxx/numpy) # required
# set your python "interpreter" if necessary
# if not, a default interpreter will be used
# here, provide several examples:
# set(PYTHON_EXECUTABLE /usr/bin/python) # Linux, OS
# set(PYTHON_EXECUTABLE /X/anaconda/bin/python) # Linux, Anaconda
# set(PYTHON_EXECUTABLE X:/Anaconda/python) # Win, Anaconda
# set CUDA compiling architecture
# Set CUDA compiling architecture
set(CUDA_ARCH -gencode arch=compute_30,code=sm_30
-gencode arch=compute_35,code=sm_35
-gencode arch=compute_50,code=sm_50
-gencode arch=compute_60,code=sm_60)
# Set CUDNN Libs if necessary (Linux Only)
set(CUDNN_LIBRARIES /usr/local/cuda/lib64)
# ---------------- User Config ----------------
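For illustration, a hedged example of how this block might be adjusted for an Anaconda-based Python 3 build (the paths and architecture below are placeholders, not values prescribed by this commit):

# Example user config (hypothetical values)
# set(PYTHON_EXECUTABLE /home/user/anaconda3/bin/python)    # FindPythonLibs.cmake and FindNumPy.cmake query this interpreter
# set(CUDA_ARCH -gencode arch=compute_61,code=sm_61)        # narrow to the target GPU if desired
# Options can also be toggled at configure time, e.g.:
#   cmake .. -DWITH_CUDA=ON -DWITH_CUDNN=ON -DWITH_MPI=OFF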
......@@ -61,8 +65,12 @@ set(CUDA_ARCH -gencode arch=compute_30,code=sm_30
# ---[ Dependencies
if (WITH_PYTHON)
include(${PROJECT_SOURCE_DIR}/../CMake/FindPythonLibs.cmake)
include(${PROJECT_SOURCE_DIR}/../CMake/FindNumPy.cmake)
endif()
if (WITH_CUDA)
FIND_PACKAGE(CUDA REQUIRED)
find_package(CUDA REQUIRED)
endif()
set(CMAKE_CXX_STANDARD 11)
......@@ -74,35 +82,42 @@ set(CMAKE_BUILD_TYPE Release CACHE STRING "set build type to release")
set(CMAKE_CONFIGURATION_TYPES Release CACHE STRING "set build type to release" FORCE)
# ---[ Includes
set(INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
include_directories(${INCLUDE_DIR})
include_directories(${3RDPARTY_DIR}/include)
include_directories(${3RDPARTY_DIR}/include/mpi)
include_directories(${CUDA_INCLUDE_DIRS})
include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_SOURCE_DIR}/src)
include_directories(${PYTHON_INCLUDE_DIR})
include_directories(${ANACONDA_ROOT_DIR}/include)
include_directories(${ANACONDA_ROOT_DIR}/include/python2.7)
include_directories(${ANACONDA_ROOT_DIR}/include/python3.5)
include_directories(${ANACONDA_ROOT_DIR}/include/python3.6)
include_directories(${NUMPY_ROOT_DIR}/core/include)
include_directories(${NUMPY_ROOT_DIR}/include)
include_directories(${NUMPY_ROOT_DIR})
if (WITH_PYTHON)
include_directories(${PYTHON_INCLUDE_DIRS})
include_directories(${NUMPY_INCLUDE_DIR})
endif()
if (WITH_CUDA)
include_directories(${CUDA_INCLUDE_DIRS})
endif()
if (WITH_MPI)
include_directories(${3RDPARTY_DIR}/include/mpi)
endif()
# ---[ libs
set(3RDPARTY_LIBS ${3RDPARTY_DIR}/lib)
set(UINX_CUDNN_LIBS /usr/local/cuda/lib64)
link_directories(${3RDPARTY_LIBS})
link_directories(${UINX_CUDNN_LIBS})
link_directories(${CUDNN_LIBRARIES})
link_directories(${PYTHON_LIBRARIES})
# ---[ Install
set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR} CACHE STRING "set install prefix" FORCE)
set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${3RDPARTY_LIBS})
# ---[ defines
if (WITH_PYTHON3)
ADD_DEFINITIONS(-DWITH_PYTHON3)
message(STATUS "Use PYTHON3 [Optional]")
if (WITH_PYTHON)
ADD_DEFINITIONS(-DWITH_PYTHON)
if (${PYTHON_VERSION_MAJOR} STREQUAL "2")
message(STATUS "Use Python2 [Optional]")
elseif (${PYTHON_VERSION_MAJOR} STREQUAL "3")
message(STATUS "Use Python3 [Optional]")
ADD_DEFINITIONS(-DWITH_PYTHON3)
else()
message("Invalid version of Python(Detected ${PYTHON_VERSION_STRING})")
message(FATAL_ERROR "Do you set PYTHON_EXECUTABLE correctly?")
endif()
endif()
if (WITH_CUDA)
ADD_DEFINITIONS(-DWITH_CUDA)
......@@ -133,6 +148,7 @@ if (WITH_SSE)
endif()
if (WITH_MPI)
ADD_DEFINITIONS(-DWITH_MPI)
#set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${3RDPARTY_LIBS}/../openmpi/install/lib)
message(STATUS "Use MPI [Optional]")
endif()
if (WITH_MPI_CUDA)
......
......@@ -110,8 +110,8 @@ class Operator : public OperatorBase {
virtual void RunOnDevice() = 0;
inline Context& ctx() { return ctx_; }
inline string anchor() { return GetSingleArg("anchor", name()); }
inline bool allow_run() { return allow_run_; }
inline string Anchor() { return GetSingleArg("anchor", name()); }
inline bool AllowRun() { return allow_run_; }
protected:
Context ctx_;
......@@ -155,7 +155,8 @@ OperatorBase* CreateOperator(const OperatorDef& op_def, Workspace* ws);
#define USE_OPERATOR_FUNCTIONS(context) \
USE_OPERATOR_BASE_FUNCTIONS; \
using Operator<context>::ctx; \
using Operator<context>::anchor
using Operator<context>::Anchor; \
using Operator<context>::AllowRun
DECLARE_REGISTRY(CPUOperatorRegistry, OperatorBase,const OperatorDef&, Workspace*);
DECLARE_REGISTRY(CUDAOperatorRegistry, OperatorBase, const OperatorDef&, Workspace*);
......
......@@ -25,7 +25,7 @@ class SoftmaxCrossEntropyOp final : public Operator<Context> {
normalization(OperatorBase::GetSingleArg<string>("normalization", "FULL")) {
OperatorDef softmax_def = MakeOperatorDef("Softmax", "",
vector<string>({ Input(0).name() }),
vector<string>({ "/mnt/" + anchor() + "/softmax_prob" }));
vector<string>({ "/mnt/" + Anchor() + "/softmax/prob" }));
softmax_def.add_arg()->CopyFrom(this->arg("axis"));
if (op_def.has_device_option())
softmax_def.mutable_device_option()->CopyFrom(op_def.device_option());
......
......@@ -31,7 +31,7 @@ class SparseSoftmaxCrossEntropyOp : public Operator<Context> {
}
OperatorDef softmax_def = MakeOperatorDef("Softmax", "",
vector<string>({ Input(0).name() }),
vector<string>({ "/mnt/" + anchor() + "/softmax_prob" }));
vector<string>({ "/mnt/" + Anchor() + "/softmax/prob" }));
softmax_def.add_arg()->CopyFrom(this->arg("axis"));
if (op_def.has_device_option())
softmax_def.mutable_device_option()->CopyFrom(op_def.device_option());
......
......@@ -12,6 +12,8 @@
#ifndef DRAGON_OPERATORS_MISC_PYTHON_OP_H_
#define DRAGON_OPERATORS_MISC_PYTHON_OP_H_
#ifdef WITH_PYTHON
#include <Python.h>
#include "core/operator.h"
......@@ -53,4 +55,6 @@ public:
} // namespace dragon
#endif // WITH_PYTHON
#endif // DRAGON_OPERATORS_MISC_PYTHON_OP_H_
......@@ -33,8 +33,7 @@ class AdamUpdateOp final : public UpdateOpBase<Context> {
protected:
float lr, beta1, beta2, eps, coeff;
int t;
unique_ptr<Tensor> m, v;
Tensor temp;
Tensor* m, *v, *tmp;
};
} // namespace dragon
......
......@@ -29,8 +29,7 @@ class NesterovUpdateOp final : public UpdateOpBase<Context> {
protected:
float lr, momentum;
unique_ptr<Tensor> history;
Tensor temp;
Tensor* h, *tmp;
};
} // namespace dragon
......
......@@ -30,8 +30,7 @@ class RMSPropUpdateOp final : public UpdateOpBase<Context> {
protected:
float lr, decay, eps;
unique_ptr<Tensor> history;
Tensor temp;
Tensor* h, *tmp;
};
} // namespace dragon
......
......@@ -29,7 +29,7 @@ class SGDUpdateOp final : public UpdateOpBase<Context> {
protected:
float lr, momentum;
unique_ptr<Tensor> history;
Tensor* h;
};
......
......@@ -27,6 +27,7 @@ class UpdateOpBase : public Operator<Context> {
USE_OPERATOR_FUNCTIONS(Context);
float Param(const string& name) const;
string Slot();
void RunOnDevice() override;
template <typename T> void PreprocessRunWithType();
......@@ -40,7 +41,8 @@ class UpdateOpBase : public Operator<Context> {
};
#define USE_UPDATER_FUNCTIONS(context) \
using UpdateOpBase<context>::Param
using UpdateOpBase<context>::Param; \
using UpdateOpBase<context>::Slot
} // namespace dragon
......
......@@ -2,8 +2,6 @@ message(STATUS "Found CC Module: ${CMAKE_CURRENT_LIST_DIR}")
FILE(GLOB_RECURSE MODULE_FILES *.h *.hpp *.c *.cpp *.cu *.cc)
FILE(GLOB_RECURSE SRC_FILES ../../src/*.c ../../src/*.cpp ../../src/*.cu ../../src/*.cc)
FILE(GLOB_RECURSE REMOVE_FILES ../../src/python*)
LIST(REMOVE_ITEM SRC_FILES ${REMOVE_FILES})
# ---[ compiler
if (WITH_CUDA)
......
message(STATUS "Found Python Module: ${CMAKE_CURRENT_LIST_DIR}")
if (NOT WITH_PYTHON)
message(FATAL_ERROR "Set WITH_PYTHON as ON for this module.")
endif()
FILE(GLOB_RECURSE MODULE_FILES *.h *.hpp *.c *.cpp *.cu *.cc)
FILE(GLOB_RECURSE SRC_FILES ../../src/*.c ../../src/*.cpp ../../src/*.cu ../../src/*.cc)
......
......@@ -33,7 +33,7 @@ class BaseUpdater(object):
scale_gradient : float
The scale factor of gradients.
clip_gradient : float
The clip factor of gradients.
The clip factor of gradients. \
l2_decay : float
The l2 decay factor. Default is ``-1.0`` (Disabled).
slot : str
......
......@@ -36,9 +36,9 @@ find_packages('dragon')
find_modules()
setup(name = 'dragon',
version='0.2.1.15',
version='0.2.1.16',
description = 'Dragon: A Computation Graph Virtual Machine Based Deep Learning Framework',
url='https://github.com/neopenx/Dragon',
url='https://github.com/seetaresearch/Dragon',
author='Ting Pan',
license='BSD 2-Clause',
packages=packages,
......
......@@ -27,7 +27,7 @@ void DropoutOp<Context>::RunWithType() {
template <class Context>
void DropoutOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
mask = ws()->CreateTensor("/mnt/" + anchor() + "/dropout_mask");
mask = ws()->CreateTensor("/mnt/" + Anchor() + "/dropout/mask");
mask->ReshapeLike(Input(0));
if (Input(0).template IsType<float>()) RunWithType<float>();
......@@ -42,7 +42,7 @@ OPERATOR_SCHEMA(Dropout).NumInputs(1).NumOutputs(1).Inplace({ { 0, 0 } });
template <class Context> template <typename T>
void DropoutGradientOp<Context>::RunWithType() {
mask = ws()->GetTensor("/mnt/" + anchor() + "/dropout_mask");
mask = ws()->GetTensor("/mnt/" + Anchor() + "/dropout/mask");
auto* dYdata = Input(-1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
auto* Mdata = mask->template data<uint32_t, Context>();
......
......@@ -16,7 +16,7 @@ void ClipOp<Context>::RunWithType() {
template <class Context>
void ClipOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
mask = ws()->CreateTensor("/mnt/" + anchor() + "/clip_mask");
mask = ws()->CreateTensor("/mnt/" + Anchor() + "/clip/mask");
mask->ReshapeLike(Input(0));
if (Input(0).template IsType<float>()) return RunWithType<float>();
else LOG(FATAL) << "Unsupported input types.";
......@@ -38,7 +38,7 @@ void ClipGradientOp<Context>::RunWithType() {
template <class Context>
void ClipGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
mask = ws()->GetTensor("/mnt/" + anchor() + "/clip_mask");
mask = ws()->GetTensor("/mnt/" + Anchor() + "/clip/mask");
if (Input(0).template IsType<float>()) return RunWithType<float>();
else LOG(FATAL) << "Unsupported input types.";
}
......
......@@ -142,7 +142,7 @@ void ScanOp<Context>::UnrollTemplate() {
new_def.add_target(Output(i)->name());
}
// upload
Tensor* string_tensor = ws()->CreateTensor("/mnt/" + anchor() + "/raw_ops");
Tensor* string_tensor = ws()->CreateTensor("/mnt/" + Anchor() + "/raw_ops");
string_tensor->Reshape(vector<TIndex>(1, 1));
string* data = string_tensor->mutable_data <string, CPUContext>();
data[0] = new_def.SerializeAsString();
......@@ -171,7 +171,7 @@ void ScanGradientOp<Context>::MakeGradientOps() {
else if (step_type == "Default") nsteps = Input(0).dim(axis);
if (graphs.count(nsteps)) return;
Tensor* ops = ws()->GetTensor("/mnt/" + anchor() + "/raw_ops");
Tensor* ops = ws()->GetTensor("/mnt/" + Anchor() + "/raw_ops");
forward_def.ParseFromString(ops->data<string, CPUContext>()[0]);
vector<string> targets;
for (auto& target : forward_def.target()) targets.push_back(target);
......
......@@ -31,7 +31,7 @@ template <class Context>
void L1LossOp<Context>::RunOnDevice() {
CHECK_EQ(Input(0).count(), Input(1).count());
Output(0)->Reshape(vector<TIndex>(1, 1));
diff = ws()->CreateTensor("/mnt/" + anchor() + "/l1_loss_diff");
diff = ws()->CreateTensor("/mnt/" + Anchor() + "/l1_loss/diff");
diff->ReshapeLike(Input(0));
if (Input(0).template IsType<float>()) RunWithType<float>();
......@@ -67,7 +67,7 @@ void L1LossGradientOp<Context>::RunWithType() {
template <class Context>
void L1LossGradientOp<Context>::RunOnDevice() {
diff = ws()->GetTensor("/mnt/" + anchor() + "/l1_loss_diff");
diff = ws()->GetTensor("/mnt/" + Anchor() + "/l1_loss/diff");
if (Input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "Unsupported input types.";
......
......@@ -29,7 +29,7 @@ template <class Context>
void L2LossOp<Context>::RunOnDevice() {
CHECK_EQ(Input(0).count(), Input(1).count());
Output(0)->Reshape(vector<TIndex>(1, 1));
diff = ws()->CreateTensor("/mnt/" + anchor() + "/l2_loss_diff");
diff = ws()->CreateTensor("/mnt/" + Anchor() + "/l2_loss/diff");
diff->ReshapeLike(Input(0));
if (Input(0).template IsType<float>()) RunWithType<float>();
......@@ -64,7 +64,7 @@ void L2LossGradientOp<Context>::RunWithType() {
template <class Context>
void L2LossGradientOp<Context>::RunOnDevice() {
diff = ws()->GetTensor("/mnt/" + anchor() + "/l2_loss_diff");
diff = ws()->GetTensor("/mnt/" + Anchor() + "/l2_loss/diff");
if (Input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "Unsupported input types.";
......
......@@ -39,7 +39,7 @@ void SmoothL1LossOp<Context>::RunOnDevice() {
if (InputSize() > 3) CHECK(Input(0).dims() == Input(3).dims());
Output(0)->Reshape(vector<TIndex>(1, 1));
diff = ws()->CreateTensor("/mnt/" + anchor() + "/smoothl1_loss_diff");
diff = ws()->CreateTensor("/mnt/" + Anchor() + "/smoothl1_loss/diff");
error = ws()->CreateTensor("/share/smoothl1_loss_error");
diff->ReshapeLike(Input(0));
error->ReshapeLike(Input(0));
......@@ -86,7 +86,7 @@ void SmoothL1LossGradientOp<Context>::RunWithType() {
template <class Context>
void SmoothL1LossGradientOp<Context>::RunOnDevice() {
diff = ws()->GetTensor("/mnt/" + anchor() + "/smoothl1_loss_diff");
diff = ws()->GetTensor("/mnt/" + Anchor() + "/smoothl1_loss/diff");
if (Input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "Unsupported input types.";
......
......@@ -43,7 +43,7 @@ void SoftmaxCrossEntropyOp<Context>::RunOnDevice() {
<< "\nNumber of predictions must match the number of labels.";
losses.ReshapeLike(Input(0));
softmax_op->Run();
prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax_prob");
prob = ws()->GetTensor("/mnt/" + Anchor() + "/softmax/prob");
if (Input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "Unsupported input types.";
......@@ -85,7 +85,7 @@ void SoftmaxCrossEntropyGradientOp<Context>::RunWithType() {
template <class Context>
void SoftmaxCrossEntropyGradientOp<Context>::RunOnDevice() {
prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax_prob");
prob = ws()->GetTensor("/mnt/" + Anchor() + "/softmax/prob");
outer_dim = prob->count(0, axis);
inner_dim = prob->count(axis + 1);
Output(0)->ReshapeLike(Input(0));
......
......@@ -51,7 +51,7 @@ void SparseSoftmaxCrossEntropyOp<Context>::RunOnDevice() {
valid.Reshape(vector<TIndex>(1, outer_dim * inner_dim));
losses.Reshape(vector<TIndex>(1, outer_dim * inner_dim));
softmax_op->Run();
prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax_prob");
prob = ws()->GetTensor("/mnt/" + Anchor() + "/softmax/prob");
if (Input(0).template IsType<float>()) RunWithType<float>();
else LOG(FATAL) << "Unsupported input types.";
......@@ -100,7 +100,7 @@ void SparseSoftmaxCrossEntropyGradientOp<Context>::RunWithType() {
template <class Context>
void SparseSoftmaxCrossEntropyGradientOp<Context>::RunOnDevice() {
prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax_prob");
prob = ws()->GetTensor("/mnt/" + Anchor() + "/softmax/prob");
outer_dim = prob->count(0, axis);
inner_dim = prob->count(axis + 1);
Output(0)->ReshapeLike(Input(0));
......
......@@ -57,8 +57,8 @@ void SparseSoftmaxFocalLossOp<Context>::RunOnDevice() {
this->valid.Reshape(vector<TIndex>(1, outer_dim * inner_dim));
this->losses.Reshape(vector<TIndex>(1, outer_dim * inner_dim));
this->softmax_op->Run();
this->prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax_prob");
scale = ws()->CreateTensor("/mnt/" + anchor() + "/focal_scale");
this->prob = ws()->GetTensor("/mnt/" + Anchor() + "/softmax/prob");
scale = ws()->CreateTensor("/mnt/" + Anchor() + "/focal/scale");
scale->ReshapeLike(*this->prob);
if (Input(0).template IsType<float>()) RunWithType<float>();
......@@ -116,8 +116,8 @@ void SparseSoftmaxFocalLossGradientOp<Context>::RunWithType() {
template <class Context>
void SparseSoftmaxFocalLossGradientOp<Context>::RunOnDevice() {
this->prob = ws()->GetTensor("/mnt/" + anchor() + "/softmax_prob");
scale = ws()->GetTensor("/mnt/" + anchor() + "/focal_scale");
this->prob = ws()->GetTensor("/mnt/" + Anchor() + "/softmax/prob");
scale = ws()->GetTensor("/mnt/" + Anchor() + "/focal/scale");
outer_dim = this->prob->count(0, axis);
inner_dim = this->prob->count(axis + 1);
Output(0)->ReshapeLike(Input(0));
......
......@@ -61,7 +61,12 @@ OPERATOR_SCHEMA(GradientGather).NumOutputs(1);
NO_GRADIENT(GradientGather);
template <class Context>
void StopGradientOp<Context>::RunOnDevice() {}
void StopGradientOp<Context>::RunOnDevice() {
if (Output(0)->name() != Input(0).name()) {
Output(0)->ReshapeLike(Input(0));
Output(0)->Share(Input(0));
}
}
DEPLOY_CPU(StopGradient);
#ifdef WITH_CUDA
......
#include "operators/misc/python_op.h"
#ifdef WITH_PYTHON
#ifdef WITH_PYTHON3
#define PyBytes_FromStringAndSize PyUnicode_FromStringAndSize
#endif
......@@ -36,7 +38,7 @@ RunOp<Context>::RunOp(const OperatorDef& op_def, Workspace* ws)
outputs = PyList_New(OutputSize());
for (int i = 0; i < OutputSize(); i++)
PyList_SetItem(outputs, i, String(Output(i)->name().c_str()));
if (!this->allow_run()) return;
if (!AllowRun()) return;
// setup
if (PyObject_HasAttr(self, String("setup")))
......@@ -111,3 +113,5 @@ class GetTemplateGradient final : public GradientMakerBase {
REGISTER_GRADIENT(Template, GetTemplateGradient);
} // namespace dragon
#endif // WITH_PYTHON
......@@ -32,7 +32,7 @@ void RandomPickOp<Context>::RunOnDevice() {
inner_dim = Input(0).count(axis + 1);
Output(0)->Reshape(output_dims);
pick_indices = ws()->CreateTensor("/mnt/" + anchor() + "/pick_indices");
pick_indices = ws()->CreateTensor("/mnt/" + Anchor() + "/pick/indices");
pick_indices->Reshape(vector<TIndex>(1, max_samples));
if (Input(0).template IsType<float>()) RunWithType<float>();
......@@ -65,7 +65,7 @@ void RandomPickGradientOp<Context>::RunWithType() {
template <class Context>
void RandomPickGradientOp<Context>::RunOnDevice() {
pick_indices = ws()->GetTensor("/mnt/" + anchor() + "/pick_indices");
pick_indices = ws()->GetTensor("/mnt/" + Anchor() + "/pick/indices");
x_slice_dim = Input(0).dim(axis);
y_slice_dim = pick_indices->count();
......
......@@ -27,9 +27,9 @@ void TransposeOp<Context>::RunOnDevice() {
<< "\nbut Tensor(" << Input(0).name() << ")'s dims are "
<< Input(0).dim_string();
vector<TIndex> output_dims;
order = ws()->CreateTensor("/mnt/" + anchor() + "/transpose_order");
old_steps = ws()->CreateTensor("/mnt/" + anchor() + "/transpose_old_steps");
new_steps = ws()->CreateTensor("/mnt/" + anchor() + "/transpose_new_steps");
order = ws()->CreateTensor("/mnt/" + Anchor() + "/transpose/order");
old_steps = ws()->CreateTensor("/mnt/" + Anchor() + "/transpose/old_steps");
new_steps = ws()->CreateTensor("/mnt/" + Anchor() + "/transpose/new_steps");
order->Reshape(vector<TIndex>(1, perms.size()));
old_steps->Reshape(vector<TIndex>(1, perms.size()));
new_steps->Reshape(vector<TIndex>(1, perms.size()));
......@@ -76,9 +76,9 @@ void TransposeGradientOp<Context>::RunWithType() {
template <class Context>
void TransposeGradientOp<Context>::RunOnDevice() {
Output(0)->ReshapeLike(Input(0));
order = ws()->GetTensor("/mnt/" + anchor() + "/transpose_order");
old_steps = ws()->GetTensor("/mnt/" + anchor() + "/transpose_old_steps");
new_steps = ws()->GetTensor("/mnt/" + anchor() + "/transpose_new_steps");
order = ws()->GetTensor("/mnt/" + Anchor() + "/transpose/order");
old_steps = ws()->GetTensor("/mnt/" + Anchor() + "/transpose/old_steps");
new_steps = ws()->GetTensor("/mnt/" + Anchor() + "/transpose/new_steps");
if (Input(0).template IsType<float>()) RunWithType<float>();
#ifdef WITH_CUDA_FP16
......
......@@ -204,7 +204,7 @@ void BatchNormOp<Context>::Setup() {
NS = N * S;
// make resource
var = ws()->CreateTensor("/mnt/" + anchor() + "/bn_var");
var = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/var");
stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0));
......@@ -377,7 +377,7 @@ void BatchNormGradientOp<Context>::Setup() {
NS = N * S;
// make resource
var = ws()->GetTensor("/mnt/" + anchor() + "/bn_var");
var = ws()->GetTensor("/mnt/" + Anchor() + "/bn/var");
stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0));
......
......@@ -279,9 +279,9 @@ void BatchRenormOp<Context>::Setup() {
NS = N * S;
// make resource
var = ws()->CreateTensor("/mnt/" + anchor() + "/bn_var");
r = ws()->CreateTensor("/mnt/" + anchor() + "/bn_r");
x_norm = ws()->CreateTensor("/mnt/" + anchor() + "/bn_x_norm");
var = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/var");
r = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/r");
x_norm = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/x_norm");
stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0));
......@@ -471,9 +471,9 @@ void BatchRenormGradientOp<Context>::Setup() {
NS = N * S;
// make resource
var = ws()->GetTensor("/mnt/" + anchor() + "/bn_var");
r = ws()->GetTensor("/mnt/" + anchor() + "/bn_r");
x_norm = ws()->GetTensor("/mnt/" + anchor() + "/bn_x_norm");
var = ws()->GetTensor("/mnt/" + Anchor() + "/bn/var");
r = ws()->GetTensor("/mnt/" + Anchor() + "/bn/r");
x_norm = ws()->GetTensor("/mnt/" + Anchor() + "/bn/x_norm");
stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0));
......
......@@ -116,8 +116,8 @@ void CuDNNBatchNormOp<Context>::Setup() {
C = Input(0).dim(channel_axis);
// make resource
mean = ws()->CreateTensor("/mnt/" + anchor() + "/bn_mean");
var = ws()->CreateTensor("/mnt/" + anchor() + "/bn_var");
mean = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/mean");
var = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/var");
// reshape
mean->Reshape(vector<TIndex>(1, C));
......@@ -160,8 +160,8 @@ void CuDNNBatchNormGradientOp<Context>::Setup() {
NS = N * S;
// make resource
mean = ws()->GetTensor("/mnt/" + anchor() + "/bn_mean");
var = ws()->GetTensor("/mnt/" + anchor() + "/bn_var");
mean = ws()->GetTensor("/mnt/" + Anchor() + "/bn/mean");
var = ws()->GetTensor("/mnt/" + Anchor() + "/bn/var");
// reshape
mean->Reshape(vector<TIndex>(1, C));
......
......@@ -246,9 +246,9 @@ void FusedBatchNormOp<Context>::Setup() {
NS = N * S;
// make resource
mean = ws()->CreateTensor("/mnt/" + anchor() + "/bn_mean");
var = ws()->CreateTensor("/mnt/" + anchor() + "/bn_var");
x_norm = ws()->CreateTensor("/mnt/" + anchor() + "/bn_x_norm");
mean = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/mean");
var = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/var");
x_norm = ws()->CreateTensor("/mnt/" + Anchor() + "/bn/x_norm");
stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0));
......@@ -506,9 +506,9 @@ void FusedBatchNormGradientOp<Context>::Setup() {
NS = N * S;
// make resource
mean = ws()->GetTensor("/mnt/" + anchor() + "/bn_mean");
var = ws()->GetTensor("/mnt/" + anchor() + "/bn_var");
x_norm = ws()->GetTensor("/mnt/" + anchor() + "/bn_x_norm");
mean = ws()->GetTensor("/mnt/" + Anchor() + "/bn/mean");
var = ws()->GetTensor("/mnt/" + Anchor() + "/bn/var");
x_norm = ws()->GetTensor("/mnt/" + Anchor() + "/bn/x_norm");
stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0));
......
......@@ -227,9 +227,9 @@ void FusedGroupNormOp<Context>::Setup() {
NS = N * S;
// make resource
mean = ws()->CreateTensor("/mnt/" + anchor() + "/gn_mean");
var = ws()->CreateTensor("/mnt/" + anchor() + "/gn_var");
x_norm = ws()->CreateTensor("/mnt/" + anchor() + "/gn_x_norm");
mean = ws()->CreateTensor("/mnt/" + Anchor() + "/gn/mean");
var = ws()->CreateTensor("/mnt/" + Anchor() + "/gn/var");
x_norm = ws()->CreateTensor("/mnt/" + Anchor() + "/gn/x_norm");
stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0));
......@@ -467,9 +467,9 @@ void FusedGroupNormGradientOp<Context>::Setup() {
NS = N * S;
// make resource
mean = ws()->GetTensor("/mnt/" + anchor() + "/gn_mean");
var = ws()->GetTensor("/mnt/" + anchor() + "/gn_var");
x_norm = ws()->GetTensor("/mnt/" + anchor() + "/gn_x_norm");
mean = ws()->GetTensor("/mnt/" + Anchor() + "/gn/mean");
var = ws()->GetTensor("/mnt/" + Anchor() + "/gn/var");
x_norm = ws()->GetTensor("/mnt/" + Anchor() + "/gn/x_norm");
stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0));
......
......@@ -185,7 +185,7 @@ void GroupNormOp<Context>::Setup() {
NS = N * S;
// make resource
var = ws()->CreateTensor("/mnt/" + anchor() + "/gn_var");
var = ws()->CreateTensor("/mnt/" + Anchor() + "/gn/var");
stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0));
......@@ -337,7 +337,7 @@ void GroupNormGradientOp<Context>::Setup() {
NS = N * S;
// make resource
var = ws()->GetTensor("/mnt/" + anchor() + "/gn_var");
var = ws()->GetTensor("/mnt/" + Anchor() + "/gn/var");
stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0));
......
......@@ -110,7 +110,7 @@ void InstanceNormOp<Context>::Setup() {
CS = C * S;
// make resource
var = ws()->CreateTensor("/mnt/" + anchor() + "/ins_norm_var");
var = ws()->CreateTensor("/mnt/" + Anchor() + "/ins_norm/var");
stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0));
......@@ -243,7 +243,7 @@ void InstanceNormGradientOp<Context>::Setup() {
CS = C * S;
// make resource
var = ws()->GetTensor("/mnt/" + anchor() + "/ins_norm_var");
var = ws()->GetTensor("/mnt/" + Anchor() + "/ins_norm/var");
stddev = ws()->GetBuffer();
stddev->ReshapeLike(Input(0));
......
......@@ -15,7 +15,7 @@ void L2NormOp<Context>::RunWithType() {
buffer->Reshape(dims);
// normalize by inner_dim independently if not across it
norm = ws()->CreateTensor("/mnt/" + anchor() + "/l2norm_normalizer");
norm = ws()->CreateTensor("/mnt/" + Anchor() + "/l2norm/normalizer");
dims = Input(0).dims();
for (int i = axis; i < end_axis; i++) dims[i] = 1;
norm->Reshape(dims);
......@@ -96,7 +96,7 @@ void L2NormGradientOp<Context>::RunWithType() {
INIT_MULTIPLIER(multiplier, dim);
// normalize by inner_dim independently if not across it
norm = ws()->GetTensor("/mnt/" + anchor() + "/l2norm_normalizer");
norm = ws()->GetTensor("/mnt/" + Anchor() + "/l2norm/normalizer");
buffer = ws()->GetBuffer();
vector<TIndex> dims = Input(0).dims();
for (int i = 0; i < axis; i++) dims[i] = 1;
......
#include "operators/update/adam_update_op.h"
#include "core/workspace.h"
#include "utils/op_kernel.h"
namespace dragon {
template <class Context>
void AdamUpdateOp<Context>::ComputeRunWithFloat() {
if (!m.get()) {
m.reset(new Tensor()); m->ReshapeLike(Input(0));
v.reset(new Tensor()); v->ReshapeLike(Input(0));
}
m = ws()->CreateTensor("/mnt/" + Slot() + "/adam/m");
v = ws()->CreateTensor("/mnt/" + Slot() + "/adam/v");
tmp = ws()->CreateTensor("/mnt/" + Slot() + "/adam/tmp");
m->ReshapeLike(Input(0));
v->ReshapeLike(Input(0));
t++;
coeff = sqrt(1. - pow(beta2, t)) / (1. - pow(beta1, t));
lr = Param("base_lr") * coeff * this->lr_mult;
kernel::AdamUpdate<float, Context>(&Input(0),
m.get(),
v.get(),
&temp,
beta1,
beta2,
eps,
kernel::AdamUpdate<float, Context>(&Input(0),
m, v, tmp,
beta1,
beta2,
eps,
lr);
}
......
#include "operators/update/nesterov_update_op.h"
#include "core/workspace.h"
#include "utils/math_functions.h"
#include "utils/op_kernel.h"
......@@ -6,19 +7,19 @@ namespace dragon {
template <class Context>
void NesterovUpdateOp<Context>::ComputeRunWithFloat() {
if (!history.get()) {
history.reset(new Tensor());
history->ReshapeLike(Input(0));
}
h = ws()->CreateTensor("/mnt/" + Slot() + "/nesterov/h");
tmp = ws()->CreateTensor("/mnt/" + Slot() + "/nesterov/tmp");
h->ReshapeLike(Input(0));
lr = Param("base_lr") * this->lr_mult;
auto* dXdata = Input(0).template mutable_data<float, Context>();
auto* Hdata = history->template mutable_data<float, Context>();
kernel::NesterovUpdate<float, Context>(Input(0).count(),
dXdata,
Hdata,
&temp,
momentum,
lr,
auto* Hdata = h->template mutable_data<float, Context>();
kernel::NesterovUpdate<float, Context>(Input(0).count(),
dXdata,
Hdata,
tmp,
momentum,
lr,
&ctx());
}
......
......@@ -6,21 +6,19 @@ namespace dragon {
template <class Context>
void RMSPropUpdateOp<Context>::ComputeRunWithFloat() {
if (!history.get()) {
string slot = OperatorBase::GetSingleArg<string>("slot", "");
if (slot.empty()) history.reset(new Tensor());
else history.reset(ws()->CreateTensor("/mnt/" + name() + "/history"));
history->ReshapeLike(Input(0));
}
h = ws()->CreateTensor("/mnt/" + Slot() + "/rmsprop/h");
tmp = ws()->CreateTensor("/mnt/" + Slot() + "/rmsprop/tmp");
h->ReshapeLike(Input(0));
lr = Param("base_lr") * this->lr_mult;
auto* dXdata = Input(0).template mutable_data<float, Context>();
auto* Hdata = history->template mutable_data<float, Context>();
kernel::RMSPropUpdate<float, Context>(Input(0).count(),
dXdata,
Hdata,
&temp,
decay,
eps,
auto* Hdata = h->template mutable_data<float, Context>();
kernel::RMSPropUpdate<float, Context>(Input(0).count(),
dXdata,
Hdata,
tmp,
decay,
eps,
lr);
}
......
#include "operators/update/sgd_update_op.h"
#include "core/workspace.h"
#include "utils/math_functions.h"
namespace dragon {
template <class Context>
void SGDUpdateOp<Context>::ComputeRunWithFloat() {
if (!history.get()) {
history.reset(new Tensor());
history->ReshapeLike(Input(0));
}
h = ws()->CreateTensor("/mnt/" + Slot() + "/sgd/h");
h->ReshapeLike(Input(0));
lr = Param("base_lr") * this->lr_mult;
auto* dXdata = Input(0).template mutable_data<float, Context>();
auto* Hdata = history->template mutable_data<float, Context>();
math::Axpby<float, Context>(history->count(), lr, dXdata, momentum, Hdata);
ctx().template Copy<float, Context, Context>(history->count(), dXdata, Hdata);
auto* Hdata = h->template mutable_data<float, Context>();
math::Axpby<float, Context>(h->count(), lr, dXdata, momentum, Hdata);
ctx().template Copy<float, Context, Context>(h->count(), dXdata, Hdata);
}
DEPLOY_CPU(SGDUpdate);
......
......@@ -10,6 +10,12 @@ float UpdateOpBase<Context>::Param(const string& name) const {
->template mutable_data<float, CPUContext>()[0];
}
template <class Context>
string UpdateOpBase<Context>::Slot() {
const string slot = OperatorBase::GetSingleArg<string>("slot", "");
return slot.empty() ? name() : slot;
}
template <class Context> template <typename T>
void UpdateOpBase<Context>::PreprocessRunWithType() {
// scale
......
......@@ -15,18 +15,18 @@ void LRNOp<Context>::AcrossRunWithType() {
template <class Context> template <typename T>
void LRNOp<Context>::SplitRunWithType() {
sqr_in = ws()->CreateTensor("/mnt/" + anchor() + "/sqr_in");
sqr_in = ws()->CreateTensor("/mnt/" + Anchor() + "/sqr/in");
sqr_in->ReshapeLike(Input(0));
sqr_in->Share(Input(0));
prod_in = ws()->CreateTensor("/mnt/" + anchor() + "/prod_in");
prod_in = ws()->CreateTensor("/mnt/" + Anchor() + "/prod/in");
prod_in->ReshapeLike(Input(0));
prod_in->Share(Input(0));
}
template <class Context> template <typename T>
void LRNOp<Context>::SquareRunWithType() {
sqr_out = ws()->CreateTensor("/mnt/" + anchor() + "/sqr_out");
sqr_out = ws()->CreateTensor("/mnt/" + Anchor() + "/sqr/out");
if (!sqr_op) {
Argument power;
power.set_name("power"); power.set_f(2.0);
......@@ -43,7 +43,7 @@ void LRNOp<Context>::SquareRunWithType() {
template <class Context> template <typename T>
void LRNOp<Context>::PoolRunWithType() {
pool_out = ws()->CreateTensor("/mnt/" + anchor() + "/pool_out");
pool_out = ws()->CreateTensor("/mnt/" + Anchor() + "/pool/out");
if (!pool_op) {
Argument ks, s, p, m, df;
ks.set_name("kernel_size"); ks.add_ints(local_size);
......@@ -64,7 +64,7 @@ void LRNOp<Context>::PoolRunWithType() {
template <class Context> template <typename T>
void LRNOp<Context>::PowRunWithType() {
pow_out = ws()->CreateTensor("/mnt/" + anchor() + "/pow_out");
pow_out = ws()->CreateTensor("/mnt/" + Anchor() + "/pow/out");
if (!pow_op) {
Argument scale, shift, power;
scale.set_name("scale"); scale.set_f(alpha);
......@@ -131,8 +131,8 @@ void LRNGradientOp<Context>::AcrossRunWithType() {
template <class Context> template <typename T>
void LRNGradientOp<Context>::ProdRunWithType() {
prod_in = ws()->GetTensor("/mnt/" + anchor() + "/prod_in");
pow_out = ws()->GetTensor("/mnt/" + anchor() + "/pow_out");
prod_in = ws()->GetTensor("/mnt/" + Anchor() + "/prod/in");
pow_out = ws()->GetTensor("/mnt/" + Anchor() + "/pow/out");
if (!prod_op) {
Argument operation;
operation.set_name("operation"); operation.set_s("PROD");
......@@ -152,7 +152,7 @@ void LRNGradientOp<Context>::ProdRunWithType() {
template <class Context> template <typename T>
void LRNGradientOp<Context>::PowRunWithType() {
pool_out = ws()->GetTensor("/mnt/" + anchor() + "/pool_out");
pool_out = ws()->GetTensor("/mnt/" + Anchor() + "/pool/out");
if (!pow_op) {
Argument scale, shift, power;
scale.set_name("scale"); scale.set_f(alpha);
......@@ -173,7 +173,7 @@ void LRNGradientOp<Context>::PowRunWithType() {
template <class Context> template <typename T>
void LRNGradientOp<Context>::PoolRunWithType() {
sqr_out = ws()->GetTensor("/mnt/" + anchor() + "/sqr_out");
sqr_out = ws()->GetTensor("/mnt/" + Anchor() + "/sqr/out");
if (!pool_op) {
Argument ks, s, p, m, df;
ks.set_name("kernel_size"); ks.add_ints(local_size);
......@@ -196,7 +196,7 @@ void LRNGradientOp<Context>::PoolRunWithType() {
template <class Context> template <typename T>
void LRNGradientOp<Context>::SquareRunWithType() {
sqr_in = ws()->GetTensor("/mnt/" + anchor() + "/sqr_in");
sqr_in = ws()->GetTensor("/mnt/" + Anchor() + "/sqr/in");
if (!sqr_op) {
Argument power;
power.set_name("power"); power.set_f(2.0);
......
......@@ -7,7 +7,7 @@ namespace dragon {
template <class Context> template <typename T>
void Pooling2dOp<Context>::MAXRunWithType() {
mask = ws()->CreateTensor("/mnt/" + anchor() + "/max_pool_mask");
mask = ws()->CreateTensor("/mnt/" + Anchor() + "/max_pool/mask");
mask->ReshapeLike(*Output(0));
auto* Xdata = Input(0).template data<T, Context>();
......@@ -122,7 +122,7 @@ OPERATOR_SCHEMA(Pooling2d).NumInputs(1).NumOutputs(1);
template <class Context> template <typename T>
void Pooling2dGradientOp<Context>::MAXRunWithType() {
mask = ws()->GetTensor("/mnt/" + anchor() + "/max_pool_mask");
mask = ws()->GetTensor("/mnt/" + Anchor() + "/max_pool/mask");
auto* dYdata = Input(-1).template data<T, Context>();
auto* dXdata = Output(0)->template mutable_data<T, Context>();
......
......@@ -17,7 +17,7 @@ void ROIPoolingOp<Context>::RunWithType() {
template <class Context>
void ROIPoolingOp<Context>::RunOnDevice() {
mask = ws()->CreateTensor("/mnt/" + anchor() + "/roi_pool_mask");
mask = ws()->CreateTensor("/mnt/" + Anchor() + "/roi_pool/mask");
vector<TIndex> dims({Input(1).dim(0), Input(0).dim(1), pool_h, pool_w});
Output(0)->Reshape(dims);
......@@ -45,7 +45,7 @@ void ROIPoolingGradientOp<Context>::RunWithType() {
template <class Context>
void ROIPoolingGradientOp<Context>::RunOnDevice() {
mask = ws()->GetTensor("/mnt/" + anchor() + "/roi_pool_mask");
mask = ws()->GetTensor("/mnt/" + Anchor() + "/roi_pool/mask");
Output(0)->ReshapeLike(Input(0));
......