Commit 58c5371e by Ting PAN

Add support to query the size of the allocated memory

Summary:
This commit adds the ``memory_allocated`` API to ``dragon.Workspace``
to query the size of memory allocated by tensors, optionally restricted to a specified device.
1 parent a7a7e4fc
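For quick reference, a minimal usage sketch of the new API, mirroring the unit tests added at the end of this diff; the printed values are illustrative only.

```python
import dragon

# Query the memory allocated by tensors in a workspace.
w = dragon.Workspace()
with w.as_default():
    # Total bytes of CPU memory held by tensors in this workspace.
    cpu_bytes = w.memory_allocated(device_type='cpu', device_index=0)
    # Total bytes held on the current CUDA device;
    # returns 0 if no tensors are resident on that device.
    cuda_bytes = dragon.cuda.memory_allocated()
    print(cpu_bytes, cuda_bytes)
```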
......@@ -45,11 +45,9 @@ class Dropout(Layer):
def __init__(self, layer_param):
super(Dropout, self).__init__(layer_param)
param = layer_param.dropout_param
self.arguments = {
'prob': param.dropout_ratio,
'scale': param.scale_train
if hasattr(param, 'scale_train') else True,
}
if not param.scale_train:
raise ValueError('Unscaled dropout is not supported.')
self.arguments = {'prob': param.dropout_ratio}
def __call__(self, bottom):
return activation_ops.dropout(bottom, **self.arguments)
......
......@@ -69,7 +69,11 @@ set_cuda_data
size
####
.. doxygenfunction:: dragon::UnifiedMemory::size
.. doxygenfunction:: dragon::UnifiedMemory::size() const
size
####
.. doxygenfunction:: dragon::UnifiedMemory::size(const string &device_type, int device_id) const
state
#####
......
......@@ -30,6 +30,10 @@ has_tensor
##########
.. automethod:: dragon.Workspace.has_tensor
memory_allocated
################
.. automethod:: dragon.Workspace.memory_allocated
merge_from
##########
.. automethod:: dragon.Workspace.merge_from
......
......@@ -16,7 +16,7 @@ dragon.cuda
: Return the index of current selected device.
`enable_cudnn(...) <cuda/enable_cudnn.html>`_
: Enable the CuDNN library.
: Enable the backend to use the cuDNN library.
`get_device_capability(...) <cuda/get_device_capability.html>`_
: Return the capability of specified device.
......@@ -24,6 +24,9 @@ dragon.cuda
`is_available(...) <cuda/is_available.html>`_
: Return a bool reporting if runtime is available.
`memory_allocated(...) <cuda/memory_allocated.html>`_
: Return the size of memory used by tensors in the current workspace.
`set_default_device(...) <cuda/set_default_device.html>`_
: Set the default device.
......@@ -40,6 +43,7 @@ dragon.cuda
cuda/enable_cudnn
cuda/get_device_capability
cuda/is_available
cuda/memory_allocated
cuda/set_default_device
cuda/set_device
cuda/Stream
......
memory_allocated
================
.. autofunction:: dragon.cuda.memory_allocated
.. _dragon.Workspace.memory_allocated(...): ../Workspace.html#memory-allocated
.. raw:: html
<style>
h1:before {
content: "dragon.cuda.";
color: #103d3e;
}
</style>
......@@ -41,16 +41,20 @@ class CUDAObjects {
~CUDAObjects() {
for (int i = 0; i < CUDA_MAX_DEVICES; i++) {
#ifdef USE_NCCL
for (auto& comm : nccl_comms_[i]) {
/*!
* Temporarily disable the comm destroying,
* to avoid an unhandled error.
*/
for (auto& comm_iter : nccl_comms_[i]) {
if (comm_iter.second) {
NCCL_CHECK(ncclCommDestroy(comm_iter.second));
}
}
#endif
#ifdef USE_CUDNN
for (auto& handle : cudnn_handles_[i]) {
if (handle) CUDNN_CHECK(cudnnDestroy(handle));
/*!
* Temporarily disable the handle destroying,
* to avoid the segmentation fault in CUDNN v8.
*
* if (handle) CUDNN_CHECK(cudnnDestroy(handle));
*/
}
#endif
for (auto& handle : cublas_handles_[i]) {
......
......@@ -76,6 +76,16 @@ class DRAGON_API UnifiedMemory {
return size_;
}
/*! \brief Return the total number of bytes on the given device */
size_t size(const string& device_type, int device_id) const {
if (device_type == "cuda") {
if (own_cuda_ptr_ && cuda_ptr_ && device_id_ == device_id) {
return size_;
}
}
return size_t(0);
}
/*! \brief Return the number of memory chunks */
size_t num_chunks() const {
return num_chunks_;
......@@ -159,15 +169,18 @@ class DRAGON_API UnifiedMemory {
/*! \brief The cpu data pointer */
void* cpu_ptr_ = nullptr;
/*! \brief The ownership of cpu data pointer */
bool own_cpu_ptr_ = true;
/*! \brief The cuda data pointer */
void* cuda_ptr_ = nullptr;
/*! \brief The ownership of cuda data pointer */
bool own_cuda_ptr_ = true;
/*! \brief The cnml data pointer */
void* cnml_ptr_ = nullptr;
/*! \brief The ownership of data pointers */
int own_cpu_ptr_ = 1, own_cuda_ptr_ = 1;
/*! \brief The binding cpu tensor for cnml */
cnmlCpuTensor_t cnml_cpu_tensor_ = nullptr;
......
......@@ -92,11 +92,11 @@ class DRAGON_API Tensor {
if (d > 0) new_size *= d;
}
if (capacity_ < new_size * meta_.itemsize()) {
if (own_memory_) {
internal_memory_.reset();
if (own_memory_ptr_) {
memory_.reset();
} else {
external_memory_ = nullptr;
own_memory_ = true;
mapped_memory_ = nullptr;
own_memory_ptr_ = true;
}
capacity_ = 0;
}
......@@ -155,26 +155,26 @@ class DRAGON_API Tensor {
if (memory != nullptr) {
CHECK_LE(size_, memory->size())
<< "\nShare an external memory with smaller capacity.";
internal_memory_.reset();
memory_.reset();
capacity_ = memory->size();
} else {
if (internal_memory_) {
capacity_ = internal_memory_->size();
if (memory_) {
capacity_ = memory_->size();
}
}
external_memory_ = memory;
own_memory_ = (memory == nullptr);
mapped_memory_ = memory;
own_memory_ptr_ = (memory == nullptr);
}
/*! \brief Reset tensor to release all resources */
void Reset() {
dims_.clear();
strides_.clear();
internal_memory_.reset();
memory_.reset();
meta_ = TypeMeta();
size_ = capacity_ = 0;
own_memory_ = true;
external_memory_ = nullptr;
own_memory_ptr_ = true;
mapped_memory_ = nullptr;
if (ExternalDeleter != nullptr) {
ExternalDeleter();
ExternalDeleter = nullptr;
......@@ -283,11 +283,11 @@ class DRAGON_API Tensor {
/*! \brief Return the number of elements counting along the given axes */
int64_t count(int64_t start, int64_t end) const {
int64_t nelements = 1;
int64_t num = 1;
for (int64_t i = start; i < end; i++) {
nelements *= dim(i);
num *= dim(i);
}
return nelements;
return num;
}
/*! \brief Return the number of elements counting from the given axis */
......@@ -302,12 +302,12 @@ class DRAGON_API Tensor {
/*! \brief Return whether the memory is set */
bool has_memory() const {
return internal_memory_ != nullptr || external_memory_ != nullptr;
return memory_ != nullptr || mapped_memory_ != nullptr;
}
/*! \brief Return the memory pointer */
UnifiedMemory* memory(bool required = false) const {
auto* ptr = own_memory_ ? internal_memory_.get() : external_memory_;
UnifiedMemory* memory(bool required = false, bool owned = false) const {
auto* ptr = own_memory_ptr_ || owned ? memory_.get() : mapped_memory_;
if (required) CHECK(ptr) << "\nAccess the empty memory.";
return ptr;
}
......@@ -320,15 +320,15 @@ class DRAGON_API Tensor {
/*! \brief Try to return the raw const data pointer */
template <class Context>
const void* const_data_ptr() const {
TypeId ctx_type = TypeMeta::Id<Context>();
if (ctx_type == TypeMeta::Id<CPUContext>()) {
TypeId context_type = TypeMeta::Id<Context>();
if (context_type == TypeMeta::Id<CPUContext>()) {
return memory(true)->cpu_data(nbytes());
} else if (ctx_type == TypeMeta::Id<CUDAContext>()) {
} else if (context_type == TypeMeta::Id<CUDAContext>()) {
return memory(true)->cuda_data(nbytes());
} else if (ctx_type == TypeMeta::Id<CNMLContext>()) {
} else if (context_type == TypeMeta::Id<CNMLContext>()) {
return memory(true)->cnml_data();
} else {
LOG(FATAL) << "Unknown memory type.";
LOG(FATAL) << "Unsupported context type.";
return nullptr;
}
}
......@@ -336,19 +336,19 @@ class DRAGON_API Tensor {
/*! \brief Try to return the raw mutable data pointer */
template <class Context>
void mutable_data_ptr(void** data_ptr) {
auto* mem = memory();
if (!mem) {
auto* memory_ptr = memory();
if (!memory_ptr) {
*data_ptr = nullptr;
} else {
TypeId ctx_type = TypeMeta::Id<Context>();
if (ctx_type == TypeMeta::Id<CPUContext>()) {
*data_ptr = mem->mutable_cpu_data(nbytes());
} else if (ctx_type == TypeMeta::Id<CUDAContext>()) {
*data_ptr = mem->mutable_cuda_data(nbytes());
} else if (ctx_type == TypeMeta::Id<CNMLContext>()) {
*data_ptr = mem->mutable_cnml_data();
TypeId context_type = TypeMeta::Id<Context>();
if (context_type == TypeMeta::Id<CPUContext>()) {
*data_ptr = memory_ptr->mutable_cpu_data(nbytes());
} else if (context_type == TypeMeta::Id<CUDAContext>()) {
*data_ptr = memory_ptr->mutable_cuda_data(nbytes());
} else if (context_type == TypeMeta::Id<CNMLContext>()) {
*data_ptr = memory_ptr->mutable_cnml_data();
} else {
LOG(FATAL) << "Unknown memory type.";
LOG(FATAL) << "Unsupported context type.";
}
}
}
......@@ -368,7 +368,7 @@ class DRAGON_API Tensor {
CHECK_GT(size_, 0) << "\nInvalid tensor size.";
meta_ = meta;
capacity_ = size_ * meta.itemsize();
internal_memory_.reset(new UnifiedMemory(meta_, capacity_));
memory_.reset(new UnifiedMemory(meta_, capacity_));
mutable_data_ptr<Context>(&data_ptr);
if (meta_.ctor()) meta_.ctor()(data_ptr, size_);
return data_ptr;
......@@ -426,11 +426,11 @@ class DRAGON_API Tensor {
}
/*! \brief Set to manage the memory */
void set_memory(UnifiedMemory* memory) {
if (memory != internal_memory_.get()) {
internal_memory_.reset(memory);
void set_memory(UnifiedMemory* memory_ptr) {
if (memory_ptr != memory_.get()) {
memory_.reset(memory_ptr);
}
capacity_ = memory->size();
capacity_ = memory_ptr->size();
}
private:
......@@ -449,14 +449,14 @@ class DRAGON_API Tensor {
/*! \brief The dimensions and strides */
vec64_t dims_, strides_;
/*! \brief The internal memory */
unique_ptr<UnifiedMemory> internal_memory_;
/*! \brief The managed memory */
unique_ptr<UnifiedMemory> memory_;
/*! \brief The external memory */
UnifiedMemory* external_memory_ = nullptr;
/*! \brief The mapped memory */
UnifiedMemory* mapped_memory_ = nullptr;
/*! \brief The external memory indicator */
bool own_memory_ = true;
/*! \brief The ownership of memory pointer */
bool own_memory_ptr_ = true;
DISABLE_COPY_AND_ASSIGN(Tensor);
};
......
......@@ -148,14 +148,16 @@ string Workspace::UniqueName(
return name + "_" + str::to(index_map[required_name]++) + suffix;
}
vector<string> Workspace::tensors() const {
vector<string> Workspace::tensors(bool external) const {
vector<string> names;
for (const auto& it : tensor_map_) {
names.push_back(it.first);
}
if (external) {
for (const auto& it : external_tensor_map_) {
names.push_back(it.first);
}
}
return names;
}
......
......@@ -80,7 +80,7 @@ class DRAGON_API Workspace {
}
/*! \brief Return the name of cached tensors */
vector<string> tensors() const;
vector<string> tensors(bool external = true) const;
/*! \brief Return the name of cached graphs */
vector<string> graphs() const;
......
......@@ -27,14 +27,12 @@ class CudaStream {
#ifdef USE_CUDA
CUDADeviceGuard guard(device_id);
CUDA_CHECK(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking));
#else
CUDA_NOT_COMPILED;
#endif
}
~CudaStream() {
#ifdef USE_CUDA
cudaStreamDestroy(stream_);
CUDA_CHECK(cudaStreamDestroy(stream_));
#endif
}
......
......@@ -135,6 +135,24 @@ PYBIND11_MODULE(libdragon_python, m) {
}
})
/*! \brief Return the size of memory used by tensors on the given device */
.def(
"MemoryAllocated",
[](Workspace* self, const string& device_type, int device_id) {
size_t size = 0;
for (const auto& name : self->tensors(false)) {
auto* memory = self->GetTensor(name)->memory(false, true);
if (memory) {
if (device_type == "cpu") {
size += memory->size();
} else {
size += memory->size(device_type, device_id);
}
}
}
return size;
})
/*! \brief Run the operator */
.def(
"RunOperator",
......@@ -200,7 +218,7 @@ PYBIND11_MODULE(libdragon_python, m) {
if (could_be_serialized) {
auto msg = string("\n") + def.DebugString();
msg.pop_back();
PRINT(INFO) << "graph {" << str::replace_all(msg, "\n", "\n ")
LOG(INFO) << "\ngraph {" << str::replace_all(msg, "\n", "\n ")
<< "\n}\n";
}
}
......
......@@ -9,12 +9,10 @@ template <class Context>
template <typename T>
void DropBlock2dOp<Context>::DoRunWithType() {
auto &X = Input(0), *Y = Output(0, {0});
if (phase() == "TEST") {
Y->ReshapeLike(X)->CopyFrom(X, ctx());
} else if (phase() == "TRAIN") {
int64_t feat_h, feat_w, seed_h, seed_w;
if (data_format() == "NCHW") {
feat_h = X.dim(2), feat_w = X.dim(3);
} else if (data_format() == "NHWC") {
......@@ -22,11 +20,9 @@ void DropBlock2dOp<Context>::DoRunWithType() {
} else {
LOG(FATAL) << "Unknown DataFormat: " << data_format();
}
seed_h = feat_h - block_size_ + 1;
seed_w = feat_w - block_size_ + 1;
CHECK(seed_h > 0 && seed_w > 0) << "\nExpected block_size <= feat_size.";
// Schedule the keep ratio
auto kp = keep_prob();
if (decrement_ > 0.f && prob_ > kp) {
......@@ -34,27 +30,23 @@ void DropBlock2dOp<Context>::DoRunWithType() {
} else {
prob_ = kp; // Fixed to the limit value
}
// Compute the drop ratio
float gamma = (1.f - prob_) / std::pow(block_size_, 2);
gamma *= (alpha_ * (feat_h * feat_w) / (seed_h * seed_w));
// Prepare buffers
auto* mask = Buffer("mask")
->ReshapeLike(X)
->template mutable_data<uint8_t, Context>();
auto* scale = Buffer("scale")
->Reshape({})
->template mutable_data<float, CPUContext>();
auto scratches = ws()->template data<Context>({
X.dim(0) * seed_h * seed_w * sizeof(uint32_t), // seed points
X.count() * sizeof(int), // int32 mask for seed growing
});
// Fill the mask with ones
// Fill mask with ones
math::Set(X.count(), 1, (int*)scratches[1], ctx());
// Generate 2d mask from seed region
// Generate 2d mask from the seed region
kernel::DropBlock2d(
X.dim(0),
data_format() == "NCHW" ? X.dim(1) : X.dim(-1),
......@@ -68,13 +60,12 @@ void DropBlock2dOp<Context>::DoRunWithType() {
(uint32_t*)scratches[0],
(int*)scratches[1],
ctx());
// Convert to uint8 mask for applying
// Convert to uint8 mask
kernel::Cast(X.count(), (int*)scratches[1], mask, ctx());
// Count && Apply
// Count the kept units to compute the scale factor
float normalizer = math::Sum(X.count(), 1.f, (int*)scratches[1], ctx());
scale[0] = (float)X.count() / std::max(normalizer, 1.f);
// Apply mask to the feature
kernel::ApplyMask(
X.count(),
scale[0],
......@@ -97,7 +88,6 @@ template <class Context>
template <typename T>
void DropBlock2dGradientOp<Context>::DoRunWithType() {
auto &dY = Input(0), *dX = Output(0);
if (phase() == "TEST") {
NOT_IMPLEMENTED;
} else if (phase() == "TRAIN") {
......
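For reference, a small pure-Python sketch of the drop-ratio arithmetic in the DropBlock2d operator above; the helper name and the example feature sizes are illustrative assumptions, not part of the commit.

```python
def drop_block_gamma(keep_prob, block_size, feat_h, feat_w, alpha=1.0):
    """Mirror the gamma computation in DropBlock2dOp::DoRunWithType."""
    seed_h = feat_h - block_size + 1
    seed_w = feat_w - block_size + 1
    assert seed_h > 0 and seed_w > 0, "Expected block_size <= feat_size."
    # Base drop ratio, spread over each block's area ...
    gamma = (1.0 - keep_prob) / (block_size ** 2)
    # ... corrected for the smaller region where seeds may be placed.
    gamma *= alpha * (feat_h * feat_w) / (seed_h * seed_w)
    return gamma

# e.g., 7x7 blocks on a 56x56 feature map with keep_prob 0.9
print(drop_block_gamma(0.9, 7, 56, 56))  # ~0.0026
```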
......@@ -9,7 +9,6 @@ template <class Context>
template <typename T>
void DropPathOp<Context>::DoRunWithType() {
auto &X = Input(0), *Y = Output(0, {0});
if (phase() == "TEST") {
Y->ReshapeLike(X)->CopyFrom(X, ctx());
} else if (phase() == "TRAIN") {
......@@ -20,20 +19,15 @@ void DropPathOp<Context>::DoRunWithType() {
} else {
drop_prob_ = dp; // Fixed to the limit value
}
auto* mask = Buffer("mask")
->Reshape({X.dim(0)})
->template mutable_data<float, Context>();
auto* scale = Buffer("scale")
->Reshape({})
->template mutable_data<float, CPUContext>();
scale[0] = 1.f / (1.f - drop_prob_);
// Generate mask for each example
math::RandomUniform(X.dim(0), 0.f, 1.f, mask, ctx());
// Apply mask to the feature
kernel::DropPath(
X.dim(0),
......@@ -57,7 +51,6 @@ template <class Context>
template <typename T>
void DropPathGradientOp<Context>::DoRunWithType() {
auto &dY = Input(0), *dX = Output(0);
if (phase() == "TEST") {
NOT_IMPLEMENTED;
} else if (phase() == "TRAIN") {
......
......@@ -9,24 +9,14 @@ template <class Context>
template <typename T>
void DropoutOp<Context>::DoRunWithType() {
auto &X = Input(0), *Y = Output(0, {0});
auto scale = use_scale_ ? 1.f / (1.f - prob()) : 1.f;
if (phase() == "TEST") {
Y->ReshapeLike(X)->CopyFrom(X, ctx());
if (!use_scale_) {
math::Scale(
X.count(),
1.f - prob(),
Y->template data<T, Context>(),
Y->template mutable_data<T, Context>(),
ctx());
}
} else if (phase() == "TRAIN") {
Buffer("mask")->ReshapeLike(X);
kernel::Dropout(
X.count(),
prob(),
scale,
1.f / (1.f - prob()),
X.template data<T, Context>(),
Buffer("mask")->template mutable_data<uint8_t, Context>(),
Y->ReshapeLike(X)->template mutable_data<T, Context>(),
......@@ -46,14 +36,12 @@ template <class Context>
template <typename T>
void DropoutGradientOp<Context>::DoRunWithType() {
auto &dY = Input(0), *dX = Output(0);
auto scale = use_scale_ ? 1.f / (1.f - prob()) : 1.f;
if (phase() == "TEST") {
NOT_IMPLEMENTED;
} else if (phase() == "TRAIN") {
kernel::ApplyMask(
dY.count(),
scale,
1.f / (1.f - prob()),
dY.template data<T, Context>(),
Buffer("mask")->template data<uint8_t, Context>(),
dX->ReshapeLike(dY)->template mutable_data<T, Context>(),
......
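With the unscaled variant removed above, training always uses inverted dropout with scale 1 / (1 - prob), and test mode is a plain copy. A small NumPy sketch of that convention (not Dragon's kernel) for reference:

```python
import numpy as np

def inverted_dropout(x, prob=0.5, training=True, rng=np.random):
    """Inverted dropout: scale kept units by 1/(1 - prob) at training time."""
    if not training:
        return x  # test mode is a plain copy, no (1 - prob) rescaling
    mask = (rng.uniform(size=x.shape) >= prob).astype(x.dtype)
    return x * mask * (1.0 / (1.0 - prob))

x = np.ones((2, 3), dtype='float32')
y = inverted_dropout(x, prob=0.5)
# Kept entries are 2.0, dropped entries are 0.0; the expectation stays 1.0.
```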
......@@ -21,7 +21,7 @@ template <class Context>
class DropoutOp : public Operator<Context> {
public:
DropoutOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), use_scale_(OpArg<bool>("scale", true)) {
: Operator<Context>(def, ws) {
GET_ARG_WITH_DESC(float, prob, 0.5f);
}
USE_OPERATOR_FUNCTIONS;
......@@ -32,7 +32,6 @@ class DropoutOp : public Operator<Context> {
void DoRunWithType();
protected:
bool use_scale_;
DECLARE_ARG_WITH_DESC(float, prob);
};
......@@ -40,7 +39,7 @@ template <class Context>
class DropoutGradientOp : public Operator<Context> {
public:
DropoutGradientOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws), use_scale_(OpArg<bool>("scale", true)) {
: Operator<Context>(def, ws) {
GET_ARG_WITH_DESC(float, prob, 0.5f);
}
USE_OPERATOR_FUNCTIONS;
......@@ -51,7 +50,6 @@ class DropoutGradientOp : public Operator<Context> {
void DoRunWithType();
protected:
bool use_scale_;
DECLARE_ARG_WITH_DESC(float, prob);
};
......
......@@ -11,10 +11,7 @@ template <class Context>
template <typename T>
void CuDNNDropoutOp<Context>::DoRunWithType() {
auto &X = Input(0), *Y = Output(0, {0});
CHECK(this->use_scale_) << "\nCuDNN only supports the scaled dropout.";
CuDNNSetTensorDesc<T>(&input_desc_, {X.count(), 1, 1, 1});
if (phase() == "TEST") {
Y->ReshapeLike(X)->CopyFrom(X, ctx());
} else if (phase() == "TRAIN") {
......@@ -46,12 +43,9 @@ void CuDNNDropoutOp<Context>::DoRunWithType() {
rng_seed_));
}
}
// Allocate for the reserve space
size_t reserve_size;
CUDNN_CHECK(cudnnDropoutGetReserveSpaceSize(input_desc_, &reserve_size));
auto* X_mask = Buffer("X_mask")->Reshape({(int64_t)reserve_size});
CUDNN_CHECK(cudnnDropoutForward(
ctx()->cudnn_handle(),
dropout_desc_,
......@@ -76,11 +70,9 @@ template <typename T>
void CuDNNDropoutGradientOp<Context>::DoRunWithType() {
auto &dY = Input(0), *dX = Output(0);
CuDNNSetTensorDesc<T>(&input_desc_, {dY.count(), 1, 1, 1});
if (phase() == "TEST") {
NOT_IMPLEMENTED;
} else if (phase() == "TRAIN") {
CHECK(this->use_scale_) << "\nCuDNN only supports the scaled dropout.";
// Initialize the dropout states
if (!states_initialized_) {
states_initialized_ = true;
......@@ -102,13 +94,12 @@ void CuDNNDropoutGradientOp<Context>::DoRunWithType() {
LOG(FATAL) << "Missing dropout states with seed: " << rng_seed_;
}
}
// Check the reserve space
size_t reserve_size;
CUDNN_CHECK(cudnnDropoutGetReserveSpaceSize(input_desc_, &reserve_size));
auto* X_mask = Buffer("X_mask");
CHECK_EQ(X_mask->size(), reserve_size);
// Compute the gradient using mask
CUDNN_CHECK(cudnnDropoutBackward(
ctx()->cudnn_handle(),
dropout_desc_,
......
......@@ -21,6 +21,7 @@ from dragon.core.device.cuda import current_device
from dragon.core.device.cuda import enable_cudnn
from dragon.core.device.cuda import get_device_capability
from dragon.core.device.cuda import is_available
from dragon.core.device.cuda import memory_allocated
from dragon.core.device.cuda import set_default_device
from dragon.core.device.cuda import set_device
from dragon.core.device.cuda import synchronize
......
......@@ -8,7 +8,7 @@
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
"""Define the options for autograph utilities."""
"""Autograph options."""
from __future__ import absolute_import
from __future__ import division
......@@ -52,9 +52,9 @@ def set_optimization(level=1):
* level = ``1``: Eliminate the unused outputs and operators.
* level = ``2``: Apply inplace to the inputs if available.
* level = ``2``: Apply the inplace to inputs if available.
* level = ``3``: Allocate shared buffer for the outputs.
* level = ``3``: Allocate the shared buffer to outputs if available.
Parameters
----------
......
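A hedged usage sketch of the optimization levels documented above; the public alias ``dragon.autograph.set_optimization`` is an assumption here, only ``set_optimization(level)`` itself appears in this file.

```python
import dragon

# level 1: eliminate unused outputs and operators (default)
# level 2: apply in-place to the inputs if available
# level 3: additionally allocate shared buffers for the outputs
dragon.autograph.set_optimization(level=2)  # assumed public path
```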
......@@ -16,6 +16,7 @@ from __future__ import print_function
from dragon import backend
from dragon.core.framework import config
from dragon.core.framework import workspace
class Stream(backend.CudaStream):
......@@ -62,14 +63,14 @@ def current_device():
def enable_cudnn(enabled=True, benchmark=False):
"""Enable the CuDNN library.
"""Enable backend to use the cuDNN library.
Parameters
----------
enabled : bool, optional, default=True
**True** to enable the CuDNN.
Use cuDNN library or not.
benchmark : bool, optional, default=False
**True** to select algorithms according to benchmark.
Select algorithms according to the benchmark or not.
"""
return backend.cudaEnableDNN(enabled, benchmark)
......@@ -107,6 +108,32 @@ def is_available():
return backend.cudaIsDriverSufficient()
def memory_allocated(device_index=None):
"""Return the size of memory used by tensors in current workspace.
If ``device_index`` is **None**, the current device will be selected.
Parameters
----------
device_index : int, optional
The device index.
Returns
-------
int
The total number of allocated bytes.
See Also
--------
`dragon.Workspace.memory_allocated(...)`_
"""
if device_index is None:
device_index = current_device()
current_ws = workspace.get_workspace()
return current_ws.memory_allocated('cuda', device_index)
def set_default_device(device_index=0):
"""Set the default device.
......
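A brief sketch of the other cuda utilities touched in this file, following the imports above and the unit tests further down; device index 0 is an illustrative choice.

```python
import dragon

# Let the backend use cuDNN, optionally with benchmarked algorithm selection.
dragon.cuda.enable_cudnn(enabled=True, benchmark=False)

if dragon.cuda.is_available():
    dragon.cuda.set_device(0)
    print(dragon.cuda.current_device())          # -> 0
    print(dragon.cuda.get_device_capability(0))  # -> (major, minor)
    stream = dragon.cuda.Stream(device_index=0)  # non-blocking stream
    stream.synchronize()
    dragon.cuda.synchronize()
```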
......@@ -24,6 +24,7 @@ from dragon.core.framework import mapping
from dragon.core.framework import proto_util
from dragon.core.framework import types
from dragon.core.proto import dragon_pb2
from dragon.core.util import logging
from dragon.core.util import serialization
from dragon.core.util import tls
......@@ -149,7 +150,8 @@ class Workspace(backend.Workspace):
"""
cfg = config.config()
if cfg.graph_verbosity == 2:
print(graph_def)
msg = '\n' + str(graph_def)[:-1]
logging.info('\ngraph {' + msg.replace('\n', '\n ') + '\n}\n')
return self.CreateGraph(
serialization.serialize_proto(graph_def),
cfg.graph_verbosity == 1)
......@@ -259,6 +261,24 @@ class Workspace(backend.Workspace):
"""
return self.HasTensor(_stringify_object(tensor))
def memory_allocated(self, device_type='cpu', device_index=0):
"""Return the size of memory used by tensors on given device.
Parameters
----------
device_type : str, optional
The device type.
device_index : int, optional
The device index.
Returns
-------
int
The total number of allocated bytes.
"""
return self.MemoryAllocated(device_type, device_index)
def merge_from(self, other):
"""Merge resources from the other.
......
......@@ -16,7 +16,6 @@ from __future__ import print_function
from dragon.core.eager import context
from dragon.core.framework import ops
from dragon.core.framework import types
from dragon.core.ops import math_ops_lib
from dragon.core.ops.utils import OpSchema
from dragon.core.ops.utils import parse_args
......@@ -122,10 +121,8 @@ def affine(inputs, axis=1, num_axes=1, **kwargs):
op_lib = math_ops_lib.Affine
if context.executing_eagerly():
return op_lib \
.instantiate(
axis=axis,
num_axes=num_axes,
).apply(inputs)
.instantiate(axis=axis, num_axes=num_axes) \
.apply(inputs)
else:
return op_lib.blend(**args)
......@@ -568,10 +565,8 @@ def fully_connected(inputs, axis=1, transpose_w=True, **kwargs):
op_lib = math_ops_lib.FullyConnected
if context.executing_eagerly():
return op_lib \
.instantiate(
axis=axis,
transpose_w=transpose_w,
).apply(inputs)
.instantiate(axis=axis, transpose_w=transpose_w) \
.apply(inputs)
else:
args.pop('transpose_w')
args['transW'] = transpose_w
......
......@@ -25,17 +25,18 @@ from dragon.core.testing.unittest.common_utils import TEST_CUDA
class TestCUDA(unittest.TestCase):
"""Test the cuda utilities."""
@unittest.skipIf(not TEST_CUDA, 'CUDA unavailable')
def test_stream(self):
stream = dragon.cuda.Stream(device_index=0)
self.assertGreater(stream.ptr, 0)
self.assertGreater(stream.ptr, 0 if TEST_CUDA else -1)
stream.synchronize()
dragon.cuda.synchronize()
@unittest.skipIf(not TEST_CUDA, 'CUDA unavailable')
def test_cudnn(self):
dragon.cuda.enable_cudnn()
def test_device(self):
major, minor = dragon.cuda.get_device_capability(0)
self.assertGreaterEqual(major, 1)
self.assertGreaterEqual(major, 1 if TEST_CUDA else 0)
self.assertGreaterEqual(minor, 0)
dragon.cuda.set_device(0)
self.assertEqual(dragon.cuda.current_device(), 0)
......
......@@ -190,6 +190,12 @@ class TestWorkspace(unittest.TestCase):
pass
dragon.reset_workspace()
def test_memory_allocated(self):
w = dragon.Workspace()
with w.as_default():
_ = w.memory_allocated()
_ = dragon.cuda.memory_allocated()
if __name__ == '__main__':
run_tests()