From a3f19e3be873a8c9a902d21894dc3f10d99b2e7f Mon Sep 17 00:00:00 2001
From: cliffburdick
Date: Mon, 20 Mar 2023 11:06:48 -0700
Subject: [PATCH] Adding ref-count for DLPack for clients to let local tensors
 go out of scope

---
 include/matx/core/tensor.h         | 38 ++++++++++++++++++------------
 test/00_tensor/BasicTensorTests.cu |  2 ++
 2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/include/matx/core/tensor.h b/include/matx/core/tensor.h
index 81b9dbda..52cfa5c0 100644
--- a/include/matx/core/tensor.h
+++ b/include/matx/core/tensor.h
@@ -1034,7 +1034,6 @@ class tensor_t : public detail::tensor_impl_t<T, RANK, Desc> {
     return storage_.use_count();
   }
 
-
   /**
    * Create an overlapping tensor view
    *
@@ -1752,6 +1751,10 @@ class tensor_t : public detail::tensor_impl_t<T, RANK, Desc> {
    * returns a DLPack structure based on a tensor_t. The caller is responsible for freeing the memory
    * by calling ->deleter(self).
    *
+   * **Note**: This function will increment the reference count of the tensor. It is expected that once a tensor
+   * is converted to DLPack someone will eventually call deleter(). If that does not happen a memory leak
+   * will occur.
+   *
    * @returns Pointer to new DLManagedTensorVersioned pointer. The caller must call the deleter function when finished.
    */
   DLManagedTensor *GetDLPackTensor() const {
@@ -1762,14 +1765,14 @@ class tensor_t : public detail::tensor_impl_t<T, RANK, Desc> {
     CUpointer_attribute attr[] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE, CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL};
     CUmemorytype mem_type;
     int dev_ord;
-    void *data[2] = {&mem_type, &dev_ord};
+    void *data[2]       = {&mem_type, &dev_ord};
 
-    t->data = static_cast<void*>(this->ldata_);
+    t->data             = static_cast<void*>(this->ldata_);
     t->device.device_id = 0;
 
     // Determine where this memory resides
-    auto kind = GetPointerKind(this->ldata_);
-    auto mem_res = cuPointerGetAttributes(sizeof(attr)/sizeof(attr[0]), attr, data, reinterpret_cast<CUdeviceptr>(this->ldata_));
+    auto kind     = GetPointerKind(this->ldata_);
+    auto mem_res  = cuPointerGetAttributes(sizeof(attr)/sizeof(attr[0]), attr, data, reinterpret_cast<CUdeviceptr>(this->ldata_));
     MATX_ASSERT_STR_EXP(mem_res, CUDA_SUCCESS, matxCudaError, "Error returned from cuPointerGetAttributes");
     if (kind == MATX_INVALID_MEMORY) {
       if (mem_type == CU_MEMORYTYPE_DEVICE) {
@@ -1802,28 +1805,33 @@ class tensor_t : public detail::tensor_impl_t<T, RANK, Desc> {
       }
     }
 
-    t->ndim = RANK;
-    t->dtype = detail::TypeToDLPackType<T>();
-    t->shape = new int64_t[RANK];
-    t->strides = new int64_t[RANK];
+    t->ndim             = RANK;
+    t->dtype            = detail::TypeToDLPackType<T>();
+    t->shape            = new int64_t[RANK];
+    t->strides          = new int64_t[RANK];
     for (int r = 0; r < RANK; r++) {
-      t->shape[r] = this->Size(r);
+      t->shape[r]   = this->Size(r);
       t->strides[r] = this->Stride(r);
     }
 
-    t->byte_offset = 0;
+    t->byte_offset      = 0;
 
-    mt->manager_ctx = nullptr;
+    // Increment reference count by making a copy of the shared_ptr by allocating on the heap and
+    // setting it as the context
+    auto t_copy = new self_type{*this};
+    //*t_copy = *this;
+    mt->manager_ctx = t_copy;
+
     //mt->flags = 0; // Only for v1.0
     //auto deleter = [](struct DLManagedTensorVersioned *mtv) { // v1.0
     auto deleter = [](struct DLManagedTensor *mtv) {
       delete [] mtv->dl_tensor.shape;
       delete [] mtv->dl_tensor.strides;
+      delete static_cast<self_type *>(mtv->manager_ctx);
       delete mtv;
-
-      mtv->dl_tensor.shape = nullptr;
-      mtv->dl_tensor.strides = nullptr;
-      mtv = nullptr;
+      mtv->dl_tensor.shape   = nullptr;
+      mtv->dl_tensor.strides = nullptr;
+      mtv                    = nullptr;
     };
 
     mt->deleter = deleter;
diff --git a/test/00_tensor/BasicTensorTests.cu b/test/00_tensor/BasicTensorTests.cu
index 56bc8ccf..d26674bb 100644
--- a/test/00_tensor/BasicTensorTests.cu
+++ b/test/00_tensor/BasicTensorTests.cu
@@ -471,9 +471,11 @@ TYPED_TEST(BasicTensorTestsAll, DLPack)
   ASSERT_EQ(dl->dl_tensor.strides[0], t.Stride(0));
   ASSERT_EQ(dl->dl_tensor.strides[1], t.Stride(1));
   ASSERT_EQ(dl->dl_tensor.strides[2], t.Stride(2));
+  ASSERT_EQ(t.GetRefCount(), 2);
   dl->deleter(dl);
   ASSERT_EQ(dl->dl_tensor.shape, nullptr);
   ASSERT_EQ(dl->dl_tensor.strides, nullptr);
+  ASSERT_EQ(t.GetRefCount(), 1);
 
   MATX_EXIT_HANDLER();
 }