Skip to content

Commit

Permalink
Updated version of cuTENSOR and fixed paths (#166)
Browse files: browse the repository at this point in the history
  • Loading branch information
cliffburdick authored Apr 22, 2022
1 parent 66bd576 commit 24dd054
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 16 deletions.
2 changes: 1 addition & 1 deletion cmake/FindcuTENSOR.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ else()
endif()

if(NOT cuTENSOR_FOUND)
set(CUTENSOR_VERSION 1.4.0.6)
set(CUTENSOR_VERSION 1.5.0.3)
set(CUTENSOR_FILENAME libcutensor-linux-x86_64-${CUTENSOR_VERSION}-archive)

message(STATUS "cuTENSOR not found. Downloading library. By continuing this download you accept to the license terms of cuTENSOR")
Expand Down
6 changes: 3 additions & 3 deletions cmake/FindcuTensorNet.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@ endif()
if(NOT cuTensorNet_FOUND)
message(STATUS "cuTensorNet not found. Downloading library. By continuing this download you accept to the license terms of cuQuantum SDK")

set(CUTENSORNET_VERSION 0.1.0.30)
set(CUTENSORNET_VERSION 22.03.0.40)
set(CUTENSORNET_FILENAME cuquantum-linux-x86_64-${CUTENSORNET_VERSION}-archive)
file(DOWNLOAD https://developer.download.nvidia.com/compute/cuquantum/redist/linux-x86_64/${CUTENSORNET_FILENAME}.tar.xz

file(DOWNLOAD https://developer.download.nvidia.com/compute/cuquantum/redist/cuquantum/linux-x86_64/${CUTENSORNET_FILENAME}.tar.xz
${CMAKE_BINARY_DIR}/${CUTENSORNET_FILENAME}.tar.xz)

file(ARCHIVE_EXTRACT INPUT ${CMAKE_BINARY_DIR}/${CUTENSORNET_FILENAME}.tar.xz DESTINATION ${CMAKE_BINARY_DIR}/cutensornet/)
Expand Down
48 changes: 37 additions & 11 deletions include/matx_einsum.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,15 @@ class matxEinsumHandle_t {
MATX_ASSERT_STR(cutensornetCreateContractionOptimizerInfo(handle_, descNet_, &optimizerInfo) == CUTENSORNET_STATUS_SUCCESS, matxcuTensorError,
"Failed to create cuTensorNet contraction optimizer info");

int imbalance_factor = 30;
MATX_ASSERT_STR(cutensornetContractionOptimizerConfigSetAttribute(
handle_,
optimizerConfig,
CUTENSORNET_CONTRACTION_OPTIMIZER_CONFIG_GRAPH_IMBALANCE_FACTOR,
&imbalance_factor,
sizeof(imbalance_factor)) == CUTENSORNET_STATUS_SUCCESS,
matxcuTensorError, "Failed to run contraction optimizer");

size_t freeMem, totalMem;
MATX_ASSERT(cudaMemGetInfo(&freeMem, &totalMem) == cudaSuccess, matxCudaError);

Expand All @@ -150,13 +159,37 @@ class matxEinsumHandle_t {

MATX_ASSERT(params_.num_slices_ > 0, matxcuTensorError);

MATX_ASSERT(cutensornetCreateWorkspaceDescriptor(handle_, &workDesc_) == CUTENSORNET_STATUS_SUCCESS, matxcuTensorError);

uint64_t requiredWorkspaceSize = 0;
MATX_ASSERT(cutensornetWorkspaceComputeSizes(handle_,
descNet_,
optimizerInfo,
workDesc_) == CUTENSORNET_STATUS_SUCCESS, matxcuTensorError);

MATX_ASSERT(cutensornetWorkspaceGetSize(handle_,
workDesc_,
CUTENSORNET_WORKSIZE_PREF_MIN,
CUTENSORNET_MEMSPACE_DEVICE,
&requiredWorkspaceSize) == CUTENSORNET_STATUS_SUCCESS, matxcuTensorError);

MATX_ASSERT_STR(workSize_ > requiredWorkspaceSize, matxOutOfMemory, "Not enough workspace memory is available.");

matxAlloc(&workspace_, workSize_, MATX_ASYNC_DEVICE_MEMORY, stream);

MATX_ASSERT (cutensornetWorkspaceSet(handle_,
workDesc_,
CUTENSORNET_MEMSPACE_DEVICE,
workspace_,
workSize_) == CUTENSORNET_STATUS_SUCCESS, matxcuTensorError);

/*******************************
* Initialize all pair-wise contraction plans (for cuTENSOR)
*******************************/
MATX_ASSERT_STR(cutensornetCreateContractionPlan(handle_,
descNet_,
optimizerInfo,
workSize_,
workDesc_,
&plan_) == CUTENSORNET_STATUS_SUCCESS,
matxcuTensorError, "cutensornetCreateContractionPlan failed");

Expand All @@ -169,12 +202,6 @@ class matxEinsumHandle_t {
&autotunePref) == CUTENSORNET_STATUS_SUCCESS,
matxcuTensorError, "cutensornetCreateContractionAutotunePreference failed");

// Allocate the real amount needed and free the old amount
MATX_ASSERT_STR(cutensornetContractionGetWorkspaceSize(handle_, descNet_, optimizerInfo, &workSize_) == CUTENSORNET_STATUS_SUCCESS,
matxcuTensorError, "cutensornetContractionGetWorkspaceSize failed");

matxAlloc(&workspace_, workSize_, MATX_ASYNC_DEVICE_MEMORY, stream);

const int numAutotuningIterations = 5; // may be 0
MATX_ASSERT_STR(cutensornetContractionAutotunePreferenceSetAttribute(
handle_,
Expand All @@ -189,8 +216,7 @@ class matxEinsumHandle_t {
plan_,
data_in,
out.Data(),
workspace_,
workSize_,
workDesc_,
autotunePref,
stream) == CUTENSORNET_STATUS_SUCCESS,
matxcuTensorError, "cutensornetContractionAutotune failed");
Expand Down Expand Up @@ -329,8 +355,7 @@ class matxEinsumHandle_t {
plan_,
data_in,
out.Data(),
workspace_,
workSize_,
workDesc_,
slice,
stream) == CUTENSORNET_STATUS_SUCCESS,
matxcuTensorError, "cutensornetContraction failed");
Expand All @@ -348,6 +373,7 @@ class matxEinsumHandle_t {
cutensornetContractionPlan_t plan_;
uint64_t workSize_;
void *workspace_;
cutensornetWorkspaceDescriptor_t workDesc_;
cutensornetHandle_t handle_;
cutensornetNetworkDescriptor_t descNet_;
EinsumParams_t<InT...> params_;
Expand Down
2 changes: 1 addition & 1 deletion test/00_tensor/EinsumTests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ TYPED_TEST_SUITE(EinsumTestsIntegral, MatXAllIntegralTypes);
TYPED_TEST_SUITE(EinsumTestsNumericNonComplex, MatXNumericNonComplexTypes);
TYPED_TEST_SUITE(EinsumTestsBoolean, MatXBoolTypes);

#if ENABLE_CUTENSOR
#if MATX_ENABLE_CUTENSOR
TYPED_TEST(EinsumTestsFloatNonComplexNonHalfTypes, Contraction3D)
{
MATX_ENTER_HANDLER();
Expand Down

0 comments on commit 24dd054

Please sign in to comment.