Skip to content

Commit

Permalink
Merge branch 'main' into cccl_std_conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
cliffburdick authored May 21, 2024
2 parents 67e5d54 + 75b032d commit 8b75eae
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 59 deletions.
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ option(MATX_EN_CUTENSOR OFF)
option(MATX_EN_FILEIO OFF)
option(MATX_EN_NVPL OFF, "Enable NVIDIA Performance Libraries for optimized ARM CPU support")
option(MATX_DISABLE_CUB_CACHE "Disable caching for CUB allocations" ON)
option(MATX_EN_COVERAGE OFF "Enable code coverage reporting")

set(MATX_EN_PYBIND11 OFF CACHE BOOL "Enable pybind11 support")

Expand Down Expand Up @@ -181,6 +182,11 @@ if (MATX_DISABLE_CUB_CACHE)
target_compile_definitions(matx INTERFACE MATX_DISABLE_CUB_CACHE=1)
endif()

# Instrument the matx interface target for gcov-based code coverage.
# INTERFACE scope propagates these flags to every consumer that links matx.
# NOTE(review): `--coverage` at link time already implies linking libgcov, so
# the explicit `-lgcov` is redundant (harmless, but could be dropped) -- confirm
# against the supported compilers before removing.
if (MATX_EN_COVERAGE)
target_compile_options(matx INTERFACE -fprofile-arcs -ftest-coverage)
target_link_options(matx INTERFACE -lgcov --coverage)
endif()

# Get the tensor libraries if we need them
if (MATX_EN_CUTENSOR)
set(CUTENSORNET_VERSION 24.03.0.4)
Expand Down
2 changes: 1 addition & 1 deletion bench/00_transform/svd_power.cu
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ void svdbpi_batch(nvbench::state &state,

MATX_NVTX_START_RANGE( "Exec", matx_nvxtLogLevels::MATX_NVTX_LOG_ALL, 1 )
state.exec(
[&U, &S, &VT, &A, &iterations, &r](nvbench::launch &launch) {
[&U, &S, &VT, &A, &iterations](nvbench::launch &launch) {
(mtie(U, S, VT) = svdbpi(A, iterations)).run(cudaExecutor{launch.get_stream()}); });
MATX_NVTX_END_RANGE( 1 )
}
Expand Down
12 changes: 7 additions & 5 deletions include/matx/core/stacktrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,23 +47,25 @@
#include <iostream>
#include <sstream>
#include <string>
#include <array>

namespace matx {
namespace detail{

static constexpr int MAX_FRAMES = 63;

/** Print a demangled stack backtrace of the caller function to FILE* out. */
static inline void printStackTrace(std::ostream &eout = std::cerr,
unsigned int max_frames = 63)
static inline void printStackTrace(std::ostream &eout = std::cerr)
{
#ifdef _WIN32
// TODO add code for windows stack trace
#else
std::stringstream out;
// storage array for stack trace address data
void *addrlist[max_frames + 1];
std::array<void *, MAX_FRAMES + 1> addrlist;
// retrieve current stack addresses
int addrlen =
backtrace(addrlist, static_cast<int>(sizeof(addrlist) / sizeof(void *)));
backtrace(reinterpret_cast<void **>(&addrlist), static_cast<int>(addrlist.size()));

if (addrlen == 0) {
out << " <empty, possibly corrupt>\n";
Expand All @@ -72,7 +74,7 @@ static inline void printStackTrace(std::ostream &eout = std::cerr,

// resolve addresses into strings containing "filename(function+address)",
// this array must be free()-ed
char **symbollist = backtrace_symbols(addrlist, addrlen);
char **symbollist = backtrace_symbols(reinterpret_cast<void *const *>(&addrlist), addrlen);
// allocate string which will be filled with the demangled function name
size_t funcnamesize = 256;
char *funcname = (char *)malloc(funcnamesize);
Expand Down
2 changes: 1 addition & 1 deletion include/matx/core/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -1466,7 +1466,7 @@ class tensor_t : public detail::tensor_impl_t<T,RANK,Desc> {
int d = 0;
bool def_stride = (strides[0] == -1);

int end_count = 0;
[[maybe_unused]] int end_count = 0;
for (int i = 0; i < RANK; i++) {
if (ends[i] == matxDropDim) {
end_count++;
Expand Down
67 changes: 15 additions & 52 deletions include/matx/core/tensor_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,13 +186,6 @@ namespace detail {
else {
return cuda::std::invoke(std::forward<Func>(f), cuda::std::get<S>(std::forward<Tuple>(tuple))...);
}

if constexpr (!(is_std_tuple<remove_cvref_t<Tuple>>::value || is_std_array<remove_cvref_t<Tuple>>::value)) {
return cuda::std::invoke(std::forward<Func>(f), cuda::std::get<S>(std::forward<Tuple>(tuple))...);
}
else {
return cuda::std::invoke(std::forward<Func>(f), cuda::std::get<S>(std::forward<Tuple>(tuple))...);
}
}

template <class Func, class Tuple>
Expand All @@ -208,17 +201,6 @@ namespace detail {
std::forward<Func>(f), std::forward<Tuple>(t),
std::make_index_sequence<cuda::std::tuple_size_v<remove_cvref_t<Tuple>>>{});
}

if constexpr (!(is_std_tuple<remove_cvref_t<Tuple>>::value || is_std_array<remove_cvref_t<Tuple>>::value)) {
return apply_impl(
std::forward<Func>(f), std::forward<Tuple>(t),
std::make_index_sequence<cuda::std::tuple_size_v<remove_cvref_t<Tuple>>>{});
}
else {
return apply_impl(
std::forward<Func>(f), std::forward<Tuple>(t),
std::make_index_sequence<cuda::std::tuple_size_v<remove_cvref_t<Tuple>>>{});
}
}

template <class Func, class Tuple>
Expand All @@ -234,17 +216,6 @@ namespace detail {
std::forward<Func>(f), std::forward<Tuple>(t),
make_index_sequence_rev<cuda::std::tuple_size_v<remove_cvref_t<Tuple>>>{});
}

if constexpr (!(is_std_tuple<remove_cvref_t<Tuple>>::value || is_std_array<remove_cvref_t<Tuple>>::value)) {
return apply_impl(
std::forward<Func>(f), std::forward<Tuple>(t),
make_index_sequence_rev<cuda::std::tuple_size_v<remove_cvref_t<Tuple>>>{});
}
else {
return apply_impl(
std::forward<Func>(f), std::forward<Tuple>(t),
make_index_sequence_rev<cuda::std::tuple_size_v<remove_cvref_t<Tuple>>>{});
}
}

template <typename T0, typename T1, typename... Tn>
Expand Down Expand Up @@ -332,20 +303,6 @@ namespace detail {
return i(args...);
}, sliced_tup);
}

if constexpr (!(T::Rank() == int(sizeof...(Is)) || T::Rank() == matxNoRank)) {
// Construct an integer sequence of the length of the tuple, but only using the last indices
using seq = offset_sequence_t<sizeof...(Is) - T::Rank(), std::make_index_sequence<T::Rank()>>;
auto tup = cuda::std::make_tuple(indices...);
auto sliced_tup = select_tuple(std::forward<decltype(tup)>(tup), seq{});
return cuda::std::apply([&](auto... args) {
return i(args...);
}, sliced_tup);
}
else
{
return i(indices...);
}
}


Expand All @@ -360,15 +317,6 @@ namespace detail {
{
return i;
}

if constexpr (!is_matx_op<T>())
{
return i;
}
else
{
return get_matx_value(i, indices...);
}
}

template <typename T> __MATX_INLINE__ std::string to_short_str() {
Expand Down Expand Up @@ -1184,6 +1132,21 @@ void print(const Op &op, [[maybe_unused]] Args... dims) {
cuda::std::apply([&](auto &&...args) { fprint(stdout, op, args...); }, tp);
}

/**
 * @brief Print all values of a 0D tensor or operator to stdout
 *
 * This form of `print()` is a specialization for 0D (rank-0) inputs, which
 * carry no dimension arguments; it forwards directly to `fprint` on stdout.
 *
 * @tparam Op Operator input type (participates in overload resolution only
 *            when `Op::Rank() == 0`)
 * @param op Operator input
 */
template <typename Op,
std::enable_if_t<(Op::Rank() == 0), bool> = true>
void print(const Op &op)
{
fprint(stdout, op);
}

#endif // not DOXYGEN_ONLY

template <typename Op>
Expand Down
49 changes: 49 additions & 0 deletions test/00_io/PrintTests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,21 @@ TEST_F(PrintTest, DefaultTest4)
MATX_EXIT_HANDLER();
}

// Regression test for printing a 0D tensor in the default format: slicing a
// higher-rank tensor with matxDropDim yields a rank-0 view, which exercises
// the 0D print() specialization.
TEST_F(PrintTest, DefaultTest5)
{
MATX_ENTER_HANDLER();
// Confirm the suite is in the default print format before checking output.
auto pft = get_print_format_type();
ASSERT_EQ(MATX_PRINT_FORMAT_DEFAULT, pft);

// Take element 0 of A1 and drop the dimension, producing a rank-0 view.
auto testSlice = matx::slice<0>(A1, {0}, {matx::matxDropDim});

// Expected text depends on A1's contents -- assumes the fixture seeds A1
// deterministically (TODO confirm against the PrintTest fixture setup).
print_checker(testSlice,
"Tensor{complex<double>} Rank: 0, Sizes:[], Strides:[]\n"
"-9.2466e-01+9.9114e-01j \n");

MATX_EXIT_HANDLER();
}

TEST_F(PrintTest, MlabTest1)
{
MATX_ENTER_HANDLER();
Expand Down Expand Up @@ -281,6 +296,22 @@ TEST_F(PrintTest, MlabTest4)
MATX_EXIT_HANDLER();
}

// Regression test for printing a 0D tensor with the MATLAB print format set.
TEST_F(PrintTest, MlabTest5)
{
MATX_ENTER_HANDLER();
// Switch to MLAB format and verify the setting took effect.
set_print_format_type(MATX_PRINT_FORMAT_MLAB);
auto pft = get_print_format_type();
ASSERT_EQ(MATX_PRINT_FORMAT_MLAB, pft);

// Rank-0 view of A1 via matxDropDim, exercising the 0D print() path.
auto testSlice = matx::slice<0>(A1, {0}, {matx::matxDropDim});

// NOTE(review): expected output is byte-identical to the default-format
// test -- presumably 0D printing ignores the format setting; confirm this
// is intentional.
print_checker(testSlice,
"Tensor{complex<double>} Rank: 0, Sizes:[], Strides:[]\n"
"-9.2466e-01+9.9114e-01j \n");

MATX_EXIT_HANDLER();
}

TEST_F(PrintTest, PythonTest1)
{
MATX_ENTER_HANDLER();
Expand Down Expand Up @@ -370,3 +401,21 @@ TEST_F(PrintTest, PythonTest4)

MATX_EXIT_HANDLER();
}

// Regression test for printing a 0D tensor with the Python print format set.
TEST_F(PrintTest, PythonTest5)
{
MATX_ENTER_HANDLER();
// Switch to Python format and verify the setting took effect.
set_print_format_type(MATX_PRINT_FORMAT_PYTHON);
auto pft = get_print_format_type();
ASSERT_EQ(MATX_PRINT_FORMAT_PYTHON, pft);

// Rank-0 view of A1 via matxDropDim, exercising the 0D print() path.
auto testSlice = matx::slice<0>(A1, {0}, {matx::matxDropDim});

// NOTE(review): expected output matches the default-format test exactly --
// presumably 0D printing is format-independent; confirm this is intended.
print_checker(testSlice,
"Tensor{complex<double>} Rank: 0, Sizes:[], Strides:[]\n"
"-9.2466e-01+9.9114e-01j \n");

MATX_EXIT_HANDLER();
}


0 comments on commit 8b75eae

Please sign in to comment.