Converting old std:: types to cuda::std:: types (#629)

* Converting old std:: types to cuda::std:: types. Paving the way for removing relaxed constexpr.
NVIDIA · Jun 3, 2024 · commit 70686b4 · 1 parent 381a6b2

Showing 169 changed files with 1,649 additions and 1,703 deletions.
diff --git a/docs_input/api/manipulation/joinrepeat/clone.rst b/docs_input/api/manipulation/joinrepeat/clone.rst
@@ -6,7 +6,7 @@ clone
 Clone one or more dimensions of an operator to a higher rank
 
 .. doxygenfunction:: clone(Op t, const index_t (&shape)[Rank])
-.. doxygenfunction:: clone(Op t, const std::array<index_t, Rank> &shape)
+.. doxygenfunction:: clone(Op t, const cuda::std::array<index_t, Rank> &shape)
 
 Examples
 ~~~~~~~~

diff --git a/docs_input/api/manipulation/rearranging/overlap.rst b/docs_input/api/manipulation/rearranging/overlap.rst
@@ -22,7 +22,7 @@ end of the data to make the tensor rectangular.
     Only 1D input operators are accepted at this time
 
 .. doxygenfunction:: overlap( const OpType &op, const index_t (&windows)[N], const index_t (&strides)[N])
-.. doxygenfunction:: overlap( const OpType &op, const std::array<index_t, N> &windows, const std::array<index_t, N> &strides)
+.. doxygenfunction:: overlap( const OpType &op, const cuda::std::array<index_t, N> &windows, const cuda::std::array<index_t, N> &strides)
 
 Examples
 ~~~~~~~~

diff --git a/docs_input/api/manipulation/rearranging/permute.rst b/docs_input/api/manipulation/rearranging/permute.rst
@@ -6,7 +6,7 @@ permute
 Permute the dimensions of an operator
 
 .. doxygenfunction:: permute(const T &op, const int32_t (&dims)[T::Rank()])
-.. doxygenfunction:: permute(const T &op, const std::array<int32_t, T::Rank()> &dims)
+.. doxygenfunction:: permute(const T &op, const cuda::std::array<int32_t, T::Rank()> &dims)
 
 Examples
 ~~~~~~~~

diff --git a/docs_input/api/polynomials/legendre.rst b/docs_input/api/polynomials/legendre.rst
@@ -7,7 +7,7 @@ Return Legendre polynomial coefficients at the input operator
 
 .. doxygenfunction:: legendre(T1 n, T2 m, const T3 in)
 .. doxygenfunction:: legendre(T1 n, T2 m, const T3 in, int (&axis)[2])
-.. doxygenfunction:: legendre(T1 n, T2 m, const T3 in, std::array<int, 2> axis)  
+.. doxygenfunction:: legendre(T1 n, T2 m, const T3 in, cuda::std::array<int, 2> axis)  
 
 Examples
 ~~~~~~~~

diff --git a/docs_input/api/signalimage/filtering/filter.rst b/docs_input/api/signalimage/filtering/filter.rst
@@ -10,7 +10,7 @@ used for IIR filters, but it will call the appropriate functions for FIR if the
 .. note::
    This function is currently is not supported with host-based executors (CPU)
 
-.. doxygenfunction:: matx::filter(const OpA &a, const std::array<FilterType, NR> h_rec, const std::array<FilterType, NNR> h_nonrec)
+.. doxygenfunction:: matx::filter(const OpA &a, const cuda::std::array<FilterType, NR> h_rec, const cuda::std::array<FilterType, NNR> h_nonrec)
 
 Examples
 ~~~~~~~~

diff --git a/docs_input/basics/creation.rst b/docs_input/basics/creation.rst
@@ -196,7 +196,7 @@ As mentioned in the descriptor section, any type that conforms to the shape sema
 
 .. code-block:: cpp
 
-    std::array<int, 3> = {10, 20, 30};
+    cuda::std::array<int, 3> array = {10, 20, 30};
     auto t = make_tensor<float>(array);
 
 Creating From A Descriptor

diff --git a/examples/recursive_filter.cu b/examples/recursive_filter.cu
@@ -80,8 +80,8 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   tensor_t<InType, 1> solView({numSamples});
 
   // Create views into data objects
-  auto rCoeffs = std::array<FilterType, 2>{0.4f, -0.1f};
-  auto nrCoeffs = std::array<FilterType, 2>{2.0f, 1.0f};
+  auto rCoeffs = cuda::std::array<FilterType, 2>{0.4f, -0.1f};
+  auto nrCoeffs = cuda::std::array<FilterType, 2>{2.0f, 1.0f};
 
   // initialize input data
   for (index_t b = 0; b < batches; b++) {

diff --git a/examples/spectrogram.cu b/examples/spectrogram.cu
@@ -77,9 +77,9 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   constexpr uint32_t num_iterations = 100;
   float time_ms;
 
-  std::array<index_t, 1> num_samps{N};
-  std::array<index_t, 1> half_win{nfft / 2 + 1};
-  std::array<index_t, 1> s_time_shape{(N - noverlap) / nstep};
+  cuda::std::array<index_t, 1> num_samps{N};
+  cuda::std::array<index_t, 1> half_win{nfft / 2 + 1};
+  cuda::std::array<index_t, 1> s_time_shape{(N - noverlap) / nstep};
 
   auto time = make_tensor<float>({N});
   auto modulation = make_tensor<float>({N});

diff --git a/examples/spectrogram_graph.cu b/examples/spectrogram_graph.cu
@@ -78,9 +78,9 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   constexpr uint32_t num_iterations = 20;
   float time_ms;
 
-  std::array<index_t, 1> num_samps{N};
-  std::array<index_t, 1> half_win{nfft / 2 + 1};
-  std::array<index_t, 1> s_time_shape{(N - noverlap) / nstep};
+  cuda::std::array<index_t, 1> num_samps{N};
+  cuda::std::array<index_t, 1> half_win{nfft / 2 + 1};
+  cuda::std::array<index_t, 1> s_time_shape{(N - noverlap) / nstep};
 
   tensor_t<float, 1> time({N});
   tensor_t<float, 1> modulation({N});

diff --git a/examples/svd_power.cu b/examples/svd_power.cu
@@ -91,7 +91,7 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   (A = random<float>({m, n}, NORMAL)).run(exec);
 
 #endif
-  std::array<index_t, U.Rank()> Dshape;
+  cuda::std::array<index_t, U.Rank()> Dshape;
   Dshape.fill(matxKeepDim);
   Dshape[U.Rank()-2] = m;
   // cloning D across

diff --git a/include/matx/core/allocator.h b/include/matx/core/allocator.h
@@ -43,6 +43,8 @@
 
 #include "matx/core/error.h"
 #include "matx/core/nvtx.h"
+#include <cuda/std/__algorithm>
+#include <cuda/std/__algorithm>
 
 #pragma once
 
@@ -203,7 +205,7 @@ struct MemTracker {
     [[maybe_unused]] std::unique_lock lck(memory_mtx);
     matxMemoryStats.currentBytesAllocated += bytes;
     matxMemoryStats.totalBytesAllocated += bytes;
-    matxMemoryStats.maxBytesAllocated = std::max(
+    matxMemoryStats.maxBytesAllocated = cuda::std::max(
         matxMemoryStats.maxBytesAllocated, matxMemoryStats.currentBytesAllocated);
     allocationMap[*ptr] = {bytes, space, stream};
   }

diff --git a/include/matx/core/cache.h b/include/matx/core/cache.h
@@ -114,7 +114,7 @@ class matxCache_t {
  * Converts elements in a POD container to a hash value
  */
 template <typename T, int len>
-inline size_t PodArrayToHash(std::array<T, len> c)
+inline size_t PodArrayToHash(cuda::std::array<T, len> c)
 {
   size_t hash = 0;
   for (auto &el : c) {

diff --git a/include/matx/core/get_grid_dims.h b/include/matx/core/get_grid_dims.h
@@ -37,7 +37,7 @@ namespace matx {
 namespace detail {
 
 template <int RANK>
-inline bool get_grid_dims(dim3 &blocks, dim3 &threads, const std::array<index_t, RANK> &sizes,
+inline bool get_grid_dims(dim3 &blocks, dim3 &threads, const cuda::std::array<index_t, RANK> &sizes,
                           int max_cta_size = 1024)
 {
   bool stride = false;

diff --git a/include/matx/core/half_complex.h b/include/matx/core/half_complex.h
@@ -147,26 +147,6 @@ template <typename T> struct alignas(sizeof(T) * 2) matxHalfComplex {
     return {x, y};
   }
 
-  /**
-   * @brief std::complex<float> cast operator
-   * 
-   * @return std::complex<float> value
-   */
-  __MATX_HOST__ __MATX_DEVICE__ __MATX_INLINE__ operator std::complex<float>()
-  {
-    return {x, y};
-  }
-
-  /**
-   * @brief std::complex<double> cast operator
-   * 
-   * @return std::complex<double> value
-   */
-  __MATX_HOST__ __MATX_DEVICE__ __MATX_INLINE__ operator std::complex<double>()
-  {
-    return {x, y};
-  }
-
   /**
    * @brief Copy assignment operator
    * 

diff --git a/include/matx/core/iterator.h b/include/matx/core/iterator.h
@@ -76,7 +76,7 @@ struct RandomOperatorIterator {
     }
     else {
       auto arrs = detail::GetIdxFromAbs(t_, offset_);
-      return detail::mapply([&](auto &&...args) {
+      return cuda::std::apply([&](auto &&...args) {
           return static_cast<value_type>(t_.operator()(args...));
         }, arrs);     
     }
@@ -199,7 +199,7 @@ struct RandomOperatorOutputIterator {
     else {
       auto arrs = detail::GetIdxFromAbs(t_, offset_);
 
-      return std::apply([&](auto &&...args) -> reference {
+      return cuda::std::apply([&](auto &&...args) -> reference {
           return (reference)(t_.operator()(args...));
         }, arrs);    
     }

diff --git a/include/matx/core/make_tensor.h b/include/matx/core/make_tensor.h
@@ -111,7 +111,7 @@ auto make_tensor_p( const index_t (&shape)[RANK],
 /**
  * Create a tensor from a conforming container type
  *
- * Conforming containers have sequential iterators defined (both const and non-const). std::array
+ * Conforming containers have sequential iterators defined (both const and non-const). cuda::std::array
  * and std::vector meet this criteria.
  *
  * @param shape Shape of tensor
@@ -130,7 +130,7 @@ auto make_tensor( ShapeType &&shape,
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
 
   T *ptr;
-  constexpr int rank = static_cast<int>(std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
+  constexpr int rank = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<rank> desc{std::move(shape)};
 
   size_t size = static_cast<size_t>(desc.TotalSize()) * sizeof(T);
@@ -140,15 +140,15 @@ auto make_tensor( ShapeType &&shape,
   basic_storage<decltype(rp)> s{std::move(rp)};
 
   return tensor_t<T,
-    std::tuple_size<typename remove_cvref<ShapeType>::type>::value,
+    cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value,
     decltype(s),
     decltype(desc)>{std::move(s), std::move(desc)};
 }
 
 /**
  * Create a tensor from a conforming container type
  *
- * Conforming containers have sequential iterators defined (both const and non-const). std::array
+ * Conforming containers have sequential iterators defined (both const and non-const). cuda::std::array
  * and std::vector meet this criteria.
  *
  * @param tensor Tensor object to store newly-created tensor into
@@ -173,7 +173,7 @@ auto make_tensor( TensorType &tensor,
 /**
  * Create a tensor from a conforming container type
  *
- * Conforming containers have sequential iterators defined (both const and non-const). std::array
+ * Conforming containers have sequential iterators defined (both const and non-const). cuda::std::array
  * and std::vector meet this criteria.  Caller is responsible for deleting tensor.
  *
  * @param shape  Shape of tensor
@@ -191,23 +191,23 @@ auto make_tensor_p( ShapeType &&shape,
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
 
   T *ptr;
-  DefaultDescriptor<static_cast<int>(std::tuple_size<typename remove_cvref<ShapeType>::type>::value)> desc{std::move(shape)};
+  DefaultDescriptor<static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value)> desc{std::move(shape)};
 
   size_t size = static_cast<size_t>(desc.TotalSize()) * sizeof(T);
   matxAlloc((void**)&ptr, size, space, stream);
 
   raw_pointer_buffer<T, matx_allocator<T>> rp(ptr, size, true);
   basic_storage<decltype(rp)> s{std::move(rp)};
   return new tensor_t<T,
-  std::tuple_size<typename remove_cvref<ShapeType>::type>::value,
+  cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value,
   decltype(s),
   decltype(desc)>{std::move(s), std::move(desc)};
 }
 
 /**
  * Create a tensor from a conforming container type
  *
- * Conforming containers have sequential iterators defined (both const and non-const). std::array
+ * Conforming containers have sequential iterators defined (both const and non-const). cuda::std::array
  * and std::vector meet this criteria.  Caller is responsible for deleting tensor.
  *
  * @param tensor Tensor object to store newly-created tensor into
@@ -242,7 +242,7 @@ template <typename T>
 auto make_tensor( [[maybe_unused]] const std::initializer_list<detail::no_size_t> t,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
-  std::array<index_t, 0> shape;
+  cuda::std::array<index_t, 0> shape;
 
   return make_tensor<T, decltype(shape)>(std::move(shape), space, stream);
 }
@@ -279,7 +279,7 @@ auto make_tensor_p( [[maybe_unused]] const std::initializer_list<detail::no_size
                     matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                     cudaStream_t stream = 0) {
 
-  std::array<index_t, 0> shape;
+  cuda::std::array<index_t, 0> shape;
   return make_tensor_p<T, decltype(shape)>(std::move(shape), space, stream);
 }
 
@@ -349,7 +349,7 @@ auto make_tensor( T *data,
                   bool owning = false) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
 
-  constexpr int RANK = static_cast<int>(std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
+  constexpr int RANK = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<RANK>
     desc{std::forward<ShapeType>(shape)};
   raw_pointer_buffer<T, matx_allocator<T>> rp{data, static_cast<size_t>(desc.TotalSize())*sizeof(T), owning};
@@ -396,7 +396,7 @@ template <typename T>
 auto make_tensor( T *ptr,
                   [[maybe_unused]] const std::initializer_list<detail::no_size_t> t,
                   bool owning = false) {
-  std::array<index_t, 0> shape;
+  cuda::std::array<index_t, 0> shape;
   return make_tensor<T, decltype(shape)>(ptr, std::move(shape), owning);
 }
 
@@ -440,7 +440,7 @@ auto make_tensor_p( T *const data,
                     bool owning = false) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
 
-  constexpr int RANK = static_cast<int>(std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
+  constexpr int RANK = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<RANK>
     desc{std::forward<ShapeType>(shape)};
   raw_pointer_buffer<T, matx_allocator<T>> rp{data, static_cast<size_t>(desc.TotalSize())*sizeof(T), owning};
@@ -465,7 +465,7 @@ auto make_tensor( Storage &&s,
                   ShapeType &&shape) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
 
-  constexpr int RANK = static_cast<int>(std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
+  constexpr int RANK = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<RANK>
     desc{std::forward<ShapeType>(shape)};
   using T = typename Storage::T;

diff --git a/include/matx/core/operator_utils.h b/include/matx/core/operator_utils.h
@@ -108,8 +108,8 @@ namespace matx {
   }
 
   template <typename Op>
-  std::array<index_t, Op::Rank()> Shape(const Op &op) {
-    std::array<index_t, Op::Rank()> shape;
+  cuda::std::array<index_t, Op::Rank()> Shape(const Op &op) {
+    cuda::std::array<index_t, Op::Rank()> shape;
     for (int r = 0; r < Op::Rank(); r++) {
       shape[r] = op.Size(r);
     }

diff --git a/include/matx/core/pybind.h b/include/matx/core/pybind.h
@@ -381,7 +381,7 @@ class MatXPybind {
 
     assert(info.ndim == RANK);
 
-    std::array<matx::index_t, RANK> shape;
+    cuda::std::array<matx::index_t, RANK> shape;
     std::copy_n(info.shape.begin(), RANK, std::begin(shape));
 
     auto ten =  make_tensor<T> (shape);
@@ -437,12 +437,14 @@ class MatXPybind {
   }
 
   template <typename TensorType,
-            typename CT = matx_convert_complex_type<typename TensorType::scalar_type>>
+            typename CT = matx_convert_cuda_complex_type<typename TensorType::scalar_type>>
   std::optional<TestFailResult<CT>>
   CompareOutput(const TensorType &ten,
                 const std::string fname, double thresh, bool debug = false)
   {
-    using ntype = matx_convert_complex_type<typename TensorType::scalar_type>;
+    using raw_type = typename TensorType::scalar_type;    
+    using ntype = matx_convert_complex_type<raw_type>;
+    using ctype = matx_convert_cuda_complex_type<raw_type>;
     auto resobj = res_dict[fname.c_str()];
     auto ften = pybind11::array_t<ntype>(resobj);
     constexpr int RANK = TensorType::Rank();
@@ -453,7 +455,7 @@ class MatXPybind {
       auto file_val = ften.at();
       auto ten_val = ConvertComplex(ten());
       if (!CompareVals(ten_val, file_val, thresh, fname, debug)) {
-        return TestFailResult<ntype>{Index2Str(0), "0", ten_val, file_val,
+        return TestFailResult<ctype>{Index2Str(0), "0", ten_val, file_val,
                                      thresh};
       }
     }
@@ -468,7 +470,7 @@ class MatXPybind {
                     auto file_val = ften.at(s1, s2, s3, s4);
                     auto ten_val = ConvertComplex(ten(s1, s2, s3, s4));
                     if (!CompareVals(ten_val, file_val, thresh, fname, debug)) {
-                      return TestFailResult<ntype>{Index2Str(s1, s2, s3, s4),
+                      return TestFailResult<ctype>{Index2Str(s1, s2, s3, s4),
                                                    fname, ten_val, file_val,
                                                    thresh};
                     }
@@ -478,7 +480,7 @@ class MatXPybind {
                   auto file_val = ften.at(s1, s2, s3);
                   auto ten_val = ConvertComplex(ten(s1, s2, s3));
                   if (!CompareVals(ten_val, file_val, thresh, fname, debug)) {
-                    return TestFailResult<ntype>{Index2Str(s1, s2, s3), fname,
+                    return TestFailResult<ctype>{Index2Str(s1, s2, s3), fname,
                                                  ten_val, file_val, thresh};
                   }
                 }
@@ -488,7 +490,7 @@ class MatXPybind {
               auto file_val = ften.at(s1, s2);
               auto ten_val = ConvertComplex(ten(s1, s2));
               if (!CompareVals(ten_val, file_val, thresh, fname, debug)) {
-                return TestFailResult<ntype>{Index2Str(s1, s2), fname, ten_val,
+                return TestFailResult<ctype>{Index2Str(s1, s2), fname, ten_val,
                                              file_val, thresh};
               }
             }
@@ -498,7 +500,7 @@ class MatXPybind {
           auto file_val = ften.at(s1);
           auto ten_val = ConvertComplex(ten(s1));
           if (!CompareVals(ten_val, file_val, thresh, fname, debug)) {
-            return TestFailResult<ntype>{Index2Str(s1), fname, ten_val,
+            return TestFailResult<ctype>{Index2Str(s1), fname, ten_val,
                                          file_val, thresh};
           }
         }