Skip to content

Commit

Permalink
More nvc++ warning fixes. Increase minimum supported CUDA to 11.5 to …
Browse files Browse the repository at this point in the history
…get rid of redundant if constexpr (#627)
  • Loading branch information
cliffburdick authored May 17, 2024
1 parent 115a932 commit d71d80b
Show file tree
Hide file tree
Showing 14 changed files with 57 additions and 180 deletions.
44 changes: 23 additions & 21 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ target_compile_features(matx INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>
target_compile_options(matx INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)

# CUDA 11.5 or higher required (async allocation needs 11.2+; 11.5 avoids redundant "if constexpr" workarounds for older nvcc)
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.4)
message(FATAL_ERROR "MatX requires CUDA 11.4 or higher. Please update before using.")
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.5)
message(FATAL_ERROR "MatX requires CUDA 11.5 or higher. Please update before using.")
endif()

# libcudacxx is now part of CCCL, so grab that repo if needed
Expand All @@ -126,29 +126,31 @@ if (NOT CMAKE_BUILD_TYPE OR ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
set(MATX_CUDA_FLAGS ${MATX_CUDA_FLAGS} -g -lineinfo)
endif()

# Set preferred compiler warning flags
set(WARN_FLAGS -Wall
-Wextra
-Wcast-align
-Wunused
-Wshadow)

if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC")
set(WARN_FLAGS ${WARN_FLAGS}
# Set preferred compiler warning flags. nvc++ doesn't support most warnings
string(FIND "${CMAKE_CUDA_HOST_COMPILER}" "nvc++" IS_NVCPP)
if (NOT ${IS_NVCPP} GREATER -1)
set(WARN_FLAGS
-Wall
-Wextra
-Wcast-align
-Wunused
-Wshadow
-Wno-unknown-pragmas
-Wnon-virtual-dtor)
endif()

if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(WARN_FLAGS ${WARN_FLAGS}
-Wconversion
-Wmisleading-indentation
-Wduplicated-cond
-Wduplicated-branches
-Wlogical-op
-Wnull-dereference)

if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(WARN_FLAGS ${WARN_FLAGS}
-Wconversion
-Wmisleading-indentation
-Wduplicated-cond
-Wduplicated-branches
-Wlogical-op
-Wnull-dereference)
endif()
endif()



set(WARN_FLAGS ${WARN_FLAGS} $<$<COMPILE_LANGUAGE:CUDA>:-Werror all-warnings>)
set(WARN_FLAGS ${WARN_FLAGS} $<$<COMPILE_LANGUAGE:CXX>:-Werror>)

Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ are necessary
## Requirements
MatX support is currently limited to **Linux only** due to the time to test Windows. If you'd like to voice your support for native Windows support using Visual Studio, please comment on the issue here: /~https://github.com/NVIDIA/MatX/issues/153.

**Note**: CUDA 12.0.0 through 12.2.0 have an issue that causes building MatX unit tests to show a compiler error or cause a segfault in the compiler. Please use CUDA 11.4-11.8 or CUDA 12.2.1+ with MatX.
**Note**: CUDA 12.0.0 through 12.2.0 have an issue that causes building MatX unit tests to show a compiler error or cause a segfault in the compiler. Please use CUDA 11.5-11.8 or CUDA 12.2.1+ with MatX.

MatX is using features in C++17 and the latest CUDA compilers and libraries. For this reason, when running with GPU support, CUDA 11.4 and g++9 or clang 17 or newer is required. You can download the CUDA Toolkit [here](https://developer.nvidia.com/cuda-downloads).
MatX is using features in C++17 and the latest CUDA compilers and libraries. For this reason, when running with GPU support, CUDA 11.5 and g++ 9 or clang 17 or newer are required. You can download the CUDA Toolkit [here](https://developer.nvidia.com/cuda-downloads).

MatX has been tested on and supports Pascal, Turing, Volta, Ampere, Ada, and Hopper GPU architectures. Jetson products are supported with Jetpack 5.0 or above.

Expand Down
2 changes: 1 addition & 1 deletion docs_input/build.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ the CPM_ documentation or the documentation for each package for more informatio

System Requirements
-------------------
MatX requires **CUDA 11.4** or higher, and **g++ 9.3+** or **clang 17+** for the host compiler. Clang may work as well, but it's currently
MatX requires **CUDA 11.5** or higher, and **g++ 9.3+** or **clang 17+** for the host compiler. Other host compilers may work as well, but they are currently
untested. Other requirements for optional components are listed below.

.. warning:: Using MatX with an unsupported compiler may result in compiler and/or runtime errors.
Expand Down
2 changes: 1 addition & 1 deletion examples/spectrogram_graph.cu
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
(fftStackedMatrix = conj(fftStackedMatrix) * fftStackedMatrix)
.run(stream);
// Get real part and transpose
auto Sxx = fftStackedMatrix.RealView().Permute({1, 0});
[[maybe_unused]] auto Sxx = fftStackedMatrix.RealView().Permute({1, 0});

// Spectral time axis
(s_time = linspace<0>(s_time_shape, static_cast<float>(nperseg) / 2.0f,
Expand Down
15 changes: 7 additions & 8 deletions include/matx/core/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,10 @@ struct MemTracker {
}

void update_stream(void *ptr, cudaStream_t stream) {
std::unique_lock lck(memory_mtx);
[[maybe_unused]] std::unique_lock lck(memory_mtx);
auto iter = allocationMap.find(ptr);
if (iter == allocationMap.end()) {
MATX_THROW(matxInvalidParameter, "Couldn't find pointer in allocation cache");
return;
}

iter->second.stream = stream;
Expand All @@ -105,7 +104,7 @@ struct MemTracker {
auto deallocate_internal(void *ptr, [[maybe_unused]] StreamType st) {
MATX_NVTX_START("", matx::MATX_NVTX_LOG_INTERNAL)

std::unique_lock lck(memory_mtx);
[[maybe_unused]] std::unique_lock lck(memory_mtx);
auto iter = allocationMap.find(ptr);

if (iter == allocationMap.end()) {
Expand All @@ -114,8 +113,9 @@ struct MemTracker {
// and a deallocation occurs in a different one than it was allocated. Allow the user to ignore
// these cases if they know the issue.
MATX_THROW(matxInvalidParameter, "Couldn't find pointer in allocation cache");
#else
return;
#endif
return;
}

size_t bytes = iter->second.size;
Expand Down Expand Up @@ -194,14 +194,13 @@ struct MemTracker {
break;
case MATX_INVALID_MEMORY:
MATX_THROW(matxInvalidType, "Invalid memory kind when allocating!");
break;
};

if (*ptr == nullptr) {
MATX_THROW(matxOutOfMemory, "Failed to allocate memory");
}

std::unique_lock lck(memory_mtx);
[[maybe_unused]] std::unique_lock lck(memory_mtx);
matxMemoryStats.currentBytesAllocated += bytes;
matxMemoryStats.totalBytesAllocated += bytes;
matxMemoryStats.maxBytesAllocated = std::max(
Expand All @@ -214,7 +213,7 @@ struct MemTracker {
return false;
}

std::unique_lock lck(memory_mtx);
[[maybe_unused]] std::unique_lock lck(memory_mtx);
auto iter = allocationMap.find(ptr);

return iter != allocationMap.end();
Expand All @@ -225,7 +224,7 @@ struct MemTracker {
return MATX_INVALID_MEMORY;
}

std::unique_lock lck(memory_mtx);
[[maybe_unused]] std::unique_lock lck(memory_mtx);
auto iter = allocationMap.find(ptr);

if (iter != allocationMap.end()) {
Expand Down
18 changes: 2 additions & 16 deletions include/matx/core/error.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,55 +75,41 @@ namespace matx
{
case matxSuccess:
return "matxSuccess";
break;
case matxNotSupported:
return "matxNotSupported";
break;
case matxInvalidParameter:
return "matxInvalidParameter";
break;
case matxInvalidDim:
return "matxInvalidDim";
break;
case matxInvalidSize:
return "matxInvalidSize";
break;
case matxCudaError:
return "matxCudaError";
break;
case matxCufftError:
return "matxCufftError";
break;
case matxMatMulError:
return "matxMatMulError";
break;
case matxOutOfMemory:
return "matxOutOfMemory";
break;
case matxIOError:
return "matxIOError";
break;
case matxAssertError:
return "matxAssertError";
break;
case matxInvalidType:
return "matxInvalidType";
break;
case matxLUError:
return "matxLUError";
break;
case matxInverseError:
return "matxInverseError";
break;
case matxSolverError:
return "matxSolverError";
break;
case matxcuTensorError:
return "matxcuTensorError";
break;
default:
return "Unknown";
};

return "Unknown";
}

namespace detail {
Expand Down
10 changes: 5 additions & 5 deletions include/matx/core/nvtx.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ inline matx_nvxtLogLevels globalNvtxLevel = matx_nvxtLogLevels::MATX_NVTX_LOG_AP
////////////////////////////////////////////////////////////////////////////////
[[maybe_unused]] static int getNVTX_Range_ID( )
{
std::unique_lock lck(nvtx_memory_mtx);
[[maybe_unused]] std::unique_lock lck(nvtx_memory_mtx);

int newID = ++numNvtxTrackedRanges;
auto foundIter = nvtx_eventMap.find( newID );
Expand All @@ -200,7 +200,7 @@ inline matx_nvxtLogLevels globalNvtxLevel = matx_nvxtLogLevels::MATX_NVTX_LOG_AP
////////////////////////////////////////////////////////////////////////////////
[[maybe_unused]] static void setNVTXLogLevel( matx_nvxtLogLevels newNVTXLevel)
{
std::unique_lock lck(nvtx_memory_mtx);
[[maybe_unused]] std::unique_lock lck(nvtx_memory_mtx);
globalNvtxLevel = newNVTXLevel;
}

Expand All @@ -212,7 +212,7 @@ inline matx_nvxtLogLevels globalNvtxLevel = matx_nvxtLogLevels::MATX_NVTX_LOG_AP
////////////////////////////////////////////////////////////////////////////////
[[maybe_unused]] static void registerEvent( int registerId, nvtxRangeId_t eventId )
{
std::unique_lock lck(nvtx_memory_mtx);
[[maybe_unused]] std::unique_lock lck(nvtx_memory_mtx);

std::pair< int, nvtxRangeId_t > newPair( registerId, eventId );
nvtx_eventMap.insert(newPair);
Expand All @@ -227,7 +227,7 @@ inline matx_nvxtLogLevels globalNvtxLevel = matx_nvxtLogLevels::MATX_NVTX_LOG_AP
////////////////////////////////////////////////////////////////////////////////
[[maybe_unused]] static void endEvent( int id )
{
std::unique_lock lck(nvtx_memory_mtx);
[[maybe_unused]] std::unique_lock lck(nvtx_memory_mtx);

auto foundIter = nvtx_eventMap.find( id );

Expand Down Expand Up @@ -351,7 +351,7 @@ class NvtxEvent
{
int newID = matx::getNVTX_Range_ID();

matx::NvtxEvent MATX_UNIQUE_NAME(nvtxFlag_)( functionName, message, nvtxLevel, newID );
[[maybe_unused]] matx::NvtxEvent MATX_UNIQUE_NAME(nvtxFlag_)( functionName, message, nvtxLevel, newID );

return newID;

Expand Down
17 changes: 1 addition & 16 deletions include/matx/core/tensor_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -642,14 +642,6 @@ class tensor_impl_t {
else {
return 0;
}

// nvcc bug workaround
if constexpr (!(I < sizeof...(Is))) {
return 0;
}
else {
return GetVal<I+1, Is...>(tup) + std::get<I>(tup)*this->desc_.Stride(I);
}
}

template <int I = 0, typename ...Is>
Expand All @@ -659,14 +651,7 @@ class tensor_impl_t {
}
else {
return 0;
}

if constexpr (!(I < sizeof...(Is))) {
return 0;
}
else {
return GetValC<I+1, Is...>(tup) + std::get<I>(tup)*this->desc_.Stride(I);
}
}
}

/**
Expand Down
20 changes: 2 additions & 18 deletions include/matx/core/tensor_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,6 @@ namespace matx

return total;
}

return 0;
}


Expand Down Expand Up @@ -259,8 +257,6 @@ namespace detail {
else {
return matx_max(matx_max(t0, t1), tn...);
}

return t0; // 11.4 compiler has a bug. This is dead code
}

template <class T, class M = T>
Expand All @@ -270,12 +266,6 @@ namespace detail {
return T::Rank();
else
return -1;

// work around for compiler bug/warning
if constexpr (!is_matx_op<M>())
return -1;
else
return T::Rank();
}

template <class T, class M = T>
Expand All @@ -286,12 +276,6 @@ namespace detail {
return a.Size(dim);
else
return 1;

// work around for compiler bug/warning
if constexpr (!is_matx_op<M>())
return 1;
else
return a.Size(dim);
}

template <int RANK, class T, class M = T>
Expand Down Expand Up @@ -998,7 +982,7 @@ void PrintData(FILE* fp, const Op &op, Args... dims) {
CUmemorytype mtype;
void *data[] = {&mtype};
CUpointer_attribute attrs[] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE};
auto ret = cuPointerGetAttributes(1,
[[maybe_unused]] auto ret = cuPointerGetAttributes(1,
&attrs[0],
data,
reinterpret_cast<CUdeviceptr>(op.Data()));
Expand Down Expand Up @@ -1194,7 +1178,7 @@ void fprint(FILE* fp, const Op &op, Args... dims) {
*/
template <typename Op, typename... Args,
std::enable_if_t<(Op::Rank() > 0 && sizeof...(Args) == 0), bool> = true>
void print(const Op &op, Args... dims) {
void print(const Op &op, [[maybe_unused]] Args... dims) {
std::array<int, Op::Rank()> arr = {0};
auto tp = std::tuple_cat(arr);
std::apply([&](auto &&...args) { fprint(stdout, op, args...); }, tp);
Expand Down
Loading

0 comments on commit d71d80b

Please sign in to comment.