Skip to content

Commit

Permalink
More nvc++ warning fixes. Increase minimum supported CUDA to 11.5 to …
Browse files Browse the repository at this point in the history
…get rid of redundant if constexpr (#627)
  • Loading branch information
cliffburdick authored May 17, 2024
1 parent 115a932 commit d71d80b
Show file tree
Hide file tree
Showing 14 changed files with 57 additions and 180 deletions.
44 changes: 23 additions & 21 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ target_compile_features(matx INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>
target_compile_options(matx INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)

# CUDA 11.5 or higher required (async allocation needs 11.2+; 11.5 avoids redundant "if constexpr" workarounds for older nvcc)
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.4)
message(FATAL_ERROR "MatX requires CUDA 11.4 or higher. Please update before using.")
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.5)
message(FATAL_ERROR "MatX requires CUDA 11.5 or higher. Please update before using.")
endif()

# libcudacxx is now part of CCCL, so grab that repo if needed
Expand All @@ -126,29 +126,31 @@ if (NOT CMAKE_BUILD_TYPE OR ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
set(MATX_CUDA_FLAGS ${MATX_CUDA_FLAGS} -g -lineinfo)
endif()

# Set preferred compiler warning flags
set(WARN_FLAGS -Wall
-Wextra
-Wcast-align
-Wunused
-Wshadow)

if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC")
set(WARN_FLAGS ${WARN_FLAGS}
# Set preferred compiler warning flags. nvc++ doesn't support most warnings
string(FIND "${CMAKE_CUDA_HOST_COMPILER}" "nvc++" IS_NVCPP)
if (NOT ${IS_NVCPP} GREATER -1)
set(WARN_FLAGS
-Wall
-Wextra
-Wcast-align
-Wunused
-Wshadow
-Wno-unknown-pragmas
-Wnon-virtual-dtor)
endif()

if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(WARN_FLAGS ${WARN_FLAGS}
-Wconversion
-Wmisleading-indentation
-Wduplicated-cond
-Wduplicated-branches
-Wlogical-op
-Wnull-dereference)

if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(WARN_FLAGS ${WARN_FLAGS}
-Wconversion
-Wmisleading-indentation
-Wduplicated-cond
-Wduplicated-branches
-Wlogical-op
-Wnull-dereference)
endif()
endif()



set(WARN_FLAGS ${WARN_FLAGS} $<$<COMPILE_LANGUAGE:CUDA>:-Werror all-warnings>)
set(WARN_FLAGS ${WARN_FLAGS} $<$<COMPILE_LANGUAGE:CXX>:-Werror>)

Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ are necessary
## Requirements
MatX support is currently limited to **Linux only** due to the time to test Windows. If you'd like to voice your support for native Windows support using Visual Studio, please comment on the issue here: /~https://github.com/NVIDIA/MatX/issues/153.

**Note**: CUDA 12.0.0 through 12.2.0 have an issue that causes building MatX unit tests to show a compiler error or cause a segfault in the compiler. Please use CUDA 11.4-11.8 or CUDA 12.2.1+ with MatX.
**Note**: CUDA 12.0.0 through 12.2.0 have an issue that causes building MatX unit tests to show a compiler error or cause a segfault in the compiler. Please use CUDA 11.5-11.8 or CUDA 12.2.1+ with MatX.

MatX is using features in C++17 and the latest CUDA compilers and libraries. For this reason, when running with GPU support, CUDA 11.4 and g++9 or clang 17 or newer is required. You can download the CUDA Toolkit [here](https://developer.nvidia.com/cuda-downloads).
MatX is using features in C++17 and the latest CUDA compilers and libraries. For this reason, when running with GPU support, CUDA 11.5 and g++ 9 or clang 17 or newer are required. You can download the CUDA Toolkit [here](https://developer.nvidia.com/cuda-downloads).

MatX has been tested on and supports Pascal, Turing, Volta, Ampere, Ada, and Hopper GPU architectures. Jetson products are supported with Jetpack 5.0 or above.

Expand Down
2 changes: 1 addition & 1 deletion docs_input/build.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ the CPM_ documentation or the documentation for each package for more informatio

System Requirements
-------------------
MatX requires **CUDA 11.4** or higher, and **g++ 9.3+** or **clang 17+** for the host compiler. Clang may work as well, but it's currently
MatX requires **CUDA 11.5** or higher, and **g++ 9.3+** or **clang 17+** for the host compiler. Other host compilers may work as well, but they are currently
untested. Other requirements for optional components are listed below.

.. warning:: Using MatX with an unsupported compiler may result in compiler and/or runtime errors.
Expand Down
2 changes: 1 addition & 1 deletion examples/spectrogram_graph.cu
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
(fftStackedMatrix = conj(fftStackedMatrix) * fftStackedMatrix)
.run(stream);
// Get real part and transpose
auto Sxx = fftStackedMatrix.RealView().Permute({1, 0});
[[maybe_unused]] auto Sxx = fftStackedMatrix.RealView().Permute({1, 0});

// Spectral time axis
(s_time = linspace<0>(s_time_shape, static_cast<float>(nperseg) / 2.0f,
Expand Down
15 changes: 7 additions & 8 deletions include/matx/core/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,10 @@ struct MemTracker {
}

void update_stream(void *ptr, cudaStream_t stream) {
std::unique_lock lck(memory_mtx);
[[maybe_unused]] std::unique_lock lck(memory_mtx);
auto iter = allocationMap.find(ptr);
if (iter == allocationMap.end()) {
MATX_THROW(matxInvalidParameter, "Couldn't find pointer in allocation cache");
return;
}

iter->second.stream = stream;
Expand All @@ -105,7 +104,7 @@ struct MemTracker {
auto deallocate_internal(void *ptr, [[maybe_unused]] StreamType st) {
MATX_NVTX_START("", matx::MATX_NVTX_LOG_INTERNAL)

std::unique_lock lck(memory_mtx);
[[maybe_unused]] std::unique_lock lck(memory_mtx);
auto iter = allocationMap.find(ptr);

if (iter == allocationMap.end()) {
Expand All @@ -114,8 +113,9 @@ struct MemTracker {
// and a deallocation occurs in a different one than it was allocated. Allow the user to ignore
// these cases if they know the issue.
MATX_THROW(matxInvalidParameter, "Couldn't find pointer in allocation cache");
#else
return;
#endif
return;
}

size_t bytes = iter->second.size;
Expand Down Expand Up @@ -194,14 +194,13 @@ struct MemTracker {
break;
case MATX_INVALID_MEMORY:
MATX_THROW(matxInvalidType, "Invalid memory kind when allocating!");
break;
};

if (*ptr == nullptr) {
MATX_THROW(matxOutOfMemory, "Failed to allocate memory");
}

std::unique_lock lck(memory_mtx);
[[maybe_unused]] std::unique_lock lck(memory_mtx);
matxMemoryStats.currentBytesAllocated += bytes;
matxMemoryStats.totalBytesAllocated += bytes;
matxMemoryStats.maxBytesAllocated = std::max(
Expand All @@ -214,7 +213,7 @@ struct MemTracker {
return false;
}

std::unique_lock lck(memory_mtx);
[[maybe_unused]] std::unique_lock lck(memory_mtx);
auto iter = allocationMap.find(ptr);

return iter != allocationMap.end();
Expand All @@ -225,7 +224,7 @@ struct MemTracker {
return MATX_INVALID_MEMORY;
}

std::unique_lock lck(memory_mtx);
[[maybe_unused]] std::unique_lock lck(memory_mtx);
auto iter = allocationMap.find(ptr);

if (iter != allocationMap.end()) {
Expand Down
18 changes: 2 additions & 16 deletions include/matx/core/error.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,55 +75,41 @@ namespace matx
{
case matxSuccess:
return "matxSuccess";
break;
case matxNotSupported:
return "matxNotSupported";
break;
case matxInvalidParameter:
return "matxInvalidParameter";
break;
case matxInvalidDim:
return "matxInvalidDim";
break;
case matxInvalidSize:
return "matxInvalidSize";
break;
case matxCudaError:
return "matxCudaError";
break;
case matxCufftError:
return "matxCufftError";
break;
case matxMatMulError:
return "matxMatMulError";
break;
case matxOutOfMemory:
return "matxOutOfMemory";
break;
case matxIOError:
return "matxIOError";
break;
case matxAssertError:
return "matxAssertError";
break;
case matxInvalidType:
return "matxInvalidType";
break;
case matxLUError:
return "matxLUError";
break;
case matxInverseError:
return "matxInverseError";
break;
case matxSolverError:
return "matxSolverError";
break;
case matxcuTensorError:
return "matxcuTensorError";
break;
default:
return "Unknown";
};

return "Unknown";
}

namespace detail {
Expand Down
10 changes: 5 additions & 5 deletions include/matx/core/nvtx.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ inline matx_nvxtLogLevels globalNvtxLevel = matx_nvxtLogLevels::MATX_NVTX_LOG_AP
////////////////////////////////////////////////////////////////////////////////
[[maybe_unused]] static int getNVTX_Range_ID( )
{
std::unique_lock lck(nvtx_memory_mtx);
[[maybe_unused]] std::unique_lock lck(nvtx_memory_mtx);

int newID = ++numNvtxTrackedRanges;
auto foundIter = nvtx_eventMap.find( newID );
Expand All @@ -200,7 +200,7 @@ inline matx_nvxtLogLevels globalNvtxLevel = matx_nvxtLogLevels::MATX_NVTX_LOG_AP
////////////////////////////////////////////////////////////////////////////////
[[maybe_unused]] static void setNVTXLogLevel( matx_nvxtLogLevels newNVTXLevel)
{
std::unique_lock lck(nvtx_memory_mtx);
[[maybe_unused]] std::unique_lock lck(nvtx_memory_mtx);
globalNvtxLevel = newNVTXLevel;
}

Expand All @@ -212,7 +212,7 @@ inline matx_nvxtLogLevels globalNvtxLevel = matx_nvxtLogLevels::MATX_NVTX_LOG_AP
////////////////////////////////////////////////////////////////////////////////
[[maybe_unused]] static void registerEvent( int registerId, nvtxRangeId_t eventId )
{
std::unique_lock lck(nvtx_memory_mtx);
[[maybe_unused]] std::unique_lock lck(nvtx_memory_mtx);

std::pair< int, nvtxRangeId_t > newPair( registerId, eventId );
nvtx_eventMap.insert(newPair);
Expand All @@ -227,7 +227,7 @@ inline matx_nvxtLogLevels globalNvtxLevel = matx_nvxtLogLevels::MATX_NVTX_LOG_AP
////////////////////////////////////////////////////////////////////////////////
[[maybe_unused]] static void endEvent( int id )
{
std::unique_lock lck(nvtx_memory_mtx);
[[maybe_unused]] std::unique_lock lck(nvtx_memory_mtx);

auto foundIter = nvtx_eventMap.find( id );

Expand Down Expand Up @@ -351,7 +351,7 @@ class NvtxEvent
{
int newID = matx::getNVTX_Range_ID();

matx::NvtxEvent MATX_UNIQUE_NAME(nvtxFlag_)( functionName, message, nvtxLevel, newID );
[[maybe_unused]] matx::NvtxEvent MATX_UNIQUE_NAME(nvtxFlag_)( functionName, message, nvtxLevel, newID );

return newID;

Expand Down
17 changes: 1 addition & 16 deletions include/matx/core/tensor_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -642,14 +642,6 @@ class tensor_impl_t {
else {
return 0;
}

// nvcc bug workaround
if constexpr (!(I < sizeof...(Is))) {
return 0;
}
else {
return GetVal<I+1, Is...>(tup) + std::get<I>(tup)*this->desc_.Stride(I);
}
}

template <int I = 0, typename ...Is>
Expand All @@ -659,14 +651,7 @@ class tensor_impl_t {
}
else {
return 0;
}

if constexpr (!(I < sizeof...(Is))) {
return 0;
}
else {
return GetValC<I+1, Is...>(tup) + std::get<I>(tup)*this->desc_.Stride(I);
}
}
}

/**
Expand Down
20 changes: 2 additions & 18 deletions include/matx/core/tensor_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,6 @@ namespace matx

return total;
}

return 0;
}


Expand Down Expand Up @@ -259,8 +257,6 @@ namespace detail {
else {
return matx_max(matx_max(t0, t1), tn...);
}

return t0; // 11.4 compiler has a bug. This is dead code
}

template <class T, class M = T>
Expand All @@ -270,12 +266,6 @@ namespace detail {
return T::Rank();
else
return -1;

// work around for compiler bug/warning
if constexpr (!is_matx_op<M>())
return -1;
else
return T::Rank();
}

template <class T, class M = T>
Expand All @@ -286,12 +276,6 @@ namespace detail {
return a.Size(dim);
else
return 1;

// work around for compiler bug/warning
if constexpr (!is_matx_op<M>())
return 1;
else
return a.Size(dim);
}

template <int RANK, class T, class M = T>
Expand Down Expand Up @@ -998,7 +982,7 @@ void PrintData(FILE* fp, const Op &op, Args... dims) {
CUmemorytype mtype;
void *data[] = {&mtype};
CUpointer_attribute attrs[] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE};
auto ret = cuPointerGetAttributes(1,
[[maybe_unused]] auto ret = cuPointerGetAttributes(1,
&attrs[0],
data,
reinterpret_cast<CUdeviceptr>(op.Data()));
Expand Down Expand Up @@ -1194,7 +1178,7 @@ void fprint(FILE* fp, const Op &op, Args... dims) {
*/
template <typename Op, typename... Args,
std::enable_if_t<(Op::Rank() > 0 && sizeof...(Args) == 0), bool> = true>
void print(const Op &op, Args... dims) {
void print(const Op &op, [[maybe_unused]] Args... dims) {
std::array<int, Op::Rank()> arr = {0};
auto tp = std::tuple_cat(arr);
std::apply([&](auto &&...args) { fprint(stdout, op, args...); }, tp);
Expand Down
Loading

0 comments on commit d71d80b

Please sign in to comment.