Merge pull request PaddlePaddle#47 from mthreads/cpp_lint
[MTAI-484] fix(build): modify code format for cpplint check
caizhi-mt authored and mt-robot committed Aug 16, 2023
2 parents eb7311f + 2a7c365 commit 9155af9
Showing 33 changed files with 210 additions and 230 deletions.
48 changes: 24 additions & 24 deletions paddle/fluid/platform/CMakeLists.txt
@@ -64,7 +64,9 @@ if(WITH_DGC)
set(dgc_deps dgc)
endif()

-if(WITH_GPU OR WITH_ROCM OR WITH_MUSA)
+if(WITH_GPU
+   OR WITH_ROCM
+   OR WITH_MUSA)
set(GPU_CTX_DEPS dynload_cuda dynamic_loader)
endif()

@@ -91,14 +91,13 @@ if(WITH_ROCM)
DEPS simple_threadpool enforce)
endif()
if(WITH_MUSA)
-  musa_library(
-    stream_callback_manager
-    SRCS stream_callback_manager.cc
-    DEPS simple_threadpool enforce)
+  musa_library(stream_callback_manager SRCS stream_callback_manager.cc DEPS
+               simple_threadpool enforce)
endif()


-if(WITH_GPU OR WITH_ROCM OR WITH_MUSA)
+if(WITH_GPU
+   OR WITH_ROCM
+   OR WITH_MUSA)
set(STREAM_CALLBACK_DEPS stream_callback_manager)
else()
set(STREAM_CALLBACK_DEPS)
@@ -144,7 +145,9 @@ cc_library(
SRCS collective_helper.cc gen_comm_id_helper.cc
DEPS framework_proto device_context enforce)

-if(WITH_GPU OR WITH_ROCM OR WITH_MUSA)
+if(WITH_GPU
+   OR WITH_ROCM
+   OR WITH_MUSA)
target_link_libraries(device_context gpu_resource_pool)
endif()

@@ -243,10 +246,7 @@ if(WITH_ROCM)
endif()

if(WITH_MUSA)
-  musa_library(
-    device_event_gpu
-    SRCS device_event_gpu.cc
-    DEPS device_event_base)
+  musa_library(device_event_gpu SRCS device_event_gpu.cc DEPS device_event_base)
set(DEVICE_EVENT_LIBS
device_event_gpu
CACHE INTERNAL "device event libs")
@@ -301,14 +301,17 @@ elseif(WITH_ROCM)
elseif(WITH_MUSA)
musa_library(
profiler
-    SRCS profiler.cc profiler.cu
-    DEPS phi
-         gpu_info
-         enforce
-         new_profiler
-         stats
-         op_proto_maker
-         shape_inference)
+    SRCS
+    profiler.cc
+    profiler.cu
+    DEPS
+    phi
+    gpu_info
+    enforce
+    new_profiler
+    stats
+    op_proto_maker
+    shape_inference)
elseif(WITH_XPU)
cc_library(
profiler
@@ -368,10 +371,7 @@ if(WITH_GPU)
endif()

if(WITH_MUSA)
-  musa_library(
-    cuda_device_guard
-    SRCS cuda_device_guard.cc
-    DEPS gpu_info)
+  musa_library(cuda_device_guard SRCS cuda_device_guard.cc DEPS gpu_info)
endif()

if(WITH_ROCM)
4 changes: 3 additions & 1 deletion paddle/fluid/platform/device/CMakeLists.txt
@@ -1,7 +1,9 @@
set(DEV_LIBS custom_device)

# GPU
-if(WITH_GPU OR WITH_ROCM OR WITH_MUSA)
+if(WITH_GPU
+   OR WITH_ROCM
+   OR WITH_MUSA)
add_subdirectory(gpu)
endif()

10 changes: 8 additions & 2 deletions paddle/fluid/platform/device/gpu/CMakeLists.txt
@@ -25,8 +25,14 @@ elseif(WITH_ROCM)
elseif(WITH_MUSA)
musa_library(
gpu_info
-    SRCS gpu_info.cc
-    DEPS phi glog enforce monitor dynload_cuda)
+    SRCS
+    gpu_info.cc
+    DEPS
+    phi
+    glog
+    enforce
+    monitor
+    dynload_cuda)
endif()

cc_library(
5 changes: 1 addition & 4 deletions paddle/fluid/platform/dynload/CMakeLists.txt
@@ -73,10 +73,7 @@ if(WITH_ROCM)
SRCS warpctc.cc
DEPS dynamic_loader warpctc phi)
elseif(WITH_MUSA)
-  musa_library(
-    dynload_cuda
-    SRCS ${MUSA_SRCS}
-    DEPS dynamic_loader phi)
+  musa_library(dynload_cuda SRCS ${MUSA_SRCS} DEPS dynamic_loader phi)
cc_library(
dynload_warpctc
SRCS warpctc.cc
60 changes: 30 additions & 30 deletions paddle/fluid/platform/dynload/mublas.h
@@ -38,36 +38,36 @@ namespace dynload {
extern DynLoad__##__name __name

#define MUBLAS_BLAS_ROUTINE_EACH(__macro) \
-  __macro(mublasSaxpy); \
-  __macro(mublasDaxpy); \
-  __macro(mublasCaxpy); \
-  __macro(mublasZaxpy); \
-  __macro(mublasSscal); \
-  __macro(mublasDscal); \
-  __macro(mublasScopy); \
-  __macro(mublasDcopy); \
-  __macro(mublasSgemv); \
-  __macro(mublasDgemv); \
-  __macro(mublasCgemv); \
-  __macro(mublasZgemv); \
-  __macro(mublasSgemm); \
-  __macro(mublasDgemm); \
-  __macro(mublasCgemm); \
-  __macro(mublasZgemm); \
-  __macro(mublasHgemm); \
-  __macro(mublasSgeam); \
-  __macro(mublasDgeam); \
-  __macro(mublasDtrsm); \
-  __macro(mublasCtrsm); \
-  __macro(mublasZtrsm); \
-  __macro(mublasCreate); \
-  __macro(mublasDestroy); \
-  __macro(mublasSetStream); \
-  __macro(mublasSetPointerMode); \
-  __macro(mublasGetPointerMode); \
-  __macro(mublasSgemmBatched); \
-  __macro(mublasDgemmBatched); \
-  __macro(mublasCgemmBatched); \
+  __macro(mublasSaxpy); \
+  __macro(mublasDaxpy); \
+  __macro(mublasCaxpy); \
+  __macro(mublasZaxpy); \
+  __macro(mublasSscal); \
+  __macro(mublasDscal); \
+  __macro(mublasScopy); \
+  __macro(mublasDcopy); \
+  __macro(mublasSgemv); \
+  __macro(mublasDgemv); \
+  __macro(mublasCgemv); \
+  __macro(mublasZgemv); \
+  __macro(mublasSgemm); \
+  __macro(mublasDgemm); \
+  __macro(mublasCgemm); \
+  __macro(mublasZgemm); \
+  __macro(mublasHgemm); \
+  __macro(mublasSgeam); \
+  __macro(mublasDgeam); \
+  __macro(mublasDtrsm); \
+  __macro(mublasCtrsm); \
+  __macro(mublasZtrsm); \
+  __macro(mublasCreate); \
+  __macro(mublasDestroy); \
+  __macro(mublasSetStream); \
+  __macro(mublasSetPointerMode); \
+  __macro(mublasGetPointerMode); \
+  __macro(mublasSgemmBatched); \
+  __macro(mublasDgemmBatched); \
+  __macro(mublasCgemmBatched); \
__macro(mublasZgemmBatched);

MUBLAS_BLAS_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_MUBLAS_WRAP)
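The MUBLAS_BLAS_ROUTINE_EACH block above is an X-macro list: it applies whatever macro it is handed to every mublas routine name, so all the wrapper declarations come from a single invocation. A minimal sketch of the pattern, with placeholder names rather than Paddle's actual wrap macro:

// Sketch of the X-macro pattern above; ROUTINE_EACH and DECLARE_STUB are
// placeholder names for illustration, not Paddle's macros.
#define ROUTINE_EACH(__macro) \
  __macro(saxpy);             \
  __macro(dgemm);

#define DECLARE_STUB(__name) void stub_##__name(void)

// Expands to: void stub_saxpy(void); void stub_dgemm(void);
ROUTINE_EACH(DECLARE_STUB)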
1 change: 0 additions & 1 deletion paddle/fluid/platform/dynload/musa_driver.h
@@ -56,4 +56,3 @@ PLATFORM_MUSA_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_MUSA_WRAP);
} // namespace dynload
} // namespace platform
} // namespace paddle
-
61 changes: 30 additions & 31 deletions paddle/phi/backends/dynload/mublas.h
@@ -52,39 +52,38 @@ extern void *mublas_dso_handle;
extern DynLoad__##__name __name

#define MUBLAS_BLAS_ROUTINE_EACH(__macro) \
-  __macro(mublasSaxpy); \
-  __macro(mublasDaxpy); \
-  __macro(mublasCaxpy); \
-  __macro(mublasZaxpy); \
-  __macro(mublasSscal); \
-  __macro(mublasDscal); \
-  __macro(mublasScopy); \
-  __macro(mublasDcopy); \
-  __macro(mublasSgemv); \
-  __macro(mublasDgemv); \
-  __macro(mublasCgemv); \
-  __macro(mublasZgemv); \
-  __macro(mublasSgemm); \
-  __macro(mublasDgemm); \
-  __macro(mublasCgemm); \
-  __macro(mublasZgemm); \
-  __macro(mublasHgemm); \
-  __macro(mublasSgeam); \
-  __macro(mublasDgeam); \
-  __macro(mublasDtrsm); \
-  __macro(mublasCtrsm); \
-  __macro(mublasZtrsm); \
-  __macro(mublasCreate); \
-  __macro(mublasDestroy); \
-  __macro(mublasSetStream); \
-  __macro(mublasSetPointerMode); \
-  __macro(mublasGetPointerMode); \
-  __macro(mublasSgemmBatched); \
-  __macro(mublasDgemmBatched); \
-  __macro(mublasCgemmBatched); \
+  __macro(mublasSaxpy); \
+  __macro(mublasDaxpy); \
+  __macro(mublasCaxpy); \
+  __macro(mublasZaxpy); \
+  __macro(mublasSscal); \
+  __macro(mublasDscal); \
+  __macro(mublasScopy); \
+  __macro(mublasDcopy); \
+  __macro(mublasSgemv); \
+  __macro(mublasDgemv); \
+  __macro(mublasCgemv); \
+  __macro(mublasZgemv); \
+  __macro(mublasSgemm); \
+  __macro(mublasDgemm); \
+  __macro(mublasCgemm); \
+  __macro(mublasZgemm); \
+  __macro(mublasHgemm); \
+  __macro(mublasSgeam); \
+  __macro(mublasDgeam); \
+  __macro(mublasDtrsm); \
+  __macro(mublasCtrsm); \
+  __macro(mublasZtrsm); \
+  __macro(mublasCreate); \
+  __macro(mublasDestroy); \
+  __macro(mublasSetStream); \
+  __macro(mublasSetPointerMode); \
+  __macro(mublasGetPointerMode); \
+  __macro(mublasSgemmBatched); \
+  __macro(mublasDgemmBatched); \
+  __macro(mublasCgemmBatched); \
  __macro(mublasZgemmBatched);

-
MUBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MUBLAS_WRAP)

#undef DECLARE_DYNAMIC_LOAD_MUBLAS_WRAP
1 change: 0 additions & 1 deletion paddle/phi/backends/dynload/musa_driver.cc
@@ -31,4 +31,3 @@ bool HasCUDADriver() {

} // namespace dynload
} // namespace phi
-
24 changes: 12 additions & 12 deletions paddle/phi/backends/dynload/musartc.h
@@ -28,18 +28,18 @@ extern std::once_flag musartc_dso_flag;
extern void* musartc_dso_handle;
extern bool HasNVRTC();

-#define DECLARE_DYNAMIC_LOAD_NVRTC_WRAP(__name) \
-  struct DynLoad__##__name { \
-    template <typename... Args> \
-    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
-      using musartc_func = decltype(&::__name); \
-      std::call_once(musartc_dso_flag, []() { \
-        musartc_dso_handle = phi::dynload::GetNVRTCDsoHandle(); \
-      }); \
-      static void* p_##__name = dlsym(musartc_dso_handle, #__name); \
-      return reinterpret_cast<musartc_func>(p_##__name)(args...); \
-    } \
-  }; \
+#define DECLARE_DYNAMIC_LOAD_NVRTC_WRAP(__name) \
+  struct DynLoad__##__name { \
+    template <typename... Args> \
+    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
+      using musartc_func = decltype(&::__name); \
+      std::call_once(musartc_dso_flag, []() { \
+        musartc_dso_handle = phi::dynload::GetNVRTCDsoHandle(); \
+      }); \
+      static void* p_##__name = dlsym(musartc_dso_handle, #__name); \
+      return reinterpret_cast<musartc_func>(p_##__name)(args...); \
+    } \
+  }; \
extern struct DynLoad__##__name __name

/**
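The DECLARE_DYNAMIC_LOAD_NVRTC_WRAP macro above generates, per symbol, a functor that opens the DSO once (via std::call_once) and resolves the entry point with dlsym on first use. A standalone sketch of that lazy-binding idea, assuming a placeholder library name and entry point rather than Paddle's actual loader:

// Minimal lazy dlsym binding; "libmusart.so" and "musaGetDeviceCount" are
// placeholder names for illustration only.
#include <dlfcn.h>
#include <mutex>

static std::once_flag dso_flag;
static void* dso_handle = nullptr;

template <typename FuncT>
FuncT LoadSymbol(const char* name) {
  // Open the shared library exactly once, then look the symbol up.
  std::call_once(dso_flag,
                 [] { dso_handle = dlopen("libmusart.so", RTLD_LAZY); });
  return dso_handle ? reinterpret_cast<FuncT>(dlsym(dso_handle, name))
                    : nullptr;
}

using GetCountFn = int (*)(int*);
int GetDeviceCount(int* count) {
  // The first call resolves the symbol; later calls reuse the cached pointer.
  static GetCountFn fn = LoadSymbol<GetCountFn>("musaGetDeviceCount");
  return fn ? fn(count) : -1;
}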
6 changes: 3 additions & 3 deletions paddle/phi/backends/gpu/forwards.h
@@ -75,9 +75,9 @@ using ncclComm_t = struct ncclComm *;
// Forward declaration of MUSA runtime types.
using musaStream_t = struct MUstream_st *;
using musaEvent_t = struct MUevent_st *;
-using mublasHandle_t = struct _mublasHandle_t*;
-using mudnnHandle_t = class Handle*;
-using musparseHandle_t = struct _musparse_handle*;
+using mublasHandle_t = struct _mublasHandle_t *;
+using mudnnHandle_t = class Handle *;
+using musparseHandle_t = struct _musparse_handle *;

/// Forward declaration of ROCM types.
#include <cstddef>
3 changes: 2 additions & 1 deletion paddle/phi/backends/gpu/gpu_primitives.h
@@ -61,7 +61,8 @@ CUDA_ATOMIC_WRAPPER(Add, int64_t) {
static_cast<unsigned long long int>(val)); // NOLINT
}

-#if defined(__HIPCC__) || defined(__MUSACC__) || (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600)
+#if defined(__HIPCC__) || defined(__MUSACC__) || \
+    (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600)
USE_CUDA_ATOMIC(Add, double);
#else
CUDA_ATOMIC_WRAPPER(Add, double) {
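The #if above selects the hardware double-precision atomicAdd where it exists (HIP, MUSA, or __CUDA_ARCH__ >= 600); the CUDA_ATOMIC_WRAPPER(Add, double) branch below it (truncated here) is the software fallback. For reference, the classic CAS-on-bit-pattern emulation such fallbacks are typically built on, shown as a sketch rather than Paddle's exact body:

// CAS-based atomicAdd(double*) emulation for pre-sm_60 GPUs (a sketch).
__device__ double AtomicAddDouble(double* address, double val) {
  unsigned long long int* addr =
      reinterpret_cast<unsigned long long int*>(address);
  unsigned long long int old = *addr, assumed;
  do {
    assumed = old;
    // Add in double precision, then publish the new bit pattern only if no
    // other thread changed the value in the meantime.
    old = atomicCAS(addr, assumed,
                    __double_as_longlong(val + __longlong_as_double(assumed)));
  } while (assumed != old);
  return __longlong_as_double(old);
}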
11 changes: 5 additions & 6 deletions paddle/phi/backends/gpu/musa/musa_device_function.h
@@ -102,11 +102,11 @@ __forceinline__ __device__ phi::dtype::complex<double> CudaShuffleDownSync(
}

// TODO(@MTAI): there is compiling error when compiling the following code
-//template <>
-//__forceinline__ __device__ phi::dtype::float16 CudaShuffleXorSync(
-//    unsigned mask, phi::dtype::float16 val, int width) {
-//  return phi::dtype::float16(__shfl_xor_sync(mask, val.to_half(), width));
-//}
+// template <>
+// __forceinline__ __device__ phi::dtype::float16 CudaShuffleXorSync(
+//     unsigned mask, phi::dtype::float16 val, int width) {
+//   return phi::dtype::float16(__shfl_xor_sync(mask, val.to_half(), width));
+// }

template <>
__forceinline__ __device__ phi::dtype::bfloat16 CudaShuffleXorSync(
@@ -187,4 +187,3 @@ __device__ T reduceSum(T val, int tid, int len) {
} // namespace gpu
} // namespace backends
} // namespace phi
-
11 changes: 5 additions & 6 deletions paddle/phi/backends/gpu/musa/musa_helper.h
@@ -21,14 +21,13 @@ namespace gpu {
#define CUDNN_VERSION_MIN(major, minor, patch) \
(0 >= ((major)*1000 + (minor)*100 + (patch)))

-#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \
-  int64_t __index__ = \
-      static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x; \
-  int64_t __stride__ = static_cast<int64_t>(blockDim.x) * gridDim.x; \
-  for (index_type i = __index__; __index__ < (num); \
+#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \
+  int64_t __index__ = \
+      static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x; \
+  int64_t __stride__ = static_cast<int64_t>(blockDim.x) * gridDim.x; \
+  for (index_type i = __index__; __index__ < (num); \
       __index__ += __stride__, i = __index__)

} // namespace gpu
} // namespace backends
} // namespace phi
-
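CUDA_KERNEL_LOOP_TYPE above is a grid-stride loop: each thread starts at its global index and advances by the total number of launched threads, so a fixed-size launch covers any element count. A hypothetical kernel using it (ScaleKernel is an illustrative name, not from this diff):

// Scales n floats in place; the macro supplies the grid-stride iteration.
__global__ void ScaleKernel(float* x, float alpha, int64_t n) {
  CUDA_KERNEL_LOOP_TYPE(i, n, int64_t) { x[i] *= alpha; }
}
// A launch like ScaleKernel<<<256, 256>>>(d_x, 2.0f, n) need not provide
// exactly one thread per element; the stride loop covers the remainder.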