Merge pull request PaddlePaddle#47 from mthreads/cpp_lint
[MTAI-484] fix(build): modify code format for cpplint check
caizhi-mt authored and mt-robot committed Aug 16, 2023
2 parents eb7311f + 2a7c365 commit 9155af9
Showing 33 changed files with 210 additions and 230 deletions.
48 changes: 24 additions & 24 deletions paddle/fluid/platform/CMakeLists.txt
@@ -64,7 +64,9 @@ if(WITH_DGC)
set(dgc_deps dgc)
endif()

-if(WITH_GPU OR WITH_ROCM OR WITH_MUSA)
+if(WITH_GPU
+   OR WITH_ROCM
+   OR WITH_MUSA)
set(GPU_CTX_DEPS dynload_cuda dynamic_loader)
endif()

@@ -91,14 +91,13 @@ if(WITH_ROCM)
DEPS simple_threadpool enforce)
endif()
if(WITH_MUSA)
-  musa_library(
-    stream_callback_manager
-    SRCS stream_callback_manager.cc
-    DEPS simple_threadpool enforce)
+  musa_library(stream_callback_manager SRCS stream_callback_manager.cc DEPS
+               simple_threadpool enforce)
endif()


-if(WITH_GPU OR WITH_ROCM OR WITH_MUSA)
+if(WITH_GPU
+   OR WITH_ROCM
+   OR WITH_MUSA)
set(STREAM_CALLBACK_DEPS stream_callback_manager)
else()
set(STREAM_CALLBACK_DEPS)
@@ -144,7 +145,9 @@ cc_library(
SRCS collective_helper.cc gen_comm_id_helper.cc
DEPS framework_proto device_context enforce)

-if(WITH_GPU OR WITH_ROCM OR WITH_MUSA)
+if(WITH_GPU
+   OR WITH_ROCM
+   OR WITH_MUSA)
target_link_libraries(device_context gpu_resource_pool)
endif()

@@ -243,10 +246,7 @@ if(WITH_ROCM)
endif()

if(WITH_MUSA)
-  musa_library(
-    device_event_gpu
-    SRCS device_event_gpu.cc
-    DEPS device_event_base)
+  musa_library(device_event_gpu SRCS device_event_gpu.cc DEPS device_event_base)
set(DEVICE_EVENT_LIBS
device_event_gpu
CACHE INTERNAL "device event libs")
@@ -301,14 +301,17 @@ elseif(WITH_ROCM)
elseif(WITH_MUSA)
musa_library(
profiler
-    SRCS profiler.cc profiler.cu
-    DEPS phi
-         gpu_info
-         enforce
-         new_profiler
-         stats
-         op_proto_maker
-         shape_inference)
+    SRCS
+    profiler.cc
+    profiler.cu
+    DEPS
+    phi
+    gpu_info
+    enforce
+    new_profiler
+    stats
+    op_proto_maker
+    shape_inference)
elseif(WITH_XPU)
cc_library(
profiler
@@ -368,10 +371,7 @@ if(WITH_GPU)
endif()

if(WITH_MUSA)
-  musa_library(
-    cuda_device_guard
-    SRCS cuda_device_guard.cc
-    DEPS gpu_info)
+  musa_library(cuda_device_guard SRCS cuda_device_guard.cc DEPS gpu_info)
endif()

if(WITH_ROCM)
4 changes: 3 additions & 1 deletion paddle/fluid/platform/device/CMakeLists.txt
@@ -1,7 +1,9 @@
set(DEV_LIBS custom_device)

# GPU
-if(WITH_GPU OR WITH_ROCM OR WITH_MUSA)
+if(WITH_GPU
+   OR WITH_ROCM
+   OR WITH_MUSA)
add_subdirectory(gpu)
endif()

10 changes: 8 additions & 2 deletions paddle/fluid/platform/device/gpu/CMakeLists.txt
@@ -25,8 +25,14 @@ elseif(WITH_ROCM)
elseif(WITH_MUSA)
musa_library(
gpu_info
-    SRCS gpu_info.cc
-    DEPS phi glog enforce monitor dynload_cuda)
+    SRCS
+    gpu_info.cc
+    DEPS
+    phi
+    glog
+    enforce
+    monitor
+    dynload_cuda)
endif()

cc_library(
5 changes: 1 addition & 4 deletions paddle/fluid/platform/dynload/CMakeLists.txt
@@ -73,10 +73,7 @@ if(WITH_ROCM)
SRCS warpctc.cc
DEPS dynamic_loader warpctc phi)
elseif(WITH_MUSA)
-  musa_library(
-    dynload_cuda
-    SRCS ${MUSA_SRCS}
-    DEPS dynamic_loader phi)
+  musa_library(dynload_cuda SRCS ${MUSA_SRCS} DEPS dynamic_loader phi)
cc_library(
dynload_warpctc
SRCS warpctc.cc
60 changes: 30 additions & 30 deletions paddle/fluid/platform/dynload/mublas.h
@@ -38,36 +38,36 @@ namespace dynload {
extern DynLoad__##__name __name

#define MUBLAS_BLAS_ROUTINE_EACH(__macro) \
-  __macro(mublasSaxpy); \
-  __macro(mublasDaxpy); \
-  __macro(mublasCaxpy); \
-  __macro(mublasZaxpy); \
-  __macro(mublasSscal); \
-  __macro(mublasDscal); \
-  __macro(mublasScopy); \
-  __macro(mublasDcopy); \
-  __macro(mublasSgemv); \
-  __macro(mublasDgemv); \
-  __macro(mublasCgemv); \
-  __macro(mublasZgemv); \
-  __macro(mublasSgemm); \
-  __macro(mublasDgemm); \
-  __macro(mublasCgemm); \
-  __macro(mublasZgemm); \
-  __macro(mublasHgemm); \
-  __macro(mublasSgeam); \
-  __macro(mublasDgeam); \
-  __macro(mublasDtrsm); \
-  __macro(mublasCtrsm); \
-  __macro(mublasZtrsm); \
-  __macro(mublasCreate); \
-  __macro(mublasDestroy); \
-  __macro(mublasSetStream); \
-  __macro(mublasSetPointerMode); \
-  __macro(mublasGetPointerMode); \
-  __macro(mublasSgemmBatched); \
-  __macro(mublasDgemmBatched); \
-  __macro(mublasCgemmBatched); \
+  __macro(mublasSaxpy); \
+  __macro(mublasDaxpy); \
+  __macro(mublasCaxpy); \
+  __macro(mublasZaxpy); \
+  __macro(mublasSscal); \
+  __macro(mublasDscal); \
+  __macro(mublasScopy); \
+  __macro(mublasDcopy); \
+  __macro(mublasSgemv); \
+  __macro(mublasDgemv); \
+  __macro(mublasCgemv); \
+  __macro(mublasZgemv); \
+  __macro(mublasSgemm); \
+  __macro(mublasDgemm); \
+  __macro(mublasCgemm); \
+  __macro(mublasZgemm); \
+  __macro(mublasHgemm); \
+  __macro(mublasSgeam); \
+  __macro(mublasDgeam); \
+  __macro(mublasDtrsm); \
+  __macro(mublasCtrsm); \
+  __macro(mublasZtrsm); \
+  __macro(mublasCreate); \
+  __macro(mublasDestroy); \
+  __macro(mublasSetStream); \
+  __macro(mublasSetPointerMode); \
+  __macro(mublasGetPointerMode); \
+  __macro(mublasSgemmBatched); \
+  __macro(mublasDgemmBatched); \
+  __macro(mublasCgemmBatched); \
__macro(mublasZgemmBatched);

MUBLAS_BLAS_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_MUBLAS_WRAP)
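The MUBLAS_BLAS_ROUTINE_EACH block above is an X-macro list: it applies whatever macro it is handed to every mublas routine name, so all the wrapper declarations come from a single invocation. A minimal sketch of the pattern, with placeholder names rather than Paddle's actual wrap macro:

// Sketch of the X-macro pattern above; ROUTINE_EACH and DECLARE_STUB are
// placeholder names for illustration, not Paddle's macros.
#define ROUTINE_EACH(__macro) \
  __macro(saxpy);             \
  __macro(dgemm);

#define DECLARE_STUB(__name) void stub_##__name(void)

// Expands to: void stub_saxpy(void); void stub_dgemm(void);
ROUTINE_EACH(DECLARE_STUB)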
1 change: 0 additions & 1 deletion paddle/fluid/platform/dynload/musa_driver.h
@@ -56,4 +56,3 @@ PLATFORM_MUSA_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_MUSA_WRAP);
} // namespace dynload
} // namespace platform
} // namespace paddle
-
61 changes: 30 additions & 31 deletions paddle/phi/backends/dynload/mublas.h
@@ -52,39 +52,38 @@ extern void *mublas_dso_handle;
extern DynLoad__##__name __name

#define MUBLAS_BLAS_ROUTINE_EACH(__macro) \
-  __macro(mublasSaxpy); \
-  __macro(mublasDaxpy); \
-  __macro(mublasCaxpy); \
-  __macro(mublasZaxpy); \
-  __macro(mublasSscal); \
-  __macro(mublasDscal); \
-  __macro(mublasScopy); \
-  __macro(mublasDcopy); \
-  __macro(mublasSgemv); \
-  __macro(mublasDgemv); \
-  __macro(mublasCgemv); \
-  __macro(mublasZgemv); \
-  __macro(mublasSgemm); \
-  __macro(mublasDgemm); \
-  __macro(mublasCgemm); \
-  __macro(mublasZgemm); \
-  __macro(mublasHgemm); \
-  __macro(mublasSgeam); \
-  __macro(mublasDgeam); \
-  __macro(mublasDtrsm); \
-  __macro(mublasCtrsm); \
-  __macro(mublasZtrsm); \
-  __macro(mublasCreate); \
-  __macro(mublasDestroy); \
-  __macro(mublasSetStream); \
-  __macro(mublasSetPointerMode); \
-  __macro(mublasGetPointerMode); \
-  __macro(mublasSgemmBatched); \
-  __macro(mublasDgemmBatched); \
-  __macro(mublasCgemmBatched); \
+  __macro(mublasSaxpy); \
+  __macro(mublasDaxpy); \
+  __macro(mublasCaxpy); \
+  __macro(mublasZaxpy); \
+  __macro(mublasSscal); \
+  __macro(mublasDscal); \
+  __macro(mublasScopy); \
+  __macro(mublasDcopy); \
+  __macro(mublasSgemv); \
+  __macro(mublasDgemv); \
+  __macro(mublasCgemv); \
+  __macro(mublasZgemv); \
+  __macro(mublasSgemm); \
+  __macro(mublasDgemm); \
+  __macro(mublasCgemm); \
+  __macro(mublasZgemm); \
+  __macro(mublasHgemm); \
+  __macro(mublasSgeam); \
+  __macro(mublasDgeam); \
+  __macro(mublasDtrsm); \
+  __macro(mublasCtrsm); \
+  __macro(mublasZtrsm); \
+  __macro(mublasCreate); \
+  __macro(mublasDestroy); \
+  __macro(mublasSetStream); \
+  __macro(mublasSetPointerMode); \
+  __macro(mublasGetPointerMode); \
+  __macro(mublasSgemmBatched); \
+  __macro(mublasDgemmBatched); \
+  __macro(mublasCgemmBatched); \
  __macro(mublasZgemmBatched);

-
MUBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MUBLAS_WRAP)

#undef DECLARE_DYNAMIC_LOAD_MUBLAS_WRAP
1 change: 0 additions & 1 deletion paddle/phi/backends/dynload/musa_driver.cc
@@ -31,4 +31,3 @@ bool HasCUDADriver() {

} // namespace dynload
} // namespace phi
-
24 changes: 12 additions & 12 deletions paddle/phi/backends/dynload/musartc.h
@@ -28,18 +28,18 @@ extern std::once_flag musartc_dso_flag;
extern void* musartc_dso_handle;
extern bool HasNVRTC();

-#define DECLARE_DYNAMIC_LOAD_NVRTC_WRAP(__name) \
-  struct DynLoad__##__name { \
-    template <typename... Args> \
-    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
-      using musartc_func = decltype(&::__name); \
-      std::call_once(musartc_dso_flag, []() { \
-        musartc_dso_handle = phi::dynload::GetNVRTCDsoHandle(); \
-      }); \
-      static void* p_##__name = dlsym(musartc_dso_handle, #__name); \
-      return reinterpret_cast<musartc_func>(p_##__name)(args...); \
-    } \
-  }; \
+#define DECLARE_DYNAMIC_LOAD_NVRTC_WRAP(__name) \
+  struct DynLoad__##__name { \
+    template <typename... Args> \
+    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
+      using musartc_func = decltype(&::__name); \
+      std::call_once(musartc_dso_flag, []() { \
+        musartc_dso_handle = phi::dynload::GetNVRTCDsoHandle(); \
+      }); \
+      static void* p_##__name = dlsym(musartc_dso_handle, #__name); \
+      return reinterpret_cast<musartc_func>(p_##__name)(args...); \
+    } \
+  }; \
extern struct DynLoad__##__name __name

/**
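The DECLARE_DYNAMIC_LOAD_NVRTC_WRAP macro above generates, per symbol, a functor that opens the DSO once (via std::call_once) and resolves the entry point with dlsym on first use. A standalone sketch of that lazy-binding idea, assuming a placeholder library name and entry point rather than Paddle's actual loader:

// Minimal lazy dlsym binding; "libmusart.so" and "musaGetDeviceCount" are
// placeholder names for illustration only.
#include <dlfcn.h>
#include <mutex>

static std::once_flag dso_flag;
static void* dso_handle = nullptr;

template <typename FuncT>
FuncT LoadSymbol(const char* name) {
  // Open the shared library exactly once, then look the symbol up.
  std::call_once(dso_flag,
                 [] { dso_handle = dlopen("libmusart.so", RTLD_LAZY); });
  return dso_handle ? reinterpret_cast<FuncT>(dlsym(dso_handle, name))
                    : nullptr;
}

using GetCountFn = int (*)(int*);
int GetDeviceCount(int* count) {
  // The first call resolves the symbol; later calls reuse the cached pointer.
  static GetCountFn fn = LoadSymbol<GetCountFn>("musaGetDeviceCount");
  return fn ? fn(count) : -1;
}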
6 changes: 3 additions & 3 deletions paddle/phi/backends/gpu/forwards.h
@@ -75,9 +75,9 @@ using ncclComm_t = struct ncclComm *;
// Forward declaration of MUSA runtime types.
using musaStream_t = struct MUstream_st *;
using musaEvent_t = struct MUevent_st *;
-using mublasHandle_t = struct _mublasHandle_t*;
-using mudnnHandle_t = class Handle*;
-using musparseHandle_t = struct _musparse_handle*;
+using mublasHandle_t = struct _mublasHandle_t *;
+using mudnnHandle_t = class Handle *;
+using musparseHandle_t = struct _musparse_handle *;

/// Forward declaration of ROCM types.
#include <cstddef>
3 changes: 2 additions & 1 deletion paddle/phi/backends/gpu/gpu_primitives.h
@@ -61,7 +61,8 @@ CUDA_ATOMIC_WRAPPER(Add, int64_t) {
static_cast<unsigned long long int>(val)); // NOLINT
}

-#if defined(__HIPCC__) || defined(__MUSACC__) || (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600)
+#if defined(__HIPCC__) || defined(__MUSACC__) || \
+    (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600)
USE_CUDA_ATOMIC(Add, double);
#else
CUDA_ATOMIC_WRAPPER(Add, double) {
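The #if above selects the hardware double-precision atomicAdd where it exists (HIP, MUSA, or __CUDA_ARCH__ >= 600); the CUDA_ATOMIC_WRAPPER(Add, double) branch below it (truncated here) is the software fallback. For reference, the classic CAS-on-bit-pattern emulation such fallbacks are typically built on, shown as a sketch rather than Paddle's exact body:

// CAS-based atomicAdd(double*) emulation for pre-sm_60 GPUs (a sketch).
__device__ double AtomicAddDouble(double* address, double val) {
  unsigned long long int* addr =
      reinterpret_cast<unsigned long long int*>(address);
  unsigned long long int old = *addr, assumed;
  do {
    assumed = old;
    // Add in double precision, then publish the new bit pattern only if no
    // other thread changed the value in the meantime.
    old = atomicCAS(addr, assumed,
                    __double_as_longlong(val + __longlong_as_double(assumed)));
  } while (assumed != old);
  return __longlong_as_double(old);
}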
11 changes: 5 additions & 6 deletions paddle/phi/backends/gpu/musa/musa_device_function.h
@@ -102,11 +102,11 @@ __forceinline__ __device__ phi::dtype::complex<double> CudaShuffleDownSync(
}

// TODO(@MTAI): there is compiling error when compiling the following code
-//template <>
-//__forceinline__ __device__ phi::dtype::float16 CudaShuffleXorSync(
-//    unsigned mask, phi::dtype::float16 val, int width) {
-//  return phi::dtype::float16(__shfl_xor_sync(mask, val.to_half(), width));
-//}
+// template <>
+// __forceinline__ __device__ phi::dtype::float16 CudaShuffleXorSync(
+//     unsigned mask, phi::dtype::float16 val, int width) {
+//   return phi::dtype::float16(__shfl_xor_sync(mask, val.to_half(), width));
+// }

template <>
__forceinline__ __device__ phi::dtype::bfloat16 CudaShuffleXorSync(
@@ -187,4 +187,3 @@ __device__ T reduceSum(T val, int tid, int len) {
} // namespace gpu
} // namespace backends
} // namespace phi
-
11 changes: 5 additions & 6 deletions paddle/phi/backends/gpu/musa/musa_helper.h
@@ -21,14 +21,13 @@ namespace gpu {
#define CUDNN_VERSION_MIN(major, minor, patch) \
(0 >= ((major)*1000 + (minor)*100 + (patch)))

-#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \
-  int64_t __index__ = \
-      static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x; \
-  int64_t __stride__ = static_cast<int64_t>(blockDim.x) * gridDim.x; \
-  for (index_type i = __index__; __index__ < (num); \
+#define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \
+  int64_t __index__ = \
+      static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x; \
+  int64_t __stride__ = static_cast<int64_t>(blockDim.x) * gridDim.x; \
+  for (index_type i = __index__; __index__ < (num); \
       __index__ += __stride__, i = __index__)

} // namespace gpu
} // namespace backends
} // namespace phi
-
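CUDA_KERNEL_LOOP_TYPE above is a grid-stride loop: each thread starts at its global index and advances by the total number of launched threads, so a fixed-size launch covers any element count. A hypothetical kernel using it (ScaleKernel is an illustrative name, not from this diff):

// Scales n floats in place; the macro supplies the grid-stride iteration.
__global__ void ScaleKernel(float* x, float alpha, int64_t n) {
  CUDA_KERNEL_LOOP_TYPE(i, n, int64_t) { x[i] *= alpha; }
}
// A launch like ScaleKernel<<<256, 256>>>(d_x, 2.0f, n) need not provide
// exactly one thread per element; the stride loop covers the remainder.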