[MTAI-484] feat(build): fix code style for cpp lint
caizhi-mt committed Aug 16, 2023
1 parent 2a7c365 · commit ce74631
Showing 10 changed files with 19 additions and 17 deletions.
4 changes: 3 additions & 1 deletion paddle/phi/backends/CMakeLists.txt
@@ -7,7 +7,9 @@ if(NOT APPLE AND NOT WIN32)
   list(APPEND BACKENDS_SRCS device_code.cc)
 endif()
 
-if(WITH_GPU OR WITH_ROCM OR WITH_MUSA)
+if(WITH_GPU
+   OR WITH_ROCM
+   OR WITH_MUSA)
   list(APPEND BACKENDS_SRCS gpu/gpu_context.cc gpu/gpu_info.cc
        gpu/gpu_resources.cc)
   if(WITH_GPU)
1 change: 0 additions & 1 deletion paddle/phi/backends/dynload/mublas.cc
@@ -25,4 +25,3 @@ MUBLAS_BLAS_ROUTINE_EACH(DEFINE_WRAP);
 
 }  // namespace dynload
 }  // namespace phi
-
1 change: 0 additions & 1 deletion paddle/phi/backends/dynload/musa_driver.h
@@ -61,7 +61,6 @@ extern bool HasCUDADriver();
   __macro(muDeviceGetAttribute); \
   __macro(muDeviceGet);
 
-
 MUSA_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MUSA_WRAP);
 
 #undef DECLARE_DYNAMIC_LOAD_MUSA_WRAP
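
Note: the wrapper generated for each listed entry (e.g. muDeviceGet) conventionally lazy-loads the corresponding MUSA driver symbol on first call. Below is a minimal sketch of that pattern, not Paddle's exact macro body; the library name "libmusa.so" and the SKETCH_ prefix are illustrative assumptions.

// Sketch only: the usual shape of a DECLARE_DYNAMIC_LOAD_*_WRAP macro.
#include <dlfcn.h>

#include <mutex>

#define SKETCH_DECLARE_DYNAMIC_LOAD_MUSA_WRAP(__name)                 \
  struct DynLoad__##__name {                                          \
    template <typename... Args>                                       \
    auto operator()(Args... args) {                                   \
      using func_t = decltype(&::__name);                             \
      static func_t p_func = nullptr;                                 \
      static std::once_flag flag;                                     \
      /* Resolve the driver symbol exactly once, then cache it. */    \
      std::call_once(flag, [] {                                       \
        void* handle = dlopen("libmusa.so", RTLD_LAZY);               \
        if (handle)                                                   \
          p_func = reinterpret_cast<func_t>(dlsym(handle, #__name));  \
      });                                                             \
      return p_func(args...);                                         \
    }                                                                 \
  };                                                                  \
  extern DynLoad__##__name __name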
3 changes: 2 additions & 1 deletion paddle/phi/common/cpstring_impl.h
@@ -77,7 +77,8 @@ HOSTDEVICE static inline uint32_t swap32(uint32_t host_int) {
 }
 #endif
 
-#if PD_PSTRING_LITTLE_ENDIAN || (defined(__NVCC__) || defined(__HIPCC__) || defined(__MUSACC__))
+#if PD_PSTRING_LITTLE_ENDIAN || \
+    (defined(__NVCC__) || defined(__HIPCC__) || defined(__MUSACC__))
 #define PD_le32toh(x) x
 #else  // PD_PSTRING_LITTLE_ENDIAN
 #define PD_le32toh(x) swap32(x)
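
Note: the split #if above selects between an identity PD_le32toh (little-endian hosts and device compilers) and a byte swap. A self-contained sketch of the swap32 semantics it relies on, reimplemented here for illustration from the signature in the hunk header:

#include <cassert>
#include <cstdint>

// Reverse the byte order of a 32-bit value, mirroring the swap32 that
// PD_le32toh falls back to on big-endian hosts.
static inline uint32_t swap32(uint32_t v) {
  return ((v & 0x000000FFu) << 24) | ((v & 0x0000FF00u) << 8) |
         ((v & 0x00FF0000u) >> 8) | ((v & 0xFF000000u) >> 24);
}

int main() {
  // A little-endian on-disk value reinterpreted on a big-endian host
  // swaps back to the intended 0x12345678.
  assert(swap32(0x78563412u) == 0x12345678u);
  return 0;
}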
3 changes: 2 additions & 1 deletion paddle/phi/core/hostdevice.h
@@ -30,7 +30,8 @@
 #include "xpu/kernel/math.h"
 #endif
 
-#if (defined(__CUDACC__) || defined(__HIPCC__) || defined(__MUSACC__) || defined(__xpu__))
+#if (defined(__CUDACC__) || defined(__HIPCC__) || defined(__MUSACC__) || \
+     defined(__xpu__))
 #define HOSTDEVICE __host__ __device__
 #define DEVICE __device__
 #define HOST __host__
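
Note: HOSTDEVICE marks a function for compilation on both host and device whenever a GPU-capable compiler (__CUDACC__, __HIPCC__, __MUSACC__) or XPU is active, and collapses to nothing otherwise. A usage sketch with a hypothetical helper, shown only to illustrate the macro:

#include "paddle/phi/core/hostdevice.h"

// Hypothetical helper: one definition serves CPU callers and, under a
// device compiler, GPU kernels as well.
template <typename T>
HOSTDEVICE inline T Clip(T x, T lo, T hi) {
  return x < lo ? lo : (x > hi ? hi : x);
}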
10 changes: 5 additions & 5 deletions paddle/phi/kernels/funcs/embedding_grad.h
@@ -96,9 +96,9 @@ __global__ void EmbeddingGradDeterministicKernel(T* table,
       unsigned long long int matchmask =    // NOLINT
           __ballot(match_found_this_thread);  // NOLINT
       int first_remaining_peer = __ffsll(matchmask) - 1;
-#else // MUSA and CUDA
-  // If and only if match_found_this_thread of the Nth thread is non-zero,
-  // set the Nth bit of matchmask to 1.
+#else  // MUSA and CUDA
+      // If and only if match_found_this_thread of the Nth thread is non-zero,
+      // set the Nth bit of matchmask to 1.
       unsigned int matchmask =
           __ballot_sync(0xffffffff, match_found_this_thread);
       // Find the position of the first bit set to 1 in matchmask.
@@ -112,7 +112,7 @@ __global__ void EmbeddingGradDeterministicKernel(T* table,
       while (matchmask) {
 #ifdef PADDLE_WITH_HIP
         first_remaining_peer = __ffsll(matchmask) - 1;
-#else // CUDA and MUSA
+#else  // CUDA and MUSA
         first_remaining_peer = __ffs(matchmask) - 1;
 #endif
         my_s[threadIdx.x] +=
@@ -142,7 +142,7 @@ void LaunchEmbeddingGradDeterministicKernel(const GPUContext& ctx,
 #ifdef PADDLE_WITH_HIP
   constexpr int kWarpSize = 64;
   constexpr int kBlockDimY = 16;
-#else // CUDA and MUSA
+#else  // CUDA and MUSA
   constexpr int kWarpSize = 32;
   constexpr int kBlockDimY = 32;
 #endif
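
Note: the hunks above only realign comments, but the surrounding warp logic is worth spelling out. A standalone CUDA sketch of the ballot-and-ffs idiom used here (the HIP branch uses __ballot/__ffsll with 64-bit masks instead):

__device__ int FirstMatchingLane(bool match_found_this_thread) {
  // Bit N of matchmask is set iff lane N's predicate was non-zero.
  unsigned int matchmask = __ballot_sync(0xffffffff, match_found_this_thread);
  // __ffs returns the 1-based index of the lowest set bit (0 if none),
  // so this yields the lane id of the first match, or -1 if no lane matched.
  return __ffs(matchmask) - 1;
}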
8 changes: 4 additions & 4 deletions paddle/phi/kernels/funcs/layer_norm_impl.cu.h
@@ -1353,8 +1353,8 @@ __global__ void LayerNormBackwardComputeGradInput(const T *__restrict__ dout,
       // WARP_SHFL_XOR(sum_loss, mask);
       sum_loss1 += __shfl_xor(sum_loss1, mask, warpSize);
       sum_loss2 += __shfl_xor(sum_loss2, mask, warpSize);
-#else // CUDA and MUSA
-  // WARP_SHFL_XOR(sum_loss, mask);
+#else  // CUDA and MUSA
+      // WARP_SHFL_XOR(sum_loss, mask);
       sum_loss1 += __shfl_xor_sync(0xffffffff, sum_loss1, mask, warpSize);
       sum_loss2 += __shfl_xor_sync(0xffffffff, sum_loss2, mask, warpSize);
 #endif
@@ -1504,8 +1504,8 @@ __global__ void LayerNormBackwardComputeGradInputWithSmallFeatureSize(
       // WARP_SHFL_XOR(sum_loss, mask);
       sum_loss1 += __shfl_xor(sum_loss1, mask, warpSize);
       sum_loss2 += __shfl_xor(sum_loss2, mask, warpSize);
-#else // CUDA and MUSA
-  // WARP_SHFL_XOR(sum_loss, mask);
+#else  // CUDA and MUSA
+      // WARP_SHFL_XOR(sum_loss, mask);
       sum_loss1 += __shfl_xor_sync(0xffffffff, sum_loss1, mask, WarpSize);
       sum_loss2 += __shfl_xor_sync(0xffffffff, sum_loss2, mask, WarpSize);
 #endif
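
Note: the __shfl_xor_sync lines being realigned implement a warp-level butterfly reduction. A self-contained sketch of that idiom, assuming the usual 32-lane CUDA warp:

__device__ float WarpAllReduceSum(float val) {
  // XOR-shuffle butterfly: after log2(32) exchanges, every lane holds
  // the sum of all 32 lanes' initial values.
  for (int mask = 16; mask > 0; mask >>= 1) {
    val += __shfl_xor_sync(0xffffffff, val, mask, 32);
  }
  return val;
}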
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/check_numerics_kernel.cu
@@ -503,7 +503,7 @@ void CheckNumericsKernel(const Context& ctx,
 #ifdef PADDLE_WITH_MUSA
   PADDLE_THROW(phi::errors::Unimplemented(
       "OP check_numerics is unsupported for MUSA backend now!"));
-    return;
+  return;
 #else
   int dev_id = tensor.place().device;
   VLOG(6) << "op_type=" << op_type << ", var_name=" << var_name
3 changes: 2 additions & 1 deletion paddle/phi/kernels/impl/elementwise_kernel_impl.h
@@ -17,7 +17,8 @@
 #include "paddle/phi/kernels/elementwise_kernel.h"
 #include "paddle/phi/kernels/funcs/elementwise_base.h"
 #include "paddle/phi/kernels/funcs/elementwise_functor.h"
-#if defined(__NVCC__) || defined(__HIPCC__) || defined(__MUSACC__) || defined(__xpu__)
+#if defined(__NVCC__) || defined(__HIPCC__) || defined(__MUSACC__) || \
+    defined(__xpu__)
 #include "paddle/phi/kernels/funcs/broadcast_function.h"
 #endif
 
1 change: 0 additions & 1 deletion paddle/phi/kernels/impl/matmul_kernel_impl.h
@@ -1006,7 +1006,6 @@ void MatmulWithFlattenKernel(const Context& dev_ctx,
   }
 
   auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);
-
   blas.MatMul(x_matrix, y_matrix, out);
   if (z_dim.size() != 2) {
     out->Resize(z_dim);
