Skip to content

Commit

Permalink
fix int32 overflow in cuda kernel loop
Browse files: browse the repository at this point in the history
  • Loading branch information
zhiqiu committed Dec 10, 2021
1 parent 43f19cc commit 9ebe930
Showing 1 changed file with 3 additions and 6 deletions.
9 changes: 3 additions & 6 deletions paddle/fluid/operators/label_smooth_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ template <typename T>
__global__ void LabelSmoothRunOriginKernel(const int N, const float epsilon,
const int label_dim, const T* src,
T* dst) {
int idx = blockDim.x * blockIdx.x + threadIdx.x;
for (; idx < N; idx += blockDim.x * gridDim.x) {
CUDA_KERNEL_LOOP(idx, N) {
dst[idx] = static_cast<T>(1 - epsilon) * src[idx] +
static_cast<T>(epsilon / label_dim);
}
Expand All @@ -32,8 +31,7 @@ template <typename T>
__global__ void LabelSmoothRunDistKernel(const int N, const float epsilon,
const int dist_numel, const T* src,
const T* dist_data, T* dst) {
int idx = blockDim.x * blockIdx.x + threadIdx.x;
for (; idx < N; idx += blockDim.x * gridDim.x) {
CUDA_KERNEL_LOOP(idx, N) {
int dist_idx = idx % dist_numel;
dst[idx] = static_cast<T>(1 - epsilon) * src[idx] +
static_cast<T>(epsilon) * dist_data[dist_idx];
Expand All @@ -43,8 +41,7 @@ __global__ void LabelSmoothRunDistKernel(const int N, const float epsilon,
// Element-wise scaling kernel: for every index visited by the loop,
//   dst[idx] = (1 - epsilon) * src[idx]
// (the name suggests this is the backward pass of label smoothing).
//
//   N        element count handed to the loop macro as its upper bound
//   epsilon  smoothing factor; (1 - epsilon) is evaluated in float and then
//            converted to T before the multiply
//   src      read-only input, indexed by idx
//   dst      output, indexed by idx
//
// Iteration is delegated to CUDA_KERNEL_LOOP instead of the previous
// hand-written loop, whose `int idx` was advanced by blockDim.x * gridDim.x
// and could overflow int32 for large N.
// NOTE(review): CUDA_KERNEL_LOOP is defined outside this view — presumably it
// declares `idx` with a 64-bit type and iterates idx over [0, N); confirm
// against the macro definition.
template <typename T>
__global__ void LabelSmoothGradRunKernel(const int N, const float epsilon,
                                         const T* src, T* dst) {
  CUDA_KERNEL_LOOP(idx, N) {
    dst[idx] = static_cast<T>(1 - epsilon) * src[idx];
  }
}
Expand Down

0 comments on commit 9ebe930

Please sign in to comment.