From cf9ca61d4f14badb1c4b42c1250ac3893d5b054a Mon Sep 17 00:00:00 2001 From: Zhang Ting Date: Thu, 13 Oct 2022 11:37:37 +0800 Subject: [PATCH] Revert #46111 (#46961) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Revert "【Hackathon No.56&38】deformable_conv_v1 算子实现 float16 数据类型支持&前向运行加速 (#46111)" --- .../cpu/deformable_conv_grad_kernel.cc | 34 +++---- .../kernels/funcs/deformable_conv_functor.cc | 6 +- .../kernels/funcs/deformable_conv_functor.cu | 39 +++----- .../kernels/funcs/deformable_conv_functor.h | 55 ++++++------ .../gpu/deformable_conv_grad_kernel.cu | 90 +++++++++---------- .../phi/kernels/gpu/deformable_conv_kernel.cu | 3 +- .../impl/deformable_conv_grad_kernel_impl.h | 56 +++++------- .../impl/deformable_conv_kernel_impl.h | 46 ++-------- .../unittests/test_deformable_conv_v1_op.py | 79 ++-------------- 9 files changed, 135 insertions(+), 273 deletions(-) diff --git a/paddle/phi/kernels/cpu/deformable_conv_grad_kernel.cc b/paddle/phi/kernels/cpu/deformable_conv_grad_kernel.cc index 050c61596fee5..a4d43ef8fbe89 100644 --- a/paddle/phi/kernels/cpu/deformable_conv_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/deformable_conv_grad_kernel.cc @@ -97,14 +97,14 @@ inline void ModulatedDeformableCol2imCPUKernel( width); *(grad_im + cur_bottom_grad_pos) = - *(grad_im + cur_bottom_grad_pos) + (weight * cur_top_grad); + *(grad_im + cur_bottom_grad_pos) + weight * cur_top_grad; } } } } } -template +template void ModulatedDeformableCol2im(const Context& dev_ctx, const T* data_col, const T* data_offset, @@ -116,7 +116,7 @@ void ModulatedDeformableCol2im(const Context& dev_ctx, const std::vector& stride, const std::vector& dilation, const int deformable_group, - MT* grad_im) { + T* grad_im) { int channel_per_deformable_group = im_shape[0] / deformable_group; int num_kernels = col_shape[0] * col_shape[1] * col_shape[2] * col_shape[3]; @@ -222,22 +222,22 @@ void ModulatedDeformableCol2imCoordCPUKernel( if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { inv_h = inv_w = -2; } else { - mval += data_col_ptr[col_pos] * funcs::DmcnIm2colBilinear( - data_im_ptr + cnt * height * width, - width, - height, - width, - inv_h, - inv_w); + mval += data_col_ptr[col_pos] * + funcs::DmcnIm2colBilinear(data_im_ptr + cnt * height * width, + width, + height, + width, + inv_h, + inv_w); } const T weight = - DmcnGetCoordinateWeight(inv_h, - inv_w, - height, - width, - data_im_ptr + cnt * height * width, - width, - bp_dir); + DmcnGetCoordinateWeight(inv_h, + inv_w, + height, + width, + data_im_ptr + cnt * height * width, + width, + bp_dir); if (data_mask_ptr) { const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); diff --git a/paddle/phi/kernels/funcs/deformable_conv_functor.cc b/paddle/phi/kernels/funcs/deformable_conv_functor.cc index 253a66adfc6a2..48858fa59390e 100644 --- a/paddle/phi/kernels/funcs/deformable_conv_functor.cc +++ b/paddle/phi/kernels/funcs/deformable_conv_functor.cc @@ -13,8 +13,8 @@ // limitations under the License. #include "paddle/phi/kernels/funcs/deformable_conv_functor.h" + #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/common/amp_type_traits.h" namespace phi { namespace funcs { @@ -82,8 +82,8 @@ inline void ModulatedDeformableIm2colCPUKernel( const T h_im = h_in + i * dilation_h + offset_h; const T w_im = w_in + j * dilation_w + offset_w; if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { - val = DmcnIm2colBilinear( - data_im_ptr, width, height, width, h_im, w_im); + val = + DmcnIm2colBilinear(data_im_ptr, width, height, width, h_im, w_im); } *data_col_ptr = val; if (data_mask_ptr) { diff --git a/paddle/phi/kernels/funcs/deformable_conv_functor.cu b/paddle/phi/kernels/funcs/deformable_conv_functor.cu index 0d5076a4937c3..48105d1f517e9 100644 --- a/paddle/phi/kernels/funcs/deformable_conv_functor.cu +++ b/paddle/phi/kernels/funcs/deformable_conv_functor.cu @@ -14,9 +14,6 @@ #include "paddle/phi/kernels/funcs/deformable_conv_functor.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/common/amp_type_traits.h" -#include "paddle/phi/common/float16.h" -#include "paddle/phi/core/device_context.h" namespace phi { namespace funcs { @@ -54,8 +51,6 @@ __global__ void ModulatedDeformableIm2colGpuKernel( T* data_col) { int index = blockIdx.x * blockDim.x + threadIdx.x; int offset = blockDim.x * gridDim.x; - - using MT = typename phi::dtype::MPTypeTrait::Type; for (size_t i = index; i < nthreads; i += offset) { const int w_col = i % width_col; const int h_col = (i / width_col) % height_col; @@ -90,22 +85,22 @@ __global__ void ModulatedDeformableIm2colGpuKernel( ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col; - const MT offset_h = static_cast(data_offset_ptr[data_offset_h_ptr]); - const MT offset_w = static_cast(data_offset_ptr[data_offset_w_ptr]); - MT val = static_cast(0); - const MT h_im = h_in + i * dilation_h + offset_h; - const MT w_im = w_in + j * dilation_w + offset_w; + const T offset_h = data_offset_ptr[data_offset_h_ptr]; + const T offset_w = data_offset_ptr[data_offset_w_ptr]; + T val = static_cast(0); + const T h_im = h_in + i * dilation_h + offset_h; + const T w_im = w_in + j * dilation_w + offset_w; if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { - val = DmcnIm2colBilinear( - data_im_ptr, width, height, width, h_im, w_im); + val = + DmcnIm2colBilinear(data_im_ptr, width, height, width, h_im, w_im); } + *data_col_ptr = val; if (data_mask_ptr) { const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col; - const MT mask = static_cast(data_mask_ptr[data_mask_hw_ptr]); - val *= mask; + const T mask = data_mask_ptr[data_mask_hw_ptr]; + *data_col_ptr *= mask; } - *data_col_ptr = static_cast(val); data_col_ptr += batch_size * height_col * width_col; } } @@ -169,20 +164,6 @@ template void ModulatedDeformableIm2col( const int deformable_groups, float* data_col); -template void ModulatedDeformableIm2col( - const phi::GPUContext& dev_ctx, - const phi::dtype::float16* data_im, - const phi::dtype::float16* data_offset, - const phi::dtype::float16* data_mask, - const std::vector& im_shape, - const std::vector& col_shape, - const std::vector& filter_shape, - const std::vector& paddings, - const std::vector& strides, - const std::vector& dilations, - const int deformable_groups, - phi::dtype::float16* data_col); - template void ModulatedDeformableIm2col( const phi::GPUContext& dev_ctx, const double* data_im, diff --git a/paddle/phi/kernels/funcs/deformable_conv_functor.h b/paddle/phi/kernels/funcs/deformable_conv_functor.h index 62e42cd58334f..eecda72927510 100644 --- a/paddle/phi/kernels/funcs/deformable_conv_functor.h +++ b/paddle/phi/kernels/funcs/deformable_conv_functor.h @@ -14,47 +14,44 @@ #pragma once -#include "paddle/phi/common/amp_type_traits.h" -#include "paddle/phi/common/float16.h" #include "paddle/phi/core/dense_tensor.h" namespace phi { namespace funcs { -template -HOSTDEVICE MT DmcnIm2colBilinear(const T* bottom_data, - const int data_width, - const int height, - const int width, - MT h, - MT w) { +template +HOSTDEVICE T DmcnIm2colBilinear(const T* bottom_data, + const int data_width, + const int height, + const int width, + T h, + T w) { int h_low = floor(h); int w_low = floor(w); int h_high = h_low + 1; int w_high = w_low + 1; - MT lh = h - h_low; - MT lw = w - w_low; - MT hh = 1 - lh; - MT hw = 1 - lw; + T lh = h - h_low; + T lw = w - w_low; + T hh = 1 - lh; + T hw = 1 - lw; - MT v1 = (h_low >= 0 && w_low >= 0) - ? static_cast(bottom_data[h_low * data_width + w_low]) - : 0; - MT v2 = (h_low >= 0 && w_high <= width - 1) - ? static_cast(bottom_data[h_low * data_width + w_high]) - : 0; - MT v3 = (h_high <= height - 1 && w_low >= 0) - ? static_cast(bottom_data[h_high * data_width + w_low]) - : 0; - MT v4 = (h_high <= height - 1 && w_high <= width - 1) - ? static_cast(bottom_data[h_high * data_width + w_high]) - : 0; + T v1 = + (h_low >= 0 && w_low >= 0) ? bottom_data[h_low * data_width + w_low] : 0; + T v2 = (h_low >= 0 && w_high <= width - 1) + ? bottom_data[h_low * data_width + w_high] + : 0; + T v3 = (h_high <= height - 1 && w_low >= 0) + ? bottom_data[h_high * data_width + w_low] + : 0; + T v4 = (h_high <= height - 1 && w_high <= width - 1) + ? bottom_data[h_high * data_width + w_high] + : 0; - MT w1 = hh * hw; - MT w2 = hh * lw; - MT w3 = lh * hw; - MT w4 = lh * lw; + T w1 = hh * hw; + T w2 = hh * lw; + T w3 = lh * hw; + T w4 = lh * lw; return w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4; } diff --git a/paddle/phi/kernels/gpu/deformable_conv_grad_kernel.cu b/paddle/phi/kernels/gpu/deformable_conv_grad_kernel.cu index 5d2f4727c53e2..b46f1f4a3314d 100644 --- a/paddle/phi/kernels/gpu/deformable_conv_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/deformable_conv_grad_kernel.cu @@ -29,7 +29,7 @@ static inline int NumBlocks(const int N) { kNumMaximumNumBlocks); } -template +template __global__ void ModulatedDeformableCol2imGpuKernel( const int nthreads, const T* data_col, @@ -51,10 +51,9 @@ __global__ void ModulatedDeformableCol2imGpuKernel( const int deformable_group, const int height_col, const int width_col, - MT* grad_im) { + T* grad_im) { int index = blockIdx.x * blockDim.x + threadIdx.x; int offset = blockDim.x * gridDim.x; - // using MT = typename phi::dtype::MPTypeTrait::Type; for (size_t thread = index; thread < nthreads; thread += offset) { const int j = (thread / width_col / height_col / batch_size) % kernel_w; const int i = @@ -79,17 +78,17 @@ __global__ void ModulatedDeformableCol2imGpuKernel( ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out; - const MT offset_h = static_cast(data_offset_ptr[data_offset_h_ptr]); - const MT offset_w = static_cast(data_offset_ptr[data_offset_w_ptr]); - const MT cur_inv_h_data = h_in + i * dilation_h + offset_h; - const MT cur_inv_w_data = w_in + j * dilation_w + offset_w; + const T offset_h = data_offset_ptr[data_offset_h_ptr]; + const T offset_w = data_offset_ptr[data_offset_w_ptr]; + const T cur_inv_h_data = h_in + i * dilation_h + offset_h; + const T cur_inv_w_data = w_in + j * dilation_w + offset_w; - MT cur_top_grad = static_cast(data_col[thread]); + T cur_top_grad = data_col[thread]; if (data_mask) { const T* data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; - const MT mask = static_cast(data_mask_ptr[data_mask_hw_ptr]); + const T mask = data_mask_ptr[data_mask_hw_ptr]; cur_top_grad *= mask; } const int cur_h = static_cast(cur_inv_h_data); @@ -101,12 +100,13 @@ __global__ void ModulatedDeformableCol2imGpuKernel( abs(cur_inv_w_data - (cur_w + dx)) < 1) { int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; - MT weight = DmcnGetGradientWeight(cur_inv_h_data, - cur_inv_w_data, - cur_h + dy, - cur_w + dx, - height, - width); + T weight = DmcnGetGradientWeight(cur_inv_h_data, + cur_inv_w_data, + cur_h + dy, + cur_w + dx, + height, + width); + paddle::platform::CudaAtomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); } @@ -115,7 +115,7 @@ __global__ void ModulatedDeformableCol2imGpuKernel( } } -template +template void ModulatedDeformableCol2im(const Context& dev_ctx, const T* data_col, const T* data_offset, @@ -127,13 +127,13 @@ void ModulatedDeformableCol2im(const Context& dev_ctx, const std::vector& stride, const std::vector& dilation, const int deformable_group, - MT* grad_im) { + T* grad_im) { int channel_per_deformable_group = im_shape[0] / deformable_group; int num_kernels = col_shape[0] * col_shape[1] * col_shape[2] * col_shape[3]; int blocks = NumBlocks(num_kernels); int threads = kNumCUDAThreads; - ModulatedDeformableCol2imGpuKernel + ModulatedDeformableCol2imGpuKernel <<>>(num_kernels, data_col, data_offset, @@ -185,9 +185,8 @@ __global__ void ModulatedDeformableCol2imCoordGpuKernel( T* grad_mask) { int index = blockIdx.x * blockDim.x + threadIdx.x; int offset = blockDim.x * gridDim.x; - using MT = typename phi::dtype::MPTypeTrait::Type; for (size_t i = index; i < nthreads; i += offset) { - MT val = 0, mval = 0; + T val = 0, mval = 0; const int w = i % width_col; const int h = (i / width_col) % height_col; const int c = (i / width_col / height_col) % offset_channels; @@ -232,42 +231,40 @@ __global__ void ModulatedDeformableCol2imCoordGpuKernel( const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out); - - const MT offset_h = static_cast(data_offset_ptr[data_offset_h_ptr]); - const MT offset_w = static_cast(data_offset_ptr[data_offset_w_ptr]); - MT inv_h = h_in + i * dilation_h + offset_h; - MT inv_w = w_in + j * dilation_w + offset_w; + const T offset_h = data_offset_ptr[data_offset_h_ptr]; + const T offset_w = data_offset_ptr[data_offset_w_ptr]; + T inv_h = h_in + i * dilation_h + offset_h; + T inv_w = w_in + j * dilation_w + offset_w; if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { inv_h = inv_w = -2; } else { - mval += - static_cast(data_col_ptr[col_pos]) * - funcs::DmcnIm2colBilinear(data_im_ptr + cnt * height * width, - width, - height, - width, - inv_h, - inv_w); + mval += data_col_ptr[col_pos] * + funcs::DmcnIm2colBilinear(data_im_ptr + cnt * height * width, + width, + height, + width, + inv_h, + inv_w); } - const MT weight = - DmcnGetCoordinateWeight(inv_h, - inv_w, - height, - width, - data_im_ptr + cnt * height * width, - width, - bp_dir); + const T weight = + DmcnGetCoordinateWeight(inv_h, + inv_w, + height, + width, + data_im_ptr + cnt * height * width, + width, + bp_dir); if (data_mask_ptr) { const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); - const MT mask = static_cast(data_mask_ptr[data_mask_hw_ptr]); - val += weight * static_cast(data_col_ptr[col_pos]) * mask; + const T mask = data_mask_ptr[data_mask_hw_ptr]; + val += weight * data_col_ptr[col_pos] * mask; } else { - val += weight * static_cast(data_col_ptr[col_pos]); + val += weight * data_col_ptr[col_pos]; } cnt += 1; } - grad_offset[i] = static_cast(val); + grad_offset[i] = val; if (grad_mask && offset_c % 2 == 0) grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + @@ -362,5 +359,4 @@ PD_REGISTER_KERNEL(deformable_conv_grad, ALL_LAYOUT, phi::DeformableConvGradKernel, float, - double, - paddle::platform::float16) {} + double) {} diff --git a/paddle/phi/kernels/gpu/deformable_conv_kernel.cu b/paddle/phi/kernels/gpu/deformable_conv_kernel.cu index 021791ca93061..2476dcbafb984 100644 --- a/paddle/phi/kernels/gpu/deformable_conv_kernel.cu +++ b/paddle/phi/kernels/gpu/deformable_conv_kernel.cu @@ -23,5 +23,4 @@ PD_REGISTER_KERNEL(deformable_conv, ALL_LAYOUT, phi::DeformableConvKernel, float, - double, - phi::dtype::float16) {} + double) {} diff --git a/paddle/phi/kernels/impl/deformable_conv_grad_kernel_impl.h b/paddle/phi/kernels/impl/deformable_conv_grad_kernel_impl.h index 7402a2273365b..744c48b2bfbd6 100644 --- a/paddle/phi/kernels/impl/deformable_conv_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/deformable_conv_grad_kernel_impl.h @@ -14,10 +14,8 @@ #pragma once -#include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/hostdevice.h" -#include "paddle/phi/kernels/cast_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/blas/blas.h" @@ -60,14 +58,14 @@ HOSTDEVICE T DmcnGetGradientWeight(T argmax_h, return weight; } -template -HOSTDEVICE MT DmcnGetCoordinateWeight(MT argmax_h, - MT argmax_w, - const int height, - const int width, - const T* im_data, - const int data_width, - const int bp_dir) { +template +HOSTDEVICE T DmcnGetCoordinateWeight(T argmax_h, + T argmax_w, + const int height, + const int width, + const T* im_data, + const int data_width, + const int bp_dir) { if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) { return 0; @@ -78,51 +76,43 @@ HOSTDEVICE MT DmcnGetCoordinateWeight(MT argmax_h, int argmax_h_high = argmax_h_low + 1; int argmax_w_high = argmax_w_low + 1; - MT weight = 0; + T weight = 0; if (bp_dir == 0) { weight += (argmax_h_low >= 0 && argmax_w_low >= 0) ? -1 * (argmax_w_low + 1 - argmax_w) * - static_cast( - im_data[argmax_h_low * data_width + argmax_w_low]) + im_data[argmax_h_low * data_width + argmax_w_low] : 0; weight += (argmax_h_low >= 0 && argmax_w_high <= width - 1) ? -1 * (argmax_w - argmax_w_low) * - static_cast( - im_data[argmax_h_low * data_width + argmax_w_high]) + im_data[argmax_h_low * data_width + argmax_w_high] : 0; weight += (argmax_h_high <= height - 1 && argmax_w_low >= 0) ? (argmax_w_low + 1 - argmax_w) * - static_cast( - im_data[argmax_h_high * data_width + argmax_w_low]) + im_data[argmax_h_high * data_width + argmax_w_low] : 0; weight += (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) ? (argmax_w - argmax_w_low) * - static_cast( - im_data[argmax_h_high * data_width + argmax_w_high]) + im_data[argmax_h_high * data_width + argmax_w_high] : 0; } else if (bp_dir == 1) { weight += (argmax_h_low >= 0 && argmax_w_low >= 0) ? -1 * (argmax_h_low + 1 - argmax_h) * - static_cast( - im_data[argmax_h_low * data_width + argmax_w_low]) + im_data[argmax_h_low * data_width + argmax_w_low] : 0; weight += (argmax_h_low >= 0 && argmax_w_high <= width - 1) ? (argmax_h_low + 1 - argmax_h) * - static_cast( - im_data[argmax_h_low * data_width + argmax_w_high]) + im_data[argmax_h_low * data_width + argmax_w_high] : 0; weight += (argmax_h_high <= height - 1 && argmax_w_low >= 0) ? -1 * (argmax_h - argmax_h_low) * - static_cast( - im_data[argmax_h_high * data_width + argmax_w_low]) + im_data[argmax_h_high * data_width + argmax_w_low] : 0; weight += (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) ? (argmax_h - argmax_h_low) * - static_cast( - im_data[argmax_h_high * data_width + argmax_w_high]) + im_data[argmax_h_high * data_width + argmax_w_high] : 0; } @@ -145,7 +135,7 @@ void ModulatedDeformableCol2imCoord(const Context& dev_ctx, T* grad_offset, T* grad_mask); -template +template void ModulatedDeformableCol2im(const Context& dev_ctx, const T* data_col, const T* data_offset, @@ -157,7 +147,7 @@ void ModulatedDeformableCol2im(const Context& dev_ctx, const std::vector& stride, const std::vector& dilation, const int deformable_group, - MT* grad_im); + T* grad_im); template void FilterGradAddup(const Context& dev_ctx, @@ -186,7 +176,7 @@ void DeformableConvGradKernel(const Context& dev_ctx, DenseTensor* filter_grad, DenseTensor* mask_grad) { const int batch_size = static_cast(x.dims()[0]); - using MT = typename phi::dtype::MPTypeTrait::Type; + DDim input_shape = phi::slice_ddim(x.dims(), 1, x.dims().size()); std::vector input_shape_vec = phi::vectorize(input_shape); std::vector filter_shape_vec(phi::vectorize(filter.dims())); @@ -302,8 +292,8 @@ void DeformableConvGradKernel(const Context& dev_ctx, mask_grad_data_ptr); } if (dx) { - MT* mt_dx_ptr = dev_ctx.template Alloc(dx); - + T* dx_ptr = dx->data(); + // get grad of input ModulatedDeformableCol2im(dev_ctx, col_buffer_ptr, offset_ptr + i * im2col_step * input_offset_dim, @@ -315,7 +305,7 @@ void DeformableConvGradKernel(const Context& dev_ctx, strides, dilations, deformable_groups, - mt_dx_ptr + i * im2col_step * input_dim); + dx_ptr + i * im2col_step * input_dim); dx->Resize(x.dims()); } diff --git a/paddle/phi/kernels/impl/deformable_conv_kernel_impl.h b/paddle/phi/kernels/impl/deformable_conv_kernel_impl.h index d66f4e58e5b61..f864c2e5f0ed0 100644 --- a/paddle/phi/kernels/impl/deformable_conv_kernel_impl.h +++ b/paddle/phi/kernels/impl/deformable_conv_kernel_impl.h @@ -14,13 +14,11 @@ #pragma once -#include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/hostdevice.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/deformable_conv_functor.h" -#include "paddle/phi/kernels/transpose_kernel.h" #include "paddle/utils/optional.h" namespace phi { @@ -40,12 +38,6 @@ void DeformableConvKernel(const Context& dev_ctx, DenseTensor* out) { const int batch_size = static_cast(x.dims()[0]); - int temp_step = std::min(64, batch_size); - if (batch_size % temp_step == 0) { - im2col_step = temp_step; - } - - using MT = typename phi::dtype::MPTypeTrait::Type; std::vector filter_shape_vec(phi::vectorize(filter.dims())); std::vector output_shape_vec(phi::vectorize(out->dims())); @@ -109,11 +101,8 @@ void DeformableConvKernel(const Context& dev_ctx, dilations, deformable_groups, col_buffer_ptr); - DenseTensor output_3d = output_4d.Slice(i, i + 1).Resize(phi::slice_ddim( - output_4d.dims(), - 1, - output_4d.dims().size())); // group * C/group * (im2step * H * W) - + DenseTensor output_3d = output_4d.Slice(i, i + 1).Resize( + phi::slice_ddim(output_4d.dims(), 1, output_4d.dims().size())); // get the product of pixel and weight for (int g = 0; g < groups; ++g) { DenseTensor weight_3d_slice = weight_3d.Slice(g, g + 1).Resize( @@ -121,11 +110,8 @@ void DeformableConvKernel(const Context& dev_ctx, DenseTensor col_buffer_3d_slice = col_buffer_3d.Slice(g, g + 1).Resize(phi::slice_ddim( col_buffer_3d.dims(), 1, col_buffer_3d.dims().size())); - DenseTensor output_3d_slice = - output_3d.Slice(g, g + 1).Resize(phi::slice_ddim( - output_3d.dims(), - 1, - output_3d.dims().size())); // C * ((im2col_step)*H*W)) + DenseTensor output_3d_slice = output_3d.Slice(g, g + 1).Resize( + phi::slice_ddim(output_3d.dims(), 1, output_3d.dims().size())); blas.MatMul(weight_3d_slice, false, col_buffer_3d_slice, @@ -135,29 +121,7 @@ void DeformableConvKernel(const Context& dev_ctx, T(0.0)); } } - - // swap axis to get the right result when im2col_step is greater than 1 - if (im2col_step > 1) { - std::vector axis(4); - axis[0] = 0; - axis[1] = 2; - axis[2] = 1; - axis[3] = 3; - - DenseTensor real_output_buffer = phi::Transpose( - dev_ctx, - output_4d.Resize( - phi::make_ddim({batch_size / im2col_step, - output_shape_vec[1], - im2col_step, - output_shape_vec[2] * output_shape_vec[3]})), - axis); - - out->ShareDataWith(real_output_buffer) - .Resize(phi::make_ddim(output_shape_vec)); - } else { - out->ShareDataWith(output_buffer).Resize(phi::make_ddim(output_shape_vec)); - } + out->ShareDataWith(output_buffer).Resize(phi::make_ddim(output_shape_vec)); } } // namespace phi diff --git a/python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py b/python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py index 0e149b171048e..eacf6dba27312 100644 --- a/python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py +++ b/python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py @@ -19,8 +19,6 @@ from op_test import OpTest from paddle.fluid.framework import _test_eager_guard -paddle.enable_static() - def dmc_bilinear(data_im, height, width, h, w): h_low = int(np.floor(h)) @@ -60,8 +58,8 @@ def dconv_im2col_gemm(input, offset, filter, group, conv_param): assert f_c * group == in_c assert np.mod(out_c, group) == 0 - stride, pad, dilation = conv_param['stride'], conv_param['pad'], \ - conv_param['dilation'] + stride, pad, dilation = conv_param['stride'], conv_param['pad'],\ + conv_param['dilation'] out_h = 1 + (in_h + 2 * pad[0] - (dilation[0] * (f_h - 1) + 1)) // stride[0] out_w = 1 + (in_w + 2 * pad[1] - (dilation[1] * (f_w - 1) + 1)) // stride[1] assert out_h == in_h @@ -75,18 +73,18 @@ def dconv_im2col_gemm(input, offset, filter, group, conv_param): for kh in range(f_h): for kw in range(f_w): offset_h_table = \ - offset[n, ::2, h, w].reshape(f_h, f_w) + offset[n, ::2, h, w].reshape(f_h, f_w) offset_w_table = \ - offset[n, 1::2, h, w].reshape(f_h, f_w) + offset[n, 1::2, h, w].reshape(f_h, f_w) offset_h = offset_h_table[kh, kw] offset_w = offset_w_table[kh, kw] val = 0 im_h = h * stride[0] + kh * dilation[0] \ - + offset_h - pad[0] + + offset_h - pad[0] im_w = w * stride[0] + kw * dilation[0] \ - + offset_w - pad[1] + + offset_w - pad[1] if im_h > -1 and im_w > -1 and \ - im_h < in_h and im_w < in_h: + im_h < in_h and im_w < in_h: val = dmc_bilinear(input[n, c], in_h, in_w, im_h, im_w) val_out = val @@ -285,69 +283,6 @@ def init_type(self): self.dtype = np.float64 -class TestFP16(unittest.TestCase): - - def check_main(self, input_np, offset_np, filter_np, dtype): - paddle.disable_static() - input_np = input_np.astype(dtype) - offset_np = offset_np.astype(dtype) - filter_np = filter_np.astype(dtype) - - input = paddle.to_tensor(input_np) - offset = paddle.to_tensor(offset_np) - filter = paddle.to_tensor(filter_np) - - input.stop_gradient = False - offset.stop_gradient = False - filter.stop_gradient = False - - y = paddle.vision.ops.deform_conv2d(input, offset, filter) - input_grad, offset_grad, filter_grad = paddle.grad( - y, [input, offset, filter]) - y_np = y.numpy().astype('float32') - input_grad_np = input_grad.numpy().astype('float32') - offset_grad_np = offset_grad.numpy().astype('float32') - filter_grad_np = filter_grad.numpy().astype('float32') - paddle.enable_static() - return y_np, input_grad_np, offset_grad_np, filter_grad_np - - def test_main(self): - if not paddle.is_compiled_with_cuda(): - return - self.pad = [1, 1] - self.stride = [1, 1] - self.dilations = [1, 1] - self.groups = 1 - self.input_size = [2, 3, 5, 5] # NCHW - assert np.mod(self.input_size[1], self.groups) == 0 - f_c = self.input_size[1] // self.groups - self.filter_size = [40, f_c, 1, 1] - self.im2col_step = 1 - self.deformable_groups = 1 - offset_c = 2 * self.deformable_groups * self.filter_size[ - 2] * self.filter_size[3] - self.offset_size = [ - self.input_size[0], offset_c, self.input_size[2], self.input_size[3] - ] - - input = np.random.random(self.input_size) - offset = 10 * np.random.random(self.offset_size) - filter = np.random.random(self.filter_size) - - y_np_1, input_g_np_1, offset_g_np_1, filter_g_np_1 = self.check_main( - input, offset, filter, 'float16') - y_np_2, input_g_np_2, offset_g_np_2, filter_g_np_2 = self.check_main( - input, offset, filter, 'float32') - - def assert_equal(x, y): - np.testing.assert_allclose(x, y, atol=3e-2) - - assert_equal(y_np_1, y_np_2) - assert_equal(input_g_np_1, input_g_np_2) - assert_equal(offset_g_np_1, offset_g_np_2) - assert_equal(filter_g_np_1, filter_g_np_2) - - class TestModulatedDeformableConvV1InvalidInput(unittest.TestCase): def test_error(self):