Skip to content

Commit

Permalink
[OpenCL][Kernel]support dilation>1 for opencl conv2d_transpose test=d…
Browse files Browse the repository at this point in the history
…evelop (#8429)
  • Loading branch information
zhenlin-work authored Feb 14, 2022
1 parent 69f109a commit 2b9e371
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 54 deletions.
48 changes: 31 additions & 17 deletions lite/backends/opencl/cl_kernel/image/conv2d_transpose_kernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ __kernel void conv2d_transpose(
__private const int2 align_shape,
__private const int2 padding_shape,
__private const int2 kernel_shape,
__private const int2 dilation_shape,
__private const int2 kernel_prev_shape,
__private const int kernel_size,
__private const int input_c_blks) {
const int out_c_blk_idx = get_global_id(0);
Expand Down Expand Up @@ -67,7 +69,7 @@ __kernel void conv2d_transpose(
CL_DTYPE4 weights0, weights1, weights2, weights3;
#ifndef IS_DEPTHWISE
for (int ic = 0; ic < input_c_blks; ic++) {
int kernel_y_base = mul24(ic, kernel_size);
int kernel_y_base = mul24(ic, kernel_prev_shape.x * kernel_prev_shape.y);
int in_idx = mul24(ic, input_shape.x);
kernel_x_0 = out_c_blk_idx << 2;
kernel_x_1 = kernel_x_0 + 1;
Expand All @@ -85,23 +87,35 @@ __kernel void conv2d_transpose(
int in_width0 = kernel_start_x;
for (int k_x = valid_kernel_width; k_x >= 0; k_x -= stride_shape.x) {
#ifndef IS_DEPTHWISE
kernel_y = mad24(k_y,
kernel_shape.x,
k_x + kernel_y_base); // (k_y * k_w + k_x) + k_y_base
#endif
#ifndef IS_DEPTHWISE
weights0 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_0, kernel_y));
weights1 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_1, kernel_y));
weights2 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_2, kernel_y));
weights3 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_3, kernel_y));
if (k_x % dilation_shape.x == 0 && k_y % dilation_shape.y == 0) {
kernel_y = mad24(k_y / dilation_shape.y,
kernel_prev_shape.x,
k_x / dilation_shape.x + kernel_y_base);
weights0 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_0, kernel_y));
weights1 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_1, kernel_y));
weights2 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_2, kernel_y));
weights3 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_3, kernel_y));
} else {
weights0 = (CL_DTYPE4)(0.0f);
weights1 = (CL_DTYPE4)(0.0f);
weights2 = (CL_DTYPE4)(0.0f);
weights3 = (CL_DTYPE4)(0.0f);
}
#else
int kernel_x = mad24(out_c_blk_idx, kernel_shape.x, k_x);
weights0 =
READ_IMG_TYPE(CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x, k_y));
if (k_x % dilation_shape.x == 0 && k_y % dilation_shape.y == 0) {
int kernel_x =
mad24(out_c_blk_idx, kernel_prev_shape.x, k_x / dilation_shape.x);
weights0 = READ_IMG_TYPE(CL_DTYPE_CHAR,
filter,
SAMPLER,
(int2)(kernel_x, k_y / dilation_shape.y));
} else {
weights0 = (CL_DTYPE4)(0.0f);
}
#endif
int in_width_value0 = in_width0;
in_width_value0 =
Expand Down
11 changes: 10 additions & 1 deletion lite/kernels/opencl/conv_transpose_image_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ void ConvTransposeImageCompute::PrepareForRun() {
/*********************************************
* Upload filter, bias to opencl device
*********************************************/
filter_tensor_h_ = dilation_h_ * (filter_tensor_h_ - 1) + 1;
filter_tensor_w_ = dilation_w_ * (filter_tensor_w_ - 1) + 1;
auto* filter_cpu = conv_param_->filter->mutable_data<float>();

filter_gpu_image_ = std::unique_ptr<Tensor>(new Tensor);
Expand Down Expand Up @@ -279,7 +281,10 @@ void ConvTransposeImageCompute::SetArgs() {
cl_int2 output_wh = {output_tensor_w_, output_tensor_h_};
cl_int2 filter_wh = {filter_tensor_w_, filter_tensor_h_};
cl_int2 stride_wh = {stride_w_, stride_h_};

cl_int2 dilation_wh = {dilation_w_, dilation_h_};
cl_int2 filter_prev_wh = {
static_cast<cl_int>(conv_param_->filter->dims()[3]),
static_cast<cl_int>(conv_param_->filter->dims()[2])};
auto kernel = &kernel_;

uint32_t idx = 0;
Expand Down Expand Up @@ -308,6 +313,10 @@ void ConvTransposeImageCompute::SetArgs() {
CL_CHECK_FATAL(status);
kernel->setArg(idx++, filter_wh);
CL_CHECK_FATAL(status);
kernel->setArg(idx++, dilation_wh);
CL_CHECK_FATAL(status);
kernel->setArg(idx++, filter_prev_wh);
CL_CHECK_FATAL(status);
kernel->setArg(idx++,
static_cast<int32_t>(filter_tensor_w_ * filter_tensor_h_));
CL_CHECK_FATAL(status);
Expand Down
54 changes: 33 additions & 21 deletions lite/tests/unittest_py/op/test_conv2d_transpose_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,20 @@ def __init__(self, *args, **kwargs):
PrecisionType.FP16,
DataLayoutType.NCHW,
thread=[1, 4])
# opencl_valid_places = [
# Place(TargetType.OpenCL, PrecisionType.FP16,
# DataLayoutType.ImageDefault), Place(
# TargetType.OpenCL, PrecisionType.FP16,
# DataLayoutType.ImageFolder),
# Place(TargetType.OpenCL, PrecisionType.FP32, DataLayoutType.NCHW),
# Place(TargetType.OpenCL, PrecisionType.Any,
# DataLayoutType.ImageDefault), Place(
# TargetType.OpenCL, PrecisionType.Any,
# DataLayoutType.ImageFolder),
# Place(TargetType.OpenCL, PrecisionType.Any, DataLayoutType.NCHW),
# Place(TargetType.Host, PrecisionType.FP32)
# ]
# self.enable_testing_on_place(places=opencl_valid_places)
opencl_valid_places = [
Place(TargetType.OpenCL, PrecisionType.FP16,
DataLayoutType.ImageDefault), Place(
TargetType.OpenCL, PrecisionType.FP16,
DataLayoutType.ImageFolder),
Place(TargetType.OpenCL, PrecisionType.FP32, DataLayoutType.NCHW),
Place(TargetType.OpenCL, PrecisionType.Any,
DataLayoutType.ImageDefault), Place(
TargetType.OpenCL, PrecisionType.Any,
DataLayoutType.ImageFolder),
Place(TargetType.OpenCL, PrecisionType.Any, DataLayoutType.NCHW),
Place(TargetType.Host, PrecisionType.FP32)
]
self.enable_testing_on_place(places=opencl_valid_places)

def is_program_valid(self,
program_config: ProgramConfig,
Expand All @@ -89,10 +89,10 @@ def is_program_valid(self,

def sample_program_configs(self, draw):
input_n = draw(st.integers(min_value=1, max_value=4))
input_c = draw(st.integers(min_value=1, max_value=128))
input_h = draw(st.integers(min_value=1, max_value=128))
input_w = draw(st.integers(min_value=1, max_value=128))
filter_m = draw(st.integers(min_value=1, max_value=16))
input_c = draw(st.integers(min_value=1, max_value=64))
input_h = draw(st.integers(min_value=1, max_value=64))
input_w = draw(st.integers(min_value=1, max_value=64))
filter_m = draw(st.integers(min_value=1, max_value=64))
filter_c = input_c
filter_h = draw(st.integers(min_value=1, max_value=7))
filter_w = draw(st.integers(min_value=1, max_value=7))
Expand Down Expand Up @@ -253,13 +253,25 @@ def generate_bias(*args, **kwargs):
return program_config

def sample_predictor_configs(self):
return self.get_predictor_configs(), ["conv2d_transpose"], (1e-5, 1e-5)
atol, rtol = 1e-5, 1e-5
target_str = self.get_target()
if target_str == "OpenCL":
atol, rtol = 1e-4, 1e-4
return self.get_predictor_configs(), ["conv2d_transpose"], (atol, rtol)

def add_ignore_pass_case(self):
pass
def teller1(program_config, predictor_config):
groups = program_config.ops[0].attrs["groups"]
if predictor_config.target() == TargetType.OpenCL and groups > 1:
return True

self.add_ignore_check_case(
teller1, IgnoreReasons.PADDLELITE_NOT_SUPPORT,
"Lite does not support this op in a specific case on opencl. We need to fix it as soon as possible."
)

def test(self, *args, **kwargs):
self.run_and_statis(quant=False, max_examples=300)
self.run_and_statis(quant=False, max_examples=100)


if __name__ == "__main__":
Expand Down
30 changes: 15 additions & 15 deletions lite/tests/unittest_py/op/test_depthwise_conv2d_transpose_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,20 +63,20 @@ def __init__(self, *args, **kwargs):
PrecisionType.FP16,
DataLayoutType.NCHW,
thread=[1, 4])
# opencl_places = [
# Place(TargetType.OpenCL, PrecisionType.FP16,
# DataLayoutType.ImageDefault), Place(
# TargetType.OpenCL, PrecisionType.FP16,
# DataLayoutType.ImageFolder),
# Place(TargetType.OpenCL, PrecisionType.FP32, DataLayoutType.NCHW),
# Place(TargetType.OpenCL, PrecisionType.Any,
# DataLayoutType.ImageDefault), Place(
# TargetType.OpenCL, PrecisionType.Any,
# DataLayoutType.ImageFolder),
# Place(TargetType.OpenCL, PrecisionType.Any, DataLayoutType.NCHW),
# Place(TargetType.Host, PrecisionType.FP32)
# ]
# self.enable_testing_on_place(places=opencl_places)
opencl_places = [
Place(TargetType.OpenCL, PrecisionType.FP16,
DataLayoutType.ImageDefault), Place(
TargetType.OpenCL, PrecisionType.FP16,
DataLayoutType.ImageFolder),
Place(TargetType.OpenCL, PrecisionType.FP32, DataLayoutType.NCHW),
Place(TargetType.OpenCL, PrecisionType.Any,
DataLayoutType.ImageDefault), Place(
TargetType.OpenCL, PrecisionType.Any,
DataLayoutType.ImageFolder),
Place(TargetType.OpenCL, PrecisionType.Any, DataLayoutType.NCHW),
Place(TargetType.Host, PrecisionType.FP32)
]
self.enable_testing_on_place(places=opencl_places)

def is_program_valid(self,
program_config: ProgramConfig,
Expand Down Expand Up @@ -247,7 +247,7 @@ def add_ignore_pass_case(self):
pass

def test(self, *args, **kwargs):
self.run_and_statis(quant=False, max_examples=300)
self.run_and_statis(quant=False, max_examples=100)


if __name__ == "__main__":
Expand Down

0 comments on commit 2b9e371

Please sign in to comment.