Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OpenCL][Kernel]support dilation>1 for opencl conv2d_transpose test=develop #8429

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 31 additions & 17 deletions lite/backends/opencl/cl_kernel/image/conv2d_transpose_kernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ __kernel void conv2d_transpose(
__private const int2 align_shape,
__private const int2 padding_shape,
__private const int2 kernel_shape,
__private const int2 dilation_shape,
__private const int2 kernel_prev_shape,
__private const int kernel_size,
__private const int input_c_blks) {
const int out_c_blk_idx = get_global_id(0);
Expand Down Expand Up @@ -67,7 +69,7 @@ __kernel void conv2d_transpose(
CL_DTYPE4 weights0, weights1, weights2, weights3;
#ifndef IS_DEPTHWISE
for (int ic = 0; ic < input_c_blks; ic++) {
int kernel_y_base = mul24(ic, kernel_size);
int kernel_y_base = mul24(ic, kernel_prev_shape.x * kernel_prev_shape.y);
int in_idx = mul24(ic, input_shape.x);
kernel_x_0 = out_c_blk_idx << 2;
kernel_x_1 = kernel_x_0 + 1;
Expand All @@ -85,23 +87,35 @@ __kernel void conv2d_transpose(
int in_width0 = kernel_start_x;
for (int k_x = valid_kernel_width; k_x >= 0; k_x -= stride_shape.x) {
#ifndef IS_DEPTHWISE
kernel_y = mad24(k_y,
kernel_shape.x,
k_x + kernel_y_base); // (k_y * k_w + k_x) + k_y_base
#endif
#ifndef IS_DEPTHWISE
weights0 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_0, kernel_y));
weights1 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_1, kernel_y));
weights2 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_2, kernel_y));
weights3 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_3, kernel_y));
if (k_x % dilation_shape.x == 0 && k_y % dilation_shape.y == 0) {
kernel_y = mad24(k_y / dilation_shape.y,
kernel_prev_shape.x,
k_x / dilation_shape.x + kernel_y_base);
weights0 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_0, kernel_y));
weights1 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_1, kernel_y));
weights2 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_2, kernel_y));
weights3 = READ_IMG_TYPE(
CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x_3, kernel_y));
} else {
weights0 = (CL_DTYPE4)(0.0f);
weights1 = (CL_DTYPE4)(0.0f);
weights2 = (CL_DTYPE4)(0.0f);
weights3 = (CL_DTYPE4)(0.0f);
}
#else
int kernel_x = mad24(out_c_blk_idx, kernel_shape.x, k_x);
weights0 =
READ_IMG_TYPE(CL_DTYPE_CHAR, filter, SAMPLER, (int2)(kernel_x, k_y));
if (k_x % dilation_shape.x == 0 && k_y % dilation_shape.y == 0) {
int kernel_x =
mad24(out_c_blk_idx, kernel_prev_shape.x, k_x / dilation_shape.x);
weights0 = READ_IMG_TYPE(CL_DTYPE_CHAR,
filter,
SAMPLER,
(int2)(kernel_x, k_y / dilation_shape.y));
} else {
weights0 = (CL_DTYPE4)(0.0f);
}
#endif
int in_width_value0 = in_width0;
in_width_value0 =
Expand Down
11 changes: 10 additions & 1 deletion lite/kernels/opencl/conv_transpose_image_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ void ConvTransposeImageCompute::PrepareForRun() {
/*********************************************
* Upload filter, bias to opencl device
*********************************************/
filter_tensor_h_ = dilation_h_ * (filter_tensor_h_ - 1) + 1;
filter_tensor_w_ = dilation_w_ * (filter_tensor_w_ - 1) + 1;
auto* filter_cpu = conv_param_->filter->mutable_data<float>();

filter_gpu_image_ = std::unique_ptr<Tensor>(new Tensor);
Expand Down Expand Up @@ -279,7 +281,10 @@ void ConvTransposeImageCompute::SetArgs() {
cl_int2 output_wh = {output_tensor_w_, output_tensor_h_};
cl_int2 filter_wh = {filter_tensor_w_, filter_tensor_h_};
cl_int2 stride_wh = {stride_w_, stride_h_};

cl_int2 dilation_wh = {dilation_w_, dilation_h_};
cl_int2 filter_prev_wh = {
static_cast<cl_int>(conv_param_->filter->dims()[3]),
static_cast<cl_int>(conv_param_->filter->dims()[2])};
auto kernel = &kernel_;

uint32_t idx = 0;
Expand Down Expand Up @@ -308,6 +313,10 @@ void ConvTransposeImageCompute::SetArgs() {
CL_CHECK_FATAL(status);
kernel->setArg(idx++, filter_wh);
CL_CHECK_FATAL(status);
kernel->setArg(idx++, dilation_wh);
CL_CHECK_FATAL(status);
kernel->setArg(idx++, filter_prev_wh);
CL_CHECK_FATAL(status);
kernel->setArg(idx++,
static_cast<int32_t>(filter_tensor_w_ * filter_tensor_h_));
CL_CHECK_FATAL(status);
Expand Down
54 changes: 33 additions & 21 deletions lite/tests/unittest_py/op/test_conv2d_transpose_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,20 @@ def __init__(self, *args, **kwargs):
PrecisionType.FP16,
DataLayoutType.NCHW,
thread=[1, 4])
# opencl_valid_places = [
# Place(TargetType.OpenCL, PrecisionType.FP16,
# DataLayoutType.ImageDefault), Place(
# TargetType.OpenCL, PrecisionType.FP16,
# DataLayoutType.ImageFolder),
# Place(TargetType.OpenCL, PrecisionType.FP32, DataLayoutType.NCHW),
# Place(TargetType.OpenCL, PrecisionType.Any,
# DataLayoutType.ImageDefault), Place(
# TargetType.OpenCL, PrecisionType.Any,
# DataLayoutType.ImageFolder),
# Place(TargetType.OpenCL, PrecisionType.Any, DataLayoutType.NCHW),
# Place(TargetType.Host, PrecisionType.FP32)
# ]
# self.enable_testing_on_place(places=opencl_valid_places)
opencl_valid_places = [
Place(TargetType.OpenCL, PrecisionType.FP16,
DataLayoutType.ImageDefault), Place(
TargetType.OpenCL, PrecisionType.FP16,
DataLayoutType.ImageFolder),
Place(TargetType.OpenCL, PrecisionType.FP32, DataLayoutType.NCHW),
Place(TargetType.OpenCL, PrecisionType.Any,
DataLayoutType.ImageDefault), Place(
TargetType.OpenCL, PrecisionType.Any,
DataLayoutType.ImageFolder),
Place(TargetType.OpenCL, PrecisionType.Any, DataLayoutType.NCHW),
Place(TargetType.Host, PrecisionType.FP32)
]
self.enable_testing_on_place(places=opencl_valid_places)

def is_program_valid(self,
program_config: ProgramConfig,
Expand All @@ -89,10 +89,10 @@ def is_program_valid(self,

def sample_program_configs(self, draw):
input_n = draw(st.integers(min_value=1, max_value=4))
input_c = draw(st.integers(min_value=1, max_value=128))
input_h = draw(st.integers(min_value=1, max_value=128))
input_w = draw(st.integers(min_value=1, max_value=128))
filter_m = draw(st.integers(min_value=1, max_value=16))
input_c = draw(st.integers(min_value=1, max_value=64))
input_h = draw(st.integers(min_value=1, max_value=64))
input_w = draw(st.integers(min_value=1, max_value=64))
filter_m = draw(st.integers(min_value=1, max_value=64))
filter_c = input_c
filter_h = draw(st.integers(min_value=1, max_value=7))
filter_w = draw(st.integers(min_value=1, max_value=7))
Expand Down Expand Up @@ -253,13 +253,25 @@ def generate_bias(*args, **kwargs):
return program_config

def sample_predictor_configs(self):
return self.get_predictor_configs(), ["conv2d_transpose"], (1e-5, 1e-5)
atol, rtol = 1e-5, 1e-5
target_str = self.get_target()
if target_str == "OpenCL":
atol, rtol = 1e-4, 1e-4
return self.get_predictor_configs(), ["conv2d_transpose"], (atol, rtol)

def add_ignore_pass_case(self):
pass
def teller1(program_config, predictor_config):
groups = program_config.ops[0].attrs["groups"]
if predictor_config.target() == TargetType.OpenCL and groups > 1:
zhenlin-work marked this conversation as resolved.
Show resolved Hide resolved
return True

self.add_ignore_check_case(
teller1, IgnoreReasons.PADDLELITE_NOT_SUPPORT,
"Lite does not support this op in a specific case on opencl. We need to fix it as soon as possible."
)

def test(self, *args, **kwargs):
self.run_and_statis(quant=False, max_examples=300)
self.run_and_statis(quant=False, max_examples=100)


if __name__ == "__main__":
Expand Down
28 changes: 14 additions & 14 deletions lite/tests/unittest_py/op/test_depthwise_conv2d_transpose_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,19 +63,19 @@ def __init__(self, *args, **kwargs):
PrecisionType.FP16,
DataLayoutType.NCHW,
thread=[1, 4])
# opencl_places = [
# Place(TargetType.OpenCL, PrecisionType.FP16,
# DataLayoutType.ImageDefault), Place(
# TargetType.OpenCL, PrecisionType.FP16,
# DataLayoutType.ImageFolder),
# Place(TargetType.OpenCL, PrecisionType.FP32, DataLayoutType.NCHW),
# Place(TargetType.OpenCL, PrecisionType.Any,
# DataLayoutType.ImageDefault), Place(
# TargetType.OpenCL, PrecisionType.Any,
# DataLayoutType.ImageFolder),
# Place(TargetType.OpenCL, PrecisionType.Any, DataLayoutType.NCHW),
# Place(TargetType.Host, PrecisionType.FP32)
# ]
opencl_places = [
Place(TargetType.OpenCL, PrecisionType.FP16,
DataLayoutType.ImageDefault), Place(
TargetType.OpenCL, PrecisionType.FP16,
DataLayoutType.ImageFolder),
Place(TargetType.OpenCL, PrecisionType.FP32, DataLayoutType.NCHW),
Place(TargetType.OpenCL, PrecisionType.Any,
DataLayoutType.ImageDefault), Place(
TargetType.OpenCL, PrecisionType.Any,
DataLayoutType.ImageFolder),
Place(TargetType.OpenCL, PrecisionType.Any, DataLayoutType.NCHW),
Place(TargetType.Host, PrecisionType.FP32)
]
# self.enable_testing_on_place(places=opencl_places)
zhenlin-work marked this conversation as resolved.
Show resolved Hide resolved

def is_program_valid(self,
Expand Down Expand Up @@ -247,7 +247,7 @@ def add_ignore_pass_case(self):
pass

def test(self, *args, **kwargs):
self.run_and_statis(quant=False, max_examples=300)
self.run_and_statis(quant=False, max_examples=100)


if __name__ == "__main__":
Expand Down