diff --git a/lite/kernels/x86/CMakeLists.txt b/lite/kernels/x86/CMakeLists.txt index 3c1f87e24b1..0a13e5c1377 100644 --- a/lite/kernels/x86/CMakeLists.txt +++ b/lite/kernels/x86/CMakeLists.txt @@ -38,11 +38,8 @@ add_kernel(gru_unit_compute_x86 X86 basic SRCS gru_unit_compute.cc DEPS ${lite_k add_kernel(sequence_expand_as_compute_x86 X86 basic SRCS sequence_expand_as_compute.cc DEPS ${lite_kernel_deps}) add_kernel(sequence_conv_compute_x86 X86 basic SRCS sequence_conv_compute.cc DEPS ${lite_kernel_deps} math_function blas context_project) -# lite_cc_test(test_conv2d_compute_x86 SRCS conv_compute_test.cc DEPS conv_compute_x86) add_kernel(gather_compute_x86 X86 extra SRCS gather_compute.cc DEPS ${lite_kernel_deps} fluid_data_type) -# lite_cc_test(test_scale_compute_x86 SRCS scale_compute_test.cc DEPS scale_compute_x86) -# lite_cc_test(test_dropout_compute_x86 SRCS dropout_compute_test.cc DEPS dropout_compute_x86) -# lite_cc_test(test_batch_norm_compute_x86 SRCS batch_norm_compute_test.cc DEPS batch_norm_compute_x86) +add_kernel(grid_sampler_compute_x86 X86 extra SRCS grid_sampler_compute.cc DEPS ${lite_kernel_deps} math_function) add_kernel(mul_compute_x86 X86 basic SRCS mul_compute.cc DEPS ${lite_kernel_deps} blas) add_kernel(concat_compute_x86 X86 basic SRCS concat_compute.cc DEPS ${lite_kernel_deps}) add_kernel(sequence_pool_compute_x86 X86 basic SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} sequence_pooling) diff --git a/lite/kernels/x86/grid_sampler_compute.cc b/lite/kernels/x86/grid_sampler_compute.cc new file mode 100644 index 00000000000..90dce46e4ba --- /dev/null +++ b/lite/kernels/x86/grid_sampler_compute.cc @@ -0,0 +1,347 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/kernels/x86/grid_sampler_compute.h" +#include +#include "lite/backends/x86/math/math_function.h" +#include "lite/core/op_registry.h" +#include "lite/fluid/eigen.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +using EigenTensor = lite::fluid::EigenTensor; + +using Array4 = Eigen::DSizes; + +template +inline bool IsInBound(T x, T y, T x_max, T y_max) { + return !(x < static_cast(0) || x > x_max || y < static_cast(0) || + y > y_max); +} + +template +void Unnormalize(const X86Context& ctx, + Tensor* grid_slice, + const int max_val, // height-1 or width-1 + bool align_corners) { + auto place = lite::fluid::EigenDeviceType(); + auto grid_slice_t = EigenTensor::From(*grid_slice); + + if (!align_corners) { + auto factor = static_cast((max_val + 1) * 0.5); + grid_slice_t.device(place) = + (grid_slice_t + static_cast(1)) * factor - static_cast(0.5); + } else { + auto factor = static_cast(max_val * 0.5); + grid_slice_t.device(place) = (grid_slice_t + static_cast(1)) * factor; + } +} + +template +void Clip(const X86Context& ctx, + Tensor* grid_slice, + const int max_val, // height-1 or width-1 + bool align_corners, + std::string padding_mode) { + auto place = lite::fluid::EigenDeviceType(); + auto grid_slice_t = EigenTensor::From(*grid_slice); + if (padding_mode == "border") { + grid_slice_t.device(place) = grid_slice_t.cwiseMax(static_cast(0)) + .cwiseMin(static_cast(max_val)); + } else if (padding_mode == "reflection") { + if (align_corners) { + auto double_range = static_cast(max_val * 2); + auto grid_abs = grid_slice_t.abs(); + auto extra = grid_abs - (grid_abs / double_range).floor() * double_range; + grid_slice_t.device(place) = extra.cwiseMin(double_range - extra); + } else { + auto double_range = static_cast((max_val + 1) * 2); + auto grid_abs = (grid_slice_t + static_cast(0.5)).abs(); + auto extra = grid_abs - (grid_abs / double_range).floor() * double_range; + grid_slice_t.device(place) = + extra.cwiseMin(double_range - extra) - static_cast(0.5); + grid_slice_t.device(place) = grid_slice_t.cwiseMax(static_cast(0)) + .cwiseMin(static_cast(max_val)); + } + } +} + +template +void CalcGridLocations(const X86Context& ctx, + const Tensor& grid, + const int in_h, + const int in_w, + bool align_corners, + std::string padding_mode, + Tensor* grid_x, + Tensor* grid_y) { + const int n = grid.dims()[0]; + const int out_h = grid.dims()[1]; + const int out_w = grid.dims()[2]; + + // split grid with shape (n, h, w, 2) into (x, y) by the 3rd Dim + DDim grid_dim{{n, out_h, out_w}}; + grid_x->Resize(grid_dim); + grid_y->Resize(grid_dim); + T* grid_x_data = grid_x->template mutable_data(); + T* grid_y_data = grid_y->template mutable_data(); + const T* grid_data = grid.data(); + for (int i = 0; i < n * out_h * out_w; i++) { + grid_x_data[i] = grid_data[2 * i]; + grid_y_data[i] = grid_data[(2 * i) + 1]; + } + + Unnormalize(ctx, grid_x, in_w - 1, align_corners); + Unnormalize(ctx, grid_y, in_h - 1, align_corners); + + Clip(ctx, grid_x, in_w - 1, align_corners, padding_mode); + Clip(ctx, grid_y, in_h - 1, align_corners, padding_mode); +} + +template +void GetGridPointValue(const Tensor& input, + Tensor* output, + const Tensor& x, + const Tensor& y) { + const int n = input.dims()[0]; + const int c = input.dims()[1]; + const int in_h = input.dims()[2]; + const int in_w = input.dims()[3]; + const int out_h = x.dims()[1]; + const int out_w = x.dims()[2]; + auto x_t = EigenTensor::From(x); + auto y_t = EigenTensor::From(y); + auto output_t = + EigenTensor::From(*output).setConstant(static_cast(0)); + auto input_t = EigenTensor::From(input); + + for (int i = 0; i < n; i++) { + for (int k = 0; k < out_h; k++) { + for (int l = 0; l < out_w; l++) { + if (IsInBound(x_t(i, k, l), + y_t(i, k, l), + static_cast(in_w - 1), + static_cast(in_h - 1))) { + for (int j = 0; j < c; j++) { + output_t(i, j, k, l) = + input_t(i, + j, + static_cast(round(y_t(i, k, l))), + static_cast(round(x_t(i, k, l)))); + } + } + } + } + } +} + +template +void AllNeigbors(const X86Context& ctx, + const Tensor& input, + Tensor* grid_x, + Tensor* grid_y, + Tensor* x_w, + Tensor* x_e, + Tensor* y_n, + Tensor* y_s, // positions + Tensor* d_w, + Tensor* d_e, + Tensor* d_n, + Tensor* d_s, // distance + Tensor* v_wn, + Tensor* v_en, + Tensor* v_ws, + Tensor* v_es) { // values + auto place = lite::fluid::EigenDeviceType(); + + const int c = input.dims()[1]; + const int n = grid_x->dims()[0]; + const int out_h = grid_x->dims()[1]; + const int out_w = grid_x->dims()[2]; + // calculate coords of 4 corner points + DDim dim{{n, out_h, out_w}}; + x_w->Resize(dim); + x_e->Resize(dim); + y_n->Resize(dim); + y_s->Resize(dim); + x_w->template mutable_data(); + x_e->template mutable_data(); + y_n->template mutable_data(); + y_s->template mutable_data(); + auto x_w_t = EigenTensor::From(*x_w); + auto x_e_t = EigenTensor::From(*x_e); + auto y_n_t = EigenTensor::From(*y_n); + auto y_s_t = EigenTensor::From(*y_s); + + auto grid_x_t = EigenTensor::From(*grid_x); + auto grid_y_t = EigenTensor::From(*grid_y); + + x_w_t.device(place) = grid_x_t.floor(); + x_e_t.device(place) = x_w_t + static_cast(1); + y_n_t.device(place) = grid_y_t.floor(); + y_s_t.device(place) = y_n_t + static_cast(1); + + // calculate distances to 4 sides + d_w->Resize(dim); + d_e->Resize(dim); + d_n->Resize(dim); + d_s->Resize(dim); + d_w->template mutable_data(); + d_e->template mutable_data(); + d_n->template mutable_data(); + d_s->template mutable_data(); + auto d_w_t = EigenTensor::From(*d_w); + auto d_e_t = EigenTensor::From(*d_e); + auto d_n_t = EigenTensor::From(*d_n); + auto d_s_t = EigenTensor::From(*d_s); + d_w_t.device(place) = grid_x_t - x_w_t; + d_e_t.device(place) = x_e_t - grid_x_t; + d_n_t.device(place) = grid_y_t - y_n_t; + d_s_t.device(place) = y_s_t - grid_y_t; + + // calc 4 corner points value + DDim v_dim{{n, c, out_h, out_w}}; + v_wn->Resize(v_dim); + v_en->Resize(v_dim); + v_ws->Resize(v_dim); + v_es->Resize(v_dim); + v_wn->template mutable_data(); + v_en->template mutable_data(); + v_ws->template mutable_data(); + v_es->template mutable_data(); + GetGridPointValue(input, v_wn, *x_w, *y_n); + GetGridPointValue(input, v_en, *x_e, *y_n); + GetGridPointValue(input, v_ws, *x_w, *y_s); + GetGridPointValue(input, v_es, *x_e, *y_s); +} + +template +void BilinearInter(const X86Context& ctx, + const Tensor& input, + Tensor* grid_x, + Tensor* grid_y, + Tensor* out) { + auto place = lite::fluid::EigenDeviceType(); + const int n = grid_x->dims()[0]; + const int out_h = grid_x->dims()[1]; + const int out_w = grid_x->dims()[2]; + const int c = input.dims()[1]; + + Tensor x_w, x_e, y_n, y_s; + Tensor d_w, d_e, d_n, d_s; + Tensor v_wn, v_en, v_ws, v_es; + + AllNeigbors(ctx, + input, + grid_x, + grid_y, + &x_w, + &x_e, + &y_n, + &y_s, + &d_w, + &d_e, + &d_n, + &d_s, + &v_wn, + &v_en, + &v_ws, + &v_es); + + auto d_w_t = EigenTensor::From(d_w); + auto d_e_t = EigenTensor::From(d_e); + auto d_n_t = EigenTensor::From(d_n); + auto d_s_t = EigenTensor::From(d_s); + + auto d_w_scaled_t = + d_w_t.reshape(Array4(n, 1, out_h, out_w)).broadcast(Array4(1, c, 1, 1)); + auto d_e_scaled_t = + d_e_t.reshape(Array4(n, 1, out_h, out_w)).broadcast(Array4(1, c, 1, 1)); + auto d_n_scaled_t = + d_n_t.reshape(Array4(n, 1, out_h, out_w)).broadcast(Array4(1, c, 1, 1)); + auto d_s_scaled_t = + d_s_t.reshape(Array4(n, 1, out_h, out_w)).broadcast(Array4(1, c, 1, 1)); + auto v_wn_t = EigenTensor::From(v_wn); + auto v_en_t = EigenTensor::From(v_en); + auto v_ws_t = EigenTensor::From(v_ws); + auto v_es_t = EigenTensor::From(v_es); + auto output_t = EigenTensor::From(*out); + // bilinear interpolaetion by 4 corner points + output_t.device(place) = v_wn_t * d_e_scaled_t * d_s_scaled_t + + v_en_t * d_w_scaled_t * d_s_scaled_t + + v_ws_t * d_e_scaled_t * d_n_scaled_t + + v_es_t * d_w_scaled_t * d_n_scaled_t; +} + +template +void GridSamplerCompute::Run() { + auto& param = this->Param(); + auto& context = ctx_->As(); + auto* input = param.x; + auto* grid = param.grid; + auto* output = param.out; + const std::string padding_mode = param.padding_mode; + const std::string mode = param.mode; + const bool align_corners = param.align_corners; + + auto input_dims = input->dims(); + const int in_h = input_dims[2]; + const int in_w = input_dims[3]; + + output->template mutable_data(); + lite::x86::math::SetConstant set_zero; + set_zero(context, output, static_cast(0)); + + Tensor grid_x, grid_y; + CalcGridLocations(context, + *grid, + in_h, + in_w, + align_corners, + padding_mode, + &grid_x, + &grid_y); + if (mode == "bilinear") { + BilinearInter(context, *input, &grid_x, &grid_y, output); + } else if (mode == "nearest") { + auto grid_x_t = EigenTensor::From(grid_x); + auto grid_y_t = EigenTensor::From(grid_y); + grid_x_t = grid_x_t.round(); + grid_y_t = grid_y_t.round(); + GetGridPointValue(*input, output, grid_x, grid_y); + } +} + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(grid_sampler, + kX86, + kFloat, + kNCHW, + paddle::lite::kernels::x86::GridSamplerCompute, + def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindInput("Grid", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kX86))}) + .Finalize(); diff --git a/lite/kernels/x86/grid_sampler_compute.h b/lite/kernels/x86/grid_sampler_compute.h new file mode 100644 index 00000000000..93f9398df54 --- /dev/null +++ b/lite/kernels/x86/grid_sampler_compute.h @@ -0,0 +1,36 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "lite/core/kernel.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +class GridSamplerCompute : public KernelLite { + public: + using param_t = operators::GridSamplerParam; + + void Run() override; + + virtual ~GridSamplerCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/lite/operators/op_params.h b/lite/operators/op_params.h index 0ff29c55805..c47b9f5af3b 100644 --- a/lite/operators/op_params.h +++ b/lite/operators/op_params.h @@ -1807,9 +1807,9 @@ struct GroupNormParam : ParamBase { /// --------------------- grid sampler operators -------------------- struct GridSamplerParam : ParamBase { - lite::Tensor* x{}; - lite::Tensor* out{}; - lite::Tensor* grid{}; + const lite::Tensor* x{nullptr}; + const lite::Tensor* grid{nullptr}; + lite::Tensor* out{nullptr}; bool align_corners{true}; std::string padding_mode{"zeros"}; std::string mode{"bilinear"}; diff --git a/lite/tests/kernels/grid_sampler_compute_test.cc b/lite/tests/kernels/grid_sampler_compute_test.cc index f8da390338d..78d746dcbf7 100644 --- a/lite/tests/kernels/grid_sampler_compute_test.cc +++ b/lite/tests/kernels/grid_sampler_compute_test.cc @@ -258,12 +258,15 @@ void test_grid_sampler(Place place) { #ifdef LITE_WITH_ARM auto& ctx = tester->context()->As(); ctx.SetRunMode(lite_api::LITE_POWER_HIGH, 1); +#endif +#ifdef LITE_WITH_X86 + if (padding_mode == "reflection" || mode == "nearest") continue; #endif arena::Arena arena(std::move(tester), place, 6e-5); - LOG(INFO) << "run n: " << n << ", c: " << c << ", h: " << h - << ", w: " << w << ", align_corners:" << align_corners - << ", mode:" << mode - << ", padding_mode:" << padding_mode; + VLOG(5) << "run n: " << n << ", c: " << c << ", h: " << h + << ", w: " << w << ", align_corners:" << align_corners + << ", mode:" << mode + << ", padding_mode:" << padding_mode; if (!arena.TestPrecision()) { LOG(ERROR) << "No Pass!!"; return; @@ -281,10 +284,16 @@ void test_grid_sampler(Place place) { } TEST(GridSampler, precision) { -#ifdef LITE_WITH_ARM - Place place(TARGET(kARM)); - test_grid_sampler(place); + Place place; +#if defined(LITE_WITH_ARM) + place = TARGET(kARM); +#elif defined(LITE_WITH_X86) + place = TARGET(kX86); +#else + return; #endif + + test_grid_sampler(place); } } // namespace lite