From d1f9d4e3dfd64d3ce0c2a7361b55bff62aa1b20c Mon Sep 17 00:00:00 2001 From: zhupengyang Date: Tue, 9 Mar 2021 13:24:27 +0800 Subject: [PATCH] [X86][XPU] add reduce_max; fix xpu fill_any_like (#5633) --- lite/backends/arm/math/reduce_max.cc | 4 +- lite/kernels/x86/reduce_compute.cc | 10 +++++ lite/kernels/x86/reduce_compute.h | 45 +++++++++++++++++++ lite/kernels/xpu/fill_any_like_compute.cc | 1 + lite/tests/kernels/reduce_max_compute_test.cc | 27 +++++------ 5 files changed, 72 insertions(+), 15 deletions(-) diff --git a/lite/backends/arm/math/reduce_max.cc b/lite/backends/arm/math/reduce_max.cc index 0ca0cf2abb3..b20b93cadf3 100644 --- a/lite/backends/arm/math/reduce_max.cc +++ b/lite/backends/arm/math/reduce_max.cc @@ -88,9 +88,9 @@ void reduce_third_of_three( dst[i * second_in + j] = src[i * second_in * third_in + j * second_in]; for (int k = 0; k < third_in; k++) { dst[i * second_in + j] = - src[i * second_in * third_in + j * second_in + k] > + src[i * second_in * third_in + j * third_in + k] > dst[i * second_in + j] - ? src[i * second_in * third_in + j * second_in + k] + ? src[i * second_in * third_in + j * third_in + k] : dst[i * second_in + j]; } } diff --git a/lite/kernels/x86/reduce_compute.cc b/lite/kernels/x86/reduce_compute.cc index edeac0a84eb..f740cbdf259 100644 --- a/lite/kernels/x86/reduce_compute.cc +++ b/lite/kernels/x86/reduce_compute.cc @@ -33,3 +33,13 @@ REGISTER_LITE_KERNEL(reduce_mean, .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))}) .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))}) .Finalize(); + +REGISTER_LITE_KERNEL(reduce_max, + kX86, + kFloat, + kNCHW, + paddle::lite::kernels::x86::ReduceMaxCompute, + def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))}) + .Finalize(); diff --git a/lite/kernels/x86/reduce_compute.h b/lite/kernels/x86/reduce_compute.h index 8822e0aff0b..7fcddbbb862 100644 --- a/lite/kernels/x86/reduce_compute.h +++ b/lite/kernels/x86/reduce_compute.h @@ -38,6 +38,13 @@ struct MeanFunctor { } }; +struct MaxFunctor { + template + void operator()(X* x, Y* y, const Dim& dim) { + y->device(lite::fluid::EigenDeviceType()) = x->maximum(dim); + } +}; + #define HANDLE_DIM(NDIM, RDIM, FUNCTOR) \ if (ndim == NDIM && rdim == RDIM) { \ paddle::lite::kernels::x86:: \ @@ -120,6 +127,44 @@ class ReduceMeanCompute : public KernelLite { virtual ~ReduceMeanCompute() = default; }; +template +class ReduceMaxCompute : public KernelLite { + public: + using param_t = operators::ReduceParam; + + void Run() override { + auto& param = *param_.get_mutable(); + auto* input = param.X; + auto* Out = param.Out; + param.Out->template mutable_data(); + + const auto& dims = param.dim; + bool keep_dim = param.keep_dim; + + if (dims.size() == 0) { + // Flatten and reduce 1-D tensor + auto x = lite::fluid::EigenVector::Flatten(*input); + auto out = lite::fluid::EigenScalar::From(Out); + auto reduce_dim = Eigen::array({{0}}); + MaxFunctor functor; + functor(&x, &out, reduce_dim); + } else { + int ndim = input->dims().size(); + int rdim = dims.size(); + HANDLE_DIM(4, 3, MaxFunctor); + HANDLE_DIM(4, 2, MaxFunctor); + HANDLE_DIM(4, 1, MaxFunctor); + HANDLE_DIM(3, 2, MaxFunctor); + HANDLE_DIM(3, 1, MaxFunctor); + HANDLE_DIM(2, 2, MaxFunctor); + HANDLE_DIM(2, 1, MaxFunctor); + HANDLE_DIM(1, 1, MaxFunctor); + } + } + + virtual ~ReduceMaxCompute() = default; +}; + } // namespace x86 } // namespace kernels } // namespace lite diff --git a/lite/kernels/xpu/fill_any_like_compute.cc b/lite/kernels/xpu/fill_any_like_compute.cc index 15baa5c8803..60cee09973a 100644 --- a/lite/kernels/xpu/fill_any_like_compute.cc +++ b/lite/kernels/xpu/fill_any_like_compute.cc @@ -52,6 +52,7 @@ void FillAnyLikeCompute::Run() { static_cast(param.value)); break; } + case -1: case 5: { auto data = param.Out->mutable_data(TARGET(kXPU)); r = xdnn::constant(ctx.GetRawContext(), diff --git a/lite/tests/kernels/reduce_max_compute_test.cc b/lite/tests/kernels/reduce_max_compute_test.cc index 769efcd156a..d038c7c2e97 100644 --- a/lite/tests/kernels/reduce_max_compute_test.cc +++ b/lite/tests/kernels/reduce_max_compute_test.cc @@ -229,9 +229,9 @@ void reduce_third_of_three( dst[i * second_in + j] = src[i * second_in * third_in + j * second_in]; for (int k = 0; k < third_in; k++) { dst[i * second_in + j] = - src[i * second_in * third_in + j * second_in + k] > + src[i * second_in * third_in + j * third_in + k] > dst[i * second_in + j] - ? src[i * second_in * third_in + j * second_in + k] + ? src[i * second_in * third_in + j * third_in + k] : dst[i * second_in + j]; } } @@ -275,7 +275,7 @@ class ReduceMaxComputeTester : public arena::TestCase { auto* out = scope->NewTensor(output_); auto x_rank = x_dims_.size(); if (!dim_.empty()) { - for (int i = 0; i < dim_.size(); i++) { + for (size_t i = 0; i < dim_.size(); i++) { if (dim_[i] < 0) { dim_[i] += x_rank; } @@ -295,7 +295,7 @@ class ReduceMaxComputeTester : public arena::TestCase { out_dims.push_back(1); } } else { - for (int i = 0; i < x_dims_.size(); i++) { + for (size_t i = 0; i < x_dims_.size(); i++) { out_dims.push_back(x_dims_[i]); } if (keep_dim_) { @@ -398,7 +398,7 @@ class ReduceMaxComputeTester : public arena::TestCase { } }; -void test_reduce_max(Place place) { +void test_reduce_max_4d(Place place) { std::vector> reduce_dim{ {0}, {1}, {2}, {3}, {0, 1}, {1, 2}, {2, 3}, {-2, -1}}; for (auto n : {1, 3}) { @@ -421,7 +421,7 @@ void test_reduce_max(Place place) { } } -void test_reduce_max_for_three(Place place) { +void test_reduce_max_3d(Place place) { std::vector> reduce_dim{{0}, {1}, {2}}; for (bool keep_dim : {false, true}) { for (auto dim : reduce_dim) { @@ -435,14 +435,15 @@ void test_reduce_max_for_three(Place place) { } TEST(ReduceMax, precision) { -// #ifdef LITE_WITH_X86 -// Place place(TARGET(kX86)); -// #endif -#ifdef LITE_WITH_ARM - Place place(TARGET(kARM)); - test_reduce_max(place); - test_reduce_max_for_three(place); + Place place; +#if defined(LITE_WITH_ARM) + place = TARGET(kARM); +#elif defined(LITE_WITH_X86) + place = TARGET(kX86); #endif + + test_reduce_max_4d(place); + test_reduce_max_3d(place); } } // namespace lite