[XPU] Add meshgrid op on XPU and bindings for floordiv/min/mod.
wbn03 committed Jul 18, 2022
1 parent 4c173df commit dd26736
Showing 6 changed files with 292 additions and 1 deletion.
5 changes: 4 additions & 1 deletion lite/core/optimizer/mir/__xpu__static_kernel_pick_pass.h
@@ -327,11 +327,14 @@ class XPUStaticKernelPickPass : public mir::StmtPass {
"conv2d_transpose",
"elementwise_mul",
"elementwise_add",
"elementwise_mod",
"elementwise_floordiv",
"reduce_mean",
"tile",
"transpose",
"pixel_shuffle",
"expand_v2"};
"expand_v2",
"meshgrid"};
const std::set<std::string> xpu_inplace_op_{"reshape",
"reshape2",
"flatten",
Expand Down
1 change: 1 addition & 0 deletions lite/kernels/xpu/CMakeLists.txt
@@ -54,6 +54,7 @@ add_kernel(grid_sampler_compute_xpu XPU basic SRCS grid_sampler_compute.cc)
add_kernel(fill_zeros_like_compute_xpu XPU basic SRCS fill_zeros_like_compute.cc)
add_kernel(reduce_compute_xpu XPU basic SRCS reduce_compute.cc)
add_kernel(expand_v2_compute_xpu XPU basic SRCS expand_v2_compute.cc)
add_kernel(meshgrid_compute_xpu XPU basic SRCS meshgrid_compute.cc)

# extra
add_kernel(lookup_table_compute_xpu XPU extra SRCS lookup_table_compute.cc)
127 changes: 127 additions & 0 deletions lite/kernels/xpu/elementwise_compute.cc
@@ -87,6 +87,42 @@ struct MaxFunctor {
  }
};

template <typename T>
struct MinFunctor {
  inline int operator()(xdnn::Context* ctx,
                        const T* x,
                        const T* y,
                        T* z,
                        const std::vector<int>& xshape,
                        const std::vector<int>& yshape) const {
    return xdnn::broadcast_min<T>(ctx, x, y, z, xshape, yshape);
  }
};

template <typename T>
struct ModFunctor {
  inline int operator()(xdnn::Context* ctx,
                        const T* x,
                        const T* y,
                        T* z,
                        const std::vector<int>& xshape,
                        const std::vector<int>& yshape) const {
    return xdnn::broadcast_mod<T>(ctx, x, y, z, xshape, yshape);
  }
};

template <typename T>
struct FloordivFunctor {
  inline int operator()(xdnn::Context* ctx,
                        const T* x,
                        const T* y,
                        T* z,
                        const std::vector<int>& xshape,
                        const std::vector<int>& yshape) const {
    return xdnn::broadcast_floordiv<T>(ctx, x, y, z, xshape, yshape);
  }
};
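Each functor above is a thin wrapper over the matching xdnn broadcast primitive; ElementwiseCompute::Run selects the functor through its template parameter and hands it the operand pointers plus the two operand shapes. As a reading aid, the following is a minimal host-side sketch of what broadcast_floordiv computes for two equal-length operands — out[i] = floor(x[i] / y[i]) — assuming xdnn follows the usual floor-division convention; the real primitive additionally handles shape broadcasting and runs on the XPU device:

// Host-side reference sketch (not the xdnn implementation): elementwise
// floor division of two equal-length vectors.
#include <cmath>
#include <cstddef>
#include <vector>

template <typename T>
std::vector<T> FloordivReference(const std::vector<T>& x,
                                 const std::vector<T>& y) {
  std::vector<T> out(x.size());
  for (std::size_t i = 0; i < x.size(); ++i) {
    // floor(x / y); computed in double so the rule also covers integer
    // inputs with negative operands (e.g. floordiv(-7, 2) == -4).
    out[i] = static_cast<T>(
        std::floor(static_cast<double>(x[i]) / static_cast<double>(y[i])));
  }
  return out;
}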

template <class T, class Functor, PrecisionType PType>
void ElementwiseCompute<T, Functor, PType>::Run() {
auto& param = this->template Param<param_t>();
@@ -171,6 +207,34 @@ using MaxFloat16 = xpu::ElementwiseCompute<float16,
using MaxInt32 =
    xpu::ElementwiseCompute<int, xpu::MaxFunctor<int>, PRECISION(kFloat)>;

using MinFloat32 =
    xpu::ElementwiseCompute<float, xpu::MinFunctor<float>, PRECISION(kFloat)>;

using MinFloat16 = xpu::ElementwiseCompute<float16,
                                           xpu::MinFunctor<float16>,
                                           PRECISION(kFP16)>;
using MinInt32 =
    xpu::ElementwiseCompute<int, xpu::MinFunctor<int>, PRECISION(kFloat)>;

using ModFloat32 =
    xpu::ElementwiseCompute<float, xpu::ModFunctor<float>, PRECISION(kFloat)>;

using ModFloat16 = xpu::ElementwiseCompute<float16,
                                           xpu::ModFunctor<float16>,
                                           PRECISION(kFP16)>;
using ModInt32 =
    xpu::ElementwiseCompute<int, xpu::ModFunctor<int>, PRECISION(kFloat)>;

using FloordivFloat32 = xpu::ElementwiseCompute<float,
                                                xpu::FloordivFunctor<float>,
                                                PRECISION(kFloat)>;

using FloordivFloat16 = xpu::ElementwiseCompute<float16,
                                                xpu::FloordivFunctor<float16>,
                                                PRECISION(kFP16)>;
using FloordivInt32 =
    xpu::ElementwiseCompute<int, xpu::FloordivFunctor<int>, PRECISION(kFloat)>;

REGISTER_LITE_KERNEL(elementwise_add, kXPU, kFloat, kNCHW, AddFloat32, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU))})
@@ -259,3 +323,66 @@ REGISTER_LITE_KERNEL(elementwise_max, kXPU, kFloat, kNCHW, MaxInt32, int32)
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
.Finalize();

REGISTER_LITE_KERNEL(elementwise_min, kXPU, kFloat, kNCHW, MinFloat32, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();

REGISTER_LITE_KERNEL(
    elementwise_min, kXPU, kFP16, kNCHW, MinFloat16, DISABLE_XPU1_MinFloat16)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFP16))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFP16))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFP16))})
    .Finalize();

REGISTER_LITE_KERNEL(elementwise_min, kXPU, kFloat, kNCHW, MinInt32, int32)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .Finalize();

REGISTER_LITE_KERNEL(elementwise_mod, kXPU, kFloat, kNCHW, ModFloat32, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();

REGISTER_LITE_KERNEL(
    elementwise_mod, kXPU, kFP16, kNCHW, ModFloat16, DISABLE_XPU1_ModFloat16)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFP16))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFP16))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFP16))})
    .Finalize();

REGISTER_LITE_KERNEL(elementwise_mod, kXPU, kFloat, kNCHW, ModInt32, int32)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .Finalize();

REGISTER_LITE_KERNEL(
    elementwise_floordiv, kXPU, kFloat, kNCHW, FloordivFloat32, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();

REGISTER_LITE_KERNEL(elementwise_floordiv,
                     kXPU,
                     kFP16,
                     kNCHW,
                     FloordivFloat16,
                     DISABLE_XPU1_FloordivFloat16)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFP16))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFP16))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kFP16))})
    .Finalize();

REGISTER_LITE_KERNEL(
    elementwise_floordiv, kXPU, kFloat, kNCHW, FloordivInt32, int32)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
    .Finalize();
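All of the registrations above feed ElementwiseCompute, which hands numpy-style operand shapes (the xshape/yshape arguments seen in the functors) to the xdnn::broadcast_* primitives. For readers unfamiliar with the rule, here is a minimal sketch of broadcast shape resolution (illustrative only, not Paddle Lite or xdnn code): two dimensions are compatible when they are equal or one of them is 1, and the result takes the larger extent.

// Sketch of numpy-style broadcast shape resolution (illustrative; not
// Paddle Lite code). Returns false when the shapes are incompatible.
#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

bool BroadcastShape(std::vector<int> x,
                    std::vector<int> y,
                    std::vector<int>* out) {
  if (x.size() < y.size()) std::swap(x, y);     // make x the higher-rank shape
  y.insert(y.begin(), x.size() - y.size(), 1);  // left-pad y with 1s
  out->resize(x.size());
  for (std::size_t i = 0; i < x.size(); ++i) {
    if (x[i] != y[i] && x[i] != 1 && y[i] != 1) return false;
    (*out)[i] = std::max(x[i], y[i]);
  }
  return true;
}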
120 changes: 120 additions & 0 deletions lite/kernels/xpu/meshgrid_compute.cc
@@ -0,0 +1,120 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/xpu/meshgrid_compute.h"
#include <vector>
#include "lite/backends/xpu/math.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {

template <typename T, PrecisionType PType>
void MeshgridCompute<T, PType>::Run() {
  auto& param = this->template Param<operators::MeshgridParam>();
  auto& ctx = this->ctx_->template As<XPUContext>();
  std::vector<lite::Tensor*>& ins = param.X;
  std::vector<lite::Tensor*>& outs = param.Out;
  int64_t size = ins.size();

  std::vector<const T*> x_list;
  std::vector<std::vector<int>> x_shape_list;
  for (int i = 0; i < size; ++i) {
    std::vector<int> x_shape(1);
    switch (ins[i]->dims().size()) {
      case 0:
        x_shape[0] = 1;
        break;
      case 1:
        x_shape[0] = ins[i]->dims()[0];
        break;
      default:
        LOG(FATAL) << "Meshgrid Op expected scalar or 1D tensor in the input "
                      "tensor list";
        break;
    }
    x_shape_list.push_back(x_shape);
    x_list.push_back(ins[i]->template data<T>());
  }

  std::vector<T*> out_ptrs;
  for (auto out : outs) {
    out_ptrs.push_back(out->template mutable_data<T>(TARGET(kXPU)));
  }

  int r =
      xdnn::meshgrid<T>(ctx.GetRawContext(), x_list, out_ptrs, x_shape_list);

  CHECK_EQ(r, 0);
}

} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
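For context on what the kernel computes: meshgrid takes k 1-D tensors of lengths n1, ..., nk and produces k output tensors, each of shape (n1, ..., nk), where the i-th output repeats the i-th input along every other axis. A minimal host-side reference for the two-input case (a sketch of the op's semantics, not the xdnn kernel), with outputs stored row-major:

// Reference sketch of meshgrid for two 1-D inputs: x (length m) and
// y (length n) expand to two m-by-n grids, stored row-major.
#include <cstddef>
#include <vector>

template <typename T>
void Meshgrid2Reference(const std::vector<T>& x,
                        const std::vector<T>& y,
                        std::vector<T>* gx,
                        std::vector<T>* gy) {
  const std::size_t m = x.size(), n = y.size();
  gx->resize(m * n);
  gy->resize(m * n);
  for (std::size_t i = 0; i < m; ++i) {
    for (std::size_t j = 0; j < n; ++j) {
      (*gx)[i * n + j] = x[i];  // row i of the first grid holds x[i]
      (*gy)[i * n + j] = y[j];  // column j of the second grid holds y[j]
    }
  }
}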

using meshgridFP32 =
    paddle::lite::kernels::xpu::MeshgridCompute<float, PRECISION(kFloat)>;
REGISTER_LITE_KERNEL(meshgrid, kXPU, kFloat, kAny, meshgridFP32, float32)
    .BindInput("X",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kFloat),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kXPU),
                                       PRECISION(kFloat),
                                       DATALAYOUT(kAny))})
    .Finalize();

using meshgridFP16 =
    paddle::lite::kernels::xpu::MeshgridCompute<float16, PRECISION(kFP16)>;
REGISTER_LITE_KERNEL(meshgrid, kXPU, kFP16, kAny, meshgridFP16, float16)
    .BindInput("X",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kFP16),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kXPU),
                                       PRECISION(kFP16),
                                       DATALAYOUT(kAny))})
    .Finalize();

using meshgridInt32 =
    paddle::lite::kernels::xpu::MeshgridCompute<int, PRECISION(kFloat)>;
REGISTER_LITE_KERNEL(meshgrid, kXPU, kFloat, kAny, meshgridInt32, int32)
    .BindInput("X",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kInt32),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kXPU),
                                       PRECISION(kInt32),
                                       DATALAYOUT(kAny))})
    .Finalize();

using meshgridInt64 =
    paddle::lite::kernels::xpu::MeshgridCompute<int64_t, PRECISION(kFloat)>;
REGISTER_LITE_KERNEL(meshgrid, kXPU, kFloat, kAny, meshgridInt64, int64)
    .BindInput("X",
               {LiteType::GetTensorTy(TARGET(kXPU),
                                      PRECISION(kInt64),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kXPU),
                                       PRECISION(kInt64),
                                       DATALAYOUT(kAny))})
    .Finalize();
38 changes: 38 additions & 0 deletions lite/kernels/xpu/meshgrid_compute.h
@@ -0,0 +1,38 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "lite/core/kernel.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {

template <typename T, PrecisionType PType>
class MeshgridCompute
    : public KernelLite<TARGET(kXPU), PType, DATALAYOUT(kAny)> {
 public:
  using param_t = operators::MeshgridParam;

  virtual void Run();

  virtual ~MeshgridCompute() = default;
};

} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
2 changes: 2 additions & 0 deletions lite/tests/kernels/meshgrid_compute_test.cc
@@ -149,6 +149,8 @@ TEST(meshgrid, precision) {
#else
  return;
#endif
#elif defined(LITE_WITH_XPU)
  place = TARGET(kXPU);
#elif defined(LITE_WITH_ARM) || defined(LITE_WITH_X86)
  place = TARGET(kHost);
#else
