【Hackathon No.70】[PHI decoupling] move jit kernels from fluid to phi #50911

Merged (6 commits, Mar 3, 2023)
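For orientation while reading the diff: this is a mechanical migration. Every touched operator drops the include `paddle/fluid/operators/jit/kernels.h` in favor of `paddle/phi/kernels/funcs/jit/kernels.h`, and the former `jit::` symbols (`KernelFuncs`, the kernel tuples, the attribute structs, `SeqPoolType`, `to_kerneltype`) are now spelled `phi::jit::`. The sketch below illustrates the call pattern before and after the move; it is not code from the PR. The `vadd_relu_example` wrapper is hypothetical, and it assumes the same `paddle::operators` namespace and `platform::CPUPlace` context as the files changed in this diff.

```cpp
// Before this PR (JIT kernels lived under paddle/fluid/operators/jit/):
//   #include "paddle/fluid/operators/jit/kernels.h"
//   auto vaddrelu =
//       jit::KernelFuncs<jit::VAddReluTuple<T>, platform::CPUPlace>::Cache()
//           .At(n);

// After this PR (same kernels, reached through the phi namespace):
#include "paddle/phi/kernels/funcs/jit/kernels.h"

template <typename T>
void vadd_relu_example(const T* x, const T* y, T* z, int n) {
  // Look up (and cache) the JIT-generated kernel specialized for length n.
  auto vaddrelu = phi::jit::KernelFuncs<phi::jit::VAddReluTuple<T>,
                                        platform::CPUPlace>::Cache()
                      .At(n);
  // z[i] = max(x[i] + y[i], 0) for i in [0, n)
  vaddrelu(x, y, z, n);
}
```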
1 change: 0 additions & 1 deletion paddle/fluid/operators/CMakeLists.txt
@@ -24,7 +24,6 @@ add_subdirectory(optimizers)
add_subdirectory(reduce_ops)
add_subdirectory(sequence_ops)
add_subdirectory(string)
add_subdirectory(jit)
add_subdirectory(prim_ops)


8 changes: 4 additions & 4 deletions paddle/fluid/operators/crf_decoding_op.h
@@ -17,7 +17,7 @@ limitations under the License. */

#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/phi/kernels/funcs/jit/kernels.h"
#include "paddle/phi/kernels/funcs/math_function.h"

namespace paddle {
@@ -137,9 +137,9 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
phi::DenseTensor track;
int* track_value =
track.mutable_data<int>(emission_dims, platform::CPUPlace());
auto ker =
jit::KernelFuncs<jit::CRFDecodingTuple<T>, platform::CPUPlace>::Cache()
.At(tag_num);
auto ker = phi::jit::KernelFuncs<phi::jit::CRFDecodingTuple<T>,
platform::CPUPlace>::Cache()
.At(tag_num);
ker(static_cast<int>(seq_len), x, w, alpha_value, track_value, tag_num);
T max_score = -std::numeric_limits<T>::max();
int max_i = 0;
26 changes: 13 additions & 13 deletions paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
@@ -22,8 +22,8 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/jit/kernels.h"

namespace paddle {
namespace operators {
@@ -108,17 +108,17 @@ struct EmbeddingVSumFunctor {
"But received the ids's LoD[0] = %d.",
ids_lod.size()));

jit::emb_seq_pool_attr_t attr(table_height,
table_width,
0,
idx_width,
out_width,
jit::SeqPoolType::kSum);
phi::jit::emb_seq_pool_attr_t attr(table_height,
table_width,
0,
idx_width,
out_width,
phi::jit::SeqPoolType::kSum);
for (size_t i = 0; i != ids_lod.size() - 1; ++i) {
attr.index_height = ids_lod[i + 1] - ids_lod[i];
auto emb_seqpool =
jit::KernelFuncs<jit::EmbSeqPoolTuple<T>, platform::CPUPlace>::Cache()
.At(attr);
auto emb_seqpool = phi::jit::KernelFuncs<phi::jit::EmbSeqPoolTuple<T>,
platform::CPUPlace>::Cache()
.At(attr);
emb_seqpool(
table, ids + ids_lod[i] * idx_width, output + i * out_width, &attr);
}
@@ -265,9 +265,9 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
T *d_table_data = d_table_value->mutable_data<T>(context.GetPlace());
const T *d_output_data = d_output->data<T>();

auto vbroadcast =
jit::KernelFuncs<jit::VBroadcastTuple<T>, platform::CPUPlace>::Cache()
.At(out_width);
auto vbroadcast = phi::jit::KernelFuncs<phi::jit::VBroadcastTuple<T>,
platform::CPUPlace>::Cache()
.At(out_width);
for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
int64_t h = static_cast<int64_t>(lod[i + 1] - lod[i]);
const T *src = d_output_data + i * out_width;
56 changes: 28 additions & 28 deletions paddle/fluid/operators/fused/fusion_gru_op.cc
@@ -19,9 +19,9 @@ limitations under the License. */
#include <vector>

#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/fc_functor.h"
#include "paddle/phi/kernels/funcs/jit/kernels.h"
#include "paddle/phi/kernels/funcs/sequence2batch.h"

namespace paddle {
@@ -273,33 +273,33 @@ class FusionGRUKernel : public framework::OpKernel<T> {
const int total_T = x_mat_dims[0]; \
const int D3 = wh_dims[1]

#define INIT_OTHER_DEFINES \
auto* h0 = ctx.Input<phi::DenseTensor>("H0"); \
auto* wx = ctx.Input<phi::DenseTensor>("WeightX"); \
auto* bias = ctx.Input<phi::DenseTensor>("Bias"); \
auto* hidden_out = ctx.Output<phi::DenseTensor>("Hidden"); \
bool is_reverse = ctx.Attr<bool>("is_reverse"); \
const int M = x_mat_dims[1]; \
const int D = wh_dims[0]; \
const int D2 = D * 2; \
const jit::gru_attr_t attr( \
D, \
jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")), \
jit::to_kerneltype(ctx.Attr<std::string>("activation"))); \
jit::gru_t one_step; \
auto ComputeH1 = \
jit::KernelFuncs<jit::GRUH1Tuple<T>, platform::CPUPlace>::Cache().At( \
attr); \
auto ComputeHtPart1 = \
jit::KernelFuncs<jit::GRUHtPart1Tuple<T>, platform::CPUPlace>::Cache() \
.At(attr); \
auto ComputeHtPart2 = \
jit::KernelFuncs<jit::GRUHtPart2Tuple<T>, platform::CPUPlace>::Cache() \
.At(attr); \
const T* x_data = x->data<T>(); \
const T* wx_data = wx->data<T>(); \
const T* wh_data = wh->data<T>(); \
auto place = ctx.GetPlace(); \
#define INIT_OTHER_DEFINES \
auto* h0 = ctx.Input<phi::DenseTensor>("H0"); \
auto* wx = ctx.Input<phi::DenseTensor>("WeightX"); \
auto* bias = ctx.Input<phi::DenseTensor>("Bias"); \
auto* hidden_out = ctx.Output<phi::DenseTensor>("Hidden"); \
bool is_reverse = ctx.Attr<bool>("is_reverse"); \
const int M = x_mat_dims[1]; \
const int D = wh_dims[0]; \
const int D2 = D * 2; \
const phi::jit::gru_attr_t attr( \
D, \
phi::jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")), \
phi::jit::to_kerneltype(ctx.Attr<std::string>("activation"))); \
phi::jit::gru_t one_step; \
auto ComputeH1 = phi::jit::KernelFuncs<phi::jit::GRUH1Tuple<T>, \
platform::CPUPlace>::Cache() \
.At(attr); \
auto ComputeHtPart1 = phi::jit::KernelFuncs<phi::jit::GRUHtPart1Tuple<T>, \
platform::CPUPlace>::Cache() \
.At(attr); \
auto ComputeHtPart2 = phi::jit::KernelFuncs<phi::jit::GRUHtPart2Tuple<T>, \
platform::CPUPlace>::Cache() \
.At(attr); \
const T* x_data = x->data<T>(); \
const T* wx_data = wx->data<T>(); \
const T* wh_data = wh->data<T>(); \
auto place = ctx.GetPlace(); \
T* xx_data = xx->mutable_data<T>(place)

void SeqCompute(const framework::ExecutionContext& ctx) const {
60 changes: 30 additions & 30 deletions paddle/fluid/operators/fused/fusion_lstm_op.cc
@@ -16,9 +16,9 @@ limitations under the License. */

#include <string>

#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/fc_functor.h"
#include "paddle/phi/kernels/funcs/jit/kernels.h"
#include "paddle/phi/kernels/funcs/sequence2batch.h"

namespace paddle {
@@ -320,35 +320,35 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
const int D = wh_dims[0]; \
const int D4 = wh_dims[1]

#define INIT_OTHER_DEFINES \
const T* x_data = x->data<T>(); \
const T* wx_data = wx->data<T>(); \
const T* wh_data = wh->data<T>(); \
/* diagonal weight*/ \
const T* wp_data = bias->data<T>() + D4; \
/* for peephole only*/ \
T* checked_cell_data = nullptr; \
auto place = ctx.GetPlace(); \
if (use_peepholes) { \
/* w_ic * Ct-1, w_fc * Ct-1 ; w_oc * Ct => ih*/ \
auto* checked_cell = ctx.Output<phi::DenseTensor>("CheckedCell"); \
checked_cell_data = checked_cell->mutable_data<T>(place); \
} \
const jit::lstm_attr_t attr( \
D, \
jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")), \
jit::to_kerneltype(ctx.Attr<std::string>("candidate_activation")), \
jit::to_kerneltype(ctx.Attr<std::string>("cell_activation")), \
use_peepholes); \
jit::lstm_t one_step; \
one_step.wp = wp_data; \
one_step.checked = checked_cell_data; \
auto ComputeC1H1 = \
jit::KernelFuncs<jit::LSTMC1H1Tuple<T>, platform::CPUPlace>::Cache().At( \
attr); \
auto ComputeCtHt = \
jit::KernelFuncs<jit::LSTMCtHtTuple<T>, platform::CPUPlace>::Cache().At( \
attr)
#define INIT_OTHER_DEFINES \
const T* x_data = x->data<T>(); \
const T* wx_data = wx->data<T>(); \
const T* wh_data = wh->data<T>(); \
/* diagonal weight*/ \
const T* wp_data = bias->data<T>() + D4; \
/* for peephole only*/ \
T* checked_cell_data = nullptr; \
auto place = ctx.GetPlace(); \
if (use_peepholes) { \
/* w_ic * Ct-1, w_fc * Ct-1 ; w_oc * Ct => ih*/ \
auto* checked_cell = ctx.Output<phi::DenseTensor>("CheckedCell"); \
checked_cell_data = checked_cell->mutable_data<T>(place); \
} \
const phi::jit::lstm_attr_t attr( \
D, \
phi::jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")), \
phi::jit::to_kerneltype(ctx.Attr<std::string>("candidate_activation")), \
phi::jit::to_kerneltype(ctx.Attr<std::string>("cell_activation")), \
use_peepholes); \
phi::jit::lstm_t one_step; \
one_step.wp = wp_data; \
one_step.checked = checked_cell_data; \
auto ComputeC1H1 = phi::jit::KernelFuncs<phi::jit::LSTMC1H1Tuple<T>, \
platform::CPUPlace>::Cache() \
.At(attr); \
auto ComputeCtHt = phi::jit::KernelFuncs<phi::jit::LSTMCtHtTuple<T>, \
platform::CPUPlace>::Cache() \
.At(attr)

// Wh GEMM
#define GEMM_WH_ADDON(bs, prev, out) \
23 changes: 13 additions & 10 deletions paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
@@ -17,7 +17,7 @@
#include <string>
#include <vector>

#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/phi/kernels/funcs/jit/kernels.h"

namespace paddle {
namespace operators {
@@ -122,14 +122,17 @@ void FusionRepeatedFCReluOpMaker::Make() {
}

template <typename T>
static void fc_relu(
const T* x, const T* w, const T* b, T* y, const jit::matmul_attr_t& attr) {
auto matmul =
jit::KernelFuncs<jit::MatMulTuple<T>, platform::CPUPlace>::Cache().At(
attr);
auto addbias_relu =
jit::KernelFuncs<jit::VAddReluTuple<T>, platform::CPUPlace>::Cache().At(
attr.n);
static void fc_relu(const T* x,
const T* w,
const T* b,
T* y,
const phi::jit::matmul_attr_t& attr) {
auto matmul = phi::jit::KernelFuncs<phi::jit::MatMulTuple<T>,
platform::CPUPlace>::Cache()
.At(attr);
auto addbias_relu = phi::jit::KernelFuncs<phi::jit::VAddReluTuple<T>,
platform::CPUPlace>::Cache()
.At(attr.n);
matmul(x, w, y, &attr);
T* dst = y;
for (int i = 0; i < attr.m; ++i) {
@@ -152,7 +155,7 @@ class FusionRepeatedFCReluKernel : public framework::OpKernel<T> {

auto i_dims = in->dims();
const auto& w_dims = weights[0]->dims();
jit::matmul_attr_t attr;
phi::jit::matmul_attr_t attr;
attr.m = i_dims[0];
attr.n = w_dims[1];
attr.k = w_dims[0];
14 changes: 7 additions & 7 deletions paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
@@ -17,7 +17,7 @@
#include <string>
#include <vector>

#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/phi/kernels/funcs/jit/kernels.h"

namespace paddle {
namespace operators {
@@ -121,15 +121,15 @@ class FusionSeqPoolConcatKernel : public framework::OpKernel<T> {
"dims[1] is %d, w is %d.",
y_dims[1],
w));
jit::seq_pool_attr_t attr(w, jit::SeqPoolType::kSum);
phi::jit::seq_pool_attr_t attr(w, phi::jit::SeqPoolType::kSum);
if (pooltype == "AVERAGE") {
attr.type = jit::SeqPoolType::kAvg;
attr.type = phi::jit::SeqPoolType::kAvg;
} else if (pooltype == "SQRT") {
attr.type = jit::SeqPoolType::kSqrt;
attr.type = phi::jit::SeqPoolType::kSqrt;
}
auto seqpool =
jit::KernelFuncs<jit::SeqPoolTuple<T>, platform::CPUPlace>::Cache().At(
attr);
auto seqpool = phi::jit::KernelFuncs<phi::jit::SeqPoolTuple<T>,
platform::CPUPlace>::Cache()
.At(attr);
size_t n = ins.size();
size_t dst_step_size = n * w;
for (size_t i = 0; i < n; ++i) {
14 changes: 7 additions & 7 deletions paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
@@ -17,7 +17,7 @@
#include <string>
#include <vector>

#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/phi/kernels/funcs/jit/kernels.h"

namespace paddle {
namespace operators {
@@ -122,15 +122,15 @@ class FusionSeqPoolCVMConcatKernel : public framework::OpKernel<T> {
0,
paddle::platform::errors::InvalidArgument(
"The output of dims[1] should be dividable of w"));
jit::seq_pool_attr_t attr(w, jit::SeqPoolType::kSum);
phi::jit::seq_pool_attr_t attr(w, phi::jit::SeqPoolType::kSum);
if (pooltype == "AVERAGE") {
attr.type = jit::SeqPoolType::kAvg;
attr.type = phi::jit::SeqPoolType::kAvg;
} else if (pooltype == "SQRT") {
attr.type = jit::SeqPoolType::kSqrt;
attr.type = phi::jit::SeqPoolType::kSqrt;
}
auto seqpool =
jit::KernelFuncs<jit::SeqPoolTuple<T>, platform::CPUPlace>::Cache().At(
attr);
auto seqpool = phi::jit::KernelFuncs<phi::jit::SeqPoolTuple<T>,
platform::CPUPlace>::Cache()
.At(attr);
size_t n = ins.size();
size_t dst_step_size = n * w;
for (size_t i = 0; i < n; ++i) {
40 changes: 20 additions & 20 deletions paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc
@@ -17,7 +17,7 @@
#include <string>
#include <vector>

#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/phi/kernels/funcs/jit/kernels.h"

namespace paddle {
namespace operators {
@@ -99,30 +99,30 @@ class FusionSquaredMatSubKernel : public framework::OpKernel<T> {

auto x_dims = x->dims();
auto y_dims = y->dims();
jit::matmul_attr_t attr;
phi::jit::matmul_attr_t attr;
attr.m = x_dims[0];
attr.k = x_dims[1];
attr.n = y_dims[1];
int o_numel = attr.m * attr.n;

auto vsquare_x =
jit::KernelFuncs<jit::VSquareTuple<T>, platform::CPUPlace>::Cache().At(
attr.m * attr.k);
auto vsquare_y =
jit::KernelFuncs<jit::VSquareTuple<T>, platform::CPUPlace>::Cache().At(
attr.k * attr.n);
auto vsquare_xy =
jit::KernelFuncs<jit::VSquareTuple<T>, platform::CPUPlace>::Cache().At(
o_numel);
auto vsub =
jit::KernelFuncs<jit::VSubTuple<T>, platform::CPUPlace>::Cache().At(
o_numel);
auto vscal =
jit::KernelFuncs<jit::VScalTuple<T>, platform::CPUPlace>::Cache().At(
o_numel);
auto matmul =
jit::KernelFuncs<jit::MatMulTuple<T>, platform::CPUPlace>::Cache().At(
attr);
auto vsquare_x = phi::jit::KernelFuncs<phi::jit::VSquareTuple<T>,
platform::CPUPlace>::Cache()
.At(attr.m * attr.k);
auto vsquare_y = phi::jit::KernelFuncs<phi::jit::VSquareTuple<T>,
platform::CPUPlace>::Cache()
.At(attr.k * attr.n);
auto vsquare_xy = phi::jit::KernelFuncs<phi::jit::VSquareTuple<T>,
platform::CPUPlace>::Cache()
.At(o_numel);
auto vsub = phi::jit::KernelFuncs<phi::jit::VSubTuple<T>,
platform::CPUPlace>::Cache()
.At(o_numel);
auto vscal = phi::jit::KernelFuncs<phi::jit::VScalTuple<T>,
platform::CPUPlace>::Cache()
.At(o_numel);
auto matmul = phi::jit::KernelFuncs<phi::jit::MatMulTuple<T>,
platform::CPUPlace>::Cache()
.At(attr);

const T* x_data = x->data<T>();
const T* y_data = y->data<T>();