move gather.h gather.cu.h scatter.h scatter.cu.h to phi library
sneaxiy committed Mar 1, 2022
1 parent 2565077 commit 9dcc33e
Showing 30 changed files with 698 additions and 623 deletions.
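
The migration is mechanical: each file drops the legacy paddle/fluid/operators/gather.h and gather.cu.h includes (the scatter headers move the same way, per the commit title) in favor of the phi headers, and every call site gains a phi::funcs:: qualification. A minimal sketch of the pattern, assuming placeholder names (dev_ctx, src, index, out) rather than the actual call sites shown in the hunks below:

-#include "paddle/fluid/operators/gather.h"       // old fluid header (CPUGather)
-#include "paddle/fluid/operators/gather.cu.h"    // old fluid header (GPUGather)
+#include "paddle/phi/kernels/funcs/gather.h"     // new home in the phi library (CPU)
+#include "paddle/phi/kernels/funcs/gather.cu.h"  // new home in the phi library (GPU)

-CPUGather<T>(dev_ctx, src, index, &out);
-GPUGather<T>(dev_ctx, src, index, &out);
+phi::funcs::CPUGather<T>(dev_ctx, src, index, &out);
+phi::funcs::GPUGather<T>(dev_ctx, src, index, &out);
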
1 change: 0 additions & 1 deletion paddle/fluid/operators/detection/bbox_util.cu.h
@@ -23,7 +23,6 @@ limitations under the License. */
 #include <hipcub/hipcub.hpp>
 namespace cub = hipcub;
 #endif
-#include "paddle/fluid/operators/gather.cu.h"
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 #include "paddle/fluid/platform/for_range.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
10 changes: 5 additions & 5 deletions paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
@@ -23,11 +23,11 @@ namespace cub = hipcub;
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/operators/detection/bbox_util.h"
 #include "paddle/fluid/operators/detection/collect_fpn_proposals_op.h"
-#include "paddle/fluid/operators/gather.cu.h"
 #include "paddle/fluid/operators/math/concat_and_split.h"
 #include "paddle/fluid/operators/strided_memcpy.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/kernels/funcs/gather.cu.h"

 namespace paddle {
 namespace operators {
@@ -160,9 +160,9 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> {
 sorted_rois.mutable_data<T>({real_post_num, kBBoxSize}, dev_ctx.GetPlace());
 Tensor sorted_batch_id;
 sorted_batch_id.mutable_data<int>({real_post_num}, dev_ctx.GetPlace());
-GPUGather<T>(dev_ctx, concat_rois, index_out_t, &sorted_rois);
-GPUGather<int>(dev_ctx, roi_batch_id_list_gpu, index_out_t,
-&sorted_batch_id);
+phi::funcs::GPUGather<T>(dev_ctx, concat_rois, index_out_t, &sorted_rois);
+phi::funcs::GPUGather<int>(dev_ctx, roi_batch_id_list_gpu, index_out_t,
+&sorted_batch_id);

 Tensor batch_index_t;
 int* batch_idx_in =
@@ -190,7 +190,7 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> {
 out_id_data, batch_idx_in, index_out_t.data<int>(), real_post_num, 0,
 sizeof(int) * 8, dev_ctx.stream());

-GPUGather<T>(dev_ctx, sorted_rois, index_out_t, fpn_rois);
+phi::funcs::GPUGather<T>(dev_ctx, sorted_rois, index_out_t, fpn_rois);

 Tensor length_lod;
 int* length_lod_data =
6 changes: 3 additions & 3 deletions paddle/fluid/operators/detection/collect_fpn_proposals_op.h
@@ -21,7 +21,6 @@ limitations under the License.*/
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/gather.h"
 #include "paddle/phi/kernels/funcs/math_function.h"

 namespace paddle {
@@ -66,7 +65,8 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel<T> {

 auto multi_layer_scores =
 context.MultiInput<paddle::framework::LoDTensor>("MultiLevelScores");
-auto multi_rois_num = context.MultiInput<Tensor>("MultiLevelRoIsNum");
+auto multi_rois_num =
+context.MultiInput<framework::Tensor>("MultiLevelRoIsNum");
 int num_size = multi_rois_num.size();

 auto* fpn_rois = context.Output<paddle::framework::LoDTensor>("FpnRois");
@@ -176,7 +176,7 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel<T> {
 }
 num_per_batch.emplace_back(post_nms_topN - pre_idx);
 if (context.HasOutput("RoisNum")) {
-auto* rois_num = context.Output<Tensor>("RoisNum");
+auto* rois_num = context.Output<framework::Tensor>("RoisNum");
 int* rois_num_data =
 rois_num->mutable_data<int>({batch_size}, context.GetPlace());
 for (int i = 0; i < batch_size; i++) {
paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu
@@ -24,9 +24,9 @@ namespace cub = hipcub;
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/operators/detection/bbox_util.h"
 #include "paddle/fluid/operators/detection/distribute_fpn_proposals_op.h"
-#include "paddle/fluid/operators/gather.cu.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/kernels/funcs/gather.cu.h"
 #include "paddle/phi/kernels/funcs/math_function.h"

 namespace paddle {
@@ -193,7 +193,8 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
 start = end;
 multi_fpn_rois[i]->mutable_data<T>({sub_rois_num, kBoxDim},
 dev_ctx.GetPlace());
-GPUGather<T>(dev_ctx, *fpn_rois, sub_idx, multi_fpn_rois[i]);
+phi::funcs::GPUGather<T>(dev_ctx, *fpn_rois, sub_idx,
+multi_fpn_rois[i]);
 } else {
 multi_fpn_rois[i]->mutable_data<T>({sub_rois_num, kBoxDim},
 dev_ctx.GetPlace());
15 changes: 8 additions & 7 deletions paddle/fluid/operators/detection/distribute_fpn_proposals_op.h
@@ -20,18 +20,18 @@ limitations under the License. */
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/gather.h"
 #include "paddle/phi/kernels/funcs/math_function.h"

 namespace paddle {
 namespace operators {

 const int kBoxDim = 4;

-inline std::vector<size_t> GetLodFromRoisNum(const Tensor* rois_num) {
+inline std::vector<size_t> GetLodFromRoisNum(
+const framework::Tensor* rois_num) {
 std::vector<size_t> rois_lod;
 auto* rois_num_data = rois_num->data<int>();
-Tensor cpu_tensor;
+framework::Tensor cpu_tensor;
 if (platform::is_gpu_place(rois_num->place())) {
 paddle::framework::TensorCopySync(*rois_num, platform::CPUPlace(),
 &cpu_tensor);
@@ -93,7 +93,7 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
 std::vector<size_t> fpn_rois_lod;
 int fpn_rois_num;
 if (context.HasInput("RoisNum")) {
-auto* rois_num = context.Input<Tensor>("RoisNum");
+auto* rois_num = context.Input<framework::Tensor>("RoisNum");
 fpn_rois_lod = GetLodFromRoisNum(rois_num);
 } else {
 fpn_rois_lod = fpn_rois->lod().back();
@@ -105,7 +105,7 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
 std::vector<int> num_rois_level(num_level, 0);
 std::vector<int> num_rois_level_integral(num_level + 1, 0);
 for (size_t i = 0; i < fpn_rois_lod.size() - 1; ++i) {
-Tensor fpn_rois_slice =
+auto fpn_rois_slice =
 fpn_rois->Slice(fpn_rois_lod[i], fpn_rois_lod[i + 1]);
 const T* rois_data = fpn_rois_slice.data<T>();
 for (int j = 0; j < fpn_rois_slice.dims()[0]; ++j) {
@@ -140,7 +140,7 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
 std::vector<int> restore_index_inter(fpn_rois_num, -1);
 // distribute the rois into different fpn level by target level
 for (size_t i = 0; i < fpn_rois_lod.size() - 1; ++i) {
-Tensor fpn_rois_slice =
+auto fpn_rois_slice =
 fpn_rois->Slice(fpn_rois_lod[i], fpn_rois_lod[i + 1]);
 const T* rois_data = fpn_rois_slice.data<T>();
 size_t cur_offset = fpn_rois_lod[i];
@@ -163,7 +163,8 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
 for (int i = 0; i < fpn_rois_num; ++i) {
 restore_index_data[restore_index_inter[i]] = i;
 }
-auto multi_rois_num = context.MultiOutput<Tensor>("MultiLevelRoIsNum");
+auto multi_rois_num =
+context.MultiOutput<framework::Tensor>("MultiLevelRoIsNum");
 if (multi_rois_num.size() > 0) {
 int batch_size = fpn_rois_lod.size() - 1;
 for (int i = 0; i < num_level; ++i) {
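
A side effect visible in the two FPN-proposal headers above: call sites that used the bare Tensor name now spell out framework::Tensor, or switch to auto. The likely reason (an assumption, not stated in the commit) is that the removed fluid gather.h header was what previously pulled that alias into scope, so with it gone the unqualified name no longer resolves. The two forms of the fix, taken from the hunks above:

-auto* rois_num = context.Input<Tensor>("RoisNum");
+auto* rois_num = context.Input<framework::Tensor>("RoisNum");

-Tensor fpn_rois_slice =
+auto fpn_rois_slice =
 fpn_rois->Slice(fpn_rois_lod[i], fpn_rois_lod[i + 1]);
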
paddle/fluid/operators/detection/generate_mask_labels_op.cc
@@ -17,7 +17,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/detection/bbox_util.h"
 #include "paddle/fluid/operators/detection/mask_util.h"
-#include "paddle/fluid/operators/gather.h"
 #include "paddle/fluid/operators/math/concat_and_split.h"
 #include "paddle/phi/kernels/funcs/math_function.h"

16 changes: 8 additions & 8 deletions paddle/fluid/operators/detection/generate_proposal_labels_op.cc
@@ -16,8 +16,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/operators/detection/bbox_util.h"
-#include "paddle/fluid/operators/gather.h"
 #include "paddle/fluid/operators/math/concat_and_split.h"
+#include "paddle/phi/kernels/funcs/gather.h"
 #include "paddle/phi/kernels/funcs/math_function.h"

 namespace paddle {
@@ -281,22 +281,22 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context,

 Tensor fg_boxes, bg_boxes, fg_labels, bg_labels;
 fg_boxes.mutable_data<T>({fg_num, kBoxDim}, context.GetPlace());
-CPUGather<T>(context, boxes, fg_inds_t, &fg_boxes);
+phi::funcs::CPUGather<T>(context, boxes, fg_inds_t, &fg_boxes);
 bg_boxes.mutable_data<T>({bg_num, kBoxDim}, context.GetPlace());
-CPUGather<T>(context, boxes, bg_inds_t, &bg_boxes);
+phi::funcs::CPUGather<T>(context, boxes, bg_inds_t, &bg_boxes);
 Concat<T>(context, fg_boxes, bg_boxes, sampled_boxes);
-CPUGather<T>(context, gt_boxes, gt_box_inds_t, sampled_gts);
+phi::funcs::CPUGather<T>(context, gt_boxes, gt_box_inds_t, sampled_gts);
 fg_labels.mutable_data<int>({fg_num}, context.GetPlace());
-CPUGather<int>(context, gt_classes, gt_label_inds_t, &fg_labels);
+phi::funcs::CPUGather<int>(context, gt_classes, gt_label_inds_t, &fg_labels);
 bg_labels.mutable_data<int>({bg_num}, context.GetPlace());
 phi::funcs::set_constant(context, &bg_labels, 0);
 Concat<int>(context, fg_labels, bg_labels, sampled_labels);

 Tensor fg_max_overlap, bg_max_overlap;
 fg_max_overlap.mutable_data<T>({fg_num}, context.GetPlace());
-CPUGather<T>(context, max_overlap, fg_inds_t, &fg_max_overlap);
+phi::funcs::CPUGather<T>(context, max_overlap, fg_inds_t, &fg_max_overlap);
 bg_max_overlap.mutable_data<T>({bg_num}, context.GetPlace());
-CPUGather<T>(context, max_overlap, bg_inds_t, &bg_max_overlap);
+phi::funcs::CPUGather<T>(context, max_overlap, bg_inds_t, &bg_max_overlap);
 Concat<T>(context, fg_max_overlap, bg_max_overlap, sampled_max_overlap);
 }

@@ -334,7 +334,7 @@ std::vector<Tensor> SampleRoisForOneImage(
 } else {
 proposals_num = keep.numel();
 roi_filter.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
-CPUGather<T>(context, rpn_rois, keep, &roi_filter);
+phi::funcs::CPUGather<T>(context, rpn_rois, keep, &roi_filter);
 }
 T* roi_filter_dt = roi_filter.data<T>();
 memcpy(rpn_rois_dt, roi_filter_dt, roi_filter.numel() * sizeof(T));
18 changes: 9 additions & 9 deletions paddle/fluid/operators/detection/generate_proposals_op.cc
@@ -20,7 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/operators/detection/bbox_util.h"
 #include "paddle/fluid/operators/detection/nms_util.h"
-#include "paddle/fluid/operators/gather.h"
+#include "paddle/phi/kernels/funcs/gather.h"
 #include "paddle/phi/kernels/funcs/math_function.h"

 namespace paddle {
@@ -196,10 +196,10 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
 anchor_sel.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
 var_sel.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());

-CPUGather<T>(ctx, scores_slice, index_t, &scores_sel);
-CPUGather<T>(ctx, bbox_deltas_slice, index_t, &bbox_sel);
-CPUGather<T>(ctx, anchors, index_t, &anchor_sel);
-CPUGather<T>(ctx, variances, index_t, &var_sel);
+phi::funcs::CPUGather<T>(ctx, scores_slice, index_t, &scores_sel);
+phi::funcs::CPUGather<T>(ctx, bbox_deltas_slice, index_t, &bbox_sel);
+phi::funcs::CPUGather<T>(ctx, anchors, index_t, &anchor_sel);
+phi::funcs::CPUGather<T>(ctx, variances, index_t, &var_sel);

 Tensor proposals;
 proposals.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
@@ -223,8 +223,8 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
 Tensor scores_filter;
 bbox_sel.mutable_data<T>({keep.numel(), 4}, ctx.GetPlace());
 scores_filter.mutable_data<T>({keep.numel(), 1}, ctx.GetPlace());
-CPUGather<T>(ctx, proposals, keep, &bbox_sel);
-CPUGather<T>(ctx, scores_sel, keep, &scores_filter);
+phi::funcs::CPUGather<T>(ctx, proposals, keep, &bbox_sel);
+phi::funcs::CPUGather<T>(ctx, scores_sel, keep, &scores_filter);
 if (nms_thresh <= 0) {
 return std::make_pair(bbox_sel, scores_filter);
 }
@@ -237,8 +237,8 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {

 proposals.mutable_data<T>({keep_nms.numel(), 4}, ctx.GetPlace());
 scores_sel.mutable_data<T>({keep_nms.numel(), 1}, ctx.GetPlace());
-CPUGather<T>(ctx, bbox_sel, keep_nms, &proposals);
-CPUGather<T>(ctx, scores_filter, keep_nms, &scores_sel);
+phi::funcs::CPUGather<T>(ctx, bbox_sel, keep_nms, &proposals);
+phi::funcs::CPUGather<T>(ctx, scores_filter, keep_nms, &scores_sel);

 return std::make_pair(proposals, scores_sel);
 }
9 changes: 5 additions & 4 deletions paddle/fluid/operators/detection/generate_proposals_op.cu
@@ -20,6 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memory.h"
 #include "paddle/fluid/operators/detection/bbox_util.cu.h"
+#include "paddle/phi/kernels/funcs/gather.cu.h"
 #include "paddle/phi/kernels/funcs/math_function.h"

 namespace paddle {
@@ -85,8 +86,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
 }
 proposals_filter.mutable_data<T>({keep_num, 4}, ctx.GetPlace());
 scores_filter.mutable_data<T>({keep_num, 1}, ctx.GetPlace());
-GPUGather<T>(ctx, proposals, keep_index, &proposals_filter);
-GPUGather<T>(ctx, scores_sort, keep_index, &scores_filter);
+phi::funcs::GPUGather<T>(ctx, proposals, keep_index, &proposals_filter);
+phi::funcs::GPUGather<T>(ctx, scores_sort, keep_index, &scores_filter);

 if (nms_thresh <= 0) {
 return std::make_pair(proposals_filter, scores_filter);
@@ -102,8 +103,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
 Tensor scores_nms, proposals_nms;
 proposals_nms.mutable_data<T>({keep_nms.numel(), 4}, ctx.GetPlace());
 scores_nms.mutable_data<T>({keep_nms.numel(), 1}, ctx.GetPlace());
-GPUGather<T>(ctx, proposals_filter, keep_nms, &proposals_nms);
-GPUGather<T>(ctx, scores_filter, keep_nms, &scores_nms);
+phi::funcs::GPUGather<T>(ctx, proposals_filter, keep_nms, &proposals_nms);
+phi::funcs::GPUGather<T>(ctx, scores_filter, keep_nms, &scores_nms);

 return std::make_pair(proposals_nms, scores_nms);
 }
18 changes: 9 additions & 9 deletions paddle/fluid/operators/detection/generate_proposals_v2_op.cc
@@ -20,7 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/operators/detection/bbox_util.h"
 #include "paddle/fluid/operators/detection/nms_util.h"
-#include "paddle/fluid/operators/gather.h"
+#include "paddle/phi/kernels/funcs/gather.h"
 #include "paddle/phi/kernels/funcs/math_function.h"

 namespace paddle {
@@ -197,10 +197,10 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
 anchor_sel.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
 var_sel.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());

-CPUGather<T>(ctx, scores_slice, index_t, &scores_sel);
-CPUGather<T>(ctx, bbox_deltas_slice, index_t, &bbox_sel);
-CPUGather<T>(ctx, anchors, index_t, &anchor_sel);
-CPUGather<T>(ctx, variances, index_t, &var_sel);
+phi::funcs::CPUGather<T>(ctx, scores_slice, index_t, &scores_sel);
+phi::funcs::CPUGather<T>(ctx, bbox_deltas_slice, index_t, &bbox_sel);
+phi::funcs::CPUGather<T>(ctx, anchors, index_t, &anchor_sel);
+phi::funcs::CPUGather<T>(ctx, variances, index_t, &var_sel);

 Tensor proposals;
 proposals.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
@@ -227,8 +227,8 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
 Tensor scores_filter;
 bbox_sel.mutable_data<T>({keep.numel(), 4}, ctx.GetPlace());
 scores_filter.mutable_data<T>({keep.numel(), 1}, ctx.GetPlace());
-CPUGather<T>(ctx, proposals, keep, &bbox_sel);
-CPUGather<T>(ctx, scores_sel, keep, &scores_filter);
+phi::funcs::CPUGather<T>(ctx, proposals, keep, &bbox_sel);
+phi::funcs::CPUGather<T>(ctx, scores_sel, keep, &scores_filter);
 if (nms_thresh <= 0) {
 return std::make_pair(bbox_sel, scores_filter);
 }
@@ -242,8 +242,8 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {

 proposals.mutable_data<T>({keep_nms.numel(), 4}, ctx.GetPlace());
 scores_sel.mutable_data<T>({keep_nms.numel(), 1}, ctx.GetPlace());
-CPUGather<T>(ctx, bbox_sel, keep_nms, &proposals);
-CPUGather<T>(ctx, scores_filter, keep_nms, &scores_sel);
+phi::funcs::CPUGather<T>(ctx, bbox_sel, keep_nms, &proposals);
+phi::funcs::CPUGather<T>(ctx, scores_filter, keep_nms, &scores_sel);

 return std::make_pair(proposals, scores_sel);
 }
9 changes: 5 additions & 4 deletions paddle/fluid/operators/detection/generate_proposals_v2_op.cu
@@ -20,6 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memory.h"
 #include "paddle/fluid/operators/detection/bbox_util.cu.h"
+#include "paddle/phi/kernels/funcs/gather.cu.h"
 #include "paddle/phi/kernels/funcs/math_function.h"

 namespace paddle {
@@ -86,8 +87,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
 }
 proposals_filter.mutable_data<T>({keep_num, 4}, ctx.GetPlace());
 scores_filter.mutable_data<T>({keep_num, 1}, ctx.GetPlace());
-GPUGather<T>(ctx, proposals, keep_index, &proposals_filter);
-GPUGather<T>(ctx, scores_sort, keep_index, &scores_filter);
+phi::funcs::GPUGather<T>(ctx, proposals, keep_index, &proposals_filter);
+phi::funcs::GPUGather<T>(ctx, scores_sort, keep_index, &scores_filter);

 if (nms_thresh <= 0) {
 return std::make_pair(proposals_filter, scores_filter);
@@ -104,8 +105,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
 Tensor scores_nms, proposals_nms;
 proposals_nms.mutable_data<T>({keep_nms.numel(), 4}, ctx.GetPlace());
 scores_nms.mutable_data<T>({keep_nms.numel(), 1}, ctx.GetPlace());
-GPUGather<T>(ctx, proposals_filter, keep_nms, &proposals_nms);
-GPUGather<T>(ctx, scores_filter, keep_nms, &scores_nms);
+phi::funcs::GPUGather<T>(ctx, proposals_filter, keep_nms, &proposals_nms);
+phi::funcs::GPUGather<T>(ctx, scores_filter, keep_nms, &scores_nms);

 return std::make_pair(proposals_nms, scores_nms);
 }