Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

【Pten】Support data transform in C++ API #39263

Merged
merged 11 commits into from
Feb 4, 2022
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ paddle/fluid/API_DEV.spec
paddle/fluid/API_PR.spec
paddle/fluid/op_use_default_grad_maker_DEV.spec
paddle/fluid/op_use_default_grad_maker_PR.spec
paddle/pten/api/*/api.*
paddle/pten/api/*/backward*
paddle/pten/api/include/api.h
paddle/pten/api/lib/api.cc
paddle/pten/api/backward/backward_api.h
paddle/pten/api/lib/backward_api.cc
paddle/pten/include/*
paddle/pten/extension.h

Expand Down
12 changes: 8 additions & 4 deletions paddle/pten/api/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ cc_library(kernel_dispatch SRCS kernel_dispatch.cc DEPS pten_tensor pten_context
cc_library(op_meta_info SRCS op_meta_info.cc DEPS pten_tensor)
cc_library(op_kernel_info SRCS op_kernel_info.cc DEPS pten_tensor)


set(api_gen_utils ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/gen_utils.py)

# forward api file
set(api_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api_gen.py)
set(api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api.yaml)
Expand Down Expand Up @@ -46,7 +49,7 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_header_file_tmp} ${api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_source_file_tmp} ${api_source_file}
COMMENT "copy_if_different ${api_header_file} ${api_source_file}"
DEPENDS ${api_yaml_file} ${api_gen_file}
DEPENDS ${api_yaml_file} ${api_gen_file} ${api_gen_utils}
VERBATIM)

# generate backward api
Expand All @@ -59,9 +62,10 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_header_file_tmp} ${bw_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_source_file_tmp} ${bw_api_source_file}
COMMENT "copy_if_different ${bw_api_header_file} ${bw_api_source_file}"
DEPENDS ${bw_api_yaml_file} ${bw_api_gen_file}
DEPENDS ${bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_utils}
VERBATIM)

cc_library(pten_data_transform SRCS data_transform.cc DEPS pten_tensor transfer_layout_kernel cast_kernel data_device_transform)
cc_library(manual_api SRCS manual_api.cc DEPS pten_tensor pten kernel_dispatch)
cc_library(pten_function_api SRCS ${api_source_file} DEPS pten_tensor pten kernel_dispatch)
cc_library(pten_bw_function_api SRCS ${bw_api_source_file} DEPS pten_tensor pten kernel_dispatch backward_infermeta pten_function_api)
cc_library(pten_function_api SRCS ${api_source_file} DEPS pten_tensor pten kernel_dispatch pten_data_transform)
cc_library(pten_bw_function_api SRCS ${bw_api_source_file} DEPS pten_tensor pten kernel_dispatch backward_infermeta pten_data_transform pten_function_api)
232 changes: 232 additions & 0 deletions paddle/pten/api/lib/data_transform.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/pten/api/lib/data_transform.h"

#include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/api/lib/kernel_dispatch.h"
#include "paddle/pten/backends/all_context.h"
#include "paddle/pten/kernels/cast_kernel.h"
#include "paddle/pten/kernels/transfer_layout_kernel.h"

#include "paddle/fluid/framework/data_device_transform.h"

namespace paddle {
namespace experimental {

// Returns true when a dtype cast is required: the source and target dtypes
// differ, and either dtype transformation is enabled by the flag or the
// target is a complex type (complex targets are always converted).
inline bool NeedTransformDataType(const DataType& input,
                                  const DataType& target,
                                  const TransformFlag& transform_flag) {
  if (input == target) {
    return false;
  }
  const bool target_is_complex =
      target == DataType::COMPLEX64 || target == DataType::COMPLEX128;
  return transform_flag.need_trans_data_type() || target_is_complex;
}

// Returns true when a device copy is required: backend transformation is
// enabled, the target backend is a concrete one (not ALL_BACKEND), and the
// tensor does not already live on the corresponding place.
inline bool NeedTransformPlace(const paddle::platform::Place& input,
                               const Backend& target,
                               const TransformFlag& transform_flag) {
  if (!transform_flag.need_trans_backend() || target == Backend::ALL_BACKEND) {
    return false;
  }
  return !platform::is_same_place(input, pten::TransToFluidPlace(target));
}

// Returns true when a layout transform is required: layout transformation is
// enabled, both layouts are concrete (not ALL_LAYOUT), and they differ.
inline bool NeedTransformLayout(const DataLayout& input,
                                const DataLayout& target,
                                const TransformFlag& transform_flag) {
  if (!transform_flag.need_trans_layout()) {
    return false;
  }
  return input != DataLayout::ALL_LAYOUT && target != DataLayout::ALL_LAYOUT &&
         input != target;
}

// Transforms `tensor` to the given data layout. Only CPU tensors are
// supported; any other place throws PreconditionNotMet.
inline pten::DenseTensor TransDataLayout(const pten::DenseTensor& tensor,
                                         DataLayout layout) {
  auto& pool = paddle::platform::DeviceContextPool::Instance();
  VLOG(3) << "DataLayoutTransform src_layout: " << tensor.layout()
          << " dst_layout: " << layout;
  if (platform::is_cpu_place(tensor.place())) {
    auto* dev_ctx = static_cast<pten::CPUContext*>(pool.Get(tensor.place()));
    return pten::TransferLayout(*dev_ctx, tensor, layout);
  } else {
    // Fix: the original message said "cast from CPU to GPU", but this branch
    // is reached for ANY non-CPU tensor; report the actual limitation.
    PADDLE_THROW(pten::errors::PreconditionNotMet(
        "Unsupported place for data layout transform, only CPU place is "
        "supported."));
  }
}

// Casts `tensor` to `dtype` by dispatching pten::Cast on the tensor's current
// element type; throws Unimplemented for unsupported source dtypes.
// NOTE(review): "CastDateType" looks like a typo for "CastDataType" — kept
// as-is because the GPU overload and TransDataType in this file call it by
// this name; confirm before renaming.
template <typename Context>
pten::DenseTensor CastDateType(const Context& dev_ctx,
                               const pten::DenseTensor& tensor,
                               DataType dtype) {
  switch (tensor.dtype()) {
    case DataType::FLOAT32:
      return pten::Cast<float>(dev_ctx, tensor, dtype);
    case DataType::FLOAT64:
      return pten::Cast<double>(dev_ctx, tensor, dtype);
    case DataType::INT32:
      return pten::Cast<int32_t>(dev_ctx, tensor, dtype);
    case DataType::INT64:
      return pten::Cast<int64_t>(dev_ctx, tensor, dtype);
    case DataType::FLOAT16:
      return pten::Cast<pten::dtype::float16>(dev_ctx, tensor, dtype);
    case DataType::BFLOAT16:
      return pten::Cast<pten::dtype::bfloat16>(dev_ctx, tensor, dtype);
    case DataType::BOOL:
      return pten::Cast<bool>(dev_ctx, tensor, dtype);
    case DataType::INT16:
      return pten::Cast<int16_t>(dev_ctx, tensor, dtype);
    case DataType::UINT8:
      return pten::Cast<uint8_t>(dev_ctx, tensor, dtype);
    default:
      PADDLE_THROW(pten::errors::Unimplemented(
          "Data type (%s) is not supported when casting data type.",
          tensor.dtype()));
  }
}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// GPU-specific overload of CastDateType.
// NOTE: the GPU Cast kernel does not support bfloat16, so the BFLOAT16 case
// handled by the generic template above is intentionally absent here
// (per the PR review discussion).
// This span of the scraped source was interleaved with review-comment text;
// the function is reconstructed here without it.
pten::DenseTensor CastDateType(const pten::GPUContext& dev_ctx,
                               const pten::DenseTensor& tensor,
                               DataType dtype) {
  switch (tensor.dtype()) {
    case DataType::FLOAT32:
      return pten::Cast<float>(dev_ctx, tensor, dtype);
    case DataType::FLOAT64:
      return pten::Cast<double>(dev_ctx, tensor, dtype);
    case DataType::INT32:
      return pten::Cast<int32_t>(dev_ctx, tensor, dtype);
    case DataType::INT64:
      return pten::Cast<int64_t>(dev_ctx, tensor, dtype);
    case DataType::FLOAT16:
      return pten::Cast<pten::dtype::float16>(dev_ctx, tensor, dtype);
    case DataType::BOOL:
      return pten::Cast<bool>(dev_ctx, tensor, dtype);
    case DataType::INT16:
      return pten::Cast<int16_t>(dev_ctx, tensor, dtype);
    case DataType::UINT8:
      return pten::Cast<uint8_t>(dev_ctx, tensor, dtype);
    default:
      PADDLE_THROW(pten::errors::Unimplemented(
          "Data type (%s) is not supported when casting data type.",
          tensor.dtype()));
  }
}
#endif

// Casts `tensor` to `dtype` on its current place. CPU is always supported;
// GPU is supported when compiled with CUDA/HIP; any other place throws.
inline pten::DenseTensor TransDataType(const pten::DenseTensor& tensor,
                                       DataType dtype) {
  auto& pool = paddle::platform::DeviceContextPool::Instance();

  VLOG(3) << "DataTypeTransform src_dtype: " << tensor.dtype()
          << " dst_dtype: " << dtype;

  // Fix: the original constructed a DenseTensor `out` up front that was only
  // reachable via a dead trailing `return out;` — every live path returns
  // from CastDateType or throws. The wasted construction is removed.
  if (platform::is_cpu_place(tensor.place())) {
    auto* dev_ctx = static_cast<pten::CPUContext*>(pool.Get(tensor.place()));
    return CastDateType(*dev_ctx, tensor, dtype);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  } else if (platform::is_gpu_place(tensor.place())) {
    auto* dev_ctx = static_cast<pten::GPUContext*>(pool.Get(tensor.place()));
    return CastDateType(*dev_ctx, tensor, dtype);
#endif
  } else {
    PADDLE_THROW(pten::errors::Unimplemented(
        "Place type is not supported when casting data type."));
  }
}

// Applies, in order, the layout, dtype and device transforms that
// `target_args_def` requires (subject to `transform_flag`) and returns the
// resulting tensor; transforms that are not needed are skipped.
pten::DenseTensor TransformData(const pten::DenseTensor& tensor,
                                const pten::TensorArgDef& target_args_def,
                                const TransformFlag& transform_flag) {
  pten::DenseTensor out = tensor;

  const bool layout_mismatch = NeedTransformLayout(
      tensor.layout(), target_args_def.layout, transform_flag);
  if (layout_mismatch) {
    out = TransDataLayout(out, target_args_def.layout);
  }

  const bool dtype_mismatch = NeedTransformDataType(
      tensor.dtype(), target_args_def.dtype, transform_flag);
  if (dtype_mismatch) {
    out = TransDataType(out, target_args_def.dtype);
  }

  if (NeedTransformPlace(out.place(), target_args_def.backend,
                         transform_flag)) {
    const auto dst_place = pten::TransToFluidPlace(target_args_def.backend);
    pten::DenseTensor result(
        pten::make_intrusive<paddle::experimental::SharedStorage>(dst_place),
        {out.dtype(), out.dims(), out.layout()});
    framework::TransDataDevice(out, dst_place, &result);
    out = result;
  }
  return out;
}

// Returns the input's underlying DenseTensor, transformed to match
// `target_args_def` when place/dtype/layout require it; when no transform is
// needed (or the tensor is uninitialized), the original tensor is shared
// without copying.
std::shared_ptr<pten::DenseTensor> PrepareData(
    const Tensor& input,
    const pten::TensorArgDef& target_args_def,
    const TransformFlag& transform_flag) {
  const auto& tensor_in = input.impl();

  const bool no_transform_needed =
      !transform_flag.NeedTransform() || !tensor_in->initialized() ||
      (!NeedTransformPlace(
           tensor_in->place(), target_args_def.backend, transform_flag) &&
       !NeedTransformDataType(
           tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
       !NeedTransformLayout(
           tensor_in->layout(), target_args_def.layout, transform_flag));
  if (no_transform_needed) {
    return std::dynamic_pointer_cast<pten::DenseTensor>(tensor_in);
  }

  auto* dense_in = static_cast<pten::DenseTensor*>(tensor_in.get());
  return std::make_shared<pten::DenseTensor>(
      TransformData(*dense_in, target_args_def, transform_flag));
}

// Vector overload of PrepareData: for each input tensor, either shares the
// underlying DenseTensor (when no transform is needed or it is uninitialized)
// or appends a transformed copy matching `target_args_def`.
std::unique_ptr<std::vector<pten::DenseTensor>> PrepareData(
    const std::vector<Tensor>& inputs,
    const pten::TensorArgDef& target_args_def,
    const TransformFlag& transform_flag) {
  auto pt_tensors = std::make_unique<std::vector<pten::DenseTensor>>();
  pt_tensors->reserve(inputs.size());

  for (const auto& input : inputs) {
    const auto& tensor_in = input.impl();
    if (!transform_flag.NeedTransform() || !tensor_in->initialized() ||
        (!NeedTransformPlace(
             tensor_in->place(), target_args_def.backend, transform_flag) &&
         !NeedTransformDataType(
             tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
         !NeedTransformLayout(
             tensor_in->layout(), target_args_def.layout, transform_flag))) {
      pt_tensors->emplace_back(
          *std::dynamic_pointer_cast<pten::DenseTensor>(tensor_in));
    } else {
      pt_tensors->emplace_back(
          TransformData(*(static_cast<pten::DenseTensor*>(tensor_in.get())),
                        target_args_def,
                        transform_flag));
    }
  }

  // Fix: return by name. Wrapping a named local in std::move() is redundant
  // for a move-only type and inhibits NRVO (CppCoreGuidelines F.48,
  // clang-tidy performance-no-automatic-move).
  return pt_tensors;
}

} // namespace experimental
} // namespace paddle
75 changes: 75 additions & 0 deletions paddle/pten/api/lib/data_transform.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/pten/api/include/tensor.h"
#include "paddle/pten/core/kernel_factory.h"

namespace paddle {
namespace experimental {

// Flags controlling which kinds of input data transformation (dtype, backend
// place, layout) the C++ API may perform before kernel dispatch.
class TransformFlag {
 public:
  TransformFlag(bool stop_transform = false,
                bool trans_dtype = false,
                bool trans_backend = true,
                bool trans_layout = true)
      : stop_transform_(stop_transform),
        trans_data_type_(trans_dtype),
        trans_backend_(trans_backend),
        trans_layout_(trans_layout) {}

  // True when any transformation may be performed at all.
  bool NeedTransform() const {
    return !stop_transform_ &&
           (trans_data_type_ || trans_backend_ || trans_layout_);
  }

  bool need_trans_data_type() const {
    return !stop_transform_ && trans_data_type_;
  }

  bool need_trans_backend() const { return !stop_transform_ && trans_backend_; }

  bool need_trans_layout() const { return !stop_transform_ && trans_layout_; }

 private:
  // This is the highest-priority flag,
  // and can be set by api[data_transform->skip_transform] in the yaml file.
  bool stop_transform_ = false;

  // trans_data_type_ can be set by api[data_transform->support_trans_dtype]
  // in the yaml file.
  // trans_data_type_ only affects non-complex types;
  // complex types are always transferred, unless stop_transform_ is true.
  bool trans_data_type_ = false;

  // trans_backend_ and trans_layout_ are true by default,
  // and can only be set by the global flag.
  bool trans_backend_ = true;
  bool trans_layout_ = true;
};

std::shared_ptr<pten::DenseTensor> PrepareData(
const Tensor& input,
const pten::TensorArgDef& target_args_def,
const TransformFlag& transform_flag);

std::unique_ptr<std::vector<pten::DenseTensor>> PrepareData(
const std::vector<Tensor>& inputs,
const pten::TensorArgDef& target_args_def,
const TransformFlag& transform_flag);

} // namespace experimental
} // namespace paddle
4 changes: 2 additions & 2 deletions paddle/pten/core/kernel_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ const Kernel& KernelFactory::SelectKernelOrThrowError(
auto kernel_iter = iter->second.find(kernel_key);
// TODO(chenweihang): polish refind impl here
if (kernel_iter == iter->second.end() &&
kernel_key.layout() != pten::DataLayout::ANY) {
kernel_key.layout() != pten::DataLayout::ALL_LAYOUT) {
pten::KernelKey any_layout_kernel_key(
kernel_key.backend(), pten::DataLayout::ANY, kernel_key.dtype());
kernel_key.backend(), pten::DataLayout::ALL_LAYOUT, kernel_key.dtype());
kernel_iter = iter->second.find(any_layout_kernel_key);
}
PADDLE_ENFORCE_NE(
Expand Down
1 change: 1 addition & 0 deletions paddle/pten/core/kernel_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ struct KernelImpl<Return (*)(DevCtx, Args...), kernel_fn> {
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(paddle::platform::float16);
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const Scalar&);
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(DataType);
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(DataLayout);
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const std::vector<int64_t>&);
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const ScalarArray&);
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const std::vector<int>&);
Expand Down
6 changes: 6 additions & 0 deletions paddle/pten/infermeta/unary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,12 @@ DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
return return_meta;
}

// Infers the output meta for transfer_layout: same dtype and dims as the
// input, with the requested target layout.
DenseTensorMeta TransferLayoutInferMeta(const DenseTensorMeta& x_meta,
                                        DataLayout layout) {
  return DenseTensorMeta(x_meta.dtype, x_meta.dims, layout);
}

} // namespace pten

PT_REGISTER_INFER_META_FN(sign, pten::UnchangedInferMetaNew);
4 changes: 4 additions & 0 deletions paddle/pten/infermeta/unary.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,8 @@ DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta,
const std::vector<int64_t>& axis,
DataType dtype,
bool keep_dim);

DenseTensorMeta TransferLayoutInferMeta(const DenseTensorMeta& x_meta,
DataLayout layout);

} // namespace pten
Loading