diff --git a/.gitignore b/.gitignore index 3b92ce1c0b6064..ae61959a4bd86e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,8 +4,10 @@ paddle/fluid/API_DEV.spec paddle/fluid/API_PR.spec paddle/fluid/op_use_default_grad_maker_DEV.spec paddle/fluid/op_use_default_grad_maker_PR.spec -paddle/pten/api/*/api.* -paddle/pten/api/*/backward* +paddle/pten/api/include/api.h +paddle/pten/api/lib/api.cc +paddle/pten/api/backward/backward_api.h +paddle/pten/api/lib/backward_api.cc paddle/pten/include/* paddle/pten/extension.h paddle/fluid/eager/api/generated/* diff --git a/paddle/pten/api/lib/CMakeLists.txt b/paddle/pten/api/lib/CMakeLists.txt index 0b899f1abda9a4..0a55d52a26521c 100644 --- a/paddle/pten/api/lib/CMakeLists.txt +++ b/paddle/pten/api/lib/CMakeLists.txt @@ -15,6 +15,9 @@ cc_library(kernel_dispatch SRCS kernel_dispatch.cc DEPS pten_tensor pten_context cc_library(op_meta_info SRCS op_meta_info.cc DEPS pten_tensor) cc_library(op_kernel_info SRCS op_kernel_info.cc DEPS pten_tensor) + +set(api_gen_utils ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/gen_utils.py) + # forward api file set(api_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api_gen.py) set(api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api.yaml) @@ -46,7 +49,7 @@ add_custom_command( COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_header_file_tmp} ${api_header_file} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_source_file_tmp} ${api_source_file} COMMENT "copy_if_different ${api_header_file} ${api_source_file}" - DEPENDS ${api_yaml_file} ${api_gen_file} + DEPENDS ${api_yaml_file} ${api_gen_file} ${api_gen_utils} VERBATIM) # generate backward api @@ -59,10 +62,11 @@ add_custom_command( COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_header_file_tmp} ${bw_api_header_file} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_source_file_tmp} ${bw_api_source_file} COMMENT "copy_if_different ${bw_api_header_file} ${bw_api_source_file}" - DEPENDS ${bw_api_yaml_file} 
${bw_api_gen_file} + DEPENDS ${bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_utils} VERBATIM) +cc_library(pten_data_transform SRCS data_transform.cc DEPS pten_tensor transfer_layout_kernel cast_kernel data_device_transform) cc_library(manual_api SRCS manual_api.cc DEPS pten_tensor pten kernel_dispatch) -cc_library(sparse_api SRCS sparse_api.cc DEPS pten_tensor pten kernel_dispatch) -cc_library(pten_function_api SRCS ${api_source_file} DEPS pten_tensor pten kernel_dispatch) -cc_library(pten_bw_function_api SRCS ${bw_api_source_file} DEPS pten_tensor pten kernel_dispatch backward_infermeta pten_function_api) +cc_library(sparse_api SRCS sparse_api.cc DEPS pten_tensor pten kernel_dispatch pten_data_transform) +cc_library(pten_function_api SRCS ${api_source_file} DEPS pten_tensor pten kernel_dispatch pten_data_transform) +cc_library(pten_bw_function_api SRCS ${bw_api_source_file} DEPS pten_tensor pten kernel_dispatch backward_infermeta pten_data_transform pten_function_api) diff --git a/paddle/pten/api/lib/data_transform.cc b/paddle/pten/api/lib/data_transform.cc new file mode 100644 index 00000000000000..bbef82473683e6 --- /dev/null +++ b/paddle/pten/api/lib/data_transform.cc @@ -0,0 +1,232 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/pten/api/lib/data_transform.h" + +#include "paddle/pten/api/ext/dispatch.h" +#include "paddle/pten/api/lib/kernel_dispatch.h" +#include "paddle/pten/backends/all_context.h" +#include "paddle/pten/kernels/cast_kernel.h" +#include "paddle/pten/kernels/transfer_layout_kernel.h" + +#include "paddle/fluid/framework/data_device_transform.h" + +namespace paddle { +namespace experimental { + +inline bool NeedTransformDataType(const DataType& input, + const DataType& target, + const TransformFlag& transform_flag) { + return input != target && + (transform_flag.need_trans_data_type() || + target == DataType::COMPLEX64 || target == DataType::COMPLEX128); +} + +inline bool NeedTransformPlace(const paddle::platform::Place& input, + const Backend& target, + const TransformFlag& transform_flag) { + bool ret = transform_flag.need_trans_backend() && + target != Backend::ALL_BACKEND && + !platform::is_same_place(input, pten::TransToFluidPlace(target)); + return ret; +} + +inline bool NeedTransformLayout(const DataLayout& input, + const DataLayout& target, + const TransformFlag& transform_flag) { + bool ret = transform_flag.need_trans_layout() && + (input != DataLayout::ALL_LAYOUT && + target != DataLayout::ALL_LAYOUT && input != target); + return ret; +} + +inline pten::DenseTensor TransDataLayout(const pten::DenseTensor& tensor, + DataLayout layout) { + auto& pool = paddle::platform::DeviceContextPool::Instance(); + VLOG(3) << "DataLayoutTransform src_layout: " << tensor.layout() + << " dst_layout: " << layout; + if (platform::is_cpu_place(tensor.place())) { + auto* dev_ctx = static_cast(pool.Get(tensor.place())); + return pten::TransferLayout(*dev_ctx, tensor, layout); + } else { + PADDLE_THROW(pten::errors::PreconditionNotMet( + "Unsupported data layout cast from CPU to GPU.")); + } +} + +template +pten::DenseTensor CastDateType(const Context& dev_ctx, + const pten::DenseTensor& tensor, + DataType dtype) { + switch (tensor.dtype()) { + case 
DataType::FLOAT32: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::FLOAT64: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::INT32: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::INT64: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::FLOAT16: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::BFLOAT16: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::BOOL: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::INT16: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::UINT8: + return pten::Cast(dev_ctx, tensor, dtype); + default: + PADDLE_THROW(pten::errors::Unimplemented( + "Data type (%s) is not supported when casting data type.", + tensor.dtype())); + } +} + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +pten::DenseTensor CastDateType(const pten::GPUContext& dev_ctx, + const pten::DenseTensor& tensor, + DataType dtype) { + switch (tensor.dtype()) { + case DataType::FLOAT32: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::FLOAT64: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::INT32: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::INT64: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::FLOAT16: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::BOOL: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::INT16: + return pten::Cast(dev_ctx, tensor, dtype); + case DataType::UINT8: + return pten::Cast(dev_ctx, tensor, dtype); + default: + PADDLE_THROW(pten::errors::Unimplemented( + "Data type (%s) is not supported when casting data type.", + tensor.dtype())); + } +} +#endif + +inline pten::DenseTensor TransDataType(const pten::DenseTensor& tensor, + DataType dtype) { + auto& pool = paddle::platform::DeviceContextPool::Instance(); + + VLOG(3) << "DataTypeTransform src_dtype: " << tensor.dtype() + << " dst_dtype: " << dtype; + + pten::DenseTensor out( + 
pten::make_intrusive(tensor.place()), + {dtype, tensor.dims(), tensor.layout()}); + + if (platform::is_cpu_place(tensor.place())) { + auto* dev_ctx = static_cast(pool.Get(tensor.place())); + return CastDateType(*dev_ctx, tensor, dtype); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + } else if (platform::is_gpu_place(tensor.place())) { + auto* dev_ctx = static_cast(pool.Get(tensor.place())); + return CastDateType(*dev_ctx, tensor, dtype); +#endif + } else { + PADDLE_THROW(pten::errors::Unimplemented( + "Place type is not supported when casting data type.")); + } + return out; +} + +pten::DenseTensor TransformData(const pten::DenseTensor& tensor, + const pten::TensorArgDef& target_args_def, + const TransformFlag& transform_flag) { + pten::DenseTensor out = tensor; + if (NeedTransformLayout( + tensor.layout(), target_args_def.layout, transform_flag)) { + out = TransDataLayout(out, target_args_def.layout); + } + + if (NeedTransformDataType( + tensor.dtype(), target_args_def.dtype, transform_flag)) { + out = TransDataType(out, target_args_def.dtype); + } + + if (NeedTransformPlace( + out.place(), target_args_def.backend, transform_flag)) { + pten::DenseTensor result( + pten::make_intrusive( + pten::TransToFluidPlace(target_args_def.backend)), + {out.dtype(), out.dims(), out.layout()}); + framework::TransDataDevice( + out, pten::TransToFluidPlace(target_args_def.backend), &result); + out = result; + } + return out; +} + +std::shared_ptr PrepareData( + const Tensor& input, + const pten::TensorArgDef& target_args_def, + const TransformFlag& transform_flag) { + const auto& tensor_in = input.impl(); + if (!transform_flag.NeedTransform() || !tensor_in->initialized() || + (!NeedTransformPlace( + tensor_in->place(), target_args_def.backend, transform_flag) && + !NeedTransformDataType( + tensor_in->dtype(), target_args_def.dtype, transform_flag) && + !NeedTransformLayout( + tensor_in->layout(), target_args_def.layout, transform_flag))) { + return 
std::dynamic_pointer_cast(tensor_in); + } + + pten::DenseTensor out = + TransformData(*(static_cast(tensor_in.get())), + target_args_def, + transform_flag); + return std::make_shared(out); +} + +std::unique_ptr> PrepareData( + const std::vector& inputs, + const pten::TensorArgDef& target_args_def, + const TransformFlag& transform_flag) { + auto pt_tensors = std::make_unique>(); + pt_tensors->reserve(inputs.size()); + + for (const auto& input : inputs) { + const auto& tensor_in = input.impl(); + if (!transform_flag.NeedTransform() || !tensor_in->initialized() || + (!NeedTransformPlace( + tensor_in->place(), target_args_def.backend, transform_flag) && + !NeedTransformDataType( + tensor_in->dtype(), target_args_def.dtype, transform_flag) && + !NeedTransformLayout( + tensor_in->layout(), target_args_def.layout, transform_flag))) { + pt_tensors->emplace_back( + *std::dynamic_pointer_cast(tensor_in)); + } else { + pt_tensors->emplace_back( + TransformData(*(static_cast(tensor_in.get())), + target_args_def, + transform_flag)); + } + } + + return std::move(pt_tensors); +} + +} // namespace experimental +} // namespace paddle diff --git a/paddle/pten/api/lib/data_transform.h b/paddle/pten/api/lib/data_transform.h new file mode 100644 index 00000000000000..59f83d6e4f38bc --- /dev/null +++ b/paddle/pten/api/lib/data_transform.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/pten/api/include/tensor.h" +#include "paddle/pten/core/kernel_factory.h" + +namespace paddle { +namespace experimental { + +class TransformFlag { + public: + TransformFlag(bool stop_transform = false, + bool trans_dtype = false, + bool trans_backend = true, + bool trans_layout = true) + : stop_transform_(stop_transform), + trans_data_type_(trans_dtype), + trans_backend_(trans_backend), + trans_layout_(trans_layout) {} + + bool NeedTransform() const { + return !stop_transform_ && + (trans_data_type_ || trans_backend_ || trans_layout_); + } + + bool need_trans_data_type() const { + return !stop_transform_ && trans_data_type_; + } + + bool need_trans_backend() const { return !stop_transform_ && trans_backend_; } + + bool need_trans_layout() const { return !stop_transform_ && trans_layout_; } + + private: + // This is the highest priority in flags, + // and can be set by api[data_transform->skip_transform] in the yaml file. + bool stop_transform_ = false; + + // trans_data_type_ can be set by api[data_transform->support_trans_dtype] + // in the yaml file. + // trans_data_type_ only affects the non-complex types; + // complex types are always transferred, unless stop_transform_ is true. + bool trans_data_type_ = false; + + // trans_backend_ and trans_layout_ are true by default, + // and they can only be set by global flag. 
+ bool trans_backend_ = true; + bool trans_layout_ = true; +}; + +std::shared_ptr PrepareData( + const Tensor& input, + const pten::TensorArgDef& target_args_def, + const TransformFlag& transform_flag); + +std::unique_ptr> PrepareData( + const std::vector& inputs, + const pten::TensorArgDef& target_args_def, + const TransformFlag& transform_flag); + +} // namespace experimental +} // namespace paddle diff --git a/paddle/pten/core/kernel_factory.cc b/paddle/pten/core/kernel_factory.cc index 34ec5205a8fe3a..22899fbe84d058 100644 --- a/paddle/pten/core/kernel_factory.cc +++ b/paddle/pten/core/kernel_factory.cc @@ -70,9 +70,9 @@ const Kernel& KernelFactory::SelectKernelOrThrowError( auto kernel_iter = iter->second.find(kernel_key); // TODO(chenweihang): polish refind impl here if (kernel_iter == iter->second.end() && - kernel_key.layout() != pten::DataLayout::ANY) { + kernel_key.layout() != pten::DataLayout::ALL_LAYOUT) { pten::KernelKey any_layout_kernel_key( - kernel_key.backend(), pten::DataLayout::ANY, kernel_key.dtype()); + kernel_key.backend(), pten::DataLayout::ALL_LAYOUT, kernel_key.dtype()); kernel_iter = iter->second.find(any_layout_kernel_key); } PADDLE_ENFORCE_NE( diff --git a/paddle/pten/core/kernel_utils.h b/paddle/pten/core/kernel_utils.h index 2ecb3eca6cf5bc..d05e3c2887306f 100644 --- a/paddle/pten/core/kernel_utils.h +++ b/paddle/pten/core/kernel_utils.h @@ -234,6 +234,7 @@ struct KernelImpl { PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(paddle::platform::float16); PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const Scalar&); PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(DataType); + PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(DataLayout); PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const std::vector&); PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const ScalarArray&); PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const std::vector&); diff --git a/paddle/pten/infermeta/unary.cc b/paddle/pten/infermeta/unary.cc index 57bac52cef5919..60cd7a4abff625 100644 
--- a/paddle/pten/infermeta/unary.cc +++ b/paddle/pten/infermeta/unary.cc @@ -317,6 +317,14 @@ void ReduceInferMeta(const MetaTensor& x, ReduceInferMeta(x, axis, keep_dim, DataType::UNDEFINED, out); } +void TransferLayoutInferMeta(const MetaTensor& x, + DataLayout layout, + MetaTensor* out) { + out->set_dims(x.dims()); + out->set_dtype(x.dtype()); + out->set_layout(layout); +} + } // namespace pten PT_REGISTER_INFER_META_FN(sign, pten::UnchangedInferMetaNew); diff --git a/paddle/pten/infermeta/unary.h b/paddle/pten/infermeta/unary.h index c1a939c2dec503..7c9d012df3b093 100644 --- a/paddle/pten/infermeta/unary.h +++ b/paddle/pten/infermeta/unary.h @@ -74,4 +74,9 @@ void SumInferMeta(const MetaTensor& x, DataType dtype, bool keep_dim, MetaTensor* out); + +void TransferLayoutInferMeta(const MetaTensor& x, + DataLayout layout, + MetaTensor* out); + } // namespace pten diff --git a/paddle/pten/kernels/transfer_layout_kernel.cc b/paddle/pten/kernels/transfer_layout_kernel.cc new file mode 100644 index 00000000000000..c21ab7c304d9e7 --- /dev/null +++ b/paddle/pten/kernels/transfer_layout_kernel.cc @@ -0,0 +1,77 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/pten/kernels/transfer_layout_kernel.h" + +#include "paddle/pten/api/ext/dispatch.h" +#include "paddle/pten/backends/all_context.h" +#include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/kernels/funcs/transpose.h" + +namespace pten { + +std::vector GetAxis(const DataLayout& from, const DataLayout& to) { + PADDLE_ENFORCE_NE( + from, + to, + pten::errors::InvalidArgument( + "Layout transform should transform between different layout.")); + if (from == DataLayout::NCHW && to == DataLayout::NHWC) { + return {0, 2, 3, 1}; + } else if (from == DataLayout::NHWC && to == DataLayout::NCHW) { + return {0, 3, 1, 2}; + } else { + PADDLE_THROW( + pten::errors::InvalidArgument("Unsupported layout transform.")); + } +} + +template +void CastDataLayout(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& axis, + DenseTensor* out) { + math::Transpose trans4; + trans4(dev_ctx, x, out, axis); +} + +template +void TransferLayoutKernel(const Context& dev_ctx, + const DenseTensor& x, + DataLayout dst_layout, + DenseTensor* out) { + auto src_dim = x.dims(); + + auto axis = GetAxis(x.layout(), dst_layout); + + std::vector dst_dim; + dst_dim.resize(axis.size()); + for (size_t i = 0; i < axis.size(); i++) { + dst_dim[i] = src_dim[axis[i]]; + } + + out->ResizeAndAllocate(framework::make_ddim(dst_dim)); + + PD_VISIT_ALL_TYPES(x.dtype(), "CastDataLayout", ([&] { + CastDataLayout(dev_ctx, x, axis, out); + })); +} + +} // namespace pten + +PT_REGISTER_GENERAL_KERNEL(pten_transfer_layout, + CPU, + ALL_LAYOUT, + pten::TransferLayoutKernel, + ALL_DTYPE) {} diff --git a/paddle/pten/kernels/transfer_layout_kernel.h b/paddle/pten/kernels/transfer_layout_kernel.h new file mode 100644 index 00000000000000..24854842e8b780 --- /dev/null +++ b/paddle/pten/kernels/transfer_layout_kernel.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/infermeta/unary.h" +#include "paddle/pten/kernels/empty_kernel.h" + +namespace pten { + +template +void TransferLayoutKernel(const Context& dev_ctx, + const DenseTensor& x, + DataLayout dst_layout, + DenseTensor* out); + +template +DenseTensor TransferLayout(const Context& dev_ctx, + const DenseTensor& x, + DataLayout dst_layout) { + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + {x.dtype(), x.dims(), dst_layout}); + MetaTensor meta_out(&dense_out); + TransferLayoutInferMeta(x, dst_layout, &meta_out); + TransferLayoutKernel(dev_ctx, x, dst_layout, &dense_out); + return dense_out; +} + +} // namespace pten diff --git a/paddle/pten/tests/api/CMakeLists.txt b/paddle/pten/tests/api/CMakeLists.txt index 33a1e25f3c534a..b8491ab7f5ea89 100644 --- a/paddle/pten/tests/api/CMakeLists.txt +++ b/paddle/pten/tests/api/CMakeLists.txt @@ -22,4 +22,6 @@ cc_test(test_scale_api SRCS test_scale_api.cc DEPS pten_tensor pten_api pten_api cc_test(test_scale_benchmark SRCS test_scale_benchmark.cc DEPS pten_tensor pten_api pten_api_utils) cc_test(test_conj_api SRCS test_conj_api.cc DEPS pten_tensor pten_api pten_api_utils) cc_test(test_concat_api SRCS test_concat_api.cc DEPS pten_tensor pten_api pten_api_utils) + +cc_test(test_data_transform SRCS test_data_transform.cc DEPS pten_tensor pten_api pten_api_utils) cc_test(test_sparse_utils_api SRCS 
test_sparse_utils_api.cc DEPS pten_tensor pten_api pten_api_utils) diff --git a/paddle/pten/tests/api/test_data_transform.cc b/paddle/pten/tests/api/test_data_transform.cc new file mode 100644 index 00000000000000..ce3d19b8845137 --- /dev/null +++ b/paddle/pten/tests/api/test_data_transform.cc @@ -0,0 +1,100 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include + +#include "paddle/pten/api/include/api.h" +#include "paddle/pten/api/include/manual_api.h" +#include "paddle/pten/common/complex.h" +#include "paddle/pten/core/compat/convert_utils.h" +#include "paddle/pten/core/dense_tensor.h" + +namespace paddle { +namespace tests { + +// TODO(chenweihang): Remove this test after the API is used in the dygraph +TEST(API, data_transform_same_place) { + // 1. create tensor + auto x = paddle::experimental::full({3, 3}, + 1.0, + experimental::DataType::COMPLEX128, + experimental::Backend::CPU); + + auto y = paddle::experimental::full( + {3, 3}, 2.0, experimental::DataType::FLOAT32, experimental::Backend::CPU); + + std::vector> sum(9, 6.0); + + // 2. test API + auto out = paddle::experimental::matmul(x, y, false, false); + + // 3. 
check result + ASSERT_EQ(out.dims().size(), 2); + ASSERT_EQ(out.dims()[0], 3); + ASSERT_EQ(out.dims()[1], 3); + ASSERT_EQ(out.numel(), 9); + ASSERT_EQ(out.type(), pten::DataType::COMPLEX128); + ASSERT_EQ(out.layout(), pten::DataLayout::NCHW); + ASSERT_EQ(out.initialized(), true); + + auto dense_out = std::dynamic_pointer_cast(out.impl()); + + for (size_t i = 0; i < 9; i++) { + ASSERT_NEAR(sum[i].real, + dense_out->data>()[i].real, + 1e-6f); + ASSERT_NEAR(sum[i].imag, + dense_out->data>()[i].imag, + 1e-6f); + } +} + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +TEST(Tensor, data_transform_diff_place) { + // 1. create tensor + auto x = paddle::experimental::full( + {3, 3}, 1.0, experimental::DataType::FLOAT64, experimental::Backend::CPU); + + auto y = paddle::experimental::full( + {3, 3}, 2.0, experimental::DataType::FLOAT64, experimental::Backend::GPU); + + std::vector sum(9, 6.0); + + // 2. test API + auto out = paddle::experimental::matmul(x, y, false, false); + + // 3. check result + ASSERT_EQ(out.dims().size(), 2); + ASSERT_EQ(out.dims()[0], 3); + ASSERT_EQ(out.dims()[1], 3); + ASSERT_EQ(out.numel(), 9); + ASSERT_EQ(out.dtype(), pten::DataType::FLOAT64); + ASSERT_EQ(out.layout(), pten::DataLayout::NCHW); + ASSERT_EQ(out.initialized(), true); + ASSERT_EQ(out.impl()->place(), + pten::TransToFluidPlace(experimental::Backend::GPU)); + + auto ref_out = experimental::copy_to(out, experimental::Backend::CPU, true); + + auto dense_out = std::dynamic_pointer_cast(ref_out.impl()); + for (size_t i = 0; i < 9; i++) { + ASSERT_NEAR(sum[i], dense_out->data()[i], 1e-6f); + } +} + +#endif + +} // namespace tests +} // namespace paddle diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py index cc7b31559f67c0..9a772ad126cd1f 100644 --- a/python/paddle/utils/code_gen/api_gen.py +++ b/python/paddle/utils/code_gen/api_gen.py @@ -58,6 +58,18 @@ def __init__(self, api_item_yaml): if 'param' not in self.infer_meta: 
self.infer_meta['param'] = None + self.data_transform = { + 'skip_transform': [], + 'support_trans_dtype': [] + } + if 'data_transform' in api_item_yaml: + if 'skip_transform' in api_item_yaml['data_transform']: + self.data_transform['skip_transform'] = api_item_yaml[ + 'data_transform']['skip_transform'] + if 'support_trans_dtype' in api_item_yaml['data_transform']: + self.data_transform['support_trans_dtype'] = api_item_yaml[ + 'data_transform']['support_trans_dtype'] + def gene_api_declaration(self): return f""" PADDLE_API {self.return_type} {self.api}({self.args['args_declare']}); @@ -97,7 +109,7 @@ def gene_api_code(self): if self.is_base_api: input_tensors, kernel_args, kernel_signature = gen_utils.get_kernel_args( self.args['inputs'], self.args['attrs'], self.out_type_list, - self.kernel['param']) + self.kernel['param'], self.data_transform) outputs_args, output_names, output_create = self.gene_output( self.out_type_list) return f""" @@ -143,6 +155,7 @@ def source_include(header_file_path): #include "paddle/pten/api/lib/api_registry.h" #include "paddle/pten/api/lib/api_utils.h" +#include "paddle/pten/api/lib/data_transform.h" #include "paddle/pten/api/lib/kernel_dispatch.h" #include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/core/kernel_registry.h" diff --git a/python/paddle/utils/code_gen/backward_api_gen.py b/python/paddle/utils/code_gen/backward_api_gen.py index 9df25e2dead0b5..53207a089fb863 100644 --- a/python/paddle/utils/code_gen/backward_api_gen.py +++ b/python/paddle/utils/code_gen/backward_api_gen.py @@ -50,6 +50,20 @@ def __init__(self, backward_item_yaml): 'param']) == 0: self.infer_meta['param'] = None + self.data_transform = { + 'skip_transform': [], + 'support_trans_dtype': [] + } + if 'data_transform' in backward_item_yaml: + if 'skip_transform' in backward_item_yaml['data_transform']: + self.data_transform['skip_transform'] = backward_item_yaml[ + 'data_transform']['skip_transform'] + if 'support_trans_dtype' in 
backward_item_yaml[ + 'data_transform']: + self.data_transform[ + 'support_trans_dtype'] = backward_item_yaml[ + 'data_transform']['support_trans_dtype'] + def parse_forward_config(self, forward_config): # api_name (const Tensor& input, ... , int attr, ...) -> Tensor(out) result = re.search( @@ -144,7 +158,7 @@ def gene_api_code(self): if self.is_base_api: input_tensors, kernel_args, kernel_signature = gen_utils.get_kernel_args( self.args['inputs'], self.args['attrs'], self.output_type_list, - self.kernel['param']) + self.kernel['param'], self.data_transform) outputs_args, output_names, output_create = self.gene_output( self.output_type_list) return f""" @@ -208,6 +222,7 @@ def source_include(header_file_path): #include "paddle/pten/api/lib/api_registry.h" #include "paddle/pten/api/lib/api_utils.h" +#include "paddle/pten/api/lib/data_transform.h" #include "paddle/pten/api/lib/kernel_dispatch.h" #include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/core/kernel_registry.h" diff --git a/python/paddle/utils/code_gen/gen_utils.py b/python/paddle/utils/code_gen/gen_utils.py index 56143a8f517cfd..5ce5d96429e270 100644 --- a/python/paddle/utils/code_gen/gen_utils.py +++ b/python/paddle/utils/code_gen/gen_utils.py @@ -296,7 +296,7 @@ def gene_infer_meta(input_names, attr_names, output_names, infer_meta) -> str: """ -def get_kernel_args(inputs, attrs, out_type_list, kernel_param): +def get_kernel_args(inputs, attrs, out_type_list, kernel_param, data_transform): input_trans_map = { 'const Tensor&': 'const pten::DenseTensor&', 'const Tensor &': 'const pten::DenseTensor&', @@ -321,6 +321,22 @@ def get_kernel_args(inputs, attrs, out_type_list, kernel_param): if kernel_param is None: kernel_param = input_names + attr_names + input_tensor_code = "" + for i, input_name in enumerate(input_names): + # set input code + if input_name in kernel_param: + trans_flag = "{}" + if input_name in data_transform['skip_transform']: + trans_flag = "{true}" + elif input_name in 
data_transform['support_trans_dtype']: + trans_flag = "{false, true}" + input_tensor_code = input_tensor_code + f""" + auto {PREFIX_TENSOR_NAME}{input_name} = PrepareData({input_name}, kernel.InputAt({i}), {trans_flag});""" + + else: + input_tensor_code = input_tensor_code + f""" + auto {PREFIX_TENSOR_NAME}{input_name} = TensorToDenseTensor({input_name});""" + kernel_args = "*dev_ctx, " for param in kernel_param: if param in input_names: