Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

【Pten】Support data transform in C++ API #39263

Merged
merged 11 commits into from
Feb 4, 2022
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ paddle/fluid/API_DEV.spec
paddle/fluid/API_PR.spec
paddle/fluid/op_use_default_grad_maker_DEV.spec
paddle/fluid/op_use_default_grad_maker_PR.spec
paddle/pten/api/*/api.*
paddle/pten/api/*/backward*
paddle/pten/api/include/api.h
paddle/pten/api/lib/api.cc
paddle/pten/api/backward/backward_api.h
paddle/pten/api/lib/backward_api.cc
paddle/pten/include/*
paddle/pten/extension.h

Expand Down
12 changes: 8 additions & 4 deletions paddle/pten/api/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ cc_library(kernel_dispatch SRCS kernel_dispatch.cc DEPS pten_tensor pten_context
cc_library(op_meta_info SRCS op_meta_info.cc DEPS pten_tensor)
cc_library(op_kernel_info SRCS op_kernel_info.cc DEPS pten_tensor)


set(api_gen_utils ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/gen_utils.py)

# forward api file
set(api_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api_gen.py)
set(api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api.yaml)
Expand Down Expand Up @@ -46,7 +49,7 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_header_file_tmp} ${api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_source_file_tmp} ${api_source_file}
COMMENT "copy_if_different ${api_header_file} ${api_source_file}"
DEPENDS ${api_yaml_file} ${api_gen_file}
DEPENDS ${api_yaml_file} ${api_gen_file} ${api_gen_utils}
VERBATIM)

# generate backward api
Expand All @@ -59,9 +62,10 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_header_file_tmp} ${bw_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_source_file_tmp} ${bw_api_source_file}
COMMENT "copy_if_different ${bw_api_header_file} ${bw_api_source_file}"
DEPENDS ${bw_api_yaml_file} ${bw_api_gen_file}
DEPENDS ${bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_utils}
VERBATIM)

cc_library(pten_data_transform SRCS data_transform.cc DEPS pten_tensor transfer_layout_kernel cast_kernel data_device_transform)
cc_library(manual_api SRCS manual_api.cc DEPS pten_tensor pten kernel_dispatch)
cc_library(pten_function_api SRCS ${api_source_file} DEPS pten_tensor pten kernel_dispatch)
cc_library(pten_bw_function_api SRCS ${bw_api_source_file} DEPS pten_tensor pten kernel_dispatch backward_infermeta pten_function_api)
cc_library(pten_function_api SRCS ${api_source_file} DEPS pten_tensor pten kernel_dispatch pten_data_transform)
cc_library(pten_bw_function_api SRCS ${bw_api_source_file} DEPS pten_tensor pten kernel_dispatch backward_infermeta pten_data_transform pten_function_api)
232 changes: 232 additions & 0 deletions paddle/pten/api/lib/data_transform.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/pten/api/lib/data_transform.h"

#include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/api/lib/kernel_dispatch.h"
#include "paddle/pten/backends/all_context.h"
#include "paddle/pten/kernels/cast_kernel.h"
#include "paddle/pten/kernels/transfer_layout_kernel.h"

#include "paddle/fluid/framework/data_device_transform.h"

namespace paddle {
namespace experimental {

// Returns true when a dtype cast is required: the source and target dtypes
// differ, and either dtype transformation is enabled by the flag or the
// target is a complex type (complex targets are always converted).
inline bool NeedTransformDataType(const DataType& input,
                                  const DataType& target,
                                  const TransformFlag& transform_flag) {
  if (input == target) {
    return false;
  }
  const bool target_is_complex =
      target == DataType::COMPLEX64 || target == DataType::COMPLEX128;
  return transform_flag.need_trans_data_type() || target_is_complex;
}

// Returns true when a device copy is required: backend transformation is
// enabled, the target backend is a concrete one (not ALL_BACKEND), and the
// tensor does not already live on the corresponding place.
inline bool NeedTransformPlace(const paddle::platform::Place& input,
                               const Backend& target,
                               const TransformFlag& transform_flag) {
  if (!transform_flag.need_trans_backend() || target == Backend::ALL_BACKEND) {
    return false;
  }
  return !platform::is_same_place(input, pten::TransToFluidPlace(target));
}

// Returns true when a layout transform is required: layout transformation is
// enabled, both layouts are concrete (not ALL_LAYOUT), and they differ.
inline bool NeedTransformLayout(const DataLayout& input,
                                const DataLayout& target,
                                const TransformFlag& transform_flag) {
  if (!transform_flag.need_trans_layout()) {
    return false;
  }
  return input != DataLayout::ALL_LAYOUT && target != DataLayout::ALL_LAYOUT &&
         input != target;
}

// Transforms `tensor` to the given data layout. Only CPU tensors are
// supported; any other place throws PreconditionNotMet.
inline pten::DenseTensor TransDataLayout(const pten::DenseTensor& tensor,
                                         DataLayout layout) {
  auto& pool = paddle::platform::DeviceContextPool::Instance();
  VLOG(3) << "DataLayoutTransform src_layout: " << tensor.layout()
          << " dst_layout: " << layout;
  if (platform::is_cpu_place(tensor.place())) {
    auto* dev_ctx = static_cast<pten::CPUContext*>(pool.Get(tensor.place()));
    return pten::TransferLayout(*dev_ctx, tensor, layout);
  } else {
    // Fix: the original message said "cast from CPU to GPU", but this branch
    // is reached for ANY non-CPU tensor; report the actual limitation.
    PADDLE_THROW(pten::errors::PreconditionNotMet(
        "Unsupported place for data layout transform, only CPU place is "
        "supported."));
  }
}

// Casts `tensor` to `dtype` by dispatching pten::Cast on the tensor's current
// element type; throws Unimplemented for unsupported source dtypes.
// NOTE(review): "CastDateType" looks like a typo for "CastDataType" — kept
// as-is because the GPU overload and TransDataType in this file call it by
// this name; confirm before renaming.
template <typename Context>
pten::DenseTensor CastDateType(const Context& dev_ctx,
                               const pten::DenseTensor& tensor,
                               DataType dtype) {
  switch (tensor.dtype()) {
    case DataType::FLOAT32:
      return pten::Cast<float>(dev_ctx, tensor, dtype);
    case DataType::FLOAT64:
      return pten::Cast<double>(dev_ctx, tensor, dtype);
    case DataType::INT32:
      return pten::Cast<int32_t>(dev_ctx, tensor, dtype);
    case DataType::INT64:
      return pten::Cast<int64_t>(dev_ctx, tensor, dtype);
    case DataType::FLOAT16:
      return pten::Cast<pten::dtype::float16>(dev_ctx, tensor, dtype);
    case DataType::BFLOAT16:
      return pten::Cast<pten::dtype::bfloat16>(dev_ctx, tensor, dtype);
    case DataType::BOOL:
      return pten::Cast<bool>(dev_ctx, tensor, dtype);
    case DataType::INT16:
      return pten::Cast<int16_t>(dev_ctx, tensor, dtype);
    case DataType::UINT8:
      return pten::Cast<uint8_t>(dev_ctx, tensor, dtype);
    default:
      PADDLE_THROW(pten::errors::Unimplemented(
          "Data type (%s) is not supported when casting data type.",
          tensor.dtype()));
  }
}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// GPU-specific overload of CastDateType.
// NOTE: the GPU Cast kernel does not support bfloat16, so the BFLOAT16 case
// handled by the generic template above is intentionally absent here
// (per the PR review discussion).
// This span of the scraped source was interleaved with review-comment text;
// the function is reconstructed here without it.
pten::DenseTensor CastDateType(const pten::GPUContext& dev_ctx,
                               const pten::DenseTensor& tensor,
                               DataType dtype) {
  switch (tensor.dtype()) {
    case DataType::FLOAT32:
      return pten::Cast<float>(dev_ctx, tensor, dtype);
    case DataType::FLOAT64:
      return pten::Cast<double>(dev_ctx, tensor, dtype);
    case DataType::INT32:
      return pten::Cast<int32_t>(dev_ctx, tensor, dtype);
    case DataType::INT64:
      return pten::Cast<int64_t>(dev_ctx, tensor, dtype);
    case DataType::FLOAT16:
      return pten::Cast<pten::dtype::float16>(dev_ctx, tensor, dtype);
    case DataType::BOOL:
      return pten::Cast<bool>(dev_ctx, tensor, dtype);
    case DataType::INT16:
      return pten::Cast<int16_t>(dev_ctx, tensor, dtype);
    case DataType::UINT8:
      return pten::Cast<uint8_t>(dev_ctx, tensor, dtype);
    default:
      PADDLE_THROW(pten::errors::Unimplemented(
          "Data type (%s) is not supported when casting data type.",
          tensor.dtype()));
  }
}
#endif

// Casts `tensor` to `dtype` on its current place. CPU is always supported;
// GPU is supported when compiled with CUDA/HIP; any other place throws.
inline pten::DenseTensor TransDataType(const pten::DenseTensor& tensor,
                                       DataType dtype) {
  auto& pool = paddle::platform::DeviceContextPool::Instance();

  VLOG(3) << "DataTypeTransform src_dtype: " << tensor.dtype()
          << " dst_dtype: " << dtype;

  // Fix: the original constructed a DenseTensor `out` up front that was only
  // reachable via a dead trailing `return out;` — every live path returns
  // from CastDateType or throws. The wasted construction is removed.
  if (platform::is_cpu_place(tensor.place())) {
    auto* dev_ctx = static_cast<pten::CPUContext*>(pool.Get(tensor.place()));
    return CastDateType(*dev_ctx, tensor, dtype);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  } else if (platform::is_gpu_place(tensor.place())) {
    auto* dev_ctx = static_cast<pten::GPUContext*>(pool.Get(tensor.place()));
    return CastDateType(*dev_ctx, tensor, dtype);
#endif
  } else {
    PADDLE_THROW(pten::errors::Unimplemented(
        "Place type is not supported when casting data type."));
  }
}

// Applies, in order, the layout, dtype and device transforms that
// `target_args_def` requires (subject to `transform_flag`) and returns the
// resulting tensor; transforms that are not needed are skipped.
pten::DenseTensor TransformData(const pten::DenseTensor& tensor,
                                const pten::TensorArgDef& target_args_def,
                                const TransformFlag& transform_flag) {
  pten::DenseTensor out = tensor;

  const bool layout_mismatch = NeedTransformLayout(
      tensor.layout(), target_args_def.layout, transform_flag);
  if (layout_mismatch) {
    out = TransDataLayout(out, target_args_def.layout);
  }

  const bool dtype_mismatch = NeedTransformDataType(
      tensor.dtype(), target_args_def.dtype, transform_flag);
  if (dtype_mismatch) {
    out = TransDataType(out, target_args_def.dtype);
  }

  if (NeedTransformPlace(out.place(), target_args_def.backend,
                         transform_flag)) {
    const auto dst_place = pten::TransToFluidPlace(target_args_def.backend);
    pten::DenseTensor result(
        pten::make_intrusive<paddle::experimental::SharedStorage>(dst_place),
        {out.dtype(), out.dims(), out.layout()});
    framework::TransDataDevice(out, dst_place, &result);
    out = result;
  }
  return out;
}

// Returns the input's underlying DenseTensor, transformed to match
// `target_args_def` when place/dtype/layout require it; when no transform is
// needed (or the tensor is uninitialized), the original tensor is shared
// without copying.
std::shared_ptr<pten::DenseTensor> PrepareData(
    const Tensor& input,
    const pten::TensorArgDef& target_args_def,
    const TransformFlag& transform_flag) {
  const auto& tensor_in = input.impl();

  const bool no_transform_needed =
      !transform_flag.NeedTransform() || !tensor_in->initialized() ||
      (!NeedTransformPlace(
           tensor_in->place(), target_args_def.backend, transform_flag) &&
       !NeedTransformDataType(
           tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
       !NeedTransformLayout(
           tensor_in->layout(), target_args_def.layout, transform_flag));
  if (no_transform_needed) {
    return std::dynamic_pointer_cast<pten::DenseTensor>(tensor_in);
  }

  auto* dense_in = static_cast<pten::DenseTensor*>(tensor_in.get());
  return std::make_shared<pten::DenseTensor>(
      TransformData(*dense_in, target_args_def, transform_flag));
}

// Vector overload of PrepareData: for each input tensor, either shares the
// underlying DenseTensor (when no transform is needed or it is uninitialized)
// or appends a transformed copy matching `target_args_def`.
std::unique_ptr<std::vector<pten::DenseTensor>> PrepareData(
    const std::vector<Tensor>& inputs,
    const pten::TensorArgDef& target_args_def,
    const TransformFlag& transform_flag) {
  auto pt_tensors = std::make_unique<std::vector<pten::DenseTensor>>();
  pt_tensors->reserve(inputs.size());

  for (const auto& input : inputs) {
    const auto& tensor_in = input.impl();
    if (!transform_flag.NeedTransform() || !tensor_in->initialized() ||
        (!NeedTransformPlace(
             tensor_in->place(), target_args_def.backend, transform_flag) &&
         !NeedTransformDataType(
             tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
         !NeedTransformLayout(
             tensor_in->layout(), target_args_def.layout, transform_flag))) {
      pt_tensors->emplace_back(
          *std::dynamic_pointer_cast<pten::DenseTensor>(tensor_in));
    } else {
      pt_tensors->emplace_back(
          TransformData(*(static_cast<pten::DenseTensor*>(tensor_in.get())),
                        target_args_def,
                        transform_flag));
    }
  }

  // Fix: return by name. Wrapping a named local in std::move() is redundant
  // for a move-only type and inhibits NRVO (CppCoreGuidelines F.48,
  // clang-tidy performance-no-automatic-move).
  return pt_tensors;
}

} // namespace experimental
} // namespace paddle
75 changes: 75 additions & 0 deletions paddle/pten/api/lib/data_transform.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/pten/api/include/tensor.h"
#include "paddle/pten/core/kernel_factory.h"

namespace paddle {
namespace experimental {

// Flags controlling which kinds of input data transformation (dtype, backend
// place, layout) the C++ API may perform before kernel dispatch.
class TransformFlag {
 public:
  TransformFlag(bool stop_transform = false,
                bool trans_dtype = false,
                bool trans_backend = true,
                bool trans_layout = true)
      : stop_transform_(stop_transform),
        trans_data_type_(trans_dtype),
        trans_backend_(trans_backend),
        trans_layout_(trans_layout) {}

  // True when any transformation may be performed at all.
  bool NeedTransform() const {
    return !stop_transform_ &&
           (trans_data_type_ || trans_backend_ || trans_layout_);
  }

  bool need_trans_data_type() const {
    return !stop_transform_ && trans_data_type_;
  }

  bool need_trans_backend() const { return !stop_transform_ && trans_backend_; }

  bool need_trans_layout() const { return !stop_transform_ && trans_layout_; }

 private:
  // This is the highest-priority flag,
  // and can be set by api[data_transform->skip_transform] in the yaml file.
  bool stop_transform_ = false;

  // trans_data_type_ can be set by api[data_transform->support_trans_dtype]
  // in the yaml file.
  // trans_data_type_ only affects non-complex types;
  // complex types are always transferred, unless stop_transform_ is true.
  bool trans_data_type_ = false;

  // trans_backend_ and trans_layout_ are true by default,
  // and can only be set by the global flag.
  bool trans_backend_ = true;
  bool trans_layout_ = true;
};

std::shared_ptr<pten::DenseTensor> PrepareData(
const Tensor& input,
const pten::TensorArgDef& target_args_def,
const TransformFlag& transform_flag);

std::unique_ptr<std::vector<pten::DenseTensor>> PrepareData(
const std::vector<Tensor>& inputs,
const pten::TensorArgDef& target_args_def,
const TransformFlag& transform_flag);

} // namespace experimental
} // namespace paddle
4 changes: 2 additions & 2 deletions paddle/pten/core/kernel_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ const Kernel& KernelFactory::SelectKernelOrThrowError(
auto kernel_iter = iter->second.find(kernel_key);
// TODO(chenweihang): polish refind impl here
if (kernel_iter == iter->second.end() &&
kernel_key.layout() != pten::DataLayout::ANY) {
kernel_key.layout() != pten::DataLayout::ALL_LAYOUT) {
pten::KernelKey any_layout_kernel_key(
kernel_key.backend(), pten::DataLayout::ANY, kernel_key.dtype());
kernel_key.backend(), pten::DataLayout::ALL_LAYOUT, kernel_key.dtype());
kernel_iter = iter->second.find(any_layout_kernel_key);
}
PADDLE_ENFORCE_NE(
Expand Down
1 change: 1 addition & 0 deletions paddle/pten/core/kernel_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ struct KernelImpl<Return (*)(DevCtx, Args...), kernel_fn> {
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(paddle::platform::float16);
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const Scalar&);
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(DataType);
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(DataLayout);
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const std::vector<int64_t>&);
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const ScalarArray&);
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const std::vector<int>&);
Expand Down
6 changes: 6 additions & 0 deletions paddle/pten/infermeta/unary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,12 @@ DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
return return_meta;
}

// Infers the output meta for transfer_layout: same dtype and dims as the
// input, with the requested target layout.
DenseTensorMeta TransferLayoutInferMeta(const DenseTensorMeta& x_meta,
                                        DataLayout layout) {
  return DenseTensorMeta(x_meta.dtype, x_meta.dims, layout);
}

} // namespace pten

PT_REGISTER_INFER_META_FN(sign, pten::UnchangedInferMetaNew);
4 changes: 4 additions & 0 deletions paddle/pten/infermeta/unary.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,8 @@ DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta,
const std::vector<int64_t>& axis,
DataType dtype,
bool keep_dim);

DenseTensorMeta TransferLayoutInferMeta(const DenseTensorMeta& x_meta,
DataLayout layout);

} // namespace pten
Loading