Optimize the performance of C++ API #40640

Merged · 5 commits · Mar 17, 2022
9 changes: 8 additions & 1 deletion paddle/phi/api/include/tensor.h
@@ -324,7 +324,7 @@ class PADDLE_API Tensor final {
*
* @return std::shared_ptr<phi::TensorBase>
*/
- std::shared_ptr<phi::TensorBase> impl() const;
+ const std::shared_ptr<phi::TensorBase>& impl() const;

/**
* @brief Set the implemention of current Tensor.
@@ -333,6 +333,13 @@ class PADDLE_API Tensor final {
*/
void set_impl(const std::shared_ptr<phi::TensorBase>& impl);

+ /**
+  * @brief Set the implemention of current Tensor.
+  *
+  * @param impl
+  */
+ void set_impl(std::shared_ptr<phi::TensorBase>&& impl);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
/**
* @brief Get the stream where the tensor is currently located
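The two tensor.h changes above target shared_ptr overhead: returning impl() by const reference hands out the existing pointer without bumping the atomic reference count, and the rvalue set_impl overload lets a caller transfer ownership instead of copying it. A minimal sketch of both patterns, using simplified stand-in types rather than the real phi classes:

```cpp
#include <memory>
#include <utility>

// Stand-ins for phi::TensorBase / paddle::experimental::Tensor, only to show
// the refcount behaviour; not the real Paddle classes.
struct TensorBase { virtual ~TensorBase() = default; };

class Tensor {
 public:
  // Returning by const reference exposes the existing control block
  // without an atomic reference-count increment.
  const std::shared_ptr<TensorBase>& impl() const { return impl_; }

  // Copy overload: one atomic increment per call.
  void set_impl(const std::shared_ptr<TensorBase>& impl) { impl_ = impl; }

  // Move overload: steals the caller's ownership, no refcount change.
  void set_impl(std::shared_ptr<TensorBase>&& impl) { impl_ = std::move(impl); }

 private:
  std::shared_ptr<TensorBase> impl_;
};

int main() {
  Tensor t;
  auto base = std::make_shared<TensorBase>();
  t.set_impl(std::move(base));   // move: no atomic increment
  const auto& held = t.impl();   // reference: no copy either
  return held ? 0 : 1;
}
```

The saving per call is one atomic increment/decrement pair, which adds up on the per-operator hot path this PR is presumably targeting.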
12 changes: 3 additions & 9 deletions paddle/phi/api/lib/api_gen_utils.cc
@@ -95,12 +95,8 @@ paddle::optional<phi::MetaTensor> MakeMetaTensor(
/* ------------------ for output ----------------------- */

phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out) {
-   if (!out->initialized()) {
-     auto dense_tensor = std::make_shared<phi::DenseTensor>(
-         phi::make_intrusive<SharedStorage>(phi::TransToPhiPlace(backend)),
-         phi::DenseTensorMeta());
-     out->set_impl(dense_tensor);
-     return dense_tensor.get();
+   if (out->impl() == nullptr) {
+     out->set_impl(std::make_shared<phi::DenseTensor>());
}
return static_cast<phi::DenseTensor*>(out->impl().get());
}
@@ -111,9 +107,7 @@ std::vector<phi::DenseTensor*> SetKernelOutput(size_t out_size,
out->reserve(out_size);
std::vector<phi::DenseTensor*> results(out_size);
for (size_t i = 0; i < out_size; ++i) {
-     auto tensor_ptr = std::make_shared<phi::DenseTensor>(
-         phi::make_intrusive<SharedStorage>(phi::TransToPhiPlace(backend)),
-         phi::DenseTensorMeta());
+     auto tensor_ptr = std::make_shared<phi::DenseTensor>();
results[i] = tensor_ptr.get();
out->emplace_back();
out->back().set_impl(tensor_ptr);
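SetKernelOutput now installs a default-constructed phi::DenseTensor and lets the kernel allocate storage when it actually runs, instead of eagerly wiring up a SharedStorage object and empty meta for every output. A rough sketch of the lazy-output idea, with hypothetical stand-in types (the real DenseTensor API differs):

```cpp
#include <memory>
#include <vector>

// Hypothetical stand-ins illustrating "allocate on demand" kernel outputs;
// not the real phi::DenseTensor interface.
struct DenseTensor {
  std::vector<float> buffer;                   // storage created only when needed
  void Resize(std::size_t n) { buffer.resize(n); }
};

struct Tensor {
  std::shared_ptr<DenseTensor> impl;
};

DenseTensor* SetKernelOutput(Tensor* out) {
  // Install a cheap, empty holder; no device storage or metadata copies yet.
  if (out->impl == nullptr) {
    out->impl = std::make_shared<DenseTensor>();
  }
  return out->impl.get();
}

int main() {
  Tensor out;
  DenseTensor* dt = SetKernelOutput(&out);
  dt->Resize(8);  // the "kernel" decides the size and triggers the allocation
  return dt->buffer.size() == 8 ? 0 : 1;
}
```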
9 changes: 3 additions & 6 deletions paddle/phi/api/lib/data_transform.cc
@@ -167,10 +167,7 @@ phi::DenseTensor TransformData(const phi::DenseTensor& tensor,

if (NeedTransformPlace(
out.place(), target_args_def.backend, transform_flag)) {
-     phi::DenseTensor result(
-         phi::make_intrusive<paddle::experimental::SharedStorage>(
-             phi::TransToPhiPlace(target_args_def.backend)),
-         {out.dtype(), out.dims(), out.layout()});
+     phi::DenseTensor result;
framework::TransDataDevice(
out, phi::TransToPhiPlace(target_args_def.backend), &result);
out = result;
@@ -190,14 +187,14 @@ std::shared_ptr<phi::DenseTensor> PrepareData(
tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
!NeedTransformLayout(
tensor_in->layout(), target_args_def.layout, transform_flag))) {
-   return std::dynamic_pointer_cast<phi::DenseTensor>(tensor_in);
+   return std::static_pointer_cast<phi::DenseTensor>(tensor_in);
}

phi::DenseTensor out =
TransformData(*(static_cast<phi::DenseTensor*>(tensor_in.get())),
target_args_def,
transform_flag);
- return std::make_shared<phi::DenseTensor>(out);
+ return std::make_shared<phi::DenseTensor>(std::move(out));
}

std::shared_ptr<phi::DenseTensor> PrepareData(
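Two separate micro-optimizations appear in data_transform.cc: std::static_pointer_cast replaces std::dynamic_pointer_cast where the concrete type is already guaranteed, skipping the RTTI lookup, and the transformed tensor is moved into make_shared rather than copied. A small sketch of both, again with hypothetical stand-ins for the phi types:

```cpp
#include <memory>
#include <utility>
#include <vector>

// Hypothetical stand-ins; not the real phi classes.
struct TensorBase { virtual ~TensorBase() = default; };
struct DenseTensor : TensorBase {
  std::vector<float> data;
};

std::shared_ptr<DenseTensor> AsDense(const std::shared_ptr<TensorBase>& in) {
  // dynamic_pointer_cast consults RTTI on every call; when the caller already
  // guarantees the concrete type, static_pointer_cast does no runtime check.
  return std::static_pointer_cast<DenseTensor>(in);
}

std::shared_ptr<DenseTensor> Wrap(DenseTensor local) {
  // std::move hands the contents to the heap object instead of copying them.
  return std::make_shared<DenseTensor>(std::move(local));
}

int main() {
  std::shared_ptr<TensorBase> base = std::make_shared<DenseTensor>();
  auto dense = AsDense(base);          // no RTTI lookup; type is known
  DenseTensor local;
  local.data.assign(1024, 1.0f);
  auto out = Wrap(std::move(local));   // the 1024-float buffer is moved, not copied
  return (dense != nullptr && out->data.size() == 1024) ? 0 : 1;
}
```

How much the move saves depends on what DenseTensor's copy constructor actually does; in this stand-in it is a full buffer copy, while the real class may mostly copy metadata and shared handles. The cast change is a small constant saving, but it sits on the data-preparation path of every kernel launch.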
14 changes: 12 additions & 2 deletions paddle/phi/api/lib/tensor.cc
@@ -46,6 +46,7 @@ limitations under the License. */
* In the future, the necessary components will be moved to the this library,
* or the corresponding components will be re-implemented.
*/

#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/stream/cuda_stream.h"
@@ -142,7 +143,12 @@ PlaceType Tensor::place() const {
}

paddle::platform::Place Tensor::inner_place() const {
- return ConvertExtPlaceToInnerPlace(place());
+ PADDLE_ENFORCE_NOT_NULL(
+     impl_,
+     phi::errors::PermissionDenied(
+         "Null pointer error, the impl_ of Tensor should not be "
+         "Null when calling Tensor::inner_place()."));
+ return impl_->place();
}

bool Tensor::is_cpu() const {
@@ -286,12 +292,16 @@ Tensor Tensor::slice(int64_t begin_idx, int64_t end_idx) const {
}
}

- std::shared_ptr<phi::TensorBase> Tensor::impl() const { return impl_; }
+ const std::shared_ptr<phi::TensorBase> &Tensor::impl() const { return impl_; }

void Tensor::set_impl(const std::shared_ptr<phi::TensorBase> &impl) {
impl_ = impl;
}

+ void Tensor::set_impl(std::shared_ptr<phi::TensorBase> &&impl) {
+   impl_ = std::move(impl);
+ }

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
gpuStream_t Tensor::stream() const {
return platform::stream::get_current_stream(-1)->raw_stream();
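In tensor.cc, inner_place() now validates impl_ and reads the place straight from the implementation, rather than converting the public PlaceType enum back into an internal Place. A generic sketch of the check-then-delegate shape of the new code, with plain exceptions standing in for PADDLE_ENFORCE_NOT_NULL and invented Place/CpuTensor types:

```cpp
#include <memory>
#include <stdexcept>
#include <string>

// Invented stand-ins; the real code uses phi::Place and PADDLE_ENFORCE_NOT_NULL.
struct Place { std::string name; };
struct TensorBase {
  virtual ~TensorBase() = default;
  virtual const Place& place() const = 0;
};

class Tensor {
 public:
  explicit Tensor(std::shared_ptr<TensorBase> impl) : impl_(std::move(impl)) {}

  const Place& inner_place() const {
    if (impl_ == nullptr) {
      // One explicit check gives a precise diagnostic instead of a crash,
      // and the place is read directly rather than round-tripped through an enum.
      throw std::runtime_error(
          "impl_ of Tensor should not be null when calling inner_place()");
    }
    return impl_->place();
  }

 private:
  std::shared_ptr<TensorBase> impl_;
};

struct CpuTensor : TensorBase {
  Place p{"cpu"};
  const Place& place() const override { return p; }
};

int main() {
  Tensor t(std::make_shared<CpuTensor>());
  return t.inner_place().name == "cpu" ? 0 : 1;
}
```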
8 changes: 8 additions & 0 deletions paddle/phi/core/kernel_factory.h
@@ -197,8 +197,16 @@ class Kernel {

const KernelArgsDef& args_def() const { return args_def_; }

+ const TensorArgDef& InputAt(size_t idx) const {
+   return args_def_.input_defs().at(idx);
+ }

TensorArgDef& InputAt(size_t idx) { return args_def_.input_defs().at(idx); }

+ const TensorArgDef& OutputAt(size_t idx) const {
+   return args_def_.output_defs().at(idx);
+ }

TensorArgDef& OutputAt(size_t idx) { return args_def_.output_defs().at(idx); }

bool IsValid() { return fn_ != nullptr; }
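The const overloads of InputAt/OutputAt exist so that a kernel held through a const reference can still be queried; without them, the const auto& binding introduced in the generated code below would not compile. A compact sketch of why both overloads are needed, with hypothetical stand-ins for phi::Kernel and TensorArgDef:

```cpp
#include <cstddef>
#include <vector>

// Hypothetical stand-ins; not the real phi::Kernel / TensorArgDef.
struct TensorArgDef { int backend = 0; };

class Kernel {
 public:
  // const overload: usable through a const Kernel& for read-only queries.
  const TensorArgDef& InputAt(std::size_t idx) const { return inputs_.at(idx); }
  // non-const overload: still available for code that mutates the arg defs.
  TensorArgDef& InputAt(std::size_t idx) { return inputs_.at(idx); }

 private:
  std::vector<TensorArgDef> inputs_{TensorArgDef{}, TensorArgDef{}};
};

int main() {
  Kernel k;
  const Kernel& view = k;                  // like `const auto& kernel = ...` in the codegen
  int backend = view.InputAt(0).backend;   // compiles only because of the const overload
  k.InputAt(1).backend = 3;                // mutation still possible on a non-const Kernel
  return backend;
}
```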
2 changes: 1 addition & 1 deletion python/paddle/utils/code_gen/api_base.py
@@ -698,7 +698,7 @@ def gen_dense_tensor_kernel_code(self, code_indent, inplace_flag=False):
self.outputs['types'], 'SetKernelOutput', code_indent, inplace_flag)
api_func_name = self.get_api_func_name() + ('_' if inplace_flag else '')
return f"""
- {code_indent} auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+ {code_indent} const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
{code_indent} "{self.kernel['func'][0]}", {{kernel_backend, kernel_layout, kernel_data_type}});
{code_indent} VLOG(6) << "{self.api} API kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]";
{code_indent} VLOG(6) << "{self.api} API kernel: " << kernel;
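The generated API code previously copied the selected Kernel into a local (`auto kernel = ...`); binding it with `const auto&` keeps a reference to the object owned by the factory instead, avoiding a per-call copy of the kernel and its argument-definition tables. A generic illustration of the auto vs const auto& difference, with an invented Registry/Entry pair rather than the real phi::KernelFactory:

```cpp
#include <string>

// Invented stand-ins for illustration only.
struct Entry { std::string name = "matmul"; /* imagine heavier per-kernel state */ };

struct Registry {
  Entry entry;
  const Entry& Select() const { return entry; }  // returns a reference, like SelectKernelOrThrowError
};

int main() {
  Registry registry;

  auto copy = registry.Select();        // deduces Entry: copies the whole object
  const auto& ref = registry.Select();  // deduces const Entry&: no copy at all

  return (copy.name == ref.name) ? 0 : 1;
}
```

Because the reference is const, the generated calls to kernel.InputAt(...)/OutputAt(...) rely on the const accessor overloads added in kernel_factory.h above.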