Refactored GradNodeAccumulation data structure and behaviour (#39526)
* Refactored GradNodeAccumulation data structure and behaviour

* Fixed CI issues

* Fix compilation issues

* Fixed minor issues

* Reverted changes for intermediate and OverwriteOutput

* Fixed minor issue

* Fixed code format issues

* Fixed CI-Coverage issue

* Fixed CI issues
jim19930609 authored Feb 24, 2022
1 parent 4e26fa5 commit 1abfc8d
Showing 18 changed files with 215 additions and 243 deletions.
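
Summary of the refactor: GradNodeAccumulation no longer owns an accumulated_grad tensor and a retain_grad_hook_; its constructor now takes the tensor's AutogradMeta and captures meta->WeakGrad() into a std::weak_ptr, and operator() writes the (possibly hooked) incoming gradient through that weak pointer only while the grad tensor is still alive, returning the incoming gradient itself. The standalone C++ sketch below is not part of the diff; it models the ownership pattern with plain std types, and MetaModel, AccumulationModel, and the scalar "gradient" are illustrative stand-ins rather than Paddle classes.

#include <iostream>
#include <memory>

// Illustrative stand-ins, not Paddle classes.
struct MetaModel {
  // The autograd meta owns the grad storage; WeakGrad() hands out a weak reference.
  std::shared_ptr<double> grad = std::make_shared<double>(0.0);
  std::weak_ptr<double> WeakGrad() const { return grad; }
};

class AccumulationModel {
 public:
  explicit AccumulationModel(MetaModel* meta) : weak_grad_(meta->WeakGrad()) {}

  // Mirrors operator(): accumulate into the meta-owned grad only if it is alive,
  // then return the incoming gradient rather than the accumulated value.
  double Accumulate(double grad_in) {
    if (!weak_grad_.expired()) {
      *weak_grad_.lock() += grad_in;
    }
    return grad_in;
  }

 private:
  std::weak_ptr<double> weak_grad_;
};

int main() {
  MetaModel meta;
  AccumulationModel node(&meta);
  node.Accumulate(10.0);
  double out = node.Accumulate(20.0);
  std::cout << out << " " << *meta.grad << "\n";  // prints "20 30"
}

This also explains the updated unit test further down: the second call to operator() returns 20.0 (the hooked input), while the grad read back through EagerUtils::mutable_grad accumulates to 30.0.
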
23 changes: 9 additions & 14 deletions paddle/fluid/eager/accumulation/accumulation_node.cc
@@ -25,6 +25,8 @@

#include "glog/logging.h"

namespace egr {

static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
const paddle::experimental::Tensor& t) {
if (!tensor->defined() || !tensor->initialized()) {
@@ -36,14 +38,6 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
}
}

namespace egr {

void GradNodeAccumulation::RetainGrad(
const std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>& hook) {
retain_grad_hook_ = hook;
}

std::vector<std::vector<paddle::experimental::Tensor>> GradNodeAccumulation::
operator()(
const std::vector<std::vector<paddle::experimental::Tensor>>& grads) {
@@ -59,25 +53,26 @@ operator()(
"However received: %d in slot %d .",
grads[0].size(), 0));
// Apply Gradient Hooks
paddle::experimental::Tensor grad_out;
if (GradientHooksRegistered()) {
std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads =
ApplyGradientHooks(grads);
// TODO(jiabin): It's little weird
CopyOrAddTensor(&accumulated_grad, hooked_grads[0][0]);
grad_out = hooked_grads[0][0];
} else {
CopyOrAddTensor(&accumulated_grad, grads[0][0]);
grad_out = grads[0][0];
}

if (retain_grad_hook_ != nullptr) {
retain_grad_hook_(accumulated_grad);
if (!weak_grad_.expired()) {
auto grad = weak_grad_.lock();
CopyOrAddTensor(grad.get(), grad_out);
}

// Apply Reduce Hooks
if (ReduceHooksRegistered()) {
ApplyReduceHooks();
}

return {{accumulated_grad}};
return {{grad_out}};
}

void GradNodeAccumulation::RegisterReduceHook(
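
CopyOrAddTensor, now declared inside namespace egr, copies the first incoming gradient into a tensor that is not yet defined or initialized and, judging by the accumulation checks in the unit test, adds subsequent gradients onto it. A minimal standalone illustration of that copy-or-add contract (std::optional<double> stands in for the Tensor; this is not Paddle code):

#include <cassert>
#include <optional>

// Copy into an empty slot, otherwise accumulate by addition.
void CopyOrAdd(std::optional<double>* acc, double g) {
  if (!acc->has_value()) {
    *acc = g;    // first gradient: plain copy
  } else {
    **acc += g;  // later gradients: accumulate
  }
}

int main() {
  std::optional<double> grad;
  CopyOrAdd(&grad, 10.0);
  CopyOrAdd(&grad, 20.0);
  assert(*grad == 30.0);  // matches the 10 + 20 = 30 expectation in the unit test
  return 0;
}
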
13 changes: 6 additions & 7 deletions paddle/fluid/eager/accumulation/accumulation_node.h
@@ -14,14 +14,18 @@

#pragma once

#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/grad_node_info.h"

namespace egr {

class GradNodeAccumulation : public GradNodeBase {
public:
// Constructor: configure fwd input tensors to grad node
GradNodeAccumulation() : GradNodeBase(1, 1) { SetDefaultGradInOutMeta(); }
explicit GradNodeAccumulation(AutogradMeta* meta) : GradNodeBase(1, 1) {
weak_grad_ = meta->WeakGrad();
SetDefaultGradInOutMeta();
}

~GradNodeAccumulation() override = default;

@@ -30,11 +34,6 @@ class GradNodeAccumulation : public GradNodeBase {
const std::vector<std::vector<paddle::experimental::Tensor>>& grads)
override;

void RetainGrad(const std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>& hook);

paddle::experimental::Tensor* Grad() { return &accumulated_grad; }

std::string name() { return "GradNodeAccumulation"; }

/**
@@ -49,7 +48,7 @@
void ApplyReduceHooks();

private:
paddle::experimental::Tensor accumulated_grad;
std::weak_ptr<paddle::experimental::Tensor> weak_grad_;

std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
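
With the new explicit constructor, every call site hands the tensor's AutogradMeta to the node so that weak_grad_ can be bound to meta->WeakGrad(). The pattern, as it appears later in this diff (tensor_utils.cc and grad_node_info.cc), is:

auto meta = EagerUtils::autograd_meta(&out);
auto accumulation_node = std::make_shared<GradNodeAccumulation>(meta);
meta->SetGradNode(accumulation_node);
meta->SetStopGradient(false);
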
32 changes: 17 additions & 15 deletions paddle/fluid/eager/api/utils/hook_utils.cc
@@ -52,9 +52,15 @@ void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor,
}
}

void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
// TODO(jiabin): Support More Tensor type here
static void RetainGradForRegularNode(
const paddle::experimental::Tensor& tensor) {
AutogradMeta* meta = EagerUtils::unsafe_autograd_meta(tensor);
if (meta->RetainGrads()) {
return;
} else {
meta->SetRetainGrads(true);
}

std::weak_ptr<paddle::experimental::Tensor> weak_grad_tensor =
meta->WeakGrad();

@@ -79,21 +85,17 @@ void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
}
};

if (IsLeafTensor(tensor)) {
// Add RetainGrad as PostHook to AccumulationNode
std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);
PADDLE_ENFORCE(
grad_node.get() != nullptr,
paddle::platform::errors::Fatal("Detected NULL grad_node"
"Leaf tensor should have had grad_node "
"with type: GradNodeAccumulation"));
auto accumulation_grad_node =
std::dynamic_pointer_cast<GradNodeAccumulation>(grad_node);
accumulation_grad_node->RetainGrad(hook);
// Append to GradientHooks
RegisterGradientHookForTensor(tensor, hook);
}

void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
if (IsLeafTensor(tensor)) {
// Leaf tensor's grad will always be retained
// Refer to implementation of AccumulationNode for more details
return;
} else {
// Append to GradientHooks
RegisterGradientHookForTensor(tensor, hook);
RetainGradForRegularNode(tensor);
}
}

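
Net effect of this file: RetainGradForTensor becomes a no-op for leaf tensors, because GradNodeAccumulation already writes into the meta-owned grad, while non-leaf tensors go through RetainGradForRegularNode, which flags the meta exactly once (the RetainGrads()/SetRetainGrads(true) guard) and registers a gradient hook that stores the gradient through meta->WeakGrad(). Below is a standalone sketch of that flag-then-register pattern; MetaLite and RetainGradLite are illustrative names, not Paddle APIs.

#include <functional>
#include <memory>
#include <vector>

// Illustrative stand-ins, not Paddle classes.
struct MetaLite {
  bool retain_grads = false;
  std::shared_ptr<double> grad = std::make_shared<double>(0.0);
  std::weak_ptr<double> WeakGrad() const { return grad; }
  std::vector<std::function<double(double)>> gradient_hooks;
};

// Register the retain-grad hook at most once per tensor.
void RetainGradLite(MetaLite* meta) {
  if (meta->retain_grads) return;  // already registered: do nothing
  meta->retain_grads = true;

  std::weak_ptr<double> weak_grad = meta->WeakGrad();
  meta->gradient_hooks.push_back([weak_grad](double g) {
    if (auto grad = weak_grad.lock()) *grad = g;  // stash a copy of the gradient
    return g;                                     // and pass it through unchanged
  });
}

int main() {
  MetaLite meta;
  RetainGradLite(&meta);
  RetainGradLite(&meta);  // second call is a no-op
  double out = meta.gradient_hooks.front()(3.0);
  return (meta.gradient_hooks.size() == 1 && out == 3.0 && *meta.grad == 3.0) ? 0 : 1;
}
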
2 changes: 1 addition & 1 deletion paddle/fluid/eager/api/utils/tensor_utils.cc
@@ -47,7 +47,7 @@ paddle::experimental::Tensor CreateTensorWithValue(

auto meta = EagerUtils::autograd_meta(&out);
if (is_leaf) {
auto accumulation_node = std::make_shared<GradNodeAccumulation>();
auto accumulation_node = std::make_shared<GradNodeAccumulation>(meta);
meta->SetGradNode(accumulation_node);
meta->SetStopGradient(false);
}
4 changes: 4 additions & 0 deletions paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -1031,6 +1031,8 @@ static std::string GenerateGradNodeCreationContent(
const std::string& output_name = output.name();
const std::string& output_autograd_name = "p_autograd_" + output_name;

// Skip Intermediate Tensor

if (output.duplicable()) {
const char* GET_MULTI_AUTOGRAD_META_TEMPLATE =
" std::vector<egr::AutogradMeta*> %s = "
@@ -1145,6 +1147,8 @@ static std::string GenerateGradNodeCreationContent(
const std::string& output_autograd_name = "p_autograd_" + output_name;
size_t output_position = fwd_outputs_name_pos_map.at(output_name);

// Intermediate Tensor does not require SetHistory, nor RetainGrad

if (output.duplicable()) {
pass_stop_gradient_args += ", &" + output_autograd_name;
const char* SET_OUT_RANK_TEMPLATE =
1 change: 1 addition & 0 deletions paddle/fluid/eager/autograd_meta.h
@@ -97,6 +97,7 @@ class AutogradMeta : public AbstractAutogradMeta {
"Should Not set NULL as GradNode pointer, since "
"our default Edge and autogradMeta has nullptr for "
"grad node. Set Nullptr will lead error."));

grad_node_ = grad_node;
}

4 changes: 2 additions & 2 deletions paddle/fluid/eager/grad_node_info.cc
@@ -53,7 +53,7 @@ void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
}
@@ -76,7 +76,7 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
<< this->name() << " to " << meta->GetMutableGradNode()->name();
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
15 changes: 7 additions & 8 deletions paddle/fluid/eager/tensor_wrapper.h
@@ -66,14 +66,13 @@
}

intermidiate_tensor_.set_name(tensor.name() + "@Saved");
PADDLE_ENFORCE_NOT_NULL(
EagerUtils::unsafe_autograd_meta(tensor),
paddle::platform::errors::Fatal(
"Full reserved Tensor should not have null autograd meta, since "
"tensor_wrapper is used to build backward info. There is no way "
"for us to build it with null autograd_meta."));
// copy output_rank
out_rank_info_ = EagerUtils::OutRankInfo(tensor);

// If an output is marked "intermediate", we won't create
// autograd_meta for it.
// In that case, simply skip OutRankInfo Copy
if (EagerUtils::nullable_autograd_meta(tensor)) {
out_rank_info_ = EagerUtils::OutRankInfo(tensor);
}
}

paddle::experimental::Tensor recover(
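
TensorWrapper previously enforced that every wrapped tensor carries autograd meta; since intermediate outputs no longer get one, it now probes with nullable_autograd_meta and copies OutRankInfo only when the meta exists. A standalone sketch of that guard (the Lite types are illustrative, not Paddle classes):

#include <cstddef>
#include <utility>

// Illustrative stand-ins, not Paddle classes.
struct AutogradMetaLite {
  std::pair<size_t, size_t> out_rank_info{0, 0};
};

struct TensorWrapperLite {
  std::pair<size_t, size_t> out_rank_info_{0, 0};

  // nullable_meta is nullptr for intermediate outputs, which carry no autograd meta.
  void CaptureRankInfo(const AutogradMetaLite* nullable_meta) {
    if (nullable_meta != nullptr) {
      out_rank_info_ = nullable_meta->out_rank_info;
    }
  }
};

int main() {
  AutogradMetaLite meta;
  meta.out_rank_info = {2, 1};
  TensorWrapperLite wrapper;
  wrapper.CaptureRankInfo(&meta);    // regular output: rank info copied
  wrapper.CaptureRankInfo(nullptr);  // intermediate output: guard skips the copy
  return (wrapper.out_rank_info_.first == 2 && wrapper.out_rank_info_.second == 1) ? 0 : 1;
}
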
@@ -17,11 +17,13 @@
#include "gtest/gtest.h"

#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/utils/hook_utils.h"
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/grad_tensor_holder.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/fluid/eager/utils.h"

#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/kernel_registry.h"

// TODO(jiabin): remove nolint here!!!
@@ -37,7 +39,7 @@ TEST(AccumulationNode, Tensor) {
.get(),
meta);
dt0->mutable_data<paddle::platform::float16>(
paddle::platform::CPUPlace())[0] = 10.0;
paddle::platform::CPUPlace())[0] = paddle::platform::float16(10.0f);
paddle::experimental::Tensor et0 = paddle::experimental::Tensor(dt0);

std::shared_ptr<phi::DenseTensor> dt1 = std::make_shared<phi::DenseTensor>(
@@ -47,84 +49,100 @@
meta);

dt1->mutable_data<paddle::platform::float16>(
paddle::platform::CPUPlace())[0] = 20.0;
paddle::platform::CPUPlace())[0] = paddle::platform::float16(20.0f);
paddle::experimental::Tensor et1 = paddle::experimental::Tensor(dt1);

std::shared_ptr<phi::DenseTensor> input_dt =
std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace())
.get(),
meta);
paddle::experimental::Tensor input_et =
paddle::experimental::Tensor(input_dt);
auto grad_meta = EagerUtils::autograd_meta(&input_et);

// Initialize Grad Tensor
std::shared_ptr<phi::DenseTensor> grad_dt =
std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace())
.get(),
meta);
paddle::experimental::Tensor grad_et = paddle::experimental::Tensor(grad_dt);
grad_dt->mutable_data<paddle::platform::float16>(
paddle::platform::CPUPlace())[0] = paddle::platform::float16(0.0f);
grad_meta->MutableGrad()->set_impl(grad_dt);

// AccumulationNode
GradNodeAccumulation node = GradNodeAccumulation();

// Hook, RetainGrad
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
hook = [&grad_et](const paddle::experimental::Tensor& t) {
grad_et.set_impl(t.impl());
return grad_et;
};
node.RetainGrad(hook);
auto node = std::make_shared<GradNodeAccumulation>(grad_meta);
grad_meta->SetGradNode(node);
grad_meta->SetStopGradient(false);

// operator()
paddle::experimental::Tensor ret_et0 = node({{et0}})[0][0];
paddle::experimental::Tensor ret_et0 = node->operator()({{et0}})[0][0];
auto* ret_et0_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(ret_et0.impl())
->data<paddle::platform::float16>();
CHECK_EQ(ret_et0_ptr[0], paddle::platform::float16(10.0f));

paddle::experimental::Tensor ret_et1 = node({{et1}})[0][0];
paddle::experimental::Tensor ret_et1 = node->operator()({{et1}})[0][0];

auto* ret_et1_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(ret_et1.impl())
->data<paddle::platform::float16>();
CHECK_EQ(ret_et1_ptr[0], paddle::platform::float16(30.0f));
CHECK_EQ(ret_et1_ptr[0], paddle::platform::float16(20.0f));

// Retain Grad
auto* ret_grad_et_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(grad_et.impl())
->data<paddle::platform::float16>();
CHECK_EQ(ret_grad_et_ptr[0], paddle::platform::float16(30.0f));
// Check Retain Grad
CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(et0.impl())
->data<paddle::platform::float16>()[0],
paddle::platform::float16(10.0f));
paddle::experimental::Tensor* grad = EagerUtils::mutable_grad(input_et);
auto* grad_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(grad->impl())
->data<paddle::platform::float16>();
CHECK_EQ(grad_ptr[0], paddle::platform::float16(30.0f));

// Reduce Hook case 1: Call RegisterReduceHook and run operator()
VLOG(6) << "Test Reduce Hook";
CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(et0.impl())
->data<paddle::platform::float16>()[0],
paddle::platform::float16(10.0f));

auto reduce_hook_1 = [&](void) -> void {
auto* grad_et_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(grad_et.impl())
->data<paddle::platform::float16>();
grad_et_ptr[0] = 36.0;
auto* input_et_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(input_et.impl())
->mutable_data<paddle::platform::float16>(
paddle::platform::CPUPlace());
input_et_ptr[0] = 36.0;
VLOG(6) << "Running Reduce Hook";
};

node.RegisterReduceHook(reduce_hook_1);
node->RegisterReduceHook(reduce_hook_1);

// operator()
paddle::experimental::Tensor _ret = node({{et0}})[0][0];
paddle::experimental::Tensor _ret = node->operator()({{et0}})[0][0];

// Check operator() result, should be 36.0
auto* _ret_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(_ret.impl())
->data<paddle::platform::float16>();
CHECK_EQ(_ret_ptr[0], paddle::platform::float16(36.0f));
CHECK_EQ(_ret_ptr[0], paddle::platform::float16(10.0f));

// Check Retain Grad, should be 36.0
auto* _ret_grad_et_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(grad_et.impl())
auto* _ret_input_et_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(input_et.impl())
->data<paddle::platform::float16>();
CHECK_EQ(_ret_grad_et_ptr[0], paddle::platform::float16(36.0f));
CHECK_EQ(_ret_input_et_ptr[0], paddle::platform::float16(36.0f));

// Reduce Hook case 2: Call RegisterReduceHook and ApplyReduceHooks directly
VLOG(6) << "Test Reduce Hook";
auto reduce_hook_2 = [&](void) -> void {
auto* ret_et0_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(et0.impl())
->data<paddle::platform::float16>();
->mutable_data<paddle::platform::float16>(
paddle::platform::CPUPlace());
ret_et0_ptr[0] = 100.0; // set to 100.0
VLOG(6) << "Running Reduce Hook";
};
node.RegisterReduceHook(reduce_hook_2);
node.ApplyReduceHooks();
node->RegisterReduceHook(reduce_hook_2);
node->ApplyReduceHooks();

// Check ApplyReduceHooks result
CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(et0.impl())