
[Eager Hook] Support ReduceHook in GradNodeAccumulation #39674

Merged: 8 commits, Feb 19, 2022
10 changes: 10 additions & 0 deletions paddle/fluid/eager/accumulation/accumulation_node.cc
@@ -79,4 +79,14 @@ operator()(
return {{accumulated_grad}};
}

void GradNodeAccumulation::RegisterReduceHook(
const std::function<void(void)>& hook) {
reduce_hooks_.emplace_back(hook);
}

void GradNodeAccumulation::ApplyReduceHooks() {
for (auto& hook : reduce_hooks_) {
hook();
}
}
} // namespace egr
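
For reference, a minimal standalone sketch of the register/apply pattern introduced above: GradNodeAccumulation keeps a list of parameter-less callbacks and replays them in registration order when ApplyReduceHooks is called. The MockAccumulationNode class and the main() driver below are illustrative stand-ins (not Paddle APIs) and use only the C++ standard library.

// Minimal sketch of the reduce-hook register/apply pattern, assuming a
// mock node type in place of GradNodeAccumulation.
#include <functional>
#include <iostream>
#include <vector>

class MockAccumulationNode {
 public:
  // Store a parameter-less callback; hooks run in registration order.
  void RegisterReduceHook(const std::function<void(void)>& hook) {
    reduce_hooks_.emplace_back(hook);
  }

  bool ReduceHooksRegistered() const { return !reduce_hooks_.empty(); }

  // Invoke every registered hook once.
  void ApplyReduceHooks() {
    for (auto& hook : reduce_hooks_) {
      hook();
    }
  }

 private:
  std::vector<std::function<void(void)>> reduce_hooks_;
};

int main() {
  MockAccumulationNode node;
  float grad = 30.0f;
  // A reduce hook typically mutates state as a side effect
  // (e.g. triggering a gradient allreduce); here it just rewrites a value.
  node.RegisterReduceHook([&grad]() { grad = 36.0f; });
  if (node.ReduceHooksRegistered()) {
    node.ApplyReduceHooks();
  }
  std::cout << grad << std::endl;  // prints 36
  return 0;
}
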
13 changes: 13 additions & 0 deletions paddle/fluid/eager/accumulation/accumulation_node.h
@@ -35,12 +35,25 @@ class GradNodeAccumulation : public GradNodeBase {

paddle::experimental::Tensor* Grad() { return &accumulated_grad; }

/**
* Register ReduceHook
* **/
void RegisterReduceHook(const std::function<void(void)>& hook);

/**
* Apply ReduceHook here
* **/
inline bool ReduceHooksRegistered() { return reduce_hooks_.size() != 0; }
void ApplyReduceHooks();

private:
paddle::experimental::Tensor accumulated_grad;

std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
retain_grad_hook_;

std::vector<std::function<void(void)>> reduce_hooks_;
};

} // namespace egr
@@ -171,10 +171,6 @@ operator()(
&out);
}

// Apply Reduce Hooks
if (ReduceHooksRegistered()) {
ApplyReduceHooks();
}
return {{out}};
}

19 changes: 15 additions & 4 deletions paddle/fluid/eager/api/utils/hook_utils.cc
@@ -35,10 +35,21 @@ void RegisterGradientHookForTensor(

void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor,
const std::function<void(void)>& hook) {
// Find grad_node and out_rank from AutogradMeta
std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);

grad_node->RegisterReduceHook(hook);
if (IsLeafTensor(tensor)) {
VLOG(6) << "Register ReduceHook for leaf tensor";
std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);
PADDLE_ENFORCE(
grad_node.get() != nullptr,
paddle::platform::errors::Fatal("Detected NULL grad_node,"
"Leaf tensor should have had grad_node "
"with type: GradNodeAccumulation"));
auto accumulation_grad_node =
std::dynamic_pointer_cast<GradNodeAccumulation>(grad_node);
accumulation_grad_node->RegisterReduceHook(hook);
} else {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Only can register reduce hook for leaf Tensor."));
}
}

void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
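
The change above restricts reduce hooks to leaf tensors: the tensor's grad node is fetched, checked for null, downcast to GradNodeAccumulation with std::dynamic_pointer_cast, and the hook is registered on that node; non-leaf tensors raise a fatal error. Below is a standalone sketch of that leaf-only dispatch, assuming mock stand-ins (MockGradNodeBase, MockAccumulationNode, a free RegisterReduceHook helper) rather than the real Paddle types.

// Sketch of the leaf-only guard, using mock types in place of
// GradNodeBase / GradNodeAccumulation and a plain exception in place
// of PADDLE_THROW.
#include <functional>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <vector>

struct MockGradNodeBase {
  virtual ~MockGradNodeBase() = default;
};

struct MockAccumulationNode : MockGradNodeBase {
  void RegisterReduceHook(const std::function<void(void)>& hook) {
    reduce_hooks_.emplace_back(hook);
  }
  std::vector<std::function<void(void)>> reduce_hooks_;
};

// Only a node that is actually an accumulation node (i.e. the grad node of
// a leaf tensor) may accept a reduce hook; anything else is rejected.
void RegisterReduceHook(const std::shared_ptr<MockGradNodeBase>& grad_node,
                        const std::function<void(void)>& hook) {
  auto accumulation_node =
      std::dynamic_pointer_cast<MockAccumulationNode>(grad_node);
  if (!accumulation_node) {
    throw std::runtime_error(
        "Can only register reduce hook for a leaf Tensor.");
  }
  accumulation_node->RegisterReduceHook(hook);
}

int main() {
  // Leaf case: the downcast succeeds and the hook is stored.
  std::shared_ptr<MockGradNodeBase> leaf =
      std::make_shared<MockAccumulationNode>();
  RegisterReduceHook(leaf, [] { /* e.g. trigger a gradient allreduce */ });
  std::cout << "hook registered on leaf node" << std::endl;

  // Non-leaf case: the downcast fails and the guard fires.
  std::shared_ptr<MockGradNodeBase> non_leaf =
      std::make_shared<MockGradNodeBase>();
  try {
    RegisterReduceHook(non_leaf, [] {});
  } catch (const std::exception& e) {
    std::cout << e.what() << std::endl;
  }
  return 0;
}
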
9 changes: 0 additions & 9 deletions paddle/fluid/eager/grad_node_info.cc
@@ -214,10 +214,6 @@ void GradNodeBase::RegisterGradientHook(
gradient_hooks_.emplace_back(std::make_tuple(slot_id, rank, hook));
}

void GradNodeBase::RegisterReduceHook(const std::function<void(void)>& hook) {
reduce_hooks_.emplace_back(hook);
}

std::vector<std::vector<paddle::experimental::Tensor>>
GradNodeBase::ApplyGradientHooks(
const std::vector<std::vector<paddle::experimental::Tensor>>& tensors) {
@@ -267,9 +263,4 @@ GradNodeBase::ApplyGradientHooks(
return outs;
}

void GradNodeBase::ApplyReduceHooks() {
for (auto& hook : reduce_hooks_) {
hook();
}
}
} // namespace egr
8 changes: 2 additions & 6 deletions paddle/fluid/eager/grad_node_info.h
@@ -133,22 +133,19 @@ class GradNodeBase {
* **/
void SetDefaultGradInOutMeta();
/**
* Register GradientHook or ReduceHook
* Register GradientHook
* **/
void RegisterGradientHook(size_t slot_id, size_t rank,
const std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>& hook);
void RegisterReduceHook(const std::function<void(void)>& hook);

/**
* Apply GradientHook or ReduceHook
* Apply GradientHook
* **/
inline bool GradientHooksRegistered() { return gradient_hooks_.size() != 0; }
inline bool ReduceHooksRegistered() { return reduce_hooks_.size() != 0; }

std::vector<std::vector<paddle::experimental::Tensor>> ApplyGradientHooks(
const std::vector<std::vector<paddle::experimental::Tensor>>& tensors);
void ApplyReduceHooks();

private:
// TODO(jiabin): Use SmallVector instead after merge PR from develop
@@ -173,7 +170,6 @@
/* hook */ std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>>>
gradient_hooks_;
std::vector<std::function<void(void)>> reduce_hooks_;
};

class Edge {
@@ -61,7 +61,7 @@ TEST(AccumulationNode, Tensor) {
// AccumulationNode
GradNodeAccumulation node = GradNodeAccumulation();

// Hook
// Hook, RetainGrad
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
hook = [&grad_et](const paddle::experimental::Tensor& t) {
@@ -88,4 +88,46 @@ TEST(AccumulationNode, Tensor) {
std::dynamic_pointer_cast<pten::DenseTensor>(grad_et.impl())
->data<paddle::platform::float16>();
CHECK_EQ(ret_grad_et_ptr[0], paddle::platform::float16(30.0f));

// Reduce Hook case 1: Call RegisterReduceHook and run operator()
VLOG(6) << "Test Reduce Hook";
auto reduce_hook_1 = [&](void) -> void {
auto* grad_et_ptr =
std::dynamic_pointer_cast<pten::DenseTensor>(grad_et.impl())
->data<paddle::platform::float16>();
grad_et_ptr[0] = 36.0;
VLOG(6) << "Running Reduce Hook";
};

node.RegisterReduceHook(reduce_hook_1);

// operator()
paddle::experimental::Tensor _ret = node({{et0}})[0][0];

// Check operator() result, should be 36.0
auto* _ret_ptr = std::dynamic_pointer_cast<pten::DenseTensor>(_ret.impl())
->data<paddle::platform::float16>();
CHECK_EQ(_ret_ptr[0], paddle::platform::float16(36.0f));

// Check Retain Grad, should be 36.0
auto* _ret_grad_et_ptr =
std::dynamic_pointer_cast<pten::DenseTensor>(grad_et.impl())
->data<paddle::platform::float16>();
CHECK_EQ(_ret_grad_et_ptr[0], paddle::platform::float16(36.0f));

// Reduce Hook case 2: Call RegisterReduceHook and ApplyReduceHooks directly
VLOG(6) << "Test Reduce Hook";
auto reduce_hook_2 = [&](void) -> void {
auto* ret_et0_ptr = std::dynamic_pointer_cast<pten::DenseTensor>(et0.impl())
->data<paddle::platform::float16>();
ret_et0_ptr[0] = 100.0; // set to 100.0
VLOG(6) << "Running Reduce Hook";
};
node.RegisterReduceHook(reduce_hook_2);
node.ApplyReduceHooks();

// Check ApplyReduceHooks result
CHECK_EQ(std::dynamic_pointer_cast<pten::DenseTensor>(et0.impl())
->data<paddle::platform::float16>()[0],
paddle::platform::float16(100.0f));
}
@@ -119,19 +119,6 @@ TEST(GradNodeInfo, GradNodeBase) {
std::dynamic_pointer_cast<pten::DenseTensor>(grad_hook_res[0][0].impl())
->data<float>()[0],
11.0);

VLOG(6) << "Test Reduce Hook";
auto reduce_hook = [&](void) -> void {
auto* et_ptr =
std::dynamic_pointer_cast<pten::DenseTensor>(et1.impl())->data<float>();
et_ptr[0] = 100.0;
VLOG(6) << "Running Reduce Hook";
};
grad_test_node0->RegisterReduceHook(reduce_hook);
grad_test_node0->ApplyReduceHooks();
CHECK_EQ(std::dynamic_pointer_cast<pten::DenseTensor>(et1.impl())
->data<float>()[0],
100.0);
}

TEST(GradNodeInfo, Edge) {
52 changes: 51 additions & 1 deletion paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc
@@ -73,12 +73,24 @@ TEST(Hook_intermidiate, Sigmoid) {
const paddle::experimental::Tensor&)>
hook = &hook_function;

VLOG(6) << "Make ReduceHook function";
auto reduce_hook = [&](void) -> void {
auto* t_ptr = std::dynamic_pointer_cast<pten::DenseTensor>(tensor.impl())
->data<float>();
for (int i = 0; i < tensor.numel(); i++) {
t_ptr[i] = 100.0; // set to 100.0
}
};

VLOG(6) << "Retain Grad for Tensor";
egr_utils_api::RetainGradForTensor(tensor);

VLOG(6) << "Register GradientHook for Tensor";
egr_utils_api::RegisterGradientHookForTensor(tensor, hook);

VLOG(6) << "Register ReduceHook for Tensor";
egr_utils_api::RegisterReduceHookForTensor(tensor, reduce_hook);

VLOG(6) << "Runing Forward";
auto output_tensor = sigmoid_dygraph_function(tensor, {});
VLOG(6) << "Finish Forward";
@@ -92,6 +104,13 @@ TEST(Hook_intermidiate, Sigmoid) {
VLOG(6) << "Finish Backward";

eager_test::CompareGradTensorWithValue<float>(tensor, 0.25 + 3);

VLOG(6) << "Checking ReduceHook results";
for (int i = 0; i < tensor.numel(); i++) {
CHECK_EQ(std::dynamic_pointer_cast<pten::DenseTensor>(tensor.impl())
->data<float>()[i],
static_cast<float>(100.0f));
}
VLOG(6) << "After Tests";
}

@@ -118,8 +137,17 @@ TEST(Hook_intermidiate, ElementwiseAdd) {
const paddle::experimental::Tensor&)>
hook = &hook_function;

auto reduce_hook = [&](void) -> void {
auto* t_ptr =
std::dynamic_pointer_cast<pten::DenseTensor>(Y.impl())->data<float>();
for (int i = 0; i < Y.numel(); i++) {
t_ptr[i] = 100.0; // set to 100.0
}
};

egr_utils_api::RetainGradForTensor(Y);
egr_utils_api::RegisterGradientHookForTensor(Y, hook);
egr_utils_api::RegisterReduceHookForTensor(Y, reduce_hook);

auto output_tensor = elementwise_add_dygraph_function(X, Y, {});

@@ -130,6 +158,13 @@

eager_test::CompareGradTensorWithValue<float>(X, 1.0);
eager_test::CompareGradTensorWithValue<float>(Y, 4.0);

// Checking ReduceHook results
for (int i = 0; i < Y.numel(); i++) {
CHECK_EQ(std::dynamic_pointer_cast<pten::DenseTensor>(Y.impl())
->data<float>()[i],
static_cast<float>(100.0f));
}
}

TEST(Hook_intermidiate, Matmul_v2) {
@@ -155,8 +190,17 @@ TEST(Hook_intermidiate, Matmul_v2) {
const paddle::experimental::Tensor&)>
hook = &hook_function;

auto reduce_hook = [&](void) -> void {
auto* t_ptr =
std::dynamic_pointer_cast<pten::DenseTensor>(Y.impl())->data<float>();
for (int i = 0; i < Y.numel(); i++) {
t_ptr[i] = 100.0; // set to 100.0
}
};

egr_utils_api::RetainGradForTensor(Y);
egr_utils_api::RegisterGradientHookForTensor(Y, hook);
egr_utils_api::RegisterReduceHookForTensor(Y, reduce_hook);

auto output_tensor = matmul_v2_dygraph_function(
X, Y, {{"trans_x", false}, {"trans_y", false}});
@@ -168,8 +212,14 @@

eager_test::CompareGradTensorWithValue<float>(X, 2.0 * 20);
eager_test::CompareGradTensorWithValue<float>(Y, 3.0 * 4 + 3);
}

// Checking ReduceHook results
for (int i = 0; i < Y.numel(); i++) {
CHECK_EQ(std::dynamic_pointer_cast<pten::DenseTensor>(Y.impl())
->data<float>()[i],
static_cast<float>(100.0f));
}
}
} // namespace egr

USE_OP(sigmoid);