Added RunBackward and HookUtils to Eager Dygraph (PaddlePaddle#37599)
jim19930609 authored and Zjq9409 committed Dec 10, 2021
1 parent 8f0a578 commit 0d32f7e
Showing 13 changed files with 997 additions and 2 deletions.
1 change: 1 addition & 0 deletions paddle/fluid/eager/CMakeLists.txt
@@ -6,3 +6,4 @@
cc_library(autograd_meta SRCS autograd_meta.cc DEPS pten pten_api)
cc_library(grad_node_info SRCS grad_node_info.cc DEPS pten pten_api)
cc_library(grad_tensor_holder SRCS grad_tensor_holder.cc DEPS grad_node_info gradient_accumulation)
cc_library(utils SRCS utils.cc DEPS pten pten_api global_utils layer proto_desc operator op_registry variable_helper memcpy scale_op autograd_meta)
+cc_library(backward SRCS backward.cc DEPS grad_tensor_holder utils autograd_meta grad_node_info)
2 changes: 1 addition & 1 deletion paddle/fluid/eager/api/CMakeLists.txt
@@ -1,4 +1,4 @@
add_subdirectory(utils)
add_subdirectory(generated)

-cc_library(eager_api SRCS all.cc DEPS global_utils eager_scale)
+cc_library(eager_api SRCS all.cc DEPS tensor_utils hook_utils global_utils eager_scale)
2 changes: 2 additions & 0 deletions paddle/fluid/eager/api/all.h
@@ -16,3 +16,5 @@

#include "paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/api/utils/hook_utils.h"
#include "paddle/fluid/eager/api/utils/tensor_utils.h"
1 change: 1 addition & 0 deletions paddle/fluid/eager/api/utils/CMakeLists.txt
@@ -1,2 +1,3 @@
cc_library(tensor_utils SRCS tensor_utils.cc DEPS pten pten_api autograd_meta grad_node_info accumulation_node)
+cc_library(hook_utils SRCS hook_utils.cc DEPS pten tensor_utils autograd_meta grad_node_info utils accumulation_node)
cc_library(global_utils SRCS global_utils.cc DEPS place)
93 changes: 93 additions & 0 deletions paddle/fluid/eager/api/utils/hook_utils.cc
@@ -0,0 +1,93 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/eager/api/utils/hook_utils.h"
#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/utils/tensor_utils.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/pten/core/dense_tensor.h"

namespace egr {

void RegisterGradientHookForTensor(
const egr::EagerTensor& tensor,
std::function<egr::EagerTensor(const egr::EagerTensor&)>& hook) {
// Find grad_node and out_rank from AutogradMeta
std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);
auto rank_info = EagerUtils::unsafe_autograd_meta(tensor)->OutRankInfo();

grad_node->RegisterGradientHook(rank_info.first, rank_info.second, hook);
}

void RegisterReduceHookForTensor(const egr::EagerTensor& tensor,
const std::function<void(void)>& hook) {
// Find grad_node and out_rank from AutogradMeta
std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);

grad_node->RegisterReduceHook(hook);
}

void RetainGradForTensor(const egr::EagerTensor& tensor) {
// TODO(jiabin): Support more tensor types here
AutogradMeta* meta = EagerUtils::unsafe_autograd_meta(tensor);
egr::EagerTensor* grad_tensor = meta->MutableGrad();

// Define Hook
std::function<egr::EagerTensor(const egr::EagerTensor&)> hook =
[grad_tensor](const egr::EagerTensor& t) {
if (!grad_tensor) {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Detected null grad_tensor."
"Grad tensor in AutogradMeta of should not be nullptr"));
}
if (t.defined()) {
// Simply Copy impl() to grad_tensor
grad_tensor->set_impl(t.impl());
return *grad_tensor;
} else {
PADDLE_ENFORCE_EQ(
t.Var().IsInitialized(), true,
paddle::platform::errors::Fatal(
"Detected uninitialized variable, causing segmentation fault "
"inside the hook."
"Variable %s has to be initialized while we need to set it."
"please check tensor initialization status.",
t.name()));
grad_tensor->MutableVar()
->GetMutable<paddle::framework::LoDTensor>()
->ShareDataWith(t.Var().Get<paddle::framework::LoDTensor>());
return *grad_tensor;
}
};

if (IsLeafTensor(tensor)) {
// Add RetainGrad as PostHook to AccumulationNode
std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);
PADDLE_ENFORCE(
grad_node.get() != nullptr,
paddle::platform::errors::Fatal("Detected NULL grad_node"
"Leaf tensor should have had grad_node "
"with type: GradNodeAccumulation"));
auto accumulation_grad_node =
std::dynamic_pointer_cast<GradNodeAccumulation>(grad_node);
accumulation_grad_node->RetainGrad(hook);

} else {
// Append to GradientHooks
RegisterGradientHookForTensor(tensor, hook);
}
}

} // namespace egr
30 changes: 30 additions & 0 deletions paddle/fluid/eager/api/utils/hook_utils.h
@@ -0,0 +1,30 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/pten/api/all.h"
namespace egr {

void RegisterGradientHookForTensor(
const egr::EagerTensor& tensor,
std::function<egr::EagerTensor(const egr::EagerTensor&)>& hook);

void RegisterReduceHookForTensor(const egr::EagerTensor& tensor,
const std::function<void(void)>& hook);
void RetainGradForTensor(const egr::EagerTensor& tensor);

} // namespace egr
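
Taken together, this header exposes the commit's hook surface. The sketch below (not part of the commit) shows how the three entry points might be driven; MakeLeafTensor() is a hypothetical fixture standing in for whatever test setup produces a leaf EagerTensor with initialized AutogradMeta:

#include "glog/logging.h"
#include "paddle/fluid/eager/api/utils/hook_utils.h"

egr::EagerTensor MakeLeafTensor();  // hypothetical fixture, an assumption

void HookUsageSketch() {
  egr::EagerTensor leaf = MakeLeafTensor();

  // A gradient hook receives the incoming grad and returns the grad that
  // downstream accumulation will actually see (identity here). The
  // non-const reference parameter requires a named std::function.
  std::function<egr::EagerTensor(const egr::EagerTensor&)> hook =
      [](const egr::EagerTensor& grad) { return grad; };
  egr::RegisterGradientHookForTensor(leaf, hook);

  // A reduce hook is a void callback fired after the node's grads have
  // been fully accumulated.
  egr::RegisterReduceHookForTensor(leaf, []() { VLOG(6) << "reduced"; });

  // Keep the leaf's grad after backward; internally this installs the
  // copy-to-MutableGrad hook defined in hook_utils.cc above.
  egr::RetainGradForTensor(leaf);
}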
212 changes: 212 additions & 0 deletions paddle/fluid/eager/backward.cc
@@ -0,0 +1,212 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/eager/backward.h"
#include <queue>
#include <unordered_map>
#include <unordered_set>

#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/grad_tensor_holder.h"
#include "paddle/fluid/eager/utils.h"

#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/errors.h"

#include "glog/logging.h"

namespace egr {

std::unordered_map<GradNodeBase*, int> getInDegreeMap(
const std::queue<GradNodeBase*>& init_queue) {
// Calculate in_degree for each node
// This pass could be removed entirely if in_degree were set during the
// forward pass
std::unordered_map<GradNodeBase*, int> node_in_degree_map;

// Copy nodes
std::queue<GradNodeBase*> queue = init_queue;
std::unordered_set<GradNodeBase*> visited;

// Visit each node exactly once in any order
while (!queue.empty()) {
GradNodeBase* node = queue.front();
queue.pop();

if (visited.count(node)) {
continue;
}
visited.insert(node);

// Find and append next nodes
const std::vector<std::vector<Edge>>& edges = node->GetEdges();
for (const auto& edge_list : edges) {
for (const Edge& edge : edge_list) {
GradNodeBase* next_node = edge.GetMutableGradNode().get();
// Update in_degree
if (!node_in_degree_map.count(next_node))
node_in_degree_map[next_node] = 0;
node_in_degree_map[next_node]++;
queue.push(next_node);
}
}
}

return node_in_degree_map;
}

void RunBackward(const std::vector<egr::EagerTensor>& tensors,
const std::vector<egr::EagerTensor>& grad_tensors,
bool retain_graph) {
VLOG(6) << "Start Backward";
// *Gradient Hook should happen at node-level
// *Inplace version check should perform at node-level
// *Cross-batch accumulation happens at forward pass

/* --- Initialization --- */
// 1. Init queue with starting nodes
// 2. Prepare initial input buffers
std::queue<GradNodeBase*> queue;
std::unordered_map<GradNodeBase*, std::unique_ptr<GradTensorHolder>>
node_input_buffers_dict;
for (size_t i = 0; i < tensors.size(); i++) {
const egr::EagerTensor& tensor = tensors[i];

AutogradMeta* auto_grad_meta = EagerUtils::unsafe_autograd_meta(tensor);
// Get grad input info from target tensors
auto input_info = auto_grad_meta->OutRankInfo();

VLOG(2) << "Out Rank of Tensor is slot: " << input_info.first
<< ", rank: " << input_info.second;
// Get target GradNodeBase from target tensors
GradNodeBase* grad_node = auto_grad_meta->GetMutableGradNode().get();

PADDLE_ENFORCE(grad_node,
paddle::platform::errors::Fatal(
"Detected null grad_node."
"Grad Node is nullptr for grad input tensor %d",
i));
// Prepare GradTensorHolder
if (!node_input_buffers_dict.count(grad_node)) {
VLOG(6) << "Create Value for grad input tensor " << i;
node_input_buffers_dict[grad_node] =
std::make_unique<GradTensorHolder>(grad_node->InputMeta());
}

if (grad_tensors.size() > 0) {
PADDLE_ENFORCE(
grad_tensors.size() == tensors.size(),
paddle::platform::errors::Fatal(
"Detected size mismatch between tensors and grad_tensors"
"grad_tensors should either have "
"size = 0 or same size as tensors"));
// Feed given tensor if it's provided
VLOG(6) << "Fill grad input tensor " << i << "with give grad tensor";
node_input_buffers_dict[grad_node]->add(
input_info.first, input_info.second, grad_tensors[i]);

} else {
VLOG(6) << "Fill grad input tensor " << i << " with 1.0";
// Initialize tensor with 1.0
// Forward Tensor "tensor" is passed to indicate tensortype, datatype and
// dims
// GradTensorHolder will initialize another tensor with same tensortype,
// datatype and dims but filled with 1.0
node_input_buffers_dict[grad_node]->add(
input_info.first, input_info.second, tensor, true /*fill_one=true*/);
}

// Prepare queue
queue.push(grad_node);
}

VLOG(6) << "Update In degree Map for backward";
// 3. Compute in_degree for each node
std::unordered_map<GradNodeBase*, int> node_in_degree_map =
getInDegreeMap(queue);

/* --- Topological Visit --- */
// 1. Pop queue
// 2. Run node
// |- node(grads)
// |- Prepare for next node
// 3. Update queue
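// Continuing the A->B, A->C, B->C illustration above: A runs first and its
// grad outputs are summed into B's and C's GradTensorHolders; B is queued
// once its in-degree drops to 0, and C runs last, after grads from both A
// and B have been accumulated into its holder.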
VLOG(6) << "Run Backward";
while (!queue.empty()) {
GradNodeBase* node = queue.front();
queue.pop();

// Run node: This is where Hook happens
PADDLE_ENFORCE(
node_input_buffers_dict.count(node),
paddle::platform::errors::Fatal(
"Unable to find next node in the InputBuufer"
"Trying to run Node without configuring its GradTensorHolder"));

std::unique_ptr<GradTensorHolder> node_input_buffer =
std::move(node_input_buffers_dict[node]);
VLOG(6) << "Run Backward Kernel with input_buffer";
// Run Backward Node and get outputs
std::vector<std::vector<egr::EagerTensor>> grad_output_tensors =
(*node)(node_input_buffer->Buffers());
// TODO(jiabin): Should we erase it or find a more efficient way?
node_input_buffers_dict.erase(node);

// Prepare GradTensorHolder for next node
const std::vector<std::vector<Edge>>& edges = node->GetEdges();

PADDLE_ENFORCE(edges.size() == grad_output_tensors.size() || edges.empty(),
paddle::platform::errors::Fatal(
"Number of edges should be either empty ( for leaf node "
") or the same as number of output grad tensors"));

for (size_t i = 0; i < edges.size(); i++) {
for (size_t j = 0; j < edges[i].size(); j++) {
const Edge& edge = edges[i][j];
auto edge_rank = edge.GetEdgeRankInfo();
// Since edges are constructed with the same rank as the bwd outputs, we
// index them with the same rank (i, j)
VLOG(6) << "Get Edge with slot: " << i << ", rank: " << j;
egr::EagerTensor& grad_output_tensor = grad_output_tensors[i][j];
if (!grad_output_tensor.defined() ||
!grad_output_tensor.initialized()) {
VLOG(6) << "We get grad_output_tensor with slot: " << i
<< ", rank: " << j << " as uninitialized or undefined tensor";
}
GradNodeBase* next_node = edge.GetMutableGradNode().get();

if (!node_input_buffers_dict.count(next_node)) {
node_input_buffers_dict[next_node] =
std::make_unique<GradTensorHolder>(next_node->InputMeta());
}
VLOG(6) << "Sum grad inputs for edge slot: " << edge_rank.first
<< ", rank: " << edge_rank.second;
node_input_buffers_dict[next_node]->add(
edge_rank.first, edge_rank.second, grad_output_tensor);

// Update queue
node_in_degree_map[next_node]--;
PADDLE_ENFORCE(node_in_degree_map[next_node] >= 0,
paddle::platform::errors::Fatal(
"Detected in-degree value smaller than zero."
"Node's in-degree cannot be negative"));
if (node_in_degree_map[next_node] == 0) {
queue.push(next_node);
}
}
}
}
}

} // namespace egr
31 changes: 31 additions & 0 deletions paddle/fluid/eager/backward.h
@@ -0,0 +1,31 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/pten/api/all.h"

namespace egr {

// RunBackward():
// tensors corresponds to the targets that live in the backward graph,
// and each grad_tensors[i] holds the initial gradient value for its
// corresponding tensors[i]
void RunBackward(const std::vector<egr::EagerTensor> &tensors,
const std::vector<egr::EagerTensor> &grad_tensors,
bool retain_graph = false);

// Reserved for gradient()

} // namespace egr
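
For orientation, a sketch (not part of the commit) of how a caller might drive RunBackward; "out" stands for a hypothetical EagerTensor produced by an eager-mode forward op, so its AutogradMeta already holds a grad node and out-rank info:

#include "paddle/fluid/eager/backward.h"

void BackwardSketch(const egr::EagerTensor& out) {
  std::vector<egr::EagerTensor> targets{out};

  // Empty grad_tensors: RunBackward seeds each target's GradTensorHolder
  // with a ones-like tensor (the fill_one path in backward.cc).
  egr::RunBackward(targets, {});

  // Or seed with an explicit initial gradient of matching shape/dtype,
  // keeping the graph for a second pass:
  // egr::RunBackward(targets, {initial_grad}, /*retain_graph=*/true);
}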
2 changes: 1 addition & 1 deletion paddle/fluid/eager/tests/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(eager_deps pten pten_api tensor_utils utils global_utils pten_tensor autograd_meta grad_node_info grad_tensor_holder gradient_accumulation accumulation_node)
+set(eager_deps pten pten_api hook_utils tensor_utils utils global_utils backward pten_tensor autograd_meta grad_node_info grad_tensor_holder gradient_accumulation accumulation_node)
set(fluid_deps tracer layer proto_desc operator op_registry variable_helper memcpy)

add_subdirectory(data_structure_tests)