forked from PaddlePaddle/Paddle
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Eager] Supported Eager Dygraph Forward & AutoGrad functions (PaddleP…
- Loading branch information
1 parent
0a95ff3
commit 285cef5
Showing
14 changed files
with
615 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
add_subdirectory(accumulation) | ||
add_subdirectory(api) | ||
add_subdirectory(accumulation) | ||
add_subdirectory(tests) | ||
|
||
cc_library(grad_node_info SRCS grad_node_info.cc DEPS pten pten_api) | ||
cc_library(autograd_meta SRCS autograd_meta.cc DEPS pten pten_api) | ||
cc_library(grad_node_info SRCS grad_node_info.cc DEPS pten pten_api) | ||
cc_library(grad_tensor_holder SRCS grad_tensor_holder.cc DEPS grad_node_info gradient_accumulation) | ||
cc_library(utils SRCS utils.cc DEPS pten pten_api global_utils layer proto_desc operator op_registry variable_helper memcpy scale_op autograd_meta) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,4 @@ | ||
add_subdirectory(utils) | ||
add_subdirectory(generated) | ||
|
||
cc_library(eager_api SRCS all.cc DEPS global_utils eager_scale) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
add_subdirectory(eager_generated) |
2 changes: 2 additions & 0 deletions
2
paddle/fluid/eager/api/generated/eager_generated/CMakeLists.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
add_subdirectory(backwards) | ||
add_subdirectory(forwards) |
1 change: 1 addition & 0 deletions
1
paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
cc_library(scale_node SRCS scale_node.cc DEPS global_utils pten pten_api grad_node_info) |
172 changes: 172 additions & 0 deletions
172
paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" | ||
#include "paddle/fluid/eager/api/utils/global_utils.h" | ||
#include "paddle/fluid/eager/eager_tensor.h" | ||
|
||
#include "paddle/pten/api/all.h" | ||
|
||
#include "paddle/fluid/platform/device_context.h" | ||
#include "paddle/fluid/platform/enforce.h" | ||
#include "paddle/fluid/platform/errors.h" | ||
|
||
#include "glog/logging.h" | ||
|
||
namespace egr { | ||
|
||
template <typename DeviceContext> | ||
static void ScaleDeviceDispatch(const pten::DenseTensor& dense_tensor, | ||
const DeviceContext& dev_ctx, float scale, | ||
float bias, bool bias_after_scale, | ||
pten::DenseTensor* dense_out) { | ||
switch (dense_tensor.dtype()) { | ||
case pten::DataType::FLOAT64: { | ||
pten::Scale<double>(dev_ctx, dense_tensor /* tensor */, scale /* scale */, | ||
bias /* bias */, | ||
bias_after_scale /* bias_after_scale */, | ||
dense_out /* out tensor */); | ||
break; | ||
} | ||
case pten::DataType::FLOAT32: { | ||
pten::Scale<float>(dev_ctx, dense_tensor /* tensor */, scale /* scale */, | ||
bias /* bias */, | ||
bias_after_scale /* bias_after_scale */, | ||
dense_out /* out tensor */); | ||
break; | ||
} | ||
case pten::DataType::INT64: { | ||
pten::Scale<int64_t>(dev_ctx, dense_tensor /* tensor */, | ||
scale /* scale */, bias /* bias */, | ||
bias_after_scale /* bias_after_scale */, | ||
dense_out /* out tensor */); | ||
break; | ||
} | ||
case pten::DataType::INT32: { | ||
pten::Scale<int32_t>(dev_ctx, dense_tensor /* tensor */, | ||
scale /* scale */, bias /* bias */, | ||
bias_after_scale /* bias_after_scale */, | ||
dense_out /* out tensor */); | ||
break; | ||
} | ||
default: { | ||
PADDLE_THROW(paddle::platform::errors::Fatal( | ||
"Detected unsupported data type." | ||
"Only Float64, Float32, Int64, Int32 are supported for now.")); | ||
break; | ||
} | ||
} | ||
} | ||
|
||
/**
 * Eager-mode forward implementation of the scale op:
 * allocates an output DenseTensor shaped like `x` and runs the scale kernel
 * on the globally expected place (CPU, or CUDA when compiled in).
 *
 * @param x                Input tensor; impl() must hold a pten::DenseTensor.
 * @param scale            Multiplicative factor.
 * @param bias             Additive bias.
 * @param bias_after_scale Passed through to the kernel.
 * @param out              Output tensor; its impl is replaced with the result.
 */
void ScaleAPI(const egr::EagerTensor& x, float scale, float bias,
              bool bias_after_scale, egr::EagerTensor* out) {
  // TODO(jiabin): Support multiple tensors here; creating a DenseTensor
  // directly is only a demo-level way to implement this.
  // Run Forward Function
  auto dense_tensor = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
  // Fix: the cast result was dereferenced unconditionally; a non-DenseTensor
  // impl would have caused a null-pointer dereference.
  PADDLE_ENFORCE_NOT_NULL(
      dense_tensor,
      paddle::platform::errors::Fatal(
          "ScaleAPI expects the input tensor to hold a pten::DenseTensor "
          "impl, but the cast failed."));

  // Init output tensor with the same meta (dtype/dims/layout) as the input.
  auto tensor_meta = pten::DenseTensorMeta(
      dense_tensor->dtype(), dense_tensor->dims(), dense_tensor->layout());
  auto place = dense_tensor->place();
  size_t bytes_size = paddle::framework::product(dense_tensor->dims()) *
                      SizeOf(dense_tensor->dtype());
  auto dense_out = std::make_shared<pten::DenseTensor>(
      pten::make_intrusive<paddle::experimental::SharedStorage>(
          paddle::memory::Alloc(place, bytes_size), 0),
      std::move(tensor_meta));

  // Handle Device Context: dispatch on the globally expected place.
  const paddle::platform::Place& expected_kernel_place =
      Controller::Instance().GetExpectedPlace();
  paddle::platform::DeviceContextPool& pool =
      paddle::platform::DeviceContextPool::Instance();

  if (expected_kernel_place == paddle::platform::CPUPlace()) {
    auto* dev_ctx = dynamic_cast<paddle::platform::CPUDeviceContext*>(
        pool.Get(expected_kernel_place));
    if (!dev_ctx) {
      // Fix: added separators between concatenated literals and corrected
      // the "Pleas" typo.
      PADDLE_THROW(paddle::platform::errors::Fatal(
          "Cannot convert device_context to CPUDeviceContext. "
          "This indicates backend mismatch. "
          "Please double check your expected place."));
    }
    ScaleDeviceDispatch<paddle::platform::CPUDeviceContext>(
        *dense_tensor.get(), *dev_ctx, scale, bias, bias_after_scale,
        dense_out.get());

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  } else if (expected_kernel_place == paddle::platform::CUDAPlace()) {
    auto* dev_ctx = dynamic_cast<paddle::platform::CUDADeviceContext*>(
        pool.Get(expected_kernel_place));
    if (!dev_ctx) {
      PADDLE_THROW(paddle::platform::errors::Fatal(
          "Cannot convert device_context to CUDADeviceContext. "
          "This indicates backend mismatch. "
          "Please double check your expected place."));
    }
    ScaleDeviceDispatch<paddle::platform::CUDADeviceContext>(
        *dense_tensor.get(), *dev_ctx, scale, bias, bias_after_scale,
        dense_out.get());
#endif
  } else {
    PADDLE_THROW(paddle::platform::errors::Fatal(
        "Detected unsupported backend. "
        "Only CPU and CUDA Backend are supported for now. "
        "Please double check if your backend falls into the above two "
        "categories."));
  }

  out->set_impl(dense_out);
}
|
||
void GradNodeScale::SetTensorWrappers_X( | ||
const std::vector<egr::EagerTensor>& tensors) { | ||
// Does nothing for scale | ||
} | ||
|
||
// Record the forward scale factor; operator() replays it on incoming grads.
void GradNodeScale::SetAttributes_scale(float scale) {
  scale_ = scale;
}
|
||
/**
 * Backward computation for scale: grad_out = grad_in * scale_ (bias 0).
 *
 * @param grads Exactly one slot containing exactly one grad tensor.
 * @return One slot with one tensor: the scaled gradient.
 */
std::vector<std::vector<egr::EagerTensor>> GradNodeScale::operator()(
    const std::vector<std::vector<egr::EagerTensor>>& grads) {
  // 1. Check Output Size.
  // Fix: the explanatory sentence was previously passed as a vararg, so the
  // "%d" placeholder consumed a char* while grads.size() went unused; the
  // sentence now lives in the format string and %d receives the size.
  PADDLE_ENFORCE(
      ((grads.size() == 1) && (grads[0].size() == 1)),
      paddle::platform::errors::Fatal(
          "ScaleGradNode takes exactly 1 grad tensor. "
          "However received: %d. "
          "This indicates an issue with Eager Dygraph Backward logic.",
          grads.size()));
  // 2. Create needed out pattern (removed the unused local `outs`).
  egr::EagerTensor out;
  // Apply Gradient Hooks
  if (GradientHooksRegistered()) {
    // TODO(jiabin): Shall we apply hook slot by slot here or accept
    // vector<vector<pten::tensor>> to apply all hooks?
    std::vector<std::vector<egr::EagerTensor>> hooked_grads =
        ApplyGradientHooks(grads);
    ScaleAPI(/* slot by slot set */ hooked_grads[0][0], scale_, 0.0 /* bias */,
             true /* bias_after_scale */, &out);
  } else {
    ScaleAPI(grads[0][0], scale_, 0.0 /* bias */, true /* bias_after_scale */,
             &out);
  }

  // Apply Reduce Hooks
  if (ReduceHooksRegistered()) {
    ApplyReduceHooks();
  }
  return {{out}};
}
|
||
} // namespace egr |
54 changes: 54 additions & 0 deletions
54
paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#pragma once | ||
|
||
#include "paddle/fluid/eager/grad_node_info.h" | ||
#include "paddle/fluid/eager/tensor_wrapper.h" | ||
|
||
/*
  Each operation has a specific GradNode inherited from GradNodeBase.
  A specific GradNode defines:
  1. Its input tensors.
  2. An overridden operator() that performs the actual backward computation.
  TODO: Generate GradNode via auto-code-generation.
*/
namespace egr { | ||
|
||
void ScaleAPI(const egr::EagerTensor& x, float scale, float bias, | ||
bool bias_after_scale, egr::EagerTensor* out); | ||
|
||
class GradNodeScale : public GradNodeBase { | ||
public: | ||
// Constructor: configure fwd input tensors to grad node | ||
GradNodeScale(size_t bwd_in_slot_num, size_t bwd_out_slot_num) | ||
: GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {} | ||
~GradNodeScale() override = default; | ||
|
||
// Functor: perform backward computations | ||
virtual std::vector<std::vector<egr::EagerTensor>> operator()( | ||
const std::vector<std::vector<egr::EagerTensor>>& grads) override; | ||
|
||
void SetTensorWrappers_X(const std::vector<egr::EagerTensor>& tensors); | ||
|
||
void SetAttributes_scale(float scale); | ||
|
||
// Members: define fwd input tensors | ||
// For Scale there is no fwd input tensor needed | ||
private: | ||
float scale_{1.0}; | ||
}; | ||
|
||
} // namespace egr |
1 change: 1 addition & 0 deletions
1
paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
cc_library(eager_scale SRCS scale.cc DEPS pten_api pten autograd_meta scale_node) |
100 changes: 100 additions & 0 deletions
100
paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
/** | ||
* This File should be automatically generated by coding auto generator. | ||
* All ops C++ autograd logic is defined here, in Python-C extension API | ||
* system we try to avoid any autograd related code, and move them all to | ||
* here. | ||
* | ||
* Currently, we just manually do some fwd autograd here. And we will replace | ||
* them with auto code generator later. | ||
* **/ | ||
|
||
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h" | ||
#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" | ||
#include "paddle/fluid/eager/autograd_meta.h" | ||
#include "paddle/fluid/eager/eager_tensor.h" | ||
#include "paddle/fluid/eager/utils.h" | ||
|
||
#include "paddle/pten/api/all.h" | ||
#include "paddle/pten/include/core.h" | ||
|
||
namespace egr { | ||
|
||
/**
 * Eager-mode forward entry for scale, with autograd bookkeeping.
 * Runs ScaleAPI for the forward result, then (when gradients are required)
 * builds a GradNodeScale, wires edges/ranks, and records history on the
 * output's autograd meta.
 *
 * @param x                Input tensor.
 * @param scale            Multiplicative factor (also saved on the grad node).
 * @param bias             Additive bias (forward only; backward uses bias 0).
 * @param bias_after_scale Forwarded to the kernel.
 * @param trace_backward   Whether backward graph construction is enabled.
 * @return The scaled output tensor.
 */
egr::EagerTensor scale(const egr::EagerTensor& x, float scale, float bias,
                       bool bias_after_scale, bool trace_backward) {
  // 1. Run Forward
  // 1.1 Create outputs
  egr::EagerTensor out;
  // 1.2 Needed by the original op: ins/outs/attrs would be assembled here.

  // 1.3 Call forward C++ api
  ScaleAPI(x, scale, bias, bias_after_scale, &out);

  // 2. Build Backward Depends
  // 2.1 Get AutogradMetas for all ins and outs
  auto p_autograd_in = EagerUtils::unsafe_autograd_meta(x);
  // NOTE: Call EagerUtils::multi_autograd_meta when we have vector of outputs
  auto p_autograd_out = EagerUtils::autograd_meta(&out);

  // 2.2 Add GradNode
  // 2.2.1 ComputeRequireGrad
  // TODO(jiabin): make this function accept different kinds of input
  // TODO(zhanlve): which one is more efficient:
  //                1. construct a vector of pointers
  //                2. call "ComputeRequireGrad" multiple times
  bool require_any_grad =
      EagerUtils::ComputeRequireGrad(trace_backward, p_autograd_in);
  if (require_any_grad) {
    EagerUtils::PassStopGradient(false /*generate_grad*/, p_autograd_out);

    // 2.2.2 Set OutRankInfo for outputs; this must match the Edge's
    // input_rank_.
    /** Note:
     1. We provide EagerUtils::SetMultiOutRank(vector<AutogradMeta*>),
        since some operators have several slot names with duplicate outputs.
     2. We call AutogradMeta's SetSingleOutRankWithSlot only when we have a
        single output with a single slot name.
    **/
    p_autograd_out->SetSingleOutRankWithSlot(0, 0);

    // Init GradNode.
    // Fix: argument comments previously said fwd_in_slot_num/bwd_in_slot_num,
    // but the ctor takes (bwd_in_slot_num, bwd_out_slot_num).
    auto scale_node = std::make_shared<GradNodeScale>(
        /* bwd_in_slot_num */ 1, /* bwd_out_slot_num */ 1);

    // Pass Attributes to GradNode
    scale_node->SetAttributes_scale(scale);

    // Set Next Edges
    scale_node->AddEdges(*p_autograd_in, /*slot id*/ 0);

    // Set TensorWrappers (a no-op for scale, kept for uniformity)
    scale_node->SetTensorWrappers_X({x});

    // Set grad out rank same as fwd input, and set stop gradient to bwd
    scale_node->SetGradOutMeta(*p_autograd_in, /*slot id*/ 0);
    // Set grad in rank same as fwd output, and set stop gradient to bwd
    scale_node->SetGradInMeta(*p_autograd_out, /*slot id*/ 0);

    // Set History for output: record the current GradNode on it
    EagerUtils::SetHistory(p_autograd_out, scale_node);
  }

  return out;
}
|
||
} // namespace egr |
23 changes: 23 additions & 0 deletions
23
paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#pragma once | ||
|
||
#include "paddle/fluid/eager/eager_tensor.h" | ||
namespace egr { | ||
|
||
egr::EagerTensor scale(const egr::EagerTensor& x, float scale, float bias, | ||
bool bias_after_scale, bool trace_backward); | ||
|
||
} // namespace egr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
set(eager_deps pten pten_api utils tensor_utils global_utils pten_tensor autograd_meta grad_node_info grad_tensor_holder gradient_accumulation accumulation_node) | ||
set(eager_deps pten pten_api tensor_utils utils global_utils pten_tensor autograd_meta grad_node_info grad_tensor_holder gradient_accumulation accumulation_node) | ||
set(fluid_deps tracer layer proto_desc operator op_registry variable_helper memcpy) | ||
|
||
add_subdirectory(data_structure_tests) | ||
add_subdirectory(task_tests) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
cc_test(test_egr_task_tensor_utils SRCS tensor_utils_test.cc DEPS ${eager_deps}) | ||
cc_test(test_egr_task_eager_utils SRCS eager_utils_test.cc DEPS ${eager_deps}) | ||
cc_test(test_egr_task_forward_autograd SRCS forward_autograd_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node) |
Oops, something went wrong.