Commit
merge from develop
YuanRisheng committed Nov 24, 2021
2 parents 965caf1 + 8b87d5e commit 9aadbbe
Showing 41 changed files with 602 additions and 62 deletions.
116 changes: 112 additions & 4 deletions paddle/fluid/framework/new_executor/data_transfer.cc
@@ -62,6 +62,24 @@ bool DataTranferHelper::apply(const OpKernelType& kernel_type_for_var,
return is_transferred;
}

void DataTranferHelper::RunAndConstructShareNode(
const std::string& src_var_name, const std::string& dst_var_name,
std::vector<OpFuncNode>* op_func_nodes) {
VariableNameMap in_name_map = {{"X", {src_var_name}}};
VariableNameMap out_name_map = {{"Out", {dst_var_name}}};
AttributeMap attr_map;

std::string op_type("share_data");
auto& op_info = OpInfoMap::Instance().Get(op_type);
auto op = std::shared_ptr<OperatorBase>(
op_info.Creator()(op_type, in_name_map, out_name_map, attr_map));

VLOG(3) << string::Sprintf("Insert %s with %s -> %s.", op_type, src_var_name,
dst_var_name);

RunAndConstructOpFuncNode(op, src_var_name, dst_var_name, op_func_nodes);
}

void DataTranferHelper::RunAndConstructOpFuncNode(
const std::shared_ptr<OperatorBase>& op, const std::string& var_name,
const std::string& new_var_name,
@@ -133,7 +151,7 @@ std::shared_ptr<OperatorBase> TransferLayout(const std::string& var_name,
VariableNameMap out_name_map = {{"Out", {*new_var_name}}};
AttributeMap attr_map = {{"dst_layout", static_cast<int>(out_layout)}};

// 3. Create transfer_op
// 3. Create transfer_layout_op
std::string op_type("transfer_layout");
auto& op_info = OpInfoMap::Instance().Get(op_type);
auto op = std::shared_ptr<OperatorBase>(
@@ -154,9 +172,10 @@ std::shared_ptr<OperatorBase> TransferDtype(const std::string& var_name,
*new_var_name =
var_name + "_dtype_" + std::to_string(var_scope->VarSize() + 1);
auto* ptr = local_scope->Var(new_var_name);

var_scope->SetVarDesc(var_name, nullptr);
auto var_type = var_scope->Var(var_name)->Type();
InitializeVariable(ptr, static_cast<proto::VarType::Type>(var_type));

VLOG(3) << "Create Variable " << *new_var_name
<< " locally, which pointer is " << ptr << "Variable Type "
<< var_type;
@@ -171,7 +190,7 @@ std::shared_ptr<OperatorBase> TransferDtype(const std::string& var_name,
// NOTE(Aurelius84): In which case use_mkldnn = true?
attr_map["use_mkldnn"] = false;

// 3. Create transfer_op
// 3. Create transfer_dtype_op
std::string op_type("transfer_dtype");
auto& op_info = OpInfoMap::Instance().Get(op_type);
auto op = std::shared_ptr<OperatorBase>(
@@ -209,7 +228,7 @@ std::shared_ptr<OperatorBase> TransferDevice(const std::string& var_name,
: platform::is_gpu_place(dst_place) ? 1 : -1;
AttributeMap attr_map = {{"dst_place_type", dst_place_type}};

// 3. Create transfer_op
// 3. Create memcpy_d2h_op or memcpy_h2d_op
std::string op_type = get_memcpy_type(src_place, dst_place);
auto& op_info = OpInfoMap::Instance().Get(op_type);
auto op = std::shared_ptr<OperatorBase>(
@@ -303,6 +322,95 @@ std::string get_memcpy_type(const platform::Place& src_place,
}
}

void HandleComplexGradToRealGrad(const OpFuncNode& op_func_node,
const platform::Place& place,
const VariableNameMap& out_names,
VariableValueMap* out_vars,
VariableScope* var_scope,
std::vector<OpFuncNode>* op_func_nodes,
framework::Scope* local_scope) {
DataTranferHelper data_transfer_helper(place, var_scope);
for (auto& var_name_item : out_names) {
std::vector<Variable*>& vars = out_vars->at(var_name_item.first);
for (size_t i = 0; i < var_name_item.second.size(); ++i) {
// 1. find grad_var & check whether is complex tensor
auto var_name = var_name_item.second[i];
auto orig_var_name = framework::GradOriginalVarName(var_name);
// only focus on gradient var
if (var_name == orig_var_name) {
VLOG(3) << "skip " << var_name << " with same name as "
<< orig_var_name;
continue;
}
auto* grad_var = vars[i];
// skip nullptr var
if (grad_var == nullptr) {
VLOG(3) << "skip grad_var with nullptr";
continue;
}
// don't process LoDTensorArray temporarily,
// add support if necessary for complex number calculations in the future
if (!framework::VarIsTensor(*grad_var)) {
VLOG(3) << "skip grad_var with LoDTensorArray type";
continue;
}
auto* grad_tensor =
framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(grad_var);
// skip nullptr tensor
if (grad_tensor == nullptr || !grad_tensor->IsInitialized()) {
VLOG(3) << "skip with grad_tensor not IsInitialized";
continue;
}
// only focus on complex dtype now
auto src_type = grad_tensor->type();
if (!framework::IsComplexType(src_type)) {
VLOG(3) << "skip grad_tensor with not complexType";
continue;
}

// 2. find forward var & check whether need to cast
auto* var = var_scope->FindVar(orig_var_name);
// if forward var not exists, do nothing
if (var == nullptr) {
VLOG(3) << "skip " << orig_var_name << " with not found in var_scope";
continue;
}
if (!framework::VarIsTensor(*var)) {
VLOG(3) << "skip " << orig_var_name << " with LoDTensorArray.";
continue;
}
const auto* tensor =
framework::GetLoDTensorOrSelectedRowsValueFromVar(*var);
PADDLE_ENFORCE_NOT_NULL(
tensor,
platform::errors::Unavailable(
"Forward tensor is nullptr when handle complex data to real."));
// only need record type, the allocation may have been released
auto dst_type = tensor->saved_type();
// only focus on real dtype and need casting
if (framework::IsComplexType(dst_type)) {
continue;
}

// 3. cast complex grad to real grad in place
VLOG(3) << "Transform " << framework::DataTypeToString(src_type)
<< " var `" << var_name << "` to "
<< framework::DataTypeToString(dst_type)
<< " real var in static graph.";

// NOTE(Aurelius84): Consider to define a complex2real op to deal this
// case.
std::string new_var_name;
auto op = TransferDtype(var_name, &new_var_name, src_type, dst_type,
var_scope, local_scope);
data_transfer_helper.RunAndConstructOpFuncNode(op, var_name, new_var_name,
op_func_nodes);
data_transfer_helper.RunAndConstructShareNode(new_var_name, var_name,
op_func_nodes);
}
}
}

} // namespace interpreter
} // namespace framework
} // namespace paddle
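
The HandleComplexGradToRealGrad helper added above runs only after a kernel whose expected dtype is complex: for every gradient output whose paired forward variable is real, it inserts a transfer_dtype op that casts the complex gradient down to the forward dtype, then a share_data op that writes the result back under the original gradient name. As a rough standalone sketch of the cast itself (keeping only the real part), with std::complex and std::vector standing in for the framework tensors:

// Standalone sketch only: the framework performs this via an inserted
// transfer_dtype op on tensor data, not via STL containers.
#include <complex>
#include <vector>

std::vector<float> ComplexGradToRealGrad(
    const std::vector<std::complex<float>>& complex_grad) {
  std::vector<float> real_grad;
  real_grad.reserve(complex_grad.size());
  for (const auto& g : complex_grad) {
    real_grad.push_back(g.real());  // the imaginary part is dropped
  }
  return real_grad;
}

int main() {
  std::vector<std::complex<float>> dx = {{1.f, 2.f}, {3.f, -4.f}};
  auto real_dx = ComplexGradToRealGrad(dx);  // {1.f, 3.f}
  return real_dx.size() == 2 ? 0 : 1;
}
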
18 changes: 15 additions & 3 deletions paddle/fluid/framework/new_executor/data_transfer.h
@@ -37,14 +37,18 @@ class DataTranferHelper {
const std::string& var_name, std::string* new_var_name,
std::vector<OpFuncNode>* new_op_func_nodes, bool use_local_scope);

private:
platform::Place place_;
VariableScope* var_scope_;
void RunAndConstructShareNode(const std::string& src_var_name,
const std::string& dst_var_name,
std::vector<OpFuncNode>* op_func_nodes);

void RunAndConstructOpFuncNode(const std::shared_ptr<OperatorBase>& op,
const std::string& var_name,
const std::string& new_var_name,
std::vector<OpFuncNode>* op_func_nodes);

private:
platform::Place place_;
VariableScope* var_scope_;
};

void ApplyDataTransform(const OpKernelType& expected_kernel_key,
@@ -54,6 +58,14 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key,
std::vector<OpFuncNode>* op_func_nodes,
bool use_local_scope = true);

void HandleComplexGradToRealGrad(const OpFuncNode& op_func_node,
const platform::Place& place,
const VariableNameMap& out_names,
VariableValueMap* out_vars,
VariableScope* var_scope,
std::vector<OpFuncNode>* op_func_nodes,
framework::Scope* local_scope);

std::string get_memcpy_type(const platform::Place& src_place,
const platform::Place& dst_place);

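
get_memcpy_type, declared just above, picks which memcpy kernel the inserted transfer op should use; the names line up with the kMemcpyD2H / kMemcpyH2D constants added to new_executor_defs.h further down. A hedged sketch of the mapping only, with plain booleans standing in for the platform::Place checks (the real helper inspects Place objects and may cover additional device types):

#include <stdexcept>
#include <string>

// Sketch of the src/dst-place -> op-type mapping; assumption: only the
// GPU<->CPU combinations are shown here.
std::string GetMemcpyType(bool src_is_gpu, bool dst_is_gpu) {
  if (!src_is_gpu && dst_is_gpu) return "memcpy_h2d";  // host -> device
  if (src_is_gpu && !dst_is_gpu) return "memcpy_d2h";  // device -> host
  throw std::invalid_argument("no transfer needed or unsupported combination");
}
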
4 changes: 2 additions & 2 deletions paddle/fluid/framework/new_executor/interpretercore.cc
@@ -90,7 +90,7 @@ paddle::framework::FetchList InterpreterCore::Run(

// return Fetch Tensors
auto* fetch_var = global_scope_->Var(interpreter::kFetchVarName);
return *(fetch_var->GetMutable<framework::FetchList>());
return std::move(*fetch_var->GetMutable<framework::FetchList>());
}

paddle::framework::FetchList InterpreterCore::Run(
@@ -124,7 +124,7 @@ paddle::framework::FetchList InterpreterCore::Run(

// return Fetch Tensors
auto* fetch_var = global_scope_->Var(interpreter::kFetchVarName);
return *(fetch_var->GetMutable<framework::FetchList>());
return std::move(*fetch_var->GetMutable<framework::FetchList>());
}

void InterpreterCore::BuildOperatorDependences() {
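
Both Run overloads now return std::move(*fetch_var->GetMutable<framework::FetchList>()) instead of copying the scope-held fetch list into the return value, so the fetched tensors are moved out (leaving the list in the scope empty) rather than deep-copied. A small self-contained illustration of the difference, with std::vector standing in for FetchList:

#include <utility>
#include <vector>

// Stand-in for the FetchList owned by the global scope.
static std::vector<int> fetch_list = {1, 2, 3};

std::vector<int> RunCopy() { return fetch_list; }             // copies all elements
std::vector<int> RunMove() { return std::move(fetch_list); }  // steals the buffer

int main() {
  auto a = RunCopy();  // fetch_list still holds {1, 2, 3}
  auto b = RunMove();  // fetch_list is now typically empty
  return (a.size() == 3 && b.size() == 3) ? 0 : 1;
}
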
20 changes: 11 additions & 9 deletions paddle/fluid/framework/new_executor/interpretercore_util.cc
@@ -328,20 +328,14 @@ void build_op_func_list(const platform::Place& place,
->GetExpectedKernelType(
ExecutionContext(*op, scope, *dev_ctx, runtime_context));

// consider device_guard()
apply_device_guard(
op, place,
&expected_kernel_key); // change device by the device_guard()
// change device by the device_guard()
apply_device_guard(op, place, &expected_kernel_key);
VLOG(3) << "expected_kernel_key : " << expected_kernel_key;

// step 3. apply data transforms and insert data transfer ops
VariableValueMap& ins_map_temp = runtime_context.inputs;
std::vector<OpFuncNode> new_op_func_nodes;
ApplyDataTransform(expected_kernel_key, place, &ins_map_temp, var_scope,
&op_func_node, &new_op_func_nodes, use_local_scope);
for (auto& item : new_op_func_nodes) {
vec_func_list->emplace_back(std::move(item));
}
&op_func_node, vec_func_list, use_local_scope);
// step 4. Run op kernel
VLOG(3) << op->Type()
<< " : expected_kernel_key : " << expected_kernel_key;
@@ -370,6 +364,14 @@

op_func_node.kernel_func_ = OpKernelComputeFunc(kernel_iter->second);
op_func_node.kernel_func_(exec_ctx);

// post-process grad_op.outputs if need cast complex grad into real grad.
// NOTE(Aurelius84): insert a transfer_dtype_op inplacely to cast it.
if (framework::IsComplexType(expected_kernel_key.data_type_)) {
interpreter::HandleComplexGradToRealGrad(
op_func_node, place, outputs_names, &runtime_context.outputs,
var_scope, vec_func_list, local_scope);
}
}

vec_func_list->emplace_back(op_func_node);
1 change: 0 additions & 1 deletion paddle/fluid/framework/new_executor/interpretercore_util.h
@@ -51,7 +51,6 @@ namespace framework {
namespace interpreter {

using AtomicVectorSizeT = std::vector<std::unique_ptr<std::atomic<size_t>>>;
static constexpr char kFetchVarName[] = "fetch";

class AsyncWorkQueue {
public:
1 change: 1 addition & 0 deletions paddle/fluid/framework/new_executor/new_executor_defs.h
@@ -374,6 +374,7 @@ class Instruction {
namespace interpreter {
static constexpr char kMemcpyH2D[] = "memcpy_h2d";
static constexpr char kMemcpyD2H[] = "memcpy_d2h";
static constexpr char kFetchVarName[] = "fetch";

static bool IsMemcpyH2D(const Instruction& instr) {
return instr.OpBase()->Type() == kMemcpyH2D;
4 changes: 0 additions & 4 deletions paddle/fluid/framework/operator.cc
@@ -479,10 +479,6 @@ void OperatorBase::GenerateTemporaryNames() {
}
}

static bool VarIsTensor(const Variable& var) {
return var.IsType<LoDTensor>() || var.IsType<SelectedRows>();
}

const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var) {
if (var.IsType<LoDTensor>()) {
return static_cast<const Tensor*>(&(var.Get<LoDTensor>()));
4 changes: 4 additions & 0 deletions paddle/fluid/framework/operator.h
@@ -114,6 +114,10 @@ inline std::string GradOriginalVarName(const std::string& grad_var_name) {
}
}

inline bool VarIsTensor(const Variable& var) {
return var.IsType<LoDTensor>() || var.IsType<SelectedRows>();
}

const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var);
Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var);

7 changes: 7 additions & 0 deletions paddle/fluid/imperative/amp_auto_cast.cc
@@ -261,6 +261,13 @@ NameVarBaseMap CastPureFp16Inputs(const std::string& op_type,
dst_type = framework::proto::VarType::FP32;
}
for (auto& pair : new_ins) {
// NOTE: The run_program OP only has FP32 kernel. In dy2stat pure fp16
// training, we have correctly cast the inputs of run_program OP before,
// so here should avoid casting for run_program OP.
if (op_type == "run_program") {
continue;
}

if ((op_type == "batch_norm" || op_type == "layer_norm" ||
op_type == "sync_batch_norm") &&
pair.first != "X") {
2 changes: 1 addition & 1 deletion paddle/pten/api/lib/tensor.cc
@@ -283,7 +283,7 @@ template <typename T>
Tensor Tensor::copy_to(const PlaceType &target_place) const {
LOG(WARNING) << "The Tensor's `copy_to` method is deprecated since version "
"2.3, and will be removed in version 2.4, please use "
"`copy_to` method without template argumentinstead. "
"`copy_to` method without template argument instead. "
"reason: copying a Tensor to another device does not need "
"to specify the data type template argument.";
return copy_to(ConvertExtPlaceToBackend(target_place), /*blocking=*/false);
4 changes: 2 additions & 2 deletions paddle/pten/tests/api/test_cast_api.cc
@@ -22,7 +22,7 @@ limitations under the License. */
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"

namespace pten {
namespace paddle {
namespace tests {

namespace framework = paddle::framework;
@@ -85,4 +85,4 @@ TEST(Tensor, cast) {
}

} // namespace tests
} // namespace pten
} // namespace paddle
6 changes: 6 additions & 0 deletions paddle/pten/tests/api/test_dot_api.cc
@@ -21,6 +21,9 @@ limitations under the License. */
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"

namespace paddle {
namespace tests {

namespace framework = paddle::framework;
using DDim = paddle::framework::DDim;

@@ -76,3 +79,6 @@ TEST(API, dot) {
ASSERT_NEAR(expect_result[1], actual_result1, 1e-6f);
ASSERT_NEAR(expect_result[2], actual_result2, 1e-6f);
}

} // namespace tests
} // namespace paddle
5 changes: 5 additions & 0 deletions paddle/pten/tests/api/test_elementwise_api.cc
@@ -21,6 +21,9 @@ limitations under the License. */
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"

namespace paddle {
namespace tests {

namespace framework = paddle::framework;
using DDim = paddle::framework::DDim;

@@ -239,3 +242,5 @@ TEST(API, multiply) {
ASSERT_NEAR(expect_result[0][1], actual_result1, 1e-6f);
ASSERT_NEAR(expect_result[1][0], actual_result2, 1e-6f);
}
} // namespace tests
} // namespace paddle
6 changes: 6 additions & 0 deletions paddle/pten/tests/api/test_fill_api.cc
@@ -21,6 +21,9 @@ limitations under the License. */
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"

namespace paddle {
namespace tests {

namespace framework = paddle::framework;
using DDim = paddle::framework::DDim;

@@ -151,3 +154,6 @@ TEST(API, full) {
ASSERT_NEAR(actual_result[i], val, 1e-6f);
}
}

} // namespace tests
} // namespace paddle
6 changes: 6 additions & 0 deletions paddle/pten/tests/api/test_flatten_api.cc
@@ -21,6 +21,9 @@ limitations under the License. */
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"

namespace paddle {
namespace tests {

namespace framework = paddle::framework;
using DDim = paddle::framework::DDim;

@@ -64,3 +67,6 @@ TEST(API, flatten) {
}
ASSERT_EQ(value_equal, true);
}

} // namespace tests
} // namespace paddle

1 comment on commit 9aadbbe

@paddle-bot-old (bot) commented on 9aadbbe, Nov 24, 2021


🕵️ CI failures summary

🔍 PR: #37471 Commit ID: 9aadbbe contains failed CI.

🔹 Failed: PR-CI-Windows-OPENBLAS

test_failed
2021-11-24 15:00:49 The following tests FAILED:
2021-11-24 15:00:49 896 - test_reshape_op (Failed)
2021-11-24 15:00:49 905 - test_reshape_op (Failed)
2021-11-24 15:00:49 905 - test_reshape_op (Failed)
2021-11-24 15:00:49 C:\home\workspace\Paddle\build>goto:eof
2021-11-24 15:00:49 C:\home\workspace\Paddle\build>for /F %# in ('wmic os get localdatetime|findstr 20') do set end=%#
2021-11-24 15:00:49 C:\home\workspace\Paddle\build>set end=20211124150049.180000+480
2021-11-24 15:00:49 C:\home\workspace\Paddle\build>set end=1124150049
2021-11-24 15:00:49 C:\home\workspace\Paddle\build>call :timestamp "1124143632" "1124150049" "1 card TestCases Total"
2021-11-24 15:00:49 C:\home\workspace\Paddle\build>setlocal enabledelayedexpansion
2021-11-24 15:00:49 2126192
2021-11-24 15:00:49 "Windows 1 card TestCases Total Time: 1457s"
2021-11-24 15:00:49 ipipe_log_param_Windows_1_card_TestCases_Total_Time: 1457s
2021-11-24 15:00:49 2126192
2021-11-24 15:00:49 "Windows TestCases Total Time: 1457s"
2021-11-24 15:00:49 ipipe_log_param_Windows_TestCases_Total_Time: 1457s
2021-11-24 15:00:49 Running unit tests failed, will exit
2021-11-24 15:00:49 EXCODE: 8

🔹 Failed: PR-CI-CINN

test_failed
2021-11-24 15:03:31 The following tests FAILED:
2021-11-24 15:03:31 178 - cinn_lib_test (Child aborted)
2021-11-24 15:03:31 182 - cinn_graph_symbolization_test (SEGFAULT)
2021-11-24 15:03:31 Errors while running CTest
2021-11-24 15:03:31 paddle/scripts/paddle_build.sh: line 1024: warning: run_pending_traps: bad value in trap_list[17]: 0x560aeff93c30
2021-11-24 15:03:31 At least one test failed with exit code => 0
2021-11-24 15:03:31 paddle/scripts/paddle_build.sh: line 1023: warning: run_pending_traps: bad value in trap_list[17]: 0x560aeff93c30
2021-11-24 15:03:31 1 card TestCases Total Time: 0s
2021-11-24 15:03:31 1 card TestCases finished!!!!
2021-11-24 15:03:31 ++ date +%s
2021-11-24 15:03:31 + ut_endTime_s=1637737411
2021-11-24 15:03:31 + echo 'CINN testCase Time: 0s'
2021-11-24 15:03:31 CINN testCase Time: 0s
2021-11-24 15:03:31 + [[ 1 != \0 ]]
2021-11-24 15:03:31 + exit 8
2021-11-24 15:03:31 + EXCODE=8
2021-11-24 15:03:31 + echo 8
2021-11-24 15:03:31 8
2021-11-24 15:03:31 + echo 'ipipe_log_param_EXCODE: 8'
2021-11-24 15:03:31 ipipe_log_param_EXCODE: 8
2021-11-24 15:03:31 + set +x
