[cherry-pick] Implement Amp Layout AutoTune(41884) (#41964)
 cherry-pick #41884
zhangting2020 authored Apr 20, 2022
1 parent d17e39c commit 85a4ecb
Showing 12 changed files with 846 additions and 3 deletions.
27 changes: 27 additions & 0 deletions paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc
@@ -16,7 +16,9 @@
#include "gtest/gtest.h"

#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/imperative/var_helper.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"

PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
@@ -206,3 +208,28 @@ TEST(EagerVariable, Constructor) {

  VLOG(6) << "Finish";
}

TEST(EagerVariable, DataLayout) {
  paddle::experimental::Tensor tensor;
  phi::DenseTensorMeta meta =
      phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1, 1, 1}),
                           paddle::experimental::DataLayout::UNDEFINED);
  std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>(
      std::make_unique<paddle::experimental::DefaultAllocator>(
          paddle::platform::CPUPlace())
          .get(),
      meta);
  auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
  dt_ptr[0] = 5.0f;
  dt_ptr[1] = 5.0f;
  dt_ptr[2] = 5.0f;
  dt_ptr[3] = 5.0f;
  tensor.set_impl(dt);
  auto eager_var = std::make_shared<egr::EagerVariable>(tensor);
  auto layout = paddle::imperative::GetDataLayout(eager_var);
  CHECK_EQ(layout, paddle::experimental::DataLayout::UNDEFINED);
  paddle::imperative::SetDataLayout(eager_var,
                                    paddle::experimental::DataLayout::NCHW);
  layout = paddle::imperative::GetDataLayout(eager_var);
  CHECK_EQ(layout, paddle::experimental::DataLayout::NCHW);
}
7 changes: 6 additions & 1 deletion paddle/fluid/imperative/CMakeLists.txt
@@ -7,8 +7,13 @@ cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator
ENDIF()
cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry var_helper phi_api)
add_subdirectory(jit)
if (WITH_GPU)
  cc_library(layout_autotune SRCS layout_autotune.cc DEPS op_info phi_gpu_info)
else()
  cc_library(layout_autotune SRCS layout_autotune.cc DEPS op_info)
endif()
cc_library(amp SRCS amp_auto_cast.cc DEPS layer var_helper)
cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp denormal garbage_collector var_helper)
cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp denormal garbage_collector var_helper layout_autotune)
cc_library(basic_engine SRCS basic_engine.cc DEPS layer gradient_accumulator switch_autotune)
cc_library(engine SRCS basic_engine.cc partial_grad_engine.cc DEPS layer gradient_accumulator switch_autotune)
cc_library(imperative_profiler SRCS profiler.cc DEPS flags)
8 changes: 8 additions & 0 deletions paddle/fluid/imperative/layer.h
@@ -211,6 +211,14 @@ class VarBase {

  framework::proto::VarType::Type DataType() const { return var_->DataType(); }

  void SetDataLayout(paddle::experimental::DataLayout data_layout) {
    var_->SetDataLayout(data_layout);
  }

  paddle::experimental::DataLayout DataLayout() const {
    return var_->DataLayout();
  }

  size_t ElementSize() const { return framework::SizeOfType(var_->DataType()); }

  void SetForwardDataType(framework::proto::VarType::Type data_type) {
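
For orientation, a minimal sketch of exercising the two VarBase accessors added above. Only SetDataLayout and DataLayout come from this diff; constructing a VarBase from a name and the wrapper function are illustrative assumptions.

// Illustrative only: the VarBase(name) constructor is assumed here; the
// SetDataLayout()/DataLayout() accessors are the ones added in the hunk above.
#include <memory>
#include <glog/logging.h>
#include "paddle/fluid/imperative/layer.h"

void TagVarLayout() {
  auto var = std::make_shared<paddle::imperative::VarBase>("conv2d_out");
  var->SetDataLayout(paddle::experimental::DataLayout::NHWC);
  CHECK(var->DataLayout() == paddle::experimental::DataLayout::NHWC);
}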
178 changes: 178 additions & 0 deletions paddle/fluid/imperative/layout_autotune.cc
@@ -0,0 +1,178 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/imperative/layout_transformer.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"

namespace paddle {
namespace imperative {

bool LayoutAutoTune::UseLayoutAutoTune() const {
#if defined(PADDLE_WITH_CUDA)
  if (!phi::backends::gpu::TensorCoreAvailable()) {
    LOG(INFO) << "Layout AutoTuning is not available.";
    return false;
  } else {
    return use_layout_autotune_;
  }
#else
  return false;
#endif
}

LayoutAutoTune::LayoutAutoTune() {
  const auto& op_info = paddle::framework::OpInfoMap::Instance().map();
  for (auto it = op_info.begin(); it != op_info.end(); it++) {
    // only record forward operators
    if (it->first.find("_grad") != std::string::npos) {
      continue;
    }

    // some normalization operators such as instance_norm and layer_norm
    // do not have a data_format attr, but are layout sensitive.
    if (it->first.find("norm") != std::string::npos) {
      layout_agnostic_ops_.emplace(it->first);
      continue;
    }

    auto* attr_checker = it->second.Checker();
    if (attr_checker) {
      auto attrs = attr_checker->GetDefaultAttrMap();
      if (attrs.find("data_format") != attrs.end() ||
          attrs.find("data_layout") != attrs.end()) {
        VLOG(4) << "Heavily layout sensitive OP: " << it->first;
        heavily_layout_sensitive_ops_.emplace(it->first);
        continue;
      }

      // Attribute names are fuzzy matched, such as start and start_axis.
      bool layout_agnostic = true;
      for (auto& attr : attrs) {
        auto attr_name = attr.first;
        VLOG(6) << "OP: " << it->first << " Attr Name: " << attr_name;
        if (attr_name.find("axis") != std::string::npos ||
            attr_name.find("axes") != std::string::npos ||
            attr_name.find("dim") != std::string::npos ||
            attr_name.find("start") != std::string::npos ||
            attr_name.find("end") != std::string::npos) {
          VLOG(4) << "Lightly layout sensitive OP: " << it->first;
          layout_agnostic = false;
          lightly_layout_sensitive_ops_.emplace(it->first);
          break;
        }
      }

      if (layout_agnostic) {
        VLOG(4) << "Layout agnostic_ops: " << it->first;
        layout_agnostic_ops_.emplace(it->first);
      }
    }
  }

  VLOG(3) << "The number of layout agnostic OPs: "
          << layout_agnostic_ops_.size() << ", heavily layout sensitive OPs: "
          << heavily_layout_sensitive_ops_.size()
          << ", lightly layout sensitive OPs: "
          << lightly_layout_sensitive_ops_.size();
}

template <typename VarType>
paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
    const std::string& op_type,
    const paddle::imperative::NameVarMap<VarType>& ins,
    const paddle::imperative::NameVarMap<VarType>& outs,
    paddle::framework::AttributeMap* attrs,
    const std::shared_ptr<imperative::Tracer>& tracer) {
  if (!LayoutAutoTune::Instance().UseLayoutAutoTune()) {
    return ins;
  }

  // When layout autotuning is enabled, the tuner checks the desired layout.
  // (1) If the desired layout is undefined and there are no convolutional
  // layers, layout optimization is unnecessary. Otherwise, the desired layout
  // is set to the best layout only when there is a convolutional layer with
  // NCHW layout and Tensor Cores are available.
  // (2) If the desired layout is defined, run the transposer.

  if (LayoutAutoTune::Instance().GetDesiredLayout() == DataLayout::UNDEFINED) {
    // Layout autotune only supports models with convolutional layers.
    if (op_type != "conv2d") {
      return ins;
    } else {
      if (BOOST_GET_CONST(std::string, (*attrs)["data_format"]) == "NCHW") {
        LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC);
        VLOG(3) << "Tune the layout from "
                << BOOST_GET_CONST(std::string, (*attrs)["data_format"])
                << " to " << paddle::framework::DataLayoutToString(
                       LayoutAutoTune::Instance().GetDesiredLayout());
      } else {
        LayoutAutoTune::Instance().DisableLayoutAutoTune();
        return ins;
      }
    }
  }

  std::shared_ptr<LayoutTransformer<VarType>> transposer = nullptr;
  if (op_type == "conv2d") {
    transposer =
        std::make_shared<HeavilyLayoutSensitiveOpTransformer<VarType>>(op_type);
    transposer->SetArguments({"Input"}, {"Output"}, {"data_format"});
  } else if (op_type == "batch_norm") {
    transposer =
        std::make_shared<HeavilyLayoutSensitiveOpTransformer<VarType>>(op_type);
    transposer->SetArguments({"X"}, {"Y"}, {"data_layout"});
  } else if (op_type == "pool2d") {
    transposer =
        std::make_shared<HeavilyLayoutSensitiveOpTransformer<VarType>>(op_type);
    transposer->SetArguments({"X"}, {"Out"}, {"data_format"});
  } else if (op_type == "transpose2") {
    transposer = std::make_shared<TransposeOpTransformer<VarType>>(op_type);
  } else if (op_type == "flatten_contiguous_range") {
    transposer = std::make_shared<FlattenOpTransformer<VarType>>(op_type);
  } else if (op_type.find("elementwise_") != std::string::npos) {
    transposer = std::make_shared<ElementwiseOpTransformer<VarType>>(op_type);
  } else if (LayoutAutoTune::Instance().IsLayoutAgnostic(op_type)) {
    transposer = std::make_shared<LayoutTransformer<VarType>>(op_type);
  } else if (LayoutAutoTune::Instance().IsLightlyLayoutSensitive(op_type)) {
    transposer =
        std::make_shared<LightlyLayoutSensitiveOpTransformer<VarType>>(op_type);
  } else {
    PADDLE_ENFORCE_NOT_NULL(
        transposer, phi::errors::Unimplemented(
                        "%s 's LayoutTransformer is unimplemented.", op_type));
  }

  return transposer->Apply(ins, outs, attrs, tracer);
}
template paddle::imperative::NameVarMap<VarBase> AutoTuneLayout<VarBase>(
    const std::string& op_type,
    const paddle::imperative::NameVarMap<VarBase>& ins,
    const paddle::imperative::NameVarMap<VarBase>& outs,
    paddle::framework::AttributeMap* attrs,
    const std::shared_ptr<imperative::Tracer>& tracer);
template paddle::imperative::NameVarMap<egr::EagerVariable>
AutoTuneLayout<egr::EagerVariable>(
    const std::string& op_type,
    const paddle::imperative::NameVarMap<egr::EagerVariable>& ins,
    const paddle::imperative::NameVarMap<egr::EagerVariable>& outs,
    paddle::framework::AttributeMap* attrs,
    const std::shared_ptr<imperative::Tracer>& tracer);

} // namespace imperative
} // namespace paddle
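
To make the entry point above concrete, here is a hedged sketch of a call site: the dygraph tracer routes an op's inputs, outputs, and attributes through AutoTuneLayout before executing the op. The tracer changes of this commit are not shown in this excerpt, so the wrapper name and flow below are assumptions; only AutoTuneLayout and the NameVarMap/AttributeMap types come from the files above.

// Hypothetical call-site sketch, not part of the diff.
#include <memory>
#include <string>
#include "paddle/fluid/framework/type_defs.h"
#include "paddle/fluid/imperative/layout_autotune.h"

template <typename VarType>
paddle::imperative::NameVarMap<VarType> TuneThenTrace(
    const std::string& op_type,
    const paddle::imperative::NameVarMap<VarType>& ins,
    const paddle::imperative::NameVarMap<VarType>& outs,
    paddle::framework::AttributeMap* attrs,
    const std::shared_ptr<paddle::imperative::Tracer>& tracer) {
  // AutoTuneLayout may transpose inputs and rewrite data_format/data_layout
  // attrs, or return `ins` unchanged when autotuning is off or inapplicable.
  auto tuned_ins = paddle::imperative::AutoTuneLayout<VarType>(
      op_type, ins, outs, attrs, tracer);
  // ... the usual op preparation and execution would consume tuned_ins here ...
  return tuned_ins;
}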
77 changes: 77 additions & 0 deletions paddle/fluid/imperative/layout_autotune.h
@@ -0,0 +1,77 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <glog/logging.h>
#include <memory>
#include <unordered_set>
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/compat/type_defs.h"

namespace paddle {
namespace imperative {

class Tracer;

using DataLayout = paddle::experimental::DataLayout;

class LayoutAutoTune {
 public:
  static LayoutAutoTune& Instance() {
    static LayoutAutoTune layout_autoTune;
    return layout_autoTune;
  }

  bool UseLayoutAutoTune() const;

  void EnableLayoutAutoTune() { use_layout_autotune_ = true; }

  void DisableLayoutAutoTune() { use_layout_autotune_ = false; }

  bool IsLightlyLayoutSensitive(const std::string& op_type) const {
    return lightly_layout_sensitive_ops_.count(op_type) != 0;
  }

  bool IsLayoutAgnostic(const std::string& op_type) const {
    return layout_agnostic_ops_.count(op_type) != 0;
  }

  DataLayout GetDesiredLayout() const { return layout_; }

  void SetDesiredLayout(const DataLayout& layout) { layout_ = layout; }

 private:
  LayoutAutoTune();

  bool use_layout_autotune_{false};

  std::unordered_set<std::string> layout_agnostic_ops_{};

  std::unordered_set<std::string> heavily_layout_sensitive_ops_{};

  std::unordered_set<std::string> lightly_layout_sensitive_ops_{};

  DataLayout layout_{DataLayout::UNDEFINED};
};

template <typename VarType>
paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
    const std::string& op_type,
    const paddle::imperative::NameVarMap<VarType>& ins,
    const paddle::imperative::NameVarMap<VarType>& outs,
    paddle::framework::AttributeMap* attrs,
    const std::shared_ptr<imperative::Tracer>& tracer);

} // namespace imperative
} // namespace paddle
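
Finally, a short sketch of driving the singleton declared above directly. In this commit the desired layout is chosen by AutoTuneLayout when it first sees an NCHW conv2d, so calling SetDesiredLayout by hand is purely illustrative.

// Illustrative use of the public singleton API declared above; setting the
// desired layout manually is an assumption, normally AutoTuneLayout selects it.
#include "paddle/fluid/imperative/layout_autotune.h"

void ConfigureLayoutAutoTune() {
  auto& tuner = paddle::imperative::LayoutAutoTune::Instance();
  tuner.EnableLayoutAutoTune();  // opt in; UseLayoutAutoTune() still checks for Tensor Cores
  if (tuner.UseLayoutAutoTune() &&
      tuner.GetDesiredLayout() == paddle::imperative::DataLayout::UNDEFINED) {
    tuner.SetDesiredLayout(paddle::imperative::DataLayout::NHWC);
  }
}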