Commit 87c1bc9

Merge remote-tracking branch 'upstream/release/2.3' into release_2.3_mp_eager

haohongxiang committed Apr 20, 2022
2 parents 7e54b51 + 3b25afb commit 87c1bc9
Showing 61 changed files with 1,966 additions and 206 deletions.
5 changes: 3 additions & 2 deletions paddle/fluid/distributed/collective/ProcessGroup.cc
@@ -35,8 +35,9 @@ bool ProcessGroup::Task::Wait(std::chrono::milliseconds timeout) {
 
 void ProcessGroup::Task::Synchronize() {}
 
-ProcessGroup::ProcessGroup(int rank, int size, int gid)
-    : rank_(rank), size_(size), gid_(gid) {
+ProcessGroup::ProcessGroup(int rank, int size, const platform::Place& place,
+                           int gid)
+    : rank_(rank), size_(size), place_(place), gid_(gid) {
   if (gid != IGNORE_ID) {
     auto map = ProcessGroupMapFromGid::getInstance();
     map->insert(gid_, this);

4 changes: 3 additions & 1 deletion paddle/fluid/distributed/collective/ProcessGroup.h
@@ -69,7 +69,8 @@ class ProcessGroup {
     bool is_completed_ = false;
   };
 
-  explicit ProcessGroup(int rank, int size, int gid);
+  explicit ProcessGroup(int rank, int size, const platform::Place& place,
+                        int gid);
   virtual ~ProcessGroup() {}
 
   int GetRank() const { return rank_; }
@@ -145,6 +146,7 @@ class ProcessGroup {
  protected:
   const int rank_;
   const int size_;
+  const platform::Place place_;
   const int gid_;
 };
 
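Taken together, the two hunks above thread a device placement through the base class: every ProcessGroup is now constructed with, and permanently stores, the platform::Place it runs on. A minimal self-contained sketch of the resulting shape — here `Place` is a pared-down stand-in for paddle::platform::Place and `GetPlace()` is a hypothetical accessor, not Paddle's actual API:

```cpp
// Sketch of the new ProcessGroup constructor shape. `Place` and `GetPlace()`
// are illustrative stand-ins, not Paddle's real declarations.
#include <iostream>

struct Place {
  int device = 0;  // device index, e.g. GPU 0
};

class ProcessGroup {
 public:
  ProcessGroup(int rank, int size, const Place& place, int gid)
      : rank_(rank), size_(size), place_(place), gid_(gid) {}
  virtual ~ProcessGroup() = default;

  int GetRank() const { return rank_; }
  const Place& GetPlace() const { return place_; }  // hypothetical accessor

 protected:
  const int rank_;
  const int size_;
  const Place place_;  // new member introduced by this commit
  const int gid_;
};

int main() {
  ProcessGroup pg(/*rank=*/0, /*size=*/2, Place{0}, /*gid=*/1);
  std::cout << "rank " << pg.GetRank() << " bound to device "
            << pg.GetPlace().device << "\n";
  return 0;
}
```
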
5 changes: 3 additions & 2 deletions paddle/fluid/distributed/collective/ProcessGroupGloo.cc
@@ -165,8 +165,9 @@ ProcessGroupGloo::GlooTask::GlooTask(
 
 ProcessGroupGloo::ProcessGroupGloo(
     const std::shared_ptr<distributed::Store>& store, int rank, int world_size,
-    int gid, const std::shared_ptr<GlooOptions> options)
-    : ProcessGroup(rank, world_size, gid),
+    const platform::Place& place, int gid,
+    const std::shared_ptr<GlooOptions> options)
+    : ProcessGroup(rank, world_size, place, gid),
       _tag(0),
       _store(new GlooStore(store)) {
   _context = std::make_shared<gloo::rendezvous::Context>(rank, world_size);

3 changes: 2 additions & 1 deletion paddle/fluid/distributed/collective/ProcessGroupGloo.h
@@ -102,7 +102,8 @@ class ProcessGroupGloo : public ProcessGroup {
 
   explicit ProcessGroupGloo(
       const std::shared_ptr<paddle::distributed::Store>& store, int rank,
-      int world_size, int gid, std::shared_ptr<GlooOptions> options);
+      int world_size, const platform::Place& place, int gid,
+      std::shared_ptr<GlooOptions> options);
 
   ~ProcessGroupGloo() = default;

8 changes: 6 additions & 2 deletions paddle/fluid/distributed/collective/ProcessGroupHCCL.cc
@@ -17,6 +17,7 @@
 #include "paddle/fluid/distributed/collective/HCCLTools.h"
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/platform/device/npu/hccl_helper.h"
+#include "paddle/fluid/platform/device/npu/npu_info.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/api/include/api.h"
@@ -97,8 +98,11 @@ bool ProcessGroupHCCL::HCCLTask::Wait(std::chrono::milliseconds timeout) {
 void ProcessGroupHCCL::HCCLTask::Synchronize() { Wait(kWaitTimeout); }
 
 ProcessGroupHCCL::ProcessGroupHCCL(const std::shared_ptr<Store>& store,
-                                   int rank, int size, int gid)
-    : ProcessGroup(rank, size, gid), store_(store) {}
+                                   int rank, int size,
+                                   const platform::Place& place, int gid)
+    : ProcessGroup(rank, size, place, gid), store_(store) {
+  platform::SetNPUDeviceId(place_.device);
+}
 
 void ProcessGroupHCCL::BroadcastUniqueHCCLID(
     std::vector<HcclRootInfo>& hccl_ids) {  // NOLINT

2 changes: 1 addition & 1 deletion paddle/fluid/distributed/collective/ProcessGroupHCCL.h
@@ -71,7 +71,7 @@ class ProcessGroupHCCL : public ProcessGroup {
   };
 
   ProcessGroupHCCL(const std::shared_ptr<Store>& store, int rank, int size,
-                   int gid);
+                   const platform::Place& place, int gid);
 
   const std::string GetBackendName() const override {
     return std::string(HCCL_BACKEND_NAME);

20 changes: 9 additions & 11 deletions paddle/fluid/distributed/collective/ProcessGroupHeter.cc
@@ -44,13 +44,11 @@ bool ProcessGroupHeter::HeterTask::Wait(std::chrono::milliseconds timeout) {
   return true;
 }
 
-ProcessGroupHeter::ProcessGroupHeter(const std::shared_ptr<Store>& store,
-                                     int rank, int size, int gid,
-                                     int local_rank, int local_size,
-                                     int gloo_rank, int gloo_size,
-                                     bool with_switch,
-                                     std::string switch_endpoint)
-    : ProcessGroup(rank, size, gid),
+ProcessGroupHeter::ProcessGroupHeter(
+    const std::shared_ptr<Store>& store, int rank, int size,
+    const platform::Place& place, int gid, int local_rank, int local_size,
+    int gloo_rank, int gloo_size, bool with_switch, std::string switch_endpoint)
+    : ProcessGroup(rank, size, place, gid),
       store_(store),
       local_rank_(local_rank),
       local_size_(local_size),
@@ -60,19 +58,19 @@ ProcessGroupHeter::ProcessGroupHeter(const std::shared_ptr<Store>& store,
       switch_endpoint_(switch_endpoint) {
 #if defined(PADDLE_WITH_NCCL)
   inner_pg_ = std::make_shared<ProcessGroupNCCL>(store, local_rank, local_size,
-                                                 IGNORE_ID);
+                                                 place_, IGNORE_ID);
 #elif defined(PADDLE_WITH_ASCEND_CL)
   inner_pg_ = std::make_shared<ProcessGroupHCCL>(store, local_rank, local_size,
-                                                 IGNORE_ID);
+                                                 place_, IGNORE_ID);
 #else
   PADDLE_THROW(platform::errors::Fatal(
       "ProcessGroupHeter only supports NCCL and HCCL now."));
 #endif
   if (local_rank_ == 0 && !with_switch_) {
     auto opts = ProcessGroupGloo::GlooOptions::create();
     opts->device = ProcessGroupGloo::createDefaultDevice();
-    inter_pg_ = std::make_shared<ProcessGroupGloo>(store, gloo_rank_,
-                                                   gloo_size_, IGNORE_ID, opts);
+    inter_pg_ = std::make_shared<ProcessGroupGloo>(
+        store, gloo_rank_, gloo_size_, place_, IGNORE_ID, opts);
   }
 }

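ProcessGroupHeter composes two levels of communication, and both now inherit the same place_: an intra-node inner group (NCCL or HCCL) created on every rank, and a cross-node Gloo group joined only by local rank 0. A self-contained sketch of that wiring, using simplified stand-in types rather than Paddle's real classes:

```cpp
// Illustrates the inner/inter split in ProcessGroupHeter: every rank gets an
// intra-node group; only local rank 0 (without a switch) also joins the
// cross-node group. All types and names here are illustrative stand-ins.
#include <iostream>
#include <memory>
#include <string>

struct Place { int device = 0; };

struct SubGroup {
  std::string backend;
  Place place;
};

class HeterGroup {
 public:
  HeterGroup(int local_rank, const Place& place, bool with_switch)
      : inner_(std::make_shared<SubGroup>(SubGroup{"NCCL/HCCL", place})) {
    if (local_rank == 0 && !with_switch) {
      inter_ = std::make_shared<SubGroup>(SubGroup{"Gloo", place});
    }
  }

  void Describe() const {
    std::cout << "inner: " << inner_->backend << " on device "
              << inner_->place.device << ", inter: "
              << (inter_ ? inter_->backend : std::string("none")) << "\n";
  }

 private:
  std::shared_ptr<SubGroup> inner_;
  std::shared_ptr<SubGroup> inter_;  // only set on local rank 0
};

int main() {
  HeterGroup(/*local_rank=*/0, Place{0}, /*with_switch=*/false).Describe();
  HeterGroup(/*local_rank=*/1, Place{1}, /*with_switch=*/false).Describe();
  return 0;
}
```
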
6 changes: 3 additions & 3 deletions paddle/fluid/distributed/collective/ProcessGroupHeter.h
@@ -81,9 +81,9 @@ class ProcessGroupHeter : public ProcessGroup {
   };
 
   ProcessGroupHeter(const std::shared_ptr<Store>& store, int rank, int size,
-                    int gid, int local_rank, int local_size, int gloo_rank,
-                    int gloo_size, bool with_switch,
-                    std::string switch_endpoints);
+                    const platform::Place& place, int gid, int local_rank,
+                    int local_size, int gloo_rank, int gloo_size,
+                    bool with_switch, std::string switch_endpoints);
 
   const std::string GetBackendName() const override {
     return std::string(HETER_BACKEND_NAME);

8 changes: 6 additions & 2 deletions paddle/fluid/distributed/collective/ProcessGroupNCCL.cc
@@ -14,6 +14,7 @@
 
 #include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
 #include "paddle/fluid/distributed/collective/Common.h"
+#include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/api/include/api.h"
@@ -103,8 +104,11 @@ bool ProcessGroupNCCL::NCCLTask::Wait(std::chrono::milliseconds timeout) {
 void ProcessGroupNCCL::NCCLTask::Synchronize() { Wait(kWaitTimeout); }
 
 ProcessGroupNCCL::ProcessGroupNCCL(const std::shared_ptr<Store>& store,
-                                   int rank, int size, int gid)
-    : ProcessGroup(rank, size, gid), store_(store) {}
+                                   int rank, int size,
+                                   const platform::Place& place, int gid)
+    : ProcessGroup(rank, size, place, gid), store_(store) {
+  platform::SetDeviceId(place_.device);
+}
 
 void ProcessGroupNCCL::BroadcastUniqueNCCLID(
     std::vector<ncclUniqueId>& nccl_ids) {  // NOLINT

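As with HCCL, the NCCL constructor now pins the calling thread to place_'s device as a construction side effect, so later allocations and collectives default to the right GPU. A minimal sketch of that idiom against the CUDA runtime — the DeviceBoundGroup class is illustrative, not Paddle code:

```cpp
// Binding a device at construction, mirroring the added
// platform::SetDeviceId(place_.device) call. Requires the CUDA runtime.
#include <cuda_runtime.h>

#include <cstdio>

class DeviceBoundGroup {
 public:
  explicit DeviceBoundGroup(int device) : device_(device) {
    // After this, CUDA work issued from this thread targets `device_`.
    cudaSetDevice(device_);
  }
  int device() const { return device_; }

 private:
  int device_;
};

int main() {
  DeviceBoundGroup group(/*device=*/0);
  int current = -1;
  cudaGetDevice(&current);  // confirm the constructor's binding took effect
  std::printf("group device %d, current device %d\n", group.device(), current);
  return 0;
}
```
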
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/collective/ProcessGroupNCCL.h
@@ -77,7 +77,7 @@ class ProcessGroupNCCL : public ProcessGroup {
   };
 
   ProcessGroupNCCL(const std::shared_ptr<Store>& store, int rank, int size,
-                   int gid);
+                   const platform::Place& place, int gid);
 
   const std::string GetBackendName() const override {
     return std::string(NCCL_BACKEND_NAME);

27 changes: 27 additions & 0 deletions paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc
@@ -16,7 +16,9 @@
 #include "gtest/gtest.h"
 
 #include "paddle/fluid/eager/eager_tensor.h"
+#include "paddle/fluid/imperative/var_helper.h"
 #include "paddle/phi/api/lib/utils/allocator.h"
+#include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/kernel_registry.h"
 
 PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
@@ -206,3 +208,28 @@ TEST(EagerVariable, Constructor) {
 
   VLOG(6) << "Finish";
 }
+
+TEST(EagerVariable, DataLayout) {
+  paddle::experimental::Tensor tensor;
+  phi::DenseTensorMeta meta =
+      phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1, 1, 1}),
+                           paddle::experimental::DataLayout::UNDEFINED);
+  std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>(
+      std::make_unique<paddle::experimental::DefaultAllocator>(
+          paddle::platform::CPUPlace())
+          .get(),
+      meta);
+  auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
+  dt_ptr[0] = 5.0f;
+  dt_ptr[1] = 5.0f;
+  dt_ptr[2] = 5.0f;
+  dt_ptr[3] = 5.0f;
+  tensor.set_impl(dt);
+  auto eager_var = std::make_shared<egr::EagerVariable>(tensor);
+  auto layout = paddle::imperative::GetDataLayout(eager_var);
+  CHECK_EQ(layout, paddle::experimental::DataLayout::UNDEFINED);
+  paddle::imperative::SetDataLayout(eager_var,
+                                    paddle::experimental::DataLayout::NCHW);
+  layout = paddle::imperative::GetDataLayout(eager_var);
+  CHECK_EQ(layout, paddle::experimental::DataLayout::NCHW);
+}

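The new test exercises the layout plumbing end to end through the imperative var_helper layer: a freshly wrapped EagerVariable reads back as UNDEFINED, and SetDataLayout flips it to NCHW, which GetDataLayout then observes.
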
7 changes: 6 additions & 1 deletion paddle/fluid/imperative/CMakeLists.txt
@@ -7,8 +7,13 @@ cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator
 ENDIF()
 cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry var_helper phi_api)
 add_subdirectory(jit)
+if (WITH_GPU)
+cc_library(layout_autotune SRCS layout_autotune.cc DEPS op_info phi_gpu_info)
+else()
+cc_library(layout_autotune SRCS layout_autotune.cc DEPS op_info)
+endif()
 cc_library(amp SRCS amp_auto_cast.cc DEPS layer var_helper)
-cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp denormal garbage_collector var_helper)
+cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp denormal garbage_collector var_helper layout_autotune)
 cc_library(basic_engine SRCS basic_engine.cc DEPS layer gradient_accumulator switch_autotune)
 cc_library(engine SRCS basic_engine.cc partial_grad_engine.cc DEPS layer gradient_accumulator switch_autotune)
 cc_library(imperative_profiler SRCS profiler.cc DEPS flags)

8 changes: 8 additions & 0 deletions paddle/fluid/imperative/layer.h
@@ -211,6 +211,14 @@ class VarBase {
 
   framework::proto::VarType::Type DataType() const { return var_->DataType(); }
 
+  void SetDataLayout(paddle::experimental::DataLayout data_layout) {
+    var_->SetDataLayout(data_layout);
+  }
+
+  paddle::experimental::DataLayout DataLayout() const {
+    return var_->DataLayout();
+  }
+
   size_t ElementSize() const { return framework::SizeOfType(var_->DataType()); }
 
   void SetForwardDataType(framework::proto::VarType::Type data_type) {

(Diff truncated: the remaining changed files are not shown here.)

1 comment on commit 87c1bc9

@paddle-bot-old
Congratulation! Your pull request passed all required CI. You could ask reviewer(s) to approve and merge. 🎉
