Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Reference engine from chunk via weak pointer #14591

Merged
merged 1 commit into from
Apr 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions include/mxnet/ndarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -850,15 +850,20 @@ class NDArray {
mxnet::ShapeVector aux_shapes;
/*! \brief Reference to the storage to ensure proper destruct order */
std::shared_ptr<Storage> storage_ref_;
/*! \brief Reference to the engine to ensure we cleanup without calling a destructed engine */
std::weak_ptr<Engine> engine_ref_;

/*! \brief default cosntructor */

/*! \brief default constructor */
Chunk() : static_data(true), delay_alloc(false),
storage_ref_(Storage::_GetSharedRef()) {}
storage_ref_(Storage::_GetSharedRef()),
engine_ref_(Engine::_GetSharedRef()) {}

/*! \brief construct a new chunk */
Chunk(mxnet::TShape shape, Context ctx_, bool delay_alloc_, int dtype)
: static_data(false), delay_alloc(true), ctx(ctx_),
storage_ref_(Storage::_GetSharedRef()) {
storage_ref_(Storage::_GetSharedRef()),
engine_ref_(Engine::_GetSharedRef()) {
storage_shape = shape;
if (shape_is_known(storage_shape)) {
shandle.size = shape.Size() * mshadow::mshadow_sizeof(dtype);
Expand All @@ -872,7 +877,8 @@ class NDArray {

Chunk(const TBlob &data, int dev_id)
: static_data(true), delay_alloc(false),
storage_ref_(Storage::_GetSharedRef()) {
storage_ref_(Storage::_GetSharedRef()),
engine_ref_(Engine::_GetSharedRef()) {
CHECK(storage_type == kDefaultStorage);
var = Engine::Get()->NewVariable();
if (data.dev_mask() == cpu::kDevMask) {
Expand All @@ -890,7 +896,8 @@ class NDArray {

Chunk(int shared_pid, int shared_id, const mxnet::TShape& shape, int dtype)
: static_data(false), delay_alloc(false),
storage_ref_(Storage::_GetSharedRef()) {
storage_ref_(Storage::_GetSharedRef()),
engine_ref_(Engine::_GetSharedRef()) {
var = Engine::Get()->NewVariable();
ctx = Context::CPUShared(0);
shandle.size = shape.Size() * mshadow::mshadow_sizeof(dtype);
Expand All @@ -906,7 +913,8 @@ class NDArray {
const mxnet::ShapeVector &aux_shapes_)
: static_data(false), delay_alloc(delay_alloc_), storage_type(storage_type_),
aux_types(aux_types_), ctx(ctx_), storage_shape(storage_shape_),
aux_shapes(aux_shapes_), storage_ref_(Storage::_GetSharedRef()) {
aux_shapes(aux_shapes_), storage_ref_(Storage::_GetSharedRef()),
engine_ref_(Engine::_GetSharedRef()) {
shandle.ctx = ctx;
var = Engine::Get()->NewVariable();
// aux_handles always reflect the correct number of aux data
Expand All @@ -924,7 +932,7 @@ class NDArray {
Chunk(const NDArrayStorageType storage_type_, const TBlob &data,
const std::vector<TBlob> &aux_data, int dev_id)
: static_data(true), delay_alloc(false), storage_type(storage_type_),
storage_ref_(Storage::_GetSharedRef()) {
storage_ref_(Storage::_GetSharedRef()), engine_ref_(Engine::_GetSharedRef()) {
using namespace mshadow;
CHECK_NE(storage_type, kDefaultStorage);
// init var
Expand Down
24 changes: 13 additions & 11 deletions src/ndarray/ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,20 +113,22 @@ NDArray::Chunk::~Chunk() {
// We want to delete mkldnn memory after deleting the variable.
mem.mem = this->mkl_mem_;
#endif
Engine::Get()->DeleteVariable([mem, skip_free](RunContext s) {
if (skip_free == false) {
if (auto engine = engine_ref_.lock()) {
engine->DeleteVariable([mem, skip_free](RunContext s) {
if (skip_free == false) {
#if MXNET_USE_MKLDNN == 1
if (mem.mem) {
CHECK_LE(mem.mem->GetSize(), mem.h.size);
CHECK_EQ(mem.mem->GetDataHandle(), mem.h.dptr);
}
if (mem.mem) {
CHECK_LE(mem.mem->GetSize(), mem.h.size);
CHECK_EQ(mem.mem->GetDataHandle(), mem.h.dptr);
}
#endif
Storage::Get()->Free(mem.h);
for (const auto& aux : mem.aux_h) {
Storage::Get()->Free(aux);
Storage::Get()->Free(mem.h);
for (const auto &aux : mem.aux_h) {
Storage::Get()->Free(aux);
}
}
}
}, shandle.ctx, var);
}, shandle.ctx, var);
}
}

void NDArray::Chunk::CheckAndAllocData(const mxnet::TShape &shape, int dtype) {
Expand Down
41 changes: 41 additions & 0 deletions tests/cpp/engine/engine_shutdown_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Copyright (c) 2019 by Contributors
* \file engine_shutdown_test.cc
* \brief Tests engine shutdown for possible crashes
*/
#include <gtest/gtest.h>

#include "../src/engine/engine_impl.h"
#include "../include/test_util.h"

/**
* This test will help ensure we don't crash during engine shutdown.
* The crash happens during a static destructor call, so this test may pass and then cause a test-run process crash.
*/
TEST(EngineShutdown, stop_without_crashing) {
static std::unique_ptr<mxnet::NDArray> ndArray;
{
auto engine = mxnet::Engine::_GetSharedRef();
ndArray = std::make_unique<mxnet::NDArray>(mxnet::Context::CPU());
engine->Stop();
}
}