Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance the unit test framework to explicitly test whether the operator correctly handles gradients for multiple inputs. #3857

Merged
merged 4 commits into from
Sep 5, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 35 additions & 13 deletions doc/howto/dev/new_op_cn.md
Original file line number Diff line number Diff line change
Expand Up @@ -280,28 +280,50 @@ class TestMulOp(unittest.TestCase):

反向Op单测继承自`GradientChecker`,而`GradientChecker`集成自`unittest.TestCase`,所以反向单测函数需要`test_`开头。

```
class MulGradOpTest(GradientChecker):
def test_mul(self):
op = create_op("mul")
inputs = {
```
class TestMulGradOp(GradientChecker):
def setUp(self):
self.op = create_op("mul")
self.inputs = {
'X': np.random.random((32, 84)).astype("float32"),
'Y': np.random.random((84, 100)).astype("float32")
}
self.compare_grad(op, inputs)

def test_cpu_gpu_compare(self):
self.compare_grad(self.op, self.inputs)

def test_normal(self):
# mul op will enlarge the relative error
self.check_grad(
op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.5)
```
self.op, self.inputs, ["X", "Y"], "Out", max_relative_error=0.5)

def test_ignore_x(self):
self.check_grad(
self.op,
self.inputs, ["Y"],
"Out",
max_relative_error=0.5,
no_grad_set={"X"})

def test_ignore_y(self):
self.check_grad(
self.op,
self.inputs, ["X"],
"Out",
max_relative_error=0.5,
no_grad_set={"Y"})
```

下面解释一些关键的地方:

- 调用`create_op("mul")`创建反向Op对应的前向Op。
- 定义输入`inputs`。
- 调用`compare_grad`函数对比CPU、GPU计算结果。
- 调用`check_grad`检查梯度稳定性,这里采用数值法检测梯度正确性。
- 第一个参数`op` : 前向op
- 第二个参数`inputs` : 输入词典,词典的Key和`ProtoMaker`定义保持一致。
- 第三个参数`set(["X", "Y"])` : 指定对输入变量`X`、`Y`做梯度检测。
- `test_normal`中调用`check_grad`检查梯度稳定性,这里采用数值法检测梯度正确性。
- 第一个参数`self.op` : 前向Op
- 第二个参数`self.inputs` : 输入词典,词典的Key和`ProtoMaker`定义保持一致。
- 第三个参数`["X", "Y"]` : 指定对输入变量`X`、`Y`做梯度检测。
- 第四个参数`"Out"` : 指定前向网络最终的输出目标变量`Out`
- `test_ignore_x`和`test_ignore_y`分支测试只需要计算一个输入梯度的情况。


### 编译和执行
Expand Down
4 changes: 2 additions & 2 deletions paddle/operators/mul_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ class MulOpGrad : public framework::OperatorWithKernel {
PADDLE_ENFORCE(y_dims[1] == out_dims[1],
"Out@GRAD M X N must equal to Y dims 1, N ");

x_grad->Resize(x_dims);
y_grad->Resize(y_dims);
if (x_grad) x_grad->Resize(x_dims);
if (y_grad) y_grad->Resize(y_dims);
}
};

Expand Down
36 changes: 20 additions & 16 deletions paddle/operators/mul_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,34 +31,38 @@ template <typename Place, typename T>
class MulKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<Tensor>("X");
auto* Y = context.Input<Tensor>("Y");
auto* Z = context.Output<Tensor>("Out");
Z->mutable_data<T>(context.GetPlace());
auto* x = context.Input<Tensor>("X");
auto* y = context.Input<Tensor>("Y");
auto* z = context.Output<Tensor>("Out");
z->mutable_data<T>(context.GetPlace());
auto* device_context =
const_cast<platform::DeviceContext*>(context.device_context_);
math::matmul<Place, T>(*X, false, *Y, false, 1, Z, 0, device_context);
math::matmul<Place, T>(*x, false, *y, false, 1, z, 0, device_context);
}
};

template <typename Place, typename T>
class MulGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* X = ctx.Input<Tensor>("X");
auto* Y = ctx.Input<Tensor>("Y");
auto* dOut = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));

auto* dX = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* dY = ctx.Output<Tensor>(framework::GradVarName("Y"));
dX->mutable_data<T>(ctx.GetPlace());
dY->mutable_data<T>(ctx.GetPlace());
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
auto* device_context =
const_cast<platform::DeviceContext*>(ctx.device_context_);
// dX = dOut * Y'. dX: M x K, dOut : M x N, Y : K x N
math::matmul<Place, T>(*dOut, false, *Y, true, 1, dX, 0, device_context);
// dY = X' * dOut. dY: K x N, dOut : M x N, X : M x K
math::matmul<Place, T>(*X, true, *dOut, false, 1, dY, 0, device_context);
if (dx) {
dx->mutable_data<T>(ctx.GetPlace());
// dx = dout * y'. dx: M x K, dout : M x N, y : K x N
math::matmul<Place, T>(*dout, false, *y, true, 1, dx, 0, device_context);
}
if (dy) {
dy->mutable_data<T>(ctx.GetPlace());
// dy = x' * dout. dy K x N, dout : M x N, x : M x K
math::matmul<Place, T>(*x, true, *dout, false, 1, dy, 0, device_context);
}
}
};

Expand Down
6 changes: 4 additions & 2 deletions paddle/operators/rowwise_add_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,10 @@ class RowwiseAddGradOp : public framework::OperatorWithKernel {
auto dims0 = ctx.Input<Tensor>("X")->dims();
auto dims1 = ctx.Input<Tensor>("b")->dims();
PADDLE_ENFORCE_EQ(1, dims1.size(), "b dims should be 1")
ctx.Output<Tensor>(framework::GradVarName("X"))->Resize(dims0);
ctx.Output<Tensor>(framework::GradVarName("b"))->Resize(dims1);
auto *dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto *db = ctx.Output<Tensor>(framework::GradVarName("b"));
if (dx) dx->Resize(dims0);
if (db) db->Resize(dims1);
}
};

Expand Down
24 changes: 14 additions & 10 deletions paddle/operators/rowwise_add_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,20 +51,24 @@ template <typename Place, typename T>
class RowwiseAddGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* dOut = context.Input<Tensor>(framework::GradVarName("Out"));
auto* dX = context.Output<Tensor>(framework::GradVarName("X"));
auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
auto* db = context.Output<Tensor>(framework::GradVarName("b"));
dX->mutable_data<T>(context.GetPlace());
db->mutable_data<T>(context.GetPlace());

auto OutGrad = EigenMatrix<T>::From(*dOut);
auto out_grad = EigenMatrix<T>::From(*dout);
auto place = context.GetEigenDevice<Place>();
EigenMatrix<T>::From(*dX).device(place) = OutGrad;
if (dx) {
dx->mutable_data<T>(context.GetPlace());
EigenMatrix<T>::From(*dx).device(place) = out_grad;
}

// https://eigen.tuxfamily.org/dox/unsupported/TensorBase_8h_source.html
// colwise add
Eigen::array<int, 1> dims{{0}}; /* dimension to reduce */
EigenVector<T>::Flatten(*db).device(place) = OutGrad.sum(dims);
if (db) {
db->mutable_data<T>(context.GetPlace());
// https://eigen.tuxfamily.org/dox/unsupported/TensorBase_8h_source.html
// colwise add
Eigen::array<int, 1> dims{{0}}; /* dimension to reduce */
EigenVector<T>::Flatten(*db).device(place) = out_grad.sum(dims);
}
}
};
} // namespace operators
Expand Down
4 changes: 3 additions & 1 deletion python/paddle/v2/framework/tests/gradient_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,9 @@ def check_grad(self,
for no_grad in no_grad_set:
if no_grad not in in_names:
raise ValueError("no_grad should be in in_names")
if no_grad in inputs_to_check:
raise ValueError("no_grad should not be in inputs_to_check")

backward_op = core.Operator.backward(forward_op, no_grad_set)

places = [core.CPUPlace()]
Expand All @@ -301,7 +304,6 @@ def check_grad(self,

check_names = [grad_var_name(name) for name in inputs_to_check]
for place in places:
# get analytical gradients according to different device
analytic_grads = self.__get_gradient(forward_op, backward_op,
input_vars, check_names, place)
self.__assert_is_close(numeric_grads, analytic_grads, check_names,
Expand Down
31 changes: 26 additions & 5 deletions python/paddle/v2/framework/tests/test_mul_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,37 @@ def setUp(self):
self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])}


class MulGradOpTest(GradientChecker):
def test_mul(self):
op = create_op("mul")
inputs = {
class TestMulGradOp(GradientChecker):
def setUp(self):
self.op = create_op("mul")
self.inputs = {
'X': np.random.random((32, 84)).astype("float32"),
'Y': np.random.random((84, 100)).astype("float32")
}

def test_cpu_gpu_compare(self):
self.compare_grad(self.op, self.inputs)

def test_normal(self):
# mul op will enlarge the relative error
self.check_grad(
op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.5)
self.op, self.inputs, ["X", "Y"], "Out", max_relative_error=0.5)

def test_ignore_x(self):
self.check_grad(
self.op,
self.inputs, ["Y"],
"Out",
max_relative_error=0.5,
no_grad_set={"X"})

def test_ignore_y(self):
self.check_grad(
self.op,
self.inputs, ["X"],
"Out",
max_relative_error=0.5,
no_grad_set={"Y"})
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@qingqing01 The framework should automatically test cases with certain input gradient ignored without explicitly writing code here. We should change GradientChecker.check_grad() to do this.

Copy link
Contributor Author

@qingqing01 qingqing01 Sep 7, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@emailweixu Thanks for your comments. Ok, in the first comments: 4470332, the check_grad is automatically testing. I'll change it again :).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The framework should automatically test cases with certain input gradient ignored without explicitly writing code here. We should change GradientChecker.check_grad() to do this.

  • To make users invoke check_grad by themselves can handle if some IGs are not able to be ignored. For example, OpA has two inputs, 'X' and 'Y' and gradient 'Y' should always be calculated, and it YG should never be nullptr.
    • Also, check_grad is not only used to check single operator's gradient but also the gradient inside a whole neural network. It is hard to automatically testing a whole network.
  • To make users invoke check_grad can make error log clearer. The error would be test_mul_grad_ingore_x error.
  • To make user-interface easily, there maybe a parameter in check_grad, like test_no_input_grad. If test_no_input_grad=True, multiple check_grads will be invoked.



# TODO(dzh,qijun) : mulgrad test case need transpose feature of blas library
Expand Down
18 changes: 13 additions & 5 deletions python/paddle/v2/framework/tests/test_rowwise_add_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,22 @@ def setUp(self):
self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])}


class RowwiseAddGradOpTest(GradientChecker):
def test_rowwise_add(self):
op = create_op("rowwise_add")
inputs = {
class TestRowwiseAddGradOp(GradientChecker):
def setUp(self):
self.op = create_op("rowwise_add")
self.inputs = {
"X": np.random.uniform(0.1, 1, [5, 10]).astype("float32"),
"b": np.random.uniform(0.1, 1, [10]).astype("float32")
}
self.check_grad(op, inputs, set(["X", "b"]), "Out")

def test_normal(self):
self.check_grad(self.op, self.inputs, ["X", "b"], "Out")

def test_ignore_b(self):
self.check_grad(self.op, self.inputs, ["X"], "Out", no_grad_set={"b"})

def test_ignore_x(self):
self.check_grad(self.op, self.inputs, ["b"], "Out", no_grad_set={"X"})


if __name__ == '__main__':
Expand Down