From 8f1929b789968b879f1aab6182d1dae1d9c443b3 Mon Sep 17 00:00:00 2001 From: jiangcheng Date: Thu, 23 Mar 2023 15:27:16 +0800 Subject: [PATCH 1/5] [AMP] add fp16&bf16 support for flatten op --- .../test_flatten_contiguous_range_op.py | 236 +++++++++++++++++- .../white_list/op_accuracy_white_list.py | 1 + python/paddle/tensor/manipulation.py | 1 + 3 files changed, 236 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py index 8d2dbc0312c7ec..d4c6d7f8dcad11 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py @@ -15,9 +15,10 @@ import unittest import numpy as np -from eager_op_test import OpTest +from eager_op_test import OpTest, convert_float_to_uint16 import paddle +from paddle.fluid import core class TestFlattenOp(OpTest): @@ -31,7 +32,8 @@ def setUp(self): self.stop_axis = -1 self.skip_cinn() self.init_test_case() - self.inputs = {"X": np.random.random(self.in_shape).astype("float64")} + self.init_test_dtype() + self.init_input_data() self.init_attrs() self.outputs = { "Out": self.inputs["X"].reshape(self.new_shape), @@ -59,6 +61,40 @@ def init_attrs(self): "stop_axis": self.stop_axis, } + def init_test_dtype(self): + self.dtype = "float64" + + def init_input_data(self): + self.inputs = {"X": np.random.random(self.in_shape).astype(self.dtype)} + + +class TestFlattenFP32Op(TestFlattenOp): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16Op(TestFlattenOp): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestFlattenBF16Op(TestFlattenOp): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + class TestFlattenOp_1(TestFlattenOp): def init_test_case(self): @@ -74,6 +110,34 @@ def init_attrs(self): } +class TestFlattenFP32Op_1(TestFlattenOp_1): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16Op_1(TestFlattenOp_1): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestFlattenBF16Op_1(TestFlattenOp_1): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + + class TestFlattenOp_2(TestFlattenOp): def init_test_case(self): self.in_shape = (3, 2, 5, 4) @@ -88,6 +152,34 @@ def init_attrs(self): } +class TestFlattenFP32Op_2(TestFlattenOp_2): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16Op_2(TestFlattenOp_2): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not 
core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestFlattenBF16Op_2(TestFlattenOp_2): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + + class TestFlattenOp_3(TestFlattenOp): def init_test_case(self): self.in_shape = (3, 2, 5, 4) @@ -102,6 +194,34 @@ def init_attrs(self): } +class TestFlattenFP32Op_3(TestFlattenOp_3): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16Op_3(TestFlattenOp_3): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestFlattenBF16Op_3(TestFlattenOp_3): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + + class TestFlattenOp_4(TestFlattenOp): def init_test_case(self): self.in_shape = (3, 2, 5, 4) @@ -116,6 +236,34 @@ def init_attrs(self): } +class TestFlattenFP32Op_4(TestFlattenOp_4): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16Op_4(TestFlattenOp_4): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestFlattenBF16Op_4(TestFlattenOp_4): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + + class TestFlattenOp_5(TestFlattenOp): def init_test_case(self): self.in_shape = (3, 2, 5, 4) @@ -130,6 +278,34 @@ def init_attrs(self): } +class TestFlattenFP32Op_5(TestFlattenOp_5): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16Op_5(TestFlattenOp_5): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestFlattenBF16Op_5(TestFlattenOp_5): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + + class TestFlattenOp_6(TestFlattenOp): def init_test_case(self): self.in_shape = tuple() @@ -147,6 +323,34 @@ def init_attrs(self): } +class TestFlattenFP32Op_6(TestFlattenOp_6): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16Op_6(TestFlattenOp_6): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class 
TestFlattenBF16Op_6(TestFlattenOp_6): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + + class TestFlattenOpSixDims(TestFlattenOp): def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) @@ -161,6 +365,34 @@ def init_attrs(self): } +class TestFlattenFP32OpSixDims(TestFlattenOpSixDims): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16OpSixDims(TestFlattenOpSixDims): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestFlattenBF16OpSixDims(TestFlattenOpSixDims): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + + class TestFlatten2OpError(unittest.TestCase): def test_errors(self): image_shape = (2, 3, 4, 4) diff --git a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py index ced30722cf2792..b0266bfa9a36c3 100644 --- a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py +++ b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py @@ -31,6 +31,7 @@ 'depthwise_conv2d', 'depthwise_conv2d_transpose', 'dropout', + 'flatten_contiguous_range', 'fused_elemwise_activation', 'hinge_loss', 'huber_loss', diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 41a8cfa856f8c5..87b01d963ed090 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -1591,6 +1591,7 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None): 'int32', 'int64', 'uint8', + 'uint16', ], 'flatten', ) From 3c75dbfb2c447171db4ef9e66b4de90924ef64f2 Mon Sep 17 00:00:00 2001 From: jiangcheng Date: Thu, 23 Mar 2023 19:40:10 +0800 Subject: [PATCH 2/5] fix ci bug --- .../test_flatten_contiguous_range_op.py | 51 ++++++------------- 1 file changed, 16 insertions(+), 35 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py index d4c6d7f8dcad11..2cfe77885f4754 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py @@ -44,10 +44,20 @@ def skip_cinn(self): self.enable_cinn = True def test_check_output(self): - self.check_output(no_check_set=["XShape"], check_prim=True) + if str(self.dtype) in {"float16", "uint16"}: + self.check_output_with_place( + core.CUDAPlace(0), no_check_set=["XShape"], check_prim=True + ) + else: + self.check_output(no_check_set=["XShape"], check_prim=True) def test_check_grad(self): - self.check_grad(["X"], "Out", check_prim=True) + if str(self.dtype) in {"float16", "uint16"}: + self.check_grad_with_place( + core.CUDAPlace(0), ["X"], "Out", check_prim=True + ) + else: + self.check_grad(["X"], "Out", check_prim=True) def init_test_case(self): self.in_shape = (3, 2, 5, 4) @@ -65,7 +75,10 @@ def init_test_dtype(self): self.dtype = "float64" def init_input_data(self): - self.inputs = {"X": 
np.random.random(self.in_shape).astype(self.dtype)} + x = np.random.random(self.in_shape).astype("float32") + if str(self.dtype) == "uint16": + x = convert_float_to_uint16(x) + self.inputs = {"X": x} class TestFlattenFP32Op(TestFlattenOp): @@ -91,10 +104,6 @@ class TestFlattenBF16Op(TestFlattenOp): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlattenOp_1(TestFlattenOp): def init_test_case(self): @@ -133,10 +142,6 @@ class TestFlattenBF16Op_1(TestFlattenOp_1): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlattenOp_2(TestFlattenOp): def init_test_case(self): @@ -175,10 +180,6 @@ class TestFlattenBF16Op_2(TestFlattenOp_2): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlattenOp_3(TestFlattenOp): def init_test_case(self): @@ -217,10 +218,6 @@ class TestFlattenBF16Op_3(TestFlattenOp_3): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlattenOp_4(TestFlattenOp): def init_test_case(self): @@ -259,10 +256,6 @@ class TestFlattenBF16Op_4(TestFlattenOp_4): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlattenOp_5(TestFlattenOp): def init_test_case(self): @@ -301,10 +294,6 @@ class TestFlattenBF16Op_5(TestFlattenOp_5): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlattenOp_6(TestFlattenOp): def init_test_case(self): @@ -346,10 +335,6 @@ class TestFlattenBF16Op_6(TestFlattenOp_6): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlattenOpSixDims(TestFlattenOp): def init_test_case(self): @@ -388,10 +373,6 @@ class TestFlattenBF16OpSixDims(TestFlattenOpSixDims): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlatten2OpError(unittest.TestCase): def test_errors(self): From 9645c2fb0b0381f7586b732e1942382e163fd0cd Mon Sep 17 00:00:00 2001 From: jiangcheng Date: Mon, 27 Mar 2023 11:24:13 +0800 Subject: [PATCH 3/5] fix inpute should astype self.dtype bug and fix zerodim test name --- .../unittests/test_flatten_contiguous_range_op.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py index 2cfe77885f4754..f4334f7dcdaf73 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py @@ -75,9 +75,12 @@ def init_test_dtype(self): self.dtype = "float64" def 
init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - if str(self.dtype) == "uint16": + if str(self.dtype) != "uint16": + x = np.random.random(self.in_shape).astype(self.dtype) + else: + x = np.random.random(self.in_shape).astype("float32") x = convert_float_to_uint16(x) + self.inputs = {"X": x} @@ -295,7 +298,7 @@ def init_test_dtype(self): self.dtype = "uint16" -class TestFlattenOp_6(TestFlattenOp): +class TestFlattenOp_ZeroDim(TestFlattenOp): def init_test_case(self): self.in_shape = tuple() self.start_axis = 0 @@ -312,7 +315,7 @@ def init_attrs(self): } -class TestFlattenFP32Op_6(TestFlattenOp_6): +class TestFlattenFP32Op_ZeroDim(TestFlattenOp_ZeroDim): def init_test_dtype(self): self.dtype = "float32" @@ -321,7 +324,7 @@ def init_test_dtype(self): not core.is_compiled_with_cuda(), "core is not complied with CUDA", ) -class TestFlattenFP16Op_6(TestFlattenOp_6): +class TestFlattenFP16Op_ZeroDim(TestFlattenOp_ZeroDim): def init_test_dtype(self): self.dtype = "float16" @@ -331,7 +334,7 @@ def init_test_dtype(self): or not core.is_bfloat16_supported(core.CUDAPlace(0)), "core is not complied with CUDA and not support the bfloat16", ) -class TestFlattenBF16Op_6(TestFlattenOp_6): +class TestFlattenBF16Op_ZeroDim(TestFlattenOp_ZeroDim): def init_test_dtype(self): self.dtype = "uint16" From 471b403a862e69a32421525460cb224237319647 Mon Sep 17 00:00:00 2001 From: jiangcheng Date: Mon, 27 Mar 2023 11:32:18 +0800 Subject: [PATCH 4/5] remove 0D-tensor bf16 test for window-inference-ci pass --- .../unittests/test_flatten_contiguous_range_op.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py index f4334f7dcdaf73..81aa43454df9b7 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py @@ -329,16 +329,6 @@ def init_test_dtype(self): self.dtype = "float16" -@unittest.skipIf( - not core.is_compiled_with_cuda() - or not core.is_bfloat16_supported(core.CUDAPlace(0)), - "core is not complied with CUDA and not support the bfloat16", -) -class TestFlattenBF16Op_ZeroDim(TestFlattenOp_ZeroDim): - def init_test_dtype(self): - self.dtype = "uint16" - - class TestFlattenOpSixDims(TestFlattenOp): def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) From 52d2a98717fd0c5d01106ca4161dac074fd3d2e3 Mon Sep 17 00:00:00 2001 From: jiangcheng Date: Mon, 27 Mar 2023 11:41:37 +0800 Subject: [PATCH 5/5] remove flatten from op_accuracy_white_list --- .../fluid/tests/unittests/white_list/op_accuracy_white_list.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py index b0266bfa9a36c3..ced30722cf2792 100644 --- a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py +++ b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py @@ -31,7 +31,6 @@ 'depthwise_conv2d', 'depthwise_conv2d_transpose', 'dropout', - 'flatten_contiguous_range', 'fused_elemwise_activation', 'hinge_loss', 'huber_loss',
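
Note (illustrative only, not part of the patches above): a minimal sketch of how the fp16/bf16 flatten paths enabled by this series could be exercised in dynamic graph mode. It assumes a CUDA build of Paddle on a device whose bfloat16 casts are supported; the variable names are placeholders for this example only.

    import paddle

    if paddle.is_compiled_with_cuda():
        paddle.set_device("gpu")
        x = paddle.rand([3, 2, 5, 4])  # same shape as the default test case

        # float16 path: collapse axes 1..2, giving shape [3, 10, 4]
        y_fp16 = paddle.flatten(x.astype("float16"), start_axis=1, stop_axis=2)
        print(y_fp16.shape, y_fp16.dtype)

        # bfloat16 path (assumes the GPU and Paddle build support bfloat16)
        y_bf16 = paddle.flatten(x.astype("bfloat16"), start_axis=1, stop_axis=2)
        print(y_bf16.shape, y_bf16.dtype)

Since flatten is effectively a reshape, the series mostly adds dtype coverage: 'uint16' (bfloat16) in the Python-side dtype check in manipulation.py, plus the FP16/BF16 OpTest classes above, which run check_output_with_place/check_grad_with_place on CUDAPlace(0) for those dtypes.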