From 8f1929b789968b879f1aab6182d1dae1d9c443b3 Mon Sep 17 00:00:00 2001 From: jiangcheng Date: Thu, 23 Mar 2023 15:27:16 +0800 Subject: [PATCH 1/5] [AMP] add fp16&bf16 support for flatten op --- .../test_flatten_contiguous_range_op.py | 236 +++++++++++++++++- .../white_list/op_accuracy_white_list.py | 1 + python/paddle/tensor/manipulation.py | 1 + 3 files changed, 236 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py index 8d2dbc0312c7ec..d4c6d7f8dcad11 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py @@ -15,9 +15,10 @@ import unittest import numpy as np -from eager_op_test import OpTest +from eager_op_test import OpTest, convert_float_to_uint16 import paddle +from paddle.fluid import core class TestFlattenOp(OpTest): @@ -31,7 +32,8 @@ def setUp(self): self.stop_axis = -1 self.skip_cinn() self.init_test_case() - self.inputs = {"X": np.random.random(self.in_shape).astype("float64")} + self.init_test_dtype() + self.init_input_data() self.init_attrs() self.outputs = { "Out": self.inputs["X"].reshape(self.new_shape), @@ -59,6 +61,40 @@ def init_attrs(self): "stop_axis": self.stop_axis, } + def init_test_dtype(self): + self.dtype = "float64" + + def init_input_data(self): + self.inputs = {"X": np.random.random(self.in_shape).astype(self.dtype)} + + +class TestFlattenFP32Op(TestFlattenOp): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16Op(TestFlattenOp): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestFlattenBF16Op(TestFlattenOp): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + class TestFlattenOp_1(TestFlattenOp): def init_test_case(self): @@ -74,6 +110,34 @@ def init_attrs(self): } +class TestFlattenFP32Op_1(TestFlattenOp_1): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16Op_1(TestFlattenOp_1): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestFlattenBF16Op_1(TestFlattenOp_1): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + + class TestFlattenOp_2(TestFlattenOp): def init_test_case(self): self.in_shape = (3, 2, 5, 4) @@ -88,6 +152,34 @@ def init_attrs(self): } +class TestFlattenFP32Op_2(TestFlattenOp_2): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16Op_2(TestFlattenOp_2): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not 
core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestFlattenBF16Op_2(TestFlattenOp_2): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + + class TestFlattenOp_3(TestFlattenOp): def init_test_case(self): self.in_shape = (3, 2, 5, 4) @@ -102,6 +194,34 @@ def init_attrs(self): } +class TestFlattenFP32Op_3(TestFlattenOp_3): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16Op_3(TestFlattenOp_3): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestFlattenBF16Op_3(TestFlattenOp_3): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + + class TestFlattenOp_4(TestFlattenOp): def init_test_case(self): self.in_shape = (3, 2, 5, 4) @@ -116,6 +236,34 @@ def init_attrs(self): } +class TestFlattenFP32Op_4(TestFlattenOp_4): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16Op_4(TestFlattenOp_4): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestFlattenBF16Op_4(TestFlattenOp_4): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + + class TestFlattenOp_5(TestFlattenOp): def init_test_case(self): self.in_shape = (3, 2, 5, 4) @@ -130,6 +278,34 @@ def init_attrs(self): } +class TestFlattenFP32Op_5(TestFlattenOp_5): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16Op_5(TestFlattenOp_5): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestFlattenBF16Op_5(TestFlattenOp_5): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + + class TestFlattenOp_6(TestFlattenOp): def init_test_case(self): self.in_shape = tuple() @@ -147,6 +323,34 @@ def init_attrs(self): } +class TestFlattenFP32Op_6(TestFlattenOp_6): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16Op_6(TestFlattenOp_6): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class 
TestFlattenBF16Op_6(TestFlattenOp_6): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + + class TestFlattenOpSixDims(TestFlattenOp): def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) @@ -161,6 +365,34 @@ def init_attrs(self): } +class TestFlattenFP32OpSixDims(TestFlattenOpSixDims): + def init_test_dtype(self): + self.dtype = "float32" + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not complied with CUDA", +) +class TestFlattenFP16OpSixDims(TestFlattenOpSixDims): + def init_test_dtype(self): + self.dtype = "float16" + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestFlattenBF16OpSixDims(TestFlattenOpSixDims): + def init_test_dtype(self): + self.dtype = "uint16" + + def init_input_data(self): + x = np.random.random(self.in_shape).astype("float32") + self.inputs = {"X": convert_float_to_uint16(x)} + + class TestFlatten2OpError(unittest.TestCase): def test_errors(self): image_shape = (2, 3, 4, 4) diff --git a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py index ced30722cf2792..b0266bfa9a36c3 100644 --- a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py +++ b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py @@ -31,6 +31,7 @@ 'depthwise_conv2d', 'depthwise_conv2d_transpose', 'dropout', + 'flatten_contiguous_range', 'fused_elemwise_activation', 'hinge_loss', 'huber_loss', diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 41a8cfa856f8c5..87b01d963ed090 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -1591,6 +1591,7 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None): 'int32', 'int64', 'uint8', + 'uint16', ], 'flatten', ) From 3c75dbfb2c447171db4ef9e66b4de90924ef64f2 Mon Sep 17 00:00:00 2001 From: jiangcheng Date: Thu, 23 Mar 2023 19:40:10 +0800 Subject: [PATCH 2/5] fix ci bug --- .../test_flatten_contiguous_range_op.py | 51 ++++++------------- 1 file changed, 16 insertions(+), 35 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py index d4c6d7f8dcad11..2cfe77885f4754 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py @@ -44,10 +44,20 @@ def skip_cinn(self): self.enable_cinn = True def test_check_output(self): - self.check_output(no_check_set=["XShape"], check_prim=True) + if str(self.dtype) in {"float16", "uint16"}: + self.check_output_with_place( + core.CUDAPlace(0), no_check_set=["XShape"], check_prim=True + ) + else: + self.check_output(no_check_set=["XShape"], check_prim=True) def test_check_grad(self): - self.check_grad(["X"], "Out", check_prim=True) + if str(self.dtype) in {"float16", "uint16"}: + self.check_grad_with_place( + core.CUDAPlace(0), ["X"], "Out", check_prim=True + ) + else: + self.check_grad(["X"], "Out", check_prim=True) def init_test_case(self): self.in_shape = (3, 2, 5, 4) @@ -65,7 +75,10 @@ def init_test_dtype(self): self.dtype = "float64" def init_input_data(self): - self.inputs = {"X": 
np.random.random(self.in_shape).astype(self.dtype)} + x = np.random.random(self.in_shape).astype("float32") + if str(self.dtype) == "uint16": + x = convert_float_to_uint16(x) + self.inputs = {"X": x} class TestFlattenFP32Op(TestFlattenOp): @@ -91,10 +104,6 @@ class TestFlattenBF16Op(TestFlattenOp): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlattenOp_1(TestFlattenOp): def init_test_case(self): @@ -133,10 +142,6 @@ class TestFlattenBF16Op_1(TestFlattenOp_1): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlattenOp_2(TestFlattenOp): def init_test_case(self): @@ -175,10 +180,6 @@ class TestFlattenBF16Op_2(TestFlattenOp_2): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlattenOp_3(TestFlattenOp): def init_test_case(self): @@ -217,10 +218,6 @@ class TestFlattenBF16Op_3(TestFlattenOp_3): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlattenOp_4(TestFlattenOp): def init_test_case(self): @@ -259,10 +256,6 @@ class TestFlattenBF16Op_4(TestFlattenOp_4): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlattenOp_5(TestFlattenOp): def init_test_case(self): @@ -301,10 +294,6 @@ class TestFlattenBF16Op_5(TestFlattenOp_5): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlattenOp_6(TestFlattenOp): def init_test_case(self): @@ -346,10 +335,6 @@ class TestFlattenBF16Op_6(TestFlattenOp_6): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlattenOpSixDims(TestFlattenOp): def init_test_case(self): @@ -388,10 +373,6 @@ class TestFlattenBF16OpSixDims(TestFlattenOpSixDims): def init_test_dtype(self): self.dtype = "uint16" - def init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - self.inputs = {"X": convert_float_to_uint16(x)} - class TestFlatten2OpError(unittest.TestCase): def test_errors(self): From 9645c2fb0b0381f7586b732e1942382e163fd0cd Mon Sep 17 00:00:00 2001 From: jiangcheng Date: Mon, 27 Mar 2023 11:24:13 +0800 Subject: [PATCH 3/5] fix inpute should astype self.dtype bug and fix zerodim test name --- .../unittests/test_flatten_contiguous_range_op.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py index 2cfe77885f4754..f4334f7dcdaf73 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py @@ -75,9 +75,12 @@ def init_test_dtype(self): self.dtype = "float64" def 
init_input_data(self): - x = np.random.random(self.in_shape).astype("float32") - if str(self.dtype) == "uint16": + if str(self.dtype) != "uint16": + x = np.random.random(self.in_shape).astype(self.dtype) + else: + x = np.random.random(self.in_shape).astype("float32") x = convert_float_to_uint16(x) + self.inputs = {"X": x} @@ -295,7 +298,7 @@ def init_test_dtype(self): self.dtype = "uint16" -class TestFlattenOp_6(TestFlattenOp): +class TestFlattenOp_ZeroDim(TestFlattenOp): def init_test_case(self): self.in_shape = tuple() self.start_axis = 0 @@ -312,7 +315,7 @@ def init_attrs(self): } -class TestFlattenFP32Op_6(TestFlattenOp_6): +class TestFlattenFP32Op_ZeroDim(TestFlattenOp_ZeroDim): def init_test_dtype(self): self.dtype = "float32" @@ -321,7 +324,7 @@ def init_test_dtype(self): not core.is_compiled_with_cuda(), "core is not complied with CUDA", ) -class TestFlattenFP16Op_6(TestFlattenOp_6): +class TestFlattenFP16Op_ZeroDim(TestFlattenOp_ZeroDim): def init_test_dtype(self): self.dtype = "float16" @@ -331,7 +334,7 @@ def init_test_dtype(self): or not core.is_bfloat16_supported(core.CUDAPlace(0)), "core is not complied with CUDA and not support the bfloat16", ) -class TestFlattenBF16Op_6(TestFlattenOp_6): +class TestFlattenBF16Op_ZeroDim(TestFlattenOp_ZeroDim): def init_test_dtype(self): self.dtype = "uint16" From 471b403a862e69a32421525460cb224237319647 Mon Sep 17 00:00:00 2001 From: jiangcheng Date: Mon, 27 Mar 2023 11:32:18 +0800 Subject: [PATCH 4/5] remove 0D-tensor bf16 test for window-inference-ci pass --- .../unittests/test_flatten_contiguous_range_op.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py index f4334f7dcdaf73..81aa43454df9b7 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py @@ -329,16 +329,6 @@ def init_test_dtype(self): self.dtype = "float16" -@unittest.skipIf( - not core.is_compiled_with_cuda() - or not core.is_bfloat16_supported(core.CUDAPlace(0)), - "core is not complied with CUDA and not support the bfloat16", -) -class TestFlattenBF16Op_ZeroDim(TestFlattenOp_ZeroDim): - def init_test_dtype(self): - self.dtype = "uint16" - - class TestFlattenOpSixDims(TestFlattenOp): def init_test_case(self): self.in_shape = (3, 2, 3, 2, 4, 4) From 52d2a98717fd0c5d01106ca4161dac074fd3d2e3 Mon Sep 17 00:00:00 2001 From: jiangcheng Date: Mon, 27 Mar 2023 11:41:37 +0800 Subject: [PATCH 5/5] remove flatten from op_accuracy_white_list --- .../fluid/tests/unittests/white_list/op_accuracy_white_list.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py index b0266bfa9a36c3..ced30722cf2792 100644 --- a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py +++ b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py @@ -31,7 +31,6 @@ 'depthwise_conv2d', 'depthwise_conv2d_transpose', 'dropout', - 'flatten_contiguous_range', 'fused_elemwise_activation', 'hinge_loss', 'huber_loss',
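
Note (illustrative only, not part of the patches above): a minimal sketch of how the fp16/bf16 flatten paths enabled by this series could be exercised in dynamic graph mode. It assumes a CUDA build of Paddle on a device whose bfloat16 casts are supported; the variable names are placeholders for this example only.

    import paddle

    if paddle.is_compiled_with_cuda():
        paddle.set_device("gpu")
        x = paddle.rand([3, 2, 5, 4])  # same shape as the default test case

        # float16 path: collapse axes 1..2, giving shape [3, 10, 4]
        y_fp16 = paddle.flatten(x.astype("float16"), start_axis=1, stop_axis=2)
        print(y_fp16.shape, y_fp16.dtype)

        # bfloat16 path (assumes the GPU and Paddle build support bfloat16)
        y_bf16 = paddle.flatten(x.astype("bfloat16"), start_axis=1, stop_axis=2)
        print(y_bf16.shape, y_bf16.dtype)

Since flatten is effectively a reshape, the series mostly adds dtype coverage: 'uint16' (bfloat16) in the Python-side dtype check in manipulation.py, plus the FP16/BF16 OpTest classes above, which run check_output_with_place/check_grad_with_place on CUDAPlace(0) for those dtypes.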