From 987fb2d88e8731544a777342f75ffcc2cdcc0359 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com>
Date: Mon, 24 Apr 2023 16:11:59 +0800
Subject: [PATCH] rm mlu (#53194)

---
 paddle/fluid/imperative/amp_auto_cast.cc | 4 ++--
 paddle/fluid/operators/collective/c_comm_init_op.cc | 4 ++--
 .../fluid/operators/generator/get_expected_kernel_func.cc | 2 +-
 paddle/fluid/operators/reduce_ops/reduce_op.h | 2 +-
 paddle/fluid/operators/softmax_op.cc | 4 ++--
 paddle/fluid/platform/device_context.h | 1 -
 paddle/fluid/platform/profiler/dump/nodetree.proto | 2 --
 paddle/fluid/platform/profiler/profiler.h | 2 +-
 paddle/fluid/pybind/imperative.cc | 7 +++----
 paddle/phi/api/profiler/trace_event.h | 2 --
 paddle/phi/common/backend.h | 1 -
 paddle/phi/common/place.h | 1 -
 python/paddle/amp/auto_cast.py | 2 +-
 python/paddle/amp/grad_scaler.py | 2 +-
 python/paddle/distributed/launch/main.py | 2 +-
 python/paddle/distributed/spawn.py | 8 +++-----
 python/paddle/profiler/profiler.py | 8 +++------
 17 files changed, 20 insertions(+), 34 deletions(-)

diff --git a/paddle/fluid/imperative/amp_auto_cast.cc b/paddle/fluid/imperative/amp_auto_cast.cc
index 2689a4eafa4422..bf6bdf382ce446 100644
--- a/paddle/fluid/imperative/amp_auto_cast.cc
+++ b/paddle/fluid/imperative/amp_auto_cast.cc
@@ -57,7 +57,7 @@ OpSupportedInfos(const std::string& place,
           0,
           platform::errors::InvalidArgument(
               "The argument `place` should be 'GPU', 'CPU', 'XPU', "
-              "'NPU', 'MLU', but got '%s'.",
+              "'NPU', but got '%s'.",
               place));

   std::unordered_set<std::string> all_ops;
@@ -148,7 +148,7 @@ AmpOperators::AmpOperators()
       OpSupportedInfos("GPU", paddle::framework::proto::VarType::BF16));
   unsupported_bf16_ops_->insert(unsupported_ops_gpu_bf16.begin(),
                                 unsupported_ops_gpu_bf16.end());
-// NOTE: GPU/NPU/XPU/MLU is compiled seperatly.
+// NOTE: GPU/NPU/XPU is compiled separately.
 #elif defined(PADDLE_WITH_XPU)
   auto unsupported_ops_xpu_fp16 = std::get<2>(
       OpSupportedInfos("XPU", paddle::framework::proto::VarType::FP16));
diff --git a/paddle/fluid/operators/collective/c_comm_init_op.cc b/paddle/fluid/operators/collective/c_comm_init_op.cc
index 9a34a3a2f37792..b32857a27b2d25 100644
--- a/paddle/fluid/operators/collective/c_comm_init_op.cc
+++ b/paddle/fluid/operators/collective/c_comm_init_op.cc
@@ -57,14 +57,14 @@ class CCommInitOp : public framework::OperatorBase {
     using CommContext = platform::BKCLCommContext;
 #else
     PADDLE_THROW(platform::errors::PreconditionNotMet(
-        "PaddlePaddle should be compiled with GPU or XPU or MLU."));
+        "PaddlePaddle should be compiled with GPU or XPU."));
 #endif

     PADDLE_ENFORCE_EQ(
         platform::is_gpu_place(place) || platform::is_xpu_place(place),
         true,
         platform::errors::PreconditionNotMet(
-            "CCommInitOp can run on gpu or xpu or mlu place only."));
+            "CCommInitOp can run on gpu or xpu place only."));

 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
     defined(PADDLE_WITH_XPU_BKCL)
diff --git a/paddle/fluid/operators/generator/get_expected_kernel_func.cc b/paddle/fluid/operators/generator/get_expected_kernel_func.cc
index 931bbc1fecc7da..558b0c400a4caf 100644
--- a/paddle/fluid/operators/generator/get_expected_kernel_func.cc
+++ b/paddle/fluid/operators/generator/get_expected_kernel_func.cc
@@ -80,7 +80,7 @@ phi::KernelKey GetReduceExpectedKernelType(
         platform::is_custom_place(ctx.GetPlace()),
         true,
         platform::errors::InvalidArgument(
-            "float16 can only be used on GPU or NPU or MLU or XPU place"));
+            "float16 can only be used on GPU or NPU or XPU place"));
   }
   return phi::KernelKey(input_data_type, ctx.GetPlace());
 }
diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h
index 1a3925e4422d04..3c2b0b948bb22b 100644
--- a/paddle/fluid/operators/reduce_ops/reduce_op.h
+++ b/paddle/fluid/operators/reduce_ops/reduce_op.h
@@ -629,7 +629,7 @@ class ReduceBaseOp : public framework::OperatorWithKernel {
         platform::is_custom_place(ctx.GetPlace()),
         true,
         platform::errors::InvalidArgument(
-            "float16 can only be used on GPU or NPU or MLU or XPU place"));
+            "float16 can only be used on GPU or NPU or XPU place"));
   }
   return phi::KernelKey(input_data_type, ctx.GetPlace());
 }
diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc
index 4f440ec495d423..ab5816965f05c9 100644
--- a/paddle/fluid/operators/softmax_op.cc
+++ b/paddle/fluid/operators/softmax_op.cc
@@ -48,7 +48,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
         platform::is_custom_place(ctx.GetPlace()),
         true,
         platform::errors::InvalidArgument(
-            "float16 can only be used on GPU/NPU/XPU/MLU and custom place"));
+            "float16 can only be used on GPU/NPU/XPU and custom place"));
   }
   return phi::KernelKey(
       ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type));
@@ -132,7 +132,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
         platform::is_xpu_place(ctx.GetPlace()) ||
         platform::is_custom_place(ctx.GetPlace())))
       PADDLE_THROW(platform::errors::InvalidArgument(
-          "float16 can only be used on GPU/NPU/XPU/MLU and custom place"));
+          "float16 can only be used on GPU/NPU/XPU and custom place"));
   }
   return phi::KernelKey(
       ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type));
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index d2866a9aa11651..96ddef2c60efe2 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -96,7 +96,6 @@ enum DeviceType {
   NPU = 2,
   XPU = 3,
   IPU = 4,
-  MLU = 5,
   CUSTOM_DEVICE = 6,

   MAX_DEVICE_TYPES = 7,
diff --git a/paddle/fluid/platform/profiler/dump/nodetree.proto b/paddle/fluid/platform/profiler/dump/nodetree.proto
index dc7b5046b079d1..cfb424b0202697 100644
--- a/paddle/fluid/platform/profiler/dump/nodetree.proto
+++ b/paddle/fluid/platform/profiler/dump/nodetree.proto
@@ -46,8 +46,6 @@ enum TracerEventTypeProto {
   PythonOp = 13;
   // Used to mark python level userdefined
   PythonUserDefined = 14;
-  // Used to mark mlu runtime record returned by cnpapi
-  MluRuntime = 15;
 };

 enum TracerMemEventTypeProto {
diff --git a/paddle/fluid/platform/profiler/profiler.h b/paddle/fluid/platform/profiler/profiler.h
index 2a7100b0a64ead..b486e7543d96cc 100644
--- a/paddle/fluid/platform/profiler/profiler.h
+++ b/paddle/fluid/platform/profiler/profiler.h
@@ -39,7 +39,7 @@ static constexpr uint32_t kProfileCustomDeviceOptionBit = 3;
 void SynchronizeDevice();

 struct ProfilerOptions {
-  uint32_t trace_switch = 0;  // bit 0: cpu, bit 1: gpu, bit 2: mlu
+  uint32_t trace_switch = 0;  // bit 0: cpu, bit 1: gpu
   uint32_t trace_level = FLAGS_host_trace_level;
 };

diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc
index e60211286ed37d..d6a5a8b8dfc874 100644
--- a/paddle/fluid/pybind/imperative.cc
+++ b/paddle/fluid/pybind/imperative.cc
@@ -155,7 +155,7 @@ static const platform::Place PyObjectToPlace(const py::object &place_obj) {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "Place should be one of "
         "Place/CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/IPUPlace/"
-        "MLUPlace/CustomPlace"));
+        "CustomPlace"));
   }
 }

@@ -209,8 +209,7 @@ static void InitVarBaseAndTensor(imperative::VarBase *self,
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "Place should be one of "
-        "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/IPUPlace/"
-        "MLUPlace"));
+        "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/IPUPlace"));
   }
   self->SetDataType(framework::TransToProtoVarType(tensor->dtype()));
 }
@@ -2214,7 +2213,7 @@ void BindImperative(py::module *m_ptr) {
       } else {
         PADDLE_THROW(platform::errors::InvalidArgument(
             "Incompatible Place Type: supports XPUPlace, CUDAPlace, "
-            "CPUPlace, NPUPlace, IPUPlace, MLUPlace"
+            "CPUPlace, NPUPlace, IPUPlace, "
             "and CUDAPinnedPlace, "
             "but got Unknown Type!"));
       }
diff --git a/paddle/phi/api/profiler/trace_event.h b/paddle/phi/api/profiler/trace_event.h
index 1b049a0f105cb0..e526953d5c8e0c 100644
--- a/paddle/phi/api/profiler/trace_event.h
+++ b/paddle/phi/api/profiler/trace_event.h
@@ -51,8 +51,6 @@ enum class TracerEventType {
   PythonOp = 13,
   // Used to mark python level userdefined
   PythonUserDefined = 14,
-  // Used to mark mlu runtime record returned by cnpapi
-  MluRuntime = 15,
   // A flag to denote the number of current types
   NumTypes
 };
diff --git a/paddle/phi/common/backend.h b/paddle/phi/common/backend.h
index 904038a4f56020..b7f30797ca78ec 100644
--- a/paddle/phi/common/backend.h
+++ b/paddle/phi/common/backend.h
@@ -53,7 +53,6 @@ enum class Backend : uint8_t {
   // various acceleration devices' backends
   XPU,  // XPU currently does not exist at the same time as CUDA
   NPU,  // NPU currently does not exist at the same time as CUDA
-  MLU,  // MLU currently does not exist at the same time as CUDA
   IPU,

   // paddle kernel primitives backend
diff --git a/paddle/phi/common/place.h b/paddle/phi/common/place.h
index e65235cfa3d691..543a79977eb7ed 100644
--- a/paddle/phi/common/place.h
+++ b/paddle/phi/common/place.h
@@ -34,7 +34,6 @@ enum class AllocationType : int8_t {
   NPU = 5,
   NPUPINNED = 6,
   IPU = 7,
-  MLU = 8,
   CUSTOM = 9,
 };

diff --git a/python/paddle/amp/auto_cast.py b/python/paddle/amp/auto_cast.py
index 1f82533edbfb32..ae9c957df68faf 100644
--- a/python/paddle/amp/auto_cast.py
+++ b/python/paddle/amp/auto_cast.py
@@ -349,7 +349,7 @@ def amp_guard(
         or tracer._expected_place.is_custom_place()
     ):
         warnings.warn(
-            'amp_guard can only be enabled on CUDAPlace, XPUPlace, MLUPlace, NPUPlace, and CustomPlace, current place is %s, so it makes no effect.'
+            'amp_guard can only be enabled on CUDAPlace, XPUPlace, NPUPlace, and CustomPlace, current place is %s, so it has no effect.'
             % tracer._expected_place
         )
         enable = False
diff --git a/python/paddle/amp/grad_scaler.py b/python/paddle/amp/grad_scaler.py
index 5c2d033d336332..0f6d9f21a32c6a 100644
--- a/python/paddle/amp/grad_scaler.py
+++ b/python/paddle/amp/grad_scaler.py
@@ -108,7 +108,7 @@ def __init__(
             or tracer._expected_place.is_custom_place()
         ):
             warnings.warn(
-                'AmpScaler can only be enabled on CUDAPlace, XPUPlace, MLUPlace and CustomPlace, current place is %s, so it makes no effect.'
+                'AmpScaler can only be enabled on CUDAPlace, XPUPlace and CustomPlace, current place is %s, so it has no effect.'
                 % tracer._expected_place
             )
             enable = False
diff --git a/python/paddle/distributed/launch/main.py b/python/paddle/distributed/launch/main.py
index da113e72c35c56..02099c743933e5 100644
--- a/python/paddle/distributed/launch/main.py
+++ b/python/paddle/distributed/launch/main.py
@@ -52,7 +52,7 @@ def launch():

     - ``--job_id``: The job unique id, it affects the log files' name. e.g., ``--job_id=job1``. Default ``--job_id=default``.

-    - ``--devices``: The selected accelerate devices on nodes, can be gpu/xpu/npu/mlu etc.. e.g., ``--devices=0,1,2,3`` will launch four training processes each bound to one device.
+    - ``--devices``: The selected accelerator devices on nodes, can be gpu/xpu/npu etc., e.g., ``--devices=0,1,2,3`` will launch four training processes each bound to one device.

     - ``training_script``: The full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``training.py``
diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py
index 713ba7d118f0a6..62e6eb0b14228d 100644
--- a/python/paddle/distributed/spawn.py
+++ b/python/paddle/distributed/spawn.py
@@ -428,9 +428,9 @@ def spawn(func, args=(), nprocs=-1, join=True, daemon=False, **options):
     Start multiple processes with ``spawn`` method for parallel training.

     .. note::
-        ``spawn`` now only supports GPU or XPU or MLU collective mode. The collective mode
-        of GPU and XPU and MLU cannot be started at the same time, so the option `gpus` and
-        `xpus` and 'mlus' cannot be configured at the same time.
+        ``spawn`` now only supports GPU or XPU collective mode. The collective mode
+        of GPU and XPU cannot be started at the same time, so the option `gpus` and
+        `xpus` cannot be configured at the same time.

     Args:
         func (function): The target function is called by spawned process.
@@ -457,8 +457,6 @@ def spawn(func, args=(), nprocs=-1, join=True, daemon=False, **options):
             selected gpus, such as "0,1,2,3". Default: None;
             (3) xpus (string): The training process will run on the
             selected xpus, such as "0,1,2,3". Default: None;
-            (4) mlus (string): The training process will run on the
-            selected mlus, such as "0,1,2,3". Default: None;
             (5) ips (string): Paddle cluster nodes ips, such as
             "192.168.0.16,192.168.0.17". Default: "127.0.0.1" .
diff --git a/python/paddle/profiler/profiler.py b/python/paddle/profiler/profiler.py
index 570580a3d2cd4d..065721a2747508 100644
--- a/python/paddle/profiler/profiler.py
+++ b/python/paddle/profiler/profiler.py
@@ -98,19 +98,16 @@ class ProfilerState(Enum):

 class ProfilerTarget(Enum):
     r"""
-    ProfilerTarget is used to specify target device for :ref:`profiling ` . Only CPU, GPU and MLU are supported currently.
+    ProfilerTarget is used to specify target device for :ref:`profiling ` . Only CPU and GPU are supported currently.

     The meaning of each ProfilerState is as following

     - **ProfilerTarget.CPU** : Profile events on CPU.

     - **ProfilerTarget.GPU** : Profile events on GPU.
-
-    - **ProfilerTarget.MLU** : Profile events on MLU.
     """

     CPU = 0
     GPU = 1
-    MLU = 2
     CUSTOM_DEVICE = 3

@@ -335,7 +332,6 @@ def _get_supported_targets() -> Iterable[ProfilerTarget]:
     if _Profiler.is_cnpapi_supported():
         return [
             ProfilerTarget.CPU,
-            ProfilerTarget.MLU,
             ProfilerTarget.CUSTOM_DEVICE,
         ]
     return [ProfilerTarget.CPU, ProfilerTarget.CUSTOM_DEVICE]
@@ -346,7 +342,7 @@ class Profiler:
     r"""
     Profiler context manager, user interface to manage profiling process to start, stop, export profiling data and print summary table.

     Args:
-        targets (list, optional): specify target devices to profile, and all existing and supported devices will be chosen by default. Currently supported values, :ref:`ProfilerTarget.CPU ` , :ref:`ProfilerTarget.GPU ` and :ref:`ProfilerTarget.MLU ` .
+        targets (list, optional): specify target devices to profile, and all existing and supported devices will be chosen by default. Currently supported values, :ref:`ProfilerTarget.CPU ` and :ref:`ProfilerTarget.GPU ` .
         scheduler (Callable|tuple, optional): If it is a callable object, it takes a step number as parameter and return the corresponding :ref:`ProfilerState `. This callable object can be generated by :ref:`make_scheduler ` function. If not provided (None), the default scheduler will keep tracing until the profiler exits. If it is a tuple, it has two values start_batch and end_batch, which means profiling range [start_batch, end_batch).
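
For reference, a minimal sketch of how the profiler is driven from Python after this change, using only the remaining CPU and GPU targets. The context-manager form and the ``step()`` and ``summary()`` calls are assumed from the existing ``paddle.profiler`` API rather than shown in this patch, and the scheduler values are illustrative only:

    import paddle.profiler as profiler

    # Trace CPU and GPU activity for batches [3, 7), following the tuple
    # semantics of the `scheduler` argument documented above.
    with profiler.Profiler(
        targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
        scheduler=(3, 7),
    ) as prof:
        for batch in range(10):
            # ... run one training step here ...
            prof.step()  # advance the profiler to the next batch

    prof.summary()  # print the collected statistics table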