From 987fb2d88e8731544a777342f75ffcc2cdcc0359 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com>
Date: Mon, 24 Apr 2023 16:11:59 +0800
Subject: [PATCH] rm mlu (#53194)

---
 paddle/fluid/imperative/amp_auto_cast.cc | 4 ++--
 paddle/fluid/operators/collective/c_comm_init_op.cc | 4 ++--
 .../fluid/operators/generator/get_expected_kernel_func.cc | 2 +-
 paddle/fluid/operators/reduce_ops/reduce_op.h | 2 +-
 paddle/fluid/operators/softmax_op.cc | 4 ++--
 paddle/fluid/platform/device_context.h | 1 -
 paddle/fluid/platform/profiler/dump/nodetree.proto | 2 --
 paddle/fluid/platform/profiler/profiler.h | 2 +-
 paddle/fluid/pybind/imperative.cc | 7 +++----
 paddle/phi/api/profiler/trace_event.h | 2 --
 paddle/phi/common/backend.h | 1 -
 paddle/phi/common/place.h | 1 -
 python/paddle/amp/auto_cast.py | 2 +-
 python/paddle/amp/grad_scaler.py | 2 +-
 python/paddle/distributed/launch/main.py | 2 +-
 python/paddle/distributed/spawn.py | 8 +++-----
 python/paddle/profiler/profiler.py | 8 +++------
 17 files changed, 20 insertions(+), 34 deletions(-)

diff --git a/paddle/fluid/imperative/amp_auto_cast.cc b/paddle/fluid/imperative/amp_auto_cast.cc
index 2689a4eafa4422..bf6bdf382ce446 100644
--- a/paddle/fluid/imperative/amp_auto_cast.cc
+++ b/paddle/fluid/imperative/amp_auto_cast.cc
@@ -57,7 +57,7 @@ OpSupportedInfos(const std::string& place,
           0,
           platform::errors::InvalidArgument(
               "The argument `place` should be 'GPU', 'CPU', 'XPU', "
-              "'NPU', 'MLU', but got '%s'.",
+              "'NPU', but got '%s'.",
               place));

   std::unordered_set<std::string> all_ops;
@@ -148,7 +148,7 @@ AmpOperators::AmpOperators()
       OpSupportedInfos("GPU", paddle::framework::proto::VarType::BF16));
   unsupported_bf16_ops_->insert(unsupported_ops_gpu_bf16.begin(),
                                 unsupported_ops_gpu_bf16.end());
-// NOTE: GPU/NPU/XPU/MLU is compiled seperatly.
+// NOTE: GPU/NPU/XPU is compiled separately.
 #elif defined(PADDLE_WITH_XPU)
   auto unsupported_ops_xpu_fp16 = std::get<2>(
       OpSupportedInfos("XPU", paddle::framework::proto::VarType::FP16));
diff --git a/paddle/fluid/operators/collective/c_comm_init_op.cc b/paddle/fluid/operators/collective/c_comm_init_op.cc
index 9a34a3a2f37792..b32857a27b2d25 100644
--- a/paddle/fluid/operators/collective/c_comm_init_op.cc
+++ b/paddle/fluid/operators/collective/c_comm_init_op.cc
@@ -57,14 +57,14 @@ class CCommInitOp : public framework::OperatorBase {
     using CommContext = platform::BKCLCommContext;
 #else
     PADDLE_THROW(platform::errors::PreconditionNotMet(
-        "PaddlePaddle should be compiled with GPU or XPU or MLU."));
+        "PaddlePaddle should be compiled with GPU or XPU."));
 #endif

     PADDLE_ENFORCE_EQ(
         platform::is_gpu_place(place) || platform::is_xpu_place(place),
         true,
         platform::errors::PreconditionNotMet(
-            "CCommInitOp can run on gpu or xpu or mlu place only."));
+            "CCommInitOp can run on gpu or xpu place only."));

 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
     defined(PADDLE_WITH_XPU_BKCL)
diff --git a/paddle/fluid/operators/generator/get_expected_kernel_func.cc b/paddle/fluid/operators/generator/get_expected_kernel_func.cc
index 931bbc1fecc7da..558b0c400a4caf 100644
--- a/paddle/fluid/operators/generator/get_expected_kernel_func.cc
+++ b/paddle/fluid/operators/generator/get_expected_kernel_func.cc
@@ -80,7 +80,7 @@ phi::KernelKey GetReduceExpectedKernelType(
         platform::is_custom_place(ctx.GetPlace()),
         true,
         platform::errors::InvalidArgument(
-            "float16 can only be used on GPU or NPU or MLU or XPU place"));
+            "float16 can only be used on GPU or NPU or XPU place"));
   }
   return phi::KernelKey(input_data_type, ctx.GetPlace());
 }
diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h
index 1a3925e4422d04..3c2b0b948bb22b 100644
--- a/paddle/fluid/operators/reduce_ops/reduce_op.h
+++ b/paddle/fluid/operators/reduce_ops/reduce_op.h
@@ -629,7 +629,7 @@ class ReduceBaseOp : public framework::OperatorWithKernel {
         platform::is_custom_place(ctx.GetPlace()),
         true,
         platform::errors::InvalidArgument(
-            "float16 can only be used on GPU or NPU or MLU or XPU place"));
+            "float16 can only be used on GPU or NPU or XPU place"));
   }
   return phi::KernelKey(input_data_type, ctx.GetPlace());
 }
diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc
index 4f440ec495d423..ab5816965f05c9 100644
--- a/paddle/fluid/operators/softmax_op.cc
+++ b/paddle/fluid/operators/softmax_op.cc
@@ -48,7 +48,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
         platform::is_custom_place(ctx.GetPlace()),
         true,
         platform::errors::InvalidArgument(
-            "float16 can only be used on GPU/NPU/XPU/MLU and custom place"));
+            "float16 can only be used on GPU/NPU/XPU and custom place"));
   }
   return phi::KernelKey(
       ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type));
@@ -132,7 +132,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
         platform::is_xpu_place(ctx.GetPlace()) ||
         platform::is_custom_place(ctx.GetPlace())))
       PADDLE_THROW(platform::errors::InvalidArgument(
-          "float16 can only be used on GPU/NPU/XPU/MLU and custom place"));
+          "float16 can only be used on GPU/NPU/XPU and custom place"));
   }
   return phi::KernelKey(
       ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type));
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index d2866a9aa11651..96ddef2c60efe2 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -96,7 +96,6 @@ enum DeviceType {
   NPU = 2,
   XPU = 3,
   IPU = 4,
-  MLU = 5,
   CUSTOM_DEVICE = 6,

   MAX_DEVICE_TYPES = 7,
diff --git a/paddle/fluid/platform/profiler/dump/nodetree.proto b/paddle/fluid/platform/profiler/dump/nodetree.proto
index dc7b5046b079d1..cfb424b0202697 100644
--- a/paddle/fluid/platform/profiler/dump/nodetree.proto
+++ b/paddle/fluid/platform/profiler/dump/nodetree.proto
@@ -46,8 +46,6 @@ enum TracerEventTypeProto {
   PythonOp = 13;
   // Used to mark python level userdefined
   PythonUserDefined = 14;
-  // Used to mark mlu runtime record returned by cnpapi
-  MluRuntime = 15;
 };

 enum TracerMemEventTypeProto {
diff --git a/paddle/fluid/platform/profiler/profiler.h b/paddle/fluid/platform/profiler/profiler.h
index 2a7100b0a64ead..b486e7543d96cc 100644
--- a/paddle/fluid/platform/profiler/profiler.h
+++ b/paddle/fluid/platform/profiler/profiler.h
@@ -39,7 +39,7 @@ static constexpr uint32_t kProfileCustomDeviceOptionBit = 3;
 void SynchronizeDevice();

 struct ProfilerOptions {
-  uint32_t trace_switch = 0;  // bit 0: cpu, bit 1: gpu, bit 2: mlu
+  uint32_t trace_switch = 0;  // bit 0: cpu, bit 1: gpu
   uint32_t trace_level = FLAGS_host_trace_level;
 };

diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc
index e60211286ed37d..d6a5a8b8dfc874 100644
--- a/paddle/fluid/pybind/imperative.cc
+++ b/paddle/fluid/pybind/imperative.cc
@@ -155,7 +155,7 @@ static const platform::Place PyObjectToPlace(const py::object &place_obj) {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "Place should be one of "
         "Place/CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/IPUPlace/"
-        "MLUPlace/CustomPlace"));
+        "CustomPlace"));
   }
 }

@@ -209,8 +209,7 @@ static void InitVarBaseAndTensor(imperative::VarBase *self,
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "Place should be one of "
-        "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/IPUPlace/"
-        "MLUPlace"));
+        "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/IPUPlace"));
   }
   self->SetDataType(framework::TransToProtoVarType(tensor->dtype()));
 }
@@ -2214,7 +2213,7 @@ void BindImperative(py::module *m_ptr) {
       } else {
         PADDLE_THROW(platform::errors::InvalidArgument(
             "Incompatible Place Type: supports XPUPlace, CUDAPlace, "
-            "CPUPlace, NPUPlace, IPUPlace, MLUPlace"
+            "CPUPlace, NPUPlace, IPUPlace, "
             "and CUDAPinnedPlace, "
             "but got Unknown Type!"));
       }
diff --git a/paddle/phi/api/profiler/trace_event.h b/paddle/phi/api/profiler/trace_event.h
index 1b049a0f105cb0..e526953d5c8e0c 100644
--- a/paddle/phi/api/profiler/trace_event.h
+++ b/paddle/phi/api/profiler/trace_event.h
@@ -51,8 +51,6 @@ enum class TracerEventType {
   PythonOp = 13,
   // Used to mark python level userdefined
   PythonUserDefined = 14,
-  // Used to mark mlu runtime record returned by cnpapi
-  MluRuntime = 15,
   // A flag to denote the number of current types
   NumTypes
 };
diff --git a/paddle/phi/common/backend.h b/paddle/phi/common/backend.h
index 904038a4f56020..b7f30797ca78ec 100644
--- a/paddle/phi/common/backend.h
+++ b/paddle/phi/common/backend.h
@@ -53,7 +53,6 @@ enum class Backend : uint8_t {
   // various acceleration devices' backends
   XPU,  // XPU currently does not exist at the same time as CUDA
   NPU,  // NPU currently does not exist at the same time as CUDA
-  MLU,  // MLU currently does not exist at the same time as CUDA
   IPU,

   // paddle kernel primitives backend
diff --git a/paddle/phi/common/place.h b/paddle/phi/common/place.h
index e65235cfa3d691..543a79977eb7ed 100644
--- a/paddle/phi/common/place.h
+++ b/paddle/phi/common/place.h
@@ -34,7 +34,6 @@ enum class AllocationType : int8_t {
   NPU = 5,
   NPUPINNED = 6,
   IPU = 7,
-  MLU = 8,
   CUSTOM = 9,
 };

diff --git a/python/paddle/amp/auto_cast.py b/python/paddle/amp/auto_cast.py
index 1f82533edbfb32..ae9c957df68faf 100644
--- a/python/paddle/amp/auto_cast.py
+++ b/python/paddle/amp/auto_cast.py
@@ -349,7 +349,7 @@ def amp_guard(
         or tracer._expected_place.is_custom_place()
     ):
         warnings.warn(
-            'amp_guard can only be enabled on CUDAPlace, XPUPlace, MLUPlace, NPUPlace, and CustomPlace, current place is %s, so it makes no effect.'
+            'amp_guard can only be enabled on CUDAPlace, XPUPlace, NPUPlace, and CustomPlace, current place is %s, so it has no effect.'
             % tracer._expected_place
         )
         enable = False
diff --git a/python/paddle/amp/grad_scaler.py b/python/paddle/amp/grad_scaler.py
index 5c2d033d336332..0f6d9f21a32c6a 100644
--- a/python/paddle/amp/grad_scaler.py
+++ b/python/paddle/amp/grad_scaler.py
@@ -108,7 +108,7 @@ def __init__(
             or tracer._expected_place.is_custom_place()
         ):
             warnings.warn(
-                'AmpScaler can only be enabled on CUDAPlace, XPUPlace, MLUPlace and CustomPlace, current place is %s, so it makes no effect.'
+                'AmpScaler can only be enabled on CUDAPlace, XPUPlace and CustomPlace, current place is %s, so it has no effect.'
                 % tracer._expected_place
             )
             enable = False
diff --git a/python/paddle/distributed/launch/main.py b/python/paddle/distributed/launch/main.py
index da113e72c35c56..02099c743933e5 100644
--- a/python/paddle/distributed/launch/main.py
+++ b/python/paddle/distributed/launch/main.py
@@ -52,7 +52,7 @@ def launch():

     - ``--job_id``: The job unique id, it affects the log files' name. e.g., ``--job_id=job1``. Default ``--job_id=default``.

-    - ``--devices``: The selected accelerate devices on nodes, can be gpu/xpu/npu/mlu etc.. e.g., ``--devices=0,1,2,3`` will launch four training processes each bound to one device.
+    - ``--devices``: The selected accelerator devices on nodes, can be gpu/xpu/npu etc., e.g., ``--devices=0,1,2,3`` will launch four training processes each bound to one device.

     - ``training_script``: The full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``training.py``
diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py
index 713ba7d118f0a6..62e6eb0b14228d 100644
--- a/python/paddle/distributed/spawn.py
+++ b/python/paddle/distributed/spawn.py
@@ -428,9 +428,9 @@ def spawn(func, args=(), nprocs=-1, join=True, daemon=False, **options):
     Start multiple processes with ``spawn`` method for parallel training.

     .. note::
-        ``spawn`` now only supports GPU or XPU or MLU collective mode. The collective mode
-        of GPU and XPU and MLU cannot be started at the same time, so the option `gpus` and
-        `xpus` and 'mlus' cannot be configured at the same time.
+        ``spawn`` now only supports GPU or XPU collective mode. The collective mode
+        of GPU and XPU cannot be started at the same time, so the option `gpus` and
+        `xpus` cannot be configured at the same time.

     Args:
         func (function): The target function is called by spawned process.
@@ -457,8 +457,6 @@ def spawn(func, args=(), nprocs=-1, join=True, daemon=False, **options):
             selected gpus, such as "0,1,2,3". Default: None;
             (3) xpus (string): The training process will run on the
             selected xpus, such as "0,1,2,3". Default: None;
-            (4) mlus (string): The training process will run on the
-            selected mlus, such as "0,1,2,3". Default: None;
             (5) ips (string): Paddle cluster nodes ips, such as
             "192.168.0.16,192.168.0.17". Default: "127.0.0.1" .
diff --git a/python/paddle/profiler/profiler.py b/python/paddle/profiler/profiler.py
index 570580a3d2cd4d..065721a2747508 100644
--- a/python/paddle/profiler/profiler.py
+++ b/python/paddle/profiler/profiler.py
@@ -98,19 +98,16 @@ class ProfilerState(Enum):

 class ProfilerTarget(Enum):
     r"""
-    ProfilerTarget is used to specify target device for :ref:`profiling ` . Only CPU, GPU and MLU are supported currently.
+    ProfilerTarget is used to specify target device for :ref:`profiling ` . Only CPU and GPU are supported currently.

     The meaning of each ProfilerState is as following

     - **ProfilerTarget.CPU** : Profile events on CPU.

     - **ProfilerTarget.GPU** : Profile events on GPU.
-
-    - **ProfilerTarget.MLU** : Profile events on MLU.
     """

     CPU = 0
     GPU = 1
-    MLU = 2
     CUSTOM_DEVICE = 3

@@ -335,7 +332,6 @@ def _get_supported_targets() -> Iterable[ProfilerTarget]:
     if _Profiler.is_cnpapi_supported():
         return [
             ProfilerTarget.CPU,
-            ProfilerTarget.MLU,
             ProfilerTarget.CUSTOM_DEVICE,
         ]
     return [ProfilerTarget.CPU, ProfilerTarget.CUSTOM_DEVICE]
@@ -346,7 +342,7 @@ class Profiler:
     r"""
     Profiler context manager, user interface to manage profiling process to start, stop, export profiling data and print summary table.

     Args:
-        targets (list, optional): specify target devices to profile, and all existing and supported devices will be chosen by default. Currently supported values, :ref:`ProfilerTarget.CPU ` , :ref:`ProfilerTarget.GPU ` and :ref:`ProfilerTarget.MLU ` .
+        targets (list, optional): specify target devices to profile, and all existing and supported devices will be chosen by default. Currently supported values, :ref:`ProfilerTarget.CPU ` and :ref:`ProfilerTarget.GPU ` .
         scheduler (Callable|tuple, optional): If it is a callable object, it takes a step number as parameter and return the corresponding :ref:`ProfilerState `. This callable object can be generated by :ref:`make_scheduler ` function. If not provided (None), the default scheduler will keep tracing until the profiler exits. If it is a tuple, it has two values start_batch and end_batch, which means profiling range [start_batch, end_batch).
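
For reference, a minimal sketch of how the profiler is driven from Python after this change, using only the remaining CPU and GPU targets. The context-manager form and the ``step()`` and ``summary()`` calls are assumed from the existing ``paddle.profiler`` API rather than shown in this patch, and the scheduler values are illustrative only:

    import paddle.profiler as profiler

    # Trace CPU and GPU activity for batches [3, 7), following the tuple
    # semantics of the `scheduler` argument documented above.
    with profiler.Profiler(
        targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
        scheduler=(3, 7),
    ) as prof:
        for batch in range(10):
            # ... run one training step here ...
            prof.step()  # advance the profiler to the next batch

    prof.summary()  # print the collected statistics table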