[metal][macos] support Metal on macOS (2/2) (#7348)

* [metal][macos] support Metal on the macOS platform * add CoreGraphics * add platform availability * Update conv2d_image_compute.mm * update * Update metal_context_imp.mm
PaddlePaddle · Nov 5, 2021 · 2240f44 · 2240f44
1 parent 224730d
commit 2240f44
Show file tree

Hide file tree

Showing 10 changed files with 44 additions and 26 deletions.
diff --git a/cmake/configure.cmake b/cmake/configure.cmake
@@ -191,6 +191,8 @@ endif()
 
 if (LITE_WITH_METAL)
     find_library(METAL_LIBRARY Metal REQUIRED)
+    find_library(GRAPHIC CoreGraphics REQUIRED)
+    find_library(MPS_LIBRARY MetalPerformanceShaders REQUIRED)
     find_library(FOUNDATION_LIBRARY Foundation)
     add_definitions("-DLITE_WITH_METAL")
 endif()

diff --git a/cmake/lite.cmake b/cmake/lite.cmake
@@ -276,6 +276,9 @@ function(lite_cc_binary TARGET)
 
 
     # link to dynamic runtime lib
+    if(LITE_WITH_METAL)
+        target_link_libraries(${TARGET} ${METAL_LIBRARY} ${GRAPHIC} ${MPS_LIBRARY} ${FOUNDATION_LIBRARY})
+    endif()
     if(LITE_WITH_XPU)
         target_link_libraries(${TARGET} ${xpu_builder_libs} ${xpu_runtime_libs})
     endif()

diff --git a/lite/CMakeLists.txt b/lite/CMakeLists.txt
@@ -587,20 +587,20 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
         add_dependencies(tiny_publish_cxx_lib publish_inference_opencl)
        endif()
     endif()
+endif()
 
-    if (LITE_WITH_METAL)
-        add_custom_target(metal_lib_publish DEPENDS LiteMetalLIB
-                COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/metal/"
-                COMMAND cp -r "${CMAKE_BINARY_DIR}/lite.metallib" "${INFER_LITE_PUBLISH_ROOT}/metal/"
-                COMMENT "COPY lite.metallib")
+if (LITE_WITH_METAL)
+    add_custom_target(metal_lib_publish DEPENDS LiteMetalLIB
+            COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/metal/"
+            COMMAND cp -r "${CMAKE_BINARY_DIR}/lite.metallib" "${INFER_LITE_PUBLISH_ROOT}/metal/"
+            COMMENT "COPY lite.metallib")
 
-        if (NOT LITE_ON_TINY_PUBLISH)
-            add_dependencies(publish_inference_cxx_lib metal_lib_publish)
-        else ()
-            add_dependencies(tiny_publish_cxx_lib metal_lib_publish)
-        endif ()
+    if (NOT LITE_ON_TINY_PUBLISH)
+        add_dependencies(publish_inference_cxx_lib metal_lib_publish)
+    else ()
+        add_dependencies(tiny_publish_cxx_lib metal_lib_publish)
     endif ()
-endif()
+endif ()
 
 if(LITE_WITH_SW)
     add_custom_target(publish_inference_cxx_lib ${TARGET}

diff --git a/lite/backends/metal/CMakeLists.txt b/lite/backends/metal/CMakeLists.txt
@@ -41,6 +41,6 @@ lite_cc_library(metal_target_wrapper SRCS
         target_wrapper.mm
         )
 
-target_link_libraries(metal_target_wrapper ${METAL_LIBRARY} ${FOUNDATION_LIBRARY})
+target_link_libraries(metal_target_wrapper ${METAL_LIBRARY} ${GRAPHIC} ${MPS_LIBRARY} ${FOUNDATION_LIBRARY})
 
 add_dependencies(metal_target_wrapper LiteMetalLIB)
diff --git a/lite/backends/metal/metal_context_imp.mm b/lite/backends/metal/metal_context_imp.mm
@@ -306,7 +306,8 @@ - (void)set_use_memory_reuse:(bool)flag {
 - (void)setHeap:(id<MTLHeap>)heap key:(std::string)ptr API_AVAILABLE(ios(10.0)) {
     NSString* ptrStr = cString2NSString(ptr);
     if (!ptrStr) {
-        return nil;
+        LOG(WARN) << "heap key is nil";
+        return;
     }
     [self.memoryReuseHeaps setObject:heap forKey:ptrStr];
 }

diff --git a/lite/kernels/metal/image_op/conv2d_image_compute.mm b/lite/kernels/metal/image_op/conv2d_image_compute.mm
@@ -44,7 +44,7 @@
         init_memory();
 
         if (use_mps_) {
-            if (@available(iOS 11.3, *)) {
+            if (@available(iOS 10.0, macOS 10.13, macCatalyst 13.0, *)) {
                 if (mps_input_image_) {
                     CFRelease(mps_input_image_);
                     mps_input_image_ = nullptr;
@@ -107,7 +107,7 @@
         KernelFunctionName(param, metal_context_->use_winograde(), metal_context_->use_quadruple());
     // use mps or not
     bool should_use_mps = false;
-    if (@available(iOS 11.3, *)) {
+    if (@available(iOS 10.0, macOS 10.13, macCatalyst 13.0, *)) {
         if (metal_context_->use_mps()) {
             int input_c = static_cast<int>(input_buffer_->tensor_dim_[1]);
             int output_c = static_cast<int>(output_buffer_->tensor_dim_[1]);
@@ -425,7 +425,7 @@
     auto backend = (__bridge MetalContextImp*)metal_context_->backend();
     auto cmdbuf = [backend commandBuffer];
     if (mps_conv_op_) {
-        if (@available(iOS 11.3, *)) {
+        if (@available(iOS 10.0, macOS 10.13, macCatalyst 13.0, *)) {
             [((__bridge MPSCNNConvolution*)mps_conv_op_)
                 encodeToCommandBuffer:cmdbuf
                           sourceImage:(__bridge MPSImage*)mps_input_image_
@@ -447,7 +447,7 @@
         ((int)((*param.dilations)[0]) * (param.filter->dims()[2] - 1) + 1) / 2 - padding_top);
 
     // mps-Convolution
-    if (@available(iOS 11.3, *)) {
+    if (@available(iOS 10.0, macOS 10.13, macCatalyst 13.0, *)) {
         output_buffer_->use_mps_ = true;
         const_cast<MetalImage*>(input_buffer_)->use_mps_ = true;
         auto filter_h = static_cast<int>(param.filter->dims()[2]);

diff --git a/lite/kernels/metal/image_op/elementwise_image_compute.mm b/lite/kernels/metal/image_op/elementwise_image_compute.mm
@@ -91,7 +91,7 @@ bool InputsValid(const MetalImage* input_x_, const MetalImage* input_y_) {
     bool should_use_mps = false;
     auto ele_type_ = KernelBase::op_type();
 
-    if (@available(iOS 11.3, *)) {
+    if (@available(iOS 11.3, macOS 10.13.4, macCatalyst 13.0, *)) {
         if (metal_context_->use_mps()) {
             should_use_mps = true;
         }
@@ -251,8 +251,9 @@ bool InputsValid(const MetalImage* input_x_, const MetalImage* input_y_) {
 void ElementwiseImageCompute::run_with_mps() {
     auto backend = (__bridge MetalContextImp*)metal_context_->backend();
     auto cmdbuf = [backend commandBuffer];
+
     if (mps_op_) {
-        if (@available(iOS 11.3, *)) {
+        if (@available(iOS 11.3, macOS 10.13.4, macCatalyst 13.0, *)) {
             ((__bridge T*)mps_op_).primaryStrideInPixelsY = input_buffer_x_->dim_[1] == 1 ? 0 : 1;
             ((__bridge T*)mps_op_).primaryStrideInPixelsX = input_buffer_x_->dim_[2] == 1 ? 0 : 1;
             ((__bridge T*)mps_op_).secondaryStrideInPixelsY = input_buffer_y_->dim_[1] == 1 ? 0 : 1;
@@ -268,7 +269,7 @@ bool InputsValid(const MetalImage* input_x_, const MetalImage* input_y_) {
 
 template <typename T>
 void ElementwiseImageCompute::setup_with_mps() {
-    if (@available(iOS 11.3, *)) {
+    if (@available(iOS 11.3, macOS 10.13.4, macCatalyst 13.0, *)) {
         auto backend = (__bridge MetalContextImp*)metal_context_->backend();
         mps_op_ = (__bridge_retained void*)[[T alloc] initWithDevice:backend.device];
         // MPS input and output

diff --git a/lite/kernels/metal/image_op/pool_image_compute.mm b/lite/kernels/metal/image_op/pool_image_compute.mm
@@ -41,7 +41,7 @@
 
     // use mps or not
     bool should_use_mps = false;
-    if (@available(iOS 10.0, *)) {
+    if (@available(iOS 10.0, macOS 10.13, macCatalyst 13.0, *)) {
         if (metal_context_->use_mps()) {
             int input_c = static_cast<int>(input_buffer_->tensor_dim_[1]);
             int output_c = static_cast<int>(output_buffer_->tensor_dim_[1]);
@@ -153,7 +153,7 @@
     auto backend = (__bridge MetalContextImp*)metal_context_->backend();
     auto cmdbuf = [backend commandBuffer];
     if (mps_pool_op_) {
-        if (@available(iOS 10.0, *)) {
+        if (@available(iOS 10.0, macOS 10.13, macCatalyst 13.0, *)) {
             [((__bridge MPSCNNPooling*)mps_pool_op_)
                 encodeToCommandBuffer:cmdbuf
                           sourceImage:(__bridge MPSImage*)mps_input_image_
@@ -184,7 +184,7 @@
     int offsetX = static_cast<int>(((int)(kw - 1) + 1) / 2 - pw);
     int offsetY = static_cast<int>(((int)(kh - 1) + 1) / 2 - ph);
 
-    if (@available(iOS 10.0, *)) {
+    if (@available(iOS 10.0, macOS 10.13, macCatalyst 13.0, *)) {
         if (param.pooling_type == "max") {
             mps_pool_op_ =
                 (__bridge_retained void*)[[MPSCNNPoolingMax alloc] initWithDevice:backend.device

diff --git a/lite/kernels/metal/image_op/softmax_image_compute.mm b/lite/kernels/metal/image_op/softmax_image_compute.mm
@@ -39,7 +39,7 @@
 
     // whether to use mps
     bool should_use_mps = false;
-    if (@available(iOS 10.0, *)) {
+    if (@available(iOS 10.0, macOS 10.13, macCatalyst 13.0, *)) {
         if (metal_context_->use_mps()) {
             int input_c = static_cast<int>(input_buffer_->dim_[3]);
             int output_c = static_cast<int>(output_buffer_->dim_[3]);
@@ -137,7 +137,7 @@
     auto backend = (__bridge MetalContextImp*)metal_context_->backend();
     auto cmdbuf = [backend commandBuffer];
     if (mps_softmax_op_) {
-        if (@available(iOS 10.0, *)) {
+        if (@available(iOS 10.0, macOS 10.13, macCatalyst 13.0, *)) {
             [((__bridge MPSCNNSoftMax*)mps_softmax_op_)
                 encodeToCommandBuffer:cmdbuf
                           sourceImage:(__bridge MPSImage*)mps_input_image_
@@ -148,7 +148,7 @@
 }
 
 void SoftmaxImageCompute::setup_with_mps() {
-    if (@available(iOS 10.0, *)) {
+    if (@available(iOS 10.0, macOS 10.13, macCatalyst 13.0, *)) {
         auto backend = (__bridge MetalContextImp*)metal_context_->backend();
         //
         mps_softmax_op_ =

diff --git a/lite/tools/build_linux.sh b/lite/tools/build_linux.sh
@@ -29,6 +29,8 @@ WITH_STATIC_MKL=OFF
 WITH_AVX=ON
 # options of compiling OPENCL lib.
 WITH_OPENCL=OFF
+# options of compiling Metal lib for Mac OS.
+WITH_METAL=OFF
 # options of compiling rockchip NPU lib.
 WITH_ROCKCHIP_NPU=OFF
 ROCKCHIP_NPU_SDK_ROOT="$(pwd)/rknpu_ddk"  # Download RKNPU SDK from /~https://github.com/airockchip/rknpu_ddk.git
@@ -176,6 +178,7 @@ function init_cmake_mutable_options {
                         -DWITH_STATIC_MKL=$WITH_STATIC_MKL \
                         -DWITH_AVX=$WITH_AVX \
                         -DLITE_WITH_OPENCL=$WITH_OPENCL \
+                        -DLITE_WITH_METAL=$WITH_METAL \
                         -DLITE_WITH_RKNPU=$WITH_ROCKCHIP_NPU \
                         -DRKNPU_DDK_ROOT=$ROCKCHIP_NPU_SDK_ROOT \
                         -DLITE_WITH_XPU=$WITH_BAIDU_XPU \
@@ -287,6 +290,9 @@ function make_publish_so {
     if [ "${WITH_OPENCL}" = "ON" ]; then
         build_dir=${build_dir}.opencl
     fi
+    if [ "${WITH_METAL}" = "ON" ]; then
+        build_dir=${build_dir}.metal
+    fi
     if [ "${WITH_BAIDU_XPU}" = "ON" ]; then
         build_dir=${build_dir}.baidu_xpu
     fi
@@ -461,6 +467,11 @@ function main {
                 WITH_OPENCL="${i#*=}"
                 shift
                 ;;
+            # compiling lib for Mac OS with GPU support.
+            --with_metal=*)
+                WITH_METAL="${i#*=}"
+                shift
+                ;;
             # compiling lib which can operate on rockchip npu.
             --with_rockchip_npu=*)
                 WITH_ROCKCHIP_NPU="${i#*=}"