diff --git a/lite/backends/opencl/cl_kernel/buffer/depthwise_conv2d_kernel.cl b/lite/backends/opencl/cl_kernel/buffer/depthwise_conv2d_kernel.cl
index ab575ba9b38..cc45f66859b 100644
--- a/lite/backends/opencl/cl_kernel/buffer/depthwise_conv2d_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/buffer/depthwise_conv2d_kernel.cl
@@ -62,7 +62,8 @@ __kernel void depthwise_conv2d(const int numel, // num of elements
               v += bias_data[c];
           }
 #ifdef RELU
-          output_data[index] = activation(v);
+          CL_DTYPE alpha;
+          output_data[index] = activation(v, alpha);
 #else
           output_data[index] = v;
 #endif
diff --git a/lite/backends/opencl/cl_kernel/buffer/elementwise_add_kernel.cl b/lite/backends/opencl/cl_kernel/buffer/elementwise_add_kernel.cl
index bb6faea629c..05566798ad7 100644
--- a/lite/backends/opencl/cl_kernel/buffer/elementwise_add_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/buffer/elementwise_add_kernel.cl
@@ -37,7 +37,8 @@ __kernel void elementwise_add(__global const CL_DTYPE* x_data,
   for (int n = 0; n < num; ++n) { // n: [0, h*w)
     *dout_ptr = *din_ptr + diny_data;
 #ifdef RELU
-    *dout_ptr = activation(*dout_ptr);
+    CL_DTYPE alpha;
+    *dout_ptr = activation(*dout_ptr, alpha);
 #endif
     ++dout_ptr;
     ++din_ptr;
diff --git a/lite/backends/opencl/cl_kernel/buffer/fc_kernel.cl b/lite/backends/opencl/cl_kernel/buffer/fc_kernel.cl
index 080ce2b4574..b48b83e788a 100644
--- a/lite/backends/opencl/cl_kernel/buffer/fc_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/buffer/fc_kernel.cl
@@ -54,7 +54,8 @@ void fc_gemm_naive(__global const CL_DTYPE* a,
   }
 
 #ifdef RELU
-  c[row * N + col] = activation(c0);
+  CL_DTYPE alpha;
+  c[row * N + col] = activation(c0, alpha);
 #else
   c[row * N + col] = c0;
 #endif
@@ -91,7 +92,8 @@ void gemm_batch_naive(__global const CL_DTYPE* a,
     c0 += a0 * b0;
   }
 
-  cur_c[row * N + col] = activation(c0);
+  CL_DTYPE alpha;
+  cur_c[row * N + col] = activation(c0, alpha);
 }
 
 
@@ -235,7 +237,8 @@ void fc_gemv_naive(__global const CL_DTYPE* a,
     }
 
 #ifdef RELU
-  c[col] = activation(c0);
+  CL_DTYPE alpha;
+  c[col] = activation(c0, alpha);
 #else
   c[col] = c0;
 #endif
@@ -254,6 +257,7 @@ void fc_gemv_1x4(__global const CL_DTYPE* a,
                  const int M, const int N, const int K) {
     const int col = get_global_id(0) << 2; // gws[0]: [0, N >> 2) height of B == N
 
+    half alpha;
     if (col + 3 < N) {
         half4 c0 = 0.0f;
         if (bias) {
@@ -310,11 +314,11 @@ void fc_gemv_1x4(__global const CL_DTYPE* a,
         } else {
             switch (col % 4) {
                 case 3:
-                    c[col + 2] = activation(c0.z);
+                    c[col + 2] = activation(c0.z, alpha);
                 case 2:
-                    c[col + 1] = activation(c0.y);
+                    c[col + 1] = activation(c0.y, alpha);
                 case 1:
-                    c[col] = activation(c0.x);
+                    c[col] = activation(c0.x, alpha);
             }
         }
 #else
@@ -341,7 +345,7 @@ void fc_gemv_1x4(__global const CL_DTYPE* a,
                c0 += a0 * b0;
            }
 #ifdef RELU
-           c[col + col_offset] = activation(c0);
+           c[col + col_offset] = activation(c0, alpha);
 #else
            c[col + col_offset] = c0;
 #endif
diff --git a/lite/backends/opencl/cl_kernel/buffer/relu_kernel.cl b/lite/backends/opencl/cl_kernel/buffer/relu_kernel.cl
index b07dc8132f4..fcf74685924 100644
--- a/lite/backends/opencl/cl_kernel/buffer/relu_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/buffer/relu_kernel.cl
@@ -16,7 +16,8 @@ limitations under the License. */
 
 __kernel void relu(__global const CL_DTYPE* x_data, const int count, __global CL_DTYPE* out_data) {
   const int index = get_global_id(0); 
+  CL_DTYPE alpha;
   if (index < count) {
-    out_data[index] = activation(x_data[index]);
+    out_data[index] = activation(x_data[index], alpha);
   }
 }
diff --git a/lite/backends/opencl/cl_kernel/cl_common.h b/lite/backends/opencl/cl_kernel/cl_common.h
index a8013e4ec4c..6998051e1e9 100644
--- a/lite/backends/opencl/cl_kernel/cl_common.h
+++ b/lite/backends/opencl/cl_kernel/cl_common.h
@@ -90,12 +90,7 @@ __constant sampler_t SAMPLER =
 /////////////////////////////////
 // activation / activation_type4
 /////////////////////////////////
-inline CL_DTYPE activation(CL_DTYPE in
-#ifdef PRELU
-                           ,
-                           CL_DTYPE prelu_alpha
-#endif
-                           ) {
+inline CL_DTYPE activation(CL_DTYPE in, CL_DTYPE prelu_alpha) {
   CL_DTYPE output = in;
 #ifdef PRELU
   output = select(prelu_alpha * in, in, in >= (CL_DTYPE)0);
@@ -138,12 +133,7 @@ inline CL_DTYPE activation(CL_DTYPE in
   return output;
 }
 
-inline CL_DTYPE4 activation_type4(CL_DTYPE4 in
-#ifdef PRELU
-                                  ,
-                                  CL_DTYPE4 prelu_alpha
-#endif
-                                  ) {
+inline CL_DTYPE4 activation_type4(CL_DTYPE4 in, CL_DTYPE4 prelu_alpha) {
   CL_DTYPE4 output = in;
 #ifdef PRELU
   output = select(prelu_alpha * in, in, isgreaterequal(in, (CL_DTYPE4)0));
diff --git a/lite/backends/opencl/cl_kernel/image/conv2d_1x1_opt_kernel.cl b/lite/backends/opencl/cl_kernel/image/conv2d_1x1_opt_kernel.cl
index 57880dfec43..67b0ccdd50e 100644
--- a/lite/backends/opencl/cl_kernel/image/conv2d_1x1_opt_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/image/conv2d_1x1_opt_kernel.cl
@@ -21,7 +21,8 @@ __kernel void conv2d_1x1_opt(
     __private const int input_height, /* of one block */
     __private const int output_width,
     __private const int output_height,
-    __private const int old_w) {
+    __private const int old_w,
+    __read_only image2d_t prelu_alpha) {
 
   const int out_c = get_global_id(0);
   const int out_w = get_global_id(1);
@@ -251,10 +252,33 @@ __kernel void conv2d_1x1_opt(
             READ_IMG_TYPE(CL_DTYPE_CHAR, new_biase, SAMPLER, (int2)(out_c, 0));
 #endif
 
-  output0 = activation_type4(output0);
-  output1 = activation_type4(output1);
-  output2 = activation_type4(output2);
-  output3 = activation_type4(output3);
+CL_DTYPE4 alpha0,alpha1,alpha2,alpha3;
+#ifdef PRELU_CH //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(out_c, 0));
+  alpha1 = alpha0;
+  alpha2 = alpha0;
+  alpha3 = alpha0;
+  //}
+#elif defined(PRELU_ELE) //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos0);
+  alpha1 = alpha0;
+  alpha2 = alpha0;
+  alpha3 = alpha0;
+  //}
+#elif defined(PRELU_ALL) //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+  alpha0.y = alpha0.x;
+  alpha0.z = alpha0.x;
+  alpha0.w = alpha0.x;
+  alpha1 = alpha0;
+  alpha2 = alpha0;
+  alpha3 = alpha0;
+  //}
+#endif
+  output0 = activation_type4(output0, alpha0);
+  output1 = activation_type4(output1, alpha1);
+  output2 = activation_type4(output2, alpha2);
+  output3 = activation_type4(output3, alpha3);
 
 #ifdef SCALE_ACTIVATION
   output0 = fuse_scale(output0, 1.f, 0.f, 0.f);
@@ -301,7 +325,8 @@ __kernel void conv2d_1x1_simple(
     __private const int input_height, /* of one block */
     __private const int output_width,
     __private const int output_height,
-    __private const int old_w) {
+    __private const int old_w,
+    __read_only image2d_t prelu_alpha) {
   const int out_c = get_global_id(0);
   const int out_w = get_global_id(1);
   const int out_nh = get_global_id(2);
@@ -421,10 +446,33 @@ __kernel void conv2d_1x1_simple(
             READ_IMG_TYPE(CL_DTYPE_CHAR, new_biase, SAMPLER, (int2)(out_c, 0));
 #endif
 
-  output0 = activation_type4(output0);
-  output1 = activation_type4(output1);
-  output2 = activation_type4(output2);
-  output3 = activation_type4(output3);
+CL_DTYPE4 alpha0,alpha1,alpha2,alpha3;
+#ifdef PRELU_CH //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(out_c, 0));
+  alpha1 = alpha0;
+  alpha2 = alpha0;
+  alpha3 = alpha0;
+  //}
+#elif defined(PRELU_ELE) //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos0);
+  alpha1 = alpha0;
+  alpha2 = alpha0;
+  alpha3 = alpha0;
+  //}
+#elif defined(PRELU_ALL) //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+  alpha0.y = alpha0.x;
+  alpha0.z = alpha0.x;
+  alpha0.w = alpha0.x;
+  alpha1 = alpha0;
+  alpha2 = alpha0;
+  alpha3 = alpha0;
+  //}
+#endif
+  output0 = activation_type4(output0, alpha0);
+  output1 = activation_type4(output1, alpha1);
+  output2 = activation_type4(output2, alpha2);
+  output3 = activation_type4(output3, alpha3);
 
 #ifdef SCALE_ACTIVATION
   output0 = fuse_scale(output0, 1.f, 0.f, 0.f);
diff --git a/lite/backends/opencl/cl_kernel/image/conv2d_3x3_kernel.cl b/lite/backends/opencl/cl_kernel/image/conv2d_3x3_kernel.cl
index 4e16ead836f..65c119a23c4 100644
--- a/lite/backends/opencl/cl_kernel/image/conv2d_3x3_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/image/conv2d_3x3_kernel.cl
@@ -34,7 +34,8 @@ __kernel void conv2d_3x3(__private const int global_size_dim0,
                          __private const int filter_width,
                          __private const int filter_height,
                          __private const int group,
-                         __private const int input_tensor_c) {
+                         __private const int input_tensor_c,
+                         __read_only image2d_t prelu_alpha) {
 
   const int out_c = get_global_id(0);
   const int out_w = get_global_id(1);
@@ -251,7 +252,22 @@ __kernel void conv2d_3x3(__private const int global_size_dim0,
         output.w = (i == 3) ? output.w + tmp_out : output.w;
       }
     }
-  output = activation_type4(output);
+
+CL_DTYPE4 alpha0;
+#ifdef PRELU_CH //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(out_c, 0));
+  //}
+#elif defined(PRELU_ELE) //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos);
+  //}
+#elif defined(PRELU_ALL) //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+  alpha0.y = alpha0.x;
+  alpha0.z = alpha0.x;
+  alpha0.w = alpha0.x;
+  //}
+#endif
+  output = activation_type4(output, alpha0);
 
 #ifdef SCALE_ACTIVATION
   output = fuse_scale(output, 1.f, 0.f, 0.f);
diff --git a/lite/backends/opencl/cl_kernel/image/conv2d_3x3_opt_kernel.cl b/lite/backends/opencl/cl_kernel/image/conv2d_3x3_opt_kernel.cl
index 02a0c778103..ebd0e021b99 100644
--- a/lite/backends/opencl/cl_kernel/image/conv2d_3x3_opt_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/image/conv2d_3x3_opt_kernel.cl
@@ -29,7 +29,8 @@ __kernel void conv2d_3x3_opt(__private const int item_ch,
                              __private const int in_w,
                              __private const int in_h,
                              __private const int out_w,
-                             __private const int out_h) {
+                             __private const int out_h,
+                             __read_only image2d_t prelu_alpha) {
   // item_id
   const int item_ch_id = get_global_id(0);
   const int item_w_id = get_global_id(1);
@@ -216,11 +217,56 @@ __kernel void conv2d_3x3_opt(__private const int item_ch,
     }
   }
 
-  output[0] = activation_type4(output[0]);
-  output[1] = activation_type4(output[1]);
-  output[2] = activation_type4(output[2]);
-  output[3] = activation_type4(output[3]);
-  output[4] = activation_type4(output[4]);
+CL_DTYPE4 alpha[5];
+#ifdef PRELU_CH //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(item_ch_id, 0));
+  alpha[1] = alpha[0];
+  alpha[2] = alpha[0];
+  alpha[3] = alpha[0];
+  alpha[4] = alpha[0];
+  //}
+#elif defined(PRELU_ELE) //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                            prelu_alpha,
+                            SAMPLER,
+                            (int2)(out_w_base_id + out_w_id0, item_h_id));
+  if (out_w_id1 < out_w) {
+    alpha[1] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id1, item_h_id));
+  }
+  if (out_w_id2 < out_w) {
+    alpha[2] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id2, item_h_id));
+  }
+  if (out_w_id3 < out_w) {
+    alpha[3] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id3, item_h_id));
+  }
+  if (out_w_id4 < out_w) {
+    alpha[4] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id4, item_h_id));
+  }
+  //}
+#elif defined(PRELU_ALL) //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+  alpha[0].y = alpha[0].x; alpha[0].z = alpha[0].x; alpha[0].w = alpha[0].x;
+  alpha[1] = alpha[0]; alpha[2] = alpha[0];
+  alpha[3] = alpha[0]; alpha[4] = alpha[0];
+  //}
+#endif
+  output[0] = activation_type4(output[0], alpha[0]);
+  output[1] = activation_type4(output[1], alpha[1]);
+  output[2] = activation_type4(output[2], alpha[2]);
+  output[3] = activation_type4(output[3], alpha[3]);
+  output[4] = activation_type4(output[4], alpha[4]);
 
 #ifdef SCALE_ACTIVATION
   output[0] = fuse_scale(output[0], 1.f, 0.f, 0.f);
@@ -276,7 +322,8 @@ __kernel void conv2d_3x3_multi_batch(__private const int item_ch,
                                      __private const int in_w,
                                      __private const int in_h,
                                      __private const int out_w,
-                                     __private const int out_h) {
+                                     __private const int out_h,
+                                     __read_only image2d_t prelu_alpha) {
   // item_id
   const int item_ch_id = get_global_id(0);
   const int item_w_id = get_global_id(1);
@@ -464,11 +511,56 @@ __kernel void conv2d_3x3_multi_batch(__private const int item_ch,
     }
   }
 
-  output[0] = activation_type4(output[0]);
-  output[1] = activation_type4(output[1]);
-  output[2] = activation_type4(output[2]);
-  output[3] = activation_type4(output[3]);
-  output[4] = activation_type4(output[4]);
+CL_DTYPE4 alpha[5];
+#ifdef PRELU_CH //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(item_ch_id, 0));
+  alpha[1] = alpha[0];
+  alpha[2] = alpha[0];
+  alpha[3] = alpha[0];
+  alpha[4] = alpha[0];
+  //}
+#elif defined(PRELU_ELE) //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                            prelu_alpha,
+                            SAMPLER,
+                            (int2)(out_w_base_id + out_w_id0, item_h_id));
+  if (out_w_id1 < out_w) {
+    alpha[1] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id1, item_h_id));
+  }
+  if (out_w_id2 < out_w) {
+    alpha[2] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id2, item_h_id));
+  }
+  if (out_w_id3 < out_w) {
+    alpha[3] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id3, item_h_id));
+  }
+  if (out_w_id4 < out_w) {
+    alpha[4] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id4, item_h_id));
+  }
+  //}
+#elif defined(PRELU_ALL) //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+  alpha[0].y = alpha[0].x; alpha[0].z = alpha[0].x; alpha[0].w = alpha[0].x;
+  alpha[1] = alpha[0]; alpha[2] = alpha[0];
+  alpha[3] = alpha[0]; alpha[4] = alpha[0];
+  //}
+#endif
+  output[0] = activation_type4(output[0], alpha[0]);
+  output[1] = activation_type4(output[1], alpha[1]);
+  output[2] = activation_type4(output[2], alpha[2]);
+  output[3] = activation_type4(output[3], alpha[3]);
+  output[4] = activation_type4(output[4], alpha[4]);
 
 #ifdef SCALE_ACTIVATION
   output[0] = fuse_scale(output[0], 1.f, 0.f, 0.f);
diff --git a/lite/backends/opencl/cl_kernel/image/conv2d_5x5_kernel.cl b/lite/backends/opencl/cl_kernel/image/conv2d_5x5_kernel.cl
index 587d488e9ab..d96dc8cc11b 100644
--- a/lite/backends/opencl/cl_kernel/image/conv2d_5x5_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/image/conv2d_5x5_kernel.cl
@@ -32,7 +32,8 @@ __kernel void conv2d_5x5(__private const int global_size_dim0,
                          __private const int input_width,  /* of one block */
                          __private const int input_height, /* of one block */
                          __private const int output_width,
-                         __private const int output_height) {
+                         __private const int output_height,
+                         __read_only image2d_t prelu_alpha) {
 
   const int out_c = get_global_id(0);
   const int out_w = get_global_id(1);
@@ -172,7 +173,21 @@ __kernel void conv2d_5x5(__private const int global_size_dim0,
             READ_IMG_TYPE(CL_DTYPE_CHAR, new_biase, SAMPLER, (int2)(out_c, 0));
 #endif
 
-        output = activation_type4(output);
+CL_DTYPE4 alpha0;
+#ifdef PRELU_CH //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(out_c, 0));
+  //}
+#elif defined(PRELU_ELE) //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos);
+  //}
+#elif defined(PRELU_ALL) //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+  alpha0.y = alpha0.x;
+  alpha0.z = alpha0.x;
+  alpha0.w = alpha0.x;
+  //}
+#endif
+  output = activation_type4(output, alpha0);
 
 #ifdef SCALE_ACTIVATION
   output = fuse_scale(output, 1.f, 0.f, 0.f);
diff --git a/lite/backends/opencl/cl_kernel/image/conv2d_5x5_opt_kernel.cl b/lite/backends/opencl/cl_kernel/image/conv2d_5x5_opt_kernel.cl
index 927d56d5ac2..b041eb73ac1 100644
--- a/lite/backends/opencl/cl_kernel/image/conv2d_5x5_opt_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/image/conv2d_5x5_opt_kernel.cl
@@ -30,7 +30,8 @@ __kernel void conv2d_5x5_opt(__private const int item_ch,
                              __private const int in_w,
                              __private const int in_h,
                              __private const int out_w,
-                             __private const int out_h) {
+                             __private const int out_h,
+                             __read_only image2d_t prelu_alpha) {
   // filter
   const int filter_w = 5;
   const int filter_h = 5;
@@ -222,11 +223,56 @@ __kernel void conv2d_5x5_opt(__private const int item_ch,
     }
   }
 
-  output[0] = activation_type4(output[0]);
-  output[1] = activation_type4(output[1]);
-  output[2] = activation_type4(output[2]);
-  output[3] = activation_type4(output[3]);
-  output[4] = activation_type4(output[4]);
+CL_DTYPE4 alpha[5];
+#ifdef PRELU_CH //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(item_ch_id, 0));
+  alpha[1] = alpha[0];
+  alpha[2] = alpha[0];
+  alpha[3] = alpha[0];
+  alpha[4] = alpha[0];
+  //}
+#elif defined(PRELU_ELE) //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                            prelu_alpha,
+                            SAMPLER,
+                            (int2)(out_w_base_id + out_w_id0, item_h_id));
+  if (out_w_id1 < out_w) {
+    alpha[1] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id1, item_h_id));
+  }
+  if (out_w_id2 < out_w) {
+    alpha[2] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id2, item_h_id));
+  }
+  if (out_w_id3 < out_w) {
+    alpha[3] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id3, item_h_id));
+  }
+  if (out_w_id4 < out_w) {
+    alpha[4] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id4, item_h_id));
+  }
+  //}
+#elif defined(PRELU_ALL) //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+  alpha[0].y = alpha[0].x; alpha[0].z = alpha[0].x; alpha[0].w = alpha[0].x;
+  alpha[1] = alpha[0]; alpha[2] = alpha[0];
+  alpha[3] = alpha[0]; alpha[4] = alpha[0];
+  //}
+#endif
+  output[0] = activation_type4(output[0], alpha[0]);
+  output[1] = activation_type4(output[1], alpha[1]);
+  output[2] = activation_type4(output[2], alpha[2]);
+  output[3] = activation_type4(output[3], alpha[3]);
+  output[4] = activation_type4(output[4], alpha[4]);
 
 #ifdef SCALE_ACTIVATION
   output[0] = fuse_scale(output[0], 1.f, 0.f, 0.f);
@@ -281,7 +327,8 @@ __kernel void conv2d_5x5_multi_batch(__private const int item_ch,
                                      __private const int in_w,
                                      __private const int in_h,
                                      __private const int out_w,
-                                     __private const int out_h) {
+                                     __private const int out_h,
+                                     __read_only image2d_t prelu_alpha) {
   // filter
   const int filter_w = 5;
   const int filter_h = 5;
@@ -477,11 +524,56 @@ __kernel void conv2d_5x5_multi_batch(__private const int item_ch,
     }
   }
 
-  output[0] = activation_type4(output[0]);
-  output[1] = activation_type4(output[1]);
-  output[2] = activation_type4(output[2]);
-  output[3] = activation_type4(output[3]);
-  output[4] = activation_type4(output[4]);
+CL_DTYPE4 alpha[5];
+#ifdef PRELU_CH //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(item_ch_id, 0));
+  alpha[1] = alpha[0];
+  alpha[2] = alpha[0];
+  alpha[3] = alpha[0];
+  alpha[4] = alpha[0];
+  //}
+#elif defined(PRELU_ELE) //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                            prelu_alpha,
+                            SAMPLER,
+                            (int2)(out_w_base_id + out_w_id0, item_h_id));
+  if (out_w_id1 < out_w) {
+    alpha[1] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id1, item_h_id));
+  }
+  if (out_w_id2 < out_w) {
+    alpha[2] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id2, item_h_id));
+  }
+  if (out_w_id3 < out_w) {
+    alpha[3] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id3, item_h_id));
+  }
+  if (out_w_id4 < out_w) {
+    alpha[4] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id4, item_h_id));
+  }
+  //}
+#elif defined(PRELU_ALL) //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+  alpha[0].y = alpha[0].x; alpha[0].z = alpha[0].x; alpha[0].w = alpha[0].x;
+  alpha[1] = alpha[0]; alpha[2] = alpha[0];
+  alpha[3] = alpha[0]; alpha[4] = alpha[0];
+  //}
+#endif
+  output[0] = activation_type4(output[0], alpha[0]);
+  output[1] = activation_type4(output[1], alpha[1]);
+  output[2] = activation_type4(output[2], alpha[2]);
+  output[3] = activation_type4(output[3], alpha[3]);
+  output[4] = activation_type4(output[4], alpha[4]);
 
 #ifdef SCALE_ACTIVATION
   output[0] = fuse_scale(output[0], 1.f, 0.f, 0.f);
diff --git a/lite/backends/opencl/cl_kernel/image/conv2d_7x7_kernel.cl b/lite/backends/opencl/cl_kernel/image/conv2d_7x7_kernel.cl
index 67ffea51539..2ffd988f180 100644
--- a/lite/backends/opencl/cl_kernel/image/conv2d_7x7_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/image/conv2d_7x7_kernel.cl
@@ -18,7 +18,8 @@ __kernel void conv2d_7x7(__private const int global_size_dim0,
                          __private const int input_width,  /* of one block */
                          __private const int input_height, /* of one block */
                          __private const int output_width,
-                         __private const int output_height) {
+                         __private const int output_height,
+                         __read_only image2d_t prelu_alpha) {
 
   const int out_c = get_global_id(0);
   const int out_w = get_global_id(1);
@@ -123,7 +124,21 @@ __kernel void conv2d_7x7(__private const int global_size_dim0,
            READ_IMG_TYPE(CL_DTYPE_CHAR, new_biase, SAMPLER, (int2)(out_c, 0));
 #endif
 
-  output = activation_type4(output);
+CL_DTYPE4 alpha0;
+#ifdef PRELU_CH //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(out_c, 0));
+  //}
+#elif defined(PRELU_ELE) //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos);
+  //}
+#elif defined(PRELU_ALL) //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+  alpha0.y = alpha0.x;
+  alpha0.z = alpha0.x;
+  alpha0.w = alpha0.x;
+  //}
+#endif
+  output = activation_type4(output, alpha0);
 
 #ifdef SCALE_ACTIVATION
   output = fuse_scale(output, 1.f, 0.f, 0.f);
diff --git a/lite/backends/opencl/cl_kernel/image/conv2d_7x7_opt_kernel.cl b/lite/backends/opencl/cl_kernel/image/conv2d_7x7_opt_kernel.cl
index cc64137fcd5..497234428a8 100644
--- a/lite/backends/opencl/cl_kernel/image/conv2d_7x7_opt_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/image/conv2d_7x7_opt_kernel.cl
@@ -30,7 +30,8 @@ __kernel void conv2d_7x7_opt(__private const int item_ch,
                              __private const int in_w,
                              __private const int in_h,
                              __private const int out_w,
-                             __private const int out_h) {
+                             __private const int out_h,
+                             __read_only image2d_t prelu_alpha) {
   // filter
   const int filter_w = 7;
   const int filter_h = 7;
@@ -222,11 +223,56 @@ __kernel void conv2d_7x7_opt(__private const int item_ch,
     }
   }
 
-  output[0] = activation_type4(output[0]);
-  output[1] = activation_type4(output[1]);
-  output[2] = activation_type4(output[2]);
-  output[3] = activation_type4(output[3]);
-  output[4] = activation_type4(output[4]);
+CL_DTYPE4 alpha[5];
+#ifdef PRELU_CH //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(item_ch_id, 0));
+  alpha[1] = alpha[0];
+  alpha[2] = alpha[0];
+  alpha[3] = alpha[0];
+  alpha[4] = alpha[0];
+  //}
+#elif defined(PRELU_ELE) //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                            prelu_alpha,
+                            SAMPLER,
+                            (int2)(out_w_base_id + out_w_id0, item_h_id));
+  if (out_w_id1 < out_w) {
+    alpha[1] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id1, item_h_id));
+  }
+  if (out_w_id2 < out_w) {
+    alpha[2] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id2, item_h_id));
+  }
+  if (out_w_id3 < out_w) {
+    alpha[3] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id3, item_h_id));
+  }
+  if (out_w_id4 < out_w) {
+    alpha[4] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id4, item_h_id));
+  }
+  //}
+#elif defined(PRELU_ALL) //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+  alpha[0].y = alpha[0].x; alpha[0].z = alpha[0].x; alpha[0].w = alpha[0].x;
+  alpha[1] = alpha[0]; alpha[2] = alpha[0];
+  alpha[3] = alpha[0]; alpha[4] = alpha[0];
+  //}
+#endif
+  output[0] = activation_type4(output[0], alpha[0]);
+  output[1] = activation_type4(output[1], alpha[1]);
+  output[2] = activation_type4(output[2], alpha[2]);
+  output[3] = activation_type4(output[3], alpha[3]);
+  output[4] = activation_type4(output[4], alpha[4]);
 
 #ifdef SCALE_ACTIVATION
   output[0] = fuse_scale(output[0], 1.f, 0.f, 0.f);
@@ -281,7 +327,8 @@ __kernel void conv2d_7x7_multi_batch(__private const int item_ch,
                                      __private const int in_w,
                                      __private const int in_h,
                                      __private const int out_w,
-                                     __private const int out_h) {
+                                     __private const int out_h,
+                                     __read_only image2d_t prelu_alpha) {
   // filter
   const int filter_w = 7;
   const int filter_h = 7;
@@ -477,11 +524,56 @@ __kernel void conv2d_7x7_multi_batch(__private const int item_ch,
     }
   }
 
-  output[0] = activation_type4(output[0]);
-  output[1] = activation_type4(output[1]);
-  output[2] = activation_type4(output[2]);
-  output[3] = activation_type4(output[3]);
-  output[4] = activation_type4(output[4]);
+CL_DTYPE4 alpha[5];
+#ifdef PRELU_CH //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(item_ch_id, 0));
+  alpha[1] = alpha[0];
+  alpha[2] = alpha[0];
+  alpha[3] = alpha[0];
+  alpha[4] = alpha[0];
+  //}
+#elif defined(PRELU_ELE) //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                            prelu_alpha,
+                            SAMPLER,
+                            (int2)(out_w_base_id + out_w_id0, item_h_id));
+  if (out_w_id1 < out_w) {
+    alpha[1] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id1, item_h_id));
+  }
+  if (out_w_id2 < out_w) {
+    alpha[2] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id2, item_h_id));
+  }
+  if (out_w_id3 < out_w) {
+    alpha[3] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id3, item_h_id));
+  }
+  if (out_w_id4 < out_w) {
+    alpha[4] = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                              prelu_alpha,
+                              SAMPLER,
+                              (int2)(out_w_base_id + out_w_id4, item_h_id));
+  }
+  //}
+#elif defined(PRELU_ALL) //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+  alpha[0].y = alpha[0].x; alpha[0].z = alpha[0].x; alpha[0].w = alpha[0].x;
+  alpha[1] = alpha[0]; alpha[2] = alpha[0];
+  alpha[3] = alpha[0]; alpha[4] = alpha[0];
+  //}
+#endif
+  output[0] = activation_type4(output[0], alpha[0]);
+  output[1] = activation_type4(output[1], alpha[1]);
+  output[2] = activation_type4(output[2], alpha[2]);
+  output[3] = activation_type4(output[3], alpha[3]);
+  output[4] = activation_type4(output[4], alpha[4]);
 
 #ifdef SCALE_ACTIVATION
   output[0] = fuse_scale(output[0], 1.f, 0.f, 0.f);
diff --git a/lite/backends/opencl/cl_kernel/image/conv2d_common_kernel.cl b/lite/backends/opencl/cl_kernel/image/conv2d_common_kernel.cl
index 962fa3a7182..d60440aaea5 100644
--- a/lite/backends/opencl/cl_kernel/image/conv2d_common_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/image/conv2d_common_kernel.cl
@@ -33,7 +33,8 @@ __kernel void conv2d_common(__private const int global_size_dim0,
                             __private const int padding_width,
                             __private const int padding_height, 
                             __private const int dilation_width,
-                            __private const int dilation_height) {
+                            __private const int dilation_height,
+                            __read_only image2d_t prelu_alpha) {
     const int out_channel_block_idx = get_global_id(0);
     const int out_width_block_idx   = get_global_id(1);
     const int output_bh_idx         = get_global_id(2);
@@ -86,7 +87,6 @@ __kernel void conv2d_common(__private const int global_size_dim0,
   CL_DTYPE4 out3 = out0;
 #endif
 
-
     int in_width0 = mad24(out_width_block_idx, stride_width << 2, -padding_width);
     int in_width1 = in_width0 + stride_width;
     int in_width2 = in_width0 + stride_width * 2;
@@ -152,16 +152,57 @@ __kernel void conv2d_common(__private const int global_size_dim0,
             }
         }
     }
-    out0 = activation_type4(out0);
-    out1 = activation_type4(out1);
-    out2 = activation_type4(out2);
-    out3 = activation_type4(out3);
+CL_DTYPE4 alpha0, alpha1, alpha2, alpha3;
+#ifdef PRELU_CH //{
+    alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(out_channel_block_idx, 0));
+    alpha1 = alpha0;
+    alpha2 = alpha0;
+    alpha3 = alpha0;
+  //}
+#elif defined(PRELU_ELE) //{
+    alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                                prelu_alpha,
+                                SAMPLER,
+                                (int2)(out_w_base_id + out_w_id0, output_bh_idx));
+    if (out_w_id1 < output_width) {
+        alpha1 = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                                prelu_alpha,
+                                SAMPLER,
+                                (int2)(out_w_base_id + out_w_id1, output_bh_idx));
+    }
+    if (out_w_id2 < output_width) {
+        alpha2 = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                                prelu_alpha,
+                                SAMPLER,
+                                (int2)(out_w_base_id + out_w_id2, output_bh_idx));
+    }
+    if (out_w_id3 < output_width) {
+        alpha3 = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                                prelu_alpha,
+                                SAMPLER,
+                                (int2)(out_w_base_id + out_w_id3, output_bh_idx));
+    }
+  //}
+#elif defined(PRELU_ALL) //{
+    alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+    alpha0.y = alpha0.x;
+    alpha0.z = alpha0.x;
+    alpha0.w = alpha0.x;
+    alpha1 = alpha0;
+    alpha2 = alpha0;
+    alpha3 = alpha0;
+    //}
+#endif
+    out0 = activation_type4(out0, alpha0);
+    out1 = activation_type4(out1, alpha1);
+    out2 = activation_type4(out2, alpha2);
+    out3 = activation_type4(out3, alpha3);
 
 #ifdef SCALE_ACTIVATION
-  out0 = fuse_scale(out0, 1.f, 0.f, 0.f);
-  out1 = fuse_scale(out1, 1.f, 0.f, 0.f);
-  out2 = fuse_scale(out2, 1.f, 0.f, 0.f);
-  out3 = fuse_scale(out3, 1.f, 0.f, 0.f);
+    out0 = fuse_scale(out0, 1.f, 0.f, 0.f);
+    out1 = fuse_scale(out1, 1.f, 0.f, 0.f);
+    out2 = fuse_scale(out2, 1.f, 0.f, 0.f);
+    out3 = fuse_scale(out3, 1.f, 0.f, 0.f);
 #endif
 
     const int out_x_base = mul24(out_channel_block_idx, output_width);
diff --git a/lite/backends/opencl/cl_kernel/image/depthwise_conv2d_basic_kernel.cl b/lite/backends/opencl/cl_kernel/image/depthwise_conv2d_basic_kernel.cl
index 1d8a64793fd..717db4784b6 100755
--- a/lite/backends/opencl/cl_kernel/image/depthwise_conv2d_basic_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/image/depthwise_conv2d_basic_kernel.cl
@@ -33,7 +33,8 @@ __kernel void depth_conv2d_common(__private const int global_size_dim0, // (out_
                                   __private const int output_width,
                                   __private const int output_height,
                                   __private const int filter_width,
-                                  __private const int filter_height) {
+                                  __private const int filter_height,
+                                  __read_only image2d_t prelu_alpha) {
 
   const int out_c_blk = get_global_id(0); // [0, (C+3)/4)
   const int out_w_blk = get_global_id(1); // [0, (W+3)/4)
@@ -107,10 +108,51 @@ __kernel void depth_conv2d_common(__private const int global_size_dim0, // (out_
     }
   }
 
-  out0 = activation_type4(out0);
-  out1 = activation_type4(out1);
-  out2 = activation_type4(out2);
-  out3 = activation_type4(out3);
+CL_DTYPE4 alpha0, alpha1, alpha2, alpha3;
+#ifdef PRELU_CH //{
+    alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(out_channel_block_idx, 0));
+    alpha1 = alpha0;
+    alpha2 = alpha0;
+    alpha3 = alpha0;
+  //}
+#elif defined(PRELU_ELE) //{
+    alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                                prelu_alpha,
+                                SAMPLER,
+                                (int2)(out_w_base_id + out_w_id0, output_bh_idx));
+    if (out_w_id1 < output_width) {
+        alpha1 = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                                prelu_alpha,
+                                SAMPLER,
+                                (int2)(out_w_base_id + out_w_id1, output_bh_idx));
+    }
+    if (out_w_id2 < output_width) {
+        alpha2 = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                                prelu_alpha,
+                                SAMPLER,
+                                (int2)(out_w_base_id + out_w_id2, output_bh_idx));
+    }
+    if (out_w_id3 < output_width) {
+        alpha3 = READ_IMG_TYPE(CL_DTYPE_CHAR,
+                                prelu_alpha,
+                                SAMPLER,
+                                (int2)(out_w_base_id + out_w_id3, output_bh_idx));
+    }
+  //}
+#elif defined(PRELU_ALL) //{
+    alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+    alpha0.y = alpha0.x;
+    alpha0.z = alpha0.x;
+    alpha0.w = alpha0.x;
+    alpha1 = alpha0;
+    alpha2 = alpha0;
+    alpha3 = alpha0;
+    //}
+#endif
+    out0 = activation_type4(out0, alpha0);
+    out1 = activation_type4(out1, alpha1);
+    out2 = activation_type4(out2, alpha2);
+    out3 = activation_type4(out3, alpha3);
 
 #ifdef SCALE_ACTIVATION
   out0 = fuse_scale(out0, 1.f, 0.f, 0.f);
@@ -156,7 +198,8 @@ __kernel void depth_conv2d(__private const int global_size_dim0,
                            __private const int output_width,
                            __private const int output_height,
                            __private const int filter_width,
-                           __private const int filter_height) {
+                           __private const int filter_height,
+                           __read_only image2d_t prelu_alpha) {
 
   const int out_c = get_global_id(0);
   const int out_w = get_global_id(1);
@@ -210,7 +253,21 @@ __kernel void depth_conv2d(__private const int global_size_dim0,
     }
   }
 
-  output = activation_type4(output);
+CL_DTYPE4 alpha0;
+#ifdef PRELU_CH //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(out_c, 0));
+  //}
+#elif defined(PRELU_ELE) //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos);
+  //}
+#else //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+  alpha0.y = alpha0.x;
+  alpha0.z = alpha0.x;
+  alpha0.w = alpha0.x;
+  //}
+#endif
+  output = activation_type4(output, alpha0);
 
 #ifdef SCALE_ACTIVATION
   output = fuse_scale(output, 1.f, 0.f, 0.f);
diff --git a/lite/backends/opencl/cl_kernel/image/depthwise_conv2d_kernel.cl b/lite/backends/opencl/cl_kernel/image/depthwise_conv2d_kernel.cl
index 729298a4b6b..c5ef38038e3 100755
--- a/lite/backends/opencl/cl_kernel/image/depthwise_conv2d_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/image/depthwise_conv2d_kernel.cl
@@ -30,7 +30,8 @@ __kernel void depth_conv2d_3x3(
     __private const int input_width,  /* of one block */
     __private const int input_height, /* of one block */
     __private const int output_width,
-    __private const int output_height) {
+    __private const int output_height,
+    __read_only image2d_t prelu_alpha) {
 
   const int out_c = get_global_id(0);
   const int out_w = get_global_id(1);
@@ -205,7 +206,21 @@ __kernel void depth_conv2d_3x3(
     output += inputs[i] * filters[i];
   }
 
-  output = activation_type4(output);
+CL_DTYPE4 alpha0;
+#ifdef PRELU_CH //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(out_c, 0));
+  //}
+#elif defined(PRELU_ELE) //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos);
+  //}
+#else //{
+  alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+  alpha0.y = alpha0.x;
+  alpha0.z = alpha0.x;
+  alpha0.w = alpha0.x;
+  //}
+#endif
+  output = activation_type4(output, alpha0);
 
 #ifdef SCALE_ACTIVATION
   output = fuse_scale(output, 1.f, 0.f, 0.f);
@@ -252,7 +267,8 @@ __kernel void depth_conv2d_3x3s1(__private const int ou_ch_blk,
                                  __private const int in_w, /* of one block */
                                  __private const int in_h, /* of one block */
                                  __private const int ou_w,
-                                 __private const int ou_h) {
+                                 __private const int ou_h,
+                                 __read_only image2d_t prelu_alpha) {
 
   const int ou_ch_blk_id = get_global_id(0);
   const int ou_w_blk_id = get_global_id(1);
@@ -363,8 +379,29 @@ __kernel void depth_conv2d_3x3s1(__private const int ou_ch_blk,
   output[0] = mad(inputs[10], filters[8], output[0]);
   output[1] = mad(inputs[11], filters[8], output[1]);
 
-  output[0] = activation_type4(output[0]);
-  output[1] = activation_type4(output[1]);
+CL_DTYPE4 alpha[2];
+#ifdef PRELU_CH //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(ou_ch_blk_id, 0));
+  alpha[1] = alpha[0];
+  //}
+#elif defined(PRELU_ELE) //{
+  alpha[0] =
+      READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(ou_x, ou_nh_id));
+  if (ou_col_id + 1 < ou_w) {
+    alpha[1] =
+        READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(ou_x + 1, ou_nh_id));
+  }
+  //}
+#else //{
+  alpha[0] = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
+  alpha[0].y = alpha[0].x;
+  alpha[0].z = alpha[0].x;
+  alpha[0].w = alpha[0].x;
+  alpha[1] = alpha[0];
+  //}
+#endif
+  output[0] = activation_type4(output[0], alpha[0]);
+  output[1] = activation_type4(output[1], alpha[1]);
 
 #ifdef SCALE_ACTIVATION
   output[0] = fuse_scale(output[0], 1.f, 0.f, 0.f);
diff --git a/lite/backends/opencl/cl_kernel/image/elementwise_add_kernel.cl b/lite/backends/opencl/cl_kernel/image/elementwise_add_kernel.cl
index 0dc287ed847..a7cc062b664 100644
--- a/lite/backends/opencl/cl_kernel/image/elementwise_add_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/image/elementwise_add_kernel.cl
@@ -33,7 +33,8 @@ __kernel void elementwise_add(__read_only image2d_t input,
      CL_DTYPE4 in = READ_IMG_TYPE(CL_DTYPE_CHAR, input, SAMPLER, coords);
 #endif
      CL_DTYPE4 biase = READ_IMG_TYPE(CL_DTYPE_CHAR, bias, SAMPLER, coords);
-     CL_DTYPE4 output = activation_type4(in + biase);
+     CL_DTYPE4 alpha;
+     CL_DTYPE4 output = activation_type4(in + biase, alpha);
 
      WRITE_IMG_TYPE(CL_DTYPE_CHAR, outputImage,coords,output);
  }
diff --git a/lite/backends/opencl/cl_kernel/image/elementwise_sub_kernel.cl b/lite/backends/opencl/cl_kernel/image/elementwise_sub_kernel.cl
index 3bcc2159705..c31131f15ae 100644
--- a/lite/backends/opencl/cl_kernel/image/elementwise_sub_kernel.cl
+++ b/lite/backends/opencl/cl_kernel/image/elementwise_sub_kernel.cl
@@ -26,7 +26,8 @@ __kernel void elementwise_sub(__read_only image2d_t input,
 
      CL_DTYPE4 in = READ_IMG_TYPE(CL_DTYPE_CHAR, input, SAMPLER, coords);
      CL_DTYPE4 biase = READ_IMG_TYPE(CL_DTYPE_CHAR, bias, SAMPLER, coords);
-     CL_DTYPE4 output = activation_type4(in - biase);
+     CL_DTYPE4 alpha;
+     CL_DTYPE4 output = activation_type4(in - biase, alpha);
 
      WRITE_IMG_TYPE(CL_DTYPE_CHAR, outputImage,coords,output);
  }
diff --git a/lite/core/mir/fusion/conv_activation_fuse_pass.cc b/lite/core/mir/fusion/conv_activation_fuse_pass.cc
index 5d8a1fece3a..20c0b978cec 100644
--- a/lite/core/mir/fusion/conv_activation_fuse_pass.cc
+++ b/lite/core/mir/fusion/conv_activation_fuse_pass.cc
@@ -56,6 +56,7 @@ void ConvActivationFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
     act_types.push_back("leaky_relu");
     act_types.push_back("hard_swish");
     act_types.push_back("hard_sigmoid");
+    act_types.push_back("prelu");
   }
   if (!has_int8 && has_cuda) {
     act_types.push_back("leaky_relu");
@@ -64,10 +65,17 @@ void ConvActivationFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
     act_types.push_back("relu");
     act_types.push_back("relu6");
   }
+  bool has_alpha = false;
   for (auto conv_type : {"conv2d", "depthwise_conv2d", "conv2d_transpose"}) {
     for (auto act_type : act_types) {
+      if (act_type == "prelu") {
+        has_alpha = true;
+      } else {
+        has_alpha = false;
+      }
       for (auto has_bias : {true, false}) {
-        fusion::ConvActivationFuser fuser(conv_type, act_type, has_bias);
+        fusion::ConvActivationFuser fuser(
+            conv_type, act_type, has_bias, has_alpha);
         fuser(graph.get());
       }
     }
diff --git a/lite/core/mir/fusion/conv_activation_fuser.cc b/lite/core/mir/fusion/conv_activation_fuser.cc
index 413941d301a..49c5029ff9c 100644
--- a/lite/core/mir/fusion/conv_activation_fuser.cc
+++ b/lite/core/mir/fusion/conv_activation_fuser.cc
@@ -28,9 +28,13 @@ void ConvActivationFuser::BuildPattern() {
   auto* filter =
       VarNode("filter")->assert_is_op_input(conv_type_, "Filter")->AsInput();
   PMNode* bias = nullptr;
+  PMNode* alpha = nullptr;
   if (has_bias_) {
     bias = VarNode("bias")->assert_is_op_input(conv_type_, "Bias")->AsInput();
   }
+  if (has_alpha_) {
+    alpha = VarNode("alpha")->assert_is_op_input(act_type_, "Alpha")->AsInput();
+  }
   auto* conv2d = OpNode("conv2d", conv_type_)->AsIntermediate();
 
   auto* act = OpNode("act", act_type_)->AsIntermediate();
@@ -49,6 +53,9 @@ void ConvActivationFuser::BuildPattern() {
   if (has_bias_) {
     *bias >> *conv2d;
   }
+  if (has_alpha_) {
+    *alpha >> *act;
+  }
 }
 
 void ConvActivationFuser::InsertNewNode(SSAGraph* graph,
@@ -67,6 +74,9 @@ void ConvActivationFuser::InsertNewNode(SSAGraph* graph,
   if (has_bias_) {
     IR_NODE_LINK_TO(matched.at("bias"), new_op_node);
   }
+  if (has_alpha_) {
+    IR_NODE_LINK_TO(matched.at("alpha"), new_op_node);
+  }
   IR_NODE_LINK_TO(new_op_node, matched.at("output"));
 }
 
@@ -97,6 +107,10 @@ cpp::OpDesc ConvActivationFuser::GenOpDesc(const key2nodes_t& matched) {
     float offset = act_op_desc.GetAttr<float>("offset");
     op_desc.SetAttr("slope", slope);
     op_desc.SetAttr("offset", offset);
+  } else if (act_type_ == "prelu") {
+    auto prelu_mode = act_op_desc.GetAttr<std::string>("mode");
+    op_desc.SetAttr("prelu_mode", prelu_mode);
+    op_desc.SetInput("Prelu_alpha", {matched.at("alpha")->arg()->name});
   }
   return op_desc;
 }
diff --git a/lite/core/mir/fusion/conv_activation_fuser.h b/lite/core/mir/fusion/conv_activation_fuser.h
index d352a32f9f8..04951b4c2cb 100644
--- a/lite/core/mir/fusion/conv_activation_fuser.h
+++ b/lite/core/mir/fusion/conv_activation_fuser.h
@@ -27,10 +27,12 @@ class ConvActivationFuser : public FuseBase {
  public:
   explicit ConvActivationFuser(const std::string& conv_type,
                                const std::string& act_type,
-                               bool has_bias) {
+                               bool has_bias,
+                               bool has_alpha) {
     conv_type_ = conv_type;
     act_type_ = act_type;
     has_bias_ = has_bias;
+    has_alpha_ = has_alpha;
   }
 
   void BuildPattern() override;
@@ -41,6 +43,7 @@ class ConvActivationFuser : public FuseBase {
   std::string conv_type_;
   std::string act_type_;
   bool has_bias_;
+  bool has_alpha_;
 };
 
 }  // namespace fusion
diff --git a/lite/kernels/arm/conv_compute.cc b/lite/kernels/arm/conv_compute.cc
index ad80a8763f6..03dfd5df4dd 100644
--- a/lite/kernels/arm/conv_compute.cc
+++ b/lite/kernels/arm/conv_compute.cc
@@ -199,6 +199,7 @@ typedef paddle::lite::kernels::arm::ConvCompute<PRECISION(kFP16),
 REGISTER_LITE_KERNEL(conv2d, kARM, kFP16, kNCHW, ConvFp16, def)
     .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindInput("Prelu_alpha", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindPaddleOpVersion("conv2d", 1)
@@ -216,6 +217,7 @@ REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFP16, kNCHW, ConvFp16, def)
 REGISTER_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, ConvFp32, def)
     .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindInput("Prelu_alpha", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindPaddleOpVersion("conv2d", 1)
@@ -224,6 +226,7 @@ REGISTER_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, ConvFp32, def)
 REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, ConvFp32, def)
     .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindInput("Prelu_alpha", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindPaddleOpVersion("depthwise_conv2d", 1)
@@ -232,6 +235,8 @@ REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, ConvFp32, def)
 REGISTER_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, ConvInt8_Int8, int8_out)
     .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
     .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
+    .BindInput("Prelu_alpha",
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
     .BindInput("Filter",
                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
     .BindOutput("Output",
@@ -242,6 +247,8 @@ REGISTER_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, ConvInt8_Int8, int8_out)
 REGISTER_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, ConvInt8_Fp32, fp32_out)
     .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
     .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
+    .BindInput("Prelu_alpha",
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
     .BindInput("Filter",
                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
     .BindOutput("Output",
@@ -253,6 +260,8 @@ REGISTER_LITE_KERNEL(
     depthwise_conv2d, kARM, kInt8, kNCHW, ConvInt8_Int8, int8_out)
     .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
     .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
+    .BindInput("Prelu_alpha",
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
     .BindInput("Filter",
                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
     .BindOutput("Output",
@@ -264,6 +273,8 @@ REGISTER_LITE_KERNEL(
     depthwise_conv2d, kARM, kInt8, kNCHW, ConvInt8_Fp32, fp32_out)
     .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
     .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
+    .BindInput("Prelu_alpha",
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
     .BindInput("Filter",
                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
     .BindOutput("Output",
diff --git a/lite/kernels/opencl/activation_image_compute.cc b/lite/kernels/opencl/activation_image_compute.cc
index 205a391dda5..44b75c0480a 100644
--- a/lite/kernels/opencl/activation_image_compute.cc
+++ b/lite/kernels/opencl/activation_image_compute.cc
@@ -60,33 +60,60 @@ class ActivationComputeImageDefault
           tensor_hold_alpha_image_ = std::unique_ptr<Tensor>(new Tensor);
           kernel_func_name_ = "prelu_channel";
           auto& out_dims = act_param_->Out->dims();
-          width_ = out_dims[3];
+          if (out_dims.size() == 4) {
+            width_ = out_dims[3];
+            CLImageConverterFolder alpha_converter;
+            const DDim& alpha_image_dims = alpha_converter.InitImageDimInfoWith(
+                act_param_->Prelu_alpha->dims());
+            tensor_hold_alpha_image_->Resize(
+                {1, alpha_image_dims[0], alpha_image_dims[1], 4});
 
-          CLImageConverterFolder alpha_converter;
-          const DDim& alpha_image_dims = alpha_converter.InitImageDimInfoWith(
-              act_param_->Prelu_alpha->dims());
-          tensor_hold_alpha_image_->Resize(
-              {1, alpha_image_dims[0], alpha_image_dims[1], 4});
+            auto* alpha_image_data = MUTABLE_DATA_CPU(tensor_hold_alpha_image_);
+            auto* alpha_cpu_data =
+                act_param_->Prelu_alpha->mutable_data<float>();
+            alpha_converter.NCHWToImage(alpha_cpu_data,
+                                        alpha_image_data,
+                                        act_param_->Prelu_alpha->dims());
 
-          auto* alpha_image_data = MUTABLE_DATA_CPU(tensor_hold_alpha_image_);
-          auto* alpha_cpu_data = act_param_->Prelu_alpha->mutable_data<float>();
-          alpha_converter.NCHWToImage(alpha_cpu_data,
-                                      alpha_image_data,
-                                      act_param_->Prelu_alpha->dims());
+            MUTABLE_DATA_GPU(alpha_gpu_image_,
+                             alpha_image_dims[0],
+                             alpha_image_dims[1],
+                             alpha_image_data);
+          } else if (out_dims.size() == 2) {
+            width_ = 1;
+            CLImageConverterDefault alpha_converter;
+            const DDim& alpha_image_dims = alpha_converter.InitImageDimInfoWith(
+                act_param_->Prelu_alpha->dims());
+            tensor_hold_alpha_image_->Resize(
+                {1, alpha_image_dims[0], alpha_image_dims[1], 4});
 
-          MUTABLE_DATA_GPU(alpha_gpu_image_,
-                           alpha_image_dims[0],
-                           alpha_image_dims[1],
-                           alpha_image_data);
+            auto* alpha_image_data = MUTABLE_DATA_CPU(tensor_hold_alpha_image_);
+            auto* alpha_cpu_data =
+                act_param_->Prelu_alpha->mutable_data<float>();
+            alpha_converter.NCHWToImage(alpha_cpu_data,
+                                        alpha_image_data,
+                                        act_param_->Prelu_alpha->dims());
+
+            MUTABLE_DATA_GPU(alpha_gpu_image_,
+                             alpha_image_dims[0],
+                             alpha_image_dims[1],
+                             alpha_image_data);
+          } else {
+            LOG(FATAL) << "unsupport dims.size(): " << out_dims.size();
+          }
         } else {
           alpha_gpu_image_ = std::unique_ptr<Tensor>(new Tensor);
           tensor_hold_alpha_image_ = std::unique_ptr<Tensor>(new Tensor);
           kernel_func_name_ = "prelu_element";
           auto& in_dim = act_param_->X->dims();
-          height_ = in_dim[2];
+          if (in_dim.size() > 3) {
+            height_ = in_dim[2];
+          } else {
+            height_ = 1;
+          }
           scale_ = act_param_->Leaky_relu_alpha;
 
-          CLImageConverterFolder alpha_converter;
+          CLImageConverterDefault alpha_converter;
           const DDim& alpha_image_dims = alpha_converter.InitImageDimInfoWith(
               act_param_->Prelu_alpha->dims());
           tensor_hold_alpha_image_->Resize(
diff --git a/lite/kernels/opencl/conv_image_compute.cc b/lite/kernels/opencl/conv_image_compute.cc
index da56a971cc8..17e1941bae8 100644
--- a/lite/kernels/opencl/conv_image_compute.cc
+++ b/lite/kernels/opencl/conv_image_compute.cc
@@ -301,6 +301,14 @@ void ConvImageCompute::PrepareForRun() {
           << static_cast<int>(conv_param_->activation_param.active_type)
           << " conv_param_->activation_param.has_active:"
           << conv_param_->activation_param.has_active;
+  // alpha_image_p_ init
+  alpha_gpu_image_ = std::unique_ptr<Tensor>(new Tensor);
+  std::unique_ptr<Tensor> tensor_hold_alpha_image =
+      std::unique_ptr<Tensor>(new Tensor);
+  tensor_hold_alpha_image->Resize({1, 1, 1, 4});
+  auto* alpha_image_data = DATA_GPU(tensor_hold_alpha_image);
+  MUTABLE_DATA_GPU(alpha_gpu_image_, 1, 1, alpha_image_data);
+  alpha_image_p_ = DATA_GPU(alpha_gpu_image_);
   if (conv_param_->activation_param.has_active) {
     if (conv_param_->activation_param.active_type ==
         lite_api::ActivationType::kRelu) {
@@ -333,6 +341,34 @@ void ConvImageCompute::PrepareForRun() {
           std::to_string(conv_param_->activation_param.hard_sigmoid_offset);
       build_options_single += " -DHARD_SIGMOID -DHARD_SIGMOID_SLOPE=" + slope +
                               "f" + " -DHARD_SIGMOID_OFFSET=" + offset + "f";
+    } else if (conv_param_->activation_param.active_type ==
+               lite_api::ActivationType::kPRelu) {
+      std::string prelu_mode = conv_param_->activation_param.Prelu_mode;
+      build_options_single += " -DPRELU";
+      if (prelu_mode == "channel") {
+        build_options_single += " -DPRELU_CH";
+      } else if (prelu_mode == "element") {
+        build_options_single += " -DPRELU_ELE";
+      } else {
+        build_options_single += " -DPRELU_ALL";
+      }
+      CLImageConverterFolder alpha_converter;
+      const DDim& alpha_image_dims = alpha_converter.InitImageDimInfoWith(
+          conv_param_->activation_param.Prelu_alpha->dims());
+      tensor_hold_alpha_image->Resize(
+          {1, alpha_image_dims[0], alpha_image_dims[1], 4});
+      auto* alpha_image_data = MUTABLE_DATA_CPU(tensor_hold_alpha_image);
+      auto* alpha_cpu_data =
+          conv_param_->activation_param.Prelu_alpha->mutable_data<float>();
+      alpha_converter.NCHWToImage(
+          alpha_cpu_data,
+          alpha_image_data,
+          conv_param_->activation_param.Prelu_alpha->dims());
+      MUTABLE_DATA_GPU(alpha_gpu_image_,
+                       alpha_image_dims[0],
+                       alpha_image_dims[1],
+                       alpha_image_data);
+      alpha_image_p_ = DATA_GPU(alpha_gpu_image_);
     } else {
       LOG(FATAL) << "Unsupported activation type:"
                  << static_cast<int>(conv_param_->activation_param.active_type);
@@ -678,6 +714,8 @@ void ConvImageCompute::Conv2d1x1opt() {
   CL_CHECK_FATAL(status_);
   status_ = kernel_.setArg(16, default_w_blk_);
   CL_CHECK_FATAL(status_);
+  status_ = kernel_.setArg(17, *alpha_image_p_);
+  CL_CHECK_FATAL(status_);
 }
 
 void ConvImageCompute::Conv2d3x3() {
@@ -724,6 +762,8 @@ void ConvImageCompute::Conv2d3x3() {
   CL_CHECK_FATAL(status_);
   status_ = kernel_.setArg(20, input_tensor_c_);
   CL_CHECK_FATAL(status_);
+  status_ = kernel_.setArg(21, *alpha_image_p_);
+  CL_CHECK_FATAL(status_);
 }
 
 void ConvImageCompute::Conv2d3x3opt() {
@@ -759,6 +799,8 @@ void ConvImageCompute::Conv2d3x3opt() {
   CL_CHECK_FATAL(status_);
   status_ = kernel_.setArg(15, output_tensor_h_);
   CL_CHECK_FATAL(status_);
+  status_ = kernel_.setArg(16, *alpha_image_p_);
+  CL_CHECK_FATAL(status_);
 }
 
 void ConvImageCompute::Conv2d5x5() {
@@ -793,6 +835,8 @@ void ConvImageCompute::Conv2d5x5() {
   CL_CHECK_FATAL(status_);
   status_ = kernel_.setArg(14, output_tensor_h_);
   CL_CHECK_FATAL(status_);
+  status_ = kernel_.setArg(15, *alpha_image_p_);
+  CL_CHECK_FATAL(status_);
 }
 
 void ConvImageCompute::Conv2d5x5opt() {
@@ -828,6 +872,8 @@ void ConvImageCompute::Conv2d5x5opt() {
   CL_CHECK_FATAL(status_);
   status_ = kernel_.setArg(15, output_tensor_h_);
   CL_CHECK_FATAL(status_);
+  status_ = kernel_.setArg(16, *alpha_image_p_);
+  CL_CHECK_FATAL(status_);
 }
 
 void ConvImageCompute::Conv2d7x7() {
@@ -862,6 +908,8 @@ void ConvImageCompute::Conv2d7x7() {
   CL_CHECK_FATAL(status_);
   status_ = kernel_.setArg(13, output_tensor_h_);
   CL_CHECK_FATAL(status_);
+  status_ = kernel_.setArg(14, *alpha_image_p_);
+  CL_CHECK_FATAL(status_);
 }
 
 void ConvImageCompute::Conv2d7x7opt() {
@@ -897,6 +945,8 @@ void ConvImageCompute::Conv2d7x7opt() {
   CL_CHECK_FATAL(status_);
   status_ = kernel_.setArg(15, output_tensor_h_);
   CL_CHECK_FATAL(status_);
+  status_ = kernel_.setArg(16, *alpha_image_p_);
+  CL_CHECK_FATAL(status_);
 }
 
 void ConvImageCompute::DepthwiseConv2d3x3s1() {
@@ -930,6 +980,8 @@ void ConvImageCompute::DepthwiseConv2d3x3s1() {
   CL_CHECK_FATAL(status_);
   status_ = kernel_.setArg(14, output_tensor_h_);
   CL_CHECK_FATAL(status_);
+  status_ = kernel_.setArg(15, *alpha_image_p_);
+  CL_CHECK_FATAL(status_);
 }
 
 void ConvImageCompute::DepthwiseConv2d3x3() {
@@ -966,6 +1018,8 @@ void ConvImageCompute::DepthwiseConv2d3x3() {
   CL_CHECK_FATAL(status_);
   status_ = kernel_.setArg(15, output_tensor_h_);
   CL_CHECK_FATAL(status_);
+  status_ = kernel_.setArg(16, *alpha_image_p_);
+  CL_CHECK_FATAL(status_);
 }
 
 void ConvImageCompute::DepthwiseConv2d() {
@@ -1008,6 +1062,8 @@ void ConvImageCompute::DepthwiseConv2d() {
   CL_CHECK_FATAL(status_);
   status_ = kernel_.setArg(18, filter_tensor_h_);
   CL_CHECK_FATAL(status_);
+  status_ = kernel_.setArg(19, *alpha_image_p_);
+  CL_CHECK_FATAL(status_);
 }
 
 void ConvImageCompute::Conv2dCommon() {
@@ -1052,6 +1108,8 @@ void ConvImageCompute::Conv2dCommon() {
   CL_CHECK_FATAL(status_);
   status_ = kernel_.setArg(19, dilation_h_);
   CL_CHECK_FATAL(status_);
+  status_ = kernel_.setArg(20, *alpha_image_p_);
+  CL_CHECK_FATAL(status_);
 }
 
 void ConvImageCompute::Run() {
@@ -1163,6 +1221,7 @@ REGISTER_LITE_KERNEL(conv2d,
                                       DATALAYOUT(kImageDefault))})
     .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindInput("Prelu_alpha", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Output",
                 {LiteType::GetTensorTy(TARGET(kOpenCL),
                                        PRECISION(kFP16),
@@ -1182,6 +1241,7 @@ REGISTER_LITE_KERNEL(depthwise_conv2d,
                                       DATALAYOUT(kImageDefault))})
     .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindInput("Prelu_alpha", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Output",
                 {LiteType::GetTensorTy(TARGET(kOpenCL),
                                        PRECISION(kFP16),
@@ -1202,6 +1262,7 @@ REGISTER_LITE_KERNEL(conv2d,
                                       DATALAYOUT(kImageDefault))})
     .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kHost))})
     .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindInput("Prelu_alpha", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Output",
                 {LiteType::GetTensorTy(TARGET(kOpenCL),
                                        PRECISION(kFP16),
@@ -1221,6 +1282,7 @@ REGISTER_LITE_KERNEL(depthwise_conv2d,
                                       DATALAYOUT(kImageDefault))})
     .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kHost))})
     .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindInput("Prelu_alpha", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Output",
                 {LiteType::GetTensorTy(TARGET(kOpenCL),
                                        PRECISION(kFP16),
diff --git a/lite/kernels/opencl/conv_image_compute.h b/lite/kernels/opencl/conv_image_compute.h
index 0e66a9701cd..f96887e1f11 100644
--- a/lite/kernels/opencl/conv_image_compute.h
+++ b/lite/kernels/opencl/conv_image_compute.h
@@ -85,6 +85,7 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
 
   std::unique_ptr<Tensor> filter_gpu_image_{nullptr};
   std::unique_ptr<Tensor> bias_gpu_image_{nullptr};
+  std::unique_ptr<Tensor> alpha_gpu_image_{nullptr};
   std::unique_ptr<Tensor> tensor_hold_filter_image_{nullptr};
   std::unique_ptr<Tensor> tensor_hold_bias_image_{nullptr};
   cl::NDRange global_work_size_ = cl::NDRange{
@@ -98,6 +99,7 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
   const cl::Image2D* input_image_p_{nullptr};
   const cl::Image2D* filter_image_p_{nullptr};
   const cl::Image2D* bias_image_p_{nullptr};
+  const cl::Image2D* alpha_image_p_{nullptr};
   const cl::Image2D* output_image_p_{nullptr};
 
   int stride_h_{-1};
diff --git a/lite/operators/conv_op.h b/lite/operators/conv_op.h
index e8ad812e27e..a99736ee422 100644
--- a/lite/operators/conv_op.h
+++ b/lite/operators/conv_op.h
@@ -136,6 +136,14 @@ class ConvOpLite : public OpLite {
             op_desc.GetAttr<float>("slope");
         param_.activation_param.hard_sigmoid_offset =
             op_desc.GetAttr<float>("offset");
+      } else if (act_type == "prelu") {
+        param_.activation_param.active_type = lite_api::ActivationType::kPRelu;
+        param_.activation_param.Prelu_mode =
+            op_desc.GetAttr<std::string>("prelu_mode");
+        auto prelu_alpha_name = op_desc.Input("Prelu_alpha").front();
+        auto prelu_alpha_var = scope->FindVar(prelu_alpha_name);
+        param_.activation_param.Prelu_alpha =
+            const_cast<lite::Tensor*>(&(prelu_alpha_var->Get<lite::Tensor>()));
       } else {
         LOG(FATAL) << "The fused conv only supports fuse with relu, leaky "
                       "relu, hard_swish, while the given activation type is "