add adaround post-quant method (#38460)

* add adaround post-quant method
PaddlePaddle · Mar 28, 2022 · 3d5a27f · 3d5a27f
1 parent 56dc8c7
commit 3d5a27f
Show file tree

Hide file tree

Showing 8 changed files with 709 additions and 146 deletions.
diff --git a/python/paddle/fluid/contrib/slim/quantization/adaround.py b/python/paddle/fluid/contrib/slim/quantization/adaround.py
@@ -0,0 +1,309 @@
+#   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import time
+import sys
+import logging
+
+import paddle.fluid as fluid
+
+from ....log_helper import get_logger
+from .utils import load_variable_data, set_variable_data, stable_sigmoid, quant_tensor, dequant_tensor, _channelwise_quant_axis1_ops, calculate_quant_cos_error
+
+# Module-level logger created through the project's get_logger helper at
+# INFO level with a "<asctime>-<levelname>: <message>" record format.
+_logger = get_logger(
+    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
+
+# Stretch bounds of the rectified sigmoid used for soft rounding: the sigmoid
+# output is rescaled as sigmoid(x) * (ZETA - GAMMA) + GAMMA and then clipped
+# to [0, 1] (see compute_soft_rounding / compute_soft_rounding_np).
+GAMMA = -0.1
+ZETA = 1.1
+
+
+def compute_soft_rounding(alpha_v):
+    """Build the rectified-sigmoid soft rounding of ``alpha_v``.
+
+    The sigmoid of ``alpha_v`` is rescaled with the module constants
+    (``sigmoid * (ZETA - GAMMA) + GAMMA``) and the result is clipped to
+    [0, 1]. All operations are ``fluid.layers`` ops.
+    """
+    stretched = fluid.layers.sigmoid(alpha_v) * (ZETA - GAMMA) + GAMMA
+    return fluid.layers.clip(stretched, min=0, max=1)
+
+
+def compute_soft_rounding_np(alpha_v):
+    """NumPy counterpart of ``compute_soft_rounding``.
+
+    Applies ``stable_sigmoid`` to ``alpha_v``, rescales the result as
+    ``sigmoid * (ZETA - GAMMA) + GAMMA`` and clips it to [0, 1].
+    """
+    stretched = stable_sigmoid(alpha_v) * (ZETA - GAMMA) + GAMMA
+    return np.clip(stretched, a_min=0, a_max=1)
+
+
+class AdaRoundLoss(object):
+    """Loss terms used while learning AdaRound rounding parameters.
+
+    Attributes:
+        default_reg_param: factor the rounding regularizer is multiplied by.
+        default_beta_range: ``(start_beta, end_beta)`` pair that
+            ``compute_beta`` anneals between.
+    """
+
+    def __init__(self, reg_param=0.01, default_beta_range=(20, 2)):
+        self.default_beta_range = default_beta_range
+        self.default_reg_param = reg_param
+
+    def compute_recon_loss(self, ada_quantized_output, orig_output):
+        """Return the reconstruction loss between the two outputs.
+
+        The element-wise squared error is summed over the last dimension
+        and the sums are then averaged.
+        """
+        squared_diff = fluid.layers.square_error_cost(ada_quantized_output,
+                                                      orig_output)
+        per_row_sum = fluid.layers.reduce_sum(squared_diff, dim=-1)
+        return fluid.layers.reduce_mean(per_row_sum)
+
+    def compute_round_loss(self, alpha_v, warm_start, beta):
+        """Return the rounding regularization loss.
+
+        Args:
+            alpha_v: rounding parameter variable.
+            warm_start: condition variable handed to ``fluid.layers.cond``;
+                when it is true the loss is a constant 0.0.
+            beta: exponent applied to ``|2 * h - 1|`` in the regularizer.
+        """
+
+        def zero_loss():
+            # Warm start: no rounding penalty at all.
+            return fluid.layers.fill_constant(
+                shape=[1], dtype='float32', value=0.0)
+
+        def regularized_loss():
+            # Rectified sigmoid of alpha, clipped to [0, 1].
+            soft_round = compute_soft_rounding(alpha_v)
+
+            # sum(1 - |2h - 1| ** beta): each term vanishes only when h is
+            # exactly 0 or 1, which pushes the soft rounding towards a hard
+            # rounding decision.
+            powered = fluid.layers.pow(
+                fluid.layers.abs(2 * soft_round - 1), factor=beta)
+            penalty = fluid.layers.reduce_sum(-powered + 1)
+
+            return self.default_reg_param * penalty
+
+        return fluid.layers.cond(warm_start, zero_loss, regularized_loss)
+
+    def compute_beta(self, max_iter, cur_iter, warm_start):
+        """Return the cosine-annealed beta for iteration ``cur_iter``.
+
+        Beta equals ``start_beta`` at the iteration where warm start ends
+        (``warm_start * max_iter``) and reaches ``end_beta`` at ``max_iter``.
+
+        Args:
+            max_iter: total number of iterations.
+            cur_iter: current iteration index.
+            warm_start: fraction of ``max_iter`` spent in warm start.
+        """
+        beta_start, beta_end = self.default_beta_range
+
+        # Iteration at which the warm start period ends.
+        warm_end = warm_start * max_iter
+
+        # Progress through the annealing phase (0 at warm_end, 1 at max_iter).
+        progress = (cur_iter - warm_end) / (max_iter - warm_end)
+        cosine_term = 1 + np.cos(progress * np.pi)
+
+        return beta_end + 0.5 * (beta_start - beta_end) * cosine_term
+
+
+class AdaRound(object):
+    """State and helpers for learning the adaptive rounding of one weight.
+
+    Constructing an instance also creates the trainable
+    ``<weight_var_name>.alpha`` parameter through
+    ``fluid.layers.create_parameter`` (see ``initialize_alpha``).
+    """
+
+    def __init__(self,
+                 scale,
+                 weight_tensor,
+                 scope=None,
+                 weight_var_name=None,
+                 weight_op_type=None,
+                 is_train=True,
+                 num_iterations=1000):
+        # Optimization schedule.
+        self.is_train = is_train
+        self.num_iterations = num_iterations
+        # Fraction of num_iterations treated as warm start.
+        self.warm_start = 0.1
+        self.adaround_loss = AdaRoundLoss()
+
+        # Quantization settings.
+        self.weight_bits = 8
+        self.offset = 0.  # zero-point offset
+        self.scale = scale
+        # Axis 1 for the op types listed in _channelwise_quant_axis1_ops,
+        # axis 0 for every other op type.
+        self.quant_axis = (1 if weight_op_type in _channelwise_quant_axis1_ops
+                           else 0)
+
+        # Weight bookkeeping.
+        self.scope = scope
+        self.ori_weight_tensor = weight_tensor
+        self.weight_var_name = weight_var_name
+        self.alpha_name = weight_var_name + ".alpha"
+
+        # Work on a copy so the caller's tensor is never handed out directly.
+        self.initialize_alpha(weight_tensor.copy(), scale, weight_var_name)
+
+    def initialize_alpha(self, tensor, scale, var_name):
+        """
+        Create the alpha parameter, same shape as the weight tensor.
+
+        Alpha is initialized so that the rectified sigmoid of alpha equals
+        the fractional part of the scaled weight.
+        """
+        scaled = quant_tensor(tensor, scale, quant_axis=self.quant_axis)
+        fractional = scaled - np.floor(scaled)
+        # Inverse of h = sigmoid(alpha) * (ZETA - GAMMA) + GAMMA.
+        alpha = -np.log((ZETA - GAMMA) / (fractional - GAMMA) - 1)
+        initializer = fluid.initializer.NumpyArrayInitializer(alpha)
+        self.alpha_v = fluid.layers.create_parameter(
+            shape=alpha.shape,
+            dtype="float32",
+            name=var_name + ".alpha",
+            default_initializer=initializer)
+
+    def _calculate_output_with_adarounded_weights(self, program, place, exe,
+                                                  data, fp32_fetch_list,
+                                                  weight_tensor_dequant):
+        """Write the given weights into the scope and run ``program``.
+
+        Returns whatever ``exe.run`` returns for ``fp32_fetch_list``.
+        """
+        # Overwrite the weight variable before executing the program.
+        set_variable_data(self.scope, place, self.weight_var_name,
+                          weight_tensor_dequant)
+
+        return exe.run(program=program,
+                       feed=data,
+                       fetch_list=[fp32_fetch_list],
+                       return_numpy=True,
+                       scope=self.scope)
+
+    def _calculate_quant_weight(self):
+        """Return ``floor(scaled weight) + soft rounding of alpha``."""
+        # Current value of alpha, read back from the scope.
+        alpha_value = load_variable_data(self.scope, self.alpha_name)
+        soft_round = compute_soft_rounding_np(alpha_value)
+
+        # Scale the original weight and keep its floor.
+        scaled = quant_tensor(
+            self.ori_weight_tensor.copy(),
+            self.scale,
+            quant_axis=self.quant_axis)
+        floored = np.floor(scaled)
+
+        # Add the learned soft rounding on top of the floor.
+        return np.add(floored, soft_round)
+
+    def _calculate_adarounded_weights(self):
+        """Return the adarounded weight after passing it to dequant_tensor."""
+        quantized = self._calculate_quant_weight()
+
+        return dequant_tensor(
+            quantized + self.offset, self.scale, quant_axis=self.quant_axis)
+
+    def update_final_weights(self):
+        """Return the adarounded weight without dequantizing it."""
+        return self._calculate_quant_weight()
+
+    def get_loss(self, beta, warm_start, adaround_out_tensor, orig_out_tensor):
+        """Build the loss variables.
+
+        Returns a dict with the keys 'loss', 'round_loss' and 'recon_loss'
+        (in that order), where 'loss' is the sum of the other two.
+        """
+        loss_builder = self.adaround_loss
+        round_loss = loss_builder.compute_round_loss(self.alpha_v, warm_start,
+                                                     beta)
+        recon_loss = loss_builder.compute_recon_loss(adaround_out_tensor,
+                                                     orig_out_tensor)
+        total_loss = round_loss + recon_loss
+        return {
+            'loss': total_loss,
+            'round_loss': round_loss,
+            'recon_loss': recon_loss
+        }
+
+    def update_beta_warm(self, cur_iteration):
+        """Return ``(beta, warm_start)`` for iteration ``cur_iteration``.
+
+        ``warm_start`` is True while ``cur_iteration`` is below
+        ``num_iterations * self.warm_start``.
+        """
+        in_warm_start = cur_iteration < self.num_iterations * self.warm_start
+        beta = self.adaround_loss.compute_beta(self.num_iterations,
+                                               cur_iteration, self.warm_start)
+        return beta, in_warm_start
+
+
+def run_adaround(data_loader,
+                 fp32_program,
+                 fetch_list,
+                 exe,
+                 scope,
+                 place,
+                 quantized_op_pairs,
+                 weight_op_pairs,
+                 scale_dict,
+                 num_iterations=1000,
+                 lr=0.001,
+                 fast_mode=True):
+    """Learn adaptive rounding for every weight in ``quantized_op_pairs``.
+
+    For each weight, the fetch op of ``fp32_program`` is redirected to the
+    paired output variable, a small training program optimizing the
+    ``<weight>.alpha`` parameter with Adam is built, and up to
+    ``num_iterations`` batches from ``data_loader`` are used to train it.
+    After all weights are processed, the resulting (not dequantized)
+    adarounded tensors are written back into ``scope``.
+
+    Args:
+        data_loader: callable returning an iterable of feed data.
+        fp32_program: program to run; its fetch op input is renamed in place
+            and is left pointing at the last processed output variable.
+        fetch_list: sequence whose first element's ``name`` is the variable
+            currently consumed by the fetch op.
+        exe: executor used to run all programs.
+        scope: scope that holds the weight variables.
+        place: place passed to ``set_variable_data``.
+        quantized_op_pairs: dict mapping weight variable name to the name of
+            the output variable used for reconstruction.
+        weight_op_pairs: dict mapping weight variable name to its op type.
+        scale_dict: dict mapping weight variable name to its scale.
+        num_iterations: maximum number of optimization steps per weight.
+        lr: Adam learning rate.
+        fast_mode: when True, stop optimizing a weight as soon as
+            ``calculate_quant_cos_error`` returns a value above 0.99.
+
+    Raises:
+        ValueError: if ``fp32_program`` contains no fetch op.
+    """
+    fetch_op_name = fetch_list[0].name
+    final_weight_tensor_quant_dict = {}
+    for weight_var_name, quant_op_out_name in quantized_op_pairs.items():
+        _logger.info('Start adaround op: {}'.format(weight_var_name))
+        weight_op_type = weight_op_pairs[weight_var_name]
+        # get scale and weight tensor
+        weight_var_tensor = load_variable_data(scope, weight_var_name)
+        scale = scale_dict[weight_var_name]
+
+        # Redirect the fetch op so the program outputs the tensor produced
+        # by the op that consumes this weight.
+        fp32_fetch_list = None
+        for _op in fp32_program.global_block().ops:
+            if _op.type == "fetch":
+                _op._rename_input(fetch_op_name, quant_op_out_name)
+                fp32_fetch_list = fp32_program.global_block().var(
+                    quant_op_out_name)
+                fetch_op_name = quant_op_out_name
+        if fp32_fetch_list is None:
+            # Fail early with a clear message instead of an AttributeError
+            # on `None.shape` further below.
+            raise ValueError(
+                "Cannot run adaround for {}: no fetch op found in "
+                "fp32_program.".format(weight_var_name))
+
+        # build adaround program
+        startup_program = fluid.Program()
+        train_program = fluid.Program()
+        with fluid.program_guard(train_program, startup_program):
+            with fluid.unique_name.guard():
+                # initialize adaround
+                adaround = AdaRound(
+                    scale,
+                    weight_var_tensor,
+                    scope=scope,
+                    weight_var_name=weight_var_name,
+                    weight_op_type=weight_op_type,
+                    num_iterations=num_iterations)
+                orig_out_tensor = fluid.data(
+                    name='orig_out_tensor',
+                    shape=fp32_fetch_list.shape,
+                    dtype='float32')
+                adaround_out_tensor = fluid.data(
+                    name='adaround_out_tensor',
+                    shape=fp32_fetch_list.shape,
+                    dtype='float32')
+                beta_tensor = fluid.data(
+                    name='beta', shape=[1], dtype='float32')
+                warm_start_tensor = fluid.data(
+                    name='warm_start', shape=[1], dtype='bool')
+
+                train_fetches_loss = adaround.get_loss(
+                    beta_tensor, warm_start_tensor, adaround_out_tensor,
+                    orig_out_tensor)
+                optimizer = fluid.optimizer.Adam(learning_rate=lr)
+                loss = train_fetches_loss['loss']
+                optimizer.minimize(loss)
+        # NOTE(review): the startup and training programs are run without
+        # `scope=...`, while alpha is later read back from `scope` - confirm
+        # that callers always pass the executor's default scope.
+        exe.run(startup_program)
+
+        start_time = time.time()
+        prev_start_time = start_time
+        for i, data in enumerate(data_loader()):
+            # Run exactly `num_iterations` steps; the previous check
+            # (`i == num_iterations` after the step) ran one step too many.
+            if i >= num_iterations:
+                break
+            prev_start_time = start_time
+            start_time = time.time()
+            # run fp32 model
+            np_orig_out_tensor = exe.run(program=fp32_program,
+                                         feed=data,
+                                         fetch_list=[fp32_fetch_list],
+                                         return_numpy=True,
+                                         scope=scope)
+
+            # NOTE(review): the adarounded weight written into `scope` here
+            # is not restored before the next fp32 run above - confirm that
+            # the reference output is meant to use the overwritten weight
+            # from the second batch on.
+            adaround_weight_tensor_dequant = adaround._calculate_adarounded_weights(
+            )
+            np_adaround_out_tensor = adaround._calculate_output_with_adarounded_weights(
+                fp32_program, place, exe, data, fp32_fetch_list,
+                adaround_weight_tensor_dequant)
+
+            # Skip training once the two outputs are close enough: a value
+            # above 0.99 from calculate_quant_cos_error is treated as such
+            # (presumably a cosine similarity - verify against the helper).
+            cos_error = calculate_quant_cos_error(np_orig_out_tensor[0],
+                                                  np_adaround_out_tensor[0])
+            if fast_mode and cos_error > 0.99:
+                _logger.info("The cosine error is small, skip training.")
+                break
+            beta, warm_start = adaround.update_beta_warm(i)
+            feed_dict = {
+                'orig_out_tensor': np_orig_out_tensor[0],
+                'adaround_out_tensor': np_adaround_out_tensor[0],
+                'beta': beta,
+                'warm_start': warm_start
+            }
+            # Fetch order follows the dict built by get_loss:
+            # loss, round_loss, recon_loss.
+            out = exe.run(
+                train_program,
+                feed=feed_dict,
+                fetch_list=[v.name for v in train_fetches_loss.values()],
+                return_numpy=True)
+            # The logged time is the gap between the starts of the previous
+            # and the current iteration.
+            _logger.info(
+                "Iter {:d}, lr {:.5f}, loss {:.5f}, loss_round {:.5f}, loss_recon {:.5f}, time {:.5f}s".
+                format(i, lr,
+                       np.mean(out[0]),
+                       np.mean(out[1]),
+                       np.mean(out[2]), start_time - prev_start_time))
+            sys.stdout.flush()
+        final_weight_tensor_quant_dict[
+            weight_var_name] = adaround.update_final_weights()
+        del adaround
+
+    # update adarounded calibrated weights
+    for weight_var_name in quantized_op_pairs:
+        set_variable_data(scope, place, weight_var_name,
+                          final_weight_tensor_quant_dict[weight_var_name])