
Const quantization (#1045)
Add const quantization to "add", "sub", "mul" & "div" operations.
Enabled in TPC imx500.v2.
elad-c authored Apr 18, 2024
1 parent 6cbffa7 commit 64dacc0
Showing 44 changed files with 546 additions and 152 deletions.
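What this enables, end to end: a constant feeding one of these binary ops is treated as a positional weight and can now be quantized under the TPC's default weight attribute config. A minimal sketch of the resulting flow (the TPC lookup string and PTQ entry point below follow MCT's public API around this release; treat the exact names and signatures as assumptions to verify):

```python
import numpy as np
import tensorflow as tf
import model_compression_toolkit as mct

# Toy model: "add" consumes a constant tensor, i.e. a positional weight.
inputs = tf.keras.Input(shape=(16,))
outputs = tf.add(inputs, tf.constant(np.random.randn(16).astype(np.float32)))
model = tf.keras.Model(inputs, outputs)

def representative_data_gen():
    for _ in range(10):
        yield [np.random.randn(1, 16).astype(np.float32)]

# The imx500 v2 TPC is the one that enables const quantization for add/sub/mul/div.
tpc = mct.get_target_platform_capabilities('tensorflow', 'imx500', target_platform_version='v2')
quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(
    model, representative_data_gen, target_platform_capabilities=tpc)
```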
4 changes: 2 additions & 2 deletions model_compression_toolkit/core/common/graph/base_graph.py
@@ -98,8 +98,8 @@ def set_tpc(self,
         tpc_layers = tpc.op_sets_to_layers.get_layers()
         tpc_filtered_layers = [layer for layer in tpc_layers if isinstance(layer, LayerFilterParams)]
         for n in self.nodes:
-            is_node_in_tpc = n.type in tpc_layers or any([n.is_match_filter_params(filtered_layer)
-                                                          for filtered_layer in tpc_filtered_layers])
+            is_node_in_tpc = any([n.is_match_type(_type) for _type in tpc_layers]) or \
+                             any([n.is_match_filter_params(filtered_layer) for filtered_layer in tpc_filtered_layers])
             if n.is_custom:
                 if not is_node_in_tpc:
                     Logger.critical(f'MCT does not support optimizing Keras custom layers. Found a layer of type {n.type}. '
33 changes: 25 additions & 8 deletions model_compression_toolkit/core/common/graph/base_node.py
@@ -151,7 +151,21 @@ def is_reused(self) -> bool:
         """
         return self.reuse or self.reuse_group is not None
 
-    def get_weights_by_keys(self, name: str) -> np.ndarray:
+    def _get_weight_name(self, name: Union[str, int]) -> List[Union[str, int]]:
+        """
+        Get weight names that match argument name (either string weights or integer for
+        positional weights).
+
+        Args:
+            name: weight name
+
+        Returns:
+            A list of weight names that match input "name"
+        """
+        return [k for k in self.weights.keys()
+                if (isinstance(k, int) and name == k) or (isinstance(k, str) and name in k)]
+
+    def get_weights_by_keys(self, name: Union[str, int]) -> np.ndarray:
         """
         Get a node's weight by its name.
         Args:
@@ -163,7 +177,7 @@ def get_weights_by_keys(self, name: str) -> np.ndarray:
         if name is None:
             return None
 
-        res = [k for k in self.weights.keys() if name in k]
+        res = self._get_weight_name(name)
         if len(res) == 1:  # Make sure there are no duplicates
             return self.weights[res[0]]
         else:
@@ -179,7 +193,7 @@ def set_weights_by_keys(self, name: str, tensor: np.ndarray):
         """
 
-        res = [k for k in self.weights.keys() if name in k]
+        res = self._get_weight_name(name)
         if len(res) == 1:
             self.weights[res[0]] = tensor
         else:  # Add if not exist
@@ -552,22 +566,25 @@ def get_qco(self, tpc: TargetPlatformCapabilities) -> QuantizationConfigOptions:
         for fl, qco in tpc.filterlayer2qco.items():
             if self.is_match_filter_params(fl):
                 return qco
-        if self.type in tpc.layer2qco:
-            return tpc.layer2qco.get(self.type)
+        # Extract qco with is_match_type to overcome mismatch of function types in TF 2.15
+        matching_qcos = [_qco for _type, _qco in tpc.layer2qco.items() if self.is_match_type(_type)]
+        if matching_qcos:
+            if len(matching_qcos) > 1:
+                Logger.error('Found duplicate qco types!')
+            return matching_qcos[0]
         return tpc.tp_model.default_qco
 
     def is_match_type(self, _type: Type) -> bool:
         """
-        Check if input type matches the node type, either in instance type or in type name. Checking the
-        name string is required because of function types changes that occurred in TF 2.15.
+        Check if input type matches the node type, either in instance type or in type name.
 
         Args:
            _type: other node type
 
        Returns:
            Whether _type matches the self node type
         """
-        return _type == self.type or _type.__name__ == self.type.__name__
+        return _type == self.type
 
     def is_match_filter_params(self, layer_filter_params: LayerFilterParams) -> bool:
         """
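To make the new `_get_weight_name` matching rule concrete: positional weights are keyed by their input index (an int) and must match exactly, while named weights are string keys matched by substring. A standalone toy of the intended behavior (branching on the query's type so the example stays self-contained; it is not the committed helper verbatim):

```python
from typing import Dict, List, Union

def matching_weight_keys(weights: Dict[Union[str, int], object],
                         name: Union[str, int]) -> List[Union[str, int]]:
    if isinstance(name, int):
        # Positional weights: integer keys, exact match.
        return [k for k in weights if isinstance(k, int) and k == name]
    # Named weights: string keys, substring match.
    return [k for k in weights if isinstance(k, str) and name in k]

weights = {'conv2d/kernel:0': None, 'conv2d/bias:0': None, 1: None}
print(matching_weight_keys(weights, 'kernel'))  # ['conv2d/kernel:0']
print(matching_weight_keys(weights, 1))         # [1] -> the constant passed as input index 1
```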
19 changes: 18 additions & 1 deletion model_compression_toolkit/core/common/graph/functional_node.py
@@ -1,5 +1,6 @@
-from typing import Dict, Any, Tuple, List
+from typing import Dict, Any, Tuple, Type
 
+from model_compression_toolkit.constants import FOUND_TF
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
 import numpy as np
 
@@ -71,3 +72,19 @@ def type(self):
         :return: the node's functional_op
         """
         return self.functional_op
+
+    def is_match_type(self, _type: Type) -> bool:
+        """
+        Check if input type matches the node type, either in instance type or in type name. Checking
+        the name string is required because of the function type changes introduced in TF 2.15, which
+        replace the "function" attribute object (e.g. a different tf.add function that fails the
+        equality check).
+
+        Args:
+            _type: other node type
+
+        Returns:
+            Whether _type matches the self node type
+        """
+        names_match = _type.__name__ == self.type.__name__ if FOUND_TF else False
+        return super().is_match_type(_type) or names_match
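The name fallback exists because TF 2.15 exposes some ops as different function objects than earlier versions, so an equality check against a previously recorded type can fail even though both refer to the same op. A TF-free toy of the failure mode and the fallback:

```python
def make_add():
    def add(a, b):
        return a + b
    return add

# Distinct function objects with the same __name__, like tf.add across TF versions.
add_v1, add_v2 = make_add(), make_add()
print(add_v1 == add_v2)                    # False: plain equality fails
print(add_v1.__name__ == add_v2.__name__)  # True: the name-based fallback matches
```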
@@ -15,6 +15,7 @@
 
 from typing import Any
 from model_compression_toolkit.core.common.matchers.node_matcher import BaseNodeMatcher
+from model_compression_toolkit.core.common.graph.base_node import BaseNode
 
 
 class NodeTypeFilter(BaseNodeMatcher):
@@ -30,17 +31,17 @@ def __init__(self, node_type):
         """
         self.node_type = node_type
 
-    def apply(self, input_object: Any) -> bool:
+    def apply(self, input_object: BaseNode) -> bool:
         """
         Check if input_object is of the type that NodeTypeFilter contains.
 
         Args:
             input_object: Node object to check for its type.
 
         Returns:
-            True if the node if of the type that was passed during the initialization of NodeTypeFilter.
+            True if the node is of the type that was passed during the initialization of NodeTypeFilter.
         """
-        if input_object.type == self.node_type:
+        if input_object.is_match_type(self.node_type):
             return True
 
@@ -265,8 +265,6 @@ def __init__(self,
         self.enable_weights_quantization = weights_attr_cfg.enable_weights_quantization
         self.l_p_value = qc.l_p_value
 
-
-
     @property
     def weights_error_method(self) -> QuantizationErrorMethod:
         """
@@ -412,9 +410,6 @@ def __init__(self, qc: QuantizationConfig,
         for attr in node_attrs_list:
             if isinstance(attr, int):
                 # this is a positional attribute, so it needs to be handled separately.
-                # we assume that a positional attribute is quantized with the default configuration provided in the TPC.
-                if op_cfg.default_weight_attr_config.enable_weights_quantization:
-                    Logger.critical(f"Quantizing constant weights is not supported.")
                 self.pos_attributes_config_mapping[attr] = WeightsAttrQuantizationConfig(qc=qc,
                                                                                          weights_attr_cfg=op_cfg.default_weight_attr_config,
                                                                                          weights_channels_axis=weights_channels_axis)
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 
+from typing import Dict
 import numpy as np
 from sklearn.cluster import KMeans
 
@@ -38,10 +39,10 @@ def lut_kmeans_tensor(tensor_data: np.ndarray,
                       n_iter: int = 10,
                       min_threshold: float = MIN_THRESHOLD,
                       quant_error_method: qc.QuantizationErrorMethod = None,
-                      is_symmetric=False,
+                      is_symmetric: bool = False,
                       node=None,
                       hessian_info_service: HessianInfoService = None,
-                      num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> dict:
+                      num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> Dict:
     """
     The quantizer first finds the closest max value per channel of tensor_data.
     Now, we divide tensor_data with the threshold vector per channel. In addition, we scale the result to the range
@@ -101,7 +102,7 @@ def lut_kmeans_histogram(bins: np.ndarray,
                          constrained: bool = True,
                          n_iter: int = 20,
                          min_threshold: float = MIN_THRESHOLD,
-                         quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> dict:
+                         quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> Dict:
     """
     Finds quantization cluster points for non-uniform activation quantization.
     The quantizer first finds the closest power-of-two number to the max value of the given histogram,
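A rough per-tensor sketch of the scheme the docstring describes: normalize by a max-derived threshold, scale into the n-bit grid range, then let KMeans pick the LUT cluster centers. This illustrates the idea only; MCT's implementation works per channel and adds further constraints:

```python
import numpy as np
from sklearn.cluster import KMeans

def lut_kmeans_sketch(tensor_data: np.ndarray, n_bits: int = 4) -> dict:
    threshold = np.max(np.abs(tensor_data))                  # max-derived threshold (per tensor here)
    scaled = tensor_data / threshold * (2 ** (n_bits - 1))   # scale into the n-bit grid range
    kmeans = KMeans(n_clusters=2 ** n_bits, n_init=10).fit(scaled.reshape(-1, 1))
    return {'cluster_centers': kmeans.cluster_centers_, 'threshold': threshold}

print(lut_kmeans_sketch(np.random.randn(1000))['cluster_centers'].shape)  # (16, 1)
```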
4 changes: 2 additions & 2 deletions model_compression_toolkit/core/common/similarity_analyzer.py
@@ -235,7 +235,7 @@ def compute_kl_divergence(float_tensor: np.ndarray, fxp_tensor: np.ndarray, batc
                           axis: int = None) -> float:
     """
     Compute the similarity between two tensors using KL-divergence.
-    The returned values is between 0 to 1: the smaller returned value,
+    The returned value is between 0 and 1: the smaller the returned value,
     the greater similarity there is between the two tensors.
 
     Args:
@@ -257,6 +257,6 @@ def compute_kl_divergence(float_tensor: np.ndarray, fxp_tensor: np.ndarray, batc
     non_zero_fxp_tensor[non_zero_fxp_tensor == 0] = EPS
 
     prob_distance = np.where(float_flat != 0, float_flat * np.log(float_flat / non_zero_fxp_tensor), 0)
-    # The sum is part of the KL-Divergance function.
+    # The sum is part of the KL-Divergence function.
     # The mean is to aggregate the distance between each output probability vectors.
     return np.mean(np.sum(prob_distance, axis=-1), axis=-1)
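A worked check of what this computes: elementwise p·log(p/q) with a zero guard, summed over the probability axis, then averaged over the batch. Toy values, assuming the last axis already holds normalized probabilities:

```python
import numpy as np

EPS = 1e-8
p = np.array([[0.7, 0.2, 0.1]])   # float model output (reference distribution)
q = np.array([[0.6, 0.3, 0.1]])   # quantized model output
q = np.where(q == 0, EPS, q)      # guard against log(0) and division by zero
prob_distance = np.where(p != 0, p * np.log(p / q), 0)
print(np.mean(np.sum(prob_distance, axis=-1), axis=-1))  # ~0.0268 nats
```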
@@ -39,6 +39,7 @@
 from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
 from model_compression_toolkit.core.keras.back2framework.instance_builder import OperationHandler
 from model_compression_toolkit.core.keras.reader.connectivity_handler import OutTensor
+from mct_quantizers import KerasQuantizationWrapper
 
 # In tf2.3 fake quant node is implemented as TensorFlowOpLayer, while in tf2.4 as TFOpLambda.
 FQ_NODE_OP_V2_3 = 'FakeQuantWithMinMaxVars'
@@ -270,7 +271,9 @@ def _run_operation(self,
                                          out_tensors_of_n_float)
         else:
             input_tensors = [tensor for tensor_list in input_tensors for tensor in tensor_list]  # flat list of lists
-            input_tensors = n.insert_positional_weights_to_input_list(input_tensors)
+            if not isinstance(op_func, KerasQuantizationWrapper):
+                # The KerasQuantizationWrapper will insert the quantized positional weights internally.
+                input_tensors = n.insert_positional_weights_to_input_list(input_tensors)
             # Build a functional node using its args
             if isinstance(n, FunctionalNode):
                 if n.inputs_as_list:  # If the first argument should be a list of tensors:
@@ -70,9 +70,9 @@ def update_kernel_for_bn_folding_fn(conv_node: BaseNode,
     Returns:
         The modified convolution node's weight/kernel.
     """
-    if conv_node.type == DepthwiseConv2D:
+    if conv_node.is_match_type(DepthwiseConv2D):
         kernel = kernel * weights_scale.reshape((1, 1, kernel.shape[-2], kernel.shape[-1]))
-    elif conv_node.type == Conv2DTranspose:
+    elif conv_node.is_match_type(Conv2DTranspose):
         kernel = kernel * weights_scale.reshape((1, 1, -1, 1))
     else:
         kernel = kernel * weights_scale.reshape((1, 1, 1, -1))
@@ -98,10 +98,10 @@ def update_weights_for_bn_forward_folding_fn(conv_node: BaseNode,
     Returns:
         The modified convolution node's weight/kernel.
     """
-    if conv_node.type == DepthwiseConv2D:
+    if conv_node.is_match_type(DepthwiseConv2D):
         bias_update = kernel * bias_factor.reshape((1, 1, -1, 1))
         kernel = kernel * weights_scale.reshape((1, 1, -1, 1))
-    elif conv_node.type == Conv2DTranspose:
+    elif conv_node.is_match_type(Conv2DTranspose):
         bias_update = (kernel * bias_factor.reshape((1, 1, 1, -1))).sum(3)
         kernel = kernel * weights_scale.reshape((1, 1, 1, -1))
     else:
@@ -133,7 +133,7 @@ def is_group_conv_fn(node: BaseNode) -> bool:
     Returns:
         True if the node is a group convolution, else False
     """
-    return (node.type == Conv2D) and node.framework_attr[GROUPS] > 1
+    return (node.is_match_type(Conv2D)) and node.framework_attr[GROUPS] > 1
 
 
 def get_foldable_node_type_and_validity_fn(node: BaseNode) -> [bool, bool]:
@@ -147,8 +147,8 @@ def get_foldable_node_type_and_validity_fn(node: BaseNode) -> [bool, bool]:
         is_bn: True if the node is a batch norm, else False
         is_dw_valid: True if the node is a dw-convolution valid for folding or a batch-norm node, else False
     """
-    is_bn = node.type is BatchNormalization
-    is_dw = node.type is DepthwiseConv2D
+    is_bn = node.is_match_type(BatchNormalization)
+    is_dw = node.is_match_type(DepthwiseConv2D)
     is_dw_valid = is_dw and np.all(np.array(node.get_weights_by_keys(DEPTHWISE_KERNEL).shape[:2]) == 1)
     return is_bn, is_dw_valid
 
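The reshapes encode where the folded BN channel scale broadcasts for each Keras kernel layout: Conv2D kernels are (kH, kW, Cin, Cout) and scale on the last axis, DepthwiseConv2D kernels are (kH, kW, Cin, depth_multiplier) and scale across the last two axes, and Conv2DTranspose kernels are (kH, kW, Cout, Cin) and scale on axis 2. A quick numeric check of two of these:

```python
import numpy as np

weights_scale = np.array([0.5, 2.0])        # one scale per folded BN channel

conv_kernel = np.ones((3, 3, 4, 2))         # Conv2D: (kH, kW, Cin, Cout=2)
print((conv_kernel * weights_scale.reshape((1, 1, 1, -1)))[0, 0, 0])  # [0.5 2.] per output channel

dw_kernel = np.ones((3, 3, 2, 1))           # DepthwiseConv2D: (kH, kW, Cin=2, mult=1)
scale_dw = weights_scale.reshape((1, 1, dw_kernel.shape[-2], dw_kernel.shape[-1]))
print((dw_kernel * scale_dw)[0, 0, :, 0])   # [0.5 2.] per input channel
```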
@@ -58,7 +58,7 @@ def conv2d_collapsing_fn(first_node: BaseNode,
     Returns:
         The modified layer node's weights: kernel, bias
     """
-    if first_node.type == Conv2D and second_node.type == Conv2D:
+    if first_node.is_match_type(Conv2D) and second_node.is_match_type(Conv2D):
         # Get nodes attributes
         kernel1 = first_node.get_weights_by_keys(kernel_str)
         kernel2 = second_node.get_weights_by_keys(kernel_str)
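For intuition on what conv2d_collapsing_fn produces in the simplest case: two stacked 1x1 convolutions with no bias collapse into a single 1x1 convolution whose kernel is the matrix product of the two. A numpy check under those assumptions (the real function also handles larger kernels and the bias term):

```python
import numpy as np

cin, mid, cout = 3, 4, 2
k1 = np.random.randn(1, 1, cin, mid)    # first 1x1 Conv2D kernel
k2 = np.random.randn(1, 1, mid, cout)   # second 1x1 Conv2D kernel
x = np.random.randn(5, cin)             # 5 "pixels"

y_sequential = (x @ k1[0, 0]) @ k2[0, 0]  # conv1 then conv2 (a 1x1 conv is a matmul)
k_merged = k1[0, 0] @ k2[0, 0]            # collapsed kernel
print(np.allclose(y_sequential, x @ k_merged))  # True
```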
@@ -49,7 +49,7 @@ def residual_collapsing_fn(first_node: BaseNode,
     Returns:
         The modified layer node's weights: kernel
     """
-    if first_node.type == Conv2D:
+    if first_node.is_match_type(Conv2D):
         # Get nodes attributes
         kernel = first_node.get_weights_by_keys(kernel_str)
         (kH, kW, Cin, Cout) = kernel.shape
20 changes: 10 additions & 10 deletions model_compression_toolkit/core/keras/keras_implementation.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 from functools import partial
-from typing import List, Any, Tuple, Callable, Dict
+from typing import List, Any, Tuple, Callable, Dict, Union
 
 import numpy as np
 import tensorflow as tf
@@ -412,12 +412,13 @@ def count_node_for_mixed_precision_interest_points(self, node: BaseNode) -> bool
         Returns: True if the node should be considered an interest point, False otherwise.
         """
 
-        if node.type == Activation:
+        if node.is_match_type(Activation):
             node_type_name = node.framework_attr[keras_constants.ACTIVATION]
             if node_type_name in [keras_constants.SOFTMAX, keras_constants.SIGMOID]:
                 return True
-        elif node.type in [tf.nn.softmax, tf.keras.layers.Softmax, tf.nn.sigmoid, Conv2D, DepthwiseConv2D, Conv2DTranspose, Dense, Concatenate,
-                           tf.concat, Add, tf.add]:
+        elif any([node.is_match_type(_type) for _type in [tf.nn.softmax, tf.keras.layers.Softmax, tf.nn.sigmoid, Conv2D,
+                                                          DepthwiseConv2D, Conv2DTranspose, Dense, Concatenate, tf.concat,
+                                                          Add, tf.add]]):
             return True
 
         return False
@@ -529,18 +530,18 @@ def get_node_mac_operations(self,
         kernel_shape = node.get_weights_by_keys(fw_info.get_kernel_op_attributes(node.type)[0]).shape
         output_channel_axis, input_channel_axis = fw_info.kernel_channels_mapping.get(node.type)
 
-        if node.type is Conv2D or node.type is Conv2DTranspose:
+        if node.is_match_type(Conv2D) or node.is_match_type(Conv2DTranspose):
             # (C_out * W_out * H_out) * C_in * (W_kernel * H_kernel)
             return np.prod([x for x in output_shape if x is not None]) * \
                    kernel_shape[input_channel_axis] * \
                    (kernel_shape[0] * kernel_shape[1])
-        elif node.type is DepthwiseConv2D:
+        elif node.is_match_type(DepthwiseConv2D):
             # Depth * (W_out * H_out) * C_in * (W_kernel * H_kernel)
             return node.framework_attr.get(DEPTH_MULTIPLIER) * \
                    np.prod([x for x in output_shape if x is not None]) / output_shape[output_channel_axis] * \
                    kernel_shape[input_channel_axis] * \
                    (kernel_shape[0] * kernel_shape[1])
-        elif node.type is Dense:
+        elif node.is_match_type(Dense):
             # IN * OUT
             return kernel_shape[0] * kernel_shape[1]
         else:
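A worked instance of the Conv2D branch, (C_out * W_out * H_out) * C_in * (W_kernel * H_kernel), for a 3x3 convolution with 16 input and 32 output channels on a 28x28 output map:

```python
import numpy as np

output_shape = (None, 28, 28, 32)   # batch dim is None and is skipped
kernel_shape = (3, 3, 16, 32)       # Keras Conv2D kernel: (kH, kW, Cin, Cout)
input_channel_axis = 2

macs = np.prod([x for x in output_shape if x is not None]) * \
       kernel_shape[input_channel_axis] * (kernel_shape[0] * kernel_shape[1])
print(macs)  # 28 * 28 * 32 * 16 * 9 = 3,612,672
```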
@@ -593,10 +594,9 @@ def get_inferable_quantizers(self, node: BaseNode):
         Returns:
             weight_quantizers: A dictionary mapping a weight's name to its quantizer.
             activation_quantizers: A list of activation quantizers, one for each layer output.
         """
 
-
-        def _weight_name(w: str) -> str:
+        def _weight_name(w: Union[str, int]) -> Union[str, int]:
             """
             Extracts the weight name from the full TensorFlow variable name.
@@ -609,7 +609,7 @@ def _weight_name(w: str) -> str:
             Extracted weight name.
             """
 
-            return w.split(':')[0].split('/')[-1]
+            return w.split(':')[0].split('/')[-1] if isinstance(w, str) else w
 
         attribute_names = [_weight_name(wn) for wn in node.get_node_weights_attributes()
                            if node.is_weights_quantization_enabled(wn)]
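The updated helper leaves positional (integer) names untouched and still strips TF variable scoping from string names:

```python
def _weight_name(w):
    # 'conv2d_1/kernel:0' -> 'kernel'; positional names (ints) pass through.
    return w.split(':')[0].split('/')[-1] if isinstance(w, str) else w

print(_weight_name('conv2d_1/kernel:0'))  # kernel
print(_weight_name(1))                    # 1
```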
8 changes: 4 additions & 4 deletions model_compression_toolkit/core/keras/keras_node_prior_info.py
@@ -56,13 +56,13 @@ def _get_min_max_outputs(node: BaseNode,
     """
     min_output, max_output = None, None
 
-    if node.type == ReLU:
+    if node.is_match_type(ReLU):
         min_output = node.framework_attr[THRESHOLD] if node.framework_attr[NEGATIVE_SLOPE] == 0 else None
 
     elif fw_info.layers_has_min_max(node.type):
         min_output, max_output = fw_info.layer_min_max_mapping[node.type]
 
-    elif node.type == Activation and fw_info.activation_has_min_max(node.framework_attr[ACTIVATION]):
+    elif node.is_match_type(Activation) and fw_info.activation_has_min_max(node.framework_attr[ACTIVATION]):
         min_output, max_output = fw_info.activation_min_max_mapping[node.framework_attr[ACTIVATION]]
 
     return min_output, max_output
@@ -82,7 +82,7 @@ def _get_mean_std_outputs(node: BaseNode,
     """
     mean_output, std_output = None, None
 
-    if node.type == BatchNormalization:
+    if node.is_match_type(BatchNormalization):
         mean_output = node.get_weights_by_keys(BETA)
         if node.get_weights_by_keys(GAMMA) is None:
             std_output = 1.0
@@ -92,7 +92,7 @@ def _get_mean_std_outputs(node: BaseNode,
             mean_output = 0.0
     else:
         next_node_list = graph.get_next_nodes(node)
-        bn_nodes = [bn_node for bn_node in next_node_list if bn_node.type == BatchNormalization]
+        bn_nodes = [bn_node for bn_node in next_node_list if bn_node.is_match_type(BatchNormalization)]
         if len(bn_nodes) != 0:
             bn_node = bn_nodes[0]
             moving_variance = bn_node.get_weights_by_keys(MOVING_VARIANCE)
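The prior being extracted here: after batch normalization, outputs are distributed with mean beta and std gamma (gamma defaulting to 1 when the BN has no scale). A minimal numeric check of that assumption:

```python
import numpy as np

x = np.random.randn(10000) * 3.0 + 5.0   # arbitrary pre-BN activations
gamma, beta = 2.0, 0.5
bn_out = gamma * (x - x.mean()) / np.sqrt(x.var() + 1e-5) + beta
print(bn_out.mean(), bn_out.std())        # ~0.5, ~2.0 -> prior (mean, std) = (beta, gamma)
```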