
Const quantization (#1045)
Add const quantization to "add", "sub", "mul" & "div" operations.
Enabled in TPC imx500.v2.
elad-c authored Apr 18, 2024
1 parent 6cbffa7 commit 64dacc0
Showing 44 changed files with 546 additions and 152 deletions.
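What this enables, end to end: a constant feeding one of these binary ops is treated as a positional weight and can now be quantized under the TPC's default weight attribute config. A minimal sketch of the resulting flow (the TPC lookup string and PTQ entry point below follow MCT's public API around this release; treat the exact names and signatures as assumptions to verify):

```python
import numpy as np
import tensorflow as tf
import model_compression_toolkit as mct

# Toy model: "add" consumes a constant tensor, i.e. a positional weight.
inputs = tf.keras.Input(shape=(16,))
outputs = tf.add(inputs, tf.constant(np.random.randn(16).astype(np.float32)))
model = tf.keras.Model(inputs, outputs)

def representative_data_gen():
    for _ in range(10):
        yield [np.random.randn(1, 16).astype(np.float32)]

# The imx500 v2 TPC is the one that enables const quantization for add/sub/mul/div.
tpc = mct.get_target_platform_capabilities('tensorflow', 'imx500', target_platform_version='v2')
quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(
    model, representative_data_gen, target_platform_capabilities=tpc)
```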
4 changes: 2 additions & 2 deletions model_compression_toolkit/core/common/graph/base_graph.py
@@ -98,8 +98,8 @@ def set_tpc(self,
         tpc_layers = tpc.op_sets_to_layers.get_layers()
         tpc_filtered_layers = [layer for layer in tpc_layers if isinstance(layer, LayerFilterParams)]
         for n in self.nodes:
-            is_node_in_tpc = n.type in tpc_layers or any([n.is_match_filter_params(filtered_layer)
-                                                          for filtered_layer in tpc_filtered_layers])
+            is_node_in_tpc = any([n.is_match_type(_type) for _type in tpc_layers]) or \
+                             any([n.is_match_filter_params(filtered_layer) for filtered_layer in tpc_filtered_layers])
             if n.is_custom:
                 if not is_node_in_tpc:
                     Logger.critical(f'MCT does not support optimizing Keras custom layers. Found a layer of type {n.type}. '
33 changes: 25 additions & 8 deletions model_compression_toolkit/core/common/graph/base_node.py
@@ -151,7 +151,21 @@ def is_reused(self) -> bool:
         """
         return self.reuse or self.reuse_group is not None
 
-    def get_weights_by_keys(self, name: str) -> np.ndarray:
+    def _get_weight_name(self, name: Union[str, int]) -> List[Union[str, int]]:
+        """
+        Get weight names that match argument name (either string weights or integer for
+        positional weights).
+
+        Args:
+            name: weight name
+
+        Returns:
+            A list of weight names that match input "name"
+        """
+        return [k for k in self.weights.keys()
+                if (isinstance(k, int) and name == k) or (isinstance(k, str) and name in k)]
+
+    def get_weights_by_keys(self, name: Union[str, int]) -> np.ndarray:
         """
         Get a node's weight by its name.
         Args:
@@ -163,7 +177,7 @@ def get_weights_by_keys(self, name: str) -> np.ndarray:
         if name is None:
             return None
 
-        res = [k for k in self.weights.keys() if name in k]
+        res = self._get_weight_name(name)
         if len(res) == 1:  # Make sure there are no duplicates
             return self.weights[res[0]]
         else:
@@ -179,7 +193,7 @@ def set_weights_by_keys(self, name: str, tensor: np.ndarray):
         """
 
-        res = [k for k in self.weights.keys() if name in k]
+        res = self._get_weight_name(name)
         if len(res) == 1:
             self.weights[res[0]] = tensor
         else:  # Add if not exist
@@ -552,22 +566,25 @@ def get_qco(self, tpc: TargetPlatformCapabilities) -> QuantizationConfigOptions:
         for fl, qco in tpc.filterlayer2qco.items():
             if self.is_match_filter_params(fl):
                 return qco
-        if self.type in tpc.layer2qco:
-            return tpc.layer2qco.get(self.type)
+        # Extract qco with is_match_type to overcome mismatch of function types in TF 2.15
+        matching_qcos = [_qco for _type, _qco in tpc.layer2qco.items() if self.is_match_type(_type)]
+        if matching_qcos:
+            if len(matching_qcos) > 1:
+                Logger.error('Found duplicate qco types!')
+            return matching_qcos[0]
         return tpc.tp_model.default_qco
 
     def is_match_type(self, _type: Type) -> bool:
         """
-        Check if input type matches the node type, either in instance type or in type name. Checking the
-        name string is required because of function types changes that occurred in TF 2.15.
+        Check if input type matches the node type, either in instance type or in type name.
 
         Args:
            _type: other node type
 
        Returns:
            Whether _type matches the self node type
         """
-        return _type == self.type or _type.__name__ == self.type.__name__
+        return _type == self.type
 
     def is_match_filter_params(self, layer_filter_params: LayerFilterParams) -> bool:
         """
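To make the new `_get_weight_name` matching rule concrete: positional weights are keyed by their input index (an int) and must match exactly, while named weights are string keys matched by substring. A standalone toy of the intended behavior (branching on the query's type so the example stays self-contained; it is not the committed helper verbatim):

```python
from typing import Dict, List, Union

def matching_weight_keys(weights: Dict[Union[str, int], object],
                         name: Union[str, int]) -> List[Union[str, int]]:
    if isinstance(name, int):
        # Positional weights: integer keys, exact match.
        return [k for k in weights if isinstance(k, int) and k == name]
    # Named weights: string keys, substring match.
    return [k for k in weights if isinstance(k, str) and name in k]

weights = {'conv2d/kernel:0': None, 'conv2d/bias:0': None, 1: None}
print(matching_weight_keys(weights, 'kernel'))  # ['conv2d/kernel:0']
print(matching_weight_keys(weights, 1))         # [1] -> the constant passed as input index 1
```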
19 changes: 18 additions & 1 deletion model_compression_toolkit/core/common/graph/functional_node.py
@@ -1,5 +1,6 @@
-from typing import Dict, Any, Tuple, List
+from typing import Dict, Any, Tuple, Type
 
+from model_compression_toolkit.constants import FOUND_TF
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
 import numpy as np
 
@@ -71,3 +72,19 @@ def type(self):
         :return: the node's functional_op
         """
         return self.functional_op
+
+    def is_match_type(self, _type: Type) -> bool:
+        """
+        Check if input type matches the node type, either in instance type or in type name. Checking
+        the name string is required because of the function type changes introduced in TF 2.15, which
+        replace the "function" attribute object (e.g. a different tf.add function that fails the
+        equality check).
+
+        Args:
+            _type: other node type
+
+        Returns:
+            Whether _type matches the self node type
+        """
+        names_match = _type.__name__ == self.type.__name__ if FOUND_TF else False
+        return super().is_match_type(_type) or names_match
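The name fallback exists because TF 2.15 exposes some ops as different function objects than earlier versions, so an equality check against a previously recorded type can fail even though both refer to the same op. A TF-free toy of the failure mode and the fallback:

```python
def make_add():
    def add(a, b):
        return a + b
    return add

# Distinct function objects with the same __name__, like tf.add across TF versions.
add_v1, add_v2 = make_add(), make_add()
print(add_v1 == add_v2)                    # False: plain equality fails
print(add_v1.__name__ == add_v2.__name__)  # True: the name-based fallback matches
```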
@@ -15,6 +15,7 @@
 
 from typing import Any
 from model_compression_toolkit.core.common.matchers.node_matcher import BaseNodeMatcher
+from model_compression_toolkit.core.common.graph.base_node import BaseNode
 
 
 class NodeTypeFilter(BaseNodeMatcher):
@@ -30,17 +31,17 @@ def __init__(self, node_type):
         """
         self.node_type = node_type
 
-    def apply(self, input_object: Any) -> bool:
+    def apply(self, input_object: BaseNode) -> bool:
         """
         Check if input_object is of the type that NodeTypeFilter contains.
 
         Args:
             input_object: Node object to check for its type.
 
         Returns:
-            True if the node if of the type that was passed during the initialization of NodeTypeFilter.
+            True if the node is of the type that was passed during the initialization of NodeTypeFilter.
         """
-        if input_object.type == self.node_type:
+        if input_object.is_match_type(self.node_type):
             return True
 
@@ -265,8 +265,6 @@ def __init__(self,
         self.enable_weights_quantization = weights_attr_cfg.enable_weights_quantization
         self.l_p_value = qc.l_p_value
 
-
-
     @property
     def weights_error_method(self) -> QuantizationErrorMethod:
         """
@@ -412,9 +410,6 @@ def __init__(self, qc: QuantizationConfig,
         for attr in node_attrs_list:
             if isinstance(attr, int):
                 # this is a positional attribute, so it needs to be handled separately.
-                # we assume that a positional attribute is quantized with the default configuration provided in the TPC.
-                if op_cfg.default_weight_attr_config.enable_weights_quantization:
-                    Logger.critical(f"Quantizing constant weights is not supported.")
                 self.pos_attributes_config_mapping[attr] = WeightsAttrQuantizationConfig(qc=qc,
                                                                                          weights_attr_cfg=op_cfg.default_weight_attr_config,
                                                                                          weights_channels_axis=weights_channels_axis)
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 
+from typing import Dict
 import numpy as np
 from sklearn.cluster import KMeans
 
@@ -38,10 +39,10 @@ def lut_kmeans_tensor(tensor_data: np.ndarray,
                       n_iter: int = 10,
                       min_threshold: float = MIN_THRESHOLD,
                       quant_error_method: qc.QuantizationErrorMethod = None,
-                      is_symmetric=False,
+                      is_symmetric: bool = False,
                       node=None,
                       hessian_info_service: HessianInfoService = None,
-                      num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> dict:
+                      num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> Dict:
     """
     The quantizer first finds the closest max value per channel of tensor_data.
     Now, we divide tensor_data with the threshold vector per channel. In addition, we scale the result to the range
@@ -101,7 +102,7 @@ def lut_kmeans_histogram(bins: np.ndarray,
                          constrained: bool = True,
                          n_iter: int = 20,
                          min_threshold: float = MIN_THRESHOLD,
-                         quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> dict:
+                         quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> Dict:
     """
     Finds quantization cluster points for non-uniform activation quantization.
     The quantizer first finds the closest power-of-two number to the max value of the given histogram,
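A rough per-tensor sketch of the scheme the docstring describes: normalize by a max-derived threshold, scale into the n-bit grid range, then let KMeans pick the LUT cluster centers. This illustrates the idea only; MCT's implementation works per channel and adds further constraints:

```python
import numpy as np
from sklearn.cluster import KMeans

def lut_kmeans_sketch(tensor_data: np.ndarray, n_bits: int = 4) -> dict:
    threshold = np.max(np.abs(tensor_data))                  # max-derived threshold (per tensor here)
    scaled = tensor_data / threshold * (2 ** (n_bits - 1))   # scale into the n-bit grid range
    kmeans = KMeans(n_clusters=2 ** n_bits, n_init=10).fit(scaled.reshape(-1, 1))
    return {'cluster_centers': kmeans.cluster_centers_, 'threshold': threshold}

print(lut_kmeans_sketch(np.random.randn(1000))['cluster_centers'].shape)  # (16, 1)
```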
4 changes: 2 additions & 2 deletions model_compression_toolkit/core/common/similarity_analyzer.py
@@ -235,7 +235,7 @@ def compute_kl_divergence(float_tensor: np.ndarray, fxp_tensor: np.ndarray, batc
                           axis: int = None) -> float:
     """
     Compute the similarity between two tensors using KL-divergence.
-    The returned values is between 0 to 1: the smaller returned value,
+    The returned value is between 0 and 1: the smaller the returned value,
     the greater similarity there is between the two tensors.
 
     Args:
@@ -257,6 +257,6 @@ def compute_kl_divergence(float_tensor: np.ndarray, fxp_tensor: np.ndarray, batc
     non_zero_fxp_tensor[non_zero_fxp_tensor == 0] = EPS
 
     prob_distance = np.where(float_flat != 0, float_flat * np.log(float_flat / non_zero_fxp_tensor), 0)
-    # The sum is part of the KL-Divergance function.
+    # The sum is part of the KL-Divergence function.
     # The mean is to aggregate the distance between each output probability vectors.
     return np.mean(np.sum(prob_distance, axis=-1), axis=-1)
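A worked check of what this computes: elementwise p·log(p/q) with a zero guard, summed over the probability axis, then averaged over the batch. Toy values, assuming the last axis already holds normalized probabilities:

```python
import numpy as np

EPS = 1e-8
p = np.array([[0.7, 0.2, 0.1]])   # float model output (reference distribution)
q = np.array([[0.6, 0.3, 0.1]])   # quantized model output
q = np.where(q == 0, EPS, q)      # guard against log(0) and division by zero
prob_distance = np.where(p != 0, p * np.log(p / q), 0)
print(np.mean(np.sum(prob_distance, axis=-1), axis=-1))  # ~0.0268 nats
```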
@@ -39,6 +39,7 @@
 from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
 from model_compression_toolkit.core.keras.back2framework.instance_builder import OperationHandler
 from model_compression_toolkit.core.keras.reader.connectivity_handler import OutTensor
+from mct_quantizers import KerasQuantizationWrapper
 
 # In tf2.3 fake quant node is implemented as TensorFlowOpLayer, while in tf2.4 as TFOpLambda.
 FQ_NODE_OP_V2_3 = 'FakeQuantWithMinMaxVars'
@@ -270,7 +271,9 @@ def _run_operation(self,
                                          out_tensors_of_n_float)
         else:
             input_tensors = [tensor for tensor_list in input_tensors for tensor in tensor_list]  # flat list of lists
-            input_tensors = n.insert_positional_weights_to_input_list(input_tensors)
+            if not isinstance(op_func, KerasQuantizationWrapper):
+                # The KerasQuantizationWrapper will insert the quantized positional weights internally.
+                input_tensors = n.insert_positional_weights_to_input_list(input_tensors)
             # Build a functional node using its args
             if isinstance(n, FunctionalNode):
                 if n.inputs_as_list:  # If the first argument should be a list of tensors:
@@ -70,9 +70,9 @@ def update_kernel_for_bn_folding_fn(conv_node: BaseNode,
     Returns:
         The modified convolution node's weight/kernel.
     """
-    if conv_node.type == DepthwiseConv2D:
+    if conv_node.is_match_type(DepthwiseConv2D):
         kernel = kernel * weights_scale.reshape((1, 1, kernel.shape[-2], kernel.shape[-1]))
-    elif conv_node.type == Conv2DTranspose:
+    elif conv_node.is_match_type(Conv2DTranspose):
         kernel = kernel * weights_scale.reshape((1, 1, -1, 1))
     else:
         kernel = kernel * weights_scale.reshape((1, 1, 1, -1))
@@ -98,10 +98,10 @@ def update_weights_for_bn_forward_folding_fn(conv_node: BaseNode,
     Returns:
         The modified convolution node's weight/kernel.
     """
-    if conv_node.type == DepthwiseConv2D:
+    if conv_node.is_match_type(DepthwiseConv2D):
         bias_update = kernel * bias_factor.reshape((1, 1, -1, 1))
         kernel = kernel * weights_scale.reshape((1, 1, -1, 1))
-    elif conv_node.type == Conv2DTranspose:
+    elif conv_node.is_match_type(Conv2DTranspose):
         bias_update = (kernel * bias_factor.reshape((1, 1, 1, -1))).sum(3)
         kernel = kernel * weights_scale.reshape((1, 1, 1, -1))
     else:
@@ -133,7 +133,7 @@ def is_group_conv_fn(node: BaseNode) -> bool:
     Returns:
         True if the node is a group convolution, else False
     """
-    return (node.type == Conv2D) and node.framework_attr[GROUPS] > 1
+    return (node.is_match_type(Conv2D)) and node.framework_attr[GROUPS] > 1
 
 
 def get_foldable_node_type_and_validity_fn(node: BaseNode) -> [bool, bool]:
@@ -147,8 +147,8 @@ def get_foldable_node_type_and_validity_fn(node: BaseNode) -> [bool, bool]:
         is_bn: True if the node is a batch norm, else False
         is_dw_valid: True if the node is a dw-convolution valid for folding or a batch-norm node, else False
     """
-    is_bn = node.type is BatchNormalization
-    is_dw = node.type is DepthwiseConv2D
+    is_bn = node.is_match_type(BatchNormalization)
+    is_dw = node.is_match_type(DepthwiseConv2D)
     is_dw_valid = is_dw and np.all(np.array(node.get_weights_by_keys(DEPTHWISE_KERNEL).shape[:2]) == 1)
     return is_bn, is_dw_valid
 
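The reshapes encode where the folded BN channel scale broadcasts for each Keras kernel layout: Conv2D kernels are (kH, kW, Cin, Cout) and scale on the last axis, DepthwiseConv2D kernels are (kH, kW, Cin, depth_multiplier) and scale across the last two axes, and Conv2DTranspose kernels are (kH, kW, Cout, Cin) and scale on axis 2. A quick numeric check of two of these:

```python
import numpy as np

weights_scale = np.array([0.5, 2.0])        # one scale per folded BN channel

conv_kernel = np.ones((3, 3, 4, 2))         # Conv2D: (kH, kW, Cin, Cout=2)
print((conv_kernel * weights_scale.reshape((1, 1, 1, -1)))[0, 0, 0])  # [0.5 2.] per output channel

dw_kernel = np.ones((3, 3, 2, 1))           # DepthwiseConv2D: (kH, kW, Cin=2, mult=1)
scale_dw = weights_scale.reshape((1, 1, dw_kernel.shape[-2], dw_kernel.shape[-1]))
print((dw_kernel * scale_dw)[0, 0, :, 0])   # [0.5 2.] per input channel
```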
@@ -58,7 +58,7 @@ def conv2d_collapsing_fn(first_node: BaseNode,
     Returns:
         The modified layer node's weights: kernel, bias
     """
-    if first_node.type == Conv2D and second_node.type == Conv2D:
+    if first_node.is_match_type(Conv2D) and second_node.is_match_type(Conv2D):
         # Get nodes attributes
         kernel1 = first_node.get_weights_by_keys(kernel_str)
         kernel2 = second_node.get_weights_by_keys(kernel_str)
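For intuition on what conv2d_collapsing_fn produces in the simplest case: two stacked 1x1 convolutions with no bias collapse into a single 1x1 convolution whose kernel is the matrix product of the two. A numpy check under those assumptions (the real function also handles larger kernels and the bias term):

```python
import numpy as np

cin, mid, cout = 3, 4, 2
k1 = np.random.randn(1, 1, cin, mid)    # first 1x1 Conv2D kernel
k2 = np.random.randn(1, 1, mid, cout)   # second 1x1 Conv2D kernel
x = np.random.randn(5, cin)             # 5 "pixels"

y_sequential = (x @ k1[0, 0]) @ k2[0, 0]  # conv1 then conv2 (a 1x1 conv is a matmul)
k_merged = k1[0, 0] @ k2[0, 0]            # collapsed kernel
print(np.allclose(y_sequential, x @ k_merged))  # True
```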
@@ -49,7 +49,7 @@ def residual_collapsing_fn(first_node: BaseNode,
     Returns:
         The modified layer node's weights: kernel
     """
-    if first_node.type == Conv2D:
+    if first_node.is_match_type(Conv2D):
         # Get nodes attributes
         kernel = first_node.get_weights_by_keys(kernel_str)
         (kH, kW, Cin, Cout) = kernel.shape
20 changes: 10 additions & 10 deletions model_compression_toolkit/core/keras/keras_implementation.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 from functools import partial
-from typing import List, Any, Tuple, Callable, Dict
+from typing import List, Any, Tuple, Callable, Dict, Union
 
 import numpy as np
 import tensorflow as tf
@@ -412,12 +412,13 @@ def count_node_for_mixed_precision_interest_points(self, node: BaseNode) -> bool
         Returns: True if the node should be considered an interest point, False otherwise.
         """
 
-        if node.type == Activation:
+        if node.is_match_type(Activation):
             node_type_name = node.framework_attr[keras_constants.ACTIVATION]
             if node_type_name in [keras_constants.SOFTMAX, keras_constants.SIGMOID]:
                 return True
-        elif node.type in [tf.nn.softmax, tf.keras.layers.Softmax, tf.nn.sigmoid, Conv2D, DepthwiseConv2D, Conv2DTranspose, Dense, Concatenate,
-                           tf.concat, Add, tf.add]:
+        elif any([node.is_match_type(_type) for _type in [tf.nn.softmax, tf.keras.layers.Softmax, tf.nn.sigmoid, Conv2D,
+                                                          DepthwiseConv2D, Conv2DTranspose, Dense, Concatenate, tf.concat,
+                                                          Add, tf.add]]):
             return True
 
         return False
@@ -529,18 +530,18 @@ def get_node_mac_operations(self,
         kernel_shape = node.get_weights_by_keys(fw_info.get_kernel_op_attributes(node.type)[0]).shape
         output_channel_axis, input_channel_axis = fw_info.kernel_channels_mapping.get(node.type)
 
-        if node.type is Conv2D or node.type is Conv2DTranspose:
+        if node.is_match_type(Conv2D) or node.is_match_type(Conv2DTranspose):
             # (C_out * W_out * H_out) * C_in * (W_kernel * H_kernel)
             return np.prod([x for x in output_shape if x is not None]) * \
                    kernel_shape[input_channel_axis] * \
                    (kernel_shape[0] * kernel_shape[1])
-        elif node.type is DepthwiseConv2D:
+        elif node.is_match_type(DepthwiseConv2D):
             # Depth * (W_out * H_out) * C_in * (W_kernel * H_kernel)
             return node.framework_attr.get(DEPTH_MULTIPLIER) * \
                    np.prod([x for x in output_shape if x is not None]) / output_shape[output_channel_axis] * \
                    kernel_shape[input_channel_axis] * \
                    (kernel_shape[0] * kernel_shape[1])
-        elif node.type is Dense:
+        elif node.is_match_type(Dense):
             # IN * OUT
             return kernel_shape[0] * kernel_shape[1]
         else:
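A worked instance of the Conv2D branch, (C_out * W_out * H_out) * C_in * (W_kernel * H_kernel), for a 3x3 convolution with 16 input and 32 output channels on a 28x28 output map:

```python
import numpy as np

output_shape = (None, 28, 28, 32)   # batch dim is None and is skipped
kernel_shape = (3, 3, 16, 32)       # Keras Conv2D kernel: (kH, kW, Cin, Cout)
input_channel_axis = 2

macs = np.prod([x for x in output_shape if x is not None]) * \
       kernel_shape[input_channel_axis] * (kernel_shape[0] * kernel_shape[1])
print(macs)  # 28 * 28 * 32 * 16 * 9 = 3,612,672
```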
@@ -593,10 +594,9 @@ def get_inferable_quantizers(self, node: BaseNode):
         Returns:
             weight_quantizers: A dictionary mapping a weight's name to its quantizer.
             activation_quantizers: A list of activation quantizers, one for each layer output.
         """
 
-
-        def _weight_name(w: str) -> str:
+        def _weight_name(w: Union[str, int]) -> Union[str, int]:
             """
             Extracts the weight name from the full TensorFlow variable name.
@@ -609,7 +609,7 @@ def _weight_name(w: str) -> str:
             Extracted weight name.
             """
 
-            return w.split(':')[0].split('/')[-1]
+            return w.split(':')[0].split('/')[-1] if isinstance(w, str) else w
 
         attribute_names = [_weight_name(wn) for wn in node.get_node_weights_attributes()
                            if node.is_weights_quantization_enabled(wn)]
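The updated helper leaves positional (integer) names untouched and still strips TF variable scoping from string names:

```python
def _weight_name(w):
    # 'conv2d_1/kernel:0' -> 'kernel'; positional names (ints) pass through.
    return w.split(':')[0].split('/')[-1] if isinstance(w, str) else w

print(_weight_name('conv2d_1/kernel:0'))  # kernel
print(_weight_name(1))                    # 1
```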
8 changes: 4 additions & 4 deletions model_compression_toolkit/core/keras/keras_node_prior_info.py
@@ -56,13 +56,13 @@ def _get_min_max_outputs(node: BaseNode,
     """
     min_output, max_output = None, None
 
-    if node.type == ReLU:
+    if node.is_match_type(ReLU):
         min_output = node.framework_attr[THRESHOLD] if node.framework_attr[NEGATIVE_SLOPE] == 0 else None
 
     elif fw_info.layers_has_min_max(node.type):
         min_output, max_output = fw_info.layer_min_max_mapping[node.type]
 
-    elif node.type == Activation and fw_info.activation_has_min_max(node.framework_attr[ACTIVATION]):
+    elif node.is_match_type(Activation) and fw_info.activation_has_min_max(node.framework_attr[ACTIVATION]):
         min_output, max_output = fw_info.activation_min_max_mapping[node.framework_attr[ACTIVATION]]
 
     return min_output, max_output
@@ -82,7 +82,7 @@ def _get_mean_std_outputs(node: BaseNode,
     """
     mean_output, std_output = None, None
 
-    if node.type == BatchNormalization:
+    if node.is_match_type(BatchNormalization):
         mean_output = node.get_weights_by_keys(BETA)
         if node.get_weights_by_keys(GAMMA) is None:
             std_output = 1.0
@@ -92,7 +92,7 @@ def _get_mean_std_outputs(node: BaseNode,
             mean_output = 0.0
     else:
         next_node_list = graph.get_next_nodes(node)
-        bn_nodes = [bn_node for bn_node in next_node_list if bn_node.type == BatchNormalization]
+        bn_nodes = [bn_node for bn_node in next_node_list if bn_node.is_match_type(BatchNormalization)]
         if len(bn_nodes) != 0:
             bn_node = bn_nodes[0]
             moving_variance = bn_node.get_weights_by_keys(MOVING_VARIANCE)
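The prior being extracted here: after batch normalization, outputs are distributed with mean beta and std gamma (gamma defaulting to 1 when the BN has no scale). A minimal numeric check of that assumption:

```python
import numpy as np

x = np.random.randn(10000) * 3.0 + 5.0   # arbitrary pre-BN activations
gamma, beta = 2.0, 0.5
bn_out = gamma * (x - x.mean()) / np.sqrt(x.var() + 1e-5) + beta
print(bn_out.mean(), bn_out.std())        # ~0.5, ~2.0 -> prior (mean, std) = (beta, gamma)
```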