Commit 090e609

merge main and resolved conflicts

Ofir Gordon authored and Ofir Gordon committed Jan 31, 2024
2 parents 0ff82cd + b62ed4e

Showing 46 changed files with 115 additions and 186 deletions.
@@ -50,7 +50,6 @@ def __init__(self,
                  weights_error_method: QuantizationErrorMethod = QuantizationErrorMethod.MSE,
                  relu_bound_to_power_of_2: bool = False,
                  weights_bias_correction: bool = True,
-                 weights_per_channel_threshold: bool = True,
                  weights_second_moment_correction: bool = False,
                  input_scaling: bool = False,
                  softmax_shift: bool = False,
@@ -73,7 +72,6 @@ def __init__(self,
             relu_bound_to_power_of_2 (bool): Whether to use relu to power of 2 scaling correction or not.
             weights_bias_correction (bool): Whether to use weights bias correction or not.
             weights_second_moment_correction (bool): Whether to use weights second_moment correction or not.
-            weights_per_channel_threshold (bool): Whether to quantize the weights per-channel or not (per-tensor).
             input_scaling (bool): Whether to use input scaling or not.
             softmax_shift (bool): Whether to use softmax shift or not.
             shift_negative_activation_correction (bool): Whether to use shifting negative activation correction or not.
@@ -90,11 +88,11 @@ def __init__(self,
     One may create a quantization configuration to quantize a model according to.
     For example, to quantize a model's weights and activations using thresholds, such that
     weights threshold selection is done using MSE, activation threshold selection is done using NOCLIPPING (min/max),
-    enabling relu_bound_to_power_of_2, weights_bias_correction, and quantizing the weights per-channel,
+    enabling relu_bound_to_power_of_2 and weights_bias_correction,
     one can instantiate a quantization configuration:
     >>> import model_compression_toolkit as mct
-    >>> qc = mct.core.QuantizationConfig(activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING,weights_error_method=mct.core.QuantizationErrorMethod.MSE,relu_bound_to_power_of_2=True,weights_bias_correction=True,weights_per_channel_threshold=True)
+    >>> qc = mct.core.QuantizationConfig(activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING, weights_error_method=mct.core.QuantizationErrorMethod.MSE, relu_bound_to_power_of_2=True, weights_bias_correction=True)
     The QuantizationConfig instance can then be passed to
@@ -107,7 +105,6 @@ def __init__(self,
         self.relu_bound_to_power_of_2 = relu_bound_to_power_of_2
         self.weights_bias_correction = weights_bias_correction
         self.weights_second_moment_correction = weights_second_moment_correction
-        self.weights_per_channel_threshold = weights_per_channel_threshold
         self.activation_channel_equalization = activation_channel_equalization
         self.input_scaling = input_scaling
         self.softmax_shift = softmax_shift
@@ -126,11 +123,6 @@ def __repr__(self):
 
 
 # Default quantization configuration the library uses.
-DEFAULTCONFIG = QuantizationConfig(QuantizationErrorMethod.MSE,
-                                   QuantizationErrorMethod.MSE,
-                                   relu_bound_to_power_of_2=False,
-                                   weights_bias_correction=True,
-                                   weights_second_moment_correction=False,
-                                   weights_per_channel_threshold=True,
-                                   input_scaling=False,
-                                   softmax_shift=False)
+DEFAULTCONFIG = QuantizationConfig(QuantizationErrorMethod.MSE, QuantizationErrorMethod.MSE,
+                                   relu_bound_to_power_of_2=False, weights_bias_correction=True,
+                                   weights_second_moment_correction=False, input_scaling=False, softmax_shift=False)
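
The net effect in this first file: weights_per_channel_threshold is removed from QuantizationConfig entirely, so per-channel weight quantization is presumably governed by the target platform capabilities rather than by this flag. Below is a minimal usage sketch of the post-change config; the mct.ptq.keras_post_training_quantization entry point and the CoreConfig wiring are assumptions for illustration, not part of this diff.

import numpy as np
import model_compression_toolkit as mct

# Post-change config: note there is no weights_per_channel_threshold argument anymore.
qc = mct.core.QuantizationConfig(
    activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING,
    weights_error_method=mct.core.QuantizationErrorMethod.MSE,
    relu_bound_to_power_of_2=True,
    weights_bias_correction=True)

def representative_data_gen():
    # Hypothetical calibration generator: ten random batches shaped like the model input.
    for _ in range(10):
        yield [np.random.randn(1, 224, 224, 3).astype(np.float32)]

# Assumed PTQ entry point; float_model is a pre-trained tf.keras model defined elsewhere.
quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(
    float_model,
    representative_data_gen,
    core_config=mct.core.CoreConfig(quantization_config=qc))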
(next file)

@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 import os
+from pathlib import Path
 import tempfile
 from typing import Callable
@@ -56,14 +57,20 @@ def export(self):
         """
         # Use Keras exporter to quantize model's weights before converting it to TFLite.
-        # Since exporter saves the model, we use a tmp path for saving, and then we delete it automatically.
-        with tempfile.NamedTemporaryFile(suffix=DEFAULT_KERAS_EXPORT_EXTENTION) as tmp_file:
-            FakelyQuantKerasExporter(self.model,
-                                     self.is_layer_exportable_fn,
-                                     tmp_file.name,
-                                     verbose=False).export()
+        # Since exporter saves the model, we use a tmp path for saving, and then we delete it.
+        handle, tmp_file = tempfile.mkstemp(DEFAULT_KERAS_EXPORT_EXTENTION)
+        # Close the handle right away; the file is going to be reopened by the Keras exporter.
+        os.close(handle)
+        try:
+            custom_objects = FakelyQuantKerasExporter(self.model,
+                                                      self.is_layer_exportable_fn,
+                                                      tmp_file,
+                                                      verbose=False).export()
 
-            model = keras_load_quantized_model(tmp_file.name)
+            model = keras_load_quantized_model(tmp_file)
+        finally:
+            # Ensure the artifact is removed even in case of error.
+            Path(tmp_file).unlink(missing_ok=True)
 
         self.exported_model = tf.lite.TFLiteConverter.from_keras_model(model).convert()
         Logger.info(f'Exporting FQ tflite model to: {self.save_model_path}')
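
The likely motivation for replacing NamedTemporaryFile with mkstemp: NamedTemporaryFile keeps its handle open, and on Windows an open temporary file generally cannot be reopened by name, so the Keras exporter could not save to it. mkstemp hands back a handle that can be closed immediately plus a plain path, and the try/finally guarantees cleanup. A standalone sketch of the same pattern (my own illustration, not repository code; Path.unlink(missing_ok=True) requires Python 3.8+):

import os
import tempfile
from pathlib import Path

def roundtrip_via_temp_path(write_fn, read_fn, suffix=".keras"):
    # Create the file, then close our handle so write_fn can reopen it by path
    # (required on Windows, where an open temp file cannot be reopened by name).
    handle, tmp_path = tempfile.mkstemp(suffix)
    os.close(handle)
    try:
        write_fn(tmp_path)        # e.g. model.save(tmp_path)
        return read_fn(tmp_path)  # e.g. keras_load_quantized_model(tmp_path)
    finally:
        # Remove the artifact even if write_fn or read_fn raised.
        Path(tmp_path).unlink(missing_ok=True)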
(next file)

@@ -26,8 +26,11 @@
 # ONNX opset version 16 is supported from PyTorch 1.12
 if version.parse(torch.__version__) < version.parse("1.12"):
     OPSET_VERSION = 15
-else:
+elif version.parse("1.12.0") <= version.parse(torch.__version__) < version.parse("1.13.0"):
     OPSET_VERSION = 16
+else:
+    # ONNX opset version 17 is supported from PyTorch 1.13
+    OPSET_VERSION = 17
 
 
 class FakelyQuantONNXPyTorchExporter(BasePyTorchExporter):
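
This gate selects the highest ONNX opset the installed PyTorch can emit (15 below 1.12, 16 for 1.12.x, 17 from 1.13). For context, a hedged sketch of how such a constant is typically consumed by torch.onnx.export; the toy module and output path are placeholders, not taken from this repository:

import torch

# Placeholder module standing in for the model being exported.
model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU()).eval()
dummy_input = torch.randn(1, 3, 224, 224)

torch.onnx.export(model,
                  dummy_input,
                  "model.onnx",                 # placeholder output path
                  opset_version=OPSET_VERSION)  # 15, 16, or 17 per the gate above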
(next file)

@@ -70,8 +70,7 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           False, False, True)
+                                           mct.core.QuantizationErrorMethod.NOCLIPPING, False, False)
 
     def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
         # check the conv weights after the bn folding
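
In this and the following test files, the positional booleans follow the __init__ order shown in the first file: the third argument is relu_bound_to_power_of_2, the fourth is weights_bias_correction, and the fifth positional True that used to bind to weights_per_channel_threshold is simply dropped. The call above, restated with keywords (my restatement; equivalent by the signature shown earlier):

import model_compression_toolkit as mct

qc = mct.core.QuantizationConfig(
    activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING,
    weights_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING,
    relu_bound_to_power_of_2=False,
    weights_bias_correction=False)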
(next file)

@@ -82,9 +82,7 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING,
                                            weights_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           relu_bound_to_power_of_2=True,
-                                           weights_bias_correction=False,
-                                           weights_per_channel_threshold=self.per_channel)
+                                           relu_bound_to_power_of_2=True, weights_bias_correction=False)
 
     def get_gptq_config(self):
         return GradientPTQConfig(5, optimizer=tf.keras.optimizers.Adam(
(next file)

@@ -39,8 +39,7 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           input_scaling=True)
+                                           mct.core.QuantizationErrorMethod.NOCLIPPING, input_scaling=True)
 
     def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
         fi = 2 if isinstance(float_model.layers[1], layers.ZeroPadding2D) else 1
(next file)

@@ -48,8 +48,7 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           False, False, True)
+                                           mct.core.QuantizationErrorMethod.NOCLIPPING, False, False)
 
     def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
         y = float_model.predict(input_x)
(next file)

@@ -43,8 +43,7 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           False, False, True)
+                                           mct.core.QuantizationErrorMethod.NOCLIPPING, False, False)
 
     def create_networks(self):
         inputs = tf.keras.layers.Input(shape=self.get_input_shapes()[0][1:])
(next file)

@@ -61,7 +61,6 @@ def get_quantization_config(self):
                                            mct.core.QuantizationErrorMethod.MSE,
                                            relu_bound_to_power_of_2=False,
                                            weights_bias_correction=True,
-                                           weights_per_channel_threshold=True,
                                            input_scaling=False,
                                            activation_channel_equalization=False)
 
@@ -419,13 +418,9 @@ def get_input_shapes(self):
         return [[self.val_batch_size, 224, 244, 3] for _ in range(self.num_of_inputs)]
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           relu_bound_to_power_of_2=False,
-                                           weights_bias_correction=True,
-                                           weights_per_channel_threshold=True,
-                                           input_scaling=False,
-                                           activation_channel_equalization=False)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           relu_bound_to_power_of_2=False, weights_bias_correction=True,
+                                           input_scaling=False, activation_channel_equalization=False)
 
     def get_mixed_precision_v2_config(self):
         return mct.core.MixedPrecisionQuantizationConfigV2(num_of_images=self.num_of_inputs)
(next file)

@@ -32,9 +32,8 @@ def get_tpc(self):
         return get_16bit_tpc("multi_input_test")
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           True, True, True, input_scaling=True)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           True, True, input_scaling=True)
 
     def get_input_shapes(self):
         return [[self.val_batch_size, 224, 224, 3]]
(next file)

@@ -34,8 +34,7 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           True, False, True)
+                                           mct.core.QuantizationErrorMethod.NOCLIPPING, True, False)
 
     def get_input_shapes(self):
         return [[self.val_batch_size, 224, 244, 3]]
(next file)

@@ -41,9 +41,8 @@ def get_tpc(self):
         return get_16bit_tpc("nested_multi_inputs_test")
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           True, True, True)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           True, True)
 
     def get_input_shapes(self):
         return [[self.val_batch_size, 236, 236, 3]]
(next file)

@@ -41,9 +41,8 @@ def get_tpc(self):
         return get_16bit_tpc("nested_multi_outputs_test")
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           True, True, True)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           True, True)
 
     def get_input_shapes(self):
         return [[self.val_batch_size, 236, 236, 3]]
(next file)

@@ -63,9 +63,8 @@ def get_tpc(self):
         return generate_keras_tpc(name="scope_filter_test", tp_model=tp_model)
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           False, False, True)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           False, False)
 
     def get_debug_config(self):
         # first rule is to check that the scope filter catches the 2 convs with
@@ -143,9 +142,8 @@ def get_tpc(self):
         return generate_keras_tpc(name="name_filter_test", tp_model=tp_model)
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           False, False, True)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           False, False)
 
     def get_debug_config(self):
         network_editor = [EditRule(filter=NodeNameFilter(self.node_to_change_name),
@@ -215,9 +213,8 @@ def get_tpc(self):
         return generate_keras_tpc(name="type_filter_test", tp_model=tp_model)
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           False, False, False)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           False, False)
 
     def get_debug_config(self):
         network_editor = [EditRule(filter=NodeTypeFilter(self.type_to_change),
(next file)

@@ -41,8 +41,7 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           False, False, True)
+                                           mct.core.QuantizationErrorMethod.NOCLIPPING, False, False)
 
     def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
         y = float_model.predict(input_x)
(next file)

@@ -47,13 +47,9 @@ def get_tpc(self):
                                    name="reused_layer_mp_test")
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           relu_bound_to_power_of_2=True,
-                                           weights_bias_correction=True,
-                                           weights_per_channel_threshold=True,
-                                           input_scaling=True,
-                                           activation_channel_equalization=True)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           relu_bound_to_power_of_2=True, weights_bias_correction=True,
+                                           input_scaling=True, activation_channel_equalization=True)
 
     def get_mixed_precision_v2_config(self):
         return MixedPrecisionQuantizationConfigV2()
(next file)

@@ -55,8 +55,8 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
-                                           relu_bound_to_power_of_2=False, weights_bias_correction=False,
-                                           weights_per_channel_threshold=True, activation_channel_equalization=True)
+                                           relu_bound_to_power_of_2=False, weights_bias_correction=False,
+                                           activation_channel_equalization=True)
 
     def create_networks(self):
         inputs = layers.Input(shape=self.get_input_shapes()[0][1:])
(next file)

@@ -53,10 +53,8 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
-                                           False, False, weights_per_channel_threshold=True,
-                                           shift_negative_activation_correction=True,
-                                           shift_negative_ratio=np.inf,
-                                           shift_negative_params_search=self.param_search)
+                                           False, False, shift_negative_activation_correction=True,
+                                           shift_negative_ratio=np.inf, shift_negative_params_search=self.param_search)
 
     def create_networks(self):
         inputs = layers.Input(shape=self.get_input_shapes()[0][1:])
(next file)

@@ -33,9 +33,8 @@ def get_tpc(self):
         return get_16bit_tpc("split_conv_bug_test")
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           True, True, True, input_scaling=True)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           True, True, input_scaling=True)
 
     def get_input_shapes(self):
         return [[self.val_batch_size, 224, 224, 3]]
(remaining changed files not loaded)