Commit 090e609

merge main and resolved conflicts

Ofir Gordon authored and Ofir Gordon committed Jan 31, 2024
2 parents 0ff82cd + b62ed4e

Showing 46 changed files with 115 additions and 186 deletions.
@@ -50,7 +50,6 @@ def __init__(self,
                  weights_error_method: QuantizationErrorMethod = QuantizationErrorMethod.MSE,
                  relu_bound_to_power_of_2: bool = False,
                  weights_bias_correction: bool = True,
-                 weights_per_channel_threshold: bool = True,
                  weights_second_moment_correction: bool = False,
                  input_scaling: bool = False,
                  softmax_shift: bool = False,
@@ -73,7 +72,6 @@ def __init__(self,
             relu_bound_to_power_of_2 (bool): Whether to use relu to power of 2 scaling correction or not.
             weights_bias_correction (bool): Whether to use weights bias correction or not.
             weights_second_moment_correction (bool): Whether to use weights second_moment correction or not.
-            weights_per_channel_threshold (bool): Whether to quantize the weights per-channel or not (per-tensor).
             input_scaling (bool): Whether to use input scaling or not.
             softmax_shift (bool): Whether to use softmax shift or not.
             shift_negative_activation_correction (bool): Whether to use shifting negative activation correction or not.
@@ -90,11 +88,11 @@ def __init__(self,
     One may create a quantization configuration to quantize a model according to.
     For example, to quantize a model's weights and activations using thresholds, such that
     weights threshold selection is done using MSE, activation threshold selection is done using NOCLIPPING (min/max),
-    enabling relu_bound_to_power_of_2, weights_bias_correction, and quantizing the weights per-channel,
+    enabling relu_bound_to_power_of_2 and weights_bias_correction,
     one can instantiate a quantization configuration:
     >>> import model_compression_toolkit as mct
-    >>> qc = mct.core.QuantizationConfig(activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING,weights_error_method=mct.core.QuantizationErrorMethod.MSE,relu_bound_to_power_of_2=True,weights_bias_correction=True,weights_per_channel_threshold=True)
+    >>> qc = mct.core.QuantizationConfig(activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING, weights_error_method=mct.core.QuantizationErrorMethod.MSE, relu_bound_to_power_of_2=True, weights_bias_correction=True)
     The QuantizationConfig instance can then be passed to
@@ -107,7 +105,6 @@ def __init__(self,
         self.relu_bound_to_power_of_2 = relu_bound_to_power_of_2
         self.weights_bias_correction = weights_bias_correction
         self.weights_second_moment_correction = weights_second_moment_correction
-        self.weights_per_channel_threshold = weights_per_channel_threshold
         self.activation_channel_equalization = activation_channel_equalization
         self.input_scaling = input_scaling
         self.softmax_shift = softmax_shift
@@ -126,11 +123,6 @@ def __repr__(self):
 
 
 # Default quantization configuration the library uses.
-DEFAULTCONFIG = QuantizationConfig(QuantizationErrorMethod.MSE,
-                                   QuantizationErrorMethod.MSE,
-                                   relu_bound_to_power_of_2=False,
-                                   weights_bias_correction=True,
-                                   weights_second_moment_correction=False,
-                                   weights_per_channel_threshold=True,
-                                   input_scaling=False,
-                                   softmax_shift=False)
+DEFAULTCONFIG = QuantizationConfig(QuantizationErrorMethod.MSE, QuantizationErrorMethod.MSE,
+                                   relu_bound_to_power_of_2=False, weights_bias_correction=True,
+                                   weights_second_moment_correction=False, input_scaling=False, softmax_shift=False)
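
The net effect in this first file: weights_per_channel_threshold is removed from QuantizationConfig entirely, so per-channel weight quantization is presumably governed by the target platform capabilities rather than by this flag. Below is a minimal usage sketch of the post-change config; the mct.ptq.keras_post_training_quantization entry point and the CoreConfig wiring are assumptions for illustration, not part of this diff.

import numpy as np
import model_compression_toolkit as mct

# Post-change config: note there is no weights_per_channel_threshold argument anymore.
qc = mct.core.QuantizationConfig(
    activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING,
    weights_error_method=mct.core.QuantizationErrorMethod.MSE,
    relu_bound_to_power_of_2=True,
    weights_bias_correction=True)

def representative_data_gen():
    # Hypothetical calibration generator: ten random batches shaped like the model input.
    for _ in range(10):
        yield [np.random.randn(1, 224, 224, 3).astype(np.float32)]

# Assumed PTQ entry point; float_model is a pre-trained tf.keras model defined elsewhere.
quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(
    float_model,
    representative_data_gen,
    core_config=mct.core.CoreConfig(quantization_config=qc))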
(next file)

@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 import os
+from pathlib import Path
 import tempfile
 from typing import Callable
@@ -56,14 +57,20 @@ def export(self):
         """
         # Use Keras exporter to quantize model's weights before converting it to TFLite.
-        # Since exporter saves the model, we use a tmp path for saving, and then we delete it automatically.
-        with tempfile.NamedTemporaryFile(suffix=DEFAULT_KERAS_EXPORT_EXTENTION) as tmp_file:
-            FakelyQuantKerasExporter(self.model,
-                                     self.is_layer_exportable_fn,
-                                     tmp_file.name,
-                                     verbose=False).export()
+        # Since exporter saves the model, we use a tmp path for saving, and then we delete it.
+        handle, tmp_file = tempfile.mkstemp(DEFAULT_KERAS_EXPORT_EXTENTION)
+        # Close the handle right away; the file is going to be reopened by the Keras exporter.
+        os.close(handle)
+        try:
+            custom_objects = FakelyQuantKerasExporter(self.model,
+                                                      self.is_layer_exportable_fn,
+                                                      tmp_file,
+                                                      verbose=False).export()
 
-            model = keras_load_quantized_model(tmp_file.name)
+            model = keras_load_quantized_model(tmp_file)
+        finally:
+            # Ensure the artifact is removed even in case of error.
+            Path(tmp_file).unlink(missing_ok=True)
 
         self.exported_model = tf.lite.TFLiteConverter.from_keras_model(model).convert()
         Logger.info(f'Exporting FQ tflite model to: {self.save_model_path}')
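
The likely motivation for replacing NamedTemporaryFile with mkstemp: NamedTemporaryFile keeps its handle open, and on Windows an open temporary file generally cannot be reopened by name, so the Keras exporter could not save to it. mkstemp hands back a handle that can be closed immediately plus a plain path, and the try/finally guarantees cleanup. A standalone sketch of the same pattern (my own illustration, not repository code; Path.unlink(missing_ok=True) requires Python 3.8+):

import os
import tempfile
from pathlib import Path

def roundtrip_via_temp_path(write_fn, read_fn, suffix=".keras"):
    # Create the file, then close our handle so write_fn can reopen it by path
    # (required on Windows, where an open temp file cannot be reopened by name).
    handle, tmp_path = tempfile.mkstemp(suffix)
    os.close(handle)
    try:
        write_fn(tmp_path)        # e.g. model.save(tmp_path)
        return read_fn(tmp_path)  # e.g. keras_load_quantized_model(tmp_path)
    finally:
        # Remove the artifact even if write_fn or read_fn raised.
        Path(tmp_path).unlink(missing_ok=True)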
(next file)

@@ -26,8 +26,11 @@
 # ONNX opset version 16 is supported from PyTorch 1.12
 if version.parse(torch.__version__) < version.parse("1.12"):
     OPSET_VERSION = 15
-else:
+elif version.parse("1.12.0") <= version.parse(torch.__version__) < version.parse("1.13.0"):
     OPSET_VERSION = 16
+else:
+    # ONNX opset version 17 is supported from PyTorch 1.13
+    OPSET_VERSION = 17
 
 
 class FakelyQuantONNXPyTorchExporter(BasePyTorchExporter):
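
This gate selects the highest ONNX opset the installed PyTorch can emit (15 below 1.12, 16 for 1.12.x, 17 from 1.13). For context, a hedged sketch of how such a constant is typically consumed by torch.onnx.export; the toy module and output path are placeholders, not taken from this repository:

import torch

# Placeholder module standing in for the model being exported.
model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU()).eval()
dummy_input = torch.randn(1, 3, 224, 224)

torch.onnx.export(model,
                  dummy_input,
                  "model.onnx",                 # placeholder output path
                  opset_version=OPSET_VERSION)  # 15, 16, or 17 per the gate above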
(next file)

@@ -70,8 +70,7 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           False, False, True)
+                                           mct.core.QuantizationErrorMethod.NOCLIPPING, False, False)
 
     def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
         # check the conv weights after the bn folding
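
In this and the following test files, the positional booleans follow the __init__ order shown in the first file: the third argument is relu_bound_to_power_of_2, the fourth is weights_bias_correction, and the fifth positional True that used to bind to weights_per_channel_threshold is simply dropped. The call above, restated with keywords (my restatement; equivalent by the signature shown earlier):

import model_compression_toolkit as mct

qc = mct.core.QuantizationConfig(
    activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING,
    weights_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING,
    relu_bound_to_power_of_2=False,
    weights_bias_correction=False)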
(next file)

@@ -82,9 +82,7 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING,
                                            weights_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           relu_bound_to_power_of_2=True,
-                                           weights_bias_correction=False,
-                                           weights_per_channel_threshold=self.per_channel)
+                                           relu_bound_to_power_of_2=True, weights_bias_correction=False)
 
     def get_gptq_config(self):
         return GradientPTQConfig(5, optimizer=tf.keras.optimizers.Adam(
(next file)

@@ -39,8 +39,7 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           input_scaling=True)
+                                           mct.core.QuantizationErrorMethod.NOCLIPPING, input_scaling=True)
 
     def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
         fi = 2 if isinstance(float_model.layers[1], layers.ZeroPadding2D) else 1
(next file)

@@ -48,8 +48,7 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           False, False, True)
+                                           mct.core.QuantizationErrorMethod.NOCLIPPING, False, False)
 
     def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
         y = float_model.predict(input_x)
(next file)

@@ -43,8 +43,7 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           False, False, True)
+                                           mct.core.QuantizationErrorMethod.NOCLIPPING, False, False)
 
     def create_networks(self):
         inputs = tf.keras.layers.Input(shape=self.get_input_shapes()[0][1:])
(next file)

@@ -61,7 +61,6 @@ def get_quantization_config(self):
                                            mct.core.QuantizationErrorMethod.MSE,
                                            relu_bound_to_power_of_2=False,
                                            weights_bias_correction=True,
-                                           weights_per_channel_threshold=True,
                                            input_scaling=False,
                                            activation_channel_equalization=False)
 
@@ -419,13 +418,9 @@ def get_input_shapes(self):
         return [[self.val_batch_size, 224, 244, 3] for _ in range(self.num_of_inputs)]
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           relu_bound_to_power_of_2=False,
-                                           weights_bias_correction=True,
-                                           weights_per_channel_threshold=True,
-                                           input_scaling=False,
-                                           activation_channel_equalization=False)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           relu_bound_to_power_of_2=False, weights_bias_correction=True,
+                                           input_scaling=False, activation_channel_equalization=False)
 
     def get_mixed_precision_v2_config(self):
         return mct.core.MixedPrecisionQuantizationConfigV2(num_of_images=self.num_of_inputs)
(next file)

@@ -32,9 +32,8 @@ def get_tpc(self):
         return get_16bit_tpc("multi_input_test")
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           True, True, True, input_scaling=True)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           True, True, input_scaling=True)
 
     def get_input_shapes(self):
         return [[self.val_batch_size, 224, 224, 3]]
(next file)

@@ -34,8 +34,7 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           True, False, True)
+                                           mct.core.QuantizationErrorMethod.NOCLIPPING, True, False)
 
     def get_input_shapes(self):
         return [[self.val_batch_size, 224, 244, 3]]
(next file)

@@ -41,9 +41,8 @@ def get_tpc(self):
         return get_16bit_tpc("nested_multi_inputs_test")
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           True, True, True)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           True, True)
 
     def get_input_shapes(self):
         return [[self.val_batch_size, 236, 236, 3]]
(next file)

@@ -41,9 +41,8 @@ def get_tpc(self):
         return get_16bit_tpc("nested_multi_outputs_test")
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           True, True, True)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           True, True)
 
     def get_input_shapes(self):
         return [[self.val_batch_size, 236, 236, 3]]
(next file)

@@ -63,9 +63,8 @@ def get_tpc(self):
         return generate_keras_tpc(name="scope_filter_test", tp_model=tp_model)
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           False, False, True)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           False, False)
 
     def get_debug_config(self):
         # first rule is to check that the scope filter catches the 2 convs with
@@ -143,9 +142,8 @@ def get_tpc(self):
         return generate_keras_tpc(name="name_filter_test", tp_model=tp_model)
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           False, False, True)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           False, False)
 
     def get_debug_config(self):
         network_editor = [EditRule(filter=NodeNameFilter(self.node_to_change_name),
@@ -215,9 +213,8 @@ def get_tpc(self):
         return generate_keras_tpc(name="type_filter_test", tp_model=tp_model)
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           False, False, False)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           False, False)
 
     def get_debug_config(self):
         network_editor = [EditRule(filter=NodeTypeFilter(self.type_to_change),
(next file)

@@ -41,8 +41,7 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           mct.core.QuantizationErrorMethod.NOCLIPPING,
-                                           False, False, True)
+                                           mct.core.QuantizationErrorMethod.NOCLIPPING, False, False)
 
     def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
         y = float_model.predict(input_x)
(next file)

@@ -47,13 +47,9 @@ def get_tpc(self):
                                    name="reused_layer_mp_test")
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           relu_bound_to_power_of_2=True,
-                                           weights_bias_correction=True,
-                                           weights_per_channel_threshold=True,
-                                           input_scaling=True,
-                                           activation_channel_equalization=True)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           relu_bound_to_power_of_2=True, weights_bias_correction=True,
+                                           input_scaling=True, activation_channel_equalization=True)
 
     def get_mixed_precision_v2_config(self):
         return MixedPrecisionQuantizationConfigV2()
(next file)

@@ -55,8 +55,8 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
-                                           relu_bound_to_power_of_2=False, weights_bias_correction=False,
-                                           weights_per_channel_threshold=True, activation_channel_equalization=True)
+                                           relu_bound_to_power_of_2=False, weights_bias_correction=False,
+                                           activation_channel_equalization=True)
 
     def create_networks(self):
         inputs = layers.Input(shape=self.get_input_shapes()[0][1:])
(next file)

@@ -53,10 +53,8 @@ def get_tpc(self):
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
-                                           False, False, weights_per_channel_threshold=True,
-                                           shift_negative_activation_correction=True,
-                                           shift_negative_ratio=np.inf,
-                                           shift_negative_params_search=self.param_search)
+                                           False, False, shift_negative_activation_correction=True,
+                                           shift_negative_ratio=np.inf, shift_negative_params_search=self.param_search)
 
     def create_networks(self):
         inputs = layers.Input(shape=self.get_input_shapes()[0][1:])
(next file)

@@ -33,9 +33,8 @@ def get_tpc(self):
         return get_16bit_tpc("split_conv_bug_test")
 
     def get_quantization_config(self):
-        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE,
-                                           mct.core.QuantizationErrorMethod.MSE,
-                                           True, True, True, input_scaling=True)
+        return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE,
+                                           True, True, input_scaling=True)
 
     def get_input_shapes(self):
         return [[self.val_batch_size, 224, 224, 3]]
(remaining changed files not loaded)