From 838f83c0999cffac8e02cb0d6b4b938658282f71 Mon Sep 17 00:00:00 2001
From: julesmuhizi
Date: Tue, 27 Apr 2021 19:06:08 +0000
Subject: [PATCH 01/10] Add support for qdense_batchnorm in QKeras

---
 qkeras/qdense_batchnorm.py | 329 +++++++++++++++++++++++++++++++++++++
 1 file changed, 329 insertions(+)
 create mode 100644 qkeras/qdense_batchnorm.py

diff --git a/qkeras/qdense_batchnorm.py b/qkeras/qdense_batchnorm.py
new file mode 100644
index 00000000..df24ab2c
--- /dev/null
+++ b/qkeras/qdense_batchnorm.py
@@ -0,0 +1,329 @@
+# Copyright 2020 Google LLC
+#
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Fold batchnormalization with previous QDense layers."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sys
+import warnings
+
+import numpy as np
+import six
+
+
+from qkeras.qlayers import QDense
+from qkeras.quantizers import *
+
+import tensorflow.compat.v2 as tf
+from tensorflow.keras import layers
+from tensorflow.python.framework import smart_cond as tf_utils
+from tensorflow.python.ops import math_ops
+
+tf.compat.v2.enable_v2_behavior()
+
+
+class QDenseBatchnorm(QDense):
+  """Implements a quantized Dense layer fused with Batchnorm."""
+
+  def __init__(
+      self,
+      units,
+      activation=None,
+      use_bias=True,
+      kernel_initializer="he_normal",
+      bias_initializer="zeros",
+      kernel_regularizer=None,
+      bias_regularizer=None,
+      activity_regularizer=None,
+      kernel_constraint=None,
+      bias_constraint=None,
+      kernel_quantizer=None,
+      bias_quantizer=None,
+      kernel_range=None,
+      bias_range=None,
+
+      # batchnorm params
+      axis=-1,
+      momentum=0.99,
+      epsilon=0.001,
+      center=True,
+      scale=True,
+      beta_initializer="zeros",
+      gamma_initializer="ones",
+      moving_mean_initializer="zeros",
+      moving_variance_initializer="ones",
+      beta_regularizer=None,
+      gamma_regularizer=None,
+      beta_constraint=None,
+      gamma_constraint=None,
+      renorm=False,
+      renorm_clipping=None,
+      renorm_momentum=0.99,
+      fused=None,
+      trainable=True,
+      virtual_batch_size=None,
+      adjustment=None,
+
+      # other params
+      ema_freeze_delay=None,
+      folding_mode="ema_stats_folding",
+      **kwargs):
+
+    super(QDenseBatchnorm, self).__init__(
+        units=units,
+        activation=activation,
+        use_bias=use_bias,
+        kernel_initializer=kernel_initializer,
+        bias_initializer=bias_initializer,
+        kernel_regularizer=kernel_regularizer,
+        bias_regularizer=bias_regularizer,
+        activity_regularizer=activity_regularizer,
+        kernel_constraint=kernel_constraint,
+        bias_constraint=bias_constraint,
+        kernel_quantizer=kernel_quantizer,
+        bias_quantizer=bias_quantizer,
+        kernel_range=kernel_range,
+        bias_range=bias_range,
+        **kwargs)
+
+    # initialization of batchnorm part of the composite layer
+    self.batchnorm = layers.BatchNormalization(
+        axis=axis, momentum=momentum, epsilon=epsilon, center=center,
+        scale=scale, beta_initializer=beta_initializer,
+        gamma_initializer=gamma_initializer,
+        moving_mean_initializer=moving_mean_initializer,
+        moving_variance_initializer=moving_variance_initializer,
+        beta_regularizer=beta_regularizer,
+        gamma_regularizer=gamma_regularizer,
+        beta_constraint=beta_constraint, gamma_constraint=gamma_constraint,
+        renorm=renorm, renorm_clipping=renorm_clipping,
+        renorm_momentum=renorm_momentum, fused=fused, trainable=trainable,
+        virtual_batch_size=virtual_batch_size, adjustment=adjustment
+    )
+
+    self.ema_freeze_delay = ema_freeze_delay
+    assert folding_mode in ["ema_stats_folding", "batch_stats_folding"]
+    self.folding_mode = folding_mode
+
+  def build(self, input_shape):
+    super(QDenseBatchnorm, self).build(input_shape)
+
+    # self._iteration (i.e., training_steps) is initialized with -1. When
+    # loading ckpt, it can load the number of training steps that have been
+    # previously trained; if training starts from scratch, it stays at -1.
+    # TODO(lishanok): develop a way to count iterations outside layer
+    self._iteration = tf.Variable(-1, trainable=False, name="iteration",
+                                  dtype=tf.int64)
+
+  def call(self, inputs, training=None):
+
+    # numpy value, mark the layer is in training
+    training = self.batchnorm._get_training_value(training)  # pylint: disable=protected-access
+
+    # check whether to update batchnorm params
+    if (self.ema_freeze_delay is None) or (self.ema_freeze_delay < 0):
+      # if ema_freeze_delay is None or a negative value, do not freeze bn stats
+      bn_training = tf.cast(training, dtype=bool)
+    else:
+      bn_training = tf.math.logical_and(training, tf.math.less_equal(
+          self._iteration, self.ema_freeze_delay))
+
+    kernel = self.kernel
+
+    # compute qdense output
+    qdense_outputs = tf.keras.backend.dot(
+        inputs,
+        kernel
+    )
+
+    if self.use_bias:
+      bias = self.bias
+      qdense_outputs = tf.keras.backend.bias_add(
+          qdense_outputs,
+          bias,
+          data_format="channels_last")
+    else:
+      bias = 0
+
+    # begin batchnorm
+    _ = self.batchnorm(qdense_outputs, training=bn_training)
+
+    self._iteration.assign_add(tf_utils.smart_cond(
+        training, lambda: tf.constant(1, tf.int64),
+        lambda: tf.constant(0, tf.int64)))
+
+    # calculate mean and variance from current batch
+    bn_shape = qdense_outputs.shape
+    ndims = len(bn_shape)
+    reduction_axes = [i for i in range(ndims) if i not in self.batchnorm.axis]
+    keep_dims = len(self.batchnorm.axis) > 1
+    mean, variance = self.batchnorm._moments(  # pylint: disable=protected-access
+        math_ops.cast(qdense_outputs, self.batchnorm._param_dtype),  # pylint: disable=protected-access
+        reduction_axes,
+        keep_dims=keep_dims)
+
+    # get batchnorm weights
+    gamma = self.batchnorm.gamma
+    beta = self.batchnorm.beta
+    moving_mean = self.batchnorm.moving_mean
+    moving_variance = self.batchnorm.moving_variance
+
+    if self.folding_mode == "batch_stats_folding":
+      # using batch mean and variance in the initial training stage
+      # after sufficient training, switch to moving mean and variance
+      new_mean = tf_utils.smart_cond(
+          bn_training, lambda: mean, lambda: moving_mean)
+      new_variance = tf_utils.smart_cond(
+          bn_training, lambda: variance, lambda: moving_variance)
+
+      # get the inversion factor so that we replace division by multiplication
+      inv = math_ops.rsqrt(new_variance + self.batchnorm.epsilon)
+      if gamma is not None:
+        inv *= gamma
+      # fold bias with bn stats
+      folded_bias = inv * (bias - new_mean) + beta
+
+    elif self.folding_mode == "ema_stats_folding":
+      # We always scale the weights with a correction factor to the long term
+      # statistics prior to quantization.
+      # This ensures that there is no jitter
+      # in the quantized weights due to batch to batch variation. During the
+      # initial phase of training, we undo the scaling of the weights so that
+      # outputs are identical to regular batch normalization. We also modify
+      # the bias terms correspondingly. After sufficient training, switch from
+      # using batch statistics to long term moving averages for batch
+      # normalization.
+
+      # use batch stats for calculating bias before bn freeze, and use moving
+      # stats after bn freeze
+      mv_inv = math_ops.rsqrt(moving_variance + self.batchnorm.epsilon)
+      batch_inv = math_ops.rsqrt(variance + self.batchnorm.epsilon)
+
+      if gamma is not None:
+        mv_inv *= gamma
+        batch_inv *= gamma
+      folded_bias = tf_utils.smart_cond(
+          bn_training,
+          lambda: batch_inv * (bias - mean) + beta,
+          lambda: mv_inv * (bias - moving_mean) + beta)
+      # moving stats is always used to fold kernel in tflite; before bn freeze
+      # an additional correction factor will be applied to the conv2d output
+      # end batchnorm
+      inv = mv_inv
+    else:
+      assert ValueError
+
+    # wrap qdense kernel with bn parameters
+    folded_kernel = inv * kernel
+    # quantize the folded kernel
+    if self.kernel_quantizer is not None:
+      q_folded_kernel = self.kernel_quantizer_internal(folded_kernel)
+    else:
+      q_folded_kernel = folded_kernel
+
+    # If loaded from a ckpt, bias_quantizer is the ckpt value
+    # Else if the layer is called for the first time, in this case bias
+    # quantizer is None and we need to calculate bias quantizer
+    # type according to accumulator type
+
+    if self.bias_quantizer_internal is not None:
+      q_folded_bias = self.bias_quantizer_internal(folded_bias)
+    else:
+      q_folded_bias = folded_bias
+
+    applied_kernel = q_folded_kernel
+    applied_bias = q_folded_bias
+
+    #calculate qdense output using the quantized folded kernel
+    folded_outputs = tf.keras.backend.dot(inputs, applied_kernel)
+
+    if training is True and self.folding_mode == "ema_stats_folding":
+      batch_inv = math_ops.rsqrt(variance + self.batchnorm.epsilon)
+      y_corr = tf_utils.smart_cond(
+          bn_training,
+          lambda: (math_ops.sqrt(moving_variance + self.batchnorm.epsilon) *
+                   math_ops.rsqrt(variance + self.batchnorm.epsilon)),
+          lambda: tf.constant(1.0, shape=moving_variance.shape))
+      folded_outputs = math_ops.mul(folded_outputs, y_corr)
+
+    folded_outputs = tf.keras.backend.bias_add(
+        folded_outputs,
+        applied_bias,
+        data_format="channels_last"
+    )
+
+    if self.activation is not None:
+      return self.activation(folded_outputs)
+
+    return folded_outputs
+
+  def get_config(self):
+    base_config = super().get_config()
+    bn_config = self.batchnorm.get_config()
+    config = {"ema_freeze_delay": self.ema_freeze_delay,
+              "folding_mode": self.folding_mode}
+    name = base_config["name"]
+    out_config = dict(
+        list(base_config.items()) +
+        list(bn_config.items()) + list(config.items()))
+
+    # names from different config override each other; use the base layer name
+    # as this layer's config name
+    out_config["name"] = name
+    return out_config
+
+  def get_quantization_config(self):
+    return {
+        "kernel_quantizer": str(self.kernel_quantizer_internal),
+        "bias_quantizer": str(self.bias_quantizer_internal),
+    }
+  def get_quantizers(self):
+    return self.quantizers
+
+  # def get_prunable_weights(self):
+  #   return [self.kernel]
+
+  def get_folded_weights(self):
+    """Function to get the batchnorm folded weights.
+    This function converts the weights by folding batchnorm parameters into
+    the weight of QDense.
+    The high-level equation:
+    W_fold = gamma * W / sqrt(variance + epsilon)
+    bias_fold = gamma * (bias - moving_mean) / sqrt(variance + epsilon) + beta
+    """
+
+    kernel = self.kernel
+    if self.use_bias:
+      bias = self.bias
+    else:
+      bias = 0
+
+    # get batchnorm weights and moving stats
+    gamma = self.batchnorm.gamma
+    beta = self.batchnorm.beta
+    moving_mean = self.batchnorm.moving_mean
+    moving_variance = self.batchnorm.moving_variance
+
+    # get the inversion factor so that we replace division by multiplication
+    inv = math_ops.rsqrt(moving_variance + self.batchnorm.epsilon)
+    if gamma is not None:
+      inv *= gamma
+
+    # wrap dense kernel and bias with bn parameters
+    folded_kernel = inv * kernel
+    folded_bias = inv * (bias - moving_mean) + beta
+
+    return [folded_kernel, folded_bias]
\ No newline at end of file

From ce5e5cfd12d019ed7c268f015daa6ec19a563b4a Mon Sep 17 00:00:00 2001
From: julesmuhizi
Date: Fri, 30 Apr 2021 14:49:19 +0000
Subject: [PATCH 02/10] added support for quantized object and added layer to __init__

---
 qkeras/__init__.py | 1 +
 qkeras/utils.py | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/qkeras/__init__.py b/qkeras/__init__.py
index 67918c46..3cffdf7f 100644
--- a/qkeras/__init__.py
+++ b/qkeras/__init__.py
@@ -34,6 +34,7 @@
 #from .qtools.settings import cfg
 from .qconv2d_batchnorm import QConv2DBatchnorm
 from .qdepthwiseconv2d_batchnorm import QDepthwiseConv2DBatchnorm
+from .qdense_batchnorm import QDenseBatchnorm
 
 assert tf.executing_eagerly(), "QKeras requires TF with eager execution mode on"
 
diff --git a/qkeras/utils.py b/qkeras/utils.py
index 40ca10c2..876eb0dd 100644
--- a/qkeras/utils.py
+++ b/qkeras/utils.py
@@ -36,6 +36,7 @@
 from .qlayers import Clip
 from .qconv2d_batchnorm import QConv2DBatchnorm
 from .qdepthwiseconv2d_batchnorm import QDepthwiseConv2DBatchnorm
+from .qdense_batchnorm import QDenseBatchnorm
 from .qlayers import QActivation
 from .qlayers import QAdaptiveActivation
 from .qpooling import QAveragePooling2D
@@ -96,6 +97,7 @@
   "QDepthwiseConv2DBatchnorm",
   "QAveragePooling2D",
   "QGlobalAveragePooling2D",
+  "QDenseBatchnorm",
 ]
 
 
@@ -1056,6 +1058,8 @@ def _add_supported_quantized_objects(custom_objects):
   custom_objects["QConv2DBatchnorm"] = QConv2DBatchnorm
   custom_objects["QDepthwiseConv2DBatchnorm"] = QDepthwiseConv2DBatchnorm
+  custom_objects["QDenseBatchnorm"] = QDenseBatchnorm
+
   custom_objects["QAveragePooling2D"] = QAveragePooling2D
   custom_objects["QGlobalAveragePooling2D"] = QGlobalAveragePooling2D
   custom_objects["QScaleShift"] = QScaleShift

From 995dbc3a451083280e9cb23004cc79ed697fbe78 Mon Sep 17 00:00:00 2001
From: Jules Muhizi
Date: Sun, 9 Jan 2022 04:08:36 +0000
Subject: [PATCH 03/10] quantized qdense params-> folded instead of quantized folded params

---
 qkeras/qdense_batchnorm.py | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/qkeras/qdense_batchnorm.py b/qkeras/qdense_batchnorm.py
index df24ab2c..9a026893 100644
--- a/qkeras/qdense_batchnorm.py
+++ b/qkeras/qdense_batchnorm.py
@@ -159,6 +159,14 @@ def call(self, inputs, training=None):
           data_format="channels_last")
     else:
       bias = 0
+    # If loaded from a ckpt, bias_quantizer is the ckpt value
+    # Else if the layer is called for the first time, in this case bias
+    # quantizer is None and we need to calculate bias quantizer
+    # type according to accumulator type
+    if self.bias_quantizer_internal is not None:
+      q_bias = self.bias_quantizer_internal(bias)
+    else:
+      q_bias = bias
 
     # begin batchnorm
     _ = self.batchnorm(qdense_outputs,
                        training=bn_training)
@@ -195,8 +203,9 @@ def call(self, inputs, training=None):
       inv = math_ops.rsqrt(new_variance + self.batchnorm.epsilon)
       if gamma is not None:
         inv *= gamma
+
       # fold bias with bn stats
-      folded_bias = inv * (bias - new_mean) + beta
+      folded_bias = inv * (q_bias - new_mean) + beta
 
     elif self.folding_mode == "ema_stats_folding":
       # We always scale the weights with a correction factor to the long term
@@ -218,8 +227,8 @@ def call(self, inputs, training=None):
         batch_inv *= gamma
       folded_bias = tf_utils.smart_cond(
           bn_training,
-          lambda: batch_inv * (bias - mean) + beta,
-          lambda: mv_inv * (bias - moving_mean) + beta)
+          lambda: batch_inv * (q_bias - mean) + beta,
+          lambda: mv_inv * (q_bias - moving_mean) + beta)
       # moving stats is always used to fold kernel in tflite; before bn freeze
       # an additional correction factor will be applied to the conv2d output
@@ -227,26 +236,16 @@ def call(self, inputs, training=None):
     else:
       assert ValueError
 
-    # wrap qdense kernel with bn parameters
-    folded_kernel = inv * kernel
     # quantize the folded kernel
     if self.kernel_quantizer is not None:
-      q_folded_kernel = self.kernel_quantizer_internal(folded_kernel)
+      q_kernel = self.kernel_quantizer_internal(kernel)
     else:
-      q_folded_kernel = folded_kernel
-
-    # If loaded from a ckpt, bias_quantizer is the ckpt value
-    # Else if the layer is called for the first time, in this case bias
-    # quantizer is None and we need to calculate bias quantizer
-    # type according to accumulator type
-
-    if self.bias_quantizer_internal is not None:
-      q_folded_bias = self.bias_quantizer_internal(folded_bias)
-    else:
-      q_folded_bias = folded_bias
+      q_kernel = kernel
+    # wrap qdense kernel with bn parameters
+    folded_kernel = inv * q_kernel
 
-    applied_kernel = q_folded_kernel
-    applied_bias = q_folded_bias
+    applied_kernel = folded_kernel
+    applied_bias = folded_bias
 
     #calculate qdense output using the quantized folded kernel
     folded_outputs = tf.keras.backend.dot(inputs, applied_kernel)

From b601b5451eff778b3ac9882672dcb7cf8eda415b Mon Sep 17 00:00:00 2001
From: Jules
Date: Tue, 25 Jan 2022 10:00:37 -0600
Subject: [PATCH 04/10] included weight transpose

---
 qkeras/qdense_batchnorm.py | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/qkeras/qdense_batchnorm.py b/qkeras/qdense_batchnorm.py
index 9a026893..836f5d46 100644
--- a/qkeras/qdense_batchnorm.py
+++ b/qkeras/qdense_batchnorm.py
@@ -159,14 +159,6 @@ def call(self, inputs, training=None):
           data_format="channels_last")
     else:
       bias = 0
-    # If loaded from a ckpt, bias_quantizer is the ckpt value
-    # Else if the layer is called for the first time, in this case bias
-    # quantizer is None and we need to calculate bias quantizer
-    # type according to accumulator type
-    if self.bias_quantizer_internal is not None:
-      q_bias = self.bias_quantizer_internal(bias)
-    else:
-      q_bias = bias
 
     # begin batchnorm
     _ = self.batchnorm(qdense_outputs, training=bn_training)
@@ -205,7 +197,7 @@ def call(self, inputs, training=None):
       inv *= gamma
 
       # fold bias with bn stats
-      folded_bias = inv * (q_bias - new_mean) + beta
+      folded_bias = inv * (bias - new_mean) + beta
 
     elif self.folding_mode == "ema_stats_folding":
       # We always scale the weights with a correction factor to the long term
@@ -227,8 +219,8 @@ def call(self, inputs, training=None):
       batch_inv *= gamma
       folded_bias = tf_utils.smart_cond(
           bn_training,
-          lambda: batch_inv * (q_bias - mean) + beta,
-          lambda: mv_inv * (q_bias - moving_mean) + beta)
+          lambda: batch_inv * (bias -
mean) + beta,
+          lambda: mv_inv * (bias - moving_mean) + beta)
       # moving stats is always used to fold kernel in tflite; before bn freeze
       # an additional correction factor will be applied to the conv2d output
@@ -236,16 +228,22 @@ def call(self, inputs, training=None):
     else:
       assert ValueError
 
+    # wrap dense kernel with bn parameters
+    folded_kernel = inv*kernel
     # quantize the folded kernel
     if self.kernel_quantizer is not None:
-      q_kernel = self.kernel_quantizer_internal(kernel)
+      q_folded_kernel = self.kernel_quantizer_internal(folded_kernel)
+    else:
+      q_folded_kernel = folded_kernel
+
+    #quantize the folded bias
+    if self.bias_quantizer_internal is not None:
+      q_folded_bias = self.bias_quantizer_internal(folded_bias)
     else:
-      q_kernel = kernel
-    # wrap qdense kernel with bn parameters
-    folded_kernel = inv * q_kernel
+      q_folded_bias = folded_bias
 
-    applied_kernel = folded_kernel
-    applied_bias = folded_bias
+    applied_kernel = q_folded_kernel
+    applied_bias = q_folded_bias
 
     #calculate qdense output using the quantized folded kernel
     folded_outputs = tf.keras.backend.dot(inputs, applied_kernel)
@@ -290,8 +288,9 @@ def get_quantization_config(self):
       "kernel_quantizer": str(self.kernel_quantizer_internal),
       "bias_quantizer": str(self.bias_quantizer_internal),
     }
-  def get_quantizers(self):
-    return self.quantizers
+
+  def get_quantizers(self):
+    return self.quantizers
 
   # def get_prunable_weights(self):
   #   return [self.kernel]

From ddb529758c42ac82db663b75800094550bfd53dd Mon Sep 17 00:00:00 2001
From: Jules Muhizi <54924909+julesmuhizi@users.noreply.github.com>
Date: Fri, 1 Apr 2022 23:44:41 -0400
Subject: [PATCH 05/10] update model_save_quantized_weights to include QDenseBN

---
 qkeras/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qkeras/utils.py b/qkeras/utils.py
index 876eb0dd..a63d4ac8 100644
--- a/qkeras/utils.py
+++ b/qkeras/utils.py
@@ -266,7 +266,7 @@ def model_save_quantized_weights(model, filename=None):
     hw_weights = []
 
     if any(isinstance(layer, t) for t in [
-        QConv2DBatchnorm, QDepthwiseConv2DBatchnorm]):
+        QConv2DBatchnorm, QDenseBatchnorm, QDepthwiseConv2DBatchnorm]):
       qs = layer.get_quantizers()
       ws = layer.get_folded_weights()
     elif any(isinstance(layer, t) for t in [QSimpleRNN, QLSTM, QGRU]):

From 2ffe832922529f84637ebd89eca13dde053aa43b Mon Sep 17 00:00:00 2001
From: Jules Muhizi <54924909+julesmuhizi@users.noreply.github.com>
Date: Sat, 2 Apr 2022 00:31:46 -0400
Subject: [PATCH 06/10] update utils.py for support QDenseBN quant model save

---
 qkeras/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qkeras/utils.py b/qkeras/utils.py
index a63d4ac8..80d3ba82 100644
--- a/qkeras/utils.py
+++ b/qkeras/utils.py
@@ -382,7 +382,7 @@ def model_save_quantized_weights(model, filename=None):
       if has_scale:
         saved_weights[layer.name]["scales"] = scales
 
       if not any(isinstance(layer, t) for t in [
-          QConv2DBatchnorm, QDepthwiseConv2DBatchnorm]):
+          QConv2DBatchnorm, QDenseBatchnorm, QDepthwiseConv2DBatchnorm]):
         # Set layer weights in the format that software inference uses
         layer.set_weights(weights)
       else:

From 320372124f4a101c1b96a9c65c11d3eb35119e9f Mon Sep 17 00:00:00 2001
From: Javier Duarte
Date: Mon, 24 Oct 2022 14:50:41 -0700
Subject: [PATCH 07/10] add 1 test

---
 tests/bn_folding_test.py | 126 +++++++++++++++++++++++++++++++--------
 1 file changed, 102 insertions(+), 24 deletions(-)

diff --git a/tests/bn_folding_test.py b/tests/bn_folding_test.py
index ef152da1..1169fc9f 100644
--- a/tests/bn_folding_test.py
+++
b/tests/bn_folding_test.py
@@ -29,9 +29,11 @@
 from tensorflow.keras.backend import clear_session
 from tensorflow.keras.utils import to_categorical
 from tensorflow.keras import metrics
+import pytest
 
 from qkeras import QConv2DBatchnorm
 from qkeras import QConv2D
+from qkeras import QDenseBatchnorm
 from qkeras import QDense
 from qkeras import QActivation
 from qkeras import QDepthwiseConv2D
@@ -110,7 +112,7 @@ def get_qconv2d_batchnorm_model(input_shape, kernel_size, folding_mode,
   return model
 
 
-def get_models_with_one_layer(kernel_quantizer, folding_mode, ema_freeze_delay):
+def get_conv2d_models_with_one_layer(kernel_quantizer, folding_mode, ema_freeze_delay):
 
   x_shape = (2, 2, 1)
   loss_fn = tf.keras.losses.MeanSquaredError()
@@ -164,6 +166,60 @@ def get_models_with_one_layer(kernel_quantizer, folding_mode, ema_freeze_delay):
   return (unfold_model, fold_model)
 
 
+def get_dense_models_with_one_layer(kernel_quantizer, folding_mode, ema_freeze_delay):
+
+  x_shape = (4,)
+  loss_fn = tf.keras.losses.MeanSquaredError()
+  optimizer = get_sgd_optimizer(learning_rate=1e-3)
+
+  # define a model with separate dense and bn layers
+  x = x_in = layers.Input(x_shape, name="input")
+  x = QDense(
+      2,
+      kernel_initializer="ones",
+      bias_initializer="zeros", use_bias=False,
+      kernel_quantizer=kernel_quantizer, bias_quantizer=None,
+      name="conv2d")(x)
+  x = layers.BatchNormalization(
+      axis=-1,
+      momentum=0.99,
+      epsilon=0.001,
+      center=True,
+      scale=True,
+      beta_initializer="zeros",
+      gamma_initializer="ones",
+      moving_mean_initializer="zeros",
+      moving_variance_initializer="ones",
+      beta_regularizer=None,
+      gamma_regularizer=None,
+      beta_constraint=None,
+      gamma_constraint=None,
+      renorm=False,
+      renorm_clipping=None,
+      renorm_momentum=0.99,
+      fused=None,
+      trainable=True,
+      virtual_batch_size=None,
+      adjustment=None,
+      name="bn")(x)
+  unfold_model = Model(inputs=[x_in], outputs=[x])
+  unfold_model.compile(loss=loss_fn, optimizer=optimizer, metrics="acc")
+
+  x = x_in = layers.Input(x_shape, name="input")
+  x = QDenseBatchnorm(
+      2,
+      kernel_initializer="ones", bias_initializer="zeros", use_bias=False,
+      kernel_quantizer=kernel_quantizer, beta_initializer="zeros",
+      gamma_initializer="ones", moving_mean_initializer="zeros",
+      moving_variance_initializer="ones", folding_mode=folding_mode,
+      ema_freeze_delay=ema_freeze_delay,
+      name="foldconv2d")(x)
+  fold_model = Model(inputs=[x_in], outputs=[x])
+  fold_model.compile(loss=loss_fn, optimizer=optimizer, metrics="acc")
+
+  return (unfold_model, fold_model)
+
+
 def get_debug_model(model):
   layer_output_list = []
   for layer in model.layers:
@@ -181,10 +237,7 @@ def generate_dataset(train_size=10,
                      output_shape=None):
   """create tf.data.Dataset with shape: (N,) + input_shape."""
-  x_train = np.random.randint(
-      4, size=(train_size, input_shape[0], input_shape[1], input_shape[2]))
-  x_train = np.random.rand(
-      train_size, input_shape[0], input_shape[1], input_shape[2])
+  x_train = np.random.rand(*(train_size,) + input_shape)
 
   if output_shape:
     y_train = np.random.random_sample((train_size,) + output_shape)
@@ -399,7 +452,8 @@ def test_loading():
   assert_equal(weight1[1], weight2[1])
 
 
-def test_same_training_and_prediction():
+@pytest.mark.parametrize("model_name", ["conv2d", "dense"])
+def test_same_training_and_prediction(model_name):
   """test if fold/unfold layer has the same training and prediction output."""
 
   epochs = 5
@@ -407,23 +461,39 @@ def test_same_training_and_prediction():
   loss_metric = metrics.Mean()
   optimizer = get_sgd_optimizer(learning_rate=1e-3)
 
-  x_shape = (2,
2, 1)
-  kernel = np.array([[[[1., 1.]], [[1., 0.]]], [[[1., 1.]], [[0., 1.]]]])
-  gamma = np.array([2., 1.])
-  beta = np.array([0., 1.])
-  moving_mean = np.array([1., 1.])
-  moving_variance = np.array([1., 2.])
+  if model_name == "conv2d":
+    x_shape = (2, 2, 1)
+    kernel = np.array([[[[1., 1.]], [[1., 0.]]], [[[1., 1.]], [[0., 1.]]]])
+    gamma = np.array([2., 1.])
+    beta = np.array([0., 1.])
+    moving_mean = np.array([1., 1.])
+    moving_variance = np.array([1., 2.])
+  elif model_name == "dense":
+    x_shape = (4,)
+    kernel = np.array([[1., 1.], [1., 0.], [1., 1.], [0., 1.]])
+    gamma = np.array([2., 1.])
+    beta = np.array([0., 1.])
+    moving_mean = np.array([1., 1.])
+    moving_variance = np.array([1., 2.])
   iteration = np.array(-1)
 
   train_ds = generate_dataset(train_size=10, batch_size=10,
                               input_shape=x_shape, num_class=2)
 
-  (unfold_model, fold_model_batch) = get_models_with_one_layer(
-      kernel_quantizer=None, folding_mode="batch_stats_folding",
-      ema_freeze_delay=10)
-  (_, fold_model_ema) = get_models_with_one_layer(
-      kernel_quantizer=None, folding_mode="ema_stats_folding",
-      ema_freeze_delay=10)
+  if model_name == "conv2d":
+    (unfold_model, fold_model_batch) = get_conv2d_models_with_one_layer(
+        kernel_quantizer=None, folding_mode="batch_stats_folding",
+        ema_freeze_delay=10)
+    (_, fold_model_ema) = get_conv2d_models_with_one_layer(
+        kernel_quantizer=None, folding_mode="ema_stats_folding",
+        ema_freeze_delay=10)
+  elif model_name == "dense":
+    (unfold_model, fold_model_batch) = get_dense_models_with_one_layer(
+        kernel_quantizer=None, folding_mode="batch_stats_folding",
+        ema_freeze_delay=10)
+    (_, fold_model_ema) = get_dense_models_with_one_layer(
+        kernel_quantizer=None, folding_mode="ema_stats_folding",
+        ema_freeze_delay=10)
 
   unfold_model.layers[1].set_weights([kernel])
   unfold_model.layers[2].set_weights(
       [gamma, beta, moving_mean, moving_variance])
@@ -457,12 +527,20 @@ def test_same_training_and_prediction():
   # models should be different, but the two folding modes should be the same
   epochs = 5
   iteration = np.array(8)
-  (unfold_model, fold_model_batch) = get_models_with_one_layer(
-      kernel_quantizer=None, folding_mode="batch_stats_folding",
-      ema_freeze_delay=10)
-  (_, fold_model_ema) = get_models_with_one_layer(
-      kernel_quantizer=None, folding_mode="ema_stats_folding",
-      ema_freeze_delay=10)
+  if model_name == "conv2d":
+    (unfold_model, fold_model_batch) = get_conv2d_models_with_one_layer(
+        kernel_quantizer=None, folding_mode="batch_stats_folding",
+        ema_freeze_delay=10)
+    (_, fold_model_ema) = get_conv2d_models_with_one_layer(
+        kernel_quantizer=None, folding_mode="ema_stats_folding",
+        ema_freeze_delay=10)
+  elif model_name == "dense":
+    (unfold_model, fold_model_batch) = get_dense_models_with_one_layer(
+        kernel_quantizer=None, folding_mode="batch_stats_folding",
+        ema_freeze_delay=10)
+    (_, fold_model_ema) = get_dense_models_with_one_layer(
+        kernel_quantizer=None, folding_mode="ema_stats_folding",
+        ema_freeze_delay=10)
   unfold_model.layers[1].set_weights([kernel])
   unfold_model.layers[2].set_weights(
       [gamma, beta, moving_mean, moving_variance])

From 16884b32ab00e211e79c188b877eabb24aa1cb1d Mon Sep 17 00:00:00 2001
From: Javier Duarte
Date: Mon, 24 Oct 2022 17:38:20 -0700
Subject: [PATCH 08/10] fix autoqkeras test

---
 tests/autoqkeras_test.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tests/autoqkeras_test.py b/tests/autoqkeras_test.py
index 8d0f5239..02b39a9f 100644
--- a/tests/autoqkeras_test.py
+++ b/tests/autoqkeras_test.py
@@ -35,6 +35,13 @@
 from qkeras.autoqkeras import AutoQKerasScheduler
+def get_adam_optimizer(learning_rate):
+  if hasattr(tf.keras.optimizers, "legacy"):
+    return tf.keras.optimizers.legacy.Adam(learning_rate)
+  else:
+    return tf.keras.optimizers.Adam(learning_rate)
+
+
+
 def dense_model():
   """Creates test dense model."""
@@ -104,7 +111,7 @@ def test_autoqkeras():
   model = dense_model()
   model.summary()
 
-  optimizer = Adam(lr=0.01)
+  optimizer = get_adam_optimizer(learning_rate=0.01)
 
   model.compile(optimizer=optimizer, loss="categorical_crossentropy",
                 metrics=["acc"])
@@ -140,7 +147,7 @@ def test_autoqkeras():
 
   qmodel = autoqk.get_best_model()
 
-  optimizer = Adam(lr=0.01)
+  optimizer = get_adam_optimizer(learning_rate=0.01)
 
   qmodel.compile(optimizer=optimizer, loss="categorical_crossentropy",
                  metrics=["acc"])
   history = qmodel.fit(x_train, y_train, epochs=5, batch_size=150,

From d23596bfe1bfa74eed75c683864f43b07e4018ad Mon Sep 17 00:00:00 2001
From: Javier Duarte
Date: Mon, 24 Oct 2022 17:58:53 -0700
Subject: [PATCH 09/10] increase epochs; set seed

---
 tests/autoqkeras_test.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/autoqkeras_test.py b/tests/autoqkeras_test.py
index 02b39a9f..5d17e9bf 100644
--- a/tests/autoqkeras_test.py
+++ b/tests/autoqkeras_test.py
@@ -18,6 +18,7 @@
 import tempfile
 import numpy as np
 import pytest
+import random
 from sklearn.datasets import load_iris
 from sklearn.preprocessing import MinMaxScaler
 import tensorflow.compat.v2 as tf
@@ -64,8 +65,10 @@ def dense_model():
 
 def test_autoqkeras():
   """Tests AutoQKeras scheduler."""
-  np.random.seed(42)
-  tf.random.set_seed(42)
+  seed = 42
+  random.seed(seed)
+  np.random.seed(seed)
+  tf.random.set_seed(seed)
 
   x_train, y_train = load_iris(return_X_y=True)
 
@@ -143,14 +146,14 @@ def test_autoqkeras():
   }
 
   autoqk = AutoQKerasScheduler(model, metrics=["acc"], **run_config)
-  autoqk.fit(x_train, y_train, validation_split=0.1, batch_size=150, epochs=4)
+  autoqk.fit(x_train, y_train, validation_split=0.1, batch_size=150, epochs=8)
 
   qmodel = autoqk.get_best_model()
 
   optimizer = get_adam_optimizer(learning_rate=0.01)
   qmodel.compile(optimizer=optimizer, loss="categorical_crossentropy",
                  metrics=["acc"])
-  history = qmodel.fit(x_train, y_train, epochs=5, batch_size=150,
+  history = qmodel.fit(x_train, y_train, epochs=10, batch_size=150,
                        validation_split=0.1)
 
   quantized_acc = history.history["acc"][-1]

From 96f8b9635699695635075ef69db07aadb0901b5e Mon Sep 17 00:00:00 2001
From: Javier Duarte
Date: Tue, 25 Oct 2022 13:27:55 -0700
Subject: [PATCH 10/10] Update bn_folding_test.py

---
 tests/bn_folding_test.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/tests/bn_folding_test.py b/tests/bn_folding_test.py
index 1169fc9f..3b72e29c 100644
--- a/tests/bn_folding_test.py
+++ b/tests/bn_folding_test.py
@@ -464,17 +464,13 @@ def test_same_training_and_prediction(model_name):
   if model_name == "conv2d":
     x_shape = (2, 2, 1)
     kernel = np.array([[[[1., 1.]], [[1., 0.]]], [[[1., 1.]], [[0., 1.]]]])
-    gamma = np.array([2., 1.])
-    beta = np.array([0., 1.])
-    moving_mean = np.array([1., 1.])
-    moving_variance = np.array([1., 2.])
   elif model_name == "dense":
     x_shape = (4,)
     kernel = np.array([[1., 1.], [1., 0.], [1., 1.], [0., 1.]])
-    gamma = np.array([2., 1.])
-    beta = np.array([0., 1.])
-    moving_mean = np.array([1., 1.])
-    moving_variance = np.array([1., 2.])
+  gamma = np.array([2., 1.])
+  beta = np.array([0., 1.])
+  moving_mean = np.array([1., 1.])
+  moving_variance = np.array([1., 2.])
   iteration = np.array(-1)
 
   train_ds = generate_dataset(train_size=10, batch_size=10,
                               input_shape=x_shape,