
Commit

Added total memory check to require mixed precision. Fixed tests to accommodate the changes in running mixed precision.
liord committed May 1, 2024
1 parent 072515f commit 01aa86f
Showing 4 changed files with 16 additions and 24 deletions.
@@ -237,5 +237,6 @@ def requires_mixed_precision(in_model: Any,

is_mixed_precision |= target_resource_utilization.weights_memory < total_weights_memory_bytes
is_mixed_precision |= target_resource_utilization.activation_memory < max_activation_tensor_size_bytes
+ is_mixed_precision |= target_resource_utilization.total_memory < total_weights_memory_bytes + max_activation_tensor_size_bytes
is_mixed_precision |= target_resource_utilization.bops < bops_count
return is_mixed_precision
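
For context, here is a minimal, self-contained sketch of the check this hunk extends. The class and helper below are illustrative stand-ins, not MCT's actual ResourceUtilization API, and the byte counts in the usage example are made up:

from dataclasses import dataclass

# Illustrative stand-in for MCT's ResourceUtilization: each field is a target
# budget, defaulting to "unconstrained".
@dataclass
class ResourceUtilizationSketch:
    weights_memory: float = float('inf')
    activation_memory: float = float('inf')
    total_memory: float = float('inf')
    bops: float = float('inf')


def requires_mixed_precision_sketch(target: ResourceUtilizationSketch,
                                    total_weights_memory_bytes: float,
                                    max_activation_tensor_size_bytes: float,
                                    bops_count: float) -> bool:
    # Mixed precision is required if any target budget is tighter than what
    # single-precision (maximal bit-width) quantization would consume.
    is_mixed_precision = False
    is_mixed_precision |= target.weights_memory < total_weights_memory_bytes
    is_mixed_precision |= target.activation_memory < max_activation_tensor_size_bytes
    # The condition this commit adds: the combined budget must also cover the
    # weights plus the largest activation tensor.
    is_mixed_precision |= (target.total_memory
                           < total_weights_memory_bytes + max_activation_tensor_size_bytes)
    is_mixed_precision |= target.bops < bops_count
    return is_mixed_precision


# Usage example: a 20 KB total-memory budget forces mixed precision when the
# weights alone need 18 KB and the largest activation tensor needs 6 KB.
assert requires_mixed_precision_sketch(
    ResourceUtilizationSketch(total_memory=20_000), 18_000, 6_000, 1e9)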
@@ -283,7 +283,7 @@ def compare(self, qat_model, finalize=False, input_x=None, quantization_info=Non


class QATWrappersMixedPrecisionCfgTest(MixedPrecisionActivationBaseTest):
- def __init__(self, unit_test, ru_weights=np.inf, ru_activation=np.inf, expected_mp_cfg=[0, 0, 0, 0]):
+ def __init__(self, unit_test, ru_weights=17919, ru_activation=5407, expected_mp_cfg=[0, 4, 0, 0]):
self.ru_weights = ru_weights
self.ru_activation = ru_activation
self.expected_mp_cfg = expected_mp_cfg
@@ -303,7 +303,6 @@ def run_test(self, **kwargs):

def compare(self, qat_ready_model, quantization_info):

- # check that MP search returns 8 bits configuration for all layers
self.unit_test.assertTrue(all(quantization_info.mixed_precision_cfg == self.expected_mp_cfg))

# check that quantizer gets multiple bits configuration
@@ -99,23 +99,21 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info=


class MixedPercisionSearchTest(MixedPercisionBaseTest):
- def __init__(self, unit_test, distance_metric=MpDistanceWeighting.AVG):
+ def __init__(self, unit_test, distance_metric=MpDistanceWeighting.AVG, expected_mp_config=[0,0]):
super().__init__(unit_test, val_batch_size=2)

+ self.expected_mp_config = expected_mp_config
self.distance_metric = distance_metric

def get_resource_utilization(self):
- # resource utilization is infinity -> should give best model - 8bits
- return ResourceUtilization(np.inf)
+ return ResourceUtilization(17919)

def get_mixed_precision_config(self):
return mct.core.MixedPrecisionQuantizationConfig(num_of_images=1,
distance_weighting_method=self.distance_metric)

def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D)
- assert (quantization_info.mixed_precision_cfg == [0,
-     0]).all()  # resource utilization is infinity -> should give best model - 8bits
+ assert (quantization_info.mixed_precision_cfg == self.expected_mp_config).all()
for i in range(32): # quantized per channel
self.unit_test.assertTrue(
np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 256)
@@ -185,15 +183,13 @@ def create_networks(self):
return model

def get_resource_utilization(self):
- # resource utilization is infinity -> should give best model - 8bits
- return ResourceUtilization(np.inf)
+ return ResourceUtilization(1790)

def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
# We just needed to verify that the graph finalization is working without failing.
# The actual quantization is not interesting for the sake of this test, so we just verify some
# degenerated things to see that everything worked.
- self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == [
-     0])  # resource utilization is infinity -> should give best model - 8bits
+ self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == [1])

dense_layer = get_layers_from_model_by_type(quantized_model, layers.Dense)
self.unit_test.assertTrue(len(dense_layer) == 1)
@@ -342,7 +338,7 @@ def __init__(self, unit_test):
super().__init__(unit_test)

def get_resource_utilization(self):
- return ResourceUtilization(np.inf)
+ return ResourceUtilization(95)

def create_networks(self):
inputs = layers.Input(shape=self.get_input_shapes()[0][1:])
@@ -354,8 +350,7 @@ def create_networks(self):

def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
self.unit_test.assertTrue(len(quantization_info.mixed_precision_cfg) == 1)
- self.unit_test.assertTrue(quantization_info.mixed_precision_cfg[
-     0] == 0)  # Assert model is quantized using 16 bits as ResourceUtilization is inf
+ self.unit_test.assertTrue(quantization_info.mixed_precision_cfg[0] == 1)

def get_tpc(self):
base_config = generate_test_op_qc(activation_n_bits=16,
@@ -404,12 +399,11 @@ def get_tpc(self):

def get_resource_utilization(self):
# resource utilization is infinity -> should give best model - 8bits
- return ResourceUtilization(800)
+ return ResourceUtilization(17919)

def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D)
- assert (quantization_info.mixed_precision_cfg == [0,
-     0]).all()  # resource utilization is infinity -> should give best model - 8bits
+ assert (quantization_info.mixed_precision_cfg == [0, 1]).all()
for i in range(32): # quantized per channel
self.unit_test.assertTrue(
np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 256)
@@ -428,13 +422,11 @@ def get_mixed_precision_config(self):
use_hessian_based_scores=False)

def get_resource_utilization(self):
- # resource utilization is infinity -> should give best model - 8bits
- return ResourceUtilization(np.inf)
+ return ResourceUtilization(17919)

def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D)
- assert (quantization_info.mixed_precision_cfg == [0,
-     0]).all()  # resource utilization is infinity -> should give best model - 8bits
+ assert (quantization_info.mixed_precision_cfg == [1, 0]).all()
for i in range(32): # quantized per channel
self.unit_test.assertTrue(
np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 256)
@@ -212,8 +212,8 @@ def test_mixed_precision_search_4bits_avg_nms(self):
MixedPercisionCombinedNMSTest(self).run_test()

def test_mixed_precision_search(self):
- MixedPercisionSearchTest(self, distance_metric=MpDistanceWeighting.AVG).run_test()
- MixedPercisionSearchTest(self, distance_metric=MpDistanceWeighting.LAST_LAYER).run_test()
+ MixedPercisionSearchTest(self, distance_metric=MpDistanceWeighting.AVG, expected_mp_config=[0, 1]).run_test()
+ MixedPercisionSearchTest(self, distance_metric=MpDistanceWeighting.LAST_LAYER, expected_mp_config=[1, 0]).run_test()

def test_mixed_precision_for_part_weights_layers(self):
MixedPercisionSearchPartWeightsLayersTest(self).run_test()
