From ed0d60493b6bbd358566ac7a5421d19c2eb5212e Mon Sep 17 00:00:00 2001 From: Ofir Gordon Date: Thu, 21 Mar 2024 13:40:43 +0200 Subject: [PATCH] Change KPI to Resource Utilization (#1008) Changing KPI terminology (in mixed precision & pruning constraints and in general model hw footprint evaluation) to Resource Utilization. That includes: - KPI class renaming to ResourceUtilization class - kpi_data API renaming to resource_utilization_data API - Local variable renaming - Functions renaming - Change KPI appearance in comments and documentation --------- Co-authored-by: Ofir Gordon --- README.md | 2 +- .../{KPI.rst => ResourceUtilization.rst} | 6 +- docsrc/source/api/api_docs/index.rst | 6 +- .../api/api_docs/methods/keras_kpi_data.rst | 10 +- .../api/api_docs/methods/pytorch_kpi_data.rst | 10 +- model_compression_toolkit/constants.py | 2 +- model_compression_toolkit/core/__init__.py | 6 +- .../graph/virtual_activation_weights_node.py | 4 +- .../common/mixed_precision/kpi_tools/kpi.py | 112 ---------- .../kpi_tools/kpi_aggregation_methods.py | 105 --------- .../kpi_tools/kpi_functions_mapping.py | 26 --- .../mixed_precision_search_facade.py | 47 ++-- .../mixed_precision_search_manager.py | 206 +++++++++--------- .../__init__.py | 0 .../resource_utilization.py | 114 ++++++++++ .../resource_utilization_data.py} | 38 ++-- .../ru_aggregation_methods.py | 105 +++++++++ .../ru_functions_mapping.py | 26 +++ .../ru_methods.py} | 119 +++++----- .../search_methods/linear_programming.py | 146 +++++++------ .../solution_refinement_procedure.py | 68 +++--- .../common/pruning/greedy_mask_calculator.py | 25 ++- .../common/pruning/mask/per_channel_mask.py | 2 +- .../pruning/mask/per_simd_group_mask.py | 2 +- .../core/common/pruning/pruner.py | 12 +- .../core/common/pruning/pruning_config.py | 2 +- .../virtual_activation_weights_composition.py | 2 +- .../substitutions/weights_activation_split.py | 4 +- .../core/common/user_info.py | 2 +- ...py => resource_utilization_data_facade.py} | 
45 ++-- ...py => resource_utilization_data_facade.py} | 43 ++-- model_compression_toolkit/core/runner.py | 85 ++++---- .../gptq/keras/quantization_facade.py | 16 +- .../gptq/pytorch/quantization_facade.py | 8 +- .../pruning/keras/pruning_facade.py | 18 +- .../pruning/pytorch/pruning_facade.py | 18 +- .../ptq/keras/quantization_facade.py | 20 +- .../ptq/pytorch/quantization_facade.py | 8 +- .../qat/keras/quantization_facade.py | 30 +-- .../qat/pytorch/quantization_facade.py | 14 +- tests/common_tests/base_feature_test.py | 4 +- .../function_tests/test_kpi_object.py | 56 ----- .../test_resource_utilization_object.py | 57 +++++ .../helpers/prep_graph_for_func_test.py | 6 +- .../test_sony_ssd_postprocess_layer.py | 2 +- .../feature_networks/gptq/gptq_test.py | 4 +- .../mixed_precision_bops_test.py | 54 ++--- .../feature_networks/mixed_precision_tests.py | 144 ++++++------ .../network_editor/edit_qc_test.py | 18 +- .../feature_networks/qat/qat_test.py | 8 +- .../reused_layer_mixed_precision_test.py | 6 +- .../weights_mixed_precision_tests.py | 167 +++++++------- .../test_features_runner.py | 64 +++--- .../function_tests/test_doc_examples.py | 4 +- ...a.py => test_resource_utilization_data.py} | 66 +++--- ..._sensitivity_eval_non_suppoerted_output.py | 4 +- .../non_parallel_tests/test_keras_tp_model.py | 2 +- .../test_lp_search_bitwidth.py | 169 +++++++------- .../test_tensorboard_writer.py | 6 +- .../conv2d_conv2dtranspose_pruning_test.py | 4 +- .../networks_tests/conv2d_pruning_test.py | 4 +- .../conv2dtranspose_conv2d_pruning_test.py | 4 +- .../conv2dtranspose_pruning_test.py | 4 +- .../networks_tests/dense_pruning_test.py | 4 +- .../pruning_keras_feature_test.py | 2 +- .../pruning_tests/test_pretrained_models.py | 2 +- ...t.py => resource_utilization_data_test.py} | 56 ++--- .../function_tests/test_doc_examples.py | 4 +- .../function_tests/test_function_runner.py | 28 +-- .../function_tests/test_pytorch_tp_model.py | 2 +- .../model_tests/base_pytorch_test.py | 2 +- 
.../dynamic_size_inputs_test.py | 2 +- .../mixed_precision_activation_test.py | 22 +- .../mixed_precision_bops_test.py | 54 ++--- .../mixed_precision_weights_test.py | 30 +-- .../model_tests/feature_models/qat_test.py | 10 +- .../model_tests/test_feature_models_runner.py | 18 +- .../pruning_pytorch_feature_test.py | 6 +- .../pruning_tests/test_pretrained_models.py | 2 +- tests/test_suite.py | 8 +- .../test_keras_base_quantizer.py | 6 +- .../test_pytorch_base_quantizer.py | 12 +- .../pruning/example_keras_pruning_mnist.ipynb | 12 +- .../ptq/example_keras_effdet_lite0.ipynb | 4 +- ...mple_keras_mobilenet_mixed_precision.ipynb | 14 +- .../keras/ptq/example_keras_yolov8n.ipynb | 8 +- .../keras/ptq/keras_yolov8n_for_imx500.ipynb | 8 +- .../example_pytorch_pruning_mnist.ipynb | 6 +- ...mobilenetv2_cifar100_mixed_precision.ipynb | 14 +- tutorials/quick_start/common/results.py | 4 +- tutorials/quick_start/keras_fw/quant.py | 24 +- tutorials/quick_start/pytorch_fw/quant.py | 20 +- 92 files changed, 1394 insertions(+), 1361 deletions(-) rename docsrc/source/api/api_docs/classes/{KPI.rst => ResourceUtilization.rst} (62%) delete mode 100644 model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi.py delete mode 100644 model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_aggregation_methods.py delete mode 100644 model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_functions_mapping.py rename model_compression_toolkit/core/common/mixed_precision/{kpi_tools => resource_utilization_tools}/__init__.py (100%) create mode 100644 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py rename model_compression_toolkit/core/common/mixed_precision/{kpi_tools/kpi_data.py => resource_utilization_tools/resource_utilization_data.py} (82%) create mode 100644 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py create mode 100644 
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py rename model_compression_toolkit/core/common/mixed_precision/{kpi_tools/kpi_methods.py => resource_utilization_tools/ru_methods.py} (81%) rename model_compression_toolkit/core/keras/{kpi_data_facade.py => resource_utilization_data_facade.py} (60%) rename model_compression_toolkit/core/pytorch/{kpi_data_facade.py => resource_utilization_data_facade.py} (60%) delete mode 100644 tests/common_tests/function_tests/test_kpi_object.py create mode 100644 tests/common_tests/function_tests/test_resource_utilization_object.py rename tests/keras_tests/function_tests/{test_kpi_data.py => test_resource_utilization_data.py} (68%) rename tests/pytorch_tests/function_tests/{kpi_data_test.py => resource_utilization_data_test.py} (72%) diff --git a/README.md b/README.md index 861d95cae..13ebb2779 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,7 @@ This pruning technique is designed to compress models for specific hardware arch taking into account the target platform's Single Instruction, Multiple Data (SIMD) capabilities. By pruning groups of channels (SIMD groups), our approach not only reduces model size and complexity, but ensures that better utilization of channels is in line with the SIMD architecture -for a target KPI of weights memory footprint. +for a target Resource Utilization of weights memory footprint. 
[Keras API](https://sony.github.io/model_optimization/docs/api/experimental_api_docs/methods/keras_pruning_experimental.html) [Pytorch API](https://github.com/sony/model_optimization/blob/main/model_compression_toolkit/pruning/pytorch/pruning_facade.py#L43) diff --git a/docsrc/source/api/api_docs/classes/KPI.rst b/docsrc/source/api/api_docs/classes/ResourceUtilization.rst similarity index 62% rename from docsrc/source/api/api_docs/classes/KPI.rst rename to docsrc/source/api/api_docs/classes/ResourceUtilization.rst index a25c98289..005240e15 100644 --- a/docsrc/source/api/api_docs/classes/KPI.rst +++ b/docsrc/source/api/api_docs/classes/ResourceUtilization.rst @@ -1,11 +1,11 @@ :orphan: -.. _ug-KPI: +.. _ug-ResourceUtilization: -KPI +ResourceUtilization ================================ **Object to configure resources to use when searching for a configuration for the optimized model (such as in mixed-precision, pruning, etc.):** -.. autoclass:: model_compression_toolkit.core.KPI +.. autoclass:: model_compression_toolkit.core.ResourceUtilization diff --git a/docsrc/source/api/api_docs/index.rst b/docsrc/source/api/api_docs/index.rst index fd5bb6e01..bd0a1c95e 100644 --- a/docsrc/source/api/api_docs/index.rst +++ b/docsrc/source/api/api_docs/index.rst @@ -49,12 +49,12 @@ core - :ref:`QuantizationConfig`: Module to configure the quantization process. - :ref:`QuantizationErrorMethod`: Select a method for quantization parameters' selection. - :ref:`MixedPrecisionQuantizationConfig`: Module to configure the quantization process when using mixed-precision PTQ. -- :ref:`KPI`: Module to configure resources to use when searching for a configuration for the optimized model. +- :ref:`ResourceUtilization`: Module to configure resources to use when searching for a configuration for the optimized model. - :ref:`MpDistanceWeighting`: Mixed precision distance metric weighting methods. - :ref:`network_editor`: Module to modify the optimization process for troubleshooting. 
- :ref:`FolderImageLoader`: Class to use an images directory as a representative dataset. -- :ref:`pytorch_kpi_data`: A function to compute KPI data that can be used to calculate the desired target KPI for PyTorch models. -- :ref:`keras_kpi_data`: A function to compute KPI data that can be used to calculate the desired target KPI for Keras models. +- :ref:`pytorch_resource_utilization_data`: A function to compute Resource Utilization data that can be used to calculate the desired target resource utilization for PyTorch models. +- :ref:`keras_resource_utilization_data`: A function to compute Resource Utilization data that can be used to calculate the desired target resource utilization for Keras models. data_generation diff --git a/docsrc/source/api/api_docs/methods/keras_kpi_data.rst b/docsrc/source/api/api_docs/methods/keras_kpi_data.rst index a0205814f..4a10d2149 100644 --- a/docsrc/source/api/api_docs/methods/keras_kpi_data.rst +++ b/docsrc/source/api/api_docs/methods/keras_kpi_data.rst @@ -1,10 +1,10 @@ :orphan: -.. _ug-keras_kpi_data: +.. _ug-keras_resource_utilization_data: -======================================= -Get KPI information for Keras Models -======================================= +======================================================= +Get Resource Utilization information for Keras Models +======================================================= -.. autofunction:: model_compression_toolkit.core.keras_kpi_data +.. autofunction:: model_compression_toolkit.core.keras_resource_utilization_data diff --git a/docsrc/source/api/api_docs/methods/pytorch_kpi_data.rst b/docsrc/source/api/api_docs/methods/pytorch_kpi_data.rst index 86f512eae..df1e55039 100644 --- a/docsrc/source/api/api_docs/methods/pytorch_kpi_data.rst +++ b/docsrc/source/api/api_docs/methods/pytorch_kpi_data.rst @@ -1,10 +1,10 @@ :orphan: -.. _ug-pytorch_kpi_data: +.. 
_ug-pytorch_resource_utilization_data: -======================================= -Get KPI information for PyTorch Models -======================================= +============================================================ +Get Resource Utilization information for PyTorch Models +============================================================ -.. autofunction:: model_compression_toolkit.core.pytorch_kpi_data +.. autofunction:: model_compression_toolkit.core.pytorch_resource_utilization_data diff --git a/model_compression_toolkit/constants.py b/model_compression_toolkit/constants.py index 31ff6625f..9bcf6b68f 100644 --- a/model_compression_toolkit/constants.py +++ b/model_compression_toolkit/constants.py @@ -93,7 +93,7 @@ DEC_RANGE_BOTTOM = 0.97 DEC_RANGE_UPPER = 1.03 -# KPI computation parameters +# Resource utilization computation parameters BITS_TO_BYTES = 8.0 # Default threshold for Softmax layer diff --git a/model_compression_toolkit/core/__init__.py b/model_compression_toolkit/core/__init__.py index 7b993610d..d477d8174 100644 --- a/model_compression_toolkit/core/__init__.py +++ b/model_compression_toolkit/core/__init__.py @@ -21,9 +21,9 @@ from model_compression_toolkit.core.common.mixed_precision import mixed_precision_quantization_config from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig, QuantizationErrorMethod, DEFAULTCONFIG from model_compression_toolkit.core.common.quantization.core_config import CoreConfig -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import MixedPrecisionQuantizationConfig -from model_compression_toolkit.core.keras.kpi_data_facade import keras_kpi_data -from model_compression_toolkit.core.pytorch.kpi_data_facade import 
pytorch_kpi_data +from model_compression_toolkit.core.keras.resource_utilization_data_facade import keras_resource_utilization_data +from model_compression_toolkit.core.pytorch.resource_utilization_data_facade import pytorch_resource_utilization_data from model_compression_toolkit.core.common.mixed_precision.distance_weighting import MpDistanceWeighting diff --git a/model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py b/model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py index b64d3df49..77347a7c7 100644 --- a/model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +++ b/model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py @@ -113,14 +113,14 @@ def __init__(self, origin_node: BaseNode, activation_class: type, fw_attr: dict) class VirtualActivationWeightsNode(BaseNode): """ A node that represents a composition of pair of sequential activation node and weights (kernel) node. - This structure is used for mixed-precision search with bit-operation KPI. + This structure is used for mixed-precision search with bit-operation constraint. The node's candidates are the cartesian product of both nodes' candidates. Important: note that not like regular BaseNode or FunctionalNode, in VirtualActivationWeightsNode the activation candidates config refer to the quantization config of the activation that precedes the linear operation! instead of the output of the linear operation. It is ok, since this node is not meant to be used in a graph for creating an actual model, but only a virtual - representation of the model's graph only for allowing to compute the bit-operations KPI in mixed-precision. + representation of the model's graph only for allowing to compute the bit-operations constraint in mixed-precision. 
""" def __init__(self, diff --git a/model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi.py b/model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi.py deleted file mode 100644 index 2f22529c3..000000000 --- a/model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -from enum import Enum -from typing import Dict, Any - -import numpy as np - - -class KPITarget(Enum): - """ - Targets for which we define KPIs metrics for mixed-precision search. - For each target that we care to consider in a mixed-precision search, there should be defined a set of - kpi computation function, kpi aggregation function, and kpi target (within a KPI object). - - Whenever adding a kpi metric to KPI class we should add a matching target to this enum. - - WEIGHTS - Weights memory KPI metric. - - ACTIVATION - Activation memory KPI metric. - - TOTAL - Total memory KPI metric. - - BOPS - Total Bit-Operations KPI Metric. - - """ - - WEIGHTS = 'weights' - ACTIVATION = 'activation' - TOTAL = 'total' - BOPS = 'bops' - - -class KPI: - """ - Class to represent measurements of performance. 
- """ - - def __init__(self, - weights_memory: float = np.inf, - activation_memory: float = np.inf, - total_memory: float = np.inf, - bops: float = np.inf): - """ - - Args: - weights_memory: Memory of a model's weights in bytes. Note that this includes only coefficients that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, while the bias will not). - activation_memory: Memory of a model's activation in bytes, according to the given activation kpi metric. - total_memory: The sum of model's activation and weights memory in bytes, according to the given total kpi metric. - bops: The total bit-operations in the model. - """ - self.weights_memory = weights_memory - self.activation_memory = activation_memory - self.total_memory = total_memory - self.bops = bops - - def __repr__(self): - return f"Weights_memory: {self.weights_memory}, " \ - f"Activation_memory: {self.activation_memory}, " \ - f"Total_memory: {self.total_memory}, " \ - f"BOPS: {self.bops}" - - def get_kpi_dict(self) -> Dict[KPITarget, float]: - """ - Returns: a dictionary with the KPI object's values for each KPI target. - """ - return {KPITarget.WEIGHTS: self.weights_memory, - KPITarget.ACTIVATION: self.activation_memory, - KPITarget.TOTAL: self.total_memory, - KPITarget.BOPS: self.bops} - - def set_kpi_by_target(self, kpis_mapping: Dict[KPITarget, float]): - """ - Setting a KPI object values for each KPI target in the given dictionary. - - Args: - kpis_mapping: A mapping from a KPITarget to a matching KPI value. - - """ - self.weights_memory = kpis_mapping.get(KPITarget.WEIGHTS, np.inf) - self.activation_memory = kpis_mapping.get(KPITarget.ACTIVATION, np.inf) - self.total_memory = kpis_mapping.get(KPITarget.TOTAL, np.inf) - self.bops = kpis_mapping.get(KPITarget.BOPS, np.inf) - - def holds_constraints(self, kpi: Any) -> bool: - """ - Checks whether the given KPI holds a set of KPI constraints defined by the currect KPI object. 
- - Args: - kpi: A KPI object to check if it holds the constraints. - - Returns: True if all the given KPI values are not greater than the referenced KPI values. - - """ - if not isinstance(kpi, KPI): - return False - - return kpi.weights_memory <= self.weights_memory and \ - kpi.activation_memory <= self.activation_memory and \ - kpi.total_memory <= self.total_memory and \ - kpi.bops <= self.bops diff --git a/model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_aggregation_methods.py b/model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_aggregation_methods.py deleted file mode 100644 index 083205aea..000000000 --- a/model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_aggregation_methods.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -import copy -from enum import Enum -from functools import partial -from typing import List, Any -import numpy as np - -from pulp import lpSum - - -def sum_kpi(kpi_vector: np.ndarray, set_constraints: bool = True) -> List[Any]: - """ - Aggregates KPIs vector to a single KPI measure by summing all values. - - Args: - kpi_vector: A vector with nodes' KPI values. - set_constraints: A flag for utilizing the method for KPI computation of a - given config not for LP formalization purposes. 
- - Returns: A list with an lpSum object for lp problem definition with the vector's sum. - - """ - if not set_constraints: - return [0] if len(kpi_vector) == 0 else [sum(kpi_vector)] - return [lpSum(kpi_vector)] - - -def max_kpi(kpi_vector: np.ndarray, set_constraints: bool = True) -> List[float]: - """ - Aggregates KPIs vector to allow max constraint in the linear programming problem formalization. - In order to do so, we need to define a separate constraint on each value in the KPI vector, - to be bounded by the target KPI. - - Args: - kpi_vector: A vector with nodes' KPI values. - set_constraints: A flag for utilizing the method for KPI computation of a - given config not for LP formalization purposes. - - Returns: A list with the vector's values, to be used to define max constraint - in the linear programming problem formalization. - - """ - if not set_constraints: - return [0] if len(kpi_vector) == 0 else [max(kpi_vector)] - return [kpi for kpi in kpi_vector] - - -def total_kpi(kpi_tensor: np.ndarray, set_constraints: bool = True) -> List[float]: - """ - Aggregates KPIs vector to allow weights and activation total kpi constraint in the linear programming - problem formalization. In order to do so, we need to define a separate constraint on each activation value in - the KPI vector, combined with the sum weights kpi. - Note that the given kpi_tensor should contain weights and activation kpi values in each entry. - - Args: - kpi_tensor: A tensor with nodes' KPI values for weights and activation. - set_constraints: A flag for utilizing the method for KPI computation of a - given config not for LP formalization purposes. - - Returns: A list with lpSum objects, to be used to define total constraint - in the linear programming problem formalization. 
- - """ - if not set_constraints: - weights_kpi = sum([kpi[0] for kpi in kpi_tensor]) - activation_kpi = max([kpi[1] for kpi in kpi_tensor]) - return [weights_kpi + activation_kpi] - - weights_kpi = lpSum([kpi[0] for kpi in kpi_tensor]) - total_kpis = [weights_kpi + activation_kpi for _, activation_kpi in kpi_tensor] - - return total_kpis - - -class MpKpiAggregation(Enum): - """ - Defines kpi aggregation functions that can be used to compute final KPI metric. - The enum values can be used to call a function on a set of arguments. - - SUM - applies the sum_kpi function - - MAX - applies the max_kpi function - - TOTAL - applies the total_kpi function - - """ - SUM = partial(sum_kpi) - MAX = partial(max_kpi) - TOTAL = partial(total_kpi) - - def __call__(self, *args): - return self.value(*args) diff --git a/model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_functions_mapping.py b/model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_functions_mapping.py deleted file mode 100644 index 259e89c93..000000000 --- a/model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_functions_mapping.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPITarget -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi_aggregation_methods import MpKpiAggregation -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi_methods import MpKpiMetric - - -# When adding a KPITarget that we want to consider in our mp search, -# a matching pair of kpi_tools computation function and a kpi_tools -# aggregation function should be added to this dictionary -kpi_functions_mapping = {KPITarget.WEIGHTS: (MpKpiMetric.WEIGHTS_SIZE, MpKpiAggregation.SUM), - KPITarget.ACTIVATION: (MpKpiMetric.ACTIVATION_OUTPUT_SIZE, MpKpiAggregation.MAX), - KPITarget.TOTAL: (MpKpiMetric.TOTAL_WEIGHTS_ACTIVATION_SIZE, MpKpiAggregation.TOTAL), - KPITarget.BOPS: (MpKpiMetric.BOPS_COUNT, MpKpiAggregation.SUM)} diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py index 08e4b0660..059a42fad 100644 --- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py @@ -21,8 +21,8 @@ from model_compression_toolkit.core import MixedPrecisionQuantizationConfig from model_compression_toolkit.core.common import Graph from model_compression_toolkit.core.common.hessian import HessianInfoService -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI, KPITarget -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi_functions_mapping import kpi_functions_mapping +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget +from 
model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_functions_mapping import ru_functions_mapping from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import MixedPrecisionSearchManager from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \ @@ -47,7 +47,7 @@ class BitWidthSearchMethod(Enum): def search_bit_width(graph_to_search_cfg: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation, - target_kpi: KPI, + target_resource_utilization: ResourceUtilization, mp_config: MixedPrecisionQuantizationConfig, representative_data_gen: Callable, search_method: BitWidthSearchMethod = BitWidthSearchMethod.INTEGER_PROGRAMMING, @@ -56,15 +56,15 @@ def search_bit_width(graph_to_search_cfg: Graph, Search for an MP configuration for a given graph. Given a search_method method (by default, it's linear programming), we use the sensitivity_evaluator object that provides a function to compute an evaluation for the expected sensitivity for a bit-width configuration. - Then, and after computing the KPI for each node in the graph for each bit-width in the search space, - we search for the optimal solution, given some target_kpi, the solution should fit. - target_kpi have to be passed. If it was not passed, the facade is not supposed to get here by now. + Then, and after computing the resource utilization for each node in the graph for each bit-width in the search space, + we search for the optimal solution, given some target_resource_utilization, the solution should fit. + target_resource_utilization have to be passed. If it was not passed, the facade is not supposed to get here by now. Args: graph_to_search_cfg: Graph to search a MP configuration for. 
fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize). fw_impl: FrameworkImplementation object with specific framework methods implementation. - target_kpi: Target KPI to bound our feasible solution space s.t the configuration does not violate it. + target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it. mp_config: Mixed-precision quantization configuration. representative_data_gen: Dataset to use for retrieving images for the models inputs. search_method: BitWidthSearchMethod to define which searching method to use. @@ -77,25 +77,25 @@ def search_bit_width(graph_to_search_cfg: Graph, """ - # target_kpi have to be passed. If it was not passed, the facade is not supposed to get here by now. - if target_kpi is None: - Logger.critical("Target KPI is required for the bit-width search method's configuration.") # pragma: no cover + # target_resource_utilization have to be passed. If it was not passed, the facade is not supposed to get here by now. 
+ if target_resource_utilization is None: + Logger.critical("Target ResourceUtilization is required for the bit-width search method's configuration.") # pragma: no cover # Set graph for MP search graph = copy.deepcopy(graph_to_search_cfg) # Copy graph before searching - if target_kpi.bops < np.inf: - # Since Bit-operations count target KPI is set, we need to reconstruct the graph for the MP search + if target_resource_utilization.bops < np.inf: + # Since Bit-operations count target resource utilization is set, we need to reconstruct the graph for the MP search graph = substitute(graph, fw_impl.get_substitutions_virtual_weights_activation_coupling()) # If we only run weights compression with MP than no need to consider activation quantization when computing the # MP metric (it adds noise to the computation) - disable_activation_for_metric = (target_kpi.weights_memory < np.inf and - (target_kpi.activation_memory == np.inf and - target_kpi.total_memory == np.inf and - target_kpi.bops == np.inf)) or graph_to_search_cfg.is_single_activation_cfg() + disable_activation_for_metric = (target_resource_utilization.weights_memory < np.inf and + (target_resource_utilization.activation_memory == np.inf and + target_resource_utilization.total_memory == np.inf and + target_resource_utilization.bops == np.inf)) or graph_to_search_cfg.is_single_activation_cfg() # Set Sensitivity Evaluator for MP search. 
It should always work with the original MP graph, - # even if a virtual graph was created (and is used only for BOPS KPI computation purposes) + # even if a virtual graph was created (and is used only for BOPS utilization computation purposes) se = fw_impl.get_sensitivity_evaluator( graph_to_search_cfg, mp_config, @@ -104,16 +104,17 @@ def search_bit_width(graph_to_search_cfg: Graph, disable_activation_for_metric=disable_activation_for_metric, hessian_info_service=hessian_info_service) - # Each pair of (KPI method, KPI aggregation) should match to a specific provided kpi target - kpi_functions = kpi_functions_mapping + # Each pair of (resource utilization method, resource utilization aggregation) should match to a specific + # provided target resource utilization + ru_functions = ru_functions_mapping # Instantiate a manager object search_manager = MixedPrecisionSearchManager(graph, fw_info, fw_impl, se, - kpi_functions, - target_kpi, + ru_functions, + target_resource_utilization, original_graph=graph_to_search_cfg) if search_method in search_methods: # Get a specific search function @@ -123,9 +124,9 @@ def search_bit_width(graph_to_search_cfg: Graph, # Search for the desired mixed-precision configuration result_bit_cfg = search_method_fn(search_manager, - target_kpi) + target_resource_utilization) if mp_config.refine_mp_solution: - result_bit_cfg = greedy_solution_refinement_procedure(result_bit_cfg, search_manager, target_kpi) + result_bit_cfg = greedy_solution_refinement_procedure(result_bit_cfg, search_manager, target_resource_utilization) return result_bit_cfg diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py index 99d1a0663..37f36275c 100644 --- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +++ 
b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py @@ -23,9 +23,9 @@ from model_compression_toolkit.core.common.graph.base_graph import Graph from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \ VirtualSplitWeightsNode, VirtualSplitActivationNode -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPITarget, KPI -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi_aggregation_methods import MpKpiAggregation -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi_methods import MpKpiMetric +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget, ResourceUtilization +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric from model_compression_toolkit.core.common.framework_info import FrameworkInfo from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation @@ -40,8 +40,8 @@ def __init__(self, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation, sensitivity_evaluator: SensitivityEvaluation, - kpi_functions: Dict[KPITarget, Tuple[MpKpiMetric, MpKpiAggregation]], - target_kpi: KPI, + ru_functions: Dict[RUTarget, Tuple[MpRuMetric, MpRuAggregation]], + target_resource_utilization: ResourceUtilization, original_graph: Graph = None): """ @@ -51,10 +51,10 @@ def __init__(self, fw_impl: FrameworkImplementation object with specific framework methods implementation. sensitivity_evaluator: A SensitivityEvaluation which provides a function that evaluates the sensitivity of a bit-width configuration for the MP model. 
- kpi_functions: A dictionary with pairs of (MpKpiMethod, MpKpiAggregationMethod) mapping a KPITarget to - a couple of kpi metric function and kpi aggregation function. - target_kpi: Target KPI to bound our feasible solution space s.t the configuration does not violate it. - original_graph: In case we have a search over a virtual graph (if we have BOPS KPI target), then this argument + ru_functions: A dictionary with pairs of (MpRuMethod, MpRuAggregationMethod) mapping a RUTarget to + a couple of resource utilization metric function and resource utilization aggregation function. + target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it. + original_graph: In case we have a search over a virtual graph (if we have BOPS utilization target), then this argument will contain the original graph (for config reconstruction purposes). """ @@ -66,12 +66,12 @@ def __init__(self, self.layer_to_bitwidth_mapping = self.get_search_space() self.compute_metric_fn = self.get_sensitivity_metric() - self.compute_kpi_functions = kpi_functions - self.target_kpi = target_kpi - self.min_kpi_config = self.graph.get_min_candidates_config(fw_info) - self.max_kpi_config = self.graph.get_max_candidates_config(fw_info) - self.min_kpi = self.compute_min_kpis() - self.non_conf_kpi_dict = self._non_configurable_nodes_kpi() + self.compute_ru_functions = ru_functions + self.target_resource_utilization = target_resource_utilization + self.min_ru_config = self.graph.get_min_candidates_config(fw_info) + self.max_ru_config = self.graph.get_max_candidates_config(fw_info) + self.min_ru = self.compute_min_ru() + self.non_conf_ru_dict = self._non_configurable_nodes_ru() self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph, original_graph=self.original_graph) @@ -106,112 +106,114 @@ def get_sensitivity_metric(self) -> Callable: return self.sensitivity_evaluator.compute_metric - def 
compute_min_kpis(self) -> Dict[KPITarget, np.ndarray]: + def compute_min_ru(self) -> Dict[RUTarget, np.ndarray]: """ - Computes a KPIs vector with the values matching to the minimal mp configuration + Computes a resource utilization vector with the values matching to the minimal mp configuration (i.e., each node is configured with the quantization candidate that would give the minimal size of the - node's KPI). - The method computes the minimal KPIs vector for each kpi target. + node's resource utilization). + The method computes the minimal resource utilization vector for each target resource utilization. - Returns: A dictionary mapping each kpi target to its respective minimal KPIs values. + Returns: A dictionary mapping each target resource utilization to its respective minimal + resource utilization values. """ - min_kpis = {} - for kpi_target, kpi_fns in self.compute_kpi_functions.items(): - # kpi_fns is a pair of kpi computation method and kpi aggregation method (in this method we only need - # the first one) - min_kpis[kpi_target] = kpi_fns[0](self.min_kpi_config, self.graph, self.fw_info, self.fw_impl) + min_ru = {} + for ru_target, ru_fns in self.compute_ru_functions.items(): + # ru_fns is a pair of resource utilization computation method and + # resource utilization aggregation method (in this method we only need the first one) + min_ru[ru_target] = ru_fns[0](self.min_ru_config, self.graph, self.fw_info, self.fw_impl) - return min_kpis + return min_ru - def compute_kpi_matrix(self, target: KPITarget) -> np.ndarray: + def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray: """ - Computes and builds a KPIs matrix, to be used for the mixed-precision search problem formalization. + Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization. 
The matrix is constructed as follows (for a given target): - - Each row represents the set of KPI values for a specific KPI measure (number of rows should be equal to the - length of the output of the respective target compute_kpi function). - - Each entry in a specific column represents the KPI value of a given configuration (single layer is configured - with specific candidate, all other layer are at the minimal KPI configuration) for the KPI measure of the - respective row. + - Each row represents the set of resource utilization values for a specific resource utilization + measure (number of rows should be equal to the length of the output of the respective target compute_ru function). + - Each entry in a specific column represents the resource utilization value of a given configuration + (single layer is configured with specific candidate, all other layer are at the minimal resource + utilization configuration) for the resource utilization measure of the respective row. Args: - target: The target for which the KPI is calculated (a KPITarget value). + target: The resource target for which the resource utilization is calculated (a RUTarget value). - Returns: A KPI matrix. + Returns: A resource utilization matrix. """ - assert isinstance(target, KPITarget), f"{target} is not a valid KPI target" + assert isinstance(target, RUTarget), f"{target} is not a valid resource target" configurable_sorted_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info) - kpi_matrix = [] + ru_matrix = [] for c, c_n in enumerate(configurable_sorted_nodes): for candidate_idx in range(len(c_n.candidates_quantization_cfg)): - if candidate_idx == self.min_kpi_config[c]: - # skip KPI computation for min configuration. Since we compute the difference from min_kpi it'll + if candidate_idx == self.min_ru_config[c]: + # skip ru computation for min configuration. Since we compute the difference from min_ru it'll # always be 0 for all entries in the results vector. 
- candidate_kpis = np.zeros(shape=self.min_kpi[target].shape) + candidate_rus = np.zeros(shape=self.min_ru[target].shape) else: - candidate_kpis = self.compute_candidate_relative_kpis(c, candidate_idx, target) - kpi_matrix.append(np.asarray(candidate_kpis)) + candidate_rus = self.compute_candidate_relative_ru(c, candidate_idx, target) + ru_matrix.append(np.asarray(candidate_rus)) - # We need to transpose the calculated kpi matrix to allow later multiplication with + # We need to transpose the calculated ru matrix to allow later multiplication with # the indicators' diagonal matrix. # We only move the first axis (num of configurations) to be last, # the remaining axes include the metric specific nodes (rows dimension of the new tensor) - # and the kpi metric values (if they are non-scalars) - np_kpi_matrix = np.array(kpi_matrix) - return np.moveaxis(np_kpi_matrix, source=0, destination=len(np_kpi_matrix.shape) - 1) - - def compute_candidate_relative_kpis(self, - conf_node_idx: int, - candidate_idx: int, - target: KPITarget) -> np.ndarray: - """ - Computes a KPIs vector for a given candidates of a given configurable node, i.e., the matching KPI vector - which is obtained by computing the given target's KPI function on a minimal configuration in which the given + # and the ru metric values (if they are non-scalars) + np_ru_matrix = np.array(ru_matrix) + return np.moveaxis(np_ru_matrix, source=0, destination=len(np_ru_matrix.shape) - 1) + + def compute_candidate_relative_ru(self, + conf_node_idx: int, + candidate_idx: int, + target: RUTarget) -> np.ndarray: + """ + Computes a resource utilization vector for a given candidates of a given configurable node, + i.e., the matching resource utilization vector which is obtained by computing the given target's + resource utilization function on a minimal configuration in which the given layer's candidates is changed to the new given one. - The result is normalized by subtracting the target's minimal KPIs vector. 
+ The result is normalized by subtracting the target's minimal resource utilization vector. Args: conf_node_idx: The index of a node in a sorted configurable nodes list. candidate_idx: The index of a node's quantization configuration candidate. - target: The target for which the KPI is calculated (a KPITarget value). + target: The target for which the resource utilization is calculated (a RUTarget value). - Returns: Normalized node's KPIs vector + Returns: Normalized node's resource utilization vector """ - return self.compute_node_kpi_for_candidate(conf_node_idx, candidate_idx, target) - \ - self.get_min_target_kpi(target) + return self.compute_node_ru_for_candidate(conf_node_idx, candidate_idx, target) - \ + self.get_min_target_resource_utilization(target) - def get_min_target_kpi(self, target: KPITarget) -> np.ndarray: + def get_min_target_resource_utilization(self, target: RUTarget) -> np.ndarray: """ - Returns the minimal KPIs vector (pre-calculated on initialization) of a specific target. + Returns the minimal resource utilization vector (pre-calculated on initialization) of a specific target. Args: - target: The target for which the KPI is calculated (a KPITarget value). + target: The target for which the resource utilization is calculated (a RUTarget value). - Returns: Minimal KPIs vector. + Returns: Minimal resource utilization vector. """ - return self.min_kpi[target] + return self.min_ru[target] - def compute_node_kpi_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: KPITarget) -> np.ndarray: + def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray: """ - Computes a KPIs vector after replacing the given node's configuration candidate in the minimal + Computes a resource utilization vector after replacing the given node's configuration candidate in the minimal target configuration with the given candidate index. 
Args: conf_node_idx: The index of a node in a sorted configurable nodes list. - candidate_idx: Quantization config candidate to be used for the node's KPI computation. - target: The target for which the KPI is calculated (a KPITarget value). + candidate_idx: Quantization config candidate to be used for the node's resource utilization computation. + target: The target for which the resource utilization is calculated (a RUTarget value). - Returns: Node's KPIs vector. + Returns: Node's resource utilization vector. """ - return self.compute_kpi_functions[target][0]( + return self.compute_ru_functions[target][0]( self.replace_config_in_index( - self.min_kpi_config, + self.min_ru_config, conf_node_idx, candidate_idx), self.graph, @@ -236,58 +238,60 @@ def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int updated_cfg[idx] = value return updated_cfg - def _non_configurable_nodes_kpi(self) -> Dict[KPITarget, np.ndarray]: + def _non_configurable_nodes_ru(self) -> Dict[RUTarget, np.ndarray]: """ - Computes a KPI vector of all non-configurable nodes in the given graph for each of the KPI target. + Computes a resource utilization vector of all non-configurable nodes in the given graph for each of the + resource utilization targets. - Returns: A mapping between a KPITarget and its non-configurable nodes' KPI vector. + Returns: A mapping between a RUTarget and its non-configurable nodes' resource utilization vector. 
""" - non_conf_kpi_dict = {} - for target, kpi_value in self.target_kpi.get_kpi_dict().items(): - # Call for the KPI method of the given target - empty quantization configuration list is passed since we + non_conf_ru_dict = {} + for target, ru_value in self.target_resource_utilization.get_resource_utilization_dict().items(): + # Call for the ru method of the given target - empty quantization configuration list is passed since we # compute for non-configurable nodes - if target == KPITarget.BOPS: - kpi_vector = None + if target == RUTarget.BOPS: + ru_vector = None else: - kpi_vector = self.compute_kpi_functions[target][0]([], self.graph, self.fw_info, self.fw_impl) + ru_vector = self.compute_ru_functions[target][0]([], self.graph, self.fw_info, self.fw_impl) - non_conf_kpi_dict[target] = kpi_vector + non_conf_ru_dict[target] = ru_vector - return non_conf_kpi_dict + return non_conf_ru_dict - def compute_kpi_for_config(self, config: List[int]) -> KPI: + def compute_resource_utilization_for_config(self, config: List[int]) -> ResourceUtilization: """ - Computes the KPI values for a given mixed-precision configuration. + Computes the resource utilization values for a given mixed-precision configuration. Args: config: A mixed-precision configuration (list of candidates indices) - Returns: A KPI object with the model's KPI values when quantized with the given config. + Returns: A ResourceUtilization object with the model's resource utilization values when quantized + with the given config. 
""" - kpis_dict = {} + ru_dict = {} - for kpi_target, kpi_fns in self.compute_kpi_functions.items(): - # Passing False to kpi methods and aggregations to indicates that the computations + for ru_target, ru_fns in self.compute_ru_functions.items(): + # Passing False to ru methods and aggregations to indicates that the computations # are not for constraints setting - if kpi_target == KPITarget.BOPS: - configurable_nodes_kpi_vector = kpi_fns[0](config, self.original_graph, self.fw_info, self.fw_impl, False) + if ru_target == RUTarget.BOPS: + configurable_nodes_ru_vector = ru_fns[0](config, self.original_graph, self.fw_info, self.fw_impl, False) else: - configurable_nodes_kpi_vector = kpi_fns[0](config, self.original_graph, self.fw_info, self.fw_impl) - non_configurable_nodes_kpi_vector = self.non_conf_kpi_dict.get(kpi_target) - if non_configurable_nodes_kpi_vector is None or len(non_configurable_nodes_kpi_vector) == 0: - aggr_kpi = self.compute_kpi_functions[kpi_target][1](configurable_nodes_kpi_vector, False) + configurable_nodes_ru_vector = ru_fns[0](config, self.original_graph, self.fw_info, self.fw_impl) + non_configurable_nodes_ru_vector = self.non_conf_ru_dict.get(ru_target) + if non_configurable_nodes_ru_vector is None or len(non_configurable_nodes_ru_vector) == 0: + ru_ru = self.compute_ru_functions[ru_target][1](configurable_nodes_ru_vector, False) else: - aggr_kpi = self.compute_kpi_functions[kpi_target][1]( - np.concatenate([configurable_nodes_kpi_vector, non_configurable_nodes_kpi_vector]), False) + ru_ru = self.compute_ru_functions[ru_target][1]( + np.concatenate([configurable_nodes_ru_vector, non_configurable_nodes_ru_vector]), False) - kpis_dict[kpi_target] = aggr_kpi[0] + ru_dict[ru_target] = ru_ru[0] - config_kpi = KPI() - config_kpi.set_kpi_by_target(kpis_dict) - return config_kpi + config_ru = ResourceUtilization() + config_ru.set_resource_utilization_by_target(ru_dict) + return config_ru def finalize_distance_metric(self, layer_to_metrics_mapping: 
Dict[int, Dict[int, float]]): """ @@ -317,7 +321,7 @@ def finalize_distance_metric(self, layer_to_metrics_mapping: Dict[int, Dict[int, class ConfigReconstructionHelper: """ A class to help reconstruct an original mixed-precision configuration from a virtual one, - when running mixed-precision search with BOPS KPI. + when running mixed-precision search with BOPS utilization. It provides a reconstruct_config_from_virtual_graph which allows to translate a bit-width config of a virtual graph to a config of the original configurable nodes. """ diff --git a/model_compression_toolkit/core/common/mixed_precision/kpi_tools/__init__.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py similarity index 100% rename from model_compression_toolkit/core/common/mixed_precision/kpi_tools/__init__.py rename to model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py new file mode 100644 index 000000000..a9314eb95 --- /dev/null +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py @@ -0,0 +1,114 @@ +# Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from enum import Enum +from typing import Dict, Any + +import numpy as np + + +class RUTarget(Enum): + """ + Targets for which we define Resource Utilization metrics for mixed-precision search. + For each target that we care to consider in a mixed-precision search, there should be defined a set of + resource utilization computation function, resource utilization aggregation function, + and resource utilization target (within a ResourceUtilization object). + + Whenever adding a resource utilization metric to ResourceUtilization class we should add a matching target to this enum. + + WEIGHTS - Weights memory ResourceUtilization metric. + + ACTIVATION - Activation memory ResourceUtilization metric. + + TOTAL - Total memory ResourceUtilization metric. + + BOPS - Total Bit-Operations ResourceUtilization Metric. + + """ + + WEIGHTS = 'weights' + ACTIVATION = 'activation' + TOTAL = 'total' + BOPS = 'bops' + + +class ResourceUtilization: + """ + Class to represent measurements of performance. + """ + + def __init__(self, + weights_memory: float = np.inf, + activation_memory: float = np.inf, + total_memory: float = np.inf, + bops: float = np.inf): + """ + + Args: + weights_memory: Memory of a model's weights in bytes. Note that this includes only coefficients that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, while the bias will not). + activation_memory: Memory of a model's activation in bytes, according to the given activation resource utilization metric. + total_memory: The sum of model's activation and weights memory in bytes, according to the given total resource utilization metric. + bops: The total bit-operations in the model. 
+ """ + self.weights_memory = weights_memory + self.activation_memory = activation_memory + self.total_memory = total_memory + self.bops = bops + + def __repr__(self): + return f"Weights_memory: {self.weights_memory}, " \ + f"Activation_memory: {self.activation_memory}, " \ + f"Total_memory: {self.total_memory}, " \ + f"BOPS: {self.bops}" + + def get_resource_utilization_dict(self) -> Dict[RUTarget, float]: + """ + Returns: a dictionary with the ResourceUtilization object's values for each resource utilization target. + """ + return {RUTarget.WEIGHTS: self.weights_memory, + RUTarget.ACTIVATION: self.activation_memory, + RUTarget.TOTAL: self.total_memory, + RUTarget.BOPS: self.bops} + + def set_resource_utilization_by_target(self, ru_mapping: Dict[RUTarget, float]): + """ + Setting a ResourceUtilization object values for each ResourceUtilization target in the given dictionary. + + Args: + ru_mapping: A mapping from a RUTarget to a matching resource utilization value. + + """ + self.weights_memory = ru_mapping.get(RUTarget.WEIGHTS, np.inf) + self.activation_memory = ru_mapping.get(RUTarget.ACTIVATION, np.inf) + self.total_memory = ru_mapping.get(RUTarget.TOTAL, np.inf) + self.bops = ru_mapping.get(RUTarget.BOPS, np.inf) + + def holds_constraints(self, ru: Any) -> bool: + """ + Checks whether the given ResourceUtilization object holds a set of ResourceUtilization constraints defined by + the current ResourceUtilization object. + + Args: + ru: A ResourceUtilization object to check if it holds the constraints. + + Returns: True if all the given resource utilization values are not greater than the referenced resource utilization values. 
+ + """ + if not isinstance(ru, ResourceUtilization): + return False + + return ru.weights_memory <= self.weights_memory and \ + ru.activation_memory <= self.activation_memory and \ + ru.total_memory <= self.total_memory and \ + ru.bops <= self.bops diff --git a/model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_data.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py similarity index 82% rename from model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_data.py rename to model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py index c4862e5cb..fc430f944 100644 --- a/model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_data.py +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py @@ -12,26 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -from typing import Callable, Any import numpy as np +from typing import Callable, Any -from model_compression_toolkit.core import FrameworkInfo, KPI, CoreConfig -from model_compression_toolkit.core.common import Graph from model_compression_toolkit.constants import FLOAT_BITWIDTH +from model_compression_toolkit.core import FrameworkInfo, ResourceUtilization, CoreConfig +from model_compression_toolkit.core.common import Graph from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities -def compute_kpi_data(in_model: Any, - representative_data_gen: Callable, - core_config: CoreConfig, - tpc: TargetPlatformCapabilities, - fw_info: FrameworkInfo, - fw_impl: FrameworkImplementation) -> KPI: +def compute_resource_utilization_data(in_model: Any, + representative_data_gen: Callable, + core_config: CoreConfig, + tpc: TargetPlatformCapabilities, + fw_info: FrameworkInfo, + fw_impl: FrameworkImplementation) -> ResourceUtilization: """ - Compute KPI information that can be relevant for defining target KPI for mixed precision search. + Compute Resource Utilization information that can be relevant for defining target ResourceUtilization for mixed precision search. Calculates maximal activation tensor, sum of weights' parameters and total (sum of both). Args: @@ -43,12 +43,12 @@ def compute_kpi_data(in_model: Any, fw_info: Information needed for quantization about the specific framework. fw_impl: FrameworkImplementation object with a specific framework methods implementation. - Returns: A KPI object with the results. + Returns: A ResourceUtilization object with the results. 
""" - # We assume that the kpi_data API is used to compute the model KPI for mixed precision scenario, - # so we run graph preparation under the assumption of enabled mixed precision. + # We assume that the resource_utilization_data API is used to compute the model resource utilization for + # mixed precision scenario, so we run graph preparation under the assumption of enabled mixed precision. transformed_graph = graph_preparation_runner(in_model, representative_data_gen, core_config.quantization_config, @@ -65,17 +65,17 @@ def compute_kpi_data(in_model: Any, activation_output_sizes = compute_activation_output_sizes(graph=transformed_graph) max_activation_tensor_size = 0 if len(activation_output_sizes) == 0 else max(activation_output_sizes) - # Compute total kpi - parameters sum + max activation tensor + # Compute total memory utilization - parameters sum + max activation tensor total_size = total_weights_params + max_activation_tensor_size - # Compute BOPS kpi - total count of bit-operations for all configurable layers with kernel + # Compute BOPS utilization - total count of bit-operations for all configurable layers with kernel bops_count = compute_total_bops(graph=transformed_graph, fw_info=fw_info, fw_impl=fw_impl) bops_count = np.inf if len(bops_count) == 0 else sum(bops_count) - return KPI(weights_memory=total_weights_params, - activation_memory=max_activation_tensor_size, - total_memory=total_size, - bops=bops_count) + return ResourceUtilization(weights_memory=total_weights_params, + activation_memory=max_activation_tensor_size, + total_memory=total_size, + bops=bops_count) def compute_nodes_weights_params(graph: Graph, fw_info: FrameworkInfo) -> np.ndarray: diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py new file mode 100644 index 000000000..2a75e51bc --- /dev/null +++ 
b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py @@ -0,0 +1,105 @@ +# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +import copy +from enum import Enum +from functools import partial +from typing import List, Any +import numpy as np + +from pulp import lpSum + + +def sum_ru_values(ru_vector: np.ndarray, set_constraints: bool = True) -> List[Any]: + """ + Aggregates resource utilization vector to a single resource utilization measure by summing all values. + + Args: + ru_vector: A vector with nodes' resource utilization values. + set_constraints: A flag for utilizing the method for resource utilization computation of a + given config not for LP formalization purposes. + + Returns: A list with an lpSum object for lp problem definition with the vector's sum. + + """ + if not set_constraints: + return [0] if len(ru_vector) == 0 else [sum(ru_vector)] + return [lpSum(ru_vector)] + + +def max_ru_values(ru_vector: np.ndarray, set_constraints: bool = True) -> List[float]: + """ + Aggregates resource utilization vector to allow max constraint in the linear programming problem formalization. + In order to do so, we need to define a separate constraint on each value in the resource utilization vector, + to be bounded by the target resource utilization. 
+ + Args: + ru_vector: A vector with nodes' resource utilization values. + set_constraints: A flag for utilizing the method for resource utilization computation of a + given config not for LP formalization purposes. + + Returns: A list with the vector's values, to be used to define max constraint + in the linear programming problem formalization. + + """ + if not set_constraints: + return [0] if len(ru_vector) == 0 else [max(ru_vector)] + return [ru for ru in ru_vector] + + +def total_ru(ru_tensor: np.ndarray, set_constraints: bool = True) -> List[float]: + """ + Aggregates resource utilization vector to allow weights and activation total utilization constraint in the linear programming + problem formalization. In order to do so, we need to define a separate constraint on each activation memory utilization value in + the resource utilization vector, combined with the sum weights memory utilization. + Note that the given ru_tensor should contain weights and activation utilization values in each entry. + + Args: + ru_tensor: A tensor with nodes' resource utilization values for weights and activation. + set_constraints: A flag for utilizing the method for resource utilization computation of a + given config not for LP formalization purposes. + + Returns: A list with lpSum objects, to be used to define total constraint + in the linear programming problem formalization. + + """ + if not set_constraints: + weights_ru = sum([ru[0] for ru in ru_tensor]) + activation_ru = max([ru[1] for ru in ru_tensor]) + return [weights_ru + activation_ru] + + weights_ru = lpSum([ru[0] for ru in ru_tensor]) + total_ru = [weights_ru + activation_ru for _, activation_ru in ru_tensor] + + return total_ru + + +class MpRuAggregation(Enum): + """ + Defines resource utilization aggregation functions that can be used to compute final resource utilization metric. + The enum values can be used to call a function on a set of arguments. 
+ + SUM - applies the sum_ru_values function + + MAX - applies the max_ru_values function + + TOTAL - applies the total_ru function + + """ + SUM = partial(sum_ru_values) + MAX = partial(max_ru_values) + TOTAL = partial(total_ru) + + def __call__(self, *args): + return self.value(*args) diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py new file mode 100644 index 000000000..473254df9 --- /dev/null +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py @@ -0,0 +1,26 @@ +# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric + + +# When adding a RUTarget that we want to consider in our mp search, +# a matching pair of resource_utilization_tools computation function and a resource_utilization_tools +# aggregation function should be added to this dictionary +ru_functions_mapping = {RUTarget.WEIGHTS: (MpRuMetric.WEIGHTS_SIZE, MpRuAggregation.SUM), + RUTarget.ACTIVATION: (MpRuMetric.ACTIVATION_OUTPUT_SIZE, MpRuAggregation.MAX), + RUTarget.TOTAL: (MpRuMetric.TOTAL_WEIGHTS_ACTIVATION_SIZE, MpRuAggregation.TOTAL), + RUTarget.BOPS: (MpRuMetric.BOPS_COUNT, MpRuAggregation.SUM)} diff --git a/model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_methods.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py similarity index 81% rename from model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_methods.py rename to model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py index aedec90c7..1da7bad76 100644 --- a/model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_methods.py +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py @@ -28,14 +28,14 @@ from model_compression_toolkit.logger import Logger -def weights_size_kpi(mp_cfg: List[int], - graph: Graph, - fw_info: FrameworkInfo, - fw_impl: FrameworkImplementation) -> np.ndarray: +def weights_size_utilization(mp_cfg: List[int], + graph: Graph, + fw_info: FrameworkInfo, + fw_impl: FrameworkImplementation) -> np.ndarray: """ - Computes 
a KPIs vector with the respective weights' memory size for the given weight configurable node, + Computes a resource utilization vector with the respective weights' memory size for the given weight configurable node, according to the given mixed-precision configuration. - If an empty configuration is given, then computes KPI vector for non-configurable nodes. + If an empty configuration is given, then computes resource utilization vector for non-configurable nodes. Args: mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node) @@ -52,7 +52,7 @@ def weights_size_kpi(mp_cfg: List[int], weights_mp_nodes = [n.name for n in graph.get_sorted_weights_configurable_nodes(fw_info)] if len(mp_cfg) == 0: - # Computing non-configurable nodes KPI + # Computing non-configurable nodes resource utilization # TODO: when enabling multiple attribute quantization by default (currently, # only kernel quantization is enabled) we should include other attributes memory in the sum of all # weights memory (when quantized to their default 8-bit, non-configurable). @@ -71,7 +71,8 @@ def weights_size_kpi(mp_cfg: List[int], node_weights_memory_in_bytes = _compute_node_weights_memory(n, node_nbits, fw_info) weights_memory.append(node_weights_memory_in_bytes) else: - # Go over configurable all nodes that should be taken into consideration when computing the weights KPI. + # Go over all configurable nodes that should be taken into consideration when computing the weights + # resource utilization.
for n in graph.get_sorted_weights_configurable_nodes(fw_info): # Only nodes with kernel op can be considered configurable kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0] @@ -86,14 +87,14 @@ def weights_size_kpi(mp_cfg: List[int], return np.array(weights_memory) -def activation_output_size_kpi(mp_cfg: List[int], - graph: Graph, - fw_info: FrameworkInfo, - fw_impl: FrameworkImplementation) -> np.ndarray: +def activation_output_size_utilization(mp_cfg: List[int], + graph: Graph, + fw_info: FrameworkInfo, + fw_impl: FrameworkImplementation) -> np.ndarray: """ - Computes a KPIs vector with the respective output memory size for each activation configurable node, + Computes a resource utilization vector with the respective output memory size for each activation configurable node, according to the given mixed-precision configuration. - If an empty configuration is given, then computes KPI vector for non-configurable nodes. + If an empty configuration is given, then computes resource utilization vector for non-configurable nodes. Args: mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node) @@ -111,7 +112,7 @@ def activation_output_size_kpi(mp_cfg: List[int], activation_mp_nodes = [n.name for n in graph.get_sorted_activation_configurable_nodes()] if len(mp_cfg) == 0: - # Computing non-configurable nodes KPI + # Computing non-configurable nodes resource utilization for n in graph.nodes: non_configurable_node = n.name not in activation_mp_nodes \ and n.has_activation_quantization_enabled_candidate() \ @@ -122,7 +123,7 @@ def activation_output_size_kpi(mp_cfg: List[int], node_activation_memory_in_bytes = _compute_node_activation_memory(n, node_nbits) activation_memory.append(node_activation_memory_in_bytes) else: - # Go over all nodes that should be taken into consideration when computing the weights KPI. + # Go over all nodes that should be taken into consideration when computing the activation memory utilization.
for n in graph.get_sorted_activation_configurable_nodes(): node_idx = mp_nodes.index(n.name) node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]] @@ -135,14 +136,14 @@ def activation_output_size_kpi(mp_cfg: List[int], return np.array(activation_memory) -def total_weights_activation_kpi(mp_cfg: List[int], - graph: Graph, - fw_info: FrameworkInfo, - fw_impl: FrameworkImplementation) -> np.ndarray: +def total_weights_activation_utilization(mp_cfg: List[int], + graph: Graph, + fw_info: FrameworkInfo, + fw_impl: FrameworkImplementation) -> np.ndarray: """ - Computes KPIs tensor with the respective weights size and output memory size for each activation configurable node, + Computes resource utilization tensor with the respective weights size and output memory size for each activation configurable node, according to the given mixed-precision configuration. - If an empty configuration is given, then computes KPI vector for non-configurable nodes. + If an empty configuration is given, then computes resource utilization vector for non-configurable nodes. Args: mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node) @@ -160,15 +161,15 @@ def total_weights_activation_kpi(mp_cfg: List[int], activation_mp_nodes = [n.name for n in graph.get_sorted_activation_configurable_nodes()] if len(mp_cfg) == 0: - # Computing non-configurable nodes KPI + # Computing non-configurable nodes utilization for n in graph.nodes: non_configurable = False node_weights_memory_in_bytes, node_activation_memory_in_bytes = 0, 0 # Non-configurable Weights - # TODO: currently considering only kernel attributes in weights KPI. When enabling multi-attribute - # quantization we need to modify this method to count all attributes. + # TODO: currently considering only kernel attributes in weights memory utilization. + # When enabling multi-attribute quantization we need to modify this method to count all attributes. 
kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0] if kernel_attr is not None: is_non_configurable_weights = n.name not in weights_mp_nodes and \ @@ -196,9 +197,9 @@ def total_weights_activation_kpi(mp_cfg: List[int], np.array([node_weights_memory_in_bytes, node_activation_memory_in_bytes])) else: # Go over all nodes that should be taken into consideration when computing the weights or - # activation KPI (all configurable nodes). + # activation memory utilization (all configurable nodes). for node_idx, n in enumerate(graph.get_configurable_sorted_nodes(fw_info)): - # TODO: currently considering only kernel attributes in weights KPI. When enabling multi-attribute + # TODO: currently considering only kernel attributes in weights memory utilization. When enabling multi-attribute # quantization we need to modify this method to count all attributes. node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]] @@ -222,13 +223,13 @@ def total_weights_activation_kpi(mp_cfg: List[int], return np.array(weights_activation_memory) -def bops_kpi(mp_cfg: List[int], - graph: Graph, - fw_info: FrameworkInfo, - fw_impl: FrameworkImplementation, - set_constraints: bool = True) -> np.ndarray: +def bops_utilization(mp_cfg: List[int], + graph: Graph, + fw_info: FrameworkInfo, + fw_impl: FrameworkImplementation, + set_constraints: bool = True) -> np.ndarray: """ - Computes a KPIs vector with the respective bit-operations (BOPS) count for each configurable node, + Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node, according to the given mixed-precision configuration of a virtual graph with composed nodes. Args: @@ -236,7 +237,7 @@ def bops_kpi(mp_cfg: List[int], graph: Graph object. fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize). fw_impl: FrameworkImplementation object with specific framework methods implementation. 
- set_constraints: A flag for utilizing the method for KPI computation of a + set_constraints: A flag for utilizing the method for resource utilization computation of a given config not for LP formalization purposes. Returns: A vector of node's BOPS count. @@ -245,12 +246,12 @@ def bops_kpi(mp_cfg: List[int], """ if not set_constraints: - return _bops_kpi(mp_cfg, - graph, - fw_info, - fw_impl) + return _bops_utilization(mp_cfg, + graph, + fw_info, + fw_impl) - # BOPs KPI method considers non-configurable nodes, therefore, it doesn't need separate implementation + # BOPs utilization method considers non-configurable nodes, therefore, it doesn't need separate implementation # for non-configurable nodes for setting a constraint (no need for separate implementation for len(mp_cfg) = 0). virtual_bops_nodes = [n for n in graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)] @@ -261,12 +262,12 @@ def bops_kpi(mp_cfg: List[int], return np.array(bops) -def _bops_kpi(mp_cfg: List[int], - graph: Graph, - fw_info: FrameworkInfo, - fw_impl: FrameworkImplementation) -> np.ndarray: +def _bops_utilization(mp_cfg: List[int], + graph: Graph, + fw_info: FrameworkInfo, + fw_impl: FrameworkImplementation) -> np.ndarray: """ - Computes a KPIs vector with the respective bit-operations (BOPS) count for each configurable node, + Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node, according to the given mixed-precision configuration of an original graph. Args: @@ -281,7 +282,7 @@ def _bops_kpi(mp_cfg: List[int], mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info) - # Go over all nodes that should be taken into consideration when computing the BOPS KPI. + # Go over all nodes that should be taken into consideration when computing the BOPS utilization. 
bops = [] for n in graph.get_topo_sorted_nodes(): if n.has_kernel_weight_to_quantize(fw_info): @@ -292,7 +293,7 @@ def _bops_kpi(mp_cfg: List[int], input_activation_node = incoming_edges[0].source_node if len(graph.out_edges(input_activation_node)) > 1: # In the case where the activation node has multiple outgoing edges - # we don't consider this edge in the BOPS KPI calculation + # we don't consider this edge in the BOPS utilization calculation continue input_activation_node_cfg = input_activation_node.candidates_quantization_cfg[_get_node_cfg_idx(input_activation_node, mp_cfg, mp_nodes)] @@ -337,7 +338,7 @@ def _get_node_cfg_idx(node: BaseNode, mp_cfg: List[int], sorted_configurable_nod def _get_origin_weights_node(n: BaseNode) -> BaseNode: """ - In case we run a KPI computation on a virtual graph, + In case we run a resource utilization computation on a virtual graph, this method is used to retrieve the original node out of a virtual weights node, Args: @@ -357,7 +358,7 @@ def _get_origin_weights_node(n: BaseNode) -> BaseNode: def _get_origin_activation_node(n: BaseNode) -> BaseNode: """ - In case we run a KPI computation on a virtual graph, + In case we run a resource utilization computation on a virtual graph, this method is used to retrieve the original node out of a virtual activation node, Args: @@ -416,25 +417,25 @@ def _compute_node_activation_memory(n: BaseNode, node_nbits: int) -> float: return node_output_size * node_nbits / BITS_TO_BYTES -class MpKpiMetric(Enum): +class MpRuMetric(Enum): """ - Defines kpi computation functions that can be used to compute KPI for a given target for a given mp config. - The enum values can be used to call a function on a set of arguments. + Defines resource utilization computation functions that can be used to compute resource utilization for a given target + for a given mp config. The enum values can be used to call a function on a set of arguments.
- WEIGHTS_SIZE - applies the weights_size_kpi function + WEIGHTS_SIZE - applies the weights_size_utilization function - ACTIVATION_OUTPUT_SIZE - applies the activation_output_size_kpi function + ACTIVATION_OUTPUT_SIZE - applies the activation_output_size_utilization function - TOTAL_WEIGHTS_ACTIVATION_SIZE - applies the total_weights_activation_kpi function + TOTAL_WEIGHTS_ACTIVATION_SIZE - applies the total_weights_activation_utilization function - BOPS_COUNT - applies the bops_kpi function + BOPS_COUNT - applies the bops_utilization function """ - WEIGHTS_SIZE = partial(weights_size_kpi) - ACTIVATION_OUTPUT_SIZE = partial(activation_output_size_kpi) - TOTAL_WEIGHTS_ACTIVATION_SIZE = partial(total_weights_activation_kpi) - BOPS_COUNT = partial(bops_kpi) + WEIGHTS_SIZE = partial(weights_size_utilization) + ACTIVATION_OUTPUT_SIZE = partial(activation_output_size_utilization) + TOTAL_WEIGHTS_ACTIVATION_SIZE = partial(total_weights_activation_utilization) + BOPS_COUNT = partial(bops_utilization) def __call__(self, *args): return self.value(*args) diff --git a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py index 3fa275c6d..bf10ef49b 100644 --- a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +++ b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py @@ -19,27 +19,28 @@ from typing import Dict, List, Tuple, Callable from model_compression_toolkit.logger import Logger -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI, KPITarget +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import MixedPrecisionSearchManager # Limit ILP solver runtime in 
seconds SOLVER_TIME_LIMIT = 60 + def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager, - target_kpi: KPI = None) -> List[int]: + target_resource_utilization: ResourceUtilization = None) -> List[int]: """ Searching and returning a mixed-precision configuration using an ILP optimization solution. It first builds a mapping from each layer's index (in the model) to a dictionary that maps the bitwidth index to the observed sensitivity of the model when using that bitwidth for that layer. Then, it creates a mapping from each node's index (in the graph) to a dictionary that maps the bitwidth index to the contribution of configuring this node with this - bitwidth to the minimal possible KPI of the model. + bitwidth to the minimal possible resource utilization of the model. Then, and using these mappings, it builds an LP problem and finds an optimal solution. If a solution could not be found, exception is thrown. Args: search_manager: MixedPrecisionSearchManager object to be used for problem formalization. - target_kpi: KPI to constrain our LP problem with some resources limitations (like model' weights memory + target_resource_utilization: Target resource utilization to constrain our LP problem with some resources limitations (like model's weights memory consumption). Returns: @@ -50,10 +51,11 @@ def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager, # Build a mapping from each layer's index (in the model) to a dictionary that maps the # bitwidth index to the observed sensitivity of the model when using that bitwidth for that layer. - if target_kpi is None or search_manager is None: - Logger.critical("Invalid parameters: 'target_kpi' and 'search_manager' must not be 'None' for mixed-precision search.
Ensure valid inputs are provided.") + if target_resource_utilization is None or search_manager is None: + Logger.critical("Invalid parameters: 'target_resource_utilization' and 'search_manager' must not be 'None' " + "for mixed-precision search. Ensure valid inputs are provided.") - layer_to_metrics_mapping = _build_layer_to_metrics_mapping(search_manager, target_kpi) + layer_to_metrics_mapping = _build_layer_to_metrics_mapping(search_manager, target_resource_utilization) # Init variables to find their values when solving the lp problem. layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping = _init_problem_vars(layer_to_metrics_mapping) @@ -62,7 +64,7 @@ def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager, lp_problem = _formalize_problem(layer_to_indicator_vars_mapping, layer_to_metrics_mapping, layer_to_objective_vars_mapping, - target_kpi, + target_resource_utilization, search_manager) # Use default PULP solver. Limit runtime in seconds @@ -70,7 +72,7 @@ def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager, lp_problem.solve(solver=solver) # Try to solve the problem. assert lp_problem.status == LpStatusOptimal, Logger.critical( - "No solution found for the LP problem.") + "No solution was found during solving the LP problem") Logger.info(LpStatus[lp_problem.status]) # Take the bitwidth index only if its corresponding indicator is one. 
@@ -80,7 +82,7 @@ def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager, in layer_to_indicator_vars_mapping.values()] ).flatten() - if target_kpi.bops < np.inf: + if target_resource_utilization.bops < np.inf: return search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(config) else: return config @@ -121,7 +123,7 @@ def _init_problem_vars(layer_to_metrics_mapping: Dict[int, Dict[int, float]]) -> def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVariable]], layer_to_metrics_mapping: Dict[int, Dict[int, float]], layer_to_objective_vars_mapping: Dict[int, LpVariable], - target_kpi: KPI, + target_resource_utilization: ResourceUtilization, search_manager: MixedPrecisionSearchManager) -> LpProblem: """ Formalize the LP problem by defining all inequalities that define the solution space. @@ -133,8 +135,8 @@ def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVa evaluation. layer_to_objective_vars_mapping: Dictionary that maps each node's index to a bitwidth variable we find its value. - target_kpi: KPI to reduce our feasible solution space. - search_manager: MixedPrecisionSearchManager object to be used for kpi constraints formalization. + target_resource_utilization: Target resource utilization to reduce our feasible solution space. + search_manager: MixedPrecisionSearchManager object to be used for resource utilization constraints formalization. Returns: The formalized LP problem. @@ -154,9 +156,9 @@ def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVa lp_problem += lpSum( [v for v in layer_to_indicator_vars_mapping[layer].values()]) == 1 - # Bound the feasible solution space with the desired KPI. - # Creates separate constraints for weights KPI and activation KPI. - if target_kpi is not None: + # Bound the feasible solution space with the desired resource utilization values. 
+ # Creates separate constraints for weights utilization and activation utilization. + if target_resource_utilization is not None: indicators = [] for layer in layer_to_metrics_mapping.keys(): for _, indicator in layer_to_indicator_vars_mapping[layer].items(): @@ -165,72 +167,76 @@ def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVa indicators_arr = np.array(indicators) indicators_matrix = np.diag(indicators_arr) - for target, kpi_value in target_kpi.get_kpi_dict().items(): - if not np.isinf(kpi_value): - non_conf_kpi_vector = None if search_manager.non_conf_kpi_dict is None \ - else search_manager.non_conf_kpi_dict.get(target) - _add_set_of_kpi_constraints(search_manager=search_manager, - target=target, - target_kpi_value=kpi_value, - indicators_matrix=indicators_matrix, - lp_problem=lp_problem, - non_conf_kpi_vector=non_conf_kpi_vector) + for target, ru_value in target_resource_utilization.get_resource_utilization_dict().items(): + if not np.isinf(ru_value): + non_conf_ru_vector = None if search_manager.non_conf_ru_dict is None \ + else search_manager.non_conf_ru_dict.get(target) + _add_set_of_ru_constraints(search_manager=search_manager, + target=target, + target_resource_utilization_value=ru_value, + indicators_matrix=indicators_matrix, + lp_problem=lp_problem, + non_conf_ru_vector=non_conf_ru_vector) else: # pragma: no cover - Logger.critical("Unable to execute mixed-precision search: 'target_kpi' is None. A valid 'target_kpi' is required.") + Logger.critical("Unable to execute mixed-precision search: 'target_resource_utilization' is None. 
" + "A valid 'target_resource_utilization' is required.") return lp_problem -def _add_set_of_kpi_constraints(search_manager: MixedPrecisionSearchManager, - target: KPITarget, - target_kpi_value: float, - indicators_matrix: np.ndarray, - lp_problem: LpProblem, - non_conf_kpi_vector: np.ndarray): +def _add_set_of_ru_constraints(search_manager: MixedPrecisionSearchManager, + target: RUTarget, + target_resource_utilization_value: float, + indicators_matrix: np.ndarray, + lp_problem: LpProblem, + non_conf_ru_vector: np.ndarray): """ - Adding a constraint for the Lp problem for the given KPI target. + Adding a constraint for the Lp problem for the given target resource utilization. The update to the Lp problem object is done inplace. Args: - search_manager: MixedPrecisionSearchManager object to be used for kpi constraints formalization. - target: A KPITarget. - target_kpi_value: Target KPI value of the given KPI target for which the constraint is added. + search_manager: MixedPrecisionSearchManager object to be used for resource utilization constraints formalization. + target: A RUTarget. + target_resource_utilization_value: Target resource utilization value of the given target resource utilization + for which the constraint is added. indicators_matrix: A diagonal matrix of the Lp problem's indicators. lp_problem: An Lp problem object to add constraint to. - non_conf_kpi_vector: A non-configurable nodes' KPI vector. + non_conf_ru_vector: A non-configurable nodes' resource utilization vector. 
""" - kpi_matrix = search_manager.compute_kpi_matrix(target) - indicated_kpi_matrix = np.matmul(kpi_matrix, indicators_matrix) + ru_matrix = search_manager.compute_resource_utilization_matrix(target) + indicated_ru_matrix = np.matmul(ru_matrix, indicators_matrix) # Need to re-organize the tensor such that the configurations' axis will be second, # and all metric values' axis will come afterword - indicated_kpi_matrix = np.moveaxis(indicated_kpi_matrix, source=len(indicated_kpi_matrix.shape) - 1, destination=1) - - # In order to get the result KPI according to a chosen set of indicators, we sum each row in the result matrix. - # Each row represents the KPI values for a specific KPI metric, such that only elements corresponding - # to a configuration which implied by the set of indicators will have some positive value different than 0 - # (and will contribute to the total KPI). - kpi_sum_vector = np.array([ - np.sum(indicated_kpi_matrix[i], axis=0) + # sum of metric values over all configurations in a row - search_manager.min_kpi[target][i] for i in range(indicated_kpi_matrix.shape[0])]) - - # search_manager.compute_kpi_functions contains a pair of kpi_metric and kpi_aggregation for each kpi target - # get aggregated KPI, considering both configurable and non-configurable nodes - if non_conf_kpi_vector is None or len(non_conf_kpi_vector) == 0: - aggr_kpi = search_manager.compute_kpi_functions[target][1](kpi_sum_vector) + indicated_ru_matrix = np.moveaxis(indicated_ru_matrix, source=len(indicated_ru_matrix.shape) - 1, destination=1) + + # In order to get the result resource utilization according to a chosen set of indicators, we sum each row in + # the result matrix. Each row represents the resource utilization values for a specific resource utilization metric, + # such that only elements corresponding to a configuration which implied by the set of indicators will have some + # positive value different than 0 (and will contribute to the total resource utilization). 
+ ru_sum_vector = np.array([ + np.sum(indicated_ru_matrix[i], axis=0) + # sum of metric values over all configurations in a row + search_manager.min_ru[target][i] for i in range(indicated_ru_matrix.shape[0])]) + + # search_manager.compute_ru_functions contains a pair of ru_metric and ru_aggregation for each ru target + # get aggregated ru, considering both configurable and non-configurable nodes + if non_conf_ru_vector is None or len(non_conf_ru_vector) == 0: + aggr_ru = search_manager.compute_ru_functions[target][1](ru_sum_vector) else: - aggr_kpi = search_manager.compute_kpi_functions[target][1](np.concatenate([kpi_sum_vector, non_conf_kpi_vector])) + aggr_ru = search_manager.compute_ru_functions[target][1](np.concatenate([ru_sum_vector, non_conf_ru_vector])) - for v in aggr_kpi: + for v in aggr_ru: if isinstance(v, float): - if v > target_kpi_value: - Logger.critical(f"The model cannot be quantized to meet the specified target KPI ({target.value}) with the value {target_kpi_value}.") # pragma: no cover + if v > target_resource_utilization_value: + Logger.critical( + f"The model cannot be quantized to meet the specified target resource utilization {target.value} " + f"with the value {target_resource_utilization_value}.") # pragma: no cover else: - lp_problem += v <= target_kpi_value + lp_problem += v <= target_resource_utilization_value def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager, - target_kpi: KPI, + target_resource_utilization: ResourceUtilization, eps: float = EPS) -> Dict[int, Dict[int, float]]: """ This function measures the sensitivity of a change in a bitwidth of a layer on the entire model. @@ -242,8 +248,8 @@ def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager, Args: search_manager: MixedPrecisionSearchManager object to be used for problem formalization. - target_kpi: KPI to constrain our LP problem with some resources limitations (like model' weights memory - consumption). 
+ target_resource_utilization: ResourceUtilization to constrain our LP problem with some resources limitations + (like model's weights memory consumption). eps: Epsilon value to manually increase metric value (if necessary) for numerical stability Returns: @@ -255,30 +261,30 @@ def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager, Logger.info('Starting to evaluate metrics') layer_to_metrics_mapping = {} - is_bops_target_kpi = target_kpi.bops < np.inf + is_bops_target_resource_utilization = target_resource_utilization.bops < np.inf - if is_bops_target_kpi: - origin_max_config = search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(search_manager.max_kpi_config) + if is_bops_target_resource_utilization: + origin_max_config = search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(search_manager.max_ru_config) max_config_value = search_manager.compute_metric_fn(origin_max_config) else: - max_config_value = search_manager.compute_metric_fn(search_manager.max_kpi_config) + max_config_value = search_manager.compute_metric_fn(search_manager.max_ru_config) for node_idx, layer_possible_bitwidths_indices in tqdm(search_manager.layer_to_bitwidth_mapping.items(), total=len(search_manager.layer_to_bitwidth_mapping)): layer_to_metrics_mapping[node_idx] = {} for bitwidth_idx in layer_possible_bitwidths_indices: - if search_manager.max_kpi_config[node_idx] == bitwidth_idx: + if search_manager.max_ru_config[node_idx] == bitwidth_idx: # This is a computation of the metric for the max configuration, assign pre-calculated value layer_to_metrics_mapping[node_idx][bitwidth_idx] = max_config_value continue # Create a configuration that differs at one layer only from the baseline model - mp_model_configuration = search_manager.max_kpi_config.copy() + mp_model_configuration = search_manager.max_ru_config.copy() mp_model_configuration[node_idx] = bitwidth_idx # Build a distance matrix using the function we got
from the framework implementation. - if is_bops_target_kpi: + if is_bops_target_resource_utilization: # Reconstructing original graph's configuration from virtual graph's configuration origin_mp_model_configuration = \ search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph( @@ -295,7 +301,7 @@ def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager, metric_value = search_manager.compute_metric_fn( mp_model_configuration, [node_idx], - search_manager.max_kpi_config) + search_manager.max_ru_config) layer_to_metrics_mapping[node_idx][bitwidth_idx] = max(metric_value, max_config_value + eps) diff --git a/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py b/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py index aaed945f6..397074b22 100644 --- a/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +++ b/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py @@ -15,7 +15,7 @@ from typing import List -from model_compression_toolkit.core import KPI +from model_compression_toolkit.core import ResourceUtilization from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import \ MixedPrecisionSearchManager from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \ @@ -26,29 +26,29 @@ def greedy_solution_refinement_procedure(mp_solution: List[int], search_manager: MixedPrecisionSearchManager, - target_kpi: KPI) -> List[int]: + target_resource_utilization: ResourceUtilization) -> List[int]: """ A greedy procedure to try and improve a mixed-precision solution that was found by a mixed-precision optimization algorithm. This procedure tries to increase the bit-width precision of configurable nodes that did not get the maximal candidate in the found solution. 
- It iteratively goes over all such nodes, computes the KPI values on a modified configuration (with the node's next - best candidate), filters out all configs that hold the KPI constraints and chooses one of them as an improvement + It iteratively goes over all such nodes, computes the resource utilization values on a modified configuration (with the node's next + best candidate), filters out all configs that hold the resource utilization constraints and chooses one of them as an improvement step - The choice is done in a greedy approach where we take the configuration that modifies the KPI the least. + The choice is done in a greedy approach where we take the configuration that modifies the resource utilization the least. Args: mp_solution: A mixed-precision configuration that was found by a mixed-precision optimization algorithm. search_manager: A MixedPrecisionSearchManager object. - target_kpi: The target KPIs for the mixed-precision search. + target_resource_utilization: The target resource utilization for the mixed-precision search. Returns: A new, possibly updated, mixed-precision bit-width configuration. """ - # Refinement is not supported for BOPs KPI for now... - if target_kpi.bops < np.inf: - Logger.info(f'Target KPI constraint BOPs - Skipping MP greedy solution refinement') + # Refinement is not supported for BOPs utilization for now... 
+ if target_resource_utilization.bops < np.inf: + Logger.info(f'Target resource utilization constraint BOPs - Skipping MP greedy solution refinement') return mp_solution new_solution = mp_solution.copy() @@ -56,7 +56,7 @@ def greedy_solution_refinement_procedure(mp_solution: List[int], while changed: changed = False - nodes_kpis = {} + nodes_ru = {} nodes_next_candidate = {} for node_idx in range(len(mp_solution)): @@ -72,32 +72,32 @@ def greedy_solution_refinement_procedure(mp_solution: List[int], kernel_attr = None if kernel_attr is None else kernel_attr[0] valid_candidates = _get_valid_candidates_indices(node_candidates, new_solution[node_idx], kernel_attr) - # Create a list of KPIs for the valid candidates. - updated_kpis = [] + # Create a list of ru for the valid candidates. + updated_ru = [] for valid_idx in valid_candidates: - node_updated_kpis = search_manager.compute_kpi_for_config( + node_updated_ru = search_manager.compute_resource_utilization_for_config( config=search_manager.replace_config_in_index(new_solution, node_idx, valid_idx)) - updated_kpis.append(node_updated_kpis) - - # filter out new configs that don't hold the KPI restrictions - node_filtered_kpis = [(node_idx, kpis) for node_idx, kpis in zip(valid_candidates, updated_kpis) if - target_kpi.holds_constraints(kpis)] - - if len(node_filtered_kpis) > 0: - sorted_by_kpi = sorted(node_filtered_kpis, key=lambda node_kpis: (node_kpis[1].total_memory, - node_kpis[1].weights_memory, - node_kpis[1].activation_memory)) - nodes_kpis[node_idx] = sorted_by_kpi[0][1] - nodes_next_candidate[node_idx] = sorted_by_kpi[0][0] - - if len(nodes_kpis) > 0: - # filter out new configs that don't hold the KPI restrictions - node_filtered_kpis = [(node_idx, kpis) for node_idx, kpis in nodes_kpis.items()] - sorted_by_kpi = sorted(node_filtered_kpis, key=lambda node_kpis: (node_kpis[1].total_memory, - node_kpis[1].weights_memory, - node_kpis[1].activation_memory)) - - node_idx_to_upgrade = sorted_by_kpi[0][0] + 
updated_ru.append(node_updated_ru) + + # filter out new configs that don't hold the resource utilization restrictions + node_filtered_ru = [(node_idx, ru) for node_idx, ru in zip(valid_candidates, updated_ru) if + target_resource_utilization.holds_constraints(ru)] + + if len(node_filtered_ru) > 0: + sorted_by_ru = sorted(node_filtered_ru, key=lambda node_ru: (node_ru[1].total_memory, + node_ru[1].weights_memory, + node_ru[1].activation_memory)) + nodes_ru[node_idx] = sorted_by_ru[0][1] + nodes_next_candidate[node_idx] = sorted_by_ru[0][0] + + if len(nodes_ru) > 0: + # filter out new configs that don't hold the ru restrictions + node_filtered_ru = [(node_idx, ru) for node_idx, ru in nodes_ru.items()] + sorted_by_ru = sorted(node_filtered_ru, key=lambda node_ru: (node_ru[1].total_memory, + node_ru[1].weights_memory, + node_ru[1].activation_memory)) + + node_idx_to_upgrade = sorted_by_ru[0][0] new_solution[node_idx_to_upgrade] = nodes_next_candidate[node_idx_to_upgrade] changed = True diff --git a/model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py b/model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py index 5e1f3fe9e..8e583a537 100644 --- a/model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +++ b/model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py @@ -18,7 +18,7 @@ from model_compression_toolkit.core.common import BaseNode, Graph from model_compression_toolkit.core.common.framework_info import FrameworkInfo -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.pruning.mask.per_channel_mask import MaskIndicator from model_compression_toolkit.core.common.pruning.memory_calculator import MemoryCalculator from model_compression_toolkit.core.common.pruning.pruning_framework_implementation 
import PruningFrameworkImplementation @@ -30,16 +30,16 @@ class GreedyMaskCalculator: """ GreedyMaskCalculator calculates pruning masks for prunable nodes to meet a - specified target KPI. It employs a greedy approach to selectively unprune channel + specified target resource utilization. It employs a greedy approach to selectively unprune channel groups (SIMD groups) based on their importance scores. Initially, all channels are pruned (mask set to zero), and the calculator iteratively adds back the most significant - channel groups until the memory footprint meets the target KPI or all channels are unpruned. + channel groups until the memory footprint meets the target resource utilization or all channels are unpruned. """ def __init__(self, prunable_nodes: List[BaseNode], fw_info: FrameworkInfo, simd_groups_scores: Dict[BaseNode, np.ndarray], - target_kpi: KPI, + target_resource_utilization: ResourceUtilization, graph: Graph, fw_impl: PruningFrameworkImplementation, tpc: TargetPlatformCapabilities, @@ -49,7 +49,7 @@ def __init__(self, prunable_nodes (List[BaseNode]): Nodes that are eligible for pruning. fw_info (FrameworkInfo): Framework-specific information and utilities. simd_groups_scores (Dict[BaseNode, np.ndarray]): Importance scores for each SIMG group in a prunable node. - target_kpi (KPI): The target KPI to achieve. + target_resource_utilization (ResourceUtilization): The target resource utilization to achieve. graph (Graph): The computational graph of the model. fw_impl (PruningFrameworkImplementation): Framework-specific implementation details. tpc (TargetPlatformCapabilities): Platform-specific constraints and capabilities. 
@@ -57,7 +57,7 @@ def __init__(self, """ self.prunable_nodes = prunable_nodes self.fw_info = fw_info - self.target_kpi = target_kpi + self.target_resource_utilization = target_resource_utilization self.graph = graph self.fw_impl = fw_impl self.tpc = tpc @@ -86,17 +86,18 @@ def get_mask(self) -> Dict[BaseNode, np.ndarray]: def compute_mask(self): """ Computes the pruning mask by iteratively adding SIMD groups to unpruned state - based on their importance and the target KPI. + based on their importance and the target resource utilization. """ # Iteratively unprune the graph while monitoring the memory footprint. current_memory = self.memory_calculator.get_pruned_graph_memory(masks=self.oc_pruning_mask.get_mask(), include_padded_channels=self.tpc.is_simd_padding) - if current_memory > self.target_kpi.weights_memory: - Logger.critical(f"Insufficient memory for the target KPI: current memory {current_memory}, target KPI {self.target_kpi.weights_memory}.") + if current_memory > self.target_resource_utilization.weights_memory: + Logger.critical(f"Insufficient memory for the target resource utilization: current memory {current_memory}, " + f"target memory {self.target_resource_utilization.weights_memory}.") # Greedily unprune groups (by setting their mask to 1) until the memory target is met # or all channels unpruned. - while current_memory < self.target_kpi.weights_memory and self.oc_pruning_mask.has_pruned_channel(): + while current_memory < self.target_resource_utilization.weights_memory and self.oc_pruning_mask.has_pruned_channel(): # Select the best SIMD group (best means highest score which means most sensitive group) # to add based on the scores. node_to_remain, group_to_remain_idx = self._get_most_sensitive_simd_group_candidate() @@ -107,7 +108,7 @@ def compute_mask(self): include_padded_channels=self.tpc.is_simd_padding) # If the target memory is exceeded, revert the last addition. 
- if current_memory > self.target_kpi.weights_memory: + if current_memory > self.target_resource_utilization.weights_memory: self.oc_pruning_mask.set_mask_value_for_simd_group(node=node_to_remain, group_index=group_to_remain_idx, mask_indicator=MaskIndicator.PRUNED) @@ -140,7 +141,7 @@ def _get_most_sensitive_simd_group_candidate(self) -> Tuple[BaseNode, int]: best_group_idx = group_idx if best_node is None: - Logger.critical("No prunable SIMD group identified.") + Logger.error("No prunable SIMD group found.") return best_node, best_group_idx diff --git a/model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py b/model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py index 9c73cfdc2..a86e748f9 100644 --- a/model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +++ b/model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py @@ -19,7 +19,7 @@ from model_compression_toolkit.core.common import BaseNode, Graph from model_compression_toolkit.core.common.framework_info import FrameworkInfo -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.pruning.memory_calculator import MemoryCalculator from model_compression_toolkit.core.common.pruning.pruning_framework_implementation import PruningFrameworkImplementation from model_compression_toolkit.logger import Logger diff --git a/model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py b/model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py index f4dc8f940..4c200ab87 100644 --- a/model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +++ b/model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py @@ -18,7 +18,7 @@ from model_compression_toolkit.core.common import BaseNode, Graph from 
model_compression_toolkit.core.common.framework_info import FrameworkInfo -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.pruning.mask.per_channel_mask import PerChannelMask, MaskIndicator from model_compression_toolkit.core.common.pruning.memory_calculator import MemoryCalculator from model_compression_toolkit.core.common.pruning.pruning_framework_implementation import PruningFrameworkImplementation diff --git a/model_compression_toolkit/core/common/pruning/pruner.py b/model_compression_toolkit/core/common/pruning/pruner.py index 612188cc3..86d25e88f 100644 --- a/model_compression_toolkit/core/common/pruning/pruner.py +++ b/model_compression_toolkit/core/common/pruning/pruner.py @@ -18,7 +18,7 @@ from model_compression_toolkit.core.common import Graph, BaseNode from model_compression_toolkit.core.common.framework_info import FrameworkInfo -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.pruning.greedy_mask_calculator import GreedyMaskCalculator from model_compression_toolkit.core.common.pruning.importance_metrics.importance_metric_factory import \ get_importance_metric @@ -33,14 +33,14 @@ class Pruner: """ - Pruner class responsible for applying pruning to a computational graph to meet a target KPI. + Pruner class responsible for applying pruning to a computational graph to meet a target resource utilization. It identifies and prunes less significant channels based on importance scores, considering SIMD constraints. 
""" def __init__(self, float_graph: Graph, fw_info: FrameworkInfo, fw_impl: PruningFrameworkImplementation, - target_kpi: KPI, + target_resource_utilization: ResourceUtilization, representative_data_gen: Callable, pruning_config: PruningConfig, target_platform_capabilities: TargetPlatformCapabilities): @@ -49,7 +49,7 @@ def __init__(self, float_graph (Graph): The floating-point representation of the model's computation graph. fw_info (FrameworkInfo): Contains metadata and helper functions for the framework. fw_impl (PruningFrameworkImplementation): Implementation of specific framework methods required for pruning. - target_kpi (KPI): The target KPIs to be achieved after pruning. + target_resource_utilization (ResourceUtilization): The target resource utilization to be achieved after pruning. representative_data_gen (Callable): Generator function for representative dataset used in pruning analysis. pruning_config (PruningConfig): Configuration object specifying how pruning should be performed. target_platform_capabilities (TargetPlatformCapabilities): Object encapsulating the capabilities of the target hardware platform. 
@@ -57,7 +57,7 @@ def __init__(self, self.float_graph = float_graph self.fw_info = fw_info self.fw_impl = fw_impl - self.target_kpi = target_kpi + self.target_resource_utilization = target_resource_utilization self.representative_data_gen = representative_data_gen self.pruning_config = pruning_config self.target_platform_capabilities = target_platform_capabilities @@ -84,7 +84,7 @@ def prune_graph(self): mask_calculator = GreedyMaskCalculator(entry_nodes, self.fw_info, self.simd_scores, - self.target_kpi, + self.target_resource_utilization, self.float_graph, self.fw_impl, self.target_platform_capabilities, diff --git a/model_compression_toolkit/core/common/pruning/pruning_config.py b/model_compression_toolkit/core/common/pruning/pruning_config.py index b0dbf7b3c..0ced32763 100644 --- a/model_compression_toolkit/core/common/pruning/pruning_config.py +++ b/model_compression_toolkit/core/common/pruning/pruning_config.py @@ -32,7 +32,7 @@ class ChannelsFilteringStrategy(Enum): """ Enum for specifying the strategy used for filtering (pruning) channels: - GREEDY - Prune the least important channel groups up to allowed resources in the KPI (for now, only weights_memory is considered). + GREEDY - Prune the least important channel groups up to the allowed resources utilization limit (for now, only weights_memory is considered). """ GREEDY = 0 # Greedy strategy for pruning channels based on importance metrics. 
diff --git a/model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py b/model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py index f6d74b505..dec1ec3a7 100644 --- a/model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +++ b/model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py @@ -48,7 +48,7 @@ def substitute(self, if len(graph.out_edges(act_node)) > 1: Logger.warning(f"Node {act_node.name} has multiple outgoing edges, which is not supported with " - f"mixed-precision bit-operations KPI, thus, edge {act_node.name} --> {weights_node.name} " + f"mixed-precision bit-operations utilization, thus, edge {act_node.name} --> {weights_node.name} " f"would not be counted in the bit-operations calculations.") return graph diff --git a/model_compression_toolkit/core/common/substitutions/weights_activation_split.py b/model_compression_toolkit/core/common/substitutions/weights_activation_split.py index aa899b844..caca2d159 100644 --- a/model_compression_toolkit/core/common/substitutions/weights_activation_split.py +++ b/model_compression_toolkit/core/common/substitutions/weights_activation_split.py @@ -65,7 +65,9 @@ def substitute(self, for c in node.candidates_quantization_cfg] if not set(expected_candidates).issubset(all_candidates_bits): # Node is not composite, therefore, can't be split - Logger.critical(f"The node {node.name} cannot be split as it has non-composite candidates. For mixed-precision search with BOPS target KPI, all model layers must be composite.") # pragma: no cover + Logger.critical(f"The node {node.name} cannot be split as it has non-composite candidates. 
" + f"For mixed-precision search with BOPS target resource utilization, " + f"all model layers must be composite.") # pragma: no cover weights_node = VirtualSplitWeightsNode(node, kernel_attr) activation_node = VirtualSplitActivationNode(node, self.activation_layer_type, self.fw_attr) diff --git a/model_compression_toolkit/core/common/user_info.py b/model_compression_toolkit/core/common/user_info.py index a0e3f26b3..1a3566c65 100644 --- a/model_compression_toolkit/core/common/user_info.py +++ b/model_compression_toolkit/core/common/user_info.py @@ -29,7 +29,7 @@ def __init__(self): self.input_scale = 1 self.gptq_info_dict = dict() self.mixed_precision_cfg = None - self.final_kpi = None + self.final_resource_utilization = None def set_input_scale(self, scale_value: float): """ diff --git a/model_compression_toolkit/core/keras/kpi_data_facade.py b/model_compression_toolkit/core/keras/resource_utilization_data_facade.py similarity index 60% rename from model_compression_toolkit/core/keras/kpi_data_facade.py rename to model_compression_toolkit/core/keras/resource_utilization_data_facade.py index f5e6ec66d..475aeb6b2 100644 --- a/model_compression_toolkit/core/keras/kpi_data_facade.py +++ b/model_compression_toolkit/core/keras/resource_utilization_data_facade.py @@ -15,12 +15,11 @@ from typing import Callable from model_compression_toolkit.core import MixedPrecisionQuantizationConfig, CoreConfig -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import TENSORFLOW from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi_data import compute_kpi_data -from 
model_compression_toolkit.core.common.framework_info import FrameworkInfo +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_data import compute_resource_utilization_data from model_compression_toolkit.constants import FOUND_TF if FOUND_TF: @@ -33,13 +32,16 @@ KERAS_DEFAULT_TPC = get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL) - def keras_kpi_data(in_model: Model, - representative_data_gen: Callable, - core_config: CoreConfig = CoreConfig(mixed_precision_config=MixedPrecisionQuantizationConfig()), - target_platform_capabilities: TargetPlatformCapabilities = KERAS_DEFAULT_TPC) -> KPI: + def keras_resource_utilization_data(in_model: Model, + representative_data_gen: Callable, + core_config: CoreConfig = CoreConfig( + mixed_precision_config=MixedPrecisionQuantizationConfig()), + target_platform_capabilities: TargetPlatformCapabilities = KERAS_DEFAULT_TPC) -> ResourceUtilization: """ - Computes KPI data that can be used to calculate the desired target KPI for mixed-precision quantization. - Builds the computation graph from the given model and hw modeling, and uses it to compute the KPI data. + Computes resource utilization data that can be used to calculate the desired target resource utilization + for mixed-precision quantization. + Builds the computation graph from the given model and hw modeling, and uses it to compute the + resource utilization data. Args: in_model (Model): Keras model to quantize. @@ -49,7 +51,7 @@ def keras_kpi_data(in_model: Model, Returns: - A KPI object with total weights parameters sum and max activation tensor. + A ResourceUtilization object with total weights parameters sum and max activation tensor. 
Examples: @@ -63,28 +65,29 @@ def keras_kpi_data(in_model: Model, >>> import numpy as np >>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))] - Import MCT and call for KPI data calculation: + Import MCT and call for resource utilization data calculation: >>> import model_compression_toolkit as mct - >>> kpi_data = mct.core.keras_kpi_data(model, repr_datagen) + >>> ru_data = mct.core.keras_resource_utilization_data(model, repr_datagen) """ if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig): - Logger.critical("KPI data computation requires a MixedPrecisionQuantizationConfig object; provided config is of an incorrect type.") + Logger.critical("Resource utilization data computation requires a MixedPrecisionQuantizationConfig object; " + "provided config is of an incorrect type.") fw_impl = KerasImplementation() - return compute_kpi_data(in_model, - representative_data_gen, - core_config, - target_platform_capabilities, - DEFAULT_KERAS_INFO, - fw_impl) + return compute_resource_utilization_data(in_model, + representative_data_gen, + core_config, + target_platform_capabilities, + DEFAULT_KERAS_INFO, + fw_impl) else: # If tensorflow is not installed, # we raise an exception when trying to use this function. - def keras_kpi_data(*args, **kwargs): - Logger.critical("Tensorflow must be installed to use keras_kpi_data. " + def keras_resource_utilization_data(*args, **kwargs): + Logger.critical("Tensorflow must be installed to use keras_resource_utilization_data. 
" "The 'tensorflow' package is missing.") # pragma: no cover diff --git a/model_compression_toolkit/core/pytorch/kpi_data_facade.py b/model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py similarity index 60% rename from model_compression_toolkit/core/pytorch/kpi_data_facade.py rename to model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py index 5db974a40..d3f5b61c4 100644 --- a/model_compression_toolkit/core/pytorch/kpi_data_facade.py +++ b/model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py @@ -18,9 +18,9 @@ from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import PYTORCH from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.framework_info import FrameworkInfo -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi_data import compute_kpi_data +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_data import compute_resource_utilization_data from model_compression_toolkit.core.common.quantization.core_config import CoreConfig from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import MixedPrecisionQuantizationConfig from model_compression_toolkit.constants import FOUND_TORCH @@ -36,13 +36,14 @@ PYTORCH_DEFAULT_TPC = get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL) - def pytorch_kpi_data(in_model: Module, - representative_data_gen: Callable, - core_config: CoreConfig = CoreConfig(), - target_platform_capabilities: TargetPlatformCapabilities = PYTORCH_DEFAULT_TPC) -> KPI: + def pytorch_resource_utilization_data(in_model: 
Module, + representative_data_gen: Callable, + core_config: CoreConfig = CoreConfig(), + target_platform_capabilities: TargetPlatformCapabilities = PYTORCH_DEFAULT_TPC + ) -> ResourceUtilization: """ - Computes KPI data that can be used to calculate the desired target KPI for mixed-precision quantization. - Builds the computation graph from the given model and target platform capabilities, and uses it to compute the KPI data. + Computes resource utilization data that can be used to calculate the desired target resource utilization for mixed-precision quantization. + Builds the computation graph from the given model and target platform capabilities, and uses it to compute the resource utilization data. Args: in_model (Model): PyTorch model to quantize. @@ -52,7 +53,7 @@ def pytorch_kpi_data(in_model: Module, Returns: - A KPI object with total weights parameters sum and max activation tensor. + A ResourceUtilization object with total weights parameters sum and max activation tensor. Examples: @@ -66,28 +67,30 @@ def pytorch_kpi_data(in_model: Module, >>> import numpy as np >>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))] - Import mct and call for KPI data calculation: + Import mct and call for resource utilization data calculation: >>> import model_compression_toolkit as mct - >>> kpi_data = mct.core.pytorch_kpi_data(module, repr_datagen) + >>> ru_data = mct.core.pytorch_resource_utilization_data(module, repr_datagen) """ if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig): - Logger.critical("KPI data computation requires a MixedPrecisionQuantizationConfig object. The provided 'mixed_precision_config' is not of this type.") + Logger.critical("Resource utilization data computation requires a MixedPrecisionQuantizationConfig object. 
" + "The provided 'mixed_precision_config' is not of this type.") fw_impl = PytorchImplementation() - return compute_kpi_data(in_model, - representative_data_gen, - core_config, - target_platform_capabilities, - DEFAULT_PYTORCH_INFO, - fw_impl) + return compute_resource_utilization_data(in_model, + representative_data_gen, + core_config, + target_platform_capabilities, + DEFAULT_PYTORCH_INFO, + fw_impl) else: # If torch is not installed, # we raise an exception when trying to use this function. - def pytorch_kpi_data(*args, **kwargs): - Logger.critical("PyTorch must be installed to use 'pytorch_kpi_data'. The 'torch' package is missing.") # pragma: no cover + def pytorch_resource_utilization_data(*args, **kwargs): + Logger.critical("PyTorch must be installed to use 'pytorch_resource_utilization_data'. " + "The 'torch' package is missing.") # pragma: no cover diff --git a/model_compression_toolkit/core/runner.py b/model_compression_toolkit/core/runner.py index 94b6d1aec..fb3541c22 100644 --- a/model_compression_toolkit/core/runner.py +++ b/model_compression_toolkit/core/runner.py @@ -26,10 +26,10 @@ from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation from model_compression_toolkit.core.common.graph.base_graph import Graph from model_compression_toolkit.core.common.mixed_precision.bit_width_setter import set_bit_widths -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI, KPITarget -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi_aggregation_methods import MpKpiAggregation -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi_functions_mapping import kpi_functions_mapping -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi_methods import MpKpiMetric +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget +from 
model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_functions_mapping import ru_functions_mapping +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_facade import search_bit_width from model_compression_toolkit.core.common.network_editors.edit_network import edit_network_graph from model_compression_toolkit.core.common.quantization.core_config import CoreConfig @@ -47,7 +47,7 @@ def core_runner(in_model: Any, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation, tpc: TargetPlatformCapabilities, - target_kpi: KPI = None, + target_resource_utilization: ResourceUtilization = None, tb_w: TensorboardWriter = None): """ Quantize a trained model using post-training quantization. @@ -67,7 +67,7 @@ def core_runner(in_model: Any, fw_impl: FrameworkImplementation object with a specific framework methods implementation. tpc: TargetPlatformCapabilities object that models the inference target platform and the attached framework operator's information. - target_kpi: KPI to constraint the search of the mixed-precision configuration for the model. + target_resource_utilization: ResourceUtilization to constrain the search of the mixed-precision configuration for the model. 
tb_w: TensorboardWriter object for logging Returns: @@ -84,9 +84,9 @@ def core_runner(in_model: Any, ' consider increasing the batch size') # Checking whether to run mixed precision quantization - if target_kpi is not None: + if target_resource_utilization is not None: if core_config.mixed_precision_config is None: - Logger.critical("Provided an initialized target_kpi, that means that mixed precision quantization is " + Logger.critical("Provided an initialized target_resource_utilization, that means that mixed precision quantization is " "enabled, but the provided MixedPrecisionQuantizationConfig is None.") core_config.mixed_precision_config.set_mixed_precision_enable() @@ -119,7 +119,7 @@ def core_runner(in_model: Any, bit_widths_config = search_bit_width(tg, fw_info, fw_impl, - target_kpi, + target_resource_utilization, core_config.mixed_precision_config, representative_data_gen, hessian_info_service=hessian_info_service) @@ -139,11 +139,11 @@ def core_runner(in_model: Any, # This is since some actions regard the final configuration and should be edited. 
edit_network_graph(tg, fw_info, core_config.debug_config.network_editor) - _set_final_kpi(graph=tg, - final_bit_widths_config=bit_widths_config, - kpi_functions_dict=kpi_functions_mapping, - fw_info=fw_info, - fw_impl=fw_impl) + _set_final_resource_utilization(graph=tg, + final_bit_widths_config=bit_widths_config, + ru_functions_dict=ru_functions_mapping, + fw_info=fw_info, + fw_impl=fw_impl) if core_config.mixed_precision_enable: # Retrieve lists of tuples (node, node's final weights/activation bitwidth) @@ -164,49 +164,50 @@ def core_runner(in_model: Any, return tg, bit_widths_config, hessian_info_service -def _set_final_kpi(graph: Graph, - final_bit_widths_config: List[int], - kpi_functions_dict: Dict[KPITarget, Tuple[MpKpiMetric, MpKpiAggregation]], - fw_info: FrameworkInfo, - fw_impl: FrameworkImplementation): +def _set_final_resource_utilization(graph: Graph, + final_bit_widths_config: List[int], + ru_functions_dict: Dict[RUTarget, Tuple[MpRuMetric, MpRuAggregation]], + fw_info: FrameworkInfo, + fw_impl: FrameworkImplementation): """ - Computing the KPIs of the model according to the final bit-width configuration, + Computing the resource utilization of the model according to the final bit-width configuration, and setting it (inplace) in the graph's UserInfo field. Args: - graph: Graph to compute the KPI for. + graph: Graph to compute the resource utilization for. final_bit_widths_config: The final bit-width configuration to quantize the model accordingly. - kpi_functions_dict: A mapping between a KPITarget and a pair of kpi method and kpi aggregation functions. + ru_functions_dict: A mapping between a RUTarget and a pair of resource utilization method and resource utilization aggregation functions. fw_info: A FrameworkInfo object. fw_impl: FrameworkImplementation object with specific framework methods implementation. 
""" - final_kpis_dict = {} - for kpi_target, kpi_funcs in kpi_functions_dict.items(): - kpi_method, kpi_aggr = kpi_funcs - if kpi_target == KPITarget.BOPS: - final_kpis_dict[kpi_target] = kpi_aggr(kpi_method(final_bit_widths_config, graph, fw_info, fw_impl, False), False)[0] + final_ru_dict = {} + for ru_target, ru_funcs in ru_functions_dict.items(): + ru_method, ru_aggr = ru_funcs + if ru_target == RUTarget.BOPS: + final_ru_dict[ru_target] = \ + ru_aggr(ru_method(final_bit_widths_config, graph, fw_info, fw_impl, False), False)[0] else: - non_conf_kpi = kpi_method([], graph, fw_info, fw_impl) - conf_kpi = kpi_method(final_bit_widths_config, graph, fw_info, fw_impl) - if len(final_bit_widths_config) > 0 and len(non_conf_kpi) > 0: - final_kpis_dict[kpi_target] = kpi_aggr(np.concatenate([conf_kpi, non_conf_kpi]), False)[0] - elif len(final_bit_widths_config) > 0 and len(non_conf_kpi) == 0: - final_kpis_dict[kpi_target] = kpi_aggr(conf_kpi, False)[0] - elif len(final_bit_widths_config) == 0 and len(non_conf_kpi) > 0: + non_conf_ru = ru_method([], graph, fw_info, fw_impl) + conf_ru = ru_method(final_bit_widths_config, graph, fw_info, fw_impl) + if len(final_bit_widths_config) > 0 and len(non_conf_ru) > 0: + final_ru_dict[ru_target] = ru_aggr(np.concatenate([conf_ru, non_conf_ru]), False)[0] + elif len(final_bit_widths_config) > 0 and len(non_conf_ru) == 0: + final_ru_dict[ru_target] = ru_aggr(conf_ru, False)[0] + elif len(final_bit_widths_config) == 0 and len(non_conf_ru) > 0: # final_bit_widths_config == 0 ==> no configurable nodes, - # thus, KPI can be computed from non_conf_kpi alone - final_kpis_dict[kpi_target] = kpi_aggr(non_conf_kpi, False)[0] + # thus, ru can be computed from non_conf_ru alone + final_ru_dict[ru_target] = ru_aggr(non_conf_ru, False)[0] else: # No relevant nodes have been quantized with affect on the given target - since we only consider # in the model's final size the quantized layers size, this means that the final size for this target # is 
zero. - Logger.warning(f"No relevant quantized layers for the KPI target {kpi_target} were found, the recorded" - f"final KPI for this target would be 0.") - final_kpis_dict[kpi_target] = 0 + Logger.warning(f"No relevant quantized layers for the ru target {ru_target} were found, the recorded" + f"final ru for this target would be 0.") + final_ru_dict[ru_target] = 0 - final_kpi = KPI() - final_kpi.set_kpi_by_target(final_kpis_dict) - graph.user_info.final_kpi = final_kpi + final_ru = ResourceUtilization() + final_ru.set_resource_utilization_by_target(final_ru_dict) + graph.user_info.final_resource_utilization = final_ru graph.user_info.mixed_precision_cfg = final_bit_widths_config diff --git a/model_compression_toolkit/gptq/keras/quantization_facade.py b/model_compression_toolkit/gptq/keras/quantization_facade.py index adf0dc88f..d86feb9c4 100644 --- a/model_compression_toolkit/gptq/keras/quantization_facade.py +++ b/model_compression_toolkit/gptq/keras/quantization_facade.py @@ -22,7 +22,7 @@ from model_compression_toolkit.constants import TENSORFLOW, FOUND_TF from model_compression_toolkit.core.common.user_info import UserInformation from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.framework_info import FrameworkInfo from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import MixedPrecisionQuantizationConfig from model_compression_toolkit.core import CoreConfig @@ -116,7 +116,7 @@ def get_keras_gptq_config(n_epochs: int, def keras_gradient_post_training_quantization(in_model: Model, representative_data_gen: Callable, gptq_config: GradientPTQConfig, gptq_representative_data_gen: Callable = None, - target_kpi: KPI = None, + 
target_resource_utilization: ResourceUtilization = None, core_config: CoreConfig = CoreConfig(), target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC) -> Tuple[Model, UserInformation]: """ @@ -129,7 +129,7 @@ def keras_gradient_post_training_quantization(in_model: Model, representative_da statistics. Then, if given a mixed precision config in the core_config, using an ILP solver we find a mixed-precision configuration, and set a bit-width for each layer. The model is then quantized (both coefficients and activations by default). - In order to limit the maximal model's size, a target KPI need to be passed after weights_memory + In order to limit the maximal model's size, a target resource utilization needs to be passed after weights_memory is set (in bytes). Then, the quantized weights are optimized using gradient based post training quantization by comparing points between the float and quantized models, and minimizing the observed @@ -140,7 +140,7 @@ def keras_gradient_post_training_quantization(in_model: Model, representative_da representative_data_gen (Callable): Dataset used for calibration. gptq_config (GradientPTQConfig): Configuration for using gptq (e.g. optimizer). gptq_representative_data_gen (Callable): Dataset used for GPTQ training. If None defaults to representative_data_gen - target_kpi (KPI): KPI object to limit the search of the mixed-precision configuration as desired. + target_resource_utilization (ResourceUtilization): ResourceUtilization object to limit the search of the mixed-precision configuration as desired. core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters. target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the Keras model according to. 
@@ -174,12 +174,12 @@ def keras_gradient_post_training_quantization(in_model: Model, representative_da >>> config = mct.core.CoreConfig(mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=1)) - For mixed-precision set a target KPI object: - Create a KPI object to limit our returned model's size. Note that this value affects only coefficients + For mixed-precision set a target resource utilization object: + Create a resource utilization object to limit our returned model's size. Note that this value affects only coefficients that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, while the bias will not): - >>> kpi = mct.core.KPI(model.count_params() * 0.75) # About 0.75 of the model size when quantized with 8 bits. + >>> ru = mct.core.ResourceUtilization(model.count_params() * 0.75) # About 0.75 of the model size when quantized with 8 bits. Create GPTQ config: @@ -187,7 +187,7 @@ def keras_gradient_post_training_quantization(in_model: Model, representative_da Pass the model with the representative dataset generator to get a quantized model: - >>> quantized_model, quantization_info = mct.gptq.keras_gradient_post_training_quantization(model, repr_datagen, gptq_config, target_kpi=kpi, core_config=config) + >>> quantized_model, quantization_info = mct.gptq.keras_gradient_post_training_quantization(model, repr_datagen, gptq_config, target_resource_utilization=ru, core_config=config) """ KerasModelValidation(model=in_model, diff --git a/model_compression_toolkit/gptq/pytorch/quantization_facade.py b/model_compression_toolkit/gptq/pytorch/quantization_facade.py index 8faf167b0..b65dc131a 100644 --- a/model_compression_toolkit/gptq/pytorch/quantization_facade.py +++ b/model_compression_toolkit/gptq/pytorch/quantization_facade.py @@ -21,7 +21,7 @@ from model_compression_toolkit.constants import PYTORCH from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig from 
model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.runner import core_runner from model_compression_toolkit.gptq.keras.quantization_facade import GPTQ_MOMENTUM from model_compression_toolkit.gptq.runner import gptq_runner @@ -94,7 +94,7 @@ def get_pytorch_gptq_config(n_epochs: int, def pytorch_gradient_post_training_quantization(model: Module, representative_data_gen: Callable, - target_kpi: KPI = None, + target_resource_utilization: ResourceUtilization = None, core_config: CoreConfig = CoreConfig(), gptq_config: GradientPTQConfig = None, gptq_representative_data_gen: Callable = None, @@ -118,7 +118,7 @@ def pytorch_gradient_post_training_quantization(model: Module, Args: model (Module): Pytorch model to quantize. representative_data_gen (Callable): Dataset used for calibration. - target_kpi (KPI): KPI object to limit the search of the mixed-precision configuration as desired. + target_resource_utilization (ResourceUtilization): ResourceUtilization object to limit the search of the mixed-precision configuration as desired. core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters. gptq_config (GradientPTQConfig): Configuration for using gptq (e.g. optimizer). gptq_representative_data_gen (Callable): Dataset used for GPTQ training. 
If None defaults to representative_data_gen @@ -176,7 +176,7 @@ def pytorch_gradient_post_training_quantization(model: Module, fw_info=DEFAULT_PYTORCH_INFO, fw_impl=fw_impl, tpc=target_platform_capabilities, - target_kpi=target_kpi, + target_resource_utilization=target_resource_utilization, tb_w=tb_w) # ---------------------- # diff --git a/model_compression_toolkit/pruning/keras/pruning_facade.py b/model_compression_toolkit/pruning/keras/pruning_facade.py index 694c906ae..b72eaecd2 100644 --- a/model_compression_toolkit/pruning/keras/pruning_facade.py +++ b/model_compression_toolkit/pruning/keras/pruning_facade.py @@ -17,7 +17,7 @@ from model_compression_toolkit import get_target_platform_capabilities from model_compression_toolkit.constants import TENSORFLOW, FOUND_TF -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.pruning.pruner import Pruner from model_compression_toolkit.core.common.pruning.pruning_config import PruningConfig from model_compression_toolkit.core.common.pruning.pruning_info import PruningInfo @@ -37,13 +37,13 @@ DEFAULT_KERAS_TPC = get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL) def keras_pruning_experimental(model: Model, - target_kpi: KPI, + target_resource_utilization: ResourceUtilization, representative_data_gen: Callable, pruning_config: PruningConfig = PruningConfig(), target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC) -> Tuple[Model, PruningInfo]: """ - Perform structured pruning on a Keras model to meet a specified target KPI. - This function prunes the provided model according to the target KPI by grouping and pruning + Perform structured pruning on a Keras model to meet a specified target resource utilization. 
+ This function prunes the provided model according to the target resource utilization by grouping and pruning channels based on each layer's SIMD configuration in the Target Platform Capabilities (TPC). By default, the importance of each channel group is determined using the Label-Free Hessian (LFH) method, assessing each channel's sensitivity to the Hessian of the loss function. @@ -55,7 +55,7 @@ def keras_pruning_experimental(model: Model, Args: model (Model): The original Keras model to be pruned. - target_kpi (KPI): The target Key Performance Indicators to be achieved through pruning. + target_resource_utilization (ResourceUtilization): The target resource utilization to be achieved through pruning. representative_data_gen (Callable): A function to generate representative data for pruning analysis. pruning_config (PruningConfig): Configuration settings for the pruning process. Defaults to standard config. target_platform_capabilities (TargetPlatformCapabilities): Platform-specific constraints and capabilities. Defaults to DEFAULT_KERAS_TPC. @@ -82,12 +82,12 @@ def keras_pruning_experimental(model: Model, >>> import numpy as np >>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))] - Define a target KPI for pruning. + Define a target resource utilization for pruning. Here, we aim to reduce the memory footprint of weights by 50%, assuming the model weights are represented in float32 data type (thus, each parameter is represented using 4 bytes): >>> dense_nparams = sum([l.count_params() for l in model.layers]) - >>> target_kpi = mct.core.KPI(weights_memory=dense_nparams * 4 * 0.5) + >>> target_resource_utilization = mct.core.ResourceUtilization(weights_memory=dense_nparams * 4 * 0.5) Optionally, define a pruning configuration. num_score_approximations can be passed to configure the number of importance scores that will be calculated for each channel. 
@@ -98,7 +98,7 @@ def keras_pruning_experimental(model: Model, Perform pruning: - >>> pruned_model, pruning_info = mct.pruning.keras_pruning_experimental(model=model, target_kpi=target_kpi, representative_data_gen=repr_datagen, pruning_config=pruning_config) + >>> pruned_model, pruning_info = mct.pruning.keras_pruning_experimental(model=model, target_resource_utilization=target_resource_utilization, representative_data_gen=repr_datagen, pruning_config=pruning_config) """ @@ -126,7 +126,7 @@ def keras_pruning_experimental(model: Model, pruner = Pruner(float_graph_with_compression_config, DEFAULT_KERAS_INFO, fw_impl, - target_kpi, + target_resource_utilization, representative_data_gen, pruning_config, target_platform_capabilities) diff --git a/model_compression_toolkit/pruning/pytorch/pruning_facade.py b/model_compression_toolkit/pruning/pytorch/pruning_facade.py index 64f29b81f..00d8c2d8c 100644 --- a/model_compression_toolkit/pruning/pytorch/pruning_facade.py +++ b/model_compression_toolkit/pruning/pytorch/pruning_facade.py @@ -16,7 +16,7 @@ from typing import Callable, Tuple from model_compression_toolkit import get_target_platform_capabilities from model_compression_toolkit.constants import FOUND_TORCH, PYTORCH -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.pruning.pruner import Pruner from model_compression_toolkit.core.common.pruning.pruning_config import PruningConfig from model_compression_toolkit.core.common.pruning.pruning_info import PruningInfo @@ -41,14 +41,14 @@ DEFAULT_PYOTRCH_TPC = get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL) def pytorch_pruning_experimental(model: Module, - target_kpi: KPI, + target_resource_utilization: ResourceUtilization, representative_data_gen: Callable, pruning_config: PruningConfig = PruningConfig(), 
target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYOTRCH_TPC) -> \ Tuple[Module, PruningInfo]: """ - Perform structured pruning on a Pytorch model to meet a specified target KPI. - This function prunes the provided model according to the target KPI by grouping and pruning + Perform structured pruning on a Pytorch model to meet a specified target resource utilization. + This function prunes the provided model according to the target resource utilization by grouping and pruning channels based on each layer's SIMD configuration in the Target Platform Capabilities (TPC). By default, the importance of each channel group is determined using the Label-Free Hessian (LFH) method, assessing each channel's sensitivity to the Hessian of the loss function. @@ -60,7 +60,7 @@ def pytorch_pruning_experimental(model: Module, Args: model (Module): The PyTorch model to be pruned. - target_kpi (KPI): Key Performance Indicators specifying the pruning targets. + target_resource_utilization (ResourceUtilization): Resource utilization constraints specifying the pruning targets. representative_data_gen (Callable): A function to generate representative data for pruning analysis. pruning_config (PruningConfig): Configuration settings for the pruning process. Defaults to standard config. target_platform_capabilities (TargetPlatformCapabilities): Platform-specific constraints and capabilities. @@ -88,12 +88,12 @@ def pytorch_pruning_experimental(model: Module, >>> import numpy as np >>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))] - Define a target KPI for pruning. + Define a target resource utilization for pruning. 
Here, we aim to reduce the memory footprint of weights by 50%, assuming the model weights are represented in float32 data type (thus, each parameter is represented using 4 bytes): >>> dense_nparams = sum(p.numel() for p in model.state_dict().values()) - >>> target_kpi = mct.core.KPI(weights_memory=dense_nparams * 4 * 0.5) + >>> target_resource_utilization = mct.core.ResourceUtilization(weights_memory=dense_nparams * 4 * 0.5) Optionally, define a pruning configuration. num_score_approximations can be passed to configure the number of importance scores that will be calculated for each channel. @@ -104,7 +104,7 @@ def pytorch_pruning_experimental(model: Module, Perform pruning: - >>> pruned_model, pruning_info = mct.pruning.pytorch_pruning_experimental(model=model, target_kpi=target_kpi, representative_data_gen=repr_datagen, pruning_config=pruning_config) + >>> pruned_model, pruning_info = mct.pruning.pytorch_pruning_experimental(model=model, target_resource_utilization=target_resource_utilization, representative_data_gen=repr_datagen, pruning_config=pruning_config) """ @@ -132,7 +132,7 @@ def pytorch_pruning_experimental(model: Module, pruner = Pruner(float_graph_with_compression_config, DEFAULT_PYTORCH_INFO, fw_impl, - target_kpi, + target_resource_utilization, representative_data_gen, pruning_config, target_platform_capabilities) diff --git a/model_compression_toolkit/ptq/keras/quantization_facade.py b/model_compression_toolkit/ptq/keras/quantization_facade.py index a7a88526e..8cc210421 100644 --- a/model_compression_toolkit/ptq/keras/quantization_facade.py +++ b/model_compression_toolkit/ptq/keras/quantization_facade.py @@ -20,7 +20,7 @@ from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import TENSORFLOW, FOUND_TF -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from 
model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \ MixedPrecisionQuantizationConfig from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities @@ -42,7 +42,7 @@ def keras_post_training_quantization(in_model: Model, representative_data_gen: Callable, - target_kpi: KPI = None, + target_resource_utilization: ResourceUtilization = None, core_config: CoreConfig = CoreConfig(), target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC): """ @@ -55,13 +55,13 @@ def keras_post_training_quantization(in_model: Model, statistics. Then, if given a mixed precision config in the core_config, using an ILP solver we find a mixed-precision configuration, and set a bit-width for each layer. The model is then quantized (both coefficients and activations by default). - In order to limit the maximal model's size, a target KPI need to be passed after weights_memory + In order to limit the maximal model's size, a target ResourceUtilization needs to be passed after weights_memory is set (in bytes). Args: in_model (Model): Keras model to quantize. representative_data_gen (Callable): Dataset used for calibration. - target_kpi (KPI): KPI object to limit the search of the mixed-precision configuration as desired. + target_resource_utilization (ResourceUtilization): ResourceUtilization object to limit the search of the mixed-precision configuration as desired. core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters. target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the Keras model according to. 
@@ -99,17 +99,17 @@ def keras_post_training_quantization(in_model: Model, >>> config = mct.core.CoreConfig(mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=1)) - For mixed-precision set a target KPI object: - Create a KPI object to limit our returned model's size. Note that this value affects only coefficients + For mixed-precision set a target ResourceUtilization object: + Create a ResourceUtilization object to limit our returned model's size. Note that this value affects only coefficients that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, while the bias will not): - >>> kpi = mct.core.KPI(model.count_params() * 0.75) # About 0.75 of the model size when quantized with 8 bits. + >>> ru = mct.core.ResourceUtilization(model.count_params() * 0.75) # About 0.75 of the model size when quantized with 8 bits. - Pass the model, the representative dataset generator, the configuration and the target KPI to get a + Pass the model, the representative dataset generator, the configuration and the target resource utilization to get a quantized model: - >>> quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(model, repr_datagen, kpi, core_config=config) + >>> quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(model, repr_datagen, ru, core_config=config) For more configuration options, please take a look at our `API documentation `_. 
@@ -137,7 +137,7 @@ def keras_post_training_quantization(in_model: Model, fw_info=fw_info, fw_impl=fw_impl, tpc=target_platform_capabilities, - target_kpi=target_kpi, + target_resource_utilization=target_resource_utilization, tb_w=tb_w) tg = ptq_runner(tg, representative_data_gen, core_config, fw_info, fw_impl, tb_w) diff --git a/model_compression_toolkit/ptq/pytorch/quantization_facade.py b/model_compression_toolkit/ptq/pytorch/quantization_facade.py index 55943f6cd..1500a7f03 100644 --- a/model_compression_toolkit/ptq/pytorch/quantization_facade.py +++ b/model_compression_toolkit/ptq/pytorch/quantization_facade.py @@ -19,7 +19,7 @@ from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import PYTORCH, FOUND_TORCH from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core import CoreConfig from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \ MixedPrecisionQuantizationConfig @@ -41,7 +41,7 @@ def pytorch_post_training_quantization(in_module: Module, representative_data_gen: Callable, - target_kpi: KPI = None, + target_resource_utilization: ResourceUtilization = None, core_config: CoreConfig = CoreConfig(), target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYTORCH_TPC): """ @@ -60,7 +60,7 @@ def pytorch_post_training_quantization(in_module: Module, Args: in_module (Module): Pytorch module to quantize. representative_data_gen (Callable): Dataset used for calibration. - target_kpi (KPI): KPI object to limit the search of the mixed-precision configuration as desired. 
+ target_resource_utilization (ResourceUtilization): ResourceUtilization object to limit the search of the mixed-precision configuration as desired. core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters. target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the PyTorch model according to. @@ -109,7 +109,7 @@ def pytorch_post_training_quantization(in_module: Module, fw_info=DEFAULT_PYTORCH_INFO, fw_impl=fw_impl, tpc=target_platform_capabilities, - target_kpi=target_kpi, + target_resource_utilization=target_resource_utilization, tb_w=tb_w) tg = ptq_runner(tg, representative_data_gen, core_config, DEFAULT_PYTORCH_INFO, fw_impl, tb_w) diff --git a/model_compression_toolkit/qat/keras/quantization_facade.py b/model_compression_toolkit/qat/keras/quantization_facade.py index e75364f70..760d4162a 100644 --- a/model_compression_toolkit/qat/keras/quantization_facade.py +++ b/model_compression_toolkit/qat/keras/quantization_facade.py @@ -20,7 +20,7 @@ from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import FOUND_TF -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \ MixedPrecisionQuantizationConfig from mct_quantizers import KerasActivationQuantizationHolder @@ -87,7 +87,7 @@ def qat_wrapper(n: common.BaseNode, def keras_quantization_aware_training_init_experimental(in_model: Model, representative_data_gen: Callable, - target_kpi: KPI = None, + target_resource_utilization: ResourceUtilization = None, core_config: CoreConfig = 
CoreConfig(), qat_config: QATConfig = QATConfig(), target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC): @@ -103,13 +103,13 @@ def keras_quantization_aware_training_init_experimental(in_model: Model, a mixed-precision configuration, and set a bit-width for each layer. The model is built with fake_quant nodes for quantizing activation. Weights are kept as float and are quantized online while training by the quantization wrapper's weight quantizer. - In order to limit the maximal model's size, a target KPI need to be passed after weights_memory + In order to limit the maximal model's size, a target resource utilization needs to be passed after weights_memory is set (in bytes). Args: in_model (Model): Keras model to quantize. representative_data_gen (Callable): Dataset used for initial calibration. - target_kpi (KPI): KPI object to limit the search of the mixed-precision configuration as desired. + target_resource_utilization (ResourceUtilization): ResourceUtilization object to limit the search of the mixed-precision configuration as desired. core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters. qat_config (QATConfig): QAT configuration target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the Keras model according to. @@ -149,17 +149,17 @@ def keras_quantization_aware_training_init_experimental(in_model: Model, >>> config = mct.core.CoreConfig(mixed_precision_config=MixedPrecisionQuantizationConfig()) - For mixed-precision set a target KPI object: - Create a KPI object to limit our returned model's size. Note that this value affects only coefficients + For mixed-precision set a target ResourceUtilization object: + Create a ResourceUtilization object to limit our returned model's size. 
Note that this value affects only coefficients that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, while the bias will not): - >>> kpi = mct.core.KPI(model.count_params() * 0.75) # About 0.75 of the model size when quantized with 8 bits. + >>> ru = mct.core.ResourceUtilization(model.count_params() * 0.75) # About 0.75 of the model size when quantized with 8 bits. - Pass the model, the representative dataset generator, the configuration and the target KPI to get a + Pass the model, the representative dataset generator, the configuration and the target Resource Utilization to get a quantized model: - >>> quantized_model, quantization_info, custom_objects = mct.qat.keras_quantization_aware_training_init_experimental(model, repr_datagen, kpi, core_config=config) + >>> quantized_model, quantization_info, custom_objects = mct.qat.keras_quantization_aware_training_init_experimental(model, repr_datagen, ru, core_config=config) Use the quantized model for fine-tuning. For loading the model from file, use the custom_objects dictionary: @@ -193,7 +193,7 @@ def keras_quantization_aware_training_init_experimental(in_model: Model, fw_info=DEFAULT_KERAS_INFO, fw_impl=fw_impl, tpc=target_platform_capabilities, - target_kpi=target_kpi, + target_resource_utilization=target_resource_utilization, tb_w=tb_w) tg = ptq_runner(tg, representative_data_gen, core_config, DEFAULT_KERAS_INFO, fw_impl, tb_w) @@ -245,17 +245,17 @@ def keras_quantization_aware_training_finalize_experimental(in_model: Model) -> >>> config = mct.core.CoreConfig(mixed_precision_config=MixedPrecisionQuantizationConfig()) - For mixed-precision set a target KPI object: - Create a KPI object to limit our returned model's size. Note that this value affects only coefficients + For mixed-precision set a target ResourceUtilization object: + Create a ResourceUtilization object to limit our returned model's size. 
Note that this value affects only coefficients that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, while the bias will not): - >>> kpi = mct.core.KPI(model.count_params() * 0.75) # About 0.75 of the model size when quantized with 8 bits. + >>> ru = mct.core.ResourceUtilization(model.count_params() * 0.75) # About 0.75 of the model size when quantized with 8 bits. - Pass the model, the representative dataset generator, the configuration and the target KPI to get a + Pass the model, the representative dataset generator, the configuration and the target resource utilization to get a quantized model: - >>> quantized_model, quantization_info, custom_objects = mct.qat.keras_quantization_aware_training_init_experimental(model, repr_datagen, kpi, core_config=config) + >>> quantized_model, quantization_info, custom_objects = mct.qat.keras_quantization_aware_training_init_experimental(model, repr_datagen, ru, core_config=config) Use the quantized model for fine-tuning. 
For loading the model from file, use the custom_objects dictionary: diff --git a/model_compression_toolkit/qat/pytorch/quantization_facade.py b/model_compression_toolkit/qat/pytorch/quantization_facade.py index c3a478380..a98854821 100644 --- a/model_compression_toolkit/qat/pytorch/quantization_facade.py +++ b/model_compression_toolkit/qat/pytorch/quantization_facade.py @@ -23,7 +23,7 @@ from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.logger import Logger from model_compression_toolkit.core.common.framework_info import FrameworkInfo -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \ MixedPrecisionQuantizationConfig from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import \ @@ -75,7 +75,7 @@ def qat_wrapper(n: common.BaseNode, def pytorch_quantization_aware_training_init_experimental(in_model: Module, representative_data_gen: Callable, - target_kpi: KPI = None, + target_resource_utilization: ResourceUtilization = None, core_config: CoreConfig = CoreConfig(), qat_config: QATConfig = QATConfig(), target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYTORCH_TPC): @@ -91,13 +91,13 @@ def pytorch_quantization_aware_training_init_experimental(in_model: Module, a mixed-precision configuration, and set a bit-width for each layer. The model is built with fake_quant nodes for quantizing activation. Weights are kept as float and are quantized online while training by the quantization wrapper's weight quantizer. 
- In order to limit the maximal model's size, a target KPI need to be passed after weights_memory + In order to limit the maximal model's size, a target resource utilization needs to be passed after weights_memory is set (in bytes). Args: in_model (Model): Pytorch model to quantize. representative_data_gen (Callable): Dataset used for initial calibration. - target_kpi (KPI): KPI object to limit the search of the mixed-precision configuration as desired. + target_resource_utilization (ResourceUtilization): ResourceUtilization object to limit the search of the mixed-precision configuration as desired. core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters. qat_config (QATConfig): QAT configuration target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the Pytorch model according to. @@ -131,7 +131,7 @@ def pytorch_quantization_aware_training_init_experimental(in_model: Module, >>> config = mct.core.CoreConfig() - Pass the model, the representative dataset generator, the configuration and the target KPI to get a + Pass the model, the representative dataset generator, the configuration and the target resource utilization to get a quantized model. 
Now the model contains quantizer wrappers for fine tunning the weights: >>> quantized_model, quantization_info = mct.qat.pytorch_quantization_aware_training_init_experimental(model, repr_datagen, core_config=config) @@ -160,7 +160,7 @@ def pytorch_quantization_aware_training_init_experimental(in_model: Module, fw_info=DEFAULT_PYTORCH_INFO, fw_impl=fw_impl, tpc=target_platform_capabilities, - target_kpi=target_kpi, + target_resource_utilization=target_resource_utilization, tb_w=tb_w) tg = ptq_runner(tg, representative_data_gen, core_config, DEFAULT_PYTORCH_INFO, fw_impl, tb_w) @@ -213,7 +213,7 @@ def pytorch_quantization_aware_training_finalize_experimental(in_model: Module): >>> config = mct.core.CoreConfig() - Pass the model, the representative dataset generator, the configuration and the target KPI to get a + Pass the model, the representative dataset generator, the configuration and the target resource utilization to get a quantized model: >>> quantized_model, quantization_info = mct.qat.pytorch_quantization_aware_training_init_experimental(model, repr_datagen, core_config=config) diff --git a/tests/common_tests/base_feature_test.py b/tests/common_tests/base_feature_test.py index 64fcfe473..f4bfff1f9 100644 --- a/tests/common_tests/base_feature_test.py +++ b/tests/common_tests/base_feature_test.py @@ -33,7 +33,7 @@ def __init__(self, def get_gptq_config(self): return None - def get_kpi(self): + def get_resource_utilization(self): return None def run_test(self): @@ -43,7 +43,7 @@ def run_test(self): core_config = self.get_core_config() ptq_model, quantization_info = self.get_ptq_facade()(model_float, self.representative_data_gen_experimental, - target_kpi=self.get_kpi(), + target_resource_utilization=self.get_resource_utilization(), core_config=core_config, target_platform_capabilities=self.get_tpc() ) diff --git a/tests/common_tests/function_tests/test_kpi_object.py b/tests/common_tests/function_tests/test_kpi_object.py deleted file mode 100644 index 
afd0d3e27..000000000 --- a/tests/common_tests/function_tests/test_kpi_object.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -import unittest -import numpy as np -from model_compression_toolkit.core import KPI -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPITarget - -default_kpi = KPI() -custom_kpi = KPI(1, 2, 3, 4) - - -class TestKPIObject(unittest.TestCase): - - def test_default(self): - self.assertTrue(default_kpi.weights_memory, np.inf) - self.assertTrue(default_kpi.activation_memory, np.inf) - self.assertTrue(default_kpi.total_memory, np.inf) - self.assertTrue(default_kpi.bops, np.inf) - - self.assertTrue(custom_kpi.weights_memory, 1) - self.assertTrue(custom_kpi.activation_memory, 2) - self.assertTrue(custom_kpi.total_memory, 3) - self.assertTrue(custom_kpi.bops, 4) - - def test_representation(self): - self.assertEqual(repr(default_kpi), f"Weights_memory: {np.inf}, " - f"Activation_memory: {np.inf}, " - f"Total_memory: {np.inf}, " - f"BOPS: {np.inf}") - - self.assertEqual(repr(custom_kpi), f"Weights_memory: {1}, " - f"Activation_memory: {2}, " - f"Total_memory: {3}, " - f"BOPS: {4}") - - def test_kpi_hold_constraints(self): - self.assertTrue(default_kpi.holds_constraints(custom_kpi)) - 
self.assertFalse(custom_kpi.holds_constraints(default_kpi)) - self.assertFalse(custom_kpi.holds_constraints({KPITarget.WEIGHTS: 1, - KPITarget.ACTIVATION: 1, - KPITarget.TOTAL: 1, - KPITarget.BOPS: 1})) diff --git a/tests/common_tests/function_tests/test_resource_utilization_object.py b/tests/common_tests/function_tests/test_resource_utilization_object.py new file mode 100644 index 000000000..94ad8a633 --- /dev/null +++ b/tests/common_tests/function_tests/test_resource_utilization_object.py @@ -0,0 +1,57 @@ +# Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + + +import unittest +import numpy as np +from model_compression_toolkit.core import ResourceUtilization +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \ + RUTarget + +default_ru = ResourceUtilization() +custom_ru = ResourceUtilization(1, 2, 3, 4) + + +class TestResourceUtilizationObject(unittest.TestCase): + + def test_default(self): # assertEqual compares values; assertTrue(x, y) only checks that x is truthy (y is the failure message) + self.assertEqual(default_ru.weights_memory, np.inf) + self.assertEqual(default_ru.activation_memory, np.inf) + self.assertEqual(default_ru.total_memory, np.inf) + self.assertEqual(default_ru.bops, np.inf) + + self.assertEqual(custom_ru.weights_memory, 1) + self.assertEqual(custom_ru.activation_memory, 2) + self.assertEqual(custom_ru.total_memory, 3) + self.assertEqual(custom_ru.bops, 4) + + def test_representation(self): + self.assertEqual(repr(default_ru), f"Weights_memory: {np.inf}, " + f"Activation_memory: {np.inf}, " + f"Total_memory: {np.inf}, " + f"BOPS: {np.inf}") + + self.assertEqual(repr(custom_ru), f"Weights_memory: {1}, " + f"Activation_memory: {2}, " + f"Total_memory: {3}, " + f"BOPS: {4}") + + def test_ru_hold_constraints(self): + self.assertTrue(default_ru.holds_constraints(custom_ru)) + self.assertFalse(custom_ru.holds_constraints(default_ru)) + self.assertFalse(custom_ru.holds_constraints({RUTarget.WEIGHTS: 1, + RUTarget.ACTIVATION: 1, + RUTarget.TOTAL: 1, + RUTarget.BOPS: 1})) diff --git a/tests/common_tests/helpers/prep_graph_for_func_test.py b/tests/common_tests/helpers/prep_graph_for_func_test.py index e7b14c7d4..08f943c44 100644 --- a/tests/common_tests/helpers/prep_graph_for_func_test.py +++ b/tests/common_tests/helpers/prep_graph_for_func_test.py @@ -93,7 +93,7 @@ def prepare_graph_with_quantization_parameters(in_model, def prepare_graph_set_bit_widths(in_model, fw_impl, representative_data_gen, - target_kpi, + target_resource_utilization, n_iter, quant_config, fw_info, 
@@ -108,7 +108,7 @@ def prepare_graph_set_bit_widths(in_model, debug_config=DebugConfig(analyze_similarity=analyze_similarity, network_editor=network_editor)) - if target_kpi is not None: + if target_resource_utilization is not None: core_config.mixed_precision_config.set_mixed_precision_enable() tb_w = init_tensorboard_writer(fw_info) @@ -143,7 +143,7 @@ def _representative_data_gen(): bit_widths_config = search_bit_width(tg, fw_info, fw_impl, - target_kpi, + target_resource_utilization, core_config.mixed_precision_config, _representative_data_gen) else: diff --git a/tests/keras_tests/custom_layers_tests/test_sony_ssd_postprocess_layer.py b/tests/keras_tests/custom_layers_tests/test_sony_ssd_postprocess_layer.py index 6e7f8d68d..cf57d8a5f 100644 --- a/tests/keras_tests/custom_layers_tests/test_sony_ssd_postprocess_layer.py +++ b/tests/keras_tests/custom_layers_tests/test_sony_ssd_postprocess_layer.py @@ -53,7 +53,7 @@ def test_custom_layer(self): q_model, _ = mct.ptq.keras_post_training_quantization(model, get_rep_dataset(2, (1, 8, 8, 3)), core_config=core_config, - target_kpi=mct.core.KPI(weights_memory=6000)) + target_resource_utilization=mct.core.ResourceUtilization(weights_memory=6000)) # verify the custom layer is in the quantized model self.assertTrue(isinstance(q_model.layers[-1], SSDPostProcess), 'Custom layer should be in the quantized model') diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/gptq/gptq_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/gptq/gptq_test.py index 73868df77..0b3108491 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/gptq/gptq_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/gptq/gptq_test.py @@ -114,7 +114,7 @@ def representative_data_gen(): ptq_model, quantization_info = mct.ptq.keras_post_training_quantization( model_float, representative_data_gen, - target_kpi=self.get_kpi(), + target_resource_utilization=self.get_resource_utilization(), 
core_config=core_config, target_platform_capabilities=tpc ) @@ -122,7 +122,7 @@ def representative_data_gen(): model_float, representative_data_gen, gptq_config=self.get_gptq_config(), - target_kpi=self.get_kpi(), + target_resource_utilization=self.get_resource_utilization(), core_config=core_config, target_platform_capabilities=tpc ) diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_bops_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_bops_test.py index 010c52083..a63983dbb 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_bops_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_bops_test.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== -from model_compression_toolkit.core import KPI, MixedPrecisionQuantizationConfig +from model_compression_toolkit.core import ResourceUtilization, MixedPrecisionQuantizationConfig from keras.layers import Conv2D, Conv2DTranspose, DepthwiseConv2D, Dense, BatchNormalization, ReLU, Input, Add from tests.keras_tests.feature_networks_tests.base_keras_feature_test import BaseKerasFeatureNetworkTest @@ -55,8 +55,8 @@ def get_input_shapes(self): def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # Verify that some layers got bit-width smaller than 8 bits (so checking candidate index is not 0) self.unit_test.assertTrue(any(i > 0 for i in quantization_info.mixed_precision_cfg)) - # Verify final BOPs KPI - self.unit_test.assertTrue(quantization_info.final_kpi.bops <= self.get_kpi().bops) + # Verify final BOPs utilization + self.unit_test.assertTrue(quantization_info.final_resource_utilization.bops <= self.get_resource_utilization().bops) class MixedPrecisionBopsBasicTest(BaseMixedPrecisionBopsTest): @@ -74,8 +74,8 @@ def create_networks(self): outputs = Conv2D(3, 4)(x) return 
keras.Model(inputs=inputs, outputs=outputs) - def get_kpi(self): - return KPI(bops=1000000) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(bops=1000000) # should require some quantization to all layers class MixedPrecisionBopsAllWeightsLayersTest(BaseMixedPrecisionBopsTest): @@ -99,62 +99,62 @@ def create_networks(self): outputs = Dense(5)(x) return keras.Model(inputs=inputs, outputs=outputs) - def get_kpi(self): - return KPI(bops=1252512) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(bops=1252512) # should require some quantization to all layers class MixedPrecisionWeightsOnlyBopsTest(MixedPrecisionBopsAllWeightsLayersTest): def __init__(self, unit_test): super().__init__(unit_test, mixed_precision_candidates_list=[(8, 8), (4, 8), (2, 8)]) - def get_kpi(self): - return KPI(bops=5010100) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(bops=5010100) # should require some quantization to all layers class MixedPrecisionActivationOnlyBopsTest(MixedPrecisionBopsAllWeightsLayersTest): def __init__(self, unit_test): super().__init__(unit_test, mixed_precision_candidates_list=[(8, 8), (8, 4), (8, 2)]) - def get_kpi(self): - return KPI(bops=5010100) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(bops=5010100) # should require some quantization to all layers def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # Verify that some layers got bit-width smaller than 8 bits (so checking candidate index is not 0) self.unit_test.assertTrue(any(i > 0 for i in quantization_info.mixed_precision_cfg)) - # Verify final BOPs KPI - self.unit_test.assertTrue(quantization_info.final_kpi.bops <= self.get_kpi().bops) + # Verify final BOPs utilization + 
self.unit_test.assertTrue(quantization_info.final_resource_utilization.bops <= self.get_resource_utilization().bops) -class MixedPrecisionBopsAndWeightsKPITest(MixedPrecisionBopsAllWeightsLayersTest): +class MixedPrecisionBopsAndWeightsUtilizationTest(MixedPrecisionBopsAllWeightsLayersTest): def __init__(self, unit_test): super().__init__(unit_test) - def get_kpi(self): - return KPI(weights_memory=170, bops=1300000) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(weights_memory=170, bops=1300000) # should require some quantization to all layers -class MixedPrecisionBopsAndActivationKPITest(MixedPrecisionBopsAllWeightsLayersTest): +class MixedPrecisionBopsAndActivationUtilizationTest(MixedPrecisionBopsAllWeightsLayersTest): def __init__(self, unit_test): super().__init__(unit_test) - def get_kpi(self): - return KPI(activation_memory=460, bops=1300000) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(activation_memory=460, bops=1300000) # should require some quantization to all layers -class MixedPrecisionBopsAndTotalKPITest(MixedPrecisionBopsAllWeightsLayersTest): +class MixedPrecisionBopsAndTotalUtilizationTest(MixedPrecisionBopsAllWeightsLayersTest): def __init__(self, unit_test): super().__init__(unit_test) - def get_kpi(self): - return KPI(total_memory=650, bops=1300000) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(total_memory=650, bops=1300000) # should require some quantization to all layers -class MixedPrecisionBopsWeightsActivationKPITest(MixedPrecisionBopsAllWeightsLayersTest): +class MixedPrecisionBopsWeightsActivationUtilizationTest(MixedPrecisionBopsAllWeightsLayersTest): def __init__(self, unit_test): super().__init__(unit_test) - def get_kpi(self): - return KPI(weights_memory=200, activation_memory=500, bops=1300000) # should require some 
quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(weights_memory=200, activation_memory=500, bops=1300000) # should require some quantization to all layers class MixedPrecisionBopsMultipleOutEdgesTest(BaseMixedPrecisionBopsTest): @@ -171,8 +171,8 @@ def create_networks(self): outputs = Add()([x, y]) return keras.Model(inputs=inputs, outputs=outputs) - def get_kpi(self): - return KPI(bops=1) # No layers with BOPs count + def get_resource_utilization(self): + return ResourceUtilization(bops=1) # No layers with BOPs count def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # Verify that all layers got 8 bits (so checking candidate index is 0) diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_tests.py b/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_tests.py index baccc1fdb..50e7ea79d 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_tests.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/mixed_precision_tests.py @@ -23,7 +23,7 @@ from keras import backend as K import model_compression_toolkit as mct -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.user_info import UserInformation from tests.keras_tests.tpc_keras import get_tpc_with_activation_mp_keras from tests.keras_tests.utils import get_layers_from_model_by_type @@ -110,13 +110,13 @@ class MixedPrecisionActivationSearchTest(MixedPrecisionActivationBaseTest): def __init__(self, unit_test): super().__init__(unit_test, activation_layers_idx=[1, 2, 4]) - def get_kpi(self): - # kpi is infinity -> should give best model - 8bits on all layers for both weights and activations - return KPI(np.inf, np.inf) + def 
get_resource_utilization(self): + # resource utilization is infinity -> should give best model - 8bits on all layers for both weights and activations + return ResourceUtilization(np.inf, np.inf) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # verify chosen activation bitwidth config - # kpi is infinity -> should give best model - 8bits + # resource utilization is infinity -> should give best model - 8bits holder_layers = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder) activation_bits = [layer.activation_holder_quantizer.get_config()['num_bits'] for layer in holder_layers] self.unit_test.assertTrue((activation_bits == [8, 8, 8])) @@ -128,13 +128,13 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= unique_tensor_values=256) -class MixedPrecisionActivationSearchKPI4BitsAvgTest(MixedPrecisionActivationBaseTest): +class MixedPrecisionActivationSearch4BitsAvgTest(MixedPrecisionActivationBaseTest): def __init__(self, unit_test): super().__init__(unit_test, activation_layers_idx=[2,4]) - def get_kpi(self): - # kpi is for 4 bits on average - return KPI(weights_memory=17920 * 4 / 8, activation_memory=5408 * 4 / 8) + def get_resource_utilization(self): + # resource utilization is for 4 bits on average + return ResourceUtilization(weights_memory=17920 * 4 / 8, activation_memory=5408 * 4 / 8) def get_tpc(self): eight_bits = generate_test_op_qc(**generate_test_attr_configs()) @@ -149,35 +149,36 @@ def get_tpc(self): def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # verify chosen activation bitwidth config - # kpi is 4 bit average + # resource utilization is 4 bit average holder_layers = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder)[1:] activation_bits = [layer.activation_holder_quantizer.get_config()['num_bits'] for layer in holder_layers] - # Note that since we're using default max aggregation for 
activation KPI, then there is no guarantee that the - # activation bitwidth for each layer would be 4-bit, this assertion tests the expected result for this specific + # Note that since we're using default max aggregation for activation resource utilization, + # then there is no guarantee that the activation bitwidth for each layer would be 4-bit, + # this assertion tests the expected result for this specific # test with its current setup (therefore, we don't check the input layer's bitwidth) self.unit_test.assertTrue((activation_bits == [4, 4])) - # Verify final KPI + # Verify final resource utilization self.unit_test.assertTrue( - quantization_info.final_kpi.total_memory == - quantization_info.final_kpi.weights_memory + quantization_info.final_kpi.activation_memory, + quantization_info.final_resource_utilization.total_memory == + quantization_info.final_resource_utilization.weights_memory + quantization_info.final_resource_utilization.activation_memory, "Running weights and activation mixed-precision, " "final total memory should be equal to sum of weights and activation memory.") -class MixedPrecisionActivationSearchKPI2BitsAvgTest(MixedPrecisionActivationBaseTest): +class MixedPrecisionActivationSearch2BitsAvgTest(MixedPrecisionActivationBaseTest): def __init__(self, unit_test): super().__init__(unit_test, activation_layers_idx=[2, 4]) - def get_kpi(self): - # kpi is for 2 bits on average - return KPI(weights_memory=17920.0 * 2 / 8, activation_memory=5408.0 * 2 / 8) + def get_resource_utilization(self): + # resource utilization is for 2 bits on average + return ResourceUtilization(weights_memory=17920.0 * 2 / 8, activation_memory=5408.0 * 2 / 8) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # verify chosen activation bitwidth config - # kpi is minimal - # Note that since we're using default max aggregation for activation KPI, then there is no guarantee that the + # resource utilization is minimal + # Note that since 
we're using default max aggregation for activation resource utilization, then there is no guarantee that the # activation bitwidth for each layer would be 2-bit, this assertion tests the expected result for this specific # test with its current setup (therefore, we don't check the input layer's bitwidth) holder_layers = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder)[1:] @@ -190,10 +191,10 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= activation_layers_idx=self.activation_layers_idx, unique_tensor_values=4) - # Verify final KPI + # Verify final resource utilization self.unit_test.assertTrue( - quantization_info.final_kpi.total_memory == - quantization_info.final_kpi.weights_memory + quantization_info.final_kpi.activation_memory, + quantization_info.final_resource_utilization.total_memory == + quantization_info.final_resource_utilization.weights_memory + quantization_info.final_resource_utilization.activation_memory, "Running weights and activation mixed-precision, " "final total memory should be equal to sum of weights and activation memory.") @@ -202,8 +203,8 @@ class MixedPrecisionActivationDepthwiseTest(MixedPrecisionActivationBaseTest): def __init__(self, unit_test): super().__init__(unit_test, activation_layers_idx=[1, 3]) - def get_kpi(self): - return KPI(np.inf, np.inf) + def get_resource_utilization(self): + return ResourceUtilization(np.inf, np.inf) def create_networks(self): inputs = layers.Input(shape=self.get_input_shapes()[0][1:]) @@ -215,7 +216,7 @@ def create_networks(self): def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # verify chosen activation bitwidth config - # kpi is infinity -> should give best model - 8bits + # resource utilization is infinity -> should give best model - 8bits holder_layers = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder) activation_bits = 
[layer.activation_holder_quantizer.get_config()['num_bits'] for layer in holder_layers] self.unit_test.assertTrue((activation_bits == [8, 8])) @@ -225,9 +226,8 @@ class MixedPrecisionActivationDepthwise4BitTest(MixedPrecisionActivationBaseTest def __init__(self, unit_test): super().__init__(unit_test, activation_layers_idx=[1]) - def get_kpi(self): - # return KPI(np.inf, np.inf) - return KPI(48.0 * 4 / 8, 768.0 * 4 / 8) + def get_resource_utilization(self): + return ResourceUtilization(48.0 * 4 / 8, 768.0 * 4 / 8) def get_tpc(self): eight_bits = generate_test_op_qc(**generate_test_attr_configs()) @@ -250,8 +250,8 @@ def create_networks(self): def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # verify chosen activation bitwidth config - # kpi is 4 bit average - # Note that since we're using default max aggregation for activation KPI, then there is no guarantee that the + # resource utilization is 4 bit average + # Note that since we're using default max aggregation for activation resource utilization, then there is no guarantee that the # activation bitwidth for each layer would be 4-bit, this assertion tests the expected result for this specific # test with its current setup (therefore, we don't check the relu layer's bitwidth) holder_layer = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder)[0] @@ -270,13 +270,13 @@ def create_networks(self): model = keras.Model(inputs=inputs, outputs=[c0, c1]) return model - def get_kpi(self): - # kpi is infinity -> should give best model - 8bits on all layers for both weights and activations - return KPI(np.inf, np.inf) + def get_resource_utilization(self): + # resource utilization is infinity -> should give best model - 8bits on all layers for both weights and activations + return ResourceUtilization(np.inf, np.inf) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # verify chosen activation bitwidth config - # kpi is 
infinity -> should give best model - 8bits + # resource utilization is infinity -> should give best model - 8bits holder_layers = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder) activation_bits = [layer.activation_holder_quantizer.get_config()['num_bits'] for layer in holder_layers] self.unit_test.assertTrue((activation_bits == [8, 8, 8])) @@ -311,13 +311,13 @@ def get_tpc(self): mp_bitwidth_candidates_list=mixed_precision_candidates_list, name="mixed_precision_activation_weights_disabled_test") - def get_kpi(self): - # kpi is infinity -> should give best model - 8bits on all layers for both weights and activations - return KPI(np.inf, np.inf) + def get_resource_utilization(self): + # resource utilization is infinity -> should give best model - 8bits on all layers for both weights and activations + return ResourceUtilization(np.inf, np.inf) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # verify chosen activation bitwidth config - # kpi is infinity -> should give best model - 8bits + # resource utilization is infinity -> should give best model - 8bits holder_layers = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder) activation_bits = [layer.activation_holder_quantizer.get_config()['num_bits'] for layer in holder_layers] self.unit_test.assertTrue((activation_bits == [8, 8, 8])) @@ -328,11 +328,11 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= activation_layers_idx=self.activation_layers_idx, unique_tensor_values=256) - # Verify final KPI + # Verify final ResourceUtilization self.unit_test.assertTrue( - quantization_info.final_kpi.activation_memory + quantization_info.final_kpi.weights_memory == - quantization_info.final_kpi.total_memory, - "Running activation mixed-precision with unconstrained weights and total KPI, " + quantization_info.final_resource_utilization.activation_memory + 
quantization_info.final_resource_utilization.weights_memory == + quantization_info.final_resource_utilization.total_memory, + "Running activation mixed-precision with unconstrained weights and total resource utilization, " "final total memory should be equal to the sum of activation and weights memory.") @@ -359,13 +359,13 @@ def get_tpc(self): mp_bitwidth_candidates_list=mixed_precision_candidates_list, name="mixed_precision_activation_weights_disabled_test") - def get_kpi(self): - # kpi is infinity -> should give best model - 8bits on all layers for both weights and activations - return KPI(np.inf, np.inf) + def get_resource_utilization(self): + # resource utilization is infinity -> should give best model - 8bits on all layers for both weights and activations + return ResourceUtilization(np.inf, np.inf) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # verify chosen activation bitwidth config - # kpi is infinity -> should give best model - 8bits + # resource utilization is infinity -> should give best model - 8bits holder_layers = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder) activation_bits = [layer.activation_holder_quantizer.get_config()['num_bits'] for layer in holder_layers] self.unit_test.assertTrue((activation_bits == [8, 8, 8])) @@ -381,8 +381,8 @@ class MixedPrecisionActivationAddLayerTest(MixedPrecisionActivationBaseTest): def __init__(self, unit_test): super().__init__(unit_test, activation_layers_idx=[1, 2, 3]) - def get_kpi(self): - return KPI(np.inf, np.inf) + def get_resource_utilization(self): + return ResourceUtilization(np.inf, np.inf) def create_networks(self): inputs = layers.Input(shape=self.get_input_shapes()[0][1:]) @@ -393,7 +393,7 @@ def create_networks(self): def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # verify chosen activation bitwidth config - # kpi is infinity -> should give best model - 8bits + # resource utilization 
is infinity -> should give best model - 8bits holder_layers = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder) activation_bits = [h.activation_holder_quantizer.get_config()['num_bits'] for h in holder_layers] self.unit_test.assertTrue((activation_bits == [8, 8, 8])) @@ -411,8 +411,8 @@ def __init__(self, unit_test): self.num_of_inputs = 4 self.val_batch_size = 2 - def get_kpi(self): - return KPI(np.inf, np.inf) + def get_resource_utilization(self): + return ResourceUtilization(np.inf, np.inf) def get_input_shapes(self): return [[self.val_batch_size, 224, 244, 3] for _ in range(self.num_of_inputs)] @@ -440,7 +440,7 @@ def create_networks(self): def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # verify chosen activation bitwidth config - # kpi is infinity -> should give best model - 8bits + # resource utilization is infinity -> should give best model - 8bits holder_layers = get_layers_from_model_by_type(quantized_model, KerasActivationQuantizationHolder) activation_bits = [layer.activation_holder_quantizer.get_config()['num_bits'] for layer in holder_layers] self.unit_test.assertTrue((activation_bits == [8, 8, 8, 8, 8, 8, 8, 8, 8])) @@ -452,12 +452,12 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= unique_tensor_values=256) -class MixedPrecisionTotalKPISearchTest(MixedPrecisionActivationBaseTest): +class MixedPrecisionTotalMemoryUtilizationSearchTest(MixedPrecisionActivationBaseTest): def __init__(self, unit_test): super().__init__(unit_test, activation_layers_idx=[2, 4]) - def get_kpi(self): - return KPI(np.inf, np.inf, total_memory=(17920 + 5408) * 4 / 8) + def get_resource_utilization(self): + return ResourceUtilization(np.inf, np.inf, total_memory=(17920 + 5408) * 4 / 8) def compare(self, quantized_model, float_model, input_x=None, quantization_info: UserInformation = None): # verify chosen activation bitwidth config @@ -471,22 +471,22 @@ def compare(self, 
quantized_model, float_model, input_x=None, quantization_info: activation_layers_idx=self.activation_layers_idx, unique_tensor_values=16) - # Verify final KPI + # Verify final ResourceUtilization self.unit_test.assertTrue( - quantization_info.final_kpi.total_memory == - quantization_info.final_kpi.weights_memory + quantization_info.final_kpi.activation_memory, + quantization_info.final_resource_utilization.total_memory == + quantization_info.final_resource_utilization.weights_memory + quantization_info.final_resource_utilization.activation_memory, "Running weights and activation mixed-precision, " "final total memory should be equal to sum of weights and activation memory.") -class MixedPrecisionMultipleKPIsTightSearchTest(MixedPrecisionActivationBaseTest): +class MixedPrecisionMultipleResourcesTightUtilizationSearchTest(MixedPrecisionActivationBaseTest): def __init__(self, unit_test): super().__init__(unit_test, activation_layers_idx=[2, 4]) - def get_kpi(self): + def get_resource_utilization(self): weights = 17920 * 4 / 8 activation = 5408 * 4 / 8 - return KPI(weights, activation, total_memory=weights + activation) + return ResourceUtilization(weights, activation, total_memory=weights + activation) def compare(self, quantized_model, float_model, input_x=None, quantization_info: UserInformation = None): # verify chosen activation bitwidth config @@ -500,22 +500,22 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info: activation_layers_idx=self.activation_layers_idx, unique_tensor_values=16) - # Verify final KPI + # Verify final ResourceUtilization self.unit_test.assertTrue( - quantization_info.final_kpi.total_memory == - quantization_info.final_kpi.weights_memory + quantization_info.final_kpi.activation_memory, + quantization_info.final_resource_utilization.total_memory == + quantization_info.final_resource_utilization.weights_memory + quantization_info.final_resource_utilization.activation_memory, "Running weights and activation 
mixed-precision, " "final total memory should be equal to sum of weights and activation memory.") -class MixedPrecisionReducedTotalKPISearchTest(MixedPrecisionActivationBaseTest): +class MixedPrecisionReducedTotalMemorySearchTest(MixedPrecisionActivationBaseTest): def __init__(self, unit_test): super().__init__(unit_test, activation_layers_idx=[2, 4]) - def get_kpi(self): + def get_resource_utilization(self): weights = 17920 * 4 / 8 activation = 5408 * 4 / 8 - return KPI(weights, activation, total_memory=(weights + activation) / 2) + return ResourceUtilization(weights, activation, total_memory=(weights + activation) / 2) def compare(self, quantized_model, float_model, input_x=None, quantization_info: UserInformation = None): # verify chosen activation bitwidth config @@ -529,9 +529,9 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info: activation_layers_idx=self.activation_layers_idx, unique_tensor_values=16) - # Verify final KPI + # Verify final ResourceUtilization self.unit_test.assertTrue( - quantization_info.final_kpi.total_memory == - quantization_info.final_kpi.weights_memory + quantization_info.final_kpi.activation_memory, + quantization_info.final_resource_utilization.total_memory == + quantization_info.final_resource_utilization.weights_memory + quantization_info.final_resource_utilization.activation_memory, "Running weights and activation mixed-precision, " "final total memory should be equal to sum of weights and activation memory.") diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/network_editor/edit_qc_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/network_editor/edit_qc_test.py index 2760607f4..e1c93767d 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/network_editor/edit_qc_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/network_editor/edit_qc_test.py @@ -43,9 +43,9 @@ def prepare_graph_for_first_network_editor(in_model, 
representative_data_gen, core_config, fw_info, fw_impl, - tpc, target_kpi=None, tb_w=None): + tpc, target_resource_utilization=None, tb_w=None): - if target_kpi is not None: + if target_resource_utilization is not None: core_config.mixed_precision_enable.set_mixed_precision_enable() transformed_graph = graph_preparation_runner(in_model, @@ -85,17 +85,17 @@ def prepare_graph_for_first_network_editor(in_model, representative_data_gen, co def prepare_graph_for_second_network_editor(in_model, representative_data_gen, core_config, fw_info, fw_impl, - tpc, target_kpi=None, tb_w=None): + tpc, target_resource_utilization=None, tb_w=None): transformed_graph = prepare_graph_for_first_network_editor(in_model=in_model, representative_data_gen=representative_data_gen, core_config=core_config, fw_info=fw_info, fw_impl=fw_impl, tpc=tpc, - target_kpi=target_kpi, + target_resource_utilization=target_resource_utilization, tb_w=tb_w) - if target_kpi is not None: + if target_resource_utilization is not None: core_config.mixed_precision_enable.set_mixed_precision_enable() ###################################### @@ -140,14 +140,14 @@ def prepare_graph_for_second_network_editor(in_model, representative_data_gen, c ###################################### # Finalize bit widths ###################################### - if target_kpi is not None: + if target_resource_utilization is not None: assert core_config.mixed_precision_enable if core_config.mixed_precision_config.configuration_overwrite is None: bit_widths_config = search_bit_width(tg_with_bias, fw_info, fw_impl, - target_kpi, + target_resource_utilization, core_config.mixed_precision_config, representative_data_gen) else: @@ -196,7 +196,7 @@ def run_test(self): core_config=core_config, fw_info=self.get_fw_info(), fw_impl=self.get_fw_impl(), - target_kpi=self.get_kpi(), + target_resource_utilization=self.get_resource_utilization(), tpc=self.get_tpc()) filtered_nodes = ptq_graph.filter(self.edit_filter) @@ -284,7 +284,7 @@ def 
run_test(self): core_config=core_config, fw_info=self.get_fw_info(), fw_impl=self.get_fw_impl(), - target_kpi=self.get_kpi(), + target_resource_utilization=self.get_resource_utilization(), tpc=self.get_tpc()) filtered_nodes = ptq_graph.filter(self.edit_filter) diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py index de1dfc70a..4ae714831 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py @@ -283,9 +283,9 @@ def compare(self, qat_model, finalize=False, input_x=None, quantization_info=Non class QATWrappersMixedPrecisionCfgTest(MixedPrecisionActivationBaseTest): - def __init__(self, unit_test, kpi_weights=np.inf, kpi_activation=np.inf, expected_mp_cfg=[0, 0, 0, 0]): - self.kpi_weights = kpi_weights - self.kpi_activation = kpi_activation + def __init__(self, unit_test, ru_weights=np.inf, ru_activation=np.inf, expected_mp_cfg=[0, 0, 0, 0]): + self.ru_weights = ru_weights + self.ru_activation = ru_activation self.expected_mp_cfg = expected_mp_cfg super().__init__(unit_test, activation_layers_idx=[1, 3, 6]) @@ -295,7 +295,7 @@ def run_test(self, **kwargs): qat_ready_model, quantization_info, custom_objects = mct.qat.keras_quantization_aware_training_init_experimental( model_float, self.representative_data_gen_experimental, - mct.core.KPI(weights_memory=self.kpi_weights, activation_memory=self.kpi_activation), + mct.core.ResourceUtilization(weights_memory=self.ru_weights, activation_memory=self.ru_activation), core_config=config, target_platform_capabilities=self.get_tpc()) diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/reused_layer_mixed_precision_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/reused_layer_mixed_precision_test.py index c2b2db035..e5e648b8d 100644 --- 
a/tests/keras_tests/feature_networks_tests/feature_networks/reused_layer_mixed_precision_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/reused_layer_mixed_precision_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \ MixedPrecisionQuantizationConfig, MixedPrecisionQuantizationConfig from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs, generate_keras_tpc @@ -63,8 +63,8 @@ def create_networks(self): model = keras.Model(inputs=inputs, outputs=x) return model - def get_kpi(self): - return KPI(np.inf) + def get_resource_utilization(self): + return ResourceUtilization(np.inf) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): if isinstance(float_model.layers[1], layers.Conv2D): diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py b/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py index 0d2033115..e447b9221 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py @@ -20,12 +20,14 @@ from model_compression_toolkit.defaultdict import DefaultDict from model_compression_toolkit.core.common.mixed_precision.distance_weighting import MpDistanceWeighting from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, 
KERAS_KERNEL, BIAS_ATTR, BIAS -from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs, generate_keras_tpc +from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import \ + get_op_quantization_configs, generate_keras_tpc from tests.common_tests.helpers.generate_test_tp_model import generate_test_op_qc, generate_test_attr_configs from tests.keras_tests.feature_networks_tests.base_keras_feature_test import BaseKerasFeatureNetworkTest import model_compression_toolkit as mct -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \ + ResourceUtilization from model_compression_toolkit.core.common.user_info import UserInformation from tests.keras_tests.tpc_keras import get_weights_only_mp_tpc_keras from tests.keras_tests.utils import get_layers_from_model_by_type @@ -81,13 +83,13 @@ def get_quantization_config(self): input_scaling=True, activation_channel_equalization=True) def get_mixed_precision_config(self): - return mct.core.MixedPrecisionQuantizationConfig(target_kpi=self.get_kpi()) + return mct.core.MixedPrecisionQuantizationConfig(target_resource_utilization=self.get_resource_utilization()) - def get_kpi(self): - # Return some KPI (it does not really matter the value here as search_methods is not done, + def get_resource_utilization(self): + # Return some ResourceUtilization (it does not really matter the value here as search_methods is not done, # and the configuration is # set manually) - return KPI(1) + return ResourceUtilization(1) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): assert quantization_info.mixed_precision_cfg == [2, 1] @@ -102,9 +104,9 @@ def __init__(self, unit_test, distance_metric=MpDistanceWeighting.AVG): self.distance_metric = distance_metric - def 
get_kpi(self): - # kpi is infinity -> should give best model - 8bits - return KPI(np.inf) + def get_resource_utilization(self): + # resource utilization is infinity -> should give best model - 8bits + return ResourceUtilization(np.inf) def get_mixed_precision_config(self): return mct.core.MixedPrecisionQuantizationConfig(num_of_images=1, @@ -113,7 +115,7 @@ def get_mixed_precision_config(self): def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) assert (quantization_info.mixed_precision_cfg == [0, - 0]).all() # kpi is infinity -> should give best model - 8bits + 0]).all() # resource utilization is infinity -> should give best model - 8bits for i in range(32): # quantized per channel self.unit_test.assertTrue( np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 256) @@ -121,11 +123,11 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= self.unit_test.assertTrue( np.unique(conv_layers[1].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 256) - # Verify final KPI + # Verify final ResourceUtilization self.unit_test.assertTrue( - quantization_info.final_kpi.weights_memory + quantization_info.final_kpi.activation_memory == - quantization_info.final_kpi.total_memory, - "Running weights mixed-precision with unconstrained KPI, " + quantization_info.final_resource_utilization.weights_memory + quantization_info.final_resource_utilization.activation_memory == + quantization_info.final_resource_utilization.total_memory, + "Running weights mixed-precision with unconstrained ResourceUtilization, " "final weights and activation memory sum should be equal to total memory.") @@ -182,15 +184,16 @@ def create_networks(self): model = keras.Model(inputs=inputs, outputs=x) return model - def get_kpi(self): - # kpi is infinity -> should give best model - 8bits - return KPI(np.inf) + def 
get_resource_utilization(self): + # resource utilization is infinity -> should give best model - 8bits + return ResourceUtilization(np.inf) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # We just needed to verify that the graph finalization is working without failing. # The actual quantization is not interesting for the sake of this test, so we just verify some # degenerated things to see that everything worked. - self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == [0]) # kpi is infinity -> should give best model - 8bits + self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == [ + 0]) # resource utilization is infinity -> should give best model - 8bits dense_layer = get_layers_from_model_by_type(quantized_model, layers.Dense) self.unit_test.assertTrue(len(dense_layer) == 1) @@ -200,13 +203,13 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= np.unique(dense_layer.get_quantized_weights()['kernel'][:, i]).flatten().shape[0] <= 4) -class MixedPercisionSearchKPI4BitsAvgTest(MixedPercisionBaseTest): +class MixedPercisionSearch4BitsAvgTest(MixedPercisionBaseTest): def __init__(self, unit_test): super().__init__(unit_test) - def get_kpi(self): - # kpi is for 4 bits on average - return KPI(17920 * 4 / 8) + def get_resource_utilization(self): + # Resource Utilization is for 4 bits on average + return ResourceUtilization(17920 * 4 / 8) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) @@ -218,11 +221,11 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= self.unit_test.assertTrue( np.unique(conv_layers[1].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 16) - # Verify final KPI + # Verify final ResourceUtilization self.unit_test.assertTrue( - quantization_info.final_kpi.weights_memory + 
quantization_info.final_kpi.activation_memory == - quantization_info.final_kpi.total_memory, - "Running weights mixed-precision with unconstrained KPI, " + quantization_info.final_resource_utilization.weights_memory + quantization_info.final_resource_utilization.activation_memory == + quantization_info.final_resource_utilization.total_memory, + "Running weights mixed-precision with unconstrained ResourceUtilization, " "final weights and activation memory sum should be equal to total memory.") @@ -234,9 +237,9 @@ def get_mixed_precision_config(self): return mct.core.MixedPrecisionQuantizationConfig(num_of_images=1, use_hessian_based_scores=False) - def get_kpi(self): - # kpi is for 4 bits on average - return KPI(17920 * 4 / 8) + def get_resource_utilization(self): + # Resource Utilization is for 4 bits on average + return ResourceUtilization(17920 * 4 / 8) def create_networks(self): inputs = layers.Input(shape=self.get_input_shapes()[0][1:]) @@ -258,21 +261,21 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 16 or np.unique(conv_layers[1].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 16) - # Verify final KPI + # Verify final ResourceUtilization self.unit_test.assertTrue( - quantization_info.final_kpi.weights_memory + quantization_info.final_kpi.activation_memory == - quantization_info.final_kpi.total_memory, - "Running weights mixed-precision with unconstrained KPI, " + quantization_info.final_resource_utilization.weights_memory + quantization_info.final_resource_utilization.activation_memory == + quantization_info.final_resource_utilization.total_memory, + "Running weights mixed-precision with unconstrained ResourceUtilization, " "final weights and activation memory sum should be equal to total memory.") -class MixedPercisionSearchKPI2BitsAvgTest(MixedPercisionBaseTest): +class 
MixedPercisionSearch2BitsAvgTest(MixedPercisionBaseTest): def __init__(self, unit_test): super().__init__(unit_test) - def get_kpi(self): - # kpi is for 2 bits on average - return KPI(17920 * 2 / 8) + def get_resource_utilization(self): + # Resource Utilization is for 2 bits on average + return ResourceUtilization(17920 * 2 / 8) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) @@ -284,62 +287,62 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= self.unit_test.assertTrue( np.unique(conv_layers[1].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 4) - # Verify final KPI + # Verify final ResourceUtilization self.unit_test.assertTrue( - quantization_info.final_kpi.weights_memory + quantization_info.final_kpi.activation_memory == - quantization_info.final_kpi.total_memory, - "Running weights mixed-precision with unconstrained KPI, " + quantization_info.final_resource_utilization.weights_memory + quantization_info.final_resource_utilization.activation_memory == + quantization_info.final_resource_utilization.total_memory, + "Running weights mixed-precision with unconstrained ResourceUtilization, " "final weights and activation memory sum should be equal to total memory.") -class MixedPercisionSearchActivationKPINonConfNodesTest(MixedPercisionBaseTest): +class MixedPercisionSearchActivationNonConfNodesTest(MixedPercisionBaseTest): def __init__(self, unit_test): super().__init__(unit_test) - # Total KPI for weights in 2 bit avg and non-configurable activation in 8 bit - self.target_total_kpi = KPI(weights_memory=17920 * 2 / 8, activation_memory=5408) + # Total ResourceUtilization for weights in 2 bit avg and non-configurable activation in 8 bit + self.target_total_ru = ResourceUtilization(weights_memory=17920 * 2 / 8, activation_memory=5408) - def get_kpi(self): - return self.target_total_kpi + def 
get_resource_utilization(self): + return self.target_total_ru def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # No need to verify quantization configuration here since this test is similar to other tests we have, - # we're only interested in the KPI - self.unit_test.assertTrue(quantization_info.final_kpi.activation_memory <= - self.target_total_kpi.activation_memory) + # we're only interested in the ResourceUtilization + self.unit_test.assertTrue(quantization_info.final_resource_utilization.activation_memory <= + self.target_total_ru.activation_memory) self.unit_test.assertTrue( - quantization_info.final_kpi.weights_memory + quantization_info.final_kpi.activation_memory == - quantization_info.final_kpi.total_memory, - "Running weights mixed-precision with unconstrained KPI, " + quantization_info.final_resource_utilization.weights_memory + quantization_info.final_resource_utilization.activation_memory == + quantization_info.final_resource_utilization.total_memory, + "Running weights mixed-precision with unconstrained Resource Utilization, " "final weights and activation memory sum should be equal to total memory.") -class MixedPercisionSearchTotalKPINonConfNodesTest(MixedPercisionBaseTest): +class MixedPercisionSearchTotalMemoryNonConfNodesTest(MixedPercisionBaseTest): def __init__(self, unit_test): super().__init__(unit_test) - # Total KPI for weights in 2 bit avg and non-configurable activation in 8 bit - self.target_total_kpi = KPI(total_memory=17920 * 2 / 8 + 5408) + # Total ResourceUtilization for weights in 2 bit avg and non-configurable activation in 8 bit + self.target_total_ru = ResourceUtilization(total_memory=17920 * 2 / 8 + 5408) - def get_kpi(self): - return self.target_total_kpi + def get_resource_utilization(self): + return self.target_total_ru def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # No need to verify quantization configuration here since this test is similar to 
other tests we have, - # we're only interested in the KPI - self.unit_test.assertTrue(quantization_info.final_kpi.total_memory <= self.target_total_kpi.total_memory) + # we're only interested in the ResourceUtilization self.unit_test.assertTrue( - quantization_info.final_kpi.weights_memory + quantization_info.final_kpi.activation_memory == - quantization_info.final_kpi.total_memory, - "Running weights mixed-precision with unconstrained KPI, " + quantization_info.final_resource_utilization.total_memory <= self.target_total_ru.total_memory) + self.unit_test.assertTrue( + quantization_info.final_resource_utilization.weights_memory + quantization_info.final_resource_utilization.activation_memory == + quantization_info.final_resource_utilization.total_memory, + "Running weights mixed-precision with unconstrained ResourceUtilization, " "final weights and activation memory sum should be equal to total memory.") - class MixedPercisionDepthwiseTest(MixedPercisionBaseTest): def __init__(self, unit_test): super().__init__(unit_test) - def get_kpi(self): - return KPI(np.inf) + def get_resource_utilization(self): + return ResourceUtilization(np.inf) def create_networks(self): inputs = layers.Input(shape=self.get_input_shapes()[0][1:]) @@ -351,13 +354,13 @@ def create_networks(self): def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): self.unit_test.assertTrue(len(quantization_info.mixed_precision_cfg) == 1) - self.unit_test.assertTrue(quantization_info.mixed_precision_cfg[0] == 0) # Assert model is quantized using 16 bits as KPI is inf - + self.unit_test.assertTrue(quantization_info.mixed_precision_cfg[ + 0] == 0) # Assert model is quantized using 16 bits as ResourceUtilization is inf def get_tpc(self): base_config = generate_test_op_qc(activation_n_bits=16, - **generate_test_attr_configs(default_cfg_nbits=16, - kernel_cfg_nbits=16)) + **generate_test_attr_configs(default_cfg_nbits=16, + kernel_cfg_nbits=16)) default_config = 
base_config.clone_and_edit(attr_weights_configs_mapping={}) @@ -399,14 +402,14 @@ def get_tpc(self): mp_bitwidth_candidates_list=[(8, 8), (4, 8), (2, 8)], name="mp_weights_only_test") - def get_kpi(self): - # kpi is infinity -> should give best model - 8bits - return KPI(np.inf) + def get_resource_utilization(self): + # resource utilization is infinity -> should give best model - 8bits + return ResourceUtilization(np.inf) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) assert (quantization_info.mixed_precision_cfg == [0, - 0]).all() # kpi is infinity -> should give best model - 8bits + 0]).all() # resource utilization is infinity -> should give best model - 8bits for i in range(32): # quantized per channel self.unit_test.assertTrue( np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 256) @@ -424,14 +427,14 @@ def get_mixed_precision_config(self): distance_weighting_method=MpDistanceWeighting.LAST_LAYER, use_hessian_based_scores=False) - def get_kpi(self): - # kpi is infinity -> should give best model - 8bits - return KPI(np.inf) + def get_resource_utilization(self): + # resource utilization is infinity -> should give best model - 8bits + return ResourceUtilization(np.inf) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) assert (quantization_info.mixed_precision_cfg == [0, - 0]).all() # kpi is infinity -> should give best model - 8bits + 0]).all() # resource utilization is infinity -> should give best model - 8bits for i in range(32): # quantized per channel self.unit_test.assertTrue( np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 256) @@ -439,9 +442,9 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info= 
self.unit_test.assertTrue( np.unique(conv_layers[1].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 256) - # Verify final KPI + # Verify final Resource Utilization self.unit_test.assertTrue( - quantization_info.final_kpi.weights_memory + quantization_info.final_kpi.activation_memory == - quantization_info.final_kpi.total_memory, - "Running weights mixed-precision with unconstrained KPI, " - "final weights and activation memory sum should be equal to total memory.") \ No newline at end of file + quantization_info.final_resource_utilization.weights_memory + quantization_info.final_resource_utilization.activation_memory == + quantization_info.final_resource_utilization.total_memory, + "Running weights mixed-precision with unconstrained Resource Utilization, " + "final weights and activation memory sum should be equal to total memory.") diff --git a/tests/keras_tests/feature_networks_tests/test_features_runner.py b/tests/keras_tests/feature_networks_tests/test_features_runner.py index fd5fc5c39..896836b10 100644 --- a/tests/keras_tests/feature_networks_tests/test_features_runner.py +++ b/tests/keras_tests/feature_networks_tests/test_features_runner.py @@ -55,16 +55,16 @@ LUTActivationQuantizerTest from tests.keras_tests.feature_networks_tests.feature_networks.mixed_precision_bops_test import \ MixedPrecisionBopsBasicTest, MixedPrecisionBopsAllWeightsLayersTest, MixedPrecisionWeightsOnlyBopsTest, \ - MixedPrecisionActivationOnlyBopsTest, MixedPrecisionBopsAndWeightsKPITest, MixedPrecisionBopsAndActivationKPITest, \ - MixedPrecisionBopsAndTotalKPITest, MixedPrecisionBopsWeightsActivationKPITest, \ + MixedPrecisionActivationOnlyBopsTest, MixedPrecisionBopsAndWeightsUtilizationTest, MixedPrecisionBopsAndActivationUtilizationTest, \ + MixedPrecisionBopsAndTotalUtilizationTest, MixedPrecisionBopsWeightsActivationUtilizationTest, \ MixedPrecisionBopsMultipleOutEdgesTest from tests.keras_tests.feature_networks_tests.feature_networks.mixed_precision_tests 
import \ - MixedPrecisionActivationSearchTest, MixedPrecisionActivationSearchKPI4BitsAvgTest, \ - MixedPrecisionActivationSearchKPI2BitsAvgTest, MixedPrecisionActivationDepthwiseTest, \ + MixedPrecisionActivationSearchTest, MixedPrecisionActivationSearch4BitsAvgTest, \ + MixedPrecisionActivationSearch2BitsAvgTest, MixedPrecisionActivationDepthwiseTest, \ MixedPrecisionActivationSplitLayerTest, MixedPrecisionActivationOnlyWeightsDisabledTest, \ MixedPrecisionActivationOnlyTest, MixedPrecisionActivationDepthwise4BitTest, MixedPrecisionActivationAddLayerTest, \ - MixedPrecisionActivationMultipleInputsTest, MixedPrecisionTotalKPISearchTest, \ - MixedPrecisionMultipleKPIsTightSearchTest, MixedPrecisionReducedTotalKPISearchTest + MixedPrecisionActivationMultipleInputsTest, MixedPrecisionTotalMemoryUtilizationSearchTest, \ + MixedPrecisionMultipleResourcesTightUtilizationSearchTest, MixedPrecisionReducedTotalMemorySearchTest from tests.keras_tests.feature_networks_tests.feature_networks.multi_head_attention_test import MultiHeadAttentionTest from tests.keras_tests.feature_networks_tests.feature_networks.multi_inputs_to_node_test import MultiInputsToNodeTest from tests.keras_tests.feature_networks_tests.feature_networks.multiple_inputs_model_test import MultipleInputsModelTest @@ -125,9 +125,9 @@ UniformRangeSelectionActivationTest, UniformRangeSelectionBoundedActivationTest from tests.keras_tests.feature_networks_tests.feature_networks.weights_mixed_precision_tests import \ MixedPercisionSearchTest, MixedPercisionDepthwiseTest, \ - MixedPercisionSearchKPI4BitsAvgTest, MixedPercisionSearchKPI2BitsAvgTest, MixedPrecisionActivationDisabled, \ - MixedPercisionSearchLastLayerDistanceTest, MixedPercisionSearchActivationKPINonConfNodesTest, \ - MixedPercisionSearchTotalKPINonConfNodesTest, MixedPercisionSearchPartWeightsLayersTest, MixedPercisionCombinedNMSTest + MixedPercisionSearch4BitsAvgTest, MixedPercisionSearch2BitsAvgTest, MixedPrecisionActivationDisabled, \ + 
MixedPercisionSearchLastLayerDistanceTest, MixedPercisionSearchActivationNonConfNodesTest, \ + MixedPercisionSearchTotalMemoryNonConfNodesTest, MixedPercisionSearchPartWeightsLayersTest, MixedPercisionCombinedNMSTest from tests.keras_tests.feature_networks_tests.feature_networks.matmul_substitution_test import MatmulToDenseSubstitutionTest from tests.keras_tests.feature_networks_tests.feature_networks.const_representation_test import ConstRepresentationTest, \ ConstRepresentationMultiInputTest, ConstRepresentationMatMulTest @@ -194,13 +194,13 @@ def test_reused_layer_mixed_precision(self): def test_reuse_separable(self): ReusedSeparableTest(self).run_test() - def test_mixed_precision_search_kpi_2bits_avg(self): - MixedPercisionSearchKPI2BitsAvgTest(self).run_test() + def test_mixed_precision_search_2bits_avg(self): + MixedPercisionSearch2BitsAvgTest(self).run_test() - def test_mixed_precision_search_kpi_4bits_avg(self): - MixedPercisionSearchKPI4BitsAvgTest(self).run_test() + def test_mixed_precision_search_4bits_avg(self): + MixedPercisionSearch4BitsAvgTest(self).run_test() - def test_mixed_precision_search_kpi_4bits_avg_nms(self): + def test_mixed_precision_search_4bits_avg_nms(self): MixedPercisionCombinedNMSTest(self).run_test() def test_mixed_precision_search(self): @@ -220,10 +220,10 @@ def test_mixed_precision_search_with_last_layer_distance(self): MixedPercisionSearchLastLayerDistanceTest(self).run_test() def test_mixed_precision_search_activation_non_conf_nodes(self): - MixedPercisionSearchActivationKPINonConfNodesTest(self).run_test() + MixedPercisionSearchActivationNonConfNodesTest(self).run_test() def test_mixed_precision_search_total_non_conf_nodes(self): - MixedPercisionSearchTotalKPINonConfNodesTest(self).run_test() + MixedPercisionSearchTotalMemoryNonConfNodesTest(self).run_test() def test_mixed_precision_activation_search(self): MixedPrecisionActivationSearchTest(self).run_test() @@ -234,11 +234,11 @@ def test_mixed_precision_activation_only(self): 
def test_mixed_precision_activation_only_weights_disabled(self): MixedPrecisionActivationOnlyWeightsDisabledTest(self).run_test() - def test_mixed_precision_activation_search_kpi_4bits_avg(self): - MixedPrecisionActivationSearchKPI4BitsAvgTest(self).run_test() + def test_mixed_precision_activation_search_4bits_avg(self): + MixedPrecisionActivationSearch4BitsAvgTest(self).run_test() - def test_mixed_precision_activation_search_kpi_2bits_avg(self): - MixedPrecisionActivationSearchKPI2BitsAvgTest(self).run_test() + def test_mixed_precision_activation_search_2bits_avg(self): + MixedPrecisionActivationSearch2BitsAvgTest(self).run_test() def test_mixed_precision_activation_dw(self): MixedPrecisionActivationDepthwiseTest(self).run_test() @@ -255,24 +255,24 @@ def test_mixed_precision_activation_split(self): def test_mixed_precision_activation_multiple_inputs(self): MixedPrecisionActivationMultipleInputsTest(self).run_test() - def test_mixed_precision_total_kpi(self): - MixedPrecisionTotalKPISearchTest(self).run_test() + def test_mixed_precision_total_memory_utilization(self): + MixedPrecisionTotalMemoryUtilizationSearchTest(self).run_test() - def test_mixed_precision_multiple_kpis_tight(self): - MixedPrecisionMultipleKPIsTightSearchTest(self).run_test() + def test_mixed_precision_multiple_resources_tight_utilization(self): + MixedPrecisionMultipleResourcesTightUtilizationSearchTest(self).run_test() - def test_mixed_precision_reduced_total_kpi(self): - MixedPrecisionReducedTotalKPISearchTest(self).run_test() + def test_mixed_precision_reduced_total_memory(self): + MixedPrecisionReducedTotalMemorySearchTest(self).run_test() - def test_mixed_precision_bops_kpi(self): + def test_mixed_precision_bops_utilization(self): MixedPrecisionBopsBasicTest(self).run_test() MixedPrecisionBopsAllWeightsLayersTest(self).run_test() MixedPrecisionWeightsOnlyBopsTest(self).run_test() MixedPrecisionActivationOnlyBopsTest(self).run_test() - MixedPrecisionBopsAndWeightsKPITest(self).run_test() - 
MixedPrecisionBopsAndActivationKPITest(self).run_test() - MixedPrecisionBopsAndTotalKPITest(self).run_test() - MixedPrecisionBopsWeightsActivationKPITest(self).run_test() + MixedPrecisionBopsAndWeightsUtilizationTest(self).run_test() + MixedPrecisionBopsAndActivationUtilizationTest(self).run_test() + MixedPrecisionBopsAndTotalUtilizationTest(self).run_test() + MixedPrecisionBopsWeightsActivationUtilizationTest(self).run_test() MixedPrecisionBopsMultipleOutEdgesTest(self).run_test() def test_name_filter(self): @@ -734,7 +734,7 @@ def test_qat(self): QuantizationAwareTrainingQuantizersTest(self).run_test() QuantizationAwareTrainingQuantizerHolderTest(self).run_test() QATWrappersMixedPrecisionCfgTest(self).run_test() - QATWrappersMixedPrecisionCfgTest(self,kpi_weights=17920 * 4 / 8, kpi_activation=5408 * 4 / 8, expected_mp_cfg=[0, 4, 1, 1]).run_test() + QATWrappersMixedPrecisionCfgTest(self, ru_weights=17920 * 4 / 8, ru_activation=5408 * 4 / 8, expected_mp_cfg=[0, 4, 1, 1]).run_test() def test_bn_attributes_quantization(self): BNAttributesQuantization(self, quantize_linear=False).run_test() diff --git a/tests/keras_tests/function_tests/test_doc_examples.py b/tests/keras_tests/function_tests/test_doc_examples.py index 7ae133044..0307b7860 100644 --- a/tests/keras_tests/function_tests/test_doc_examples.py +++ b/tests/keras_tests/function_tests/test_doc_examples.py @@ -31,5 +31,5 @@ def test_keras_ptq_facade(self): def test_keras_gptq_facade(self): doctest.testfile("quantization_facade.py", package=keras, verbose=True, raise_on_error=RAISE_ON_ERROR) - def test_keras_kpi_data_facade(self): - doctest.testfile("kpi_data_facade.py", package=core.keras, verbose=True, raise_on_error=RAISE_ON_ERROR) + def test_keras_resource_utilization_data_facade(self): + doctest.testfile("resource_utilization_data_facade.py", package=core.keras, verbose=True, raise_on_error=RAISE_ON_ERROR) diff --git a/tests/keras_tests/function_tests/test_kpi_data.py 
b/tests/keras_tests/function_tests/test_resource_utilization_data.py similarity index 68% rename from tests/keras_tests/function_tests/test_kpi_data.py rename to tests/keras_tests/function_tests/test_resource_utilization_data.py index 83f364130..d94878aca 100644 --- a/tests/keras_tests/function_tests/test_kpi_data.py +++ b/tests/keras_tests/function_tests/test_resource_utilization_data.py @@ -22,7 +22,8 @@ import unittest from tensorflow.keras.layers import Conv2D, BatchNormalization, ReLU, Input, SeparableConv2D -from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs +from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import \ + get_op_quantization_configs from model_compression_toolkit.core.keras.constants import DEPTHWISE_KERNEL, KERNEL from model_compression_toolkit.core.keras.graph_substitutions.substitutions.separableconv_decomposition import \ POINTWISE_KERNEL @@ -54,8 +55,8 @@ def basic_model(): outputs = ReLU()(x_bn) model = keras.Model(inputs=inputs, outputs=outputs) return model, \ - getattr(model.layers[1], KERNEL).numpy().flatten().shape[0], \ - compute_output_size(model.layers[0].output_shape) + getattr(model.layers[1], KERNEL).numpy().flatten().shape[0], \ + compute_output_size(model.layers[0].output_shape) def complex_model(): @@ -73,15 +74,14 @@ def complex_model(): outputs = ReLU()(x) model = keras.Model(inputs=inputs, outputs=outputs) return model, \ - getattr(model.layers[1], DEPTHWISE_KERNEL).numpy().flatten().shape[0] + \ - getattr(model.layers[1], POINTWISE_KERNEL).numpy().flatten().shape[0] + \ - getattr(model.layers[4], DEPTHWISE_KERNEL).numpy().flatten().shape[0] + \ - getattr(model.layers[4], POINTWISE_KERNEL).numpy().flatten().shape[0], \ - compute_output_size(model.layers[4].output_shape) + getattr(model.layers[1], DEPTHWISE_KERNEL).numpy().flatten().shape[0] + \ + getattr(model.layers[1], 
POINTWISE_KERNEL).numpy().flatten().shape[0] + \ + getattr(model.layers[4], DEPTHWISE_KERNEL).numpy().flatten().shape[0] + \ + getattr(model.layers[4], POINTWISE_KERNEL).numpy().flatten().shape[0], \ + compute_output_size(model.layers[4].output_shape) def prep_test(model, mp_bitwidth_candidates_list, random_datagen): - base_config = generate_test_op_qc(activation_n_bits=mp_bitwidth_candidates_list[0][1], **generate_test_attr_configs(default_cfg_nbits=mp_bitwidth_candidates_list[0][0], kernel_cfg_nbits=mp_bitwidth_candidates_list[0][0])) @@ -91,57 +91,57 @@ def prep_test(model, mp_bitwidth_candidates_list, random_datagen): tpc = get_tpc_with_activation_mp_keras(base_config=base_config, default_config=default_config, mp_bitwidth_candidates_list=mp_bitwidth_candidates_list, - name="kpi_data_test") + name="ru_data_test") - kpi_data = mct.core.keras_kpi_data(in_model=model, - representative_data_gen=random_datagen, - core_config=mct.core.CoreConfig(), - target_platform_capabilities=tpc) + ru_data = mct.core.keras_resource_utilization_data(in_model=model, + representative_data_gen=random_datagen, + core_config=mct.core.CoreConfig(), + target_platform_capabilities=tpc) - return kpi_data + return ru_data -class TestKPIData(unittest.TestCase): +class TestResourceUtilizationData(unittest.TestCase): - def test_kpi_data_basic_all_bitwidth(self): + def test_ru_data_basic_all_bitwidth(self): model, sum_parameters, max_tensor = basic_model() mp_bitwidth_candidates_list = [(i, j) for i in [8, 4, 2] for j in [8, 4, 2]] - kpi_data = prep_test(model, mp_bitwidth_candidates_list, small_random_datagen) + ru_data = prep_test(model, mp_bitwidth_candidates_list, small_random_datagen) - self.verify_results(kpi_data, sum_parameters, max_tensor) + self.verify_results(ru_data, sum_parameters, max_tensor) - def test_kpi_data_basic_partial_bitwidth(self): + def test_ru_data_basic_partial_bitwidth(self): model, sum_parameters, max_tensor = basic_model() mp_bitwidth_candidates_list = [(i, j) for 
i in [4, 2] for j in [4, 2]] - kpi_data = prep_test(model, mp_bitwidth_candidates_list, small_random_datagen) + ru_data = prep_test(model, mp_bitwidth_candidates_list, small_random_datagen) - self.verify_results(kpi_data, sum_parameters, max_tensor) + self.verify_results(ru_data, sum_parameters, max_tensor) - def test_kpi_data_complex_all_bitwidth(self): + def test_ru_data_complex_all_bitwidth(self): model, sum_parameters, max_tensor = complex_model() mp_bitwidth_candidates_list = [(i, j) for i in [8, 4, 2] for j in [8, 4, 2]] - kpi_data = prep_test(model, mp_bitwidth_candidates_list, large_random_datagen()) + ru_data = prep_test(model, mp_bitwidth_candidates_list, large_random_datagen()) - self.verify_results(kpi_data, sum_parameters, max_tensor) + self.verify_results(ru_data, sum_parameters, max_tensor) - def test_kpi_data_complex_partial_bitwidth(self): + def test_ru_data_complex_partial_bitwidth(self): model, sum_parameters, max_tensor = basic_model() mp_bitwidth_candidates_list = [(i, j) for i in [4, 2] for j in [4, 2]] - kpi_data = prep_test(model, mp_bitwidth_candidates_list, small_random_datagen) + ru_data = prep_test(model, mp_bitwidth_candidates_list, small_random_datagen) - self.verify_results(kpi_data, sum_parameters, max_tensor) + self.verify_results(ru_data, sum_parameters, max_tensor) - def verify_results(self, kpi, sum_parameters, max_tensor): - self.assertTrue(kpi.weights_memory == sum_parameters, + def verify_results(self, ru, sum_parameters, max_tensor): + self.assertTrue(ru.weights_memory == sum_parameters, f"Expects weights_memory to be {sum_parameters} " - f"but result is {kpi.weights_memory}") - self.assertTrue(kpi.activation_memory == max_tensor, + f"but result is {ru.weights_memory}") + self.assertTrue(ru.activation_memory == max_tensor, f"Expects activation_memory to be {max_tensor} " - f"but result is {kpi.activation_memory}") + f"but result is {ru.activation_memory}") if __name__ == '__main__': diff --git 
a/tests/keras_tests/function_tests/test_sensitivity_eval_non_suppoerted_output.py b/tests/keras_tests/function_tests/test_sensitivity_eval_non_suppoerted_output.py index 8ac75e2cb..f8884348d 100644 --- a/tests/keras_tests/function_tests/test_sensitivity_eval_non_suppoerted_output.py +++ b/tests/keras_tests/function_tests/test_sensitivity_eval_non_suppoerted_output.py @@ -96,13 +96,13 @@ def test_not_supported_output_argmax(self): model = argmax_output_model((8, 8, 3)) with self.assertRaises(Exception) as e: self.verify_test_for_model(model) - self.assertTrue("All graph outputs should support Hessian computation" in str(e.exception)) + self.assertTrue("All graph outputs must support Hessian score computation" in str(e.exception)) def test_not_supported_output_nms(self): model = nms_output_model((8, 8, 3)) with self.assertRaises(Exception) as e: self.verify_test_for_model(model) - self.assertTrue("All graph outputs should support Hessian computation" in str(e.exception)) + self.assertTrue("All graph outputs must support Hessian score computation" in str(e.exception)) if __name__ == '__main__': unittest.main() diff --git a/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py b/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py index 1baede0e1..e22eda04f 100644 --- a/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py +++ b/tests/keras_tests/non_parallel_tests/test_keras_tp_model.py @@ -260,7 +260,7 @@ def rep_data(): quantized_model, _ = mct.ptq.keras_post_training_quantization(model, rep_data, core_config=core_config, - target_kpi=mct.core.KPI(np.inf), + target_resource_utilization=mct.core.ResourceUtilization(np.inf), target_platform_capabilities=tpc) def test_get_keras_supported_version(self): diff --git a/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py b/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py index 918c009f0..c768005d7 100644 --- a/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py +++ 
b/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py @@ -18,7 +18,8 @@ import keras from model_compression_toolkit.core import DEFAULTCONFIG from model_compression_toolkit.core.common.mixed_precision.distance_weighting import MpDistanceWeighting -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI, KPITarget +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \ + ResourceUtilization, RUTarget from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \ MixedPrecisionQuantizationConfig from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_facade import search_bit_width, \ @@ -46,46 +47,46 @@ def __init__(self): pass def reconstruct_config_from_virtual_graph(self, - max_kpi_config, + max_ru_config, changed_virtual_nodes_idx=None, original_base_config=None): - return max_kpi_config + return max_ru_config class MockMixedPrecisionSearchManager: - def __init__(self, layer_to_kpi_mapping): + def __init__(self, layer_to_ru_mapping): self.layer_to_bitwidth_mapping = {0: [0, 1, 2]} - self.layer_to_kpi_mapping = layer_to_kpi_mapping + self.layer_to_ru_mapping = layer_to_ru_mapping self.compute_metric_fn = lambda x, y=None, z=None: {0: 2, 1: 1, 2: 0}[x[0]] - self.min_kpi = {KPITarget.WEIGHTS: [[1], [1], [1]], - KPITarget.ACTIVATION: [[1], [1], [1]], - KPITarget.TOTAL: [[2], [2], [2]], - KPITarget.BOPS: [[1], [1], [1]]} # minimal kpi in the tests layer_to_kpi_mapping - - self.compute_kpi_functions = {KPITarget.WEIGHTS: (None, lambda v: [lpSum(v)]), - KPITarget.ACTIVATION: (None, lambda v: [i for i in v]), - KPITarget.TOTAL: (None, lambda v: [lpSum(v[0]) + i for i in v[1]]), - KPITarget.BOPS: (None, lambda v: [lpSum(v)])} - self.max_kpi_config = [0] + self.min_ru = {RUTarget.WEIGHTS: [[1], [1], [1]], + RUTarget.ACTIVATION: [[1], [1], [1]], + RUTarget.TOTAL: [[2], [2], [2]], + RUTarget.BOPS: [[1], [1], 
[1]]} # minimal resource utilization in the tests layer_to_ru_mapping + + self.compute_ru_functions = {RUTarget.WEIGHTS: (None, lambda v: [lpSum(v)]), + RUTarget.ACTIVATION: (None, lambda v: [i for i in v]), + RUTarget.TOTAL: (None, lambda v: [lpSum(v[0]) + i for i in v[1]]), + RUTarget.BOPS: (None, lambda v: [lpSum(v)])} + self.max_ru_config = [0] self.config_reconstruction_helper = MockReconstructionHelper() - self.non_conf_kpi_dict = None - - def compute_kpi_matrix(self, target): - # minus 1 is normalization by the minimal kpi (which is always 1 in this test) - if target == KPITarget.WEIGHTS: - kpi_matrix = [np.flip(np.array([kpi.weights_memory - 1 for _, kpi in self.layer_to_kpi_mapping[0].items()]))] - elif target == KPITarget.ACTIVATION: - kpi_matrix = [np.flip(np.array([kpi.activation_memory - 1 for _, kpi in self.layer_to_kpi_mapping[0].items()]))] - elif target == KPITarget.TOTAL: - kpi_matrix = [np.flip(np.array([kpi.weights_memory - 1 for _, kpi in self.layer_to_kpi_mapping[0].items()])), - np.flip(np.array([kpi.activation_memory - 1 for _, kpi in self.layer_to_kpi_mapping[0].items()]))] - elif target == KPITarget.BOPS: - kpi_matrix = [np.flip(np.array([kpi.bops - 1 for _, kpi in self.layer_to_kpi_mapping[0].items()]))] + self.non_conf_ru_dict = None + + def compute_resource_utilization_matrix(self, target): + # minus 1 is normalization by the minimal resource utilization (which is always 1 in this test) + if target == RUTarget.WEIGHTS: + ru_matrix = [np.flip(np.array([ru.weights_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))] + elif target == RUTarget.ACTIVATION: + ru_matrix = [np.flip(np.array([ru.activation_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))] + elif target == RUTarget.TOTAL: + ru_matrix = [np.flip(np.array([ru.weights_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()])), + np.flip(np.array([ru.activation_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))] + elif target == RUTarget.BOPS: 
+ ru_matrix = [np.flip(np.array([ru.bops - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))] else: # not supposed to get here - kpi_matrix = [] + ru_matrix = [] - return np.array(kpi_matrix) + return np.array(ru_matrix) def finalize_distance_metric(self, d): return d @@ -94,104 +95,106 @@ def finalize_distance_metric(self, d): class TestLpSearchBitwidth(unittest.TestCase): def test_search_weights_only(self): - target_kpi = KPI(weights_memory=2) - layer_to_kpi_mapping = {0: {2: KPI(weights_memory=1), - 1: KPI(weights_memory=2), - 0: KPI(weights_memory=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_kpi_mapping) + target_resource_utilization = ResourceUtilization(weights_memory=2) + layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1), + 1: ResourceUtilization(weights_memory=2), + 0: ResourceUtilization(weights_memory=3)}} + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) bit_cfg = mp_integer_programming_search(mock_search_manager, - target_kpi=target_kpi) + target_resource_utilization=target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) - target_kpi = KPI(weights_memory=0) # Infeasible solution! + target_resource_utilization = ResourceUtilization(weights_memory=0) # Infeasible solution! with self.assertRaises(Exception): bit_cfg = mp_integer_programming_search(mock_search_manager, - target_kpi=target_kpi) + target_resource_utilization=target_resource_utilization) bit_cfg = mp_integer_programming_search(mock_search_manager, - target_kpi=KPI(weights_memory=np.inf)) + target_resource_utilization=ResourceUtilization(weights_memory=np.inf)) self.assertTrue(len(bit_cfg) == 1) - self.assertTrue(bit_cfg[0] == 0) # KPI is Inf so expecting for the maximal bit-width result + self.assertTrue(bit_cfg[0] == 0) # ResourceUtilization is Inf so expecting for the maximal bit-width result - target_kpi = None # target KPI is not defined! 
+ target_resource_utilization = None # target ResourceUtilization is not defined! with self.assertRaises(Exception): bit_cfg = mp_integer_programming_search(mock_search_manager, - target_kpi=target_kpi) + target_resource_utilization=target_resource_utilization) def test_search_activation_only(self): - target_kpi = KPI(activation_memory=2) - layer_to_kpi_mapping = {0: {2: KPI(activation_memory=1), - 1: KPI(activation_memory=2), - 0: KPI(activation_memory=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_kpi_mapping) + target_resource_utilization = ResourceUtilization(activation_memory=2) + layer_to_ru_mapping = {0: {2: ResourceUtilization(activation_memory=1), + 1: ResourceUtilization(activation_memory=2), + 0: ResourceUtilization(activation_memory=3)}} + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) bit_cfg = mp_integer_programming_search(mock_search_manager, - target_kpi=target_kpi) + target_resource_utilization=target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) - target_kpi = KPI(activation_memory=0) # Infeasible solution! + target_resource_utilization = ResourceUtilization(activation_memory=0) # Infeasible solution! 
with self.assertRaises(Exception): bit_cfg = mp_integer_programming_search(mock_search_manager, - target_kpi=target_kpi) + target_resource_utilization=target_resource_utilization) bit_cfg = mp_integer_programming_search(mock_search_manager, - target_kpi=KPI(activation_memory=np.inf)) + target_resource_utilization=ResourceUtilization( + activation_memory=np.inf)) self.assertTrue(len(bit_cfg) == 1) - self.assertTrue(bit_cfg[0] == 0) # KPI is Inf so expecting for the maximal bit-width result + self.assertTrue(bit_cfg[0] == 0) # ResourceUtilization is Inf so expecting for the maximal bit-width result def test_search_weights_and_activation(self): - target_kpi = KPI(weights_memory=2, activation_memory=2) - layer_to_kpi_mapping = {0: {2: KPI(weights_memory=1, activation_memory=1), - 1: KPI(weights_memory=2, activation_memory=2), - 0: KPI(weights_memory=3, activation_memory=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_kpi_mapping) + target_resource_utilization = ResourceUtilization(weights_memory=2, activation_memory=2) + layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1, activation_memory=1), + 1: ResourceUtilization(weights_memory=2, activation_memory=2), + 0: ResourceUtilization(weights_memory=3, activation_memory=3)}} + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) bit_cfg = mp_integer_programming_search(mock_search_manager, - target_kpi=target_kpi) + target_resource_utilization=target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) - target_kpi = KPI(weights_memory=0, activation_memory=0) # Infeasible solution! + target_resource_utilization = ResourceUtilization(weights_memory=0, activation_memory=0) # Infeasible solution! 
with self.assertRaises(Exception): bit_cfg = mp_integer_programming_search(mock_search_manager, - target_kpi=target_kpi) + target_resource_utilization=target_resource_utilization) bit_cfg = mp_integer_programming_search(mock_search_manager, - target_kpi=KPI(weights_memory=np.inf, activation_memory=np.inf)) + target_resource_utilization=ResourceUtilization(weights_memory=np.inf, + activation_memory=np.inf)) self.assertTrue(len(bit_cfg) == 1) - self.assertTrue(bit_cfg[0] == 0) # KPI is Inf so expecting for the maximal bit-width result + self.assertTrue(bit_cfg[0] == 0) # ResourceUtilization is Inf so expecting for the maximal bit-width result - def test_search_total_kpi(self): - target_kpi = KPI(total_memory=4) - layer_to_kpi_mapping = {0: {2: KPI(weights_memory=1, activation_memory=1), - 1: KPI(weights_memory=2, activation_memory=2), - 0: KPI(weights_memory=3, activation_memory=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_kpi_mapping) + def test_search_total_resource_utilization(self): + target_resource_utilization = ResourceUtilization(total_memory=4) + layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1, activation_memory=1), + 1: ResourceUtilization(weights_memory=2, activation_memory=2), + 0: ResourceUtilization(weights_memory=3, activation_memory=3)}} + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) bit_cfg = mp_integer_programming_search(mock_search_manager, - target_kpi=target_kpi) + target_resource_utilization=target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) - def test_search_bops_kpi(self): - target_kpi = KPI(bops=2) - layer_to_kpi_mapping = {0: {2: KPI(bops=1), - 1: KPI(bops=2), - 0: KPI(bops=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_kpi_mapping) + def test_search_bops_ru(self): + target_resource_utilization = ResourceUtilization(bops=2) + layer_to_ru_mapping = {0: {2: ResourceUtilization(bops=1), + 1: 
ResourceUtilization(bops=2), + 0: ResourceUtilization(bops=3)}} + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) bit_cfg = mp_integer_programming_search(mock_search_manager, - target_kpi=target_kpi) + target_resource_utilization=target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) @@ -205,9 +208,10 @@ def run_search_bitwidth_config_test(self, core_config): tpc = get_weights_only_mp_tpc_keras(base_config=base_config, default_config=default_config, - mp_bitwidth_candidates_list=[(c.attr_weights_configs_mapping[KERNEL_ATTR].weights_n_bits, - c.activation_n_bits) for c - in mixed_precision_cfg_list], + mp_bitwidth_candidates_list=[ + (c.attr_weights_configs_mapping[KERNEL_ATTR].weights_n_bits, + c.activation_n_bits) for c + in mixed_precision_cfg_list], name="bitwidth_cfg_test") fw_info = DEFAULT_KERAS_INFO @@ -255,7 +259,7 @@ def representative_data_gen(): cfg = search_bit_width(graph_to_search_cfg=graph, fw_info=DEFAULT_KERAS_INFO, fw_impl=keras_impl, - target_kpi=KPI(np.inf), + target_resource_utilization=ResourceUtilization(np.inf), mp_config=core_config.mixed_precision_config, representative_data_gen=representative_data_gen, search_method=BitWidthSearchMethod.INTEGER_PROGRAMMING) @@ -264,7 +268,7 @@ def representative_data_gen(): cfg = search_bit_width(graph_to_search_cfg=graph, fw_info=DEFAULT_KERAS_INFO, fw_impl=keras_impl, - target_kpi=KPI(np.inf), + target_resource_utilization=ResourceUtilization(np.inf), mp_config=core_config.mixed_precision_config, representative_data_gen=representative_data_gen, search_method=None) @@ -273,7 +277,7 @@ def representative_data_gen(): cfg = search_bit_width(graph_to_search_cfg=graph, fw_info=DEFAULT_KERAS_INFO, fw_impl=keras_impl, - target_kpi=None, + target_resource_utilization=None, mp_config=core_config.mixed_precision_config, representative_data_gen=representative_data_gen, search_method=BitWidthSearchMethod.INTEGER_PROGRAMMING) @@ -296,6 +300,5 @@ def 
test_mixed_precision_search_facade(self): self.run_search_bitwidth_config_test(core_config_last_layer) - if __name__ == '__main__': unittest.main() diff --git a/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py b/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py index 9a4f132d3..555e62108 100644 --- a/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py +++ b/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py @@ -120,7 +120,7 @@ def plot_tensor_sizes(self): tpc=tpc, network_editor=[], quant_config=cfg, - target_kpi=mct.core.KPI(), + target_resource_utilization=mct.core.ResourceUtilization(), n_iter=1, analyze_similarity=True, mp_cfg=mp_cfg) @@ -149,7 +149,7 @@ def rep_data(): core_config = mct.core.CoreConfig(mixed_precision_config=mp_qc) quantized_model, _ = mct.ptq.keras_post_training_quantization(self.model, rep_data, - target_kpi=mct.core.KPI(np.inf), + target_resource_utilization=mct.core.ResourceUtilization(np.inf), core_config=core_config, target_platform_capabilities=tpc) @@ -162,7 +162,7 @@ def rep_data(): self.model = MultipleOutputsNet() quantized_model, _ = mct.ptq.keras_post_training_quantization(self.model, rep_data, - target_kpi=mct.core.KPI(np.inf), + target_resource_utilization=mct.core.ResourceUtilization(np.inf), core_config=core_config, target_platform_capabilities=tpc) diff --git a/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2d_conv2dtranspose_pruning_test.py b/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2d_conv2dtranspose_pruning_test.py index 3dd32650d..3bd51ddb3 100644 --- a/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2d_conv2dtranspose_pruning_test.py +++ b/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2d_conv2dtranspose_pruning_test.py @@ -67,9 +67,9 @@ def get_pruning_config(self): add_const_importance_metric(first_num_oc=6, second_num_oc=4, simd=self.simd) return 
mct.pruning.PruningConfig(importance_metric=ConstImportanceMetric.CONST) return super().get_pruning_config() - def get_kpi(self): + def get_resource_utilization(self): # Remove only one group of channels only one parameter should be pruned - return mct.core.KPI(weights_memory=(self.dense_model_num_params - 1) * 4) + return mct.core.ResourceUtilization(weights_memory=(self.dense_model_num_params - 1) * 4) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): dense_convtrans_layers = get_layers_from_model_by_type(float_model, layers.Conv2DTranspose) diff --git a/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2d_pruning_test.py b/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2d_pruning_test.py index a7e9d0ce1..29d05b571 100644 --- a/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2d_pruning_test.py +++ b/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2d_pruning_test.py @@ -69,9 +69,9 @@ def create_networks(self): model = keras.Model(inputs=inputs, outputs=outputs) return model - def get_kpi(self): + def get_resource_utilization(self): # Remove only one group of channels only one parameter should be pruned - return mct.core.KPI(weights_memory=(self.dense_model_num_params-1) * 4) + return mct.core.ResourceUtilization(weights_memory=(self.dense_model_num_params - 1) * 4) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): dense_layers = get_layers_from_model_by_type(float_model, layers.Conv2D) diff --git a/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2dtranspose_conv2d_pruning_test.py b/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2dtranspose_conv2d_pruning_test.py index c3c15c9b6..ab7f84db6 100644 --- a/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2dtranspose_conv2d_pruning_test.py +++ 
b/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2dtranspose_conv2d_pruning_test.py @@ -66,9 +66,9 @@ def create_networks(self): model = keras.Model(inputs=inputs, outputs=x) return model - def get_kpi(self): + def get_resource_utilization(self): # Remove only one group of channels only one parameter should be pruned - return mct.core.KPI(weights_memory=(self.dense_model_num_params - 1) * 4) + return mct.core.ResourceUtilization(weights_memory=(self.dense_model_num_params - 1) * 4) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): dense_convtrans_layers = get_layers_from_model_by_type(float_model, layers.Conv2DTranspose) diff --git a/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2dtranspose_pruning_test.py b/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2dtranspose_pruning_test.py index b1985d6b9..aaf1ccf3a 100644 --- a/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2dtranspose_pruning_test.py +++ b/tests/keras_tests/pruning_tests/feature_networks/networks_tests/conv2dtranspose_pruning_test.py @@ -70,9 +70,9 @@ def get_pruning_config(self): return mct.pruning.PruningConfig(importance_metric=ConstImportanceMetric.CONST) return super().get_pruning_config() - def get_kpi(self): + def get_resource_utilization(self): # Remove only one group of channels only one parameter should be pruned - return mct.core.KPI(weights_memory=(self.dense_model_num_params-1) * 4) + return mct.core.ResourceUtilization(weights_memory=(self.dense_model_num_params - 1) * 4) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): dense_layers = get_layers_from_model_by_type(float_model, layers.Conv2DTranspose) diff --git a/tests/keras_tests/pruning_tests/feature_networks/networks_tests/dense_pruning_test.py b/tests/keras_tests/pruning_tests/feature_networks/networks_tests/dense_pruning_test.py index 1edb2fabf..59246e893 100644 --- 
a/tests/keras_tests/pruning_tests/feature_networks/networks_tests/dense_pruning_test.py +++ b/tests/keras_tests/pruning_tests/feature_networks/networks_tests/dense_pruning_test.py @@ -71,9 +71,9 @@ def create_networks(self): model = keras.Model(inputs=inputs, outputs=outputs) return model - def get_kpi(self): + def get_resource_utilization(self): # Remove only one group of channels only one parameter should be pruned - return mct.core.KPI(weights_memory=(self.dense_model_num_params - 1) * 4) + return mct.core.ResourceUtilization(weights_memory=(self.dense_model_num_params - 1) * 4) def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): dense_layers = get_layers_from_model_by_type(float_model, layers.Dense) diff --git a/tests/keras_tests/pruning_tests/feature_networks/pruning_keras_feature_test.py b/tests/keras_tests/pruning_tests/feature_networks/pruning_keras_feature_test.py index f39c93575..ee4149ec0 100644 --- a/tests/keras_tests/pruning_tests/feature_networks/pruning_keras_feature_test.py +++ b/tests/keras_tests/pruning_tests/feature_networks/pruning_keras_feature_test.py @@ -42,7 +42,7 @@ def run_test(self): for model_float in feature_networks: self.dense_model_num_params=sum([l.count_params() for l in model_float.layers]) pruned_model, pruning_info = mct.pruning.keras_pruning_experimental(model=model_float, - target_kpi=self.get_kpi(), + target_resource_utilization=self.get_resource_utilization(), representative_data_gen=self.representative_data_gen_experimental, pruning_config=self.get_pruning_config(), target_platform_capabilities=self.get_tpc()) diff --git a/tests/keras_tests/pruning_tests/test_pretrained_models.py b/tests/keras_tests/pruning_tests/test_pretrained_models.py index 53cd20c83..874338ef8 100644 --- a/tests/keras_tests/pruning_tests/test_pretrained_models.py +++ b/tests/keras_tests/pruning_tests/test_pretrained_models.py @@ -128,7 +128,7 @@ def run_test(self, cr, dense_model, test_retraining=False): # Perform 
pruning on the dense model. pruned_model, pruning_info = mct.pruning.keras_pruning_experimental( model=dense_model, - target_kpi=mct.core.KPI(weights_memory=dense_nparams * FP32_BYTES_PER_PARAMETER * cr), + target_resource_utilization=mct.core.ResourceUtilization(weights_memory=dense_nparams * FP32_BYTES_PER_PARAMETER * cr), representative_data_gen=self.representative_dataset, pruning_config=mct.pruning.PruningConfig( num_score_approximations=1, diff --git a/tests/pytorch_tests/function_tests/kpi_data_test.py b/tests/pytorch_tests/function_tests/resource_utilization_data_test.py similarity index 72% rename from tests/pytorch_tests/function_tests/kpi_data_test.py rename to tests/pytorch_tests/function_tests/resource_utilization_data_test.py index 1130e4bb0..e06bb07ae 100644 --- a/tests/pytorch_tests/function_tests/kpi_data_test.py +++ b/tests/pytorch_tests/function_tests/resource_utilization_data_test.py @@ -19,7 +19,8 @@ import torch from torch.nn import Conv2d, BatchNorm2d, ReLU -from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs +from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import \ + get_op_quantization_configs from model_compression_toolkit.core.pytorch.constants import KERNEL from tests.common_tests.helpers.generate_test_tp_model import generate_tp_model_with_activation_mp, generate_test_op_qc, \ generate_test_attr_configs @@ -88,7 +89,7 @@ def forward(self, inp): def parameters_sum(self): return getattr(self.conv1, KERNEL).detach().numpy().flatten().shape[0] + \ - getattr(self.conv2, KERNEL).detach().numpy().flatten().shape[0] + getattr(self.conv2, KERNEL).detach().numpy().flatten().shape[0] def max_tensor(self): _, l_shape = self(torch.from_numpy(next(large_random_datagen())[0]).float()) @@ -109,29 +110,29 @@ def prep_test(model, mp_bitwidth_candidates_list, random_datagen): mp_bitwidth_candidates_list=[(8, 8), (8, 4), (8, 2), (4, 8), (4, 4), 
(4, 2), (2, 8), (2, 4), (2, 2)]), - test_name='kpi_data_test', - tpc_name='kpi_data_test') + test_name='ru_data_test', + tpc_name='ru_data_test') - kpi_data = mct.core.pytorch_kpi_data(in_model=model, - representative_data_gen=random_datagen, - core_config=mct.core.CoreConfig(), - target_platform_capabilities=tpc_dict['kpi_data_test']) + ru_data = mct.core.pytorch_resource_utilization_data(in_model=model, + representative_data_gen=random_datagen, + core_config=mct.core.CoreConfig(), + target_platform_capabilities=tpc_dict['ru_data_test']) - return kpi_data + return ru_data -class KPIDataBaseTestClass(BasePytorchTest): +class ResourceUtilizationDataBaseTestClass(BasePytorchTest): - def verify_results(self, kpi, sum_parameters, max_tensor): - self.unit_test.assertTrue(kpi.weights_memory == sum_parameters, + def verify_results(self, ru, sum_parameters, max_tensor): + self.unit_test.assertTrue(ru.weights_memory == sum_parameters, f"Expects weights_memory to be {sum_parameters} " - f"but result is {kpi.weights_memory}") - self.unit_test.assertTrue(kpi.activation_memory == max_tensor, + f"but result is {ru.weights_memory}") + self.unit_test.assertTrue(ru.activation_memory == max_tensor, f"Expects activation_memory to be {max_tensor} " - f"but result is {kpi.activation_memory}") + f"but result is {ru.activation_memory}") -class TestKPIDataBasicAllBitwidth(KPIDataBaseTestClass): +class TestResourceUtilizationDataBasicAllBitwidth(ResourceUtilizationDataBaseTestClass): def run_test(self): model = BasicModel() @@ -140,13 +141,13 @@ def run_test(self): mp_bitwidth_candidates_list = [(i, j) for i in [8, 4, 2] for j in [8, 4, 2]] - kpi_data = prep_test(model, mp_bitwidth_candidates_list, small_random_datagen) + ru_data = prep_test(model, mp_bitwidth_candidates_list, small_random_datagen) # max should be 8-bit quantization - self.verify_results(kpi_data, sum_parameters, max_tensor) + self.verify_results(ru_data, sum_parameters, max_tensor) -class 
TestKPIDataBasicPartialBitwidth(KPIDataBaseTestClass): +class TestResourceUtilizationDataBasicPartialBitwidth(ResourceUtilizationDataBaseTestClass): def run_test(self): model = BasicModel() @@ -155,12 +156,12 @@ def run_test(self): mp_bitwidth_candidates_list = [(i, j) for i in [4, 2] for j in [4, 2]] - kpi_data = prep_test(model, mp_bitwidth_candidates_list, small_random_datagen) + ru_data = prep_test(model, mp_bitwidth_candidates_list, small_random_datagen) - self.verify_results(kpi_data, sum_parameters, max_tensor) + self.verify_results(ru_data, sum_parameters, max_tensor) -class TestKPIDataComplesAllBitwidth(KPIDataBaseTestClass): +class TestResourceUtilizationDataComplesAllBitwidth(ResourceUtilizationDataBaseTestClass): def run_test(self): model = ComplexModel() @@ -169,13 +170,12 @@ def run_test(self): mp_bitwidth_candidates_list = [(i, j) for i in [8, 4, 2] for j in [8, 4, 2]] - kpi_data = prep_test(model, mp_bitwidth_candidates_list, large_random_datagen) + ru_data = prep_test(model, mp_bitwidth_candidates_list, large_random_datagen) - self.verify_results(kpi_data, sum_parameters, max_tensor) + self.verify_results(ru_data, sum_parameters, max_tensor) - -class TestKPIDataComplexPartialBitwidth(KPIDataBaseTestClass): +class TestResourceUtilizationDataComplexPartialBitwidth(ResourceUtilizationDataBaseTestClass): def run_test(self): model = ComplexModel() @@ -184,6 +184,6 @@ def run_test(self): mp_bitwidth_candidates_list = [(i, j) for i in [4, 2] for j in [4, 2]] - kpi_data = prep_test(model, mp_bitwidth_candidates_list, large_random_datagen) + ru_data = prep_test(model, mp_bitwidth_candidates_list, large_random_datagen) - self.verify_results(kpi_data, sum_parameters, max_tensor) + self.verify_results(ru_data, sum_parameters, max_tensor) diff --git a/tests/pytorch_tests/function_tests/test_doc_examples.py b/tests/pytorch_tests/function_tests/test_doc_examples.py index 41c293db6..91a6db140 100644 --- a/tests/pytorch_tests/function_tests/test_doc_examples.py +++ 
b/tests/pytorch_tests/function_tests/test_doc_examples.py @@ -31,5 +31,5 @@ def test_pytorch_ptq_facade(self): def test_pytorch_gptq_facade(self): doctest.testfile("quantization_facade.py", package=gptq.pytorch, verbose=True, raise_on_error=RAISE_ON_ERROR) - def test_pytorch_kpi_data_facade(self): - doctest.testfile("kpi_data_facade.py", package=core.pytorch, verbose=True, raise_on_error=RAISE_ON_ERROR) + def test_pytorch_resource_utilization_data_facade(self): + doctest.testfile("resource_utilization_data_facade.py", package=core.pytorch, verbose=True, raise_on_error=RAISE_ON_ERROR) diff --git a/tests/pytorch_tests/function_tests/test_function_runner.py b/tests/pytorch_tests/function_tests/test_function_runner.py index 6db8e8c00..1d2fb2715 100644 --- a/tests/pytorch_tests/function_tests/test_function_runner.py +++ b/tests/pytorch_tests/function_tests/test_function_runner.py @@ -20,8 +20,8 @@ Conv2D2BNInfoCollectionTest, Conv2DBNChainInfoCollectionTest, BNChainInfoCollectionTest, \ BNLayerInfoCollectionTest, INP2BNInfoCollectionTest from tests.pytorch_tests.function_tests.get_gptq_config_test import TestGetGPTQConfig -from tests.pytorch_tests.function_tests.kpi_data_test import TestKPIDataBasicAllBitwidth, \ - TestKPIDataBasicPartialBitwidth, TestKPIDataComplexPartialBitwidth, TestKPIDataComplesAllBitwidth +from tests.pytorch_tests.function_tests.resource_utilization_data_test import TestResourceUtilizationDataBasicAllBitwidth, \ + TestResourceUtilizationDataBasicPartialBitwidth, TestResourceUtilizationDataComplexPartialBitwidth, TestResourceUtilizationDataComplesAllBitwidth from tests.pytorch_tests.function_tests.layer_fusing_test import LayerFusingTest1, LayerFusingTest2, LayerFusingTest3, \ LayerFusingTest4 from tests.pytorch_tests.function_tests.set_device_test import SetDeviceTest @@ -78,29 +78,29 @@ def test_inp2bn_bn_info_collection(self): """ INP2BNInfoCollectionTest(self).run_test() - def test_kpi_data_basic_all(self): + def test_ru_data_basic_all(self): 
""" - This test checks the KPI data Pytorch API. + This test checks the resource utilization data Pytorch API. """ - TestKPIDataBasicAllBitwidth(self).run_test() + TestResourceUtilizationDataBasicAllBitwidth(self).run_test() - def test_kpi_data_basic_partial(self): + def test_ru_data_basic_partial(self): """ - This test checks the KPI data Pytorch API. + This test checks the resource utilization data Pytorch API. """ - TestKPIDataBasicPartialBitwidth(self).run_test() + TestResourceUtilizationDataBasicPartialBitwidth(self).run_test() - def test_kpi_data_complex_all(self): + def test_ru_data_complex_all(self): """ - This test checks the KPI data Pytorch API. + This test checks the resource utilization data Pytorch API. """ - TestKPIDataComplesAllBitwidth(self).run_test() + TestResourceUtilizationDataComplesAllBitwidth(self).run_test() - def test_kpi_data_complex_partial(self): + def test_ru_data_complex_partial(self): """ - This test checks the KPI data Pytorch API. + This test checks the resource utilization data Pytorch API. 
""" - TestKPIDataComplexPartialBitwidth(self).run_test() + TestResourceUtilizationDataComplexPartialBitwidth(self).run_test() def test_activation_hessian_trace(self): """ diff --git a/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py b/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py index 2854cb94c..f8eb2023e 100644 --- a/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py +++ b/tests/pytorch_tests/function_tests/test_pytorch_tp_model.py @@ -246,7 +246,7 @@ def rep_data(): mixed_precision_config=mp_qc) quantized_model, _ = mct.ptq.pytorch_post_training_quantization(model, rep_data, - target_kpi=mct.core.KPI(np.inf), + target_resource_utilization=mct.core.ResourceUtilization(np.inf), target_platform_capabilities=tpc, core_config=core_config) diff --git a/tests/pytorch_tests/model_tests/base_pytorch_test.py b/tests/pytorch_tests/model_tests/base_pytorch_test.py index c1ebb5a22..87558a78e 100644 --- a/tests/pytorch_tests/model_tests/base_pytorch_test.py +++ b/tests/pytorch_tests/model_tests/base_pytorch_test.py @@ -146,7 +146,7 @@ def representative_data_gen_experimental(): ptq_model, quantization_info = mct.ptq.pytorch_post_training_quantization(in_module=model_float, representative_data_gen=representative_data_gen_experimental, - target_kpi=self.get_kpi(), + target_resource_utilization=self.get_resource_utilization(), core_config=core_config, target_platform_capabilities=tpc) diff --git a/tests/pytorch_tests/model_tests/feature_models/dynamic_size_inputs_test.py b/tests/pytorch_tests/model_tests/feature_models/dynamic_size_inputs_test.py index 25f03b963..c5a392013 100644 --- a/tests/pytorch_tests/model_tests/feature_models/dynamic_size_inputs_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/dynamic_size_inputs_test.py @@ -120,7 +120,7 @@ def representative_data_gen_experimental(): ptq_model, quantization_info = mct.ptq.pytorch_post_training_quantization( in_module=model_float, 
representative_data_gen=representative_data_gen_experimental, - target_kpi=self.get_kpi(), + target_resource_utilization=self.get_resource_utilization(), core_config=core_config, target_platform_capabilities=tpc ) diff --git a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py index 41e76a7d2..811b5c22e 100644 --- a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py @@ -15,7 +15,7 @@ import torch import numpy as np -from model_compression_toolkit.core import MixedPrecisionQuantizationConfig, KPI, CoreConfig, \ +from model_compression_toolkit.core import MixedPrecisionQuantizationConfig, ResourceUtilization, CoreConfig, \ MixedPrecisionQuantizationConfig from model_compression_toolkit.core.common.user_info import UserInformation from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs @@ -70,8 +70,8 @@ def __init__(self, unit_test): super().__init__(unit_test) self.expected_config = [0, 0, 0, 0] - def get_kpi(self): - return KPI(np.inf, np.inf) + def get_resource_utilization(self): + return ResourceUtilization(np.inf, np.inf) def compare(self, quantized_models, float_model, input_x=None, quantization_info=None): self.verify_config(quantization_info.mixed_precision_cfg, self.expected_config) @@ -82,8 +82,8 @@ def __init__(self, unit_test): super().__init__(unit_test) self.expected_config = [2, 8, 2, 2] - def get_kpi(self): - return KPI(96, 768) + def get_resource_utilization(self): + return ResourceUtilization(96, 768) def compare(self, quantized_models, float_model, input_x=None, quantization_info=None): self.verify_config(quantization_info.mixed_precision_cfg, self.expected_config) @@ -94,8 +94,8 @@ def __init__(self, unit_test): super().__init__(unit_test) 
self.expected_config = [1, 4, 1, 1] - def get_kpi(self): - return KPI(192, 1536) + def get_resource_utilization(self): + return ResourceUtilization(192, 1536) def compare(self, quantized_models, float_model, input_x=None, quantization_info=None): self.verify_config(quantization_info.mixed_precision_cfg, self.expected_config) @@ -106,8 +106,8 @@ def __init__(self, unit_test): super().__init__(unit_test) self.expected_config = [1, 4, 4, 1] - def get_kpi(self): - return KPI(81, 1536) + def get_resource_utilization(self): + return ResourceUtilization(81, 1536) def create_feature_network(self, input_shape): return MixedPrecisionFunctionalNet(input_shape) @@ -123,8 +123,8 @@ def __init__(self, unit_test): self.num_calibration_iter = 3 self.val_batch_size = 2 - def get_kpi(self): - return KPI(np.inf, np.inf) + def get_resource_utilization(self): + return ResourceUtilization(np.inf, np.inf) def get_mixed_precision_config(self): return MixedPrecisionQuantizationConfig(num_of_images=4) diff --git a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_bops_test.py b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_bops_test.py index b02ed1ad0..dfd585cdd 100644 --- a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_bops_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_bops_test.py @@ -13,7 +13,7 @@ # limitations under the License. 
# ============================================================================== import torch.nn -from model_compression_toolkit.core import MixedPrecisionQuantizationConfig, KPI, MixedPrecisionQuantizationConfig +from model_compression_toolkit.core import MixedPrecisionQuantizationConfig, ResourceUtilization, MixedPrecisionQuantizationConfig from tests.pytorch_tests.model_tests.base_pytorch_test import BasePytorchTest from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs @@ -127,8 +127,8 @@ def get_input_shapes(self): def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # Verify that some layers got bit-width smaller than 8 bits (so checking candidate index is not 0) self.unit_test.assertTrue(any(i > 0 for i in quantization_info.mixed_precision_cfg)) - # Verify final BOPs KPI - self.unit_test.assertTrue(quantization_info.final_kpi.bops <= self.get_kpi().bops) + # Verify final BOPs utilization + self.unit_test.assertTrue(quantization_info.final_resource_utilization.bops <= self.get_resource_utilization().bops) class MixedPrecisionBopsBasicTest(BaseMixedPrecisionBopsTest): @@ -141,8 +141,8 @@ def __init__(self, unit_test): def create_feature_network(self, input_shape): return BaseBopsNetwork(input_shape) - def get_kpi(self): - return KPI(bops=1350000) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(bops=1350000) # should require some quantization to all layers class MixedPrecisionBopsAllWeightsLayersTest(BaseMixedPrecisionBopsTest): @@ -156,62 +156,62 @@ def __init__(self, unit_test, mixed_precision_candidates_list=None): def create_feature_network(self, input_shape): return AllWeightsBopsNetwork(input_shape) - def get_kpi(self): - return KPI(bops=3000000) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(bops=3000000) # should 
require some quantization to all layers class MixedPrecisionWeightsOnlyBopsTest(MixedPrecisionBopsAllWeightsLayersTest): def __init__(self, unit_test): super().__init__(unit_test, mixed_precision_candidates_list=[(8, 8), (4, 8), (2, 8)]) - def get_kpi(self): - return KPI(bops=10000000) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(bops=10000000) # should require some quantization to all layers class MixedPrecisionActivationOnlyBopsTest(MixedPrecisionBopsAllWeightsLayersTest): def __init__(self, unit_test): super().__init__(unit_test, mixed_precision_candidates_list=[(8, 8), (8, 4), (8, 2)]) - def get_kpi(self): - return KPI(bops=10000000) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(bops=10000000) # should require some quantization to all layers def compare(self, quantized_model, float_model, input_x=None, quantization_info=None): # Verify that some layers got bit-width smaller than 8 bits (so checking candidate index is not 0) self.unit_test.assertTrue(any(i > 0 for i in quantization_info.mixed_precision_cfg)) - # Verify final BOPs KPI - self.unit_test.assertTrue(quantization_info.final_kpi.bops <= self.get_kpi().bops) + # Verify final BOPs utilization + self.unit_test.assertTrue(quantization_info.final_resource_utilization.bops <= self.get_resource_utilization().bops) -class MixedPrecisionBopsAndWeightsKPITest(MixedPrecisionBopsAllWeightsLayersTest): +class MixedPrecisionBopsAndWeightsMemoryUtilizationTest(MixedPrecisionBopsAllWeightsLayersTest): def __init__(self, unit_test): super().__init__(unit_test) - def get_kpi(self): - return KPI(weights_memory=150, bops=3000000) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(weights_memory=150, bops=3000000) # should require some quantization to all layers -class 
MixedPrecisionBopsAndActivationKPITest(MixedPrecisionBopsAllWeightsLayersTest): +class MixedPrecisionBopsAndActivationMemoryUtilizationTest(MixedPrecisionBopsAllWeightsLayersTest): def __init__(self, unit_test): super().__init__(unit_test) - def get_kpi(self): - return KPI(activation_memory=1000, bops=3000000) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(activation_memory=1000, bops=3000000) # should require some quantization to all layers -class MixedPrecisionBopsAndTotalKPITest(MixedPrecisionBopsAllWeightsLayersTest): +class MixedPrecisionBopsAndTotalMemoryUtilizationTest(MixedPrecisionBopsAllWeightsLayersTest): def __init__(self, unit_test): super().__init__(unit_test) - def get_kpi(self): - return KPI(total_memory=1100, bops=3000000) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(total_memory=1100, bops=3000000) # should require some quantization to all layers -class MixedPrecisionBopsWeightsActivationKPITest(MixedPrecisionBopsAllWeightsLayersTest): +class MixedPrecisionBopsWeightsActivationUtilizationTest(MixedPrecisionBopsAllWeightsLayersTest): def __init__(self, unit_test): super().__init__(unit_test) - def get_kpi(self): - return KPI(weights_memory=150, activation_memory=1000, bops=3000000) # should require some quantization to all layers + def get_resource_utilization(self): + return ResourceUtilization(weights_memory=150, activation_memory=1000, bops=3000000) # should require some quantization to all layers class MixedPrecisionBopsMultipleOutEdgesTest(BaseMixedPrecisionBopsTest): @@ -224,8 +224,8 @@ def __init__(self, unit_test): def create_feature_network(self, input_shape): return MultipleEdgesBopsNetwork(input_shape) - def get_kpi(self): - return KPI(bops=1) # No layers with BOPs count + def get_resource_utilization(self): + return ResourceUtilization(bops=1) # No layers with BOPs count def compare(self, 
quantized_model, float_model, input_x=None, quantization_info=None): # Verify that all layers got 8 bits (so checking candidate index is 0) diff --git a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py index 0e6ebf2d6..2644d6073 100644 --- a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py @@ -17,7 +17,7 @@ from torch.nn import Conv2d from model_compression_toolkit.defaultdict import DefaultDict -from model_compression_toolkit.core import KPI +from model_compression_toolkit.core import ResourceUtilization from model_compression_toolkit.core.common.mixed_precision.distance_weighting import MpDistanceWeighting from model_compression_toolkit.core.common.user_info import UserInformation from model_compression_toolkit.core.pytorch.constants import BIAS @@ -61,7 +61,7 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info: raise NotImplementedError def compare_results(self, quantization_info, quantized_models, float_model, expected_bitwidth_idx): - # quantized with the highest precision since KPI==inf + # quantized with the highest precision since ResourceUtilization==inf self.unit_test.assertTrue((quantization_info.mixed_precision_cfg == [expected_bitwidth_idx, expected_bitwidth_idx]).all()) # verify that quantization occurred @@ -84,8 +84,8 @@ def __init__(self, unit_test, distance_metric=MpDistanceWeighting.AVG): self.distance_metric = distance_metric - def get_kpi(self): - return KPI(np.inf) + def get_resource_utilization(self): + return ResourceUtilization(np.inf) def get_core_configs(self): qc = mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, mct.core.QuantizationErrorMethod.MSE, @@ -161,15 +161,15 @@ def forward(self, inp): return ConvLinearModel(input_shape) - def get_kpi(self): - return 
KPI(np.inf) + def get_resource_utilization(self): + return ResourceUtilization(np.inf) def compare(self, quantized_models, float_model, input_x=None, quantization_info=None): # We just needed to verify that the graph finalization is working without failing. # The actual quantization is not interesting for the sake of this test, so we just verify some # degenerated things to see that everything worked. self.unit_test.assertTrue( - quantization_info.mixed_precision_cfg == [0]) # kpi is infinity -> should give best model - 8bits + quantization_info.mixed_precision_cfg == [0]) # resource utilization is infinity -> should give best model - 8bits quantized_model = quantized_models['mixed_precision_model'] linear_layer = quantized_model.linear @@ -182,8 +182,8 @@ class MixedPercisionSearch2Bit(MixedPercisionBaseTest): def __init__(self, unit_test): super().__init__(unit_test) - def get_kpi(self): - return KPI(96) + def get_resource_utilization(self): + return ResourceUtilization(96) def compare(self, quantized_models, float_model, input_x=None, quantization_info=None): self.compare_results(quantization_info, quantized_models, float_model, 2) @@ -193,8 +193,8 @@ class MixedPercisionSearch4Bit(MixedPercisionBaseTest): def __init__(self, unit_test): super().__init__(unit_test) - def get_kpi(self): - return KPI(192) + def get_resource_utilization(self): + return ResourceUtilization(192) def compare(self, quantized_models, float_model, input_x=None, quantization_info=None): self.compare_results(quantization_info, quantized_models, float_model, 1) @@ -214,8 +214,8 @@ def get_fw_hw_model(self): test_name='mixed_precision_model', ftp_name='mixed_precision_pytorch_test') - def get_kpi(self): - return KPI(np.inf) + def get_resource_utilization(self): + return ResourceUtilization(np.inf) def compare(self, quantized_models, float_model, input_x=None, quantization_info=None): self.compare_results(quantization_info, quantized_models, float_model, 0) @@ -225,8 +225,8 @@ class 
MixedPercisionSearchLastLayerDistance(MixedPercisionBaseTest): def __init__(self, unit_test): super().__init__(unit_test) - def get_kpi(self): - return KPI(192) + def get_resource_utilization(self): + return ResourceUtilization(192) def get_mixed_precision_config(self): return mct.core.MixedPrecisionQuantizationConfig(num_of_images=1, diff --git a/tests/pytorch_tests/model_tests/feature_models/qat_test.py b/tests/pytorch_tests/model_tests/feature_models/qat_test.py index fb6fdcc92..366703328 100644 --- a/tests/pytorch_tests/model_tests/feature_models/qat_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/qat_test.py @@ -262,10 +262,10 @@ def run_test(self): self._gen_fixed_input() model_float = self.create_networks() config = mct.core.CoreConfig() - kpi = mct.core.KPI() # inf memory + ru = mct.core.ResourceUtilization() # inf memory qat_ready_model, quantization_info = mct.qat.pytorch_quantization_aware_training_init_experimental(model_float, self.representative_data_gen_experimental, - kpi, + ru, core_config=config, target_platform_capabilities=self.get_tpc()) @@ -287,7 +287,7 @@ def run_test(self): -class QuantizationAwareTrainingMixedPrecisionKpiCfgTest(QuantizationAwareTrainingTest): +class QuantizationAwareTrainingMixedPrecisionRUCfgTest(QuantizationAwareTrainingTest): def __init__(self, unit_test): super().__init__(unit_test) @@ -307,10 +307,10 @@ def run_test(self): self._gen_fixed_input() model_float = self.create_networks() config = mct.core.CoreConfig() - kpi = mct.core.KPI(weights_memory=50, activation_memory=40) + ru = mct.core.ResourceUtilization(weights_memory=50, activation_memory=40) qat_ready_model, quantization_info = mct.qat.pytorch_quantization_aware_training_init_experimental(model_float, self.representative_data_gen_experimental, - kpi, + ru, core_config=config, target_platform_capabilities=self.get_tpc()) diff --git a/tests/pytorch_tests/model_tests/test_feature_models_runner.py 
b/tests/pytorch_tests/model_tests/test_feature_models_runner.py index 1e6a6b71b..28ae05091 100644 --- a/tests/pytorch_tests/model_tests/test_feature_models_runner.py +++ b/tests/pytorch_tests/model_tests/test_feature_models_runner.py @@ -27,10 +27,10 @@ from tests.pytorch_tests.model_tests.feature_models.conv2d_replacement_test import DwConv2dReplacementTest from tests.pytorch_tests.model_tests.feature_models.mixed_precision_bops_test import MixedPrecisionBopsBasicTest, \ MixedPrecisionBopsAllWeightsLayersTest, MixedPrecisionWeightsOnlyBopsTest, MixedPrecisionActivationOnlyBopsTest, \ - MixedPrecisionBopsAndWeightsKPITest, MixedPrecisionBopsAndActivationKPITest, MixedPrecisionBopsAndTotalKPITest, \ - MixedPrecisionBopsWeightsActivationKPITest, MixedPrecisionBopsMultipleOutEdgesTest + MixedPrecisionBopsAndWeightsMemoryUtilizationTest, MixedPrecisionBopsAndActivationMemoryUtilizationTest, MixedPrecisionBopsAndTotalMemoryUtilizationTest, \ + MixedPrecisionBopsWeightsActivationUtilizationTest, MixedPrecisionBopsMultipleOutEdgesTest from tests.pytorch_tests.model_tests.feature_models.qat_test import QuantizationAwareTrainingTest, \ - QuantizationAwareTrainingMixedPrecisionCfgTest, QuantizationAwareTrainingMixedPrecisionKpiCfgTest, \ + QuantizationAwareTrainingMixedPrecisionCfgTest, QuantizationAwareTrainingMixedPrecisionRUCfgTest, \ QuantizationAwareTrainingQuantizerHolderTest from tests.pytorch_tests.model_tests.feature_models.relu_replacement_test import SingleLayerReplacementTest, \ ReluReplacementTest, ReluReplacementWithAddBiasTest @@ -466,7 +466,7 @@ def test_mixed_precision_multiple_inputs(self): """ MixedPercisionActivationMultipleInputs(self).run_test() - def test_mixed_precision_bops_kpi(self): + def test_mixed_precision_bops_utilization(self): """ This test checks different scenarios for mixed-precision quantization with bit-operations constraint. 
""" @@ -474,10 +474,10 @@ def test_mixed_precision_bops_kpi(self): MixedPrecisionBopsAllWeightsLayersTest(self).run_test() MixedPrecisionWeightsOnlyBopsTest(self).run_test() MixedPrecisionActivationOnlyBopsTest(self).run_test() - MixedPrecisionBopsAndWeightsKPITest(self).run_test() - MixedPrecisionBopsAndActivationKPITest(self).run_test() - MixedPrecisionBopsAndTotalKPITest(self).run_test() - MixedPrecisionBopsWeightsActivationKPITest(self).run_test() + MixedPrecisionBopsAndWeightsMemoryUtilizationTest(self).run_test() + MixedPrecisionBopsAndActivationMemoryUtilizationTest(self).run_test() + MixedPrecisionBopsAndTotalMemoryUtilizationTest(self).run_test() + MixedPrecisionBopsWeightsActivationUtilizationTest(self).run_test() MixedPrecisionBopsMultipleOutEdgesTest(self).run_test() def test_mha_layer_test(self): @@ -569,7 +569,7 @@ def test_qat(self): finalize=True).run_test() QuantizationAwareTrainingQuantizerHolderTest(self).run_test() QuantizationAwareTrainingMixedPrecisionCfgTest(self).run_test() - QuantizationAwareTrainingMixedPrecisionKpiCfgTest(self).run_test() + QuantizationAwareTrainingMixedPrecisionRUCfgTest(self).run_test() def test_bn_attributes_quantization(self): """ diff --git a/tests/pytorch_tests/pruning_tests/feature_networks/pruning_pytorch_feature_test.py b/tests/pytorch_tests/pruning_tests/feature_networks/pruning_pytorch_feature_test.py index 323e9029e..90ded1040 100644 --- a/tests/pytorch_tests/pruning_tests/feature_networks/pruning_pytorch_feature_test.py +++ b/tests/pytorch_tests/pruning_tests/feature_networks/pruning_pytorch_feature_test.py @@ -49,12 +49,12 @@ def get_tpc(self): tp = generate_test_tp_model({'simd_size': self.simd}) return generate_pytorch_tpc(name="simd_test", tp_model=tp) - def get_kpi(self, dense_model_num_params, model): + def get_resource_utilization(self, dense_model_num_params, model): if not self.use_bn and torch.nn.BatchNorm2d in [type(m) for m in model.modules()]: # substract the 4 bn params if the bn is not used. 
This is because Back2Framework will create a model without bn dense_model_num_params -= count_model_prunable_params(model.bn) # Remove only one group of channels only one parameter should be pruned - return mct.core.KPI(weights_memory=(dense_model_num_params-self.simd) * 4) + return mct.core.ResourceUtilization(weights_memory=(dense_model_num_params - self.simd) * 4) def run_test(self): feature_networks = self.create_networks() @@ -64,7 +64,7 @@ def run_test(self): # self.dense_model_num_params = sum(p.numel() for p in model_float.parameters()) dense_model_num_params = count_model_prunable_params(model_float) pruned_model, pruning_info = mct.pruning.pytorch_pruning_experimental(model=model_float, - target_kpi=self.get_kpi(dense_model_num_params, model_float), + target_resource_utilization=self.get_resource_utilization(dense_model_num_params, model_float), representative_data_gen=self.representative_data_gen_experimental, pruning_config=self.get_pruning_config(), target_platform_capabilities=self.get_tpc()) diff --git a/tests/pytorch_tests/pruning_tests/test_pretrained_models.py b/tests/pytorch_tests/pruning_tests/test_pretrained_models.py index f5e3da97f..efa336268 100644 --- a/tests/pytorch_tests/pruning_tests/test_pretrained_models.py +++ b/tests/pytorch_tests/pruning_tests/test_pretrained_models.py @@ -129,7 +129,7 @@ def run_test(self, cr, dense_model, test_retraining=False): # Perform pruning on the dense model. 
pruned_model, pruning_info = mct.pruning.pytorch_pruning_experimental( model=dense_model, - target_kpi=mct.core.KPI(weights_memory=dense_nparams * FP32_BYTES_PER_PARAMETER * cr), + target_resource_utilization=mct.core.ResourceUtilization(weights_memory=dense_nparams * FP32_BYTES_PER_PARAMETER * cr), representative_data_gen=self.representative_dataset, pruning_config=mct.pruning.PruningConfig( num_score_approximations=1, diff --git a/tests/test_suite.py b/tests/test_suite.py index 3109d5216..9fbad2886 100644 --- a/tests/test_suite.py +++ b/tests/test_suite.py @@ -22,7 +22,7 @@ from tests.common_tests.function_tests.test_folder_image_loader import TestFolderLoader # ---------------- Individual test suites from tests.common_tests.function_tests.test_histogram_collector import TestHistogramCollector -from tests.common_tests.function_tests.test_kpi_object import TestKPIObject +from tests.common_tests.function_tests.test_resource_utilization_object import TestResourceUtilizationObject from tests.common_tests.function_tests.test_threshold_selection import TestThresholdSelection from tests.common_tests.test_doc_examples import TestCommonDocsExamples from tests.common_tests.test_tp_model import TargetPlatformModelingTest, OpsetTest, QCOptionsTest, FusingTest @@ -62,7 +62,7 @@ TestSensitivityEvalWithNonSupportedOutputNodes from tests.keras_tests.function_tests.test_set_layer_to_bitwidth import TestKerasSetLayerToBitwidth from tests.keras_tests.function_tests.test_export_keras_fully_quantized_model import TestKerasFakeQuantExporter - from tests.keras_tests.function_tests.test_kpi_data import TestKPIData + from tests.keras_tests.function_tests.test_resource_utilization_data import TestResourceUtilizationData from tests.keras_tests.exporter_tests.test_runner import ExporterTestsRunner from tests.keras_tests.function_tests.test_get_gptq_config import TestGetGPTQConfig from tests.keras_tests.function_tests.test_gptq_loss_functions import TestGPTQLossFunctions @@ -104,7 +104,7 @@ 
suiteList.append(unittest.TestLoader().loadTestsFromTestCase(QCOptionsTest)) suiteList.append(unittest.TestLoader().loadTestsFromTestCase(FusingTest)) suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestCommonDocsExamples)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestKPIObject)) + suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestResourceUtilizationObject)) # Add TF tests only if tensorflow is installed if found_tf: @@ -136,7 +136,7 @@ suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestKerasSetLayerToBitwidth)) suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestSensitivityEvalWithNonSupportedOutputNodes)) suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestKerasFakeQuantExporter)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestKPIData)) + suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestResourceUtilizationData)) suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestFileLogger)) suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestGetGPTQConfig)) suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestGPTQLossFunctions)) diff --git a/tests/trainable_infrastructure_tests/keras/trainable_keras/test_keras_base_quantizer.py b/tests/trainable_infrastructure_tests/keras/trainable_keras/test_keras_base_quantizer.py index 926e68cd3..ba9c58208 100644 --- a/tests/trainable_infrastructure_tests/keras/trainable_keras/test_keras_base_quantizer.py +++ b/tests/trainable_infrastructure_tests/keras/trainable_keras/test_keras_base_quantizer.py @@ -40,8 +40,9 @@ def get_weights_quantization_config(self): def run_test(self): with self.unit_test.assertRaises(Exception) as e: ZeroWeightsQuantizer(self.get_weights_quantization_config()) + # TODO: In next MCTQ release, this test will fail due to KMEANS removal. 
Fix: "QuantizationMethod.SYMMETRIC: 3" -> "QuantizationMethod.SYMMETRIC: 2" self.unit_test.assertEqual(f'Quantization method mismatch. Expected methods: [, ' - f''f'], received: QuantizationMethod.UNIFORM.', + f''f'], received: QuantizationMethod.UNIFORM.', str(e.exception)) with self.unit_test.assertRaises(Exception) as e: @@ -72,8 +73,9 @@ def get_activation_quantization_config(self): def run_test(self): with self.unit_test.assertRaises(Exception) as e: ZeroActivationsQuantizer(self.get_activation_quantization_config()) + # TODO: In next MCTQ release, this test will fail due to KMEANS removal. Fix: "QuantizationMethod.SYMMETRIC: 3" -> "QuantizationMethod.SYMMETRIC: 2" self.unit_test.assertEqual(f'Quantization method mismatch. Expected methods: [, ' - f'], received: QuantizationMethod.UNIFORM.', + f'], received: QuantizationMethod.UNIFORM.', str(e.exception)) with self.unit_test.assertRaises(Exception) as e: diff --git a/tests/trainable_infrastructure_tests/pytorch/trainable_pytorch/test_pytorch_base_quantizer.py b/tests/trainable_infrastructure_tests/pytorch/trainable_pytorch/test_pytorch_base_quantizer.py index 090b077fb..0f149588e 100644 --- a/tests/trainable_infrastructure_tests/pytorch/trainable_pytorch/test_pytorch_base_quantizer.py +++ b/tests/trainable_infrastructure_tests/pytorch/trainable_pytorch/test_pytorch_base_quantizer.py @@ -40,11 +40,11 @@ def run_test(self): with self.unit_test.assertRaises(Exception) as e: ZeroWeightsQuantizer(self.get_weights_quantization_config()) # TODO: In next MCTQ release, this test will fail due to KMEANS removal. Fix: "QuantizationMethod.SYMMETRIC: 3" -> "QuantizationMethod.SYMMETRIC: 2" - self.unit_test.assertEqual(f'Quantization method mismatch expected: [, ] and got QuantizationMethod.UNIFORM', str(e.exception)) + self.unit_test.assertEqual(f'Quantization method mismatch. 
Expected methods: [, ], received: QuantizationMethod.UNIFORM.', str(e.exception)) with self.unit_test.assertRaises(Exception) as e: ZeroWeightsQuantizer(self.get_activation_quantization_config()) - self.unit_test.assertEqual(f'Expect weight quantization got activation', str(e.exception)) + self.unit_test.assertEqual(f'Expected weight quantization configuration; received activation quantization instead.', str(e.exception)) weight_quantization_config = super(TestPytorchBaseWeightsQuantizer, self).get_weights_quantization_config() quantizer = ZeroWeightsQuantizer(weight_quantization_config) @@ -68,11 +68,11 @@ def run_test(self): with self.unit_test.assertRaises(Exception) as e: ZeroActivationsQuantizer(self.get_activation_quantization_config()) # TODO: In next MCTQ release, this test will fail due to KMEANS removal. Fix: "QuantizationMethod.SYMMETRIC: 3" -> "QuantizationMethod.SYMMETRIC: 2" - self.unit_test.assertEqual(f'Quantization method mismatch expected: [, ] and got QuantizationMethod.UNIFORM', str(e.exception)) + self.unit_test.assertEqual(f'Quantization method mismatch. Expected methods: [, ], received: QuantizationMethod.UNIFORM.', str(e.exception)) with self.unit_test.assertRaises(Exception) as e: ZeroActivationsQuantizer(self.get_weights_quantization_config()) - self.unit_test.assertEqual(f'Expect activation quantization got weight', str(e.exception)) + self.unit_test.assertEqual(f'Expected activation quantization configuration; received weight quantization instead.', str(e.exception)) activation_quantization_config = super(TestPytorchBaseActivationQuantizer, self).get_activation_quantization_config() quantizer = ZeroActivationsQuantizer(activation_quantization_config) @@ -94,6 +94,6 @@ def run_test(self): with self.unit_test.assertRaises(Exception) as e: test_quantizer = _TestQuantizer(self.get_weights_quantization_config()) self.unit_test.assertEqual( - "A quantizer class that inherit from BaseTrainableQuantizer is not defined appropriately." 
- "Either it misses the @mark_quantizer decorator or the decorator is not used correctly.", + "Quantizer class inheriting from 'BaseTrainableQuantizer' is improperly defined. " + "Ensure it includes the '@mark_quantizer' decorator and is correctly applied.", str(e.exception)) diff --git a/tutorials/notebooks/keras/pruning/example_keras_pruning_mnist.ipynb b/tutorials/notebooks/keras/pruning/example_keras_pruning_mnist.ipynb index ecccc8048..888ac4d3d 100644 --- a/tutorials/notebooks/keras/pruning/example_keras_pruning_mnist.ipynb +++ b/tutorials/notebooks/keras/pruning/example_keras_pruning_mnist.ipynb @@ -280,9 +280,9 @@ { "cell_type": "markdown", "source": [ - "### Create KPI\n", + "### Create Resource Utilization constraint\n", "\n", - "We're defining a Key Performance Indicator (KPI) to constrain the memory usage of our pruned model.\n", + "We're defining a resource_utilization limit to constrain the memory usage of our pruned model.\n", "\n", "By setting a target that limits the model's weight memory to half of its original size (around 427KB), we aim to achieve a compression ratio of 50%:" ], @@ -293,11 +293,11 @@ { "cell_type": "code", "source": [ - "# Create KPI to limit the pruned model weights memory to a certain KPI\n", + "# Create a ResourceUtilization object to limit the pruned model weights memory to a certain resource constraint\n", "dense_model_memory = 427*(2**10) # Original model weights requiers ~427KB\n", "compression_ratio = 0.5\n", "\n", - "kpi = mct.core.KPI(weights_memory=dense_model_memory*compression_ratio)" + "resource_utilization = mct.core.ResourceUtilization(weights_memory=dense_model_memory*compression_ratio)" ], "metadata": { "id": "doJgwbSxsCbr" @@ -310,7 +310,7 @@ "source": [ "### Prune Model\n", "\n", - "We're ready to execute the actual pruning using MCT's keras_pruning_experimental function. 
The model is pruned according to our defined KPI and using the representative dataset generated earlier.\n", + "We're ready to execute the actual pruning using MCT's keras_pruning_experimental function. The model is pruned according to our defined target Resource Utilization and using the representative dataset generated earlier.\n", "\n", "Each channel's importance is measured using LFH (Label-Free-Hessian)\n", "which approximates the Hessian of the loss function w.r.t model's weights.\n", @@ -331,7 +331,7 @@ "target_platform_cap = get_tpc()\n", "pruned_model, pruning_info = mct.pruning.keras_pruning_experimental(\n", " model=model,\n", - " target_kpi=kpi,\n", + " target_resource_utilization=resource_utilization,\n", " representative_data_gen=representative_data_gen,\n", " target_platform_capabilities=target_platform_cap,\n", " pruning_config=mct.pruning.PruningConfig(num_score_approximations=num_score_approximations)\n", diff --git a/tutorials/notebooks/keras/ptq/example_keras_effdet_lite0.ipynb b/tutorials/notebooks/keras/ptq/example_keras_effdet_lite0.ipynb index 20b269a39..f8bf39695 100644 --- a/tutorials/notebooks/keras/ptq/example_keras_effdet_lite0.ipynb +++ b/tutorials/notebooks/keras/ptq/example_keras_effdet_lite0.ipynb @@ -385,14 +385,14 @@ "# Set IMX500-v1 TPC\n", "tpc = mct.get_target_platform_capabilities(\"tensorflow\", 'imx500', target_platform_version='v1')\n", "# set weights memory size, so the quantized model will fit the IMX500 memory\n", - "kpi = mct.core.KPI(weights_memory=2674291)\n", + "resource_utilization = mct.core.ResourceUtilization(weights_memory=2674291)\n", "# set MixedPrecision configuration for compressing the weights\n", "mp_config = mct.core.MixedPrecisionQuantizationConfig(use_hessian_based_scores=False)\n", "core_config = mct.core.CoreConfig(mixed_precision_config=mp_config)\n", "quant_model, _ = mct.ptq.keras_post_training_quantization(\n", " model,\n", " get_representative_dataset(20),\n", - " target_kpi=kpi,\n", + " 
target_resource_utilization=resource_utilization,\n", " core_config=core_config,\n", " target_platform_capabilities=tpc)" ], diff --git a/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision.ipynb b/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision.ipynb index 3e67bc606..8716e4150 100644 --- a/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision.ipynb +++ b/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision.ipynb @@ -237,7 +237,7 @@ "The candidates bit-width for quantization are defined in the target platform model. \n", "\n", "Finally, we need to set the **hardware constraints** which we want our quantized model to fit into.\n", - "These are defined using a `KPI` object.\n", + "These are defined using a `ResourceUtilization` object.\n", "In this example, we set a **weights memory** constraint, by computing the size of the desired model's parameters under a compression of the model to 75% of its fixed-point 8-bit precision." ] }, @@ -254,19 +254,19 @@ "# Specify the target platform capability (TPC)\n", "tpc = mct.get_target_platform_capabilities(\"tensorflow\", 'imx500', target_platform_version='v1')\n", "\n", - "# Get KPI information to constraint your model's memory size. Retrieve a KPI object with helpful information of each KPI metric, to constraint the quantized model to the desired memory size.\n", - "kpi_data = mct.core.keras_kpi_data(float_model,\n", + "# Get Resource Utilization information to constraint your model's memory size. 
Retrieve a ResourceUtilization object with helpful information of each resource metric, to constraint the quantized model to the desired memory size.\n", + "resource_utilization_data = mct.core.keras_resource_utilization_data(float_model,\n", " representative_dataset_gen,\n", " core_config=core_config,\n", " target_platform_capabilities=tpc)\n", "\n", - "# Set a constraint for each of the KPI metrics.\n", - "# Create a KPI object to limit our returned model's size. Note that this values affects only layers and attributes\n", + "# Set a constraint for each of the Resource Utilization metrics.\n", + "# Create a ResourceUtilization object to limit our returned model's size. Note that these values affect only layers and attributes\n", "# that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value,\n", "# while the bias will not)\n", "# examples:\n", "weights_compression_ratio = 0.75 # About 0.75 of the model's weights memory size when quantized with 8 bits.\n", - "kpi = mct.core.KPI(kpi_data.weights_memory * weights_compression_ratio)" + "resource_utilization = mct.core.ResourceUtilization(resource_utilization_data.weights_memory * weights_compression_ratio)" ], "metadata": { "collapsed": false @@ -296,7 +296,7 @@ "quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(\n", " float_model,\n", " representative_dataset_gen,\n", - " target_kpi=kpi,\n", + " target_resource_utilization=resource_utilization,\n", " core_config=core_config,\n", " target_platform_capabilities=tpc)" ] diff --git a/tutorials/notebooks/keras/ptq/example_keras_yolov8n.ipynb b/tutorials/notebooks/keras/ptq/example_keras_yolov8n.ipynb index f32db0e74..a544cc212 100644 --- a/tutorials/notebooks/keras/ptq/example_keras_yolov8n.ipynb +++ b/tutorials/notebooks/keras/ptq/example_keras_yolov8n.ipynb @@ -291,17 +291,17 @@ "config = mct.core.CoreConfig(mixed_precision_config=mp_config,\n", " 
quantization_config=mct.core.QuantizationConfig(shift_negative_activation_correction=True))\n", "\n", - "# Define memory KPI for mixed precision weights quantization (75% of 'standard' 8bits quantization)\n", - "kpi_data = mct.core.keras_kpi_data(model,\n", + "# Define target Resource Utilization for mixed precision weights quantization (75% of 'standard' 8bits quantization)\n", + "resource_utilization_data = mct.core.keras_resource_utilization_data(model,\n", " representative_dataset_gen,\n", " config,\n", " target_platform_capabilities=tpc)\n", - "kpi = mct.core.KPI(kpi_data.weights_memory * 0.75)\n", + "resource_utilization = mct.core.ResourceUtilization(resource_utilization_data.weights_memory * 0.75)\n", "\n", "# Perform post training quantization\n", "quant_model, _ = mct.ptq.keras_post_training_quantization(model,\n", " representative_dataset_gen,\n", - " target_kpi=kpi,\n", + " target_resource_utilization=resource_utilization,\n", " core_config=config,\n", " target_platform_capabilities=tpc)\n", "print('Quantized model is ready')" diff --git a/tutorials/notebooks/keras/ptq/keras_yolov8n_for_imx500.ipynb b/tutorials/notebooks/keras/ptq/keras_yolov8n_for_imx500.ipynb index f0fb703a6..dda621b89 100644 --- a/tutorials/notebooks/keras/ptq/keras_yolov8n_for_imx500.ipynb +++ b/tutorials/notebooks/keras/ptq/keras_yolov8n_for_imx500.ipynb @@ -187,17 +187,17 @@ "config = mct.core.CoreConfig(mixed_precision_config=mp_config,\n", " quantization_config=mct.core.QuantizationConfig(shift_negative_activation_correction=True))\n", "\n", - "# Define memory KPI for mixed precision weights quantization (75% of 'standard' 8bits quantization)\n", - "kpi_data = mct.core.keras_kpi_data(model,\n", + "# Define target Resource Utilization for mixed precision weights quantization (75% of 'standard' 8bits quantization)\n", + "resource_utilization_data = mct.core.keras_resource_utilization_data(model,\n", " representative_dataset_gen,\n", " config,\n", " 
target_platform_capabilities=tpc)\n", - "kpi = mct.core.KPI(kpi_data.weights_memory * 0.75)\n", + "resource_utilization = mct.core.ResourceUtilization(resource_utilization_data.weights_memory * 0.75)\n", "\n", "# Perform post training quantization\n", "quant_model, _ = mct.ptq.keras_post_training_quantization(model,\n", " representative_dataset_gen,\n", - " target_kpi=kpi,\n", + " target_resource_utilization=resource_utilization,\n", " core_config=config,\n", " target_platform_capabilities=tpc)\n", "print('Quantized model is ready')" diff --git a/tutorials/notebooks/pytorch/pruning/example_pytorch_pruning_mnist.ipynb b/tutorials/notebooks/pytorch/pruning/example_pytorch_pruning_mnist.ipynb index af7dee661..4ba1f4979 100644 --- a/tutorials/notebooks/pytorch/pruning/example_pytorch_pruning_mnist.ipynb +++ b/tutorials/notebooks/pytorch/pruning/example_pytorch_pruning_mnist.ipynb @@ -293,13 +293,13 @@ "outputs": [], "source": [ "compression_ratio = 0.5\n", - "# Define KPI for pruning. Each float32 parameter requires 4 bytes, \n", + "# Define Resource Utilization constraint for pruning. 
Each float32 parameter requires 4 bytes, \n", "# hence we multiply the total parameter count by 4 to calculate the memory footprint.\n", - "target_kpi = mct.core.KPI(weights_memory=dense_model_params * 4 * compression_ratio)\n", + "target_resource_utilization = mct.core.ResourceUtilization(weights_memory=dense_model_params * 4 * compression_ratio)\n", "# Define a pruning configuration\n", "pruning_config=mct.pruning.PruningConfig(num_score_approximations=1)\n", "# Prune the model\n", - "pruned_model, pruning_info = mct.pruning.pytorch_pruning_experimental(model=dense_model, target_kpi=target_kpi, representative_data_gen=representative_data_gen, pruning_config=pruning_config)" + "pruned_model, pruning_info = mct.pruning.pytorch_pruning_experimental(model=dense_model, target_resource_utilization=target_resource_utilization, representative_data_gen=representative_data_gen, pruning_config=pruning_config)" ], "metadata": { "collapsed": false diff --git a/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenetv2_cifar100_mixed_precision.ipynb b/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenetv2_cifar100_mixed_precision.ipynb index 6e967f7fd..0923f7dce 100644 --- a/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenetv2_cifar100_mixed_precision.ipynb +++ b/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenetv2_cifar100_mixed_precision.ipynb @@ -501,21 +501,21 @@ }, "outputs": [], "source": [ - "# Get KPI information to constraint your model's memory size.\n", - "# Retrieve a KPI object with helpful information of each KPI metric,\n", + "# Get Resource Utilization information to constraint your model's memory size.\n", + "# Retrieve a ResourceUtilization object with helpful information of each resource utilization metric,\n", "# to constraint the quantized model to the desired memory size.\n", - "kpi_data = mct.core.pytorch_kpi_data(model,\n", + "resource_utilization_data = mct.core.pytorch_resource_utilization_data(model,\n", " 
representative_data_gen,\n", " configuration,\n", " target_platform_capabilities=target_platform_cap)\n", "\n", - "# Set a constraint for each of the KPI metrics.\n", - "# Create a KPI object to limit our returned model's size. Note that this values affect only layers and attributes\n", + "# Set a constraint for each of the resource utilization metrics.\n", + "# Create a ResourceUtilization object to limit our returned model's size. Note that these values affect only layers and attributes\n", "# that should be quantized (for example, the kernel of Conv2D in Pytorch will be affected by this value,\n", "# while the bias will not)\n", "# examples:\n", "# weights_compression_ratio = 0.75 - About 0.75 of the model's weights memory size when quantized with 8 bits.\n", - "kpi = mct.core.KPI(kpi_data.weights_memory * 0.75)" + "resource_utilization = mct.core.ResourceUtilization(resource_utilization_data.weights_memory * 0.75)" ] }, { @@ -537,7 +537,7 @@ "source": [ "quantized_model, quantization_info = mct.ptq.pytorch_post_training_quantization(model,\n", " representative_data_gen,\n", - " target_kpi=kpi,\n", + " target_resource_utilization=resource_utilization,\n", " core_config=configuration,\n", " target_platform_capabilities=target_platform_cap)\n", " " diff --git a/tutorials/quick_start/common/results.py b/tutorials/quick_start/common/results.py index 142f58c46..4f50550f8 100644 --- a/tutorials/quick_start/common/results.py +++ b/tutorials/quick_start/common/results.py @@ -56,7 +56,7 @@ def __init__(self, user_info: UserInformation, user_info (UserInformation): Quantization information returned from MCT tpc_info (TPCInfo): The target platform capabilities information which is provided to the MCT. quantization_workflow (str): String to describe the quantization workflow (PTQ, GPTQ etc.). 
- mp_weights_compression (float): Weights compression factor for mixed precision KPI + mp_weights_compression (float): Weights compression factor for mixed precision Resource Utilization constraint. """ self.user_info = user_info self.tpc_info = tpc_info @@ -124,7 +124,7 @@ def parse_results(params: dict, float_acc: float, quant_acc: float, quant_info: res['TotalImages'] = dataset_info.n_images res['FloatAcc'] = round(float_acc, 4) res['QuantAcc'] = round(quant_acc, 4) - res['Size[MB]'] = round(quant_info.user_info.final_kpi.weights_memory / 1e6, 2) + res['Size[MB]'] = round(quant_info.user_info.final_resource_utilization.weights_memory / 1e6, 2) res['BitsConfig'] = bit_config res['QuantWorkflow'] = quant_info.quantization_workflow res['TPC'] = quant_info.tpc_info.tp_model_name + '-' + quant_info.tpc_info.version diff --git a/tutorials/quick_start/keras_fw/quant.py b/tutorials/quick_start/keras_fw/quant.py index 1365c929c..fe6be8b9d 100644 --- a/tutorials/quick_start/keras_fw/quant.py +++ b/tutorials/quick_start/keras_fw/quant.py @@ -23,7 +23,7 @@ REPRESENTATIVE_DATASET_FOLDER, TARGET_PLATFORM_NAME, TARGET_PLATFORM_VERSION, BYTES_TO_FP32, MP_WEIGHTS_COMPRESSION -from model_compression_toolkit.core import MixedPrecisionQuantizationConfig, CoreConfig, KPI +from model_compression_toolkit.core import MixedPrecisionQuantizationConfig, CoreConfig, ResourceUtilization from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities from tutorials.quick_start.common.results import QuantInfo from tutorials.quick_start.common.tpc_info import get_tpc_info @@ -44,25 +44,25 @@ def get_tpc(target_platform_name: str, target_platform_version: str) -> TargetPl return mct.get_target_platform_capabilities('tensorflow', target_platform_name, target_platform_version) -def get_target_kpi(model, weights_compression, representative_data_gen, core_config, tpc): +def get_target_resource_utilization(model, weights_compression, 
representative_data_gen, core_config, tpc): """ Calculates the model's required size according to the given weights compression rate, to provide as a constraint for mixed precision search. Args: - model: The model to calculate the KPI. + model: The model to calculate the target resource utilization for. weights_compression: The required weights compression ratio. representative_data_gen: Callable function to generate the representative dataset. core_config (CoreConfig): CoreConfig containing parameters for quantization and mixed precision. tpc (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the TensorFlow model according to. Returns: - A KPI object computed from MCT and contains info about the target model size. + A ResourceUtilization object computed from MCT and contains info about the target model size. """ - kpi_data = mct.core.keras_kpi_data(model, representative_data_gen, core_config=core_config, - target_platform_capabilities=tpc) - weights_kpi = BYTES_TO_FP32 * kpi_data.weights_memory / weights_compression # (4 bytes for fp32) * weights memory(in Bytes) / compression rate - return KPI(weights_memory=weights_kpi) + ru_data = mct.core.keras_resource_utilization_data(model, representative_data_gen, core_config=core_config, + target_platform_capabilities=tpc) + weights_ru = BYTES_TO_FP32 * ru_data.weights_memory / weights_compression # (4 bytes for fp32) * weights memory(in Bytes) / compression rate + return ResourceUtilization(weights_memory=weights_ru) def quantize(model: tf.keras.Model, @@ -100,11 +100,11 @@ def quantize(model: tf.keras.Model, core_conf = CoreConfig(quantization_config=mct.core.QuantizationConfig( shift_negative_activation_correction=True), mixed_precision_config=mp_conf) - target_kpi = get_target_kpi(model, mp_wcr, representative_data_gen, core_conf, tpc) + target_resource_utilization = get_target_resource_utilization(model, mp_wcr, representative_data_gen, core_conf, tpc) else: core_conf = 
CoreConfig(quantization_config=mct.core.QuantizationConfig( shift_negative_activation_correction=True)) - target_kpi = None + target_resource_utilization = None # Quantize model if args.get('gptq', False): @@ -119,7 +119,7 @@ def quantize(model: tf.keras.Model, quantized_model, quantization_info = \ mct.gptq.keras_gradient_post_training_quantization(model, representative_data_gen=representative_data_gen, - target_kpi=target_kpi, + target_resource_utilization=target_resource_utilization, core_config=core_conf, gptq_config=gptq_conf, gptq_representative_data_gen=representative_data_gen, @@ -131,7 +131,7 @@ def quantize(model: tf.keras.Model, quantized_model, quantization_info = \ mct.ptq.keras_post_training_quantization(model, representative_data_gen=representative_data_gen, - target_kpi=target_kpi, + target_resource_utilization=target_resource_utilization, core_config=core_conf, target_platform_capabilities=tpc) diff --git a/tutorials/quick_start/pytorch_fw/quant.py b/tutorials/quick_start/pytorch_fw/quant.py index 497ce3ac8..53a9520d0 100644 --- a/tutorials/quick_start/pytorch_fw/quant.py +++ b/tutorials/quick_start/pytorch_fw/quant.py @@ -23,7 +23,7 @@ from common.constants import NUM_REPRESENTATIVE_IMAGES, BATCH_SIZE, REPRESENTATIVE_DATASET_FOLDER, \ TARGET_PLATFORM_NAME, TARGET_PLATFORM_VERSION -from model_compression_toolkit.core import MixedPrecisionQuantizationConfig, CoreConfig, KPI +from model_compression_toolkit.core import MixedPrecisionQuantizationConfig, CoreConfig, ResourceUtilization from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities from tutorials.quick_start.common.constants import BYTES_TO_FP32, MP_WEIGHTS_COMPRESSION from tutorials.quick_start.common.results import QuantInfo @@ -45,7 +45,7 @@ def get_tpc(target_platform_name: str, target_platform_version: str) -> TargetPl return mct.get_target_platform_capabilities('pytorch', target_platform_name, target_platform_version) -def 
get_target_kpi(model, weights_compression, representative_data_gen, core_config, tpc): +def get_target_resource_utilization(model, weights_compression, representative_data_gen, core_config, tpc): """ Calculates the model's required size according to the given weights compression rate, to provide as a constraint for mixed precision search. @@ -57,13 +57,13 @@ def get_target_kpi(model, weights_compression, representative_data_gen, core_con tpc (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the PyTorch model according to. Returns: - A KPI object computed from MCT and contains info about the target model size. + A ResourceUtilization object computed from MCT and contains info about the target model size. """ - kpi_data = mct.core.pytorch_kpi_data(model, representative_data_gen, core_config=core_config, - target_platform_capabilities=tpc) - weights_kpi = BYTES_TO_FP32 * kpi_data.weights_memory / weights_compression # (4 bytes for fp32) * weights memory(in Bytes) / compression rate - return KPI(weights_memory=weights_kpi) + ru_data = mct.core.pytorch_resource_utilization_data(model, representative_data_gen, core_config=core_config, + target_platform_capabilities=tpc) + weights_ru = BYTES_TO_FP32 * ru_data.weights_memory / weights_compression # (4 bytes for fp32) * weights memory(in Bytes) / compression rate + return ResourceUtilization(weights_memory=weights_ru) def quantize(model: nn.Module, @@ -101,11 +101,11 @@ def quantize(model: nn.Module, core_conf = CoreConfig(quantization_config=mct.core.QuantizationConfig( shift_negative_activation_correction=True), mixed_precision_config=mp_conf) - target_kpi = get_target_kpi(model, mp_wcr, representative_data_gen, core_conf, tpc) + target_resource_utilization = get_target_resource_utilization(model, mp_wcr, representative_data_gen, core_conf, tpc) else: core_conf = CoreConfig(quantization_config=mct.core.QuantizationConfig( shift_negative_activation_correction=True)) - target_kpi = None + 
target_resource_utilization = None # Quantize model if args.get('gptq', False): @@ -120,7 +120,7 @@ def quantize(model: nn.Module, quantized_model, quantization_info = \ mct.gptq.pytorch_gradient_post_training_quantization(model, representative_data_gen=representative_data_gen, - target_kpi=target_kpi, + target_resource_utilization=target_resource_utilization, core_config=core_conf, gptq_config=gptq_conf, gptq_representative_data_gen=representative_data_gen,