diff --git a/docs/.buildinfo b/docs/.buildinfo index eafc4f405..ee11fdd63 100644 --- a/docs/.buildinfo +++ b/docs/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 463b5d411b812fb296a8f7bff970d1cf +config: e8534f6a2f0b425ce862dbeb0800af00 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/_sources/api/api_docs/classes/GradientPTQConfig.rst.txt b/docs/_sources/api/api_docs/classes/GradientPTQConfig.rst.txt index c14ec1e7c..711c4f2d1 100644 --- a/docs/_sources/api/api_docs/classes/GradientPTQConfig.rst.txt +++ b/docs/_sources/api/api_docs/classes/GradientPTQConfig.rst.txt @@ -8,7 +8,7 @@ GradientPTQConfig Class ================================= -**The following API can be used to create a GradientPTQConfig instance which can be used for post training quantization using knowledge distillation from a teacher (float Keras model) to a student (the quantized Keras model)** +**The following API can be used to create a GradientPTQConfig instance which can be used for post training quantization using knowledge distillation from a teacher (float model) to a student (the quantized model)** .. autoclass:: model_compression_toolkit.gptq.GradientPTQConfig :members: @@ -30,3 +30,22 @@ RoundingType .. autoclass:: model_compression_toolkit.gptq.RoundingType :members: + + +===================================== +GradualActivationQuantizationConfig +===================================== + +**The following API can be used to configure the gradual activation quantization when using GPTQ.** + +.. autoclass:: model_compression_toolkit.gptq.GradualActivationQuantizationConfig + :members: + + +===================================== +QFractionLinearAnnealingConfig +===================================== + +.. 
autoclass:: model_compression_toolkit.gptq.QFractionLinearAnnealingConfig + :members: + diff --git a/docs/_sources/api/api_docs/index.rst.txt b/docs/_sources/api/api_docs/index.rst.txt index 0c4433163..cd78a4b5c 100644 --- a/docs/_sources/api/api_docs/index.rst.txt +++ b/docs/_sources/api/api_docs/index.rst.txt @@ -106,9 +106,9 @@ keras_load_quantized_model - :ref:`keras_load_quantized_model`: A function to load a quantized keras model. -target_platform -================ -- :ref:`target_platform`: Module to create and model hardware-related settings to optimize the model according to, by the hardware the optimized model will use during inference. +target_platform_capabilities +============================== +- :ref:`target_platform_capabilities`: Module to create and model hardware-related settings to optimize the model according to, by the hardware the optimized model will use during inference. - :ref:`get_target_platform_capabilities`: A function to get a target platform model for Tensorflow and Pytorch. - :ref:`DefaultDict`: Util class for creating a TargetPlatformCapabilities. diff --git a/docs/_sources/api/api_docs/methods/get_target_platform_capabilities.rst.txt b/docs/_sources/api/api_docs/methods/get_target_platform_capabilities.rst.txt index cc623b66a..e8346a359 100644 --- a/docs/_sources/api/api_docs/methods/get_target_platform_capabilities.rst.txt +++ b/docs/_sources/api/api_docs/methods/get_target_platform_capabilities.rst.txt @@ -4,7 +4,7 @@ ======================================= -Get TargetPlatformCapabilities +Get FrameworkQuantizationCapabilities ======================================= .. 
autofunction:: model_compression_toolkit.get_target_platform_capabilities diff --git a/docs/_sources/api/api_docs/modules/layer_filters.rst.txt b/docs/_sources/api/api_docs/modules/layer_filters.rst.txt index 2279e54b6..f21836e08 100644 --- a/docs/_sources/api/api_docs/modules/layer_filters.rst.txt +++ b/docs/_sources/api/api_docs/modules/layer_filters.rst.txt @@ -15,30 +15,30 @@ one may use the next filters to check if a layer configuration holds the created Attribute Filters ================== -.. autoclass:: model_compression_toolkit.target_platform.AttributeFilter +.. autoclass:: model_compression_toolkit.target_platform_capabilities.AttributeFilter | -.. autoclass:: model_compression_toolkit.target_platform.Eq +.. autoclass:: model_compression_toolkit.target_platform_capabilities.Eq | -.. autoclass:: model_compression_toolkit.target_platform.NotEq +.. autoclass:: model_compression_toolkit.target_platform_capabilities.NotEq | -.. autoclass:: model_compression_toolkit.target_platform.Greater +.. autoclass:: model_compression_toolkit.target_platform_capabilities.Greater | -.. autoclass:: model_compression_toolkit.target_platform.GreaterEq +.. autoclass:: model_compression_toolkit.target_platform_capabilities.GreaterEq | -.. autoclass:: model_compression_toolkit.target_platform.Smaller +.. autoclass:: model_compression_toolkit.target_platform_capabilities.Smaller | -.. autoclass:: model_compression_toolkit.target_platform.SmallerEq +.. autoclass:: model_compression_toolkit.target_platform_capabilities.SmallerEq diff --git a/docs/_sources/api/api_docs/modules/qat_config.rst.txt b/docs/_sources/api/api_docs/modules/qat_config.rst.txt index 9583aee88..c7dfcc9ea 100644 --- a/docs/_sources/api/api_docs/modules/qat_config.rst.txt +++ b/docs/_sources/api/api_docs/modules/qat_config.rst.txt @@ -10,10 +10,7 @@ qat_config Module TrainingMethod ================================ -**Select a QAT training method:** - -.. 
autoclass:: model_compression_toolkit.qat.TrainingMethod - +In order to select a training method, please visit the :ref:`trainable_infrastructure API.` | diff --git a/docs/_sources/api/api_docs/modules/target_platform.rst.txt b/docs/_sources/api/api_docs/modules/target_platform_capabilities.rst.txt similarity index 51% rename from docs/_sources/api/api_docs/modules/target_platform.rst.txt rename to docs/_sources/api/api_docs/modules/target_platform_capabilities.rst.txt index c393cb21a..5e0dd9252 100644 --- a/docs/_sources/api/api_docs/modules/target_platform.rst.txt +++ b/docs/_sources/api/api_docs/modules/target_platform_capabilities.rst.txt @@ -1,11 +1,11 @@ :orphan: -.. _ug-target_platform: +.. _ug-target_platform_capabilities: -================================= -target_platform Module -================================= +===================================== +target_platform_capabilities Module +===================================== MCT can be configured to quantize and optimize models for different hardware settings. For example, when using qnnpack backend for Pytorch model inference, Pytorch `quantization @@ -14,7 +14,7 @@ uses `per-tensor weights quantization `_. -This can be addressed in MCT by using the target_platform module, that can configure different +This can be addressed in MCT by using the target_platform_capabilities module, that can configure different parameters that are hardware-related, and the optimization process will use this to optimize the model accordingly. Models for IMX500, TFLite and qnnpack can be observed `here `_, and can be used using :ref:`get_target_platform_capabilities function`. @@ -27,7 +27,7 @@ Models for IMX500, TFLite and qnnpack can be observed `here `. - - -TargetPlatformCapabilities -============================= -.. autoclass:: model_compression_toolkit.target_platform.TargetPlatformCapabilities - - - +.. 
autoclass:: model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup diff --git a/docs/_sources/api/api_docs/modules/trainable_infrastructure.rst.txt b/docs/_sources/api/api_docs/modules/trainable_infrastructure.rst.txt index c514a3307..42541f22f 100644 --- a/docs/_sources/api/api_docs/modules/trainable_infrastructure.rst.txt +++ b/docs/_sources/api/api_docs/modules/trainable_infrastructure.rst.txt @@ -36,6 +36,15 @@ It adds to the base quantizer a get_config and from_config functions to enable l .. autoclass:: model_compression_toolkit.trainable_infrastructure.BasePytorchTrainableQuantizer + + +TrainingMethod +================================ +**Select a training method:** + +.. autoclass:: model_compression_toolkit.trainable_infrastructure.TrainingMethod + + TrainableQuantizerWeightsConfig ================================= This configuration object contains the necessary attributes for configuring a weights trainable quantizer. @@ -46,7 +55,7 @@ For example, we can set a trainable weights quantizer with the following configu .. code-block:: python - from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod + from model_compression_toolkit.target_platform_capabilities.target_platform_capabilities import QuantizationMethod from model_compression_toolkit.constants import THRESHOLD, MIN_THRESHOLD TrainableQuantizerWeightsConfig(weights_quantization_method=QuantizationMethod.SYMMETRIC, @@ -70,7 +79,7 @@ For example, we can set a trainable activation quantizer with the following conf .. 
code-block:: python - from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod + from model_compression_toolkit.target_platform_capabilities.target_platform_capabilities import QuantizationMethod from model_compression_toolkit.constants import THRESHOLD, MIN_THRESHOLD TrainableQuantizerActivationConfig(activation_quantization_method=QuantizationMethod.UNIFORM, diff --git a/docs/_sources/api/api_docs/notes/tpc_note.rst.txt b/docs/_sources/api/api_docs/notes/tpc_note.rst.txt index 39558f42a..7ced4a5d6 100644 --- a/docs/_sources/api/api_docs/notes/tpc_note.rst.txt +++ b/docs/_sources/api/api_docs/notes/tpc_note.rst.txt @@ -1,7 +1,7 @@ .. note:: - For now, some fields of :class:`~model_compression_toolkit.target_platform.OpQuantizationConfig` are ignored during + For now, some fields of :class:`~model_compression_toolkit.target_platform_capabilities.OpQuantizationConfig` are ignored during the optimization process such as quantization_preserving, fixed_scale, and fixed_zero_point. - - MCT will use more information from :class:`~model_compression_toolkit.target_platform.OpQuantizationConfig`, in the future. + - MCT will use more information from :class:`~model_compression_toolkit.target_platform_capabilities.OpQuantizationConfig`, in the future. diff --git a/docs/api/api_docs/classes/BitWidthConfig.html b/docs/api/api_docs/classes/BitWidthConfig.html index ffb941926..f69e01d18 100644 --- a/docs/api/api_docs/classes/BitWidthConfig.html +++ b/docs/api/api_docs/classes/BitWidthConfig.html @@ -7,7 +7,7 @@ - BitWidthConfig — MCT Documentation: ver 2.2.0 + BitWidthConfig — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

Navigation

  • index
  • - + @@ -45,7 +45,7 @@

    Navigation

    BitWidthConfig

    -class model_compression_toolkit.core.BitWidthConfig(manual_activation_bit_width_selection_list=None)
    +class model_compression_toolkit.core.BitWidthConfig(manual_activation_bit_width_selection_list=<factory>)

    Class to manage manual bit-width configurations.

    @@ -128,7 +128,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/classes/DataGenerationConfig.html b/docs/api/api_docs/classes/DataGenerationConfig.html index 3f2a64060..190c2ba4c 100644 --- a/docs/api/api_docs/classes/DataGenerationConfig.html +++ b/docs/api/api_docs/classes/DataGenerationConfig.html @@ -7,7 +7,7 @@ - Data Generation Configuration — MCT Documentation: ver 2.2.0 + Data Generation Configuration — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -211,7 +211,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/classes/DefaultDict.html b/docs/api/api_docs/classes/DefaultDict.html index a8805dda9..fd2892809 100644 --- a/docs/api/api_docs/classes/DefaultDict.html +++ b/docs/api/api_docs/classes/DefaultDict.html @@ -7,7 +7,7 @@ - DefaultDict Class — MCT Documentation: ver 2.2.0 + DefaultDict Class — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -114,7 +114,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/classes/FrameworkInfo.html b/docs/api/api_docs/classes/FrameworkInfo.html index 517e94030..809cfecdd 100644 --- a/docs/api/api_docs/classes/FrameworkInfo.html +++ b/docs/api/api_docs/classes/FrameworkInfo.html @@ -7,7 +7,7 @@ - FrameworkInfo Class — MCT Documentation: ver 2.2.0 + FrameworkInfo Class — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -57,7 +57,7 @@

    Navigation

    Parameters:
      -
    • activation_quantizer_mapping (Dict[QuantizationMethod, Callable]) – A dictionary mapping from QuantizationMethod to a quantization function.

    • +
    • activation_quantizer_mapping (Dict[QuantizationMethod, Callable]) – A dictionary mapping from QuantizationMethod to a quantization function.

    • kernel_channels_mapping (DefaultDict) – Dictionary from a layer to a tuple of its kernel in/out channels indices.

    • activation_min_max_mapping (Dict[str, tuple]) – Dictionary from an activation function to its min/max output values.

    • layer_min_max_mapping (Dict[Any, tuple]) – Dictionary from a layer to its min/max output values.

    • @@ -144,7 +144,7 @@

      Navigation

    • index
    • - +
    diff --git a/docs/api/api_docs/classes/GradientPTQConfig.html b/docs/api/api_docs/classes/GradientPTQConfig.html index 75eae64d5..06761a630 100644 --- a/docs/api/api_docs/classes/GradientPTQConfig.html +++ b/docs/api/api_docs/classes/GradientPTQConfig.html @@ -7,7 +7,7 @@ - GradientPTQConfig Class — MCT Documentation: ver 2.2.0 + GradientPTQConfig Class — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -43,30 +43,28 @@

    Navigation

    GradientPTQConfig Class

    -

    The following API can be used to create a GradientPTQConfig instance which can be used for post training quantization using knowledge distillation from a teacher (float Keras model) to a student (the quantized Keras model)

    +

    The following API can be used to create a GradientPTQConfig instance which can be used for post training quantization using knowledge distillation from a teacher (float model) to a student (the quantized model)

    -class model_compression_toolkit.gptq.GradientPTQConfig(n_epochs, optimizer, optimizer_rest=None, loss=None, log_function=None, train_bias=True, rounding_type=RoundingType.SoftQuantizer, use_hessian_based_weights=True, optimizer_quantization_parameter=None, optimizer_bias=None, regularization_factor=REG_DEFAULT, hessian_weights_config=GPTQHessianScoresConfig(), gptq_quantizer_params_override=None)
    +class model_compression_toolkit.gptq.GradientPTQConfig(n_epochs, loss, optimizer, optimizer_rest, train_bias, hessian_weights_config, gradual_activation_quantization_config, regularization_factor, rounding_type=RoundingType.SoftQuantizer, optimizer_quantization_parameter=None, optimizer_bias=None, log_function=None, gptq_quantizer_params_override=<factory>)

    Configuration to use for quantization with GradientPTQ.

    -

    Initialize a GradientPTQConfig.

    Parameters:
      -
    • n_epochs (int) – Number of representative dataset epochs to train.

    • -
    • optimizer (Any) – Optimizer to use.

    • -
    • optimizer_rest (Any) – Optimizer to use for bias and quantizer parameters.

    • -
    • loss (Callable) – The loss to use. should accept 6 lists of tensors. 1st list of quantized tensors, the 2nd list is the float tensors, -the 3rd is a list of quantized weights, the 4th is a list of float weights, the 5th and 6th lists are the mean and std of the tensors -accordingly. see example in multiple_tensors_mse_loss

    • -
    • log_function (Callable) – Function to log information about the GPTQ process.

    • -
    • train_bias (bool) – Whether to update the bias during the training or not.

    • -
    • rounding_type (RoundingType) – An enum that defines the rounding type.

    • -
    • use_hessian_based_weights (bool) – Whether to use Hessian-based weights for weighted average loss.

    • -
    • optimizer_quantization_parameter (Any) – Optimizer to override the rest optimizer for quantizer parameters.

    • -
    • optimizer_bias (Any) – Optimizer to override the rest optimizer for bias.

    • -
    • regularization_factor (float) – A floating point number that defines the regularization factor.

    • -
    • hessian_weights_config (GPTQHessianScoresConfig) – A configuration that include all necessary arguments to run a computation of Hessian scores for the GPTQ loss.

    • -
    • gptq_quantizer_params_override (dict) – A dictionary of parameters to override in GPTQ quantizer instantiation. Defaults to None (no parameters).

    • +
    • n_epochs – Number of representative dataset epochs to train.

    • +
    • loss – The loss to use. See ‘multiple_tensors_mse_loss’ for the expected interface.

    • +
    • optimizer – Optimizer to use.

    • +
    • optimizer_rest – Default optimizer to use for bias and quantizer parameters.

    • +
    • train_bias – Whether to update the bias during the training or not.

    • +
    • hessian_weights_config – A configuration that includes all necessary arguments to run a computation of +Hessian scores for the GPTQ loss.

    • +
    • gradual_activation_quantization_config – A configuration for Gradual Activation Quantization.

    • +
    • regularization_factor – A floating point number that defines the regularization factor.

    • +
    • rounding_type – An enum that defines the rounding type.

    • +
    • optimizer_quantization_parameter – Optimizer to override the rest optimizer for quantizer parameters.

    • +
    • optimizer_bias – Optimizer to override the rest optimizer for bias.

    • +
    • log_function – Function to log information about the GPTQ process.

    • +
    • gptq_quantizer_params_override – A dictionary of parameters to override in GPTQ quantizer instantiation.

    @@ -78,13 +76,14 @@

    GPTQHessianScoresConfig Class
    -class model_compression_toolkit.gptq.GPTQHessianScoresConfig(hessians_num_samples=GPTQ_HESSIAN_NUM_SAMPLES, norm_scores=True, log_norm=True, scale_log_norm=False, hessian_batch_size=ACT_HESSIAN_DEFAULT_BATCH_SIZE)
    +class model_compression_toolkit.gptq.GPTQHessianScoresConfig(per_sample, hessians_num_samples, norm_scores=None, log_norm=None, scale_log_norm=False, hessian_batch_size=32)

    Configuration to use for computing the Hessian-based scores for GPTQ loss metric.

    -

    Initialize a GPTQHessianWeightsConfig.

    Parameters:
      -
    • hessians_num_samples (int) – Number of samples to use for computing the Hessian-based scores.

    • +
    • per_sample (bool) – Whether to use per-sample attention score.

    • +
    • hessians_num_samples (int|None) – Number of samples to use for computing the Hessian-based scores. +If None, compute Hessian for all images.

    • norm_scores (bool) – Whether to normalize the returned scores of the weighted loss function (to get values between 0 and 1).

    • log_norm (bool) – Whether to use log normalization for the GPTQ Hessian-based scores.

    • scale_log_norm (bool) – Whether to scale the final vector of the Hessian-based scores.

    • @@ -105,6 +104,42 @@

      RoundingType +

      GradualActivationQuantizationConfig

      +

      The following API can be used to configure the gradual activation quantization when using GPTQ.

      +
      +
      +class model_compression_toolkit.gptq.GradualActivationQuantizationConfig(q_fraction_scheduler_policy=<factory>)
      +

      Configuration for Gradual Activation Quantization.

      +

      By default, the quantized fraction increases linearly from 0 to 1 throughout the training.

      +
      +
      Parameters:
      +

      q_fraction_scheduler_policy – config for the scheduling of the quantized fraction. +Only linear annealing is currently supported.

      +
      +
      +
      + + +
      +

      QFractionLinearAnnealingConfig

      +
      +
      +class model_compression_toolkit.gptq.QFractionLinearAnnealingConfig(initial_q_fraction, target_q_fraction, start_step, end_step)
      +

      Config for the quantized fraction linear scheduler of Gradual Activation Quantization.

      +
      +
      Parameters:
      +
        +
      • initial_q_fraction – initial quantized fraction

      • +
      • target_q_fraction – target quantized fraction

      • +
      • start_step – gradient step to begin annealing

      • +
      • end_step – gradient step to complete annealing. None means last step.

      • +
      +
      +
      +
      +
      @@ -120,6 +155,8 @@

      Table of Contents

    • GradientPTQConfig Class
    • GPTQHessianScoresConfig Class
    • RoundingType
    • +
    • GradualActivationQuantizationConfig
    • +
    • QFractionLinearAnnealingConfig
    @@ -143,7 +180,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/classes/MixedPrecisionQuantizationConfig.html b/docs/api/api_docs/classes/MixedPrecisionQuantizationConfig.html index 43bc8cce8..97f983f85 100644 --- a/docs/api/api_docs/classes/MixedPrecisionQuantizationConfig.html +++ b/docs/api/api_docs/classes/MixedPrecisionQuantizationConfig.html @@ -7,7 +7,7 @@ - MixedPrecisionQuantizationConfig — MCT Documentation: ver 2.2.0 + MixedPrecisionQuantizationConfig — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -46,7 +46,7 @@

    Navigation

    Class to configure the quantization process of the model when quantizing in mixed-precision:

    -class model_compression_toolkit.core.MixedPrecisionQuantizationConfig(compute_distance_fn=None, distance_weighting_method=MpDistanceWeighting.AVG, num_of_images=MP_DEFAULT_NUM_SAMPLES, configuration_overwrite=None, num_interest_points_factor=1.0, use_hessian_based_scores=False, norm_scores=True, refine_mp_solution=True, metric_normalization_threshold=1e10, hessian_batch_size=ACT_HESSIAN_DEFAULT_BATCH_SIZE)
    +class model_compression_toolkit.core.MixedPrecisionQuantizationConfig(compute_distance_fn=None, distance_weighting_method=MpDistanceWeighting.AVG, num_of_images=32, configuration_overwrite=None, num_interest_points_factor=1.0, use_hessian_based_scores=False, norm_scores=True, refine_mp_solution=True, metric_normalization_threshold=10000000000.0, hessian_batch_size=32)

    Class with mixed precision parameters to quantize the input model.

    Parameters:
    @@ -95,7 +95,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/classes/MpDistanceWeighting.html b/docs/api/api_docs/classes/MpDistanceWeighting.html index ac03fdcef..18227dd98 100644 --- a/docs/api/api_docs/classes/MpDistanceWeighting.html +++ b/docs/api/api_docs/classes/MpDistanceWeighting.html @@ -7,7 +7,7 @@ - MpDistanceWeighting — MCT Documentation: ver 2.2.0 + MpDistanceWeighting — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -83,7 +83,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/classes/PruningConfig.html b/docs/api/api_docs/classes/PruningConfig.html index 47ff68d96..a11882d5a 100644 --- a/docs/api/api_docs/classes/PruningConfig.html +++ b/docs/api/api_docs/classes/PruningConfig.html @@ -7,7 +7,7 @@ - Pruning Configuration — MCT Documentation: ver 2.2.0 + Pruning Configuration — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -159,7 +159,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/classes/PruningInfo.html b/docs/api/api_docs/classes/PruningInfo.html index 0516ddc4c..dd494387b 100644 --- a/docs/api/api_docs/classes/PruningInfo.html +++ b/docs/api/api_docs/classes/PruningInfo.html @@ -7,7 +7,7 @@ - Pruning Information — MCT Documentation: ver 2.2.0 + Pruning Information — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -122,7 +122,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/classes/QuantizationConfig.html b/docs/api/api_docs/classes/QuantizationConfig.html index 227aa69d1..d06827331 100644 --- a/docs/api/api_docs/classes/QuantizationConfig.html +++ b/docs/api/api_docs/classes/QuantizationConfig.html @@ -7,7 +7,7 @@ - QuantizationConfig — MCT Documentation: ver 2.2.0 + QuantizationConfig — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -46,42 +46,17 @@

    Navigation

    Class to configure the quantization process of the model:

    -class model_compression_toolkit.core.QuantizationConfig(activation_error_method=QuantizationErrorMethod.MSE, weights_error_method=QuantizationErrorMethod.MSE, relu_bound_to_power_of_2=False, weights_bias_correction=True, weights_second_moment_correction=False, input_scaling=False, softmax_shift=False, shift_negative_activation_correction=True, activation_channel_equalization=False, z_threshold=math.inf, min_threshold=MIN_THRESHOLD, l_p_value=2, linear_collapsing=True, residual_collapsing=True, shift_negative_ratio=0.05, shift_negative_threshold_recalculation=False, shift_negative_params_search=False, concat_threshold_update=False)
    -

    Class to wrap all different parameters the library quantize the input model according to.

    -
    -
    Parameters:
    -
      -
    • activation_error_method (QuantizationErrorMethod) – Which method to use from QuantizationErrorMethod for activation quantization threshold selection.

    • -
    • weights_error_method (QuantizationErrorMethod) – Which method to use from QuantizationErrorMethod for activation quantization threshold selection.

    • -
    • relu_bound_to_power_of_2 (bool) – Whether to use relu to power of 2 scaling correction or not.

    • -
    • weights_bias_correction (bool) – Whether to use weights bias correction or not.

    • -
    • weights_second_moment_correction (bool) – Whether to use weights second_moment correction or not.

    • -
    • input_scaling (bool) – Whether to use input scaling or not.

    • -
    • softmax_shift (bool) – Whether to use softmax shift or not.

    • -
    • shift_negative_activation_correction (bool) – Whether to use shifting negative activation correction or not.

    • -
    • activation_channel_equalization (bool) – Whether to use activation channel equalization correction or not.

    • -
    • z_threshold (float) – Value of z score for outliers removal.

    • -
    • min_threshold (float) – Minimum threshold to use during thresholds selection.

    • -
    • l_p_value (int) – The p value of L_p norm threshold selection.

    • -
    • block_collapsing (bool) – Whether to collapse block one to another in the input network

    • -
    • shift_negative_ratio (float) – Value for the ratio between the minimal negative value of a non-linearity output to its activation threshold, which above it - shifting negative activation should occur if enabled.

    • -
    • shift_negative_threshold_recalculation (bool) – Whether or not to recompute the threshold after shifting negative activation.

    • -
    • shift_negative_params_search (bool) – Whether to search for optimal shift and threshold in shift negative activation.

    • -
    -
    -
    +class model_compression_toolkit.core.QuantizationConfig(activation_error_method=QuantizationErrorMethod.MSE, weights_error_method=QuantizationErrorMethod.MSE, relu_bound_to_power_of_2=False, weights_bias_correction=True, weights_second_moment_correction=False, input_scaling=False, softmax_shift=False, shift_negative_activation_correction=True, activation_channel_equalization=False, z_threshold=inf, min_threshold=1.52587890625e-05, l_p_value=2, linear_collapsing=True, residual_collapsing=True, shift_negative_ratio=0.05, shift_negative_threshold_recalculation=False, shift_negative_params_search=False, concat_threshold_update=False, activation_bias_correction=False, activation_bias_correction_threshold=0.0, custom_tpc_opset_to_layer=None) +

    A class that encapsulates all the different parameters used by the library to quantize a model.

    Examples

    -

    One may create a quantization configuration to quantize a model according to. -For example, to quantize a model’s weights and activation using thresholds, such that -weights threshold selection is done using MSE, activation threshold selection is done using NOCLIPPING (min/max), -enabling relu_bound_to_power_of_2, weights_bias_correction, -one can instantiate a quantization configuration:

    +

    You can create a quantization configuration to apply to a model. For example, to quantize a model’s weights and +activations using thresholds, with weight threshold selection based on MSE and activation threshold selection +using NOCLIPPING (min/max), while enabling relu_bound_to_power_of_2 and weights_bias_correction, +you can instantiate a quantization configuration like this:

    >>> import model_compression_toolkit as mct
     >>> qc = mct.core.QuantizationConfig(activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING, weights_error_method=mct.core.QuantizationErrorMethod.MSE, relu_bound_to_power_of_2=True, weights_bias_correction=True)
     
    -

    The QuantizationConfig instanse can then be passed to -keras_post_training_quantization()

    @@ -113,7 +88,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/classes/QuantizationErrorMethod.html b/docs/api/api_docs/classes/QuantizationErrorMethod.html index e2c40702a..c7ffe60a4 100644 --- a/docs/api/api_docs/classes/QuantizationErrorMethod.html +++ b/docs/api/api_docs/classes/QuantizationErrorMethod.html @@ -7,7 +7,7 @@ - QuantizationErrorMethod — MCT Documentation: ver 2.2.0 + QuantizationErrorMethod — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -85,7 +85,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/classes/ResourceUtilization.html b/docs/api/api_docs/classes/ResourceUtilization.html index 54f03591f..f291d6675 100644 --- a/docs/api/api_docs/classes/ResourceUtilization.html +++ b/docs/api/api_docs/classes/ResourceUtilization.html @@ -7,7 +7,7 @@ - ResourceUtilization — MCT Documentation: ver 2.2.0 + ResourceUtilization — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -46,18 +46,12 @@

    Navigation

    Object to configure resources to use when searching for a configuration for the optimized model (such as in mixed-precision, pruning, etc.):

    -class model_compression_toolkit.core.ResourceUtilization(weights_memory=np.inf, activation_memory=np.inf, total_memory=np.inf, bops=np.inf)
    +class model_compression_toolkit.core.ResourceUtilization(weights_memory=inf, activation_memory=inf, total_memory=inf, bops=inf)

    Class to represent measurements of performance.

    -
    -
    Parameters:
    -
      -
    • weights_memory – Memory of a model’s weights in bytes. Note that this includes only coefficients that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, while the bias will not).

    • -
    • activation_memory – Memory of a model’s activation in bytes, according to the given activation resource utilization metric.

    • -
    • total_memory – The sum of model’s activation and weights memory in bytes, according to the given total resource utilization metric.

    • -
    • bops – The total bit-operations in the model.

    • -
    -
    -
    +

    weights_memory: Memory of a model’s weights in bytes. +activation_memory: Memory of a model’s activation in bytes. +total_memory: The sum of model’s activation and weights memory in bytes. +bops: The total bit-operations in the model.

    @@ -89,7 +83,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/classes/XQuantConfig.html b/docs/api/api_docs/classes/XQuantConfig.html index 2375020ec..c86704052 100644 --- a/docs/api/api_docs/classes/XQuantConfig.html +++ b/docs/api/api_docs/classes/XQuantConfig.html @@ -7,7 +7,7 @@ - XQuant Configuration — MCT Documentation: ver 2.2.0 + XQuant Configuration — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -89,7 +89,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/index.html b/docs/api/api_docs/index.html index 292fc010a..facf42521 100644 --- a/docs/api/api_docs/index.html +++ b/docs/api/api_docs/index.html @@ -7,7 +7,7 @@ - API Docs — MCT Documentation: ver 2.2.0 + API Docs — MCT Documentation: ver 2.3.0 @@ -35,7 +35,7 @@

    Navigation

  • previous |
  • - + @@ -147,10 +147,10 @@

    keras_load_quantized_modelkeras_load_quantized_model: A function to load a quantized keras model.

    -
    -

    target_platform

    +
    +

    target_platform_capabilities

      -
    • target_platform: Module to create and model hardware-related settings to optimize the model according to, by the hardware the optimized model will use during inference.

    • +
    • target_platform_capabilities: Module to create and model hardware-related settings to optimize the model according to, by the hardware the optimized model will use during inference.

    • get_target_platform_capabilities: A function to get a target platform model for Tensorflow and Pytorch.

    • DefaultDict: Util class for creating a TargetPlatformCapabilities.

    @@ -191,7 +191,7 @@

    Table of Contents

  • trainable_infrastructure
  • set_log_folder
  • keras_load_quantized_model
  • -
  • target_platform
  • +
  • target_platform_capabilities
  • Indices and tables
  • @@ -226,7 +226,7 @@

    Navigation

  • previous |
  • - + diff --git a/docs/api/api_docs/methods/get_keras_data_generation_config.html b/docs/api/api_docs/methods/get_keras_data_generation_config.html index acacb952d..098ecf3e0 100644 --- a/docs/api/api_docs/methods/get_keras_data_generation_config.html +++ b/docs/api/api_docs/methods/get_keras_data_generation_config.html @@ -7,7 +7,7 @@ - Get DataGenerationConfig for Keras Models — MCT Documentation: ver 2.2.0 + Get DataGenerationConfig for Keras Models — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -109,7 +109,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/get_keras_gptq_config.html b/docs/api/api_docs/methods/get_keras_gptq_config.html index 37cbf9274..43b773633 100644 --- a/docs/api/api_docs/methods/get_keras_gptq_config.html +++ b/docs/api/api_docs/methods/get_keras_gptq_config.html @@ -7,7 +7,7 @@ - Get GradientPTQConfig for Keras Models — MCT Documentation: ver 2.2.0 + Get GradientPTQConfig for Keras Models — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -45,8 +45,8 @@

    Navigation

    Get GradientPTQConfig for Keras Models

    -model_compression_toolkit.gptq.get_keras_gptq_config(n_epochs, optimizer=tf.keras.optimizers.Adam(learning_rate=LR_DEFAULT), optimizer_rest=tf.keras.optimizers.Adam(learning_rate=LR_REST_DEFAULT), loss=GPTQMultipleTensorsLoss(), log_function=None, use_hessian_based_weights=True, regularization_factor=REG_DEFAULT, hessian_batch_size=ACT_HESSIAN_DEFAULT_BATCH_SIZE)
    -

    Create a GradientPTQConfigV2 instance for Keras models.

    +model_compression_toolkit.gptq.get_keras_gptq_config(n_epochs, optimizer=None, optimizer_rest=None, loss=None, log_function=None, use_hessian_based_weights=True, regularization_factor=None, hessian_batch_size=ACT_HESSIAN_DEFAULT_BATCH_SIZE, use_hessian_sample_attention=True, gradual_activation_quantization=True) +

    Create a GradientPTQConfig instance for Keras models.

    Parameters:
      @@ -58,10 +58,12 @@

      Navigation

    • use_hessian_based_weights (bool) – Whether to use Hessian-based weights for weighted average loss.

    • regularization_factor (float) – A floating point number that defines the regularization factor.

    • hessian_batch_size (int) – Batch size for Hessian computation in Hessian-based weights GPTQ.

    • +
    • use_hessian_sample_attention (bool) – whether to use Sample-Layer Attention score for weighted loss.

    • +
    • gradual_activation_quantization (bool, GradualActivationQuantizationConfig) – If False, GradualActivationQuantization is disabled. If True, GradualActivationQuantization is enabled with the default settings. GradualActivationQuantizationConfig object can be passed to use non-default settings.

    Returns:
    -

    a GradientPTQConfigV2 object to use when fine-tuning the quantized model using gptq.

    +

    a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.

    Examples

    @@ -70,7 +72,7 @@

    Navigation

    >>> import tensorflow as tf -

    Create a GradientPTQConfigV2 to run for 5 epochs:

    +

    Create a GradientPTQConfig to run for 5 epochs:

    >>> gptq_conf = mct.gptq.get_keras_gptq_config(n_epochs=5)
     
    @@ -78,7 +80,7 @@

    Navigation

    >>> gptq_conf = mct.gptq.get_keras_gptq_config(n_epochs=3, optimizer=tf.keras.optimizers.Nadam())
     
    -

    The configuration can be passed to keras_post_training_quantization() in order to quantize a keras model using gptq.

    +

    The configuration can be passed to keras_gradient_post_training_quantization() in order to quantize a keras model using gptq.

    Return type:

    GradientPTQConfig

    @@ -115,7 +117,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/get_pytorch_data_generation_config.html b/docs/api/api_docs/methods/get_pytorch_data_generation_config.html index 0edeab3fb..7a9381d26 100644 --- a/docs/api/api_docs/methods/get_pytorch_data_generation_config.html +++ b/docs/api/api_docs/methods/get_pytorch_data_generation_config.html @@ -7,7 +7,7 @@ - Get DataGenerationConfig for Pytorch Models — MCT Documentation: ver 2.2.0 + Get DataGenerationConfig for Pytorch Models — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -110,7 +110,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/get_pytroch_gptq_config.html b/docs/api/api_docs/methods/get_pytroch_gptq_config.html index 72de25f28..61c799f13 100644 --- a/docs/api/api_docs/methods/get_pytroch_gptq_config.html +++ b/docs/api/api_docs/methods/get_pytroch_gptq_config.html @@ -7,7 +7,7 @@ - Get GradientPTQConfig for Pytorch Models — MCT Documentation: ver 2.2.0 + Get GradientPTQConfig for Pytorch Models — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -45,27 +45,29 @@

    Navigation

    Get GradientPTQConfig for Pytorch Models

    -model_compression_toolkit.gptq.get_pytorch_gptq_config(n_epochs, optimizer=Adam([torch.Tensor([])], lr=LR_DEFAULT), optimizer_rest=Adam([torch.Tensor([])], lr=LR_REST_DEFAULT), loss=multiple_tensors_mse_loss, log_function=None, use_hessian_based_weights=True, regularization_factor=REG_DEFAULT, hessian_batch_size=ACT_HESSIAN_DEFAULT_BATCH_SIZE)
    -

    Create a GradientPTQConfigV2 instance for Pytorch models.

    +model_compression_toolkit.gptq.get_pytorch_gptq_config(n_epochs, optimizer=None, optimizer_rest=None, loss=None, log_function=None, use_hessian_based_weights=True, regularization_factor=None, hessian_batch_size=ACT_HESSIAN_DEFAULT_BATCH_SIZE, use_hessian_sample_attention=True, gradual_activation_quantization=True) +

    Create a GradientPTQConfig instance for Pytorch models.

    Parameters:
    • n_epochs (int) – Number of epochs for running the representative dataset for fine-tuning.

    • optimizer (Optimizer) – Pytorch optimizer to use for fine-tuning of the auxiliary variable.

    • optimizer_rest (Optimizer) – Pytorch optimizer to use for fine-tuning of the bias variable.

    • -
    • loss (Callable) – loss to use during fine-tuning. should accept 4 lists of tensors. 1st list of quantized tensors, the 2nd list is the float tensors, the 3rd is a list of quantized weights and the 4th is a list of float weights.

    • +
    • loss (Callable) – loss to use during fine-tuning. See the default loss function for the exact interface.

    • log_function (Callable) – Function to log information about the gptq process.

    • use_hessian_based_weights (bool) – Whether to use Hessian-based weights for weighted average loss.

    • regularization_factor (float) – A floating point number that defines the regularization factor.

    • hessian_batch_size (int) – Batch size for Hessian computation in Hessian-based weights GPTQ.

    • +
    • use_hessian_sample_attention (bool) – whether to use Sample-Layer Attention score for weighted loss.

    • +
    • gradual_activation_quantization (bool, GradualActivationQuantizationConfig) – If False, GradualActivationQuantization is disabled. If True, GradualActivationQuantization is enabled with the default settings. GradualActivationQuantizationConfig object can be passed to use non-default settings.

    Returns:
    -

    a GradientPTQConfigV2 object to use when fine-tuning the quantized model using gptq.

    +

    a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.

    Examples

    -

    Import MCT and Create a GradientPTQConfigV2 to run for 5 epochs:

    +

    Import MCT and Create a GradientPTQConfig to run for 5 epochs:

    >>> import model_compression_toolkit as mct
     >>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=5)
     
    @@ -75,7 +77,7 @@

    Navigation

    >>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=3, optimizer=torch.optim.Adam([torch.Tensor(1)]))
    -

    The configuration can be passed to pytorch_post_training_quantization() in order to quantize a pytorch model using gptq.

    +

    The configuration can be passed to pytorch_gradient_post_training_quantization() in order to quantize a pytorch model using gptq.

    Return type:

    GradientPTQConfig

    @@ -112,7 +114,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/get_target_platform_capabilities.html b/docs/api/api_docs/methods/get_target_platform_capabilities.html index 5fe03a901..eb3d990ef 100644 --- a/docs/api/api_docs/methods/get_target_platform_capabilities.html +++ b/docs/api/api_docs/methods/get_target_platform_capabilities.html @@ -7,7 +7,7 @@ - Get TargetPlatformCapabilities — MCT Documentation: ver 2.2.0 + Get FrameworkQuantizationCapabilities — MCT Documentation: ver 2.3.0 @@ -31,8 +31,8 @@

    Navigation

  • index
  • - - + + @@ -41,38 +41,36 @@

    Navigation

    -
    -

    Get TargetPlatformCapabilities

    +
    +

    Get FrameworkQuantizationCapabilities

    model_compression_toolkit.get_target_platform_capabilities(fw_name, target_platform_name, target_platform_version=None)
    -

    Get a TargetPlatformCapabilities by the target platform model name and the framework name. -For now, it supports frameworks ‘tensorflow’ and ‘pytorch’. For both of them -the target platform model can be ‘default’, ‘imx500’, ‘tflite’, or ‘qnnpack’.

    +

    This is a degenerate function that only returns the MCT default TargetPlatformCapabilities object, to comply with the +existing TPC API.

    Parameters:
      -
    • fw_name – Framework name of the TargetPlatformCapabilities.

    • +
    • fw_name – Framework name of the FrameworkQuantizationCapabilities.

    • target_platform_name – Target platform model name the model will use for inference.

    • target_platform_version – Target platform capabilities version.

    Returns:
    -

    A TargetPlatformCapabilities object that models the hardware and attaches -a framework information to it.

    +

    A default TargetPlatformCapabilities object.

    Return type:
    -

    TargetPlatformCapabilities

    +

    TargetPlatformCapabilities

    Note

    -

    For now, some fields of OpQuantizationConfig are ignored during +

    For now, some fields of OpQuantizationConfig are ignored during the optimization process such as quantization_preserving, fixed_scale, and fixed_zero_point.

      -
    • MCT will use more information from OpQuantizationConfig, in the future.

    • +
    • MCT will use more information from OpQuantizationConfig, in the future.

    @@ -104,8 +102,8 @@

    Navigation

  • index
  • - - + +
    @@ -122,7 +122,7 @@

    Navigation

  • index
  • - +
    diff --git a/docs/api/api_docs/methods/keras_gradient_post_training_quantization.html b/docs/api/api_docs/methods/keras_gradient_post_training_quantization.html index 62eb33a89..5c2492cae 100644 --- a/docs/api/api_docs/methods/keras_gradient_post_training_quantization.html +++ b/docs/api/api_docs/methods/keras_gradient_post_training_quantization.html @@ -7,7 +7,7 @@ - Keras Gradient Based Post Training Quantization — MCT Documentation: ver 2.2.0 + Keras Gradient Based Post Training Quantization — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -69,7 +69,7 @@

    Navigation

  • gptq_representative_data_gen (Callable) – Dataset used for GPTQ training. If None defaults to representative_data_gen

  • target_resource_utilization (ResourceUtilization) – ResourceUtilization object to limit the search of the mixed-precision configuration as desired.

  • core_config (CoreConfig) – Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.

  • -
  • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the Keras model according to.

  • +
  • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – TargetPlatformCapabilities to optimize the Keras model according to.

  • Returns:
    @@ -152,7 +152,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/keras_kpi_data.html b/docs/api/api_docs/methods/keras_kpi_data.html index e39322e8c..e31bb0dee 100644 --- a/docs/api/api_docs/methods/keras_kpi_data.html +++ b/docs/api/api_docs/methods/keras_kpi_data.html @@ -7,7 +7,7 @@ - Get Resource Utilization information for Keras Models — MCT Documentation: ver 2.2.0 + Get Resource Utilization information for Keras Models — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -56,7 +56,7 @@

    Navigation

  • in_model (Model) – Keras model to quantize.

  • representative_data_gen (Callable) – Dataset used for calibration.

  • core_config (CoreConfig) – CoreConfig containing parameters for quantization and mixed precision of how the model should be quantized.

  • -
  • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the Keras model according to.

  • +
  • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – FrameworkQuantizationCapabilities to optimize the Keras model according to.

  • Returns:
    @@ -115,7 +115,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/keras_load_quantizad_model.html b/docs/api/api_docs/methods/keras_load_quantizad_model.html index db16f181a..7969d8414 100644 --- a/docs/api/api_docs/methods/keras_load_quantizad_model.html +++ b/docs/api/api_docs/methods/keras_load_quantizad_model.html @@ -7,7 +7,7 @@ - Load Quantized Keras Model — MCT Documentation: ver 2.2.0 + Load Quantized Keras Model — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -89,7 +89,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/keras_post_training_quantization.html b/docs/api/api_docs/methods/keras_post_training_quantization.html index 9cc04bf05..023c8a43e 100644 --- a/docs/api/api_docs/methods/keras_post_training_quantization.html +++ b/docs/api/api_docs/methods/keras_post_training_quantization.html @@ -7,7 +7,7 @@ - Keras Post Training Quantization — MCT Documentation: ver 2.2.0 + Keras Post Training Quantization — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -64,7 +64,7 @@

    Navigation

  • representative_data_gen (Callable) – Dataset used for calibration.

  • target_resource_utilization (ResourceUtilization) – ResourceUtilization object to limit the search of the mixed-precision configuration as desired.

  • core_config (CoreConfig) – Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.

  • -
  • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the Keras model according to.

  • +
  • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – TargetPlatformCapabilities to optimize the Keras model according to.

  • Returns:
    @@ -144,7 +144,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/keras_pruning_experimental.html b/docs/api/api_docs/methods/keras_pruning_experimental.html index 0574b4a2e..1374f953f 100644 --- a/docs/api/api_docs/methods/keras_pruning_experimental.html +++ b/docs/api/api_docs/methods/keras_pruning_experimental.html @@ -7,7 +7,7 @@ - Keras Structured Pruning — MCT Documentation: ver 2.2.0 + Keras Structured Pruning — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -62,7 +62,7 @@

    Navigation

  • target_resource_utilization (ResourceUtilization) – The target Key Performance Indicators to be achieved through pruning.

  • representative_data_gen (Callable) – A function to generate representative data for pruning analysis.

  • pruning_config (PruningConfig) – Configuration settings for the pruning process. Defaults to standard config.

  • -
  • target_platform_capabilities (TargetPlatformCapabilities) – Platform-specific constraints and capabilities. Defaults to DEFAULT_KERAS_TPC.

  • +
  • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – Platform-specific constraints and capabilities. Defaults to DEFAULT_KERAS_TPC.

  • Returns:
    @@ -145,7 +145,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.html b/docs/api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.html index f19e24b23..11d911a65 100644 --- a/docs/api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.html +++ b/docs/api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.html @@ -7,7 +7,7 @@ - Keras Quantization Aware Training Model Finalize — MCT Documentation: ver 2.2.0 + Keras Quantization Aware Training Model Finalize — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -132,7 +132,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/keras_quantization_aware_training_init_experimental.html b/docs/api/api_docs/methods/keras_quantization_aware_training_init_experimental.html index b5a47c72a..54ee5b454 100644 --- a/docs/api/api_docs/methods/keras_quantization_aware_training_init_experimental.html +++ b/docs/api/api_docs/methods/keras_quantization_aware_training_init_experimental.html @@ -7,7 +7,7 @@ - Keras Quantization Aware Training Model Init — MCT Documentation: ver 2.2.0 + Keras Quantization Aware Training Model Init — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -67,7 +67,7 @@

    Navigation

  • target_resource_utilization (ResourceUtilization) – ResourceUtilization object to limit the search of the mixed-precision configuration as desired.

  • core_config (CoreConfig) – Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.

  • qat_config (QATConfig) – QAT configuration

  • -
  • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the Keras model according to.

  • +
  • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – TargetPlatformCapabilities to optimize the Keras model according to.

  • Returns:
    @@ -152,7 +152,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/pytorch_data_generation_experimental.html b/docs/api/api_docs/methods/pytorch_data_generation_experimental.html index 14fc9f215..0fcbf10f1 100644 --- a/docs/api/api_docs/methods/pytorch_data_generation_experimental.html +++ b/docs/api/api_docs/methods/pytorch_data_generation_experimental.html @@ -7,7 +7,7 @@ - Pytorch Data Generation — MCT Documentation: ver 2.2.0 + Pytorch Data Generation — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -122,7 +122,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization.html b/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization.html index 3db8f08ed..5d41c6976 100644 --- a/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization.html +++ b/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization.html @@ -7,7 +7,7 @@ - Pytorch Gradient Based Post Training Quantization — MCT Documentation: ver 2.2.0 + Pytorch Gradient Based Post Training Quantization — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -48,7 +48,7 @@

    Navigation

    model_compression_toolkit.gptq.pytorch_gradient_post_training_quantization(model, representative_data_gen, target_resource_utilization=None, core_config=CoreConfig(), gptq_config=None, gptq_representative_data_gen=None, target_platform_capabilities=DEFAULT_PYTORCH_TPC)

    Quantize a trained Pytorch module using post-training quantization. By default, the module is quantized using a symmetric constraint quantization thresholds -(power of two) as defined in the default TargetPlatformCapabilities. +(power of two) as defined in the default FrameworkQuantizationCapabilities. The module is first optimized using several transformations (e.g. BatchNormalization folding to preceding layers). Then, using a given dataset, statistics (e.g. min/max, histogram, etc.) are being collected for each layer’s output (and input, depends on the quantization configuration). @@ -69,7 +69,7 @@

    Navigation

  • core_config (CoreConfig) – Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.

  • gptq_config (GradientPTQConfig) – Configuration for using gptq (e.g. optimizer).

  • gptq_representative_data_gen (Callable) – Dataset used for GPTQ training. If None defaults to representative_data_gen

  • -
  • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the PyTorch model according to.

  • +
  • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – TargetPlatformCapabilities to optimize the PyTorch model according to.

  • Returns:
    @@ -134,7 +134,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/pytorch_kpi_data.html b/docs/api/api_docs/methods/pytorch_kpi_data.html index 6913f0e11..f7a639a62 100644 --- a/docs/api/api_docs/methods/pytorch_kpi_data.html +++ b/docs/api/api_docs/methods/pytorch_kpi_data.html @@ -7,7 +7,7 @@ - Get Resource Utilization information for PyTorch Models — MCT Documentation: ver 2.2.0 + Get Resource Utilization information for PyTorch Models — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -54,7 +54,7 @@

    Navigation

  • in_model (Model) – PyTorch model to quantize.

  • representative_data_gen (Callable) – Dataset used for calibration.

  • core_config (CoreConfig) – CoreConfig containing parameters for quantization and mixed precision

  • -
  • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the PyTorch model according to.

  • +
  • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – FrameworkQuantizationCapabilities to optimize the PyTorch model according to.

  • Returns:
    @@ -113,7 +113,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/pytorch_post_training_quantization.html b/docs/api/api_docs/methods/pytorch_post_training_quantization.html index d69365f83..8604a672e 100644 --- a/docs/api/api_docs/methods/pytorch_post_training_quantization.html +++ b/docs/api/api_docs/methods/pytorch_post_training_quantization.html @@ -7,7 +7,7 @@ - Pytorch Post Training Quantization — MCT Documentation: ver 2.2.0 + Pytorch Post Training Quantization — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -48,7 +48,7 @@

    Navigation

    model_compression_toolkit.ptq.pytorch_post_training_quantization(in_module, representative_data_gen, target_resource_utilization=None, core_config=CoreConfig(), target_platform_capabilities=DEFAULT_PYTORCH_TPC)

    Quantize a trained Pytorch module using post-training quantization. By default, the module is quantized using a symmetric constraint quantization thresholds -(power of two) as defined in the default TargetPlatformCapabilities. +(power of two) as defined in the default FrameworkQuantizationCapabilities. The module is first optimized using several transformations (e.g. BatchNormalization folding to preceding layers). Then, using a given dataset, statistics (e.g. min/max, histogram, etc.) are being collected for each layer’s output (and input, depends on the quantization configuration). @@ -64,7 +64,7 @@

    Navigation

  • representative_data_gen (Callable) – Dataset used for calibration.

  • target_resource_utilization (ResourceUtilization) – ResourceUtilization object to limit the search of the mixed-precision configuration as desired.

  • core_config (CoreConfig) – Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.

  • -
  • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the PyTorch model according to.

  • +
  • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – TargetPlatformCapabilities to optimize the PyTorch model according to.

  • Returns:
    @@ -123,7 +123,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/pytorch_pruning_experimental.html b/docs/api/api_docs/methods/pytorch_pruning_experimental.html index c2eee01ab..e51687d15 100644 --- a/docs/api/api_docs/methods/pytorch_pruning_experimental.html +++ b/docs/api/api_docs/methods/pytorch_pruning_experimental.html @@ -7,7 +7,7 @@ - Pytorch Structured Pruning — MCT Documentation: ver 2.2.0 + Pytorch Structured Pruning — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -62,7 +62,7 @@

    Navigation

  • target_resource_utilization (ResourceUtilization) – Key Performance Indicators specifying the pruning targets.

  • representative_data_gen (Callable) – A function to generate representative data for pruning analysis.

  • pruning_config (PruningConfig) – Configuration settings for the pruning process. Defaults to standard config.

  • -
  • target_platform_capabilities (TargetPlatformCapabilities) – Platform-specific constraints and capabilities. +

  • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – Platform-specific constraints and capabilities. Defaults to DEFAULT_PYTORCH_TPC.

  • @@ -146,7 +146,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.html b/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.html index 7fb23e0f1..177aab5d8 100644 --- a/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.html +++ b/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.html @@ -7,7 +7,7 @@ - PyTorch Quantization Aware Training Model Finalize — MCT Documentation: ver 2.2.0 + PyTorch Quantization Aware Training Model Finalize — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -115,7 +115,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.html b/docs/api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.html index 2263184a6..a969e130d 100644 --- a/docs/api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.html +++ b/docs/api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.html @@ -7,7 +7,7 @@ - PyTorch Quantization Aware Training Model Init — MCT Documentation: ver 2.2.0 + PyTorch Quantization Aware Training Model Init — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -67,7 +67,7 @@

    Navigation

  • target_resource_utilization (ResourceUtilization) – ResourceUtilization object to limit the search of the mixed-precision configuration as desired.

  • core_config (CoreConfig) – Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.

  • qat_config (QATConfig) – QAT configuration

  • -
  • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the Pytorch model according to.

  • +
  • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – TargetPlatformCapabilities to optimize the Pytorch model according to.

  • Returns:
    @@ -85,9 +85,7 @@

    Navigation

    >>> model = mobilenet_v2(pretrained=True) -
    -

    Create a random dataset generator, for required number of calibration iterations (num_calibration_batches): -In this example a random dataset of 10 batches each containing 4 images is used.

    +

    Create a random dataset generator, for required number of calibration iterations (num_calibration_batches). In this example, a random dataset of 10 batches each containing 4 images is used:

    >>> import numpy as np
     >>> num_calibration_batches = 10
     >>> def repr_datagen():
    @@ -95,19 +93,15 @@ 

    Navigation

    >>> yield [np.random.random((4, 3, 224, 224))]
    -
    -

    Create a MCT core config, containing the quantization configuration:

    +

    Create a MCT core config, containing the quantization configuration:

    >>> config = mct.core.CoreConfig()
     
    -

    Pass the model, the representative dataset generator, the configuration and the target resource utilization to get a -quantized model. Now the model contains quantizer wrappers for fine tunning the weights:

    +

    Pass the model, the representative dataset generator, the configuration and the target resource utilization to get a quantized model. Now the model contains quantizer wrappers for fine-tuning the weights:

    >>> quantized_model, quantization_info = mct.qat.pytorch_quantization_aware_training_init_experimental(model, repr_datagen, core_config=config)
     

    For more configuration options, please take a look at our API documentation.

    -
    -
    @@ -139,7 +133,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/set_logger_path.html b/docs/api/api_docs/methods/set_logger_path.html index 46bc672ee..e342d53ea 100644 --- a/docs/api/api_docs/methods/set_logger_path.html +++ b/docs/api/api_docs/methods/set_logger_path.html @@ -7,7 +7,7 @@ - Enable a Logger — MCT Documentation: ver 2.2.0 + Enable a Logger — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -86,7 +86,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/xquant_report_keras_experimental.html b/docs/api/api_docs/methods/xquant_report_keras_experimental.html index 6a019e06a..e24f6ec96 100644 --- a/docs/api/api_docs/methods/xquant_report_keras_experimental.html +++ b/docs/api/api_docs/methods/xquant_report_keras_experimental.html @@ -7,7 +7,7 @@ - XQuant Report Keras — MCT Documentation: ver 2.2.0 + XQuant Report Keras — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -98,7 +98,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/methods/xquant_report_pytorch_experimental.html b/docs/api/api_docs/methods/xquant_report_pytorch_experimental.html index cf0cb52c4..b2f2c2071 100644 --- a/docs/api/api_docs/methods/xquant_report_pytorch_experimental.html +++ b/docs/api/api_docs/methods/xquant_report_pytorch_experimental.html @@ -7,7 +7,7 @@ - XQuant Report Pytorch — MCT Documentation: ver 2.2.0 + XQuant Report Pytorch — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -95,7 +95,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/modules/core_config.html b/docs/api/api_docs/modules/core_config.html index 1560526f9..58ff2ca64 100644 --- a/docs/api/api_docs/modules/core_config.html +++ b/docs/api/api_docs/modules/core_config.html @@ -7,7 +7,7 @@ - CoreConfig — MCT Documentation: ver 2.2.0 + CoreConfig — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -46,15 +46,14 @@

    Navigation

    Class to configure the optimization process of the model:

    -class model_compression_toolkit.core.CoreConfig(quantization_config=None, mixed_precision_config=None, bit_width_config=None, debug_config=None)
    -

    A class to hold the configurations classes of the MCT-core.

    +class model_compression_toolkit.core.CoreConfig(quantization_config=<factory>, mixed_precision_config=<factory>, bit_width_config=<factory>, debug_config=<factory>) +

    A dataclass to hold the configurations classes of the MCT-core.

    Parameters:
    • quantization_config (QuantizationConfig) – Config for quantization.

    • -
    • mixed_precision_config (MixedPrecisionQuantizationConfig) – Config for mixed precision quantization.

    • -
    • None (If) –

    • -
    • used. (a default MixedPrecisionQuantizationConfig is) –

    • +
    • mixed_precision_config (MixedPrecisionQuantizationConfig) – Config for mixed precision quantization. +If None, a default MixedPrecisionQuantizationConfig is used.

    • bit_width_config (BitWidthConfig) – Config for manual bit-width selection.

    • debug_config (DebugConfig) – Config for debugging and editing the network quantization process.

    @@ -91,7 +90,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/modules/debug_config.html b/docs/api/api_docs/modules/debug_config.html index faf43f328..3c000d400 100644 --- a/docs/api/api_docs/modules/debug_config.html +++ b/docs/api/api_docs/modules/debug_config.html @@ -7,7 +7,7 @@ - debug_config Module — MCT Documentation: ver 2.2.0 + debug_config Module — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -48,15 +48,15 @@

    DebugConfig
    -class model_compression_toolkit.core.DebugConfig(analyze_similarity=False, network_editor=[], simulate_scheduler=False)
    -

    A class for MCT core debug information.

    +class model_compression_toolkit.core.DebugConfig(analyze_similarity=False, network_editor=<factory>, simulate_scheduler=False) +

    A dataclass for MCT core debug information.

    Parameters:
    • analyze_similarity (bool) – Whether to plot similarity figures within TensorBoard (when logger is enabled) or not. Can be used to pinpoint problematic layers in the quantization process.

    • network_editor (List[EditRule]) – A list of rules and actions to edit the network for quantization.

    • -
    • simulate_scheduler (bool) – Simulate scheduler behaviour to compute operators order and cuts.

    • +
    • simulate_scheduler (bool) – Simulate scheduler behavior to compute operators’ order and cuts.

    @@ -102,7 +102,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/modules/exporter.html b/docs/api/api_docs/modules/exporter.html index 8c98c0976..c40ec04bb 100644 --- a/docs/api/api_docs/modules/exporter.html +++ b/docs/api/api_docs/modules/exporter.html @@ -7,7 +7,7 @@ - exporter Module — MCT Documentation: ver 2.2.0 + exporter Module — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -318,7 +318,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/modules/layer_filters.html b/docs/api/api_docs/modules/layer_filters.html index 659abc0f2..d39f15b10 100644 --- a/docs/api/api_docs/modules/layer_filters.html +++ b/docs/api/api_docs/modules/layer_filters.html @@ -7,7 +7,7 @@ - Layer Attributes Filters — MCT Documentation: ver 2.2.0 + Layer Attributes Filters — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -48,8 +48,8 @@

    Navigation

    Attribute Filters

    -
    -class model_compression_toolkit.target_platform.AttributeFilter(attr, value, op)
    +
    +class model_compression_toolkit.target_platform_capabilities.AttributeFilter(attr, value, op)

    Wrap a key, a value and an operation used to filter a layer’s configuration. If the layer’s configuration has the key, and its value matches when applying the operator, the configuration matches the AttributeFilter.

    @@ -68,8 +68,8 @@

    Attribute Filters
    -
    -class model_compression_toolkit.target_platform.Eq(attr, value)
    +
    +class model_compression_toolkit.target_platform_capabilities.Eq(attr, value)

    Filter configurations such that it matches configurations that have an attribute with a value that equals the value that Eq holds.

    Parameters:
    @@ -86,8 +86,8 @@

    Attribute Filters
    -
    -class model_compression_toolkit.target_platform.NotEq(attr, value)
    +
    +class model_compression_toolkit.target_platform_capabilities.NotEq(attr, value)

    Filter configurations such that it matches configurations that have an attribute with a value that is not equal to the value that NotEq holds.

    Parameters:
    @@ -104,8 +104,8 @@

    Attribute Filters
    -
    -class model_compression_toolkit.target_platform.Greater(attr, value)
    +
    +class model_compression_toolkit.target_platform_capabilities.Greater(attr, value)

    Filter configurations such that it matches configurations that have an attribute with a value that is greater than the value that Greater holds.

    @@ -123,8 +123,8 @@

    Attribute Filters
    -
    -class model_compression_toolkit.target_platform.GreaterEq(attr, value)
    +
    +class model_compression_toolkit.target_platform_capabilities.GreaterEq(attr, value)

    Filter configurations such that it matches configurations that have an attribute with a value that is greater than or equal to the value that GreaterEq holds.

    @@ -142,8 +142,8 @@

    Attribute Filters
    -
    -class model_compression_toolkit.target_platform.Smaller(attr, value)
    +
    +class model_compression_toolkit.target_platform_capabilities.Smaller(attr, value)

    Filter configurations such that it matches configurations that have an attribute with a value that is smaller than the value that Smaller holds.

    Parameters:
    @@ -160,8 +160,8 @@

    Attribute Filters
    -
    -class model_compression_toolkit.target_platform.SmallerEq(attr, value)
    +
    +class model_compression_toolkit.target_platform_capabilities.SmallerEq(attr, value)

    Filter configurations such that it matches configurations that have an attribute with a value that is smaller than or equal to the value that SmallerEq holds.

    Parameters:
    @@ -214,7 +214,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/modules/network_editor.html b/docs/api/api_docs/modules/network_editor.html index 4d12299f3..1886428e9 100644 --- a/docs/api/api_docs/modules/network_editor.html +++ b/docs/api/api_docs/modules/network_editor.html @@ -7,7 +7,7 @@ - network_editor Module — MCT Documentation: ver 2.2.0 + network_editor Module — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -283,7 +283,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/modules/qat_config.html b/docs/api/api_docs/modules/qat_config.html index 787a31493..f7742d5f6 100644 --- a/docs/api/api_docs/modules/qat_config.html +++ b/docs/api/api_docs/modules/qat_config.html @@ -7,7 +7,7 @@ - qat_config Module — MCT Documentation: ver 2.2.0 + qat_config Module — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -45,16 +45,7 @@

    Navigation

    qat_config Module

    TrainingMethod

    -

    Select a QAT training method:

    -
    -
    -class model_compression_toolkit.qat.TrainingMethod(value)
    -

    An enum for selecting a QAT training method

    -

    STE - Standard straight-through estimator. Includes PowerOfTwo, symmetric & uniform quantizers

    -

    DQA - DNN Quantization with Attention. Includes a smooth quantization introduces by DQA method

    -

    LSQ - Learned Step size Quantization. Includes PowerOfTwo, symmetric & uniform quantizers: https://arxiv.org/pdf/1902.08153.pdf

    -
    - +

    In order to select a training method, please visit the trainable_infrastructure API.


    @@ -69,8 +60,8 @@

    QATConfig
    Parameters:
      -
    • weight_training_method (TrainingMethod) – Training method for weight quantizers

    • -
    • activation_training_method (TrainingMethod) – Training method for activation quantizers:

    • +
    • weight_training_method (TrainingMethod) – Training method for weight quantizers

    • +
    • activation_training_method (TrainingMethod) – Training method for activation quantizers:

    • weight_quantizer_params_override – A dictionary of parameters to override in weight quantization quantizer instantiation. Defaults to None (no parameters)

    • activation_quantizer_params_override – A dictionary of parameters to override in activation quantization quantizer instantiation. Defaults to None (no parameters)

    @@ -119,7 +110,7 @@

    Navigation

  • index
  • - + diff --git a/docs/api/api_docs/modules/target_platform.html b/docs/api/api_docs/modules/target_platform.html deleted file mode 100644 index 4f799584a..000000000 --- a/docs/api/api_docs/modules/target_platform.html +++ /dev/null @@ -1,350 +0,0 @@ - - - - - - - - - - target_platform Module — MCT Documentation: ver 2.2.0 - - - - - - - - - - - - - - - - - -
    -
    -
    -
    - -
    -

    target_platform Module

    -

    MCT can be configured to quantize and optimize models for different hardware settings. -For example, when using qnnpack backend for Pytorch model inference, Pytorch quantization -configuration -uses per-tensor weights quantization -for Conv2d, while when using tflite modeling, Tensorflow uses per-channel weights quantization for -Conv2D.

    -

    This can be addressed in MCT by using the target_platform module, that can configure different -parameters that are hardware-related, and the optimization process will use this to optimize the model accordingly. -Models for IMX500, TFLite and qnnpack can be observed here, and can be used using get_target_platform_capabilities function.

    -
    -

    -
    -
    -

    Note

    -

    For now, some fields of OpQuantizationConfig are ignored during -the optimization process such as quantization_preserving, fixed_scale, and fixed_zero_point.

    - -
    -
    -

    -
    -

    The object MCT should get called TargetPlatformCapabilities (or shortly TPC). -This diagram demonstrates the main components:

    -../../../images/tpc.jpg -

    Now, we will detail about the different components.

    -
    -

    QuantizationMethod

    -
    -
    -class model_compression_toolkit.target_platform.QuantizationMethod(value)
    -

    Method for quantization function selection:

    -

    POWER_OF_TWO - Symmetric, uniform, threshold is power of two quantization.

    -

    LUT_POT_QUANTIZER - quantization using a lookup table and power of 2 threshold.

    -

    SYMMETRIC - Symmetric, uniform, quantization.

    -

    UNIFORM - uniform quantization,

    -

    LUT_SYM_QUANTIZER - quantization using a lookup table and symmetric threshold.

    -
    - -
    -
    -

    OpQuantizationConfig

    -
    -
    -class model_compression_toolkit.target_platform.OpQuantizationConfig(default_weight_attr_config, attr_weights_configs_mapping, activation_quantization_method, activation_n_bits, supported_input_activation_n_bits, enable_activation_quantization, quantization_preserving, fixed_scale, fixed_zero_point, simd_size, signedness)
    -

    OpQuantizationConfig is a class to configure the quantization parameters of an operator.

    -
    -
    Parameters:
    -
      -
    • default_weight_attr_config (AttributeQuantizationConfig) – A default attribute quantization configuration for the operation.

    • -
    • attr_weights_configs_mapping (Dict[str, AttributeQuantizationConfig]) – A mapping between an op attribute name and its quantization configuration.

    • -
    • activation_quantization_method (QuantizationMethod) – Which method to use from QuantizationMethod for activation quantization.

    • -
    • activation_n_bits (int) – Number of bits to quantize the activations.

    • -
    • supported_input_activation_n_bits (int or Tuple[int]) – Number of bits that operator accepts as input.

    • -
    • enable_activation_quantization (bool) – Whether to quantize the model activations or not.

    • -
    • quantization_preserving (bool) – Whether quantization parameters should be the same for an operator’s input and output.

    • -
    • fixed_scale (float) – Scale to use for an operator quantization parameters.

    • -
    • fixed_zero_point (int) – Zero-point to use for an operator quantization parameters.

    • -
    • simd_size (int) – Per op integer representing the Single Instruction, Multiple Data (SIMD) width of an operator. It indicates the number of data elements that can be fetched and processed simultaneously in a single instruction.

    • -
    • signedness (bool) – Set activation quantization signedness.

    • -
    -
    -
    -
    - -
    -
    -

    AttributeQuantizationConfig

    -
    -
    -class model_compression_toolkit.target_platform.AttributeQuantizationConfig(weights_quantization_method=QuantizationMethod.POWER_OF_TWO, weights_n_bits=FLOAT_BITWIDTH, weights_per_channel_threshold=False, enable_weights_quantization=False, lut_values_bitwidth=None)
    -

    Hold the quantization configuration of a weight attribute of a layer.

    -

    Initializes an attribute quantization config.

    -
    -
    Parameters:
    -
      -
    • weights_quantization_method (QuantizationMethod) – Which method to use from QuantizationMethod for weights quantization.

    • -
    • weights_n_bits (int) – Number of bits to quantize the coefficients.

    • -
    • weights_per_channel_threshold (bool) – Whether to quantize the weights per-channel or not (per-tensor).

    • -
    • enable_weights_quantization (bool) – Whether to quantize the model weights or not.

    • -
    • lut_values_bitwidth (int) – Number of bits to use when quantizing in look-up-table.

    • -
    -
    -
    -
    - -
    -
    -

    QuantizationConfigOptions

    -
    -
    -class model_compression_toolkit.target_platform.QuantizationConfigOptions(quantization_config_list, base_config=None)
    -

    Wrap a set of quantization configurations to consider during the quantization -of an operator.

    -
    -
    Parameters:
    -
      -
    • quantization_config_list (List[OpQuantizationConfig]) – List of possible OpQuantizationConfig to gather.

    • -
    • base_config (OpQuantizationConfig) – Fallback OpQuantizationConfig to use when optimizing the model in a non mixed-precision manner.

    • -
    -
    -
    -
    - -
    -
    -

    TargetPlatformModel

    -
    -
    -class model_compression_toolkit.target_platform.TargetPlatformModel(default_qco, add_metadata=False, name='default_tp_model')
    -

    Modeling of the hardware the quantized model will use during inference. -The model contains definition of operators, quantization configurations of them, and -fusing patterns so that multiple operators will be combined into a single operator.

    -
    -
    Parameters:
    -
      -
    • default_qco (QuantizationConfigOptions) – Default QuantizationConfigOptions to use for operators that their QuantizationConfigOptions are not defined in the model.

    • -
    • add_metadata (bool) – Whether to add metadata to the model or not.

    • -
    • name (str) – Name of the model.

    • -
    -
    -
    -
    - -
    -
    -

    OperatorsSet

    -
    -
    -class model_compression_toolkit.target_platform.OperatorsSet(name, qc_options=None)
    -

    Set of operators that are represented by a unique label.

    -
    -
    Parameters:
    -
      -
    • name (str) – Set’s label (must be unique in a TargetPlatformModel).

    • -
    • qc_options (QuantizationConfigOptions) – Configuration options to use for this set of operations.

    • -
    -
    -
    -
    - -
    -
    -

    Fusing

    -
    -
    -class model_compression_toolkit.target_platform.Fusing(operator_groups_list, name=None)
    -

    Fusing defines a list of operators that should be combined and treated as a single operator, -hence no quantization is applied between them.

    -
    -
    Parameters:
    -
      -
    • operator_groups_list (List[Union[OperatorsSet, OperatorSetConcat]]) – A list of operator groups, each being either an OperatorSetConcat or an OperatorsSet.

    • -
    • name (str) – The name for the Fusing instance. If not provided, it’s generated from the operator groups’ names.

    • -
    -
    -
    -
    - -
    -
    -

    OperatorSetConcat

    -
    -
    -class model_compression_toolkit.target_platform.OperatorSetConcat(*opsets)
    -

    Concatenate a list of operator sets to treat them similarly in different places (like fusing).

    -

    Group a list of operation sets.

    -
    -
    Parameters:
    -

    *opsets (OperatorsSet) – List of operator sets to group.

    -
    -
    -
    - -
    -
    -

    OperationsToLayers

    -
    -
    -class model_compression_toolkit.target_platform.OperationsToLayers(op_sets_to_layers=None)
    -

    Gather multiple OperationsSetToLayers to represent mapping of framework’s layers to TargetPlatformModel OperatorsSet.

    -
    -
    Parameters:
    -

    op_sets_to_layers (List[OperationsSetToLayers]) – List of OperationsSetToLayers where each of them maps an OperatorsSet name to a list of layers that represents the OperatorsSet.

    -
    -
    -
    - -
    -
    -

    OperationsSetToLayers

    -
    -
    -class model_compression_toolkit.target_platform.OperationsSetToLayers(op_set_name, layers, attr_mapping=None)
    -

    Associate an OperatorsSet to a list of framework’s layers.

    -
    -
    Parameters:
    -
      -
    • op_set_name (str) – Name of OperatorsSet to associate with layers.

    • -
    • layers (List[Any]) – List of layers/FilterLayerParams to associate with OperatorsSet.

    • -
    • attr_mapping (Dict[str, DefaultDict]) – A mapping between a general attribute name to a DefaultDict that maps a layer type to the layer’s framework name of this attribute.

    • -
    -
    -
    -
    - -
    -
    -

    LayerFilterParams

    -
    -
    -class model_compression_toolkit.target_platform.LayerFilterParams(layer, *conditions, **kwargs)
    -

    Wrap a layer with filters to filter framework’s layers by their attributes.

    -
    -
    Parameters:
    -
      -
    • layer – Layer to match when filtering.

    • -
    • *conditions (AttributeFilter) – List of conditions to satisfy.

    • -
    • **kwargs – Keyword arguments to filter layers according to.

    • -
    -
    -
    -
    - -

    More filters and usage examples are detailed here.

    -
    -
    -

    TargetPlatformCapabilities

    -
    -
    -class model_compression_toolkit.target_platform.TargetPlatformCapabilities(tp_model, name='base', version=None)
    -

    Attach framework information to a modeled hardware.

    -
    -
    Parameters:
    -
      -
    • tp_model (TargetPlatformModel) – Modeled hardware to attach framework information to.

    • -
    • name (str) – Name of the TargetPlatformCapabilities.

    • -
    • version (str) – TPC version.

    • -
    -
    -
    -
    - -
    -
    - - -
    -
    -
    -
    - -
    -
    - - - - \ No newline at end of file diff --git a/docs/api/api_docs/modules/target_platform_capabilities.html b/docs/api/api_docs/modules/target_platform_capabilities.html new file mode 100644 index 000000000..a611c34dd --- /dev/null +++ b/docs/api/api_docs/modules/target_platform_capabilities.html @@ -0,0 +1,504 @@ + + + + + + + + + + target_platform_capabilities Module — MCT Documentation: ver 2.3.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    target_platform_capabilities Module

    +

    MCT can be configured to quantize and optimize models for different hardware settings. +For example, when using qnnpack backend for Pytorch model inference, Pytorch quantization +configuration +uses per-tensor weights quantization +for Conv2d, while when using tflite modeling, Tensorflow uses per-channel weights quantization for +Conv2D.

    +

    This can be addressed in MCT by using the target_platform_capabilities module, which can configure different +parameters that are hardware-related, and the optimization process will use this to optimize the model accordingly. +Models for IMX500, TFLite and qnnpack can be observed here, and can be used via the get_target_platform_capabilities function.

    +
    +

    +
    +
    +

    Note

    +

    For now, some fields of OpQuantizationConfig are ignored during +the optimization process such as quantization_preserving, fixed_scale, and fixed_zero_point.

    +
      +
    • MCT will use more information from OpQuantizationConfig, in the future.

    • +
    +
    +
    +

    +
    +

    The object MCT should receive is called TargetPlatformCapabilities (or TPC for short). +This diagram demonstrates the main components:

    +../../../images/tpc_diagram.png +

    Next, we describe the different components in detail.

    +
    +

    QuantizationMethod

    +
    +
    +class model_compression_toolkit.target_platform_capabilities.QuantizationMethod(value)
    +

    Method for quantization function selection:

    +

    POWER_OF_TWO - Symmetric, uniform, threshold is power of two quantization.

    +

    LUT_POT_QUANTIZER - quantization using a lookup table and power of 2 threshold.

    +

    SYMMETRIC - Symmetric, uniform, quantization.

    +

    UNIFORM - uniform quantization,

    +

    LUT_SYM_QUANTIZER - quantization using a lookup table and symmetric threshold.

    +
    + +
    +
    +

    OpQuantizationConfig

    +
    +
    +class model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OpQuantizationConfig(**data)
    +

    OpQuantizationConfig is a class to configure the quantization parameters of an operator.

    +
    +
    Parameters:
    +
      +
    • default_weight_attr_config (AttributeQuantizationConfig) – A default attribute quantization configuration for the operation.

    • +
    • attr_weights_configs_mapping (Dict[str, AttributeQuantizationConfig]) – A mapping between an op attribute name and its quantization configuration.

    • +
    • activation_quantization_method (QuantizationMethod) – Which method to use from QuantizationMethod for activation quantization.

    • +
    • activation_n_bits (int) – Number of bits to quantize the activations.

    • +
    • supported_input_activation_n_bits (Union[int, Tuple[int, ...]]) – Number of bits that operator accepts as input.

    • +
    • enable_activation_quantization (bool) – Whether to quantize the model activations or not.

    • +
    • quantization_preserving (bool) – Whether quantization parameters should be the same for an operator’s input and output.

    • +
    • fixed_scale (Optional[float]) – Scale to use for an operator quantization parameters.

    • +
    • fixed_zero_point (Optional[int]) – Zero-point to use for an operator quantization parameters.

    • +
    • simd_size (Optional[int]) – Per op integer representing the Single Instruction, Multiple Data (SIMD) width of an operator. It indicates the number of data elements that can be fetched and processed simultaneously in a single instruction.

    • +
    • signedness (Signedness) – Set activation quantization signedness.

    • +
    +
    +
    +

    Create a new model by parsing and validating input data from keyword arguments.

    +

    Raises ValidationError if the input data cannot be parsed to form a valid model.

    +
    + +
    +
    +

    AttributeQuantizationConfig

    +
    +
    +class model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig(**data)
    +

    Holds the quantization configuration of a weight attribute of a layer.

    +
    +
    +weights_quantization_method
    +

    The method to use from QuantizationMethod for weights quantization.

    +
    +
    Type:
    +

    QuantizationMethod

    +
    +
    +
    + +
    +
    +weights_n_bits
    +

    Number of bits to quantize the coefficients.

    +
    +
    Type:
    +

    int

    +
    +
    +
    + +
    +
    +weights_per_channel_threshold
    +

    Indicates whether to quantize the weights per-channel or per-tensor.

    +
    +
    Type:
    +

    bool

    +
    +
    +
    + +
    +
    +enable_weights_quantization
    +

    Indicates whether to quantize the model weights or not.

    +
    +
    Type:
    +

    bool

    +
    +
    +
    + +
    +
    +lut_values_bitwidth
    +

    Number of bits to use when quantizing in a look-up table. +If None, defaults to 8 in hptq; otherwise, it uses the provided value.

    +
    +
    Type:
    +

    Optional[int]

    +
    +
    +
    + +

    Create a new model by parsing and validating input data from keyword arguments.

    +

    Raises ValidationError if the input data cannot be parsed to form a valid model.

    +
    + +
    +
    +

    QuantizationConfigOptions

    +
    +
    +class model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions(**data)
    +

    QuantizationConfigOptions wraps a set of quantization configurations to consider during the quantization of an operator.

    +
    +
    +quantization_configurations
    +

    Tuple of possible OpQuantizationConfig to gather.

    +
    +
    Type:
    +

    Tuple[OpQuantizationConfig, …]

    +
    +
    +
    + +
    +
    +base_config
    +

    Fallback OpQuantizationConfig to use when optimizing the model in a non-mixed-precision manner.

    +
    +
    Type:
    +

    Optional[OpQuantizationConfig]

    +
    +
    +
    + +

    Create a new model by parsing and validating input data from keyword arguments.

    +

    Raises ValidationError if the input data cannot be parsed to form a valid model.

    +
    + +
    +
    +

    TargetPlatformCapabilities

    +
    +
    +class model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities(**data)
    +

    Represents the hardware configuration used for quantized model inference.

    +
    +
    +default_qco
    +

    Default quantization configuration options for the model.

    +
    +
    Type:
    +

    QuantizationConfigOptions

    +
    +
    +
    + +
    +
    +operator_set
    +

    Tuple of operator sets within the model.

    +
    +
    Type:
    +

    Optional[Tuple[OperatorsSet, …]]

    +
    +
    +
    + +
    +
    +fusing_patterns
    +

    Tuple of fusing patterns for the model.

    +
    +
    Type:
    +

    Optional[Tuple[Fusing, …]]

    +
    +
    +
    + +
    +
    +tpc_minor_version
    +

    Minor version of the Target Platform Configuration.

    +
    +
    Type:
    +

    Optional[int]

    +
    +
    +
    + +
    +
    +tpc_patch_version
    +

    Patch version of the Target Platform Configuration.

    +
    +
    Type:
    +

    Optional[int]

    +
    +
    +
    + +
    +
    +tpc_platform_type
    +

    Type of the platform for the Target Platform Configuration.

    +
    +
    Type:
    +

    Optional[str]

    +
    +
    +
    + +
    +
    +add_metadata
    +

    Flag to determine if metadata should be added.

    +
    +
    Type:
    +

    bool

    +
    +
    +
    + +
    +
    +name
    +

    Name of the Target Platform Model.

    +
    +
    Type:
    +

    str

    +
    +
    +
    + +
    +
    +is_simd_padding
    +

    Indicates if SIMD padding is applied.

    +
    +
    Type:
    +

    bool

    +
    +
    +
    + +
    +
    +SCHEMA_VERSION
    +

    Version of the schema for the Target Platform Model.

    +
    +
    Type:
    +

    int

    +
    +
    +
    + +

    Create a new model by parsing and validating input data from keyword arguments.

    +

    Raises ValidationError if the input data cannot be parsed to form a valid model.

    +
    + +
    +
    +

    OperatorsSet

    +
    +
    +class model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet(**data)
    +

    Set of operators that are represented by a unique label.

    +
    +
    +name
    +

    The set’s label (must be unique within a TargetPlatformCapabilities).

    +
    +
    Type:
    +

    Union[str, OperatorSetNames]

    +
    +
    +
    + +
    +
    +qc_options
    +

    Configuration options to use for this set of operations. +If None, it represents a fusing set.

    +
    +
    Type:
    +

    Optional[QuantizationConfigOptions]

    +
    +
    +
    + +
    +
    +type
    +

    Fixed type identifier.

    +
    +
    Type:
    +

    Literal[“OperatorsSet”]

    +
    +
    +
    + +

    Create a new model by parsing and validating input data from keyword arguments.

    +

    Raises ValidationError if the input data cannot be parsed to form a valid model.

    +
    + +
    +
    +

    Fusing

    +
    +
    +class model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing(**data)
    +

    Fusing defines a tuple of operators that should be combined and treated as a single operator, +hence no quantization is applied between them.

    +
    +
    +operator_groups
    +

    A tuple of operator groups, +each being either an OperatorSetGroup or an OperatorsSet.

    +
    +
    Type:
    +

    Tuple[Union[OperatorsSet, OperatorSetGroup], …]

    +
    +
    +
    + +
    +
    +name
    +

    The name for the Fusing instance. If not provided, it is generated from the operator groups’ names.

    +
    +
    Type:
    +

    Optional[str]

    +
    +
    +
    + +

    Create a new model by parsing and validating input data from keyword arguments.

    +

    Raises ValidationError if the input data cannot be parsed to form a valid model.

    +
    + +
    +
    +

    OperatorSetGroup

    +
    +
    +class model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup(**data)
    +

    Concatenate a tuple of operator sets to treat them similarly in different places (like fusing).

    +
    +
    +operators_set
    +

    Tuple of operator sets to group.

    +
    +
    Type:
    +

    Tuple[OperatorsSet, …]

    +
    +
    +
    + +
    +
    +name
    +

    Concatenated name generated from the names of the operator sets.

    +
    +
    Type:
    +

    Optional[str]

    +
    +
    +
    + +

    Create a new model by parsing and validating input data from keyword arguments.

    +

    Raises ValidationError if the input data cannot be parsed to form a valid model.

    +
    + +
    +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/modules/trainable_infrastructure.html b/docs/api/api_docs/modules/trainable_infrastructure.html index 27074f6e2..08de2f7f7 100644 --- a/docs/api/api_docs/modules/trainable_infrastructure.html +++ b/docs/api/api_docs/modules/trainable_infrastructure.html @@ -7,7 +7,7 @@ - trainable_infrastructure Module — MCT Documentation: ver 2.2.0 + trainable_infrastructure Module — MCT Documentation: ver 2.3.0 @@ -31,7 +31,7 @@

    Navigation

  • index
  • - + @@ -60,12 +60,16 @@

    BaseKerasTrainableQuantizer
    -class model_compression_toolkit.trainable_infrastructure.BaseKerasTrainableQuantizer(quantization_config)
    +class model_compression_toolkit.trainable_infrastructure.BaseKerasTrainableQuantizer(quantization_config, freeze_quant_params=False)

    This class is a base quantizer which validates the provided quantization config and defines an abstract function which any quantizer needs to implement. It also adds get_config and from_config functions to the base quantizer to enable saving and loading the Keras model.

    +

    This class is a base quantizer which validates the provided quantization config and defines an abstract function which any quantizer needs to implement.

    Parameters:
    -

    quantization_config – quantizer config class contains all the information about a quantizer configuration.

    +
      +
    • quantization_config – quantizer config class contains all the information about the quantizer configuration.

    • +
    • freeze_quant_params – whether to freeze all learnable quantization parameters during training.

    • +

    @@ -77,16 +81,32 @@

    BasePytorchTrainableQuantizer
    -class model_compression_toolkit.trainable_infrastructure.BasePytorchTrainableQuantizer(quantization_config)
    -

    This class is a base Pytorch quantizer which validates the provided quantization config and defines an -abstract function which any quantizer needs to implement.

    +class model_compression_toolkit.trainable_infrastructure.BasePytorchTrainableQuantizer(quantization_config, freeze_quant_params=False) +

    Base class for PyTorch trainable quantizers

    +

    This class is a base quantizer which validates the provided quantization config and defines an abstract function which any quantizer needs to implement.

    Parameters:
    -

    quantization_config – quantizer config class contains all the information about the quantizer configuration.

    +
      +
    • quantization_config – quantizer config class contains all the information about the quantizer configuration.

    • +
    • freeze_quant_params – whether to freeze all learnable quantization parameters during training.

    • +

    +

    +
    +

    TrainingMethod

    +

    Select a training method:

    +
    +
    +class model_compression_toolkit.trainable_infrastructure.TrainingMethod(value)
    +

    An enum for selecting a training method

    +

    STE - Standard straight-through estimator. Includes PowerOfTwo, symmetric & uniform quantizers

    +

    DQA - DNN Quantization with Attention. Includes a smooth quantization introduced by the DQA method

    +

    LSQ - Learned Step size Quantization. Includes PowerOfTwo, symmetric & uniform quantizers: https://arxiv.org/pdf/1902.08153.pdf

    +
    +

    TrainableQuantizerWeightsConfig

    @@ -98,7 +118,7 @@

    TrainableQuantizerWeightsConfig
    Parameters:
    @@ -43,10 +43,10 @@

    Navigation

    Note

    -

    For now, some fields of OpQuantizationConfig are ignored during +

    For now, some fields of OpQuantizationConfig are ignored during the optimization process such as quantization_preserving, fixed_scale, and fixed_zero_point.

      -
    • MCT will use more information from OpQuantizationConfig, in the future.

    • +
    • MCT will use more information from OpQuantizationConfig, in the future.

    @@ -77,7 +77,7 @@

    Navigation

  • index
  • - + diff --git a/docs/genindex.html b/docs/genindex.html index 4382c5f92..b838d1785 100644 --- a/docs/genindex.html +++ b/docs/genindex.html @@ -6,7 +6,7 @@ - Index — MCT Documentation: ver 2.2.0 + Index — MCT Documentation: ver 2.3.0 @@ -30,7 +30,7 @@

    Navigation

  • index
  • - + @@ -62,17 +62,20 @@

    Index

    | R | S | T + | W | X

    A

    @@ -80,14 +83,16 @@

    A

    B

    @@ -161,12 +170,14 @@

    F

    @@ -192,9 +203,11 @@

    G

  • GradientPTQConfig (class in model_compression_toolkit.gptq)
  • -
  • Greater (class in model_compression_toolkit.target_platform) +
  • GradualActivationQuantizationConfig (class in model_compression_toolkit.gptq)
  • -
  • GreaterEq (class in model_compression_toolkit.target_platform) +
  • Greater (class in model_compression_toolkit.target_platform_capabilities) +
  • +
  • GreaterEq (class in model_compression_toolkit.target_platform_capabilities)
  • @@ -215,6 +228,8 @@

    I

  • importance_scores (model_compression_toolkit.pruning.PruningInfo property)
  • ImportanceMetric (class in model_compression_toolkit.pruning) +
  • +
  • is_simd_padding (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities attribute)
  • @@ -252,7 +267,7 @@

    K

    L

    @@ -276,15 +291,25 @@

    M

    N

    @@ -394,9 +429,13 @@

    S

    T

    +
    + +

    W

    + + +
    @@ -451,7 +506,7 @@

    Navigation

  • index
  • - + diff --git a/docs/guidelines/visualization.html b/docs/guidelines/visualization.html index 7d43406fc..e3d690ae0 100644 --- a/docs/guidelines/visualization.html +++ b/docs/guidelines/visualization.html @@ -7,7 +7,7 @@ - Visualization within TensorBoard — MCT Documentation: ver 2.2.0 + Visualization within TensorBoard — MCT Documentation: ver 2.3.0 @@ -39,7 +39,7 @@

    Navigation

  • previous |
  • - + @@ -186,7 +186,7 @@

    Navigation

  • previous |
  • - + diff --git a/docs/images/tpc.jpg b/docs/images/tpc.jpg deleted file mode 100644 index 3bbd86398..000000000 Binary files a/docs/images/tpc.jpg and /dev/null differ diff --git a/docs/images/tpc_diagram.png b/docs/images/tpc_diagram.png new file mode 100644 index 000000000..3f172b5e9 Binary files /dev/null and b/docs/images/tpc_diagram.png differ diff --git a/docs/index.html b/docs/index.html index 0c9e7fd19..3556d8917 100644 --- a/docs/index.html +++ b/docs/index.html @@ -7,7 +7,7 @@ - Model Compression Toolkit User Guide — MCT Documentation: ver 2.2.0 + Model Compression Toolkit User Guide — MCT Documentation: ver 2.3.0 @@ -35,7 +35,7 @@

    Navigation

  • next |
  • - + @@ -181,7 +181,7 @@

    Navigation

  • next |
  • - + diff --git a/docs/objects.inv b/docs/objects.inv index 01caa1317..d44eb99ca 100644 Binary files a/docs/objects.inv and b/docs/objects.inv differ diff --git a/docs/search.html b/docs/search.html index 25f88fb35..0cfb4c654 100644 --- a/docs/search.html +++ b/docs/search.html @@ -6,7 +6,7 @@ - Search — MCT Documentation: ver 2.2.0 + Search — MCT Documentation: ver 2.3.0 @@ -36,7 +36,7 @@

    Navigation

  • index
  • - + @@ -93,7 +93,7 @@

    Navigation

  • index
  • - + diff --git a/docs/searchindex.js b/docs/searchindex.js index 6bb933530..e151d20f1 100644 --- a/docs/searchindex.js +++ b/docs/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["api/api_docs/classes/BitWidthConfig", "api/api_docs/classes/DataGenerationConfig", "api/api_docs/classes/DefaultDict", "api/api_docs/classes/FrameworkInfo", "api/api_docs/classes/GradientPTQConfig", "api/api_docs/classes/MixedPrecisionQuantizationConfig", "api/api_docs/classes/MpDistanceWeighting", "api/api_docs/classes/PruningConfig", "api/api_docs/classes/PruningInfo", "api/api_docs/classes/QuantizationConfig", "api/api_docs/classes/QuantizationErrorMethod", "api/api_docs/classes/ResourceUtilization", "api/api_docs/classes/XQuantConfig", "api/api_docs/index", "api/api_docs/methods/get_keras_data_generation_config", "api/api_docs/methods/get_keras_gptq_config", "api/api_docs/methods/get_pytorch_data_generation_config", "api/api_docs/methods/get_pytroch_gptq_config", "api/api_docs/methods/get_target_platform_capabilities", "api/api_docs/methods/keras_data_generation_experimental", "api/api_docs/methods/keras_gradient_post_training_quantization", "api/api_docs/methods/keras_kpi_data", "api/api_docs/methods/keras_load_quantizad_model", "api/api_docs/methods/keras_post_training_quantization", "api/api_docs/methods/keras_pruning_experimental", "api/api_docs/methods/keras_quantization_aware_training_finalize_experimental", "api/api_docs/methods/keras_quantization_aware_training_init_experimental", "api/api_docs/methods/pytorch_data_generation_experimental", "api/api_docs/methods/pytorch_gradient_post_training_quantization", "api/api_docs/methods/pytorch_kpi_data", "api/api_docs/methods/pytorch_post_training_quantization", "api/api_docs/methods/pytorch_pruning_experimental", "api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental", "api/api_docs/methods/pytorch_quantization_aware_training_init_experimental", "api/api_docs/methods/set_logger_path", 
"api/api_docs/methods/xquant_report_keras_experimental", "api/api_docs/methods/xquant_report_pytorch_experimental", "api/api_docs/modules/core_config", "api/api_docs/modules/debug_config", "api/api_docs/modules/exporter", "api/api_docs/modules/layer_filters", "api/api_docs/modules/network_editor", "api/api_docs/modules/qat_config", "api/api_docs/modules/target_platform", "api/api_docs/modules/trainable_infrastructure", "api/api_docs/notes/tpc_note", "guidelines/visualization", "index"], "filenames": ["api/api_docs/classes/BitWidthConfig.rst", "api/api_docs/classes/DataGenerationConfig.rst", "api/api_docs/classes/DefaultDict.rst", "api/api_docs/classes/FrameworkInfo.rst", "api/api_docs/classes/GradientPTQConfig.rst", "api/api_docs/classes/MixedPrecisionQuantizationConfig.rst", "api/api_docs/classes/MpDistanceWeighting.rst", "api/api_docs/classes/PruningConfig.rst", "api/api_docs/classes/PruningInfo.rst", "api/api_docs/classes/QuantizationConfig.rst", "api/api_docs/classes/QuantizationErrorMethod.rst", "api/api_docs/classes/ResourceUtilization.rst", "api/api_docs/classes/XQuantConfig.rst", "api/api_docs/index.rst", "api/api_docs/methods/get_keras_data_generation_config.rst", "api/api_docs/methods/get_keras_gptq_config.rst", "api/api_docs/methods/get_pytorch_data_generation_config.rst", "api/api_docs/methods/get_pytroch_gptq_config.rst", "api/api_docs/methods/get_target_platform_capabilities.rst", "api/api_docs/methods/keras_data_generation_experimental.rst", "api/api_docs/methods/keras_gradient_post_training_quantization.rst", "api/api_docs/methods/keras_kpi_data.rst", "api/api_docs/methods/keras_load_quantizad_model.rst", "api/api_docs/methods/keras_post_training_quantization.rst", "api/api_docs/methods/keras_pruning_experimental.rst", "api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.rst", "api/api_docs/methods/keras_quantization_aware_training_init_experimental.rst", "api/api_docs/methods/pytorch_data_generation_experimental.rst", 
"api/api_docs/methods/pytorch_gradient_post_training_quantization.rst", "api/api_docs/methods/pytorch_kpi_data.rst", "api/api_docs/methods/pytorch_post_training_quantization.rst", "api/api_docs/methods/pytorch_pruning_experimental.rst", "api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.rst", "api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.rst", "api/api_docs/methods/set_logger_path.rst", "api/api_docs/methods/xquant_report_keras_experimental.rst", "api/api_docs/methods/xquant_report_pytorch_experimental.rst", "api/api_docs/modules/core_config.rst", "api/api_docs/modules/debug_config.rst", "api/api_docs/modules/exporter.rst", "api/api_docs/modules/layer_filters.rst", "api/api_docs/modules/network_editor.rst", "api/api_docs/modules/qat_config.rst", "api/api_docs/modules/target_platform.rst", "api/api_docs/modules/trainable_infrastructure.rst", "api/api_docs/notes/tpc_note.rst", "guidelines/visualization.rst", "index.rst"], "titles": ["BitWidthConfig", "Data Generation Configuration", "DefaultDict Class", "FrameworkInfo Class", "GradientPTQConfig Class", "MixedPrecisionQuantizationConfig", "MpDistanceWeighting", "Pruning Configuration", "Pruning Information", "QuantizationConfig", "QuantizationErrorMethod", "ResourceUtilization", "XQuant Configuration", "API Docs", "Get DataGenerationConfig for Keras Models", "Get GradientPTQConfig for Keras Models", "Get DataGenerationConfig for Pytorch Models", "Get GradientPTQConfig for Pytorch Models", "Get TargetPlatformCapabilities", "Keras Data Generation", "Keras Gradient Based Post Training Quantization", "Get Resource Utilization information for Keras Models", "Load Quantized Keras Model", "Keras Post Training Quantization", "Keras Structured Pruning", "Keras Quantization Aware Training Model Finalize", "Keras Quantization Aware Training Model Init", "Pytorch Data Generation", "Pytorch Gradient Based Post Training Quantization", "Get Resource Utilization information 
for PyTorch Models", "Pytorch Post Training Quantization", "Pytorch Structured Pruning", "PyTorch Quantization Aware Training Model Finalize", "PyTorch Quantization Aware Training Model Init", "Enable a Logger", "XQuant Report Keras", "XQuant Report Pytorch", "CoreConfig", "debug_config Module", "exporter Module", "Layer Attributes Filters", "network_editor Module", "qat_config Module", "target_platform Module", "trainable_infrastructure Module", "<no title>", "Visualization within TensorBoard", "Model Compression Toolkit User Guide"], "terms": {"class": [0, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 22, 37, 38, 39, 40, 41, 42, 43, 44], "model_compression_toolkit": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "core": [0, 3, 5, 6, 9, 10, 11, 20, 21, 23, 24, 25, 26, 28, 29, 31, 32, 33, 37, 38, 41], "manual_activation_bit_width_selection_list": 0, "none": [0, 1, 2, 4, 5, 12, 15, 17, 18, 20, 22, 23, 26, 28, 30, 33, 37, 39, 41, 42, 43, 44], "manag": 0, "manual": [0, 13, 37], "bit": [0, 5, 11, 13, 20, 23, 25, 26, 33, 37, 39, 41, 43, 44, 47], "width": [0, 5, 13, 20, 23, 26, 27, 33, 37, 43, 47], "configur": [0, 4, 5, 9, 11, 13, 14, 15, 16, 17, 19, 20, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 35, 36, 37, 38, 40, 41, 42, 43, 44, 47], "A": [0, 3, 4, 5, 8, 13, 15, 17, 18, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 41, 42, 43, 47], "list": [0, 1, 3, 4, 5, 14, 15, 16, 17, 19, 27, 38, 41, 43, 47], "object": [0, 3, 4, 5, 7, 11, 14, 15, 16, 17, 18, 20, 21, 22, 23, 25, 26, 28, 29, 30, 33, 39, 41, 43, 44], "defin": [0, 4, 5, 6, 15, 17, 19, 20, 23, 24, 25, 26, 27, 28, 30, 31, 43, 44], "type": [0, 1, 2, 4, 5, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 27, 29, 31, 35, 36, 39, 41, 43], "common": [0, 12], "quantiz": [0, 3, 4, 5, 9, 10, 11, 12, 13, 15, 17, 19, 21, 27, 29, 35, 36, 37, 38, 41, 42, 43, 44, 46, 47], "bit_width_config": [0, 37], 
"filter": [0, 1, 7, 43], "bit_width": 0, "encapsul": 0, "select": [0, 3, 7, 9, 10, 13, 37, 39, 42, 43, 44], "specif": [0, 3, 24, 31, 41, 46], "The": [0, 1, 3, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16, 17, 19, 20, 23, 24, 25, 26, 27, 28, 30, 31, 33, 35, 36, 39, 41, 43, 44, 46], "us": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47], "node": [0, 26, 33, 41, 44, 46], "manipul": [0, 1], "basenodematch": 0, "appli": [0, 1, 13, 39, 40, 41, 43], "int": [0, 1, 4, 5, 7, 9, 14, 15, 16, 17, 19, 27, 43, 44], "data_gener": [1, 14, 16, 19, 27], "datagenerationconfig": [1, 13, 19, 27], "n_iter": [1, 14, 16, 19, 27], "optim": [1, 3, 4, 9, 11, 13, 14, 15, 16, 17, 18, 20, 21, 23, 26, 28, 29, 30, 33, 37, 43, 44, 45, 47], "data_gen_batch_s": [1, 14, 16, 19, 27], "initial_lr": [1, 14, 16], "output_loss_multipli": [1, 14, 16], "image_granular": [1, 14, 16], "allimag": [1, 16], "scheduler_typ": [1, 14, 16], "bn_alignment_loss_typ": [1, 14, 16], "output_loss_typ": [1, 14, 16], "data_init_typ": [1, 14, 16], "layer_weighting_typ": [1, 14, 16], "image_pipeline_typ": [1, 14, 16], "image_normalization_typ": [1, 14, 16], "extra_pixel": [1, 14, 16], "0": [1, 3, 4, 5, 8, 9, 14, 15, 16, 20, 23, 24, 25, 26, 31, 39, 44], "bn_layer_typ": [1, 14, 16], "last_layer_typ": [1, 16], "image_clip": [1, 14, 16], "true": [1, 4, 5, 9, 15, 16, 17, 22, 32, 33, 44], "initi": [1, 2, 4, 7, 12, 14, 16, 26, 33, 43, 44], "paramet": [1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "number": [1, 4, 5, 7, 14, 15, 16, 17, 19, 20, 23, 24, 26, 27, 28, 30, 31, 33, 43, 44], "iter": [1, 14, 16, 19, 20, 23, 26, 27, 28, 30, 33], "ani": [1, 2, 3, 4, 35, 36, 39, 40, 43, 44], "batch": [1, 4, 5, 14, 15, 16, 17, 19, 20, 23, 26, 27, 28, 30, 33], "size": [1, 4, 5, 14, 15, 16, 17, 19, 20, 23, 25, 26, 27, 33, 39, 
42], "float": [1, 4, 5, 9, 14, 15, 16, 17, 20, 26, 28, 30, 33, 35, 36, 39, 43, 44, 46], "learn": [1, 14, 15, 16, 42], "rate": [1, 14, 15, 16], "multipli": [1, 14, 16], "output": [1, 3, 6, 9, 14, 16, 19, 20, 23, 26, 27, 28, 30, 32, 33, 43, 46, 47], "loss": [1, 4, 14, 15, 16, 17, 20, 24, 28, 30, 31], "granular": [1, 14, 16], "imag": [1, 5, 14, 16, 19, 20, 23, 26, 27, 28, 30, 33, 46], "default": [1, 2, 4, 7, 14, 15, 16, 18, 20, 23, 24, 28, 30, 31, 37, 39, 42, 43, 46], "schedul": [1, 14, 16, 38], "batchnorm": [1, 14, 16, 19, 20, 23, 26, 28, 30, 33], "align": [1, 14, 16], "layer": [1, 3, 5, 6, 8, 14, 16, 19, 20, 23, 24, 25, 26, 28, 30, 31, 32, 33, 38, 39, 41, 43, 44, 46], "weight": [1, 3, 4, 5, 6, 9, 11, 13, 14, 15, 16, 17, 20, 21, 24, 26, 28, 29, 30, 31, 32, 33, 39, 41, 42, 43, 44, 46], "pipelin": [1, 14, 16], "normal": [1, 4, 5, 14, 16], "union": [1, 14, 16, 19, 27, 43], "tupl": [1, 3, 14, 16, 19, 20, 24, 27, 31, 41, 43], "extra": [1, 14, 16], "pixel": [1, 14, 16], "add": [1, 3, 12, 14, 16, 22, 43, 44], "input": [1, 5, 9, 14, 16, 20, 23, 26, 28, 30, 33, 43], "bool": [1, 4, 5, 9, 14, 15, 16, 17, 38, 43, 44], "flag": 1, "enabl": [1, 5, 9, 13, 38, 44, 47], "clip": [1, 14, 16], "valu": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 20, 23, 24, 25, 26, 31, 39, 40, 41, 42, 43], "an": [1, 2, 3, 4, 8, 13, 20, 23, 26, 33, 35, 36, 39, 40, 41, 42, 43, 44, 47], "enum": [1, 3, 4, 5, 6, 7, 10, 42], "choos": [1, 4, 39], "depend": [1, 20, 23, 26, 28, 30, 33], "when": [1, 2, 3, 5, 7, 10, 11, 13, 15, 17, 20, 23, 25, 26, 38, 40, 42, 43, 44, 46], "imagewis": 1, "batchwis": [1, 14], "reduce_on_plateau": [1, 14], "reduceonplateau": 1, "step": [1, 42], "l2_squar": [1, 14, 16], "l2": 1, "squar": [1, 10], "No": 1, "negative_min_max_diff": [1, 16], "mean": [1, 4, 10, 46], "neg": [1, 9], "min": [1, 3, 9, 10, 20, 23, 26, 28, 30, 33, 46], "max": [1, 3, 9, 10, 20, 21, 23, 26, 28, 29, 30, 33, 46], "differ": [1, 5, 9, 13, 20, 23, 25, 26, 39, 43, 46], "inverse_min_max_diff": 1, "1": [1, 3, 4, 5, 8, 17, 19, 20, 
21, 23, 24, 25, 27, 28, 29, 30, 31, 32, 39, 47], "regularized_min_max_diff": [1, 14], "regular": [1, 4, 15, 17], "gaussian": [1, 14, 16], "divers": 1, "averag": [1, 4, 5, 6, 14, 15, 16, 17], "same": [1, 39, 43], "per": [1, 3, 20, 23, 26, 33, 43, 44, 46], "first_layer_multipli": 1, "first": [1, 20, 23, 26, 28, 30, 33, 39, 46], "all": [1, 3, 4, 6, 9, 41, 44, 46], "other": [1, 15, 17], "grad": 1, "gradient": [1, 13, 30, 47], "base": [1, 4, 5, 10, 13, 15, 17, 19, 24, 27, 30, 31, 43, 44, 47], "smoothing_and_augment": [1, 14, 16], "smooth": [1, 42], "crop": 1, "flip": 1, "ident": 1, "do": [1, 46], "transform": [1, 20, 23, 26, 28, 30, 33], "torchvis": [1, 16, 28, 29, 30, 31, 32, 33, 39], "keras_appl": [1, 14], "imagenet": 1, "no_norm": 1, "known_dict": 2, "default_valu": 2, "dictionari": [2, 3, 4, 12, 25, 26, 35, 36, 39, 41, 42, 44], "It": [2, 12, 43, 44], "wrap": [2, 3, 9, 22, 26, 33, 40, 43, 44], "given": [2, 11, 20, 21, 23, 26, 28, 29, 30, 33], "return": [2, 4, 5, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 39], "its": [2, 3, 9, 22, 24, 31, 40, 43, 46], "request": 2, "If": [2, 3, 5, 20, 23, 25, 26, 28, 30, 37, 39, 40, 43], "kei": [2, 6, 12, 24, 31, 40], "present": [2, 46], "pass": [2, 3, 9, 15, 17, 20, 23, 24, 25, 26, 28, 30, 31, 32, 33, 41], "gener": [2, 12, 13, 14, 16, 20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 43, 46, 47], "provid": [2, 5, 19, 24, 27, 31, 39, 43, 44, 46], "empti": 2, "get": [2, 3, 4, 5, 13, 20, 23, 25, 26, 28, 30, 32, 33, 43, 46], "inner": 2, "default_factori": 2, "exist": [2, 41], "wa": [2, 39], "follow": [3, 4, 44, 46], "api": [3, 4, 23, 26, 33], "can": [3, 4, 6, 9, 13, 15, 17, 18, 19, 21, 24, 27, 29, 31, 38, 39, 41, 43, 44, 46, 47], "mct": [3, 9, 13, 15, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 37, 38, 39, 41, 43, 44, 45, 46, 47], "framework": [3, 18, 43, 44], "relat": [3, 8, 13, 43], "inform": [3, 4, 13, 15, 17, 18, 20, 23, 24, 26, 28, 30, 31, 33, 38, 43, 44, 45], 
"network": [3, 7, 9, 32, 37, 38, 41, 46, 47], "activation_quantizer_map": 3, "kernel_channels_map": 3, "activation_min_max_map": 3, "layer_min_max_map": 3, "kernel_ops_attributes_map": 3, "out_channel_axis_map": 3, "about": [3, 4, 8, 13, 15, 17, 20, 23, 25, 26, 39, 43, 44], "librari": [3, 9], "need": [3, 20, 23, 26, 28, 30, 33, 39, 40, 44], "model": [3, 4, 5, 8, 9, 11, 12, 13, 18, 19, 20, 23, 24, 27, 28, 30, 31, 35, 36, 37, 41, 42, 43, 44, 46], "hold": [3, 37, 40, 43], "how": [3, 7, 20, 21, 23, 26, 28, 30, 33, 39, 44, 47], "thei": 3, "should": [3, 4, 7, 9, 11, 15, 17, 20, 21, 23, 24, 25, 26, 28, 30, 31, 33, 39, 43, 46], "multipl": [3, 5, 43], "map": [3, 43], "kernel": [3, 11, 20, 23, 25, 26, 41, 44], "channel": [3, 7, 8, 9, 13, 24, 31, 43, 44, 46], "indic": [3, 8, 24, 31, 43], "etc": [3, 11, 13, 20, 23, 26, 28, 30, 33, 46], "ar": [3, 4, 12, 18, 20, 23, 24, 26, 28, 30, 31, 33, 39, 43, 44, 45, 46], "divid": 3, "three": 3, "group": [3, 7, 24, 31, 43], "kernel_op": 3, "have": [3, 39, 40, 46], "coeffici": [3, 11, 20, 23, 25, 26, 28, 30, 43, 44], "e": [3, 20, 23, 26, 28, 30, 33, 47], "g": [3, 20, 23, 26, 28, 30, 33], "conv2d": [3, 11, 19, 20, 23, 25, 26, 27, 41, 43], "dens": [3, 19], "activation_op": 3, "relu": [3, 9], "no_quantization_op": 3, "reshap": [3, 19], "transpos": 3, "dict": [3, 4, 8, 12, 35, 36, 39, 43, 44], "quantizationmethod": [3, 44], "callabl": [3, 4, 5, 12, 15, 17, 20, 21, 23, 24, 26, 28, 29, 30, 31, 33, 35, 36, 39, 40], "from": [3, 4, 9, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 39, 41, 43, 44, 45, 46, 47], "function": [3, 4, 5, 6, 13, 14, 15, 16, 17, 19, 22, 24, 27, 31, 41, 43, 44], "defaultdict": [3, 13, 43], "out": [3, 7], "str": [3, 12, 35, 36, 39, 40, 43], "activ": [3, 9, 11, 20, 21, 23, 26, 28, 29, 30, 33, 39, 41, 42, 43, 44, 46], "oper": [3, 11, 38, 40, 43], "attirbut": 3, "s": [3, 7, 9, 11, 20, 23, 24, 25, 26, 28, 30, 31, 33, 39, 40, 41, 43, 44, 47], "comput": [3, 4, 5, 6, 10, 12, 13, 15, 17, 21, 29, 35, 38, 46], "statist": 
[3, 20, 23, 26, 28, 30, 33, 46], "exampl": [3, 4, 9, 11, 15, 17, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 41, 43, 44, 47], "kera": [3, 4, 11, 13, 41, 44, 47], "we": [3, 19, 20, 23, 24, 26, 27, 31, 33, 39, 41, 43, 44, 46], "want": 3, "onli": [3, 4, 5, 6, 7, 11, 20, 23, 25, 26, 39], "set": [3, 6, 12, 13, 15, 19, 20, 23, 24, 25, 26, 27, 28, 30, 31, 33, 34, 35, 36, 41, 43, 44, 46], "know": 3, "3": [3, 15, 17, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 39, 44], "2": [3, 9, 15, 19, 27, 43, 44, 47], "respectivli": 3, "import": [3, 7, 8, 9, 13, 15, 17, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 39, 41, 44, 46], "tensorflow": [3, 13, 15, 18, 19, 20, 21, 23, 24, 25, 26, 39, 41, 43, 47], "tf": [3, 15, 19, 22, 25, 26], "Then": [3, 20, 23, 26, 28, 30, 33, 41, 46], "creat": [3, 4, 9, 13, 14, 15, 16, 17, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 39, 40, 41], "rang": [3, 20, 23, 26, 28, 30, 33], "advanc": 3, "save": [3, 12, 26, 34, 39, 44], "collect": [3, 20, 23, 26, 28, 30, 33, 35, 36, 46], "time": [3, 7, 44], "For": [3, 9, 18, 19, 20, 23, 25, 26, 27, 33, 39, 43, 44, 45, 46, 47], "softmax": [3, 9], "format": [3, 13], "index": [3, 13], "axi": [3, 44], "nhwc": 3, "last": [3, 6], "nchw": 3, "instanc": [4, 13, 15, 17, 41, 43, 46], "which": [4, 7, 9, 39, 40, 41, 43, 44], "post": [4, 13, 24, 26, 31, 33, 47], "train": [4, 13, 42, 44, 47], "knowledg": [4, 47], "distil": [4, 47], "teacher": 4, "student": 4, "gptq": [4, 15, 17, 20, 28], "n_epoch": [4, 15, 17, 20], "optimizer_rest": [4, 15, 17], "log_funct": [4, 15, 17], "train_bia": 4, "rounding_typ": 4, "softquant": 4, "use_hessian_based_weight": [4, 15, 17], "optimizer_quantization_paramet": 4, "optimizer_bia": 4, "regularization_factor": [4, 15, 17], "reg_default": [4, 15, 17], "hessian_weights_config": 4, "gptq_quantizer_params_overrid": 4, "gradientptq": [4, 13], "repres": [4, 5, 11, 15, 17, 20, 23, 24, 25, 26, 28, 30, 31, 32, 33, 35, 36, 39, 41, 43, 46], "dataset": [4, 15, 17, 
20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 39, 46], "epoch": [4, 15, 17], "bia": [4, 9, 11, 15, 17, 20, 23, 25, 26], "accept": [4, 15, 17, 43], "6": [4, 27], "tensor": [4, 5, 12, 15, 17, 19, 21, 27, 29, 43, 44, 46, 47], "1st": [4, 15, 17], "2nd": [4, 15, 17], "3rd": [4, 15, 17], "4th": [4, 15, 17], "5th": 4, "6th": 4, "std": 4, "accordingli": [4, 43], "see": [4, 47], "multiple_tensors_mse_loss": [4, 17], "log": [4, 12, 13, 15, 17, 34, 46], "process": [4, 5, 9, 13, 14, 15, 16, 17, 18, 19, 24, 27, 31, 37, 38, 41, 42, 43, 45, 46], "whether": [4, 5, 8, 9, 14, 15, 16, 17, 22, 38, 39, 43, 44], "updat": 4, "dure": [4, 9, 13, 14, 15, 16, 17, 18, 35, 36, 39, 41, 43, 44, 45, 46], "round": 4, "hessian": [4, 5, 7, 10, 15, 17, 24, 31, 47], "overrid": [4, 42], "rest": 4, "point": [4, 5, 15, 17, 20, 28, 30, 35, 36, 43, 46], "factor": [4, 5, 10, 15, 17], "includ": [4, 8, 11, 20, 23, 26, 28, 30, 33, 42], "necessari": [4, 39, 44], "argument": [4, 6, 43], "run": [4, 15, 17, 39, 46], "score": [4, 5, 7, 8, 9, 10, 24, 31], "instanti": [4, 9, 42], "hessians_num_sampl": 4, "gptq_hessian_num_sampl": 4, "norm_scor": [4, 5], "log_norm": 4, "scale_log_norm": 4, "fals": [4, 5, 9, 14, 38, 43], "hessian_batch_s": [4, 5, 15, 17], "act_hessian_default_batch_s": [4, 5, 15, 17], "metric": [4, 5, 6, 7, 11, 12, 13, 35, 36], "gptqhessianweightsconfig": 4, "sampl": [4, 46], "between": [4, 5, 9, 12, 20, 28, 30, 43, 46], "scale": [4, 5, 9, 43], "final": [4, 5, 13, 19, 27, 41, 46, 47], "vector": [4, 46], "method": [4, 5, 6, 7, 9, 10, 13, 24, 31, 39, 41, 42, 43, 44], "ste": [4, 42], "straight": [4, 42], "through": [4, 19, 24, 27, 42], "estim": [4, 42], "mix": [5, 6, 11, 13, 20, 21, 23, 25, 26, 28, 29, 30, 33, 37, 43, 47], "precis": [5, 6, 11, 13, 20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 33, 37, 43, 47], "compute_distance_fn": 5, "distance_weighting_method": 5, "mpdistanceweight": [5, 13], "avg": [5, 6], "num_of_imag": [5, 20, 23], "mp_default_num_sampl": 5, "configuration_overwrit": 5, 
"num_interest_points_factor": 5, "use_hessian_based_scor": 5, "refine_mp_solut": 5, "metric_normalization_threshold": 5, "1e10": 5, "distanc": [5, 6, 13], "two": [5, 12, 20, 23, 26, 28, 30, 33, 39, 43, 46], "pre": 5, "each": [5, 7, 8, 20, 23, 24, 26, 28, 30, 31, 33, 41, 43, 44, 46], "among": 5, "sensit": [5, 7, 24, 31], "evalu": [5, 35, 36], "compar": [5, 20, 28, 30, 46], "integ": [5, 39, 43], "overwrit": 5, "predefin": [5, 7], "one": [5, 9, 40, 46], "zero": [5, 43], "percentag": 5, "reduc": [5, 24, 31], "interest": 5, "calcul": [5, 7, 13, 20, 21, 23, 24, 26, 28, 29, 30, 31, 33], "try": 5, "improv": [5, 24, 31], "greedi": [5, 7], "algorithm": 5, "search": [5, 9, 11, 13, 20, 23, 26, 28, 30, 33], "increas": 5, "threshold": [5, 9, 10, 20, 23, 26, 28, 30, 33, 43, 44], "check": [5, 39, 40, 41], "In": [5, 19, 20, 23, 26, 27, 28, 30, 33, 39, 40], "case": 5, "larger": 5, "than": [5, 40], "thi": [5, 8, 10, 11, 13, 19, 20, 22, 23, 24, 25, 26, 27, 28, 30, 31, 33, 39, 43, 44, 47], "prevent": 5, "numer": 5, "issu": [5, 39], "call": [6, 21, 29, 43, 46], "take": [6, 23, 26, 33, 47], "last_lay": 6, "pruningconfig": [7, 13, 24, 31], "num_score_approxim": [7, 24, 31], "pruning_num_score_approxim": 7, "importance_metr": 7, "lfh": [7, 24, 31], "channels_filtering_strategi": 7, "specifi": [7, 14, 16, 19, 22, 24, 27, 31, 39], "neural": [7, 47], "approxim": [7, 24, 31], "perform": [7, 11, 19, 24, 27, 31], "strategi": [7, 24, 31], "constant": [7, 41, 44], "label": [7, 24, 31, 43, 47], "free": [7, 19, 24, 27, 31, 47], "approach": 7, "determin": [7, 24, 31], "info": [7, 34], "measur": [7, 11, 12, 46], "least": 7, "up": [7, 19, 27, 43, 46], "allow": [7, 12, 19, 27, 39], "resourc": [7, 11, 13, 20, 23, 24, 25, 26, 31, 32, 33, 46], "util": [7, 11, 13, 20, 23, 24, 25, 26, 31, 32, 33, 44], "limit": [7, 20, 23, 25, 26, 28, 30, 33], "now": [7, 18, 33, 39, 43, 44, 45, 46], "weights_memori": [7, 11, 20, 23, 24, 26, 31, 33], "consid": [7, 14, 16, 24, 31, 43], "pruninginfo": [8, 13, 24, 31], 
"pruning_mask": 8, "importance_scor": 8, "store": [8, 44], "mask": 8, "act": 8, "contain": [8, 13, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 43, 44], "access": 8, "metadata": [8, 43], "basenod": 8, "np": [8, 11, 20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 39], "ndarrai": 8, "arrai": 8, "where": [8, 12, 39, 41, 43, 46], "element": [8, 43], "correspond": 8, "neuron": 8, "ha": [8, 39, 40, 41], "been": 8, "kept": [8, 26, 33], "quantifi": [8, 46], "signific": 8, "properti": 8, "activation_error_method": 9, "quantizationerrormethod": [9, 13], "mse": [9, 10, 46], "weights_error_method": 9, "relu_bound_to_power_of_2": 9, "weights_bias_correct": 9, "weights_second_moment_correct": 9, "input_sc": 9, "softmax_shift": 9, "shift_negative_activation_correct": 9, "activation_channel_equ": 9, "z_threshold": 9, "math": 9, "inf": [9, 11], "min_threshold": [9, 44], "l_p_valu": 9, "linear_collaps": 9, "residual_collaps": 9, "shift_negative_ratio": 9, "05": 9, "shift_negative_threshold_recalcul": 9, "shift_negative_params_search": 9, "concat_threshold_upd": 9, "accord": [9, 11, 13, 20, 21, 23, 24, 26, 28, 29, 30, 31, 33, 39, 40, 43], "power": [9, 20, 23, 26, 28, 30, 33, 43], "correct": 9, "second_mo": 9, "shift": 9, "equal": [9, 40], "z": 9, "outlier": 9, "remov": [9, 24, 31, 32], "minimum": [9, 44], "p": [9, 31], "l_p": 9, "norm": [9, 10, 46], "block_collaps": 9, "collaps": 9, "block": [9, 44, 46], "anoth": 9, "ratio": 9, "minim": [9, 10, 20, 24, 28, 30, 31], "non": [9, 43], "linear": [9, 27], "abov": 9, "occur": 9, "recomput": 9, "after": [9, 13, 20, 22, 23, 26, 33, 47], "One": [9, 46], "mai": [9, 19, 20, 23, 26, 27, 28, 30, 33, 40, 46], "done": [9, 46], "noclip": [9, 10], "qc": 9, "instans": 9, "keras_post_training_quant": [9, 13, 15, 23, 39, 41, 46], "error": 10, "nois": 10, "mae": [10, 46], "absolut": 10, "kl": [10, 46], "diverg": [10, 46], "make": 10, "signal": 10, "distribut": 10, "similar": [10, 12, 35, 36, 38, 47], "possibl": [10, 20, 23, 26, 33, 43, 
46], "lp": 10, "hmse": 10, "more": [10, 18, 23, 24, 26, 31, 33, 39, 43, 45, 46], "valuabl": 10, "induc": 10, "prune": [11, 47], "activation_memori": 11, "total_memori": 11, "bop": 11, "memori": [11, 24, 31, 46], "byte": [11, 20, 23, 24, 26, 31, 33, 46], "note": [11, 20, 23, 25, 26], "affect": [11, 20, 23, 25, 26], "while": [11, 20, 23, 25, 26, 33, 43], "sum": [11, 21, 24, 29, 31], "total": [11, 21, 29], "xquant_config": [12, 35, 36], "xquantconfig": [12, 13, 35, 36], "report_dir": 12, "custom_similarity_metr": 12, "report": [12, 13], "dir": [12, 46], "explain": [12, 13, 35, 36, 44], "directori": [12, 13, 34], "custom": [12, 19, 22, 26, 27, 39], "name": [12, 18, 39, 41, 43, 46], "implement": [12, 44], "init": [13, 41, 47], "modul": [13, 27, 28, 29, 30, 31, 36], "pytorch_post_training_quant": [13, 17, 30, 39], "pytorch": [13, 18, 43, 44, 47], "pytorch_gradient_post_training_quant": [13, 28], "get_pytorch_gptq_config": [13, 17], "gradientptqconfig": [13, 20, 28], "keras_gradient_post_training_quant": [13, 20], "get_keras_gptq_config": [13, 15, 20], "option": [13, 22, 23, 24, 26, 28, 31, 33, 39, 43], "pytorch_quantization_aware_training_init_experiment": [13, 32, 33], "prepar": [13, 26, 33], "awar": [13, 42, 44, 47], "experiment": [13, 19, 27, 47], "pytorch_quantization_aware_training_finalize_experiment": [13, 32], "without": 13, "quantizewrapp": [13, 26, 32, 33], "keras_quantization_aware_training_init_experiment": [13, 25, 26], "keras_quantization_aware_training_finalize_experiment": [13, 25], "qat_config": [13, 26, 33], "coreconfig": [13, 20, 21, 23, 25, 26, 28, 29, 30, 32, 33], "entir": 13, "quantizationconfig": [13, 37], "mixedprecisionquantizationconfig": [13, 20, 21, 23, 25, 26, 37], "bitwidthconfig": [13, 37], "resourceutil": [13, 20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 33], "network_editor": [13, 38], "modifi": [13, 41], "troubleshoot": 13, "pytorch_resource_utilization_data": [13, 29], "data": [13, 14, 16, 21, 24, 29, 31, 35, 36, 39, 43, 46, 47], "desir": 
[13, 20, 21, 23, 25, 26, 28, 29, 30, 33], "target": [13, 18, 20, 21, 23, 24, 25, 26, 29, 31, 32, 33], "keras_resource_utilization_data": [13, 21], "pytorch_data_generation_experiment": [13, 27], "get_pytorch_data_generation_config": [13, 16, 27], "load": [13, 25, 26, 39, 44], "keras_data_generation_experiment": [13, 19], "get_keras_data_generation_config": [13, 14, 19], "pytorch_pruning_experiment": [13, 31], "structur": [13, 47], "keras_pruning_experiment": [13, 24], "xquant_report_pytorch_experiment": [13, 36], "xquant_report_keras_experiment": [13, 35], "serial": 13, "abstract": [13, 44], "hardwar": [13, 18, 24, 31, 43, 44, 47], "orient": [13, 44], "tool": [13, 44], "logger": [13, 38, 46], "path": [13, 22, 34, 39, 46], "infer": [13, 18, 25, 32, 43, 44], "get_target_platform_cap": [13, 18, 43], "platform": [13, 18, 20, 23, 24, 25, 26, 29, 31], "targetplatformcap": [13, 20, 21, 23, 24, 26, 28, 29, 30, 31, 33], "page": 13, "document": [13, 23, 26, 33], "auto": 13, "sphinx": 13, "default_n_it": [14, 16], "adam": [14, 15, 17], "default_data_gen_b": [14, 16], "default_keras_initial_lr": 14, "default_keras_output_loss_multipli": 14, "schedulertyp": [14, 16], "batchnormalignemntlosstyp": [14, 16], "outputlosstyp": [14, 16], "datainittyp": [14, 16], "bnlayerweightingtyp": [14, 16], "imagegranular": [14, 16], "imagepipelinetyp": [14, 16], "imagenormalizationtyp": [14, 16], "default_keras_extra_pixel": 14, "learning_r": 15, "lr_default": [15, 17], "lr_rest_default": [15, 17], "gptqmultipletensorsloss": 15, "gradientptqconfigv2": [15, 17], "fine": [15, 17, 24, 25, 26, 31, 32, 33], "tune": [15, 17, 24, 25, 26, 31, 32], "optimizerv2": 15, "auxiliri": [15, 17], "variabl": [15, 17], "4": [15, 17, 19, 20, 23, 24, 26, 27, 28, 30, 31, 33], "5": [15, 17, 24, 31], "gptq_conf": [15, 17, 28], "nadam": 15, "order": [15, 17, 20, 23, 26, 33, 38, 39, 40], "radam": 16, "default_pytorch_initial_lr": 16, "default_pytorch_output_loss_multipli": 16, "reduce_on_plateau_with_reset": 16, 
"default_pytorch_extra_pixel": 16, "default_pytorch_bn_layer_typ": 16, "default_pytorch_last_layer_typ": 16, "torch": [17, 27, 36, 39, 47], "lr": 17, "dummi": 17, "param": [17, 38, 41, 44], "fw_name": 18, "target_platform_nam": 18, "target_platform_vers": 18, "support": [18, 39], "both": [18, 20, 23, 28, 30, 32, 44, 46], "them": [18, 43, 46], "imx500": [18, 39, 43], "tflite": [18, 39, 43], "qnnpack": [18, 43], "capabl": [18, 24, 29, 31], "version": [18, 19, 27, 43, 47], "attach": [18, 43], "some": [18, 19, 27, 39, 43, 45, 46], "field": [18, 40, 43, 45], "opquantizationconfig": [18, 45], "ignor": [18, 43, 45], "quantization_preserv": [18, 43, 45], "fixed_scal": [18, 43, 45], "fixed_zero_point": [18, 43, 45], "futur": [18, 19, 27, 43, 45], "n_imag": [19, 27], "output_image_s": [19, 27], "data_generation_config": [19, 27], "ll": [19, 27], "walk": [19, 27], "simpl": [19, 27], "involv": [19, 24, 27, 31], "start": [19, 27, 39, 44, 47], "compress": [19, 24, 27, 28, 31], "toolkit": [19, 27, 28], "sequenti": [19, 27], "flatten": [19, 27], "next": [19, 27, 39, 40], "input_shap": 19, "8": [19, 20, 23, 25, 26, 27, 39, 44], "10": [19, 20, 23, 26, 27, 28, 30, 33], "simplic": [19, 27], "config": [19, 20, 23, 24, 25, 26, 27, 28, 31, 32, 33, 37, 41, 43, 44], "notic": [19, 24, 27, 31, 39], "chang": [19, 27, 39, 41, 46], "generated_imag": [19, 27], "variou": [19, 27, 46], "purpos": [19, 27, 38], "in_model": [20, 21, 23, 25, 26, 29, 32, 33], "representative_data_gen": [20, 21, 23, 24, 26, 28, 29, 30, 31, 33, 39], "gptq_config": [20, 28, 30], "gptq_representative_data_gen": [20, 28], "target_resource_util": [20, 23, 24, 26, 28, 30, 31, 33], "core_config": [20, 21, 23, 25, 26, 28, 29, 30, 32, 33], "target_platform_cap": [20, 21, 23, 24, 26, 28, 29, 30, 31, 33, 44], "default_keras_tpc": [20, 23, 24, 26], "symmetr": [20, 23, 26, 28, 30, 33, 42, 43, 44], "constraint": [20, 23, 24, 28, 30, 31], "sever": [20, 23, 26, 28, 30, 33, 46], "fold": [20, 23, 26, 28, 30, 33], "preced": [20, 23, 26, 
28, 30, 33], "histogram": [20, 23, 26, 28, 30, 33, 46], "being": [20, 23, 26, 28, 30, 33, 43, 44], "ilp": [20, 23, 26, 33], "solver": [20, 23, 26, 33], "find": [20, 23, 26, 33], "maxim": [20, 23, 26, 33], "observ": [20, 28, 30, 43, 46], "calibr": [20, 21, 23, 26, 28, 29, 30, 33], "user": [20, 23, 25, 26, 28, 30, 32, 33], "handl": [20, 23, 26, 28, 30, 33], "applic": [20, 21, 23, 24, 25, 26, 39], "mobilenet": [20, 21], "random": [20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 39], "requir": [20, 23, 26, 28, 30, 33, 44, 46], "num_calibration_batch": [20, 23, 26, 28, 30, 33], "numpi": [20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 39], "def": [20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 39], "repr_datagen": [20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33], "_": [20, 23, 26, 28, 30, 33, 39], "yield": [20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 39], "224": [20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 39], "bitwidth": [20, 23, 25, 26], "candid": [20, 23, 25, 26, 41], "mixed_precision_config": [20, 21, 23, 25, 26, 37], "our": [20, 23, 25, 26, 33, 47], "ru": [20, 23, 25, 26], "count_param": [20, 23, 24, 25, 26], "75": [20, 23, 25, 26], "quantized_model": [20, 23, 25, 26, 32, 33, 35, 36], "quantization_info": [20, 23, 25, 26, 28, 30, 32, 33], "userinform": 20, "keras_default_tpc": 21, "build": [21, 29, 44, 47], "graph": [21, 29, 41, 46], "hw": 21, "ru_data": [21, 29], "keras_load_quantized_model": 22, "filepath": 22, "custom_object": [22, 25, 26], "compil": 22, "trainabl": [22, 25, 44], "file": [22, 25, 26, 34, 39], "addit": [22, 39], "boolean": 22, "saved_model": 22, "loadopt": 22, "savedmodel": 22, "ptq": [23, 30, 39], "mobilenet_v2": [23, 25, 26, 28, 29, 30, 32, 33, 39], "mobilenetv2": [23, 25, 26, 39, 46], "pleas": [23, 26, 33, 39, 47], "look": [23, 26, 33, 43, 47], "pruning_config": [24, 31], "meet": [24, 31], "simd": [24, 31, 43], "tpc": [24, 31, 43], "By": [24, 28, 30, 31, 39, 46], "assess": [24, 31], "togeth": [24, 31], "friendli": [24, 31, 47], 
"architectur": [24, 31], "analyz": [24, 31], "identifi": [24, 31], "impact": [24, 31], "must": [24, 31, 43], "retrain": [24, 31], "recov": [24, 31], "origin": [24, 35, 36, 46], "achiev": 24, "analysi": [24, 31], "standard": [24, 31, 42], "associ": [24, 31, 43], "resnet50": [24, 31, 39], "here": [24, 31, 39, 43, 47], "aim": [24, 31], "footprint": [24, 31], "50": [24, 31], "assum": [24, 31], "float32": [24, 31, 39], "thu": [24, 31, 46], "dense_nparam": [24, 31], "l": [24, 47], "higher": [24, 31], "also": [24, 31, 47], "extend": [24, 31], "durat": [24, 31], "pruned_model": [24, 31], "pruning_info": [24, 31], "qat": [25, 26, 32, 33, 42], "convert": [25, 32], "replac": 25, "trainablequant": 25, "inferablequant": [25, 32], "load_model": [25, 26], "model_fil": [25, 26], "qatconfig": [26, 33], "built": [26, 33, 44], "fake_qu": [26, 33], "onlin": [26, 33], "wrapper": [26, 32, 33, 44], "ker": 26, "hight": 27, "nn": [27, 36], "batchnorm2d": 27, "default_pytorch_tpc": [28, 30, 31, 33], "quantized_modul": [28, 30], "pytorch_default_tpc": 29, "in_modul": 30, "clibrat": 30, "default_pyotrch_tpc": 31, "resnet50_weight": 31, "imagenet1k_v1": 31, "numel": 31, "state_dict": 31, "pretrain": [32, 33], "keep": [32, 47], "readi": 32, "tun": 33, "set_log_fold": [34, 46], "folder": 34, "level": 34, "verbos": 34, "facade_xquant_report": [35, 36], "float_model": [35, 36, 39], "repr_dataset": [35, 36, 39], "validation_dataset": [35, 36], "valid": [35, 36, 44], "quantization_config": [37, 44], "debug_config": 37, "debugconfig": 37, "debug": [37, 38], "edit": [37, 38, 41], "analyze_similar": 38, "simulate_schedul": 38, "plot": [38, 46], "figur": [38, 46], "within": [38, 47], "tensorboard": [38, 47], "pinpoint": 38, "problemat": 38, "editrul": 38, "rule": [38, 41], "action": 38, "simul": 38, "behaviour": 38, "cut": 38, "detail": [39, 43], "refer": 39, "project": [39, 47], "github": [39, 47], "readm": 39, "you": [39, 46, 47], "question": 39, "open": [39, 46, 47], "repositori": 39, "fakely_qu": 
39, "int8": 39, "mct_quantiz": 39, "via": [39, 47], "save_model_path": 39, "is_layer_exportable_fn": 39, "is_keras_layer_export": 39, "serialization_format": 39, "quantization_format": 39, "To": [39, 46], "demonstr": [39, 43], "quantized_exportable_model": 39, "lambda": 39, "dtype": 39, "avail": [39, 47], "tempfil": 39, "keras_file_path": 39, "mkstemp": 39, "torchscript": 39, "is_pytorch_layer_export": 39, "onnx_opset_vers": 39, "default_onnx_opset_vers": 39, "current": 39, "fake": 39, "trace": 39, "mandatori": 39, "quant": 39, "your": 39, "packag": [39, 44, 47], "so": [39, 43], "part": 39, "skip": 39, "plan": 39, "pip": [39, 47], "instal": 39, "q": 39, "onnxruntim": 39, "extens": 39, "let": 39, "There": [39, 46], "onnx_file_path": 39, "model_format_onnx_mctq": 39, "15": 39, "16": 39, "get_ort_session_opt": 39, "session": 39, "creation": 39, "slowli": 39, "suffer": 39, "longer": 39, "latenc": 39, "howev": 39, "ort": 39, "sess": 39, "inferencesess": 39, "cudaexecutionprovid": 39, "cpuexecutionprovid": 39, "_input_data": 39, "astyp": 39, "_model_output_nam": 39, "get_output": 39, "_model_input_nam": 39, "get_input": 39, "predict": 39, "layerfilterparam": 40, "target_platform": [40, 44], "attributefilt": [40, 43], "attr": 40, "op": [40, 43], "match": [40, 41, 43], "regard": 40, "eq": 40, "noteq": 40, "greater": 40, "greatereq": 40, "smaller": 40, "smallereq": 40, "attribut": [41, 43, 44], "9": 41, "er_list": 41, "nodetypefilt": 41, "changecandidatesweightsquantconfigattr": 41, "attr_nam": 41, "weights_n_bit": [41, 43, 44], "new": 41, "node_typ": 41, "nodenamefilt": 41, "node_nam": 41, "nodenamescopefilt": 41, "node_name_scop": 41, "string": 41, "changefinalweightsquantconfigattr": 41, "kwarg": [41, 43], "attr_valu": 41, "changefinalactivationquantconfigattr": 41, "changecandidatesactivationquantconfigattr": 41, "changequantizationparamfunct": 41, "activation_quantization_params_fn": 41, "weights_quantization_params_fn": 41, "changefinalweightsquantizationmethod": 41, 
"weights_quantization_method": [41, 43, 44], "changecandidatesweightsquantizationmethod": 41, "changecandidatesactivationquantizationmethod": 41, "activation_quantization_method": [41, 43, 44], "changequantizationmethod": 41, "poweroftwo": 42, "uniform": [42, 43, 44], "dqa": 42, "dnn": 42, "attent": 42, "introduc": 42, "lsq": 42, "http": [42, 47], "arxiv": [42, 47], "org": 42, "pdf": 42, "1902": 42, "08153": 42, "weight_training_method": 42, "activation_training_method": 42, "weight_quantizer_params_overrid": 42, "activation_quantizer_params_overrid": 42, "backend": 43, "address": 43, "shortli": 43, "diagram": 43, "main": [43, 46], "compon": [43, 44], "power_of_two": 43, "lut_pot_quant": 43, "lookup": 43, "tabl": 43, "lut_sym_quant": 43, "default_weight_attr_config": 43, "attr_weights_configs_map": 43, "activation_n_bit": [43, 44], "supported_input_activation_n_bit": 43, "enable_activation_quant": [43, 44], "simd_siz": 43, "signed": 43, "singl": 43, "instruct": 43, "fetch": 43, "simultan": 43, "float_bitwidth": 43, "weights_per_channel_threshold": [43, 44], "enable_weights_quant": [43, 44], "lut_values_bitwidth": 43, "quantization_config_list": 43, "base_config": 43, "gather": [43, 46], "fallback": 43, "manner": 43, "default_qco": 43, "add_metadata": 43, "default_tp_model": 43, "definit": 43, "pattern": 43, "combin": 43, "qc_option": 43, "uniqu": 43, "operator_groups_list": 43, "treat": 43, "henc": 43, "either": 43, "opset": 43, "concaten": 43, "similarli": 43, "place": 43, "like": 43, "op_sets_to_lay": 43, "op_set_nam": 43, "attr_map": 43, "filterlayerparam": 43, "condit": 43, "satisfi": 43, "keyword": 43, "usag": 43, "tp_model": 43, "infrastructur": 44, "propos": 44, "emul": 44, "holder": 44, "upon": 44, "learnabl": 44, "basic": 44, "get_config": 44, "from_config": 44, "weights_quantization_param": 44, "weights_channels_axi": 44, "weights_quantization_candid": 44, "activation_quantization_param": 44, "activation_quantization_candid": 44, "phase": 46, "displai": 
46, "ui": 46, "writer": 46, "stage": 46, "launch": 46, "logdir": 46, "seen": 46, "under": 46, "tab": 46, "click": 46, "deeper": 46, "view": 46, "tag": 46, "show": 46, "These": 46, "scalar": 46, "were": 46, "wai": [46, 47], "mathemat": 46, "divis": 46, "dot": 46, "product": 46, "euclidean": 46, "along": 46, "pair": 46, "would": 46, "expect": 46, "mani": 46, "few": [46, 47], "As": 46, "20": 46, "insert": 46, "chosen": 46, "second": 46, "sourc": 47, "research": 47, "develop": 47, "engin": 47, "easili": 47, "state": 47, "art": 47, "work": 47, "soni": 47, "semiconductor": 47, "israel": 47, "git": 47, "clone": 47, "com": 47, "model_optim": 47, "python": 47, "setup": 47, "py": 47, "pypi": 47, "latest": 47, "stabl": 47, "releas": 47, "nightli": 47, "unstabl": 47, "visual": 47, "cosin": 47, "comparison": 47, "just": 47, "minut": 47, "visit": 47, "notebook": 47, "doesn": 47, "t": 47, "out1": 47, "out2": 47, "out3": 47, "habi": 47, "h": 47, "v": 47, "peretz": 47, "r": 47, "cohen": 47, "dikstein": 47, "dror": 47, "o": 47, "diamant": 47, "i": 47, "jen": 47, "netzer": 47, "2021": 47, "hptq": 47, "preprint": 47, "gordon": 47, "2023": 47, "eptq": 47, "enhanc": 47}, "objects": {"model_compression_toolkit": [[2, 0, 1, "", "DefaultDict"], [18, 3, 1, "", "get_target_platform_capabilities"], [22, 3, 1, "", "keras_load_quantized_model"], [34, 3, 1, "", "set_log_folder"]], "model_compression_toolkit.DefaultDict": [[2, 1, 1, "", "get"], [2, 1, 1, "", "keys"]], "model_compression_toolkit.core": [[0, 0, 1, "", "BitWidthConfig"], [3, 0, 1, "", "ChannelAxis"], [37, 0, 1, "", "CoreConfig"], [38, 0, 1, "", "DebugConfig"], [3, 0, 1, "", "FrameworkInfo"], [5, 0, 1, "", "MixedPrecisionQuantizationConfig"], [6, 0, 1, "", "MpDistanceWeighting"], [9, 0, 1, "", "QuantizationConfig"], [10, 0, 1, "", "QuantizationErrorMethod"], [11, 0, 1, "", "ResourceUtilization"], [21, 3, 1, "", "keras_resource_utilization_data"], [29, 3, 1, "", "pytorch_resource_utilization_data"]], 
"model_compression_toolkit.core.BitWidthConfig": [[0, 2, 1, "", "manual_activation_bit_width_selection_list"]], "model_compression_toolkit.core.common.quantization.bit_width_config": [[0, 0, 1, "", "ManualBitWidthSelection"]], "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection": [[0, 2, 1, "", "bit_width"], [0, 2, 1, "", "filter"]], "model_compression_toolkit.core.network_editor": [[41, 0, 1, "", "ChangeCandidatesActivationQuantConfigAttr"], [41, 0, 1, "", "ChangeCandidatesActivationQuantizationMethod"], [41, 0, 1, "", "ChangeCandidatesWeightsQuantConfigAttr"], [41, 0, 1, "", "ChangeCandidatesWeightsQuantizationMethod"], [41, 0, 1, "", "ChangeFinalActivationQuantConfigAttr"], [41, 0, 1, "", "ChangeFinalWeightsQuantConfigAttr"], [41, 0, 1, "", "ChangeFinalWeightsQuantizationMethod"], [41, 0, 1, "", "ChangeQuantizationParamFunction"], [41, 0, 1, "", "EditRule"], [41, 0, 1, "", "NodeNameFilter"], [41, 0, 1, "", "NodeNameScopeFilter"], [41, 0, 1, "", "NodeTypeFilter"]], "model_compression_toolkit.data_generation": [[1, 0, 1, "", "BNLayerWeightingType"], [1, 0, 1, "", "BatchNormAlignemntLossType"], [1, 0, 1, "", "DataGenerationConfig"], [1, 0, 1, "", "DataInitType"], [1, 0, 1, "", "ImageGranularity"], [1, 0, 1, "", "ImageNormalizationType"], [1, 0, 1, "", "ImagePipelineType"], [1, 0, 1, "", "OutputLossType"], [1, 0, 1, "", "SchedulerType"], [14, 3, 1, "", "get_keras_data_generation_config"], [16, 3, 1, "", "get_pytorch_data_generation_config"], [19, 3, 1, "", "keras_data_generation_experimental"], [27, 3, 1, "", "pytorch_data_generation_experimental"]], "model_compression_toolkit.exporter": [[39, 0, 1, "", "KerasExportSerializationFormat"], [39, 0, 1, "", "PytorchExportSerializationFormat"], [39, 0, 1, "", "QuantizationFormat"], [39, 0, 1, "", "keras_export_model"], [39, 0, 1, "", "pytorch_export_model"]], "model_compression_toolkit.gptq": [[4, 0, 1, "", "GPTQHessianScoresConfig"], [4, 0, 1, "", "GradientPTQConfig"], [4, 0, 1, 
"", "RoundingType"], [15, 3, 1, "", "get_keras_gptq_config"], [17, 3, 1, "", "get_pytorch_gptq_config"], [20, 3, 1, "", "keras_gradient_post_training_quantization"], [28, 3, 1, "", "pytorch_gradient_post_training_quantization"]], "model_compression_toolkit.pruning": [[7, 0, 1, "", "ChannelsFilteringStrategy"], [7, 0, 1, "", "ImportanceMetric"], [7, 0, 1, "", "PruningConfig"], [8, 0, 1, "", "PruningInfo"], [24, 3, 1, "", "keras_pruning_experimental"], [31, 3, 1, "", "pytorch_pruning_experimental"]], "model_compression_toolkit.pruning.PruningConfig": [[7, 2, 1, "", "channels_filtering_strategy"], [7, 2, 1, "", "importance_metric"], [7, 2, 1, "", "num_score_approximations"]], "model_compression_toolkit.pruning.PruningInfo": [[8, 4, 1, "", "importance_scores"], [8, 4, 1, "", "pruning_masks"]], "model_compression_toolkit.ptq": [[23, 3, 1, "", "keras_post_training_quantization"], [30, 3, 1, "", "pytorch_post_training_quantization"]], "model_compression_toolkit.qat": [[42, 0, 1, "", "QATConfig"], [42, 0, 1, "", "TrainingMethod"], [25, 3, 1, "", "keras_quantization_aware_training_finalize_experimental"], [26, 3, 1, "", "keras_quantization_aware_training_init_experimental"], [32, 3, 1, "", "pytorch_quantization_aware_training_finalize_experimental"], [33, 3, 1, "", "pytorch_quantization_aware_training_init_experimental"]], "model_compression_toolkit.target_platform": [[40, 0, 1, "", "AttributeFilter"], [43, 0, 1, "", "AttributeQuantizationConfig"], [40, 0, 1, "", "Eq"], [43, 0, 1, "", "Fusing"], [40, 0, 1, "", "Greater"], [40, 0, 1, "", "GreaterEq"], [43, 0, 1, "", "LayerFilterParams"], [40, 0, 1, "", "NotEq"], [43, 0, 1, "", "OpQuantizationConfig"], [43, 0, 1, "", "OperationsSetToLayers"], [43, 0, 1, "", "OperationsToLayers"], [43, 0, 1, "", "OperatorSetConcat"], [43, 0, 1, "", "OperatorsSet"], [43, 0, 1, "", "QuantizationConfigOptions"], [43, 0, 1, "", "QuantizationMethod"], [40, 0, 1, "", "Smaller"], [40, 0, 1, "", "SmallerEq"], [43, 0, 1, "", 
"TargetPlatformCapabilities"], [43, 0, 1, "", "TargetPlatformModel"]], "model_compression_toolkit.trainable_infrastructure": [[44, 0, 1, "", "BaseKerasTrainableQuantizer"], [44, 0, 1, "", "BasePytorchTrainableQuantizer"], [44, 0, 1, "", "TrainableQuantizerActivationConfig"], [44, 0, 1, "", "TrainableQuantizerWeightsConfig"]], "model_compression_toolkit.xquant.common.xquant_config": [[12, 0, 1, "", "XQuantConfig"]], "model_compression_toolkit.xquant.keras.facade_xquant_report": [[35, 3, 1, "", "xquant_report_keras_experimental"]], "model_compression_toolkit.xquant.pytorch.facade_xquant_report": [[36, 3, 1, "", "xquant_report_pytorch_experimental"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function", "4": "py:property"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"], "4": ["py", "property", "Python property"]}, "titleterms": {"bitwidthconfig": 0, "manualbitwidthselect": 0, "data": [1, 19, 27], "gener": [1, 19, 27], "configur": [1, 7, 12, 46], "imagegranular": 1, "schedulertyp": 1, "batchnormalignemntlosstyp": 1, "outputlosstyp": 1, "datainittyp": 1, "bnlayerweightingtyp": 1, "imagepipelinetyp": 1, "imagenormalizationtyp": 1, "defaultdict": 2, "class": [2, 3, 4], "frameworkinfo": 3, "channelaxi": 3, "gradientptqconfig": [4, 15, 17], "gptqhessianscoresconfig": 4, "roundingtyp": 4, "mixedprecisionquantizationconfig": 5, "mpdistanceweight": 6, "prune": [7, 8, 13, 24, 31], "importancemetr": 7, "channelsfilteringstrategi": 7, "inform": [8, 21, 29], "quantizationconfig": 9, "quantizationerrormethod": 10, "resourceutil": 11, "xquant": [12, 13, 35, 36], "api": [13, 47], "doc": 13, "ptq": 13, "gptq": 13, "qat": 13, "core": 13, "data_gener": 13, "export": [13, 39], "trainable_infrastructur": [13, 44], "set_log_fold": 13, "keras_load_quantized_model": 13, "target_platform": [13, 43], "indic": 13, "tabl": 13, 
"get": [14, 15, 16, 17, 18, 21, 29], "datagenerationconfig": [14, 16], "kera": [14, 15, 19, 20, 21, 22, 23, 24, 25, 26, 35, 39], "model": [14, 15, 16, 17, 21, 22, 25, 26, 29, 32, 33, 39, 47], "pytorch": [16, 17, 27, 28, 29, 30, 31, 32, 33, 36, 39], "targetplatformcap": [18, 43], "gradient": [20, 28], "base": [20, 28], "post": [20, 23, 28, 30], "train": [20, 23, 25, 26, 28, 30, 32, 33], "quantiz": [20, 22, 23, 25, 26, 28, 30, 32, 33, 39], "resourc": [21, 29], "util": [21, 29], "load": 22, "structur": [24, 31], "awar": [25, 26, 32, 33], "final": [25, 32], "init": [26, 33], "enabl": 34, "logger": 34, "report": [35, 36], "coreconfig": 37, "debug_config": 38, "modul": [38, 39, 41, 42, 43, 44], "debugconfig": 38, "quantizationformat": 39, "kerasexportserializationformat": 39, "keras_export_model": 39, "tutori": 39, "serial": 39, "format": 39, "mctq": 39, "pytorchexportserializationformat": 39, "pytorch_export_model": 39, "onnx": 39, "opset": 39, "version": 39, "us": 39, "infer": 39, "layer": 40, "attribut": 40, "filter": [40, 41], "network_editor": 41, "editrul": 41, "action": 41, "qat_config": 42, "trainingmethod": 42, "qatconfig": 42, "quantizationmethod": 43, "opquantizationconfig": 43, "attributequantizationconfig": 43, "quantizationconfigopt": 43, "targetplatformmodel": 43, "operatorsset": 43, "fuse": 43, "operatorsetconcat": 43, "operationstolay": 43, "operationssettolay": 43, "layerfilterparam": 43, "basekerastrainablequant": 44, "basepytorchtrainablequant": 44, "trainablequantizerweightsconfig": 44, "trainablequantizeractivationconfig": 44, "visual": 46, "within": 46, "tensorboard": 46, "cosin": 46, "similar": 46, "comparison": 46, "mix": 46, "precis": 46, "bit": 46, "width": 46, "compress": 47, "toolkit": 47, "user": 47, "guid": 47, "overview": 47, "instal": 47, "support": 47, "featur": 47, "quickstart": 47, "document": 47, "technic": 47, "constraint": 47, "refer": 47}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, 
"sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["api/api_docs/classes/BitWidthConfig", "api/api_docs/classes/DataGenerationConfig", "api/api_docs/classes/DefaultDict", "api/api_docs/classes/FrameworkInfo", "api/api_docs/classes/GradientPTQConfig", "api/api_docs/classes/MixedPrecisionQuantizationConfig", "api/api_docs/classes/MpDistanceWeighting", "api/api_docs/classes/PruningConfig", "api/api_docs/classes/PruningInfo", "api/api_docs/classes/QuantizationConfig", "api/api_docs/classes/QuantizationErrorMethod", "api/api_docs/classes/ResourceUtilization", "api/api_docs/classes/XQuantConfig", "api/api_docs/index", "api/api_docs/methods/get_keras_data_generation_config", "api/api_docs/methods/get_keras_gptq_config", "api/api_docs/methods/get_pytorch_data_generation_config", "api/api_docs/methods/get_pytroch_gptq_config", "api/api_docs/methods/get_target_platform_capabilities", "api/api_docs/methods/keras_data_generation_experimental", "api/api_docs/methods/keras_gradient_post_training_quantization", "api/api_docs/methods/keras_kpi_data", "api/api_docs/methods/keras_load_quantizad_model", "api/api_docs/methods/keras_post_training_quantization", "api/api_docs/methods/keras_pruning_experimental", "api/api_docs/methods/keras_quantization_aware_training_finalize_experimental", "api/api_docs/methods/keras_quantization_aware_training_init_experimental", "api/api_docs/methods/pytorch_data_generation_experimental", "api/api_docs/methods/pytorch_gradient_post_training_quantization", "api/api_docs/methods/pytorch_kpi_data", "api/api_docs/methods/pytorch_post_training_quantization", "api/api_docs/methods/pytorch_pruning_experimental", "api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental", 
"api/api_docs/methods/pytorch_quantization_aware_training_init_experimental", "api/api_docs/methods/set_logger_path", "api/api_docs/methods/xquant_report_keras_experimental", "api/api_docs/methods/xquant_report_pytorch_experimental", "api/api_docs/modules/core_config", "api/api_docs/modules/debug_config", "api/api_docs/modules/exporter", "api/api_docs/modules/layer_filters", "api/api_docs/modules/network_editor", "api/api_docs/modules/qat_config", "api/api_docs/modules/target_platform_capabilities", "api/api_docs/modules/trainable_infrastructure", "api/api_docs/notes/tpc_note", "guidelines/visualization", "index"], "filenames": ["api/api_docs/classes/BitWidthConfig.rst", "api/api_docs/classes/DataGenerationConfig.rst", "api/api_docs/classes/DefaultDict.rst", "api/api_docs/classes/FrameworkInfo.rst", "api/api_docs/classes/GradientPTQConfig.rst", "api/api_docs/classes/MixedPrecisionQuantizationConfig.rst", "api/api_docs/classes/MpDistanceWeighting.rst", "api/api_docs/classes/PruningConfig.rst", "api/api_docs/classes/PruningInfo.rst", "api/api_docs/classes/QuantizationConfig.rst", "api/api_docs/classes/QuantizationErrorMethod.rst", "api/api_docs/classes/ResourceUtilization.rst", "api/api_docs/classes/XQuantConfig.rst", "api/api_docs/index.rst", "api/api_docs/methods/get_keras_data_generation_config.rst", "api/api_docs/methods/get_keras_gptq_config.rst", "api/api_docs/methods/get_pytorch_data_generation_config.rst", "api/api_docs/methods/get_pytroch_gptq_config.rst", "api/api_docs/methods/get_target_platform_capabilities.rst", "api/api_docs/methods/keras_data_generation_experimental.rst", "api/api_docs/methods/keras_gradient_post_training_quantization.rst", "api/api_docs/methods/keras_kpi_data.rst", "api/api_docs/methods/keras_load_quantizad_model.rst", "api/api_docs/methods/keras_post_training_quantization.rst", "api/api_docs/methods/keras_pruning_experimental.rst", "api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.rst", 
"api/api_docs/methods/keras_quantization_aware_training_init_experimental.rst", "api/api_docs/methods/pytorch_data_generation_experimental.rst", "api/api_docs/methods/pytorch_gradient_post_training_quantization.rst", "api/api_docs/methods/pytorch_kpi_data.rst", "api/api_docs/methods/pytorch_post_training_quantization.rst", "api/api_docs/methods/pytorch_pruning_experimental.rst", "api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.rst", "api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.rst", "api/api_docs/methods/set_logger_path.rst", "api/api_docs/methods/xquant_report_keras_experimental.rst", "api/api_docs/methods/xquant_report_pytorch_experimental.rst", "api/api_docs/modules/core_config.rst", "api/api_docs/modules/debug_config.rst", "api/api_docs/modules/exporter.rst", "api/api_docs/modules/layer_filters.rst", "api/api_docs/modules/network_editor.rst", "api/api_docs/modules/qat_config.rst", "api/api_docs/modules/target_platform_capabilities.rst", "api/api_docs/modules/trainable_infrastructure.rst", "api/api_docs/notes/tpc_note.rst", "guidelines/visualization.rst", "index.rst"], "titles": ["BitWidthConfig", "Data Generation Configuration", "DefaultDict Class", "FrameworkInfo Class", "GradientPTQConfig Class", "MixedPrecisionQuantizationConfig", "MpDistanceWeighting", "Pruning Configuration", "Pruning Information", "QuantizationConfig", "QuantizationErrorMethod", "ResourceUtilization", "XQuant Configuration", "API Docs", "Get DataGenerationConfig for Keras Models", "Get GradientPTQConfig for Keras Models", "Get DataGenerationConfig for Pytorch Models", "Get GradientPTQConfig for Pytorch Models", "Get FrameworkQuantizationCapabilities", "Keras Data Generation", "Keras Gradient Based Post Training Quantization", "Get Resource Utilization information for Keras Models", "Load Quantized Keras Model", "Keras Post Training Quantization", "Keras Structured Pruning", "Keras Quantization Aware Training Model Finalize", 
"Keras Quantization Aware Training Model Init", "Pytorch Data Generation", "Pytorch Gradient Based Post Training Quantization", "Get Resource Utilization information for PyTorch Models", "Pytorch Post Training Quantization", "Pytorch Structured Pruning", "PyTorch Quantization Aware Training Model Finalize", "PyTorch Quantization Aware Training Model Init", "Enable a Logger", "XQuant Report Keras", "XQuant Report Pytorch", "CoreConfig", "debug_config Module", "exporter Module", "Layer Attributes Filters", "network_editor Module", "qat_config Module", "target_platform_capabilities Module", "trainable_infrastructure Module", "<no title>", "Visualization within TensorBoard", "Model Compression Toolkit User Guide"], "terms": {"class": [0, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 22, 37, 38, 39, 40, 41, 42, 43, 44], "model_compression_toolkit": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "core": [0, 3, 5, 6, 9, 10, 11, 20, 21, 23, 24, 25, 26, 28, 29, 31, 32, 33, 37, 38, 41], "manual_activation_bit_width_selection_list": 0, "factori": [0, 4, 37, 38], "manag": 0, "manual": [0, 13, 37], "bit": [0, 5, 11, 13, 20, 23, 25, 26, 33, 37, 39, 41, 43, 44, 47], "width": [0, 5, 13, 20, 23, 26, 27, 33, 37, 43, 47], "configur": [0, 4, 5, 9, 11, 13, 14, 15, 16, 17, 19, 20, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 35, 36, 37, 38, 40, 41, 42, 43, 44, 47], "A": [0, 3, 4, 5, 8, 9, 13, 15, 17, 18, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 41, 42, 43, 47], "list": [0, 1, 3, 5, 14, 15, 16, 19, 27, 38, 41, 47], "object": [0, 3, 4, 5, 7, 11, 14, 15, 16, 17, 18, 20, 21, 22, 23, 25, 26, 28, 29, 30, 33, 39, 41, 43, 44], "defin": [0, 4, 5, 6, 15, 17, 19, 20, 23, 24, 25, 26, 27, 28, 30, 31, 43, 44], "type": [0, 1, 2, 4, 5, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 27, 29, 31, 35, 36, 39, 41, 43], "common": [0, 12], "quantiz": [0, 3, 4, 5, 9, 10, 12, 
13, 15, 17, 19, 21, 27, 29, 35, 36, 37, 38, 41, 42, 43, 44, 46, 47], "bit_width_config": [0, 37], "filter": [0, 1, 7], "bit_width": 0, "encapsul": [0, 9], "select": [0, 3, 7, 9, 10, 13, 37, 39, 42, 43, 44], "specif": [0, 3, 24, 31, 41, 46], "The": [0, 1, 3, 4, 5, 6, 7, 8, 11, 12, 14, 15, 16, 17, 19, 20, 23, 24, 25, 26, 27, 28, 30, 31, 33, 35, 36, 39, 41, 43, 44, 46], "us": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47], "node": [0, 26, 33, 41, 44, 46], "manipul": [0, 1], "basenodematch": 0, "appli": [0, 1, 9, 13, 39, 40, 41, 43], "int": [0, 1, 4, 5, 7, 14, 15, 16, 17, 19, 27, 43, 44], "data_gener": [1, 14, 16, 19, 27], "datagenerationconfig": [1, 13, 19, 27], "n_iter": [1, 14, 16, 19, 27], "optim": [1, 3, 4, 11, 13, 14, 15, 16, 17, 18, 20, 21, 23, 26, 28, 29, 30, 33, 37, 43, 44, 45, 47], "data_gen_batch_s": [1, 14, 16, 19, 27], "initial_lr": [1, 14, 16], "output_loss_multipli": [1, 14, 16], "image_granular": [1, 14, 16], "allimag": [1, 16], "scheduler_typ": [1, 14, 16], "none": [1, 2, 4, 5, 9, 12, 15, 17, 18, 20, 22, 23, 26, 28, 30, 33, 37, 39, 41, 42, 43, 44], "bn_alignment_loss_typ": [1, 14, 16], "output_loss_typ": [1, 14, 16], "data_init_typ": [1, 14, 16], "layer_weighting_typ": [1, 14, 16], "image_pipeline_typ": [1, 14, 16], "image_normalization_typ": [1, 14, 16], "extra_pixel": [1, 14, 16], "0": [1, 3, 4, 5, 8, 9, 14, 15, 16, 20, 23, 24, 25, 26, 31, 39, 44], "bn_layer_typ": [1, 14, 16], "last_layer_typ": [1, 16], "image_clip": [1, 14, 16], "true": [1, 5, 9, 15, 16, 17, 22, 32, 33, 44], "initi": [1, 2, 4, 7, 12, 14, 16, 26, 33, 44], "paramet": [1, 2, 3, 4, 5, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "number": [1, 4, 5, 7, 14, 15, 16, 17, 19, 20, 23, 24, 26, 27, 28, 30, 31, 33, 43, 44], "iter": [1, 14, 16, 19, 20, 23, 26, 27, 28, 30, 33], 
"ani": [1, 2, 3, 35, 36, 39, 40, 44], "batch": [1, 4, 5, 14, 15, 16, 17, 19, 20, 23, 26, 27, 28, 30, 33], "size": [1, 4, 5, 14, 15, 16, 17, 19, 20, 23, 25, 26, 27, 33, 39, 44], "float": [1, 4, 5, 14, 15, 16, 17, 20, 26, 28, 30, 33, 35, 36, 39, 43, 44, 46], "learn": [1, 14, 15, 16, 44], "rate": [1, 14, 15, 16], "multipli": [1, 14, 16], "output": [1, 3, 6, 14, 16, 19, 20, 23, 26, 27, 28, 30, 32, 33, 43, 46, 47], "loss": [1, 4, 14, 15, 16, 17, 20, 24, 28, 30, 31], "granular": [1, 14, 16], "imag": [1, 4, 5, 14, 16, 19, 20, 23, 26, 27, 28, 30, 33, 46], "default": [1, 2, 4, 7, 14, 15, 16, 17, 18, 20, 23, 24, 28, 30, 31, 37, 39, 42, 43, 46], "schedul": [1, 4, 14, 16, 38], "batchnorm": [1, 14, 16, 19, 20, 23, 26, 28, 30, 33], "align": [1, 14, 16], "layer": [1, 3, 5, 6, 8, 14, 15, 16, 17, 19, 20, 23, 24, 25, 26, 28, 30, 31, 32, 33, 38, 39, 41, 43, 44, 46], "weight": [1, 3, 4, 5, 6, 9, 11, 13, 14, 15, 16, 17, 20, 21, 24, 26, 28, 29, 30, 31, 32, 33, 39, 41, 42, 43, 44, 46], "pipelin": [1, 14, 16], "normal": [1, 4, 5, 14, 16], "union": [1, 14, 16, 19, 20, 21, 23, 24, 26, 27, 28, 29, 30, 31, 33, 43], "tupl": [1, 3, 14, 16, 19, 20, 24, 27, 31, 41, 43], "extra": [1, 14, 16], "pixel": [1, 14, 16], "add": [1, 3, 12, 14, 16, 22, 44], "input": [1, 5, 14, 16, 20, 23, 26, 28, 30, 33, 43], "bool": [1, 4, 5, 14, 15, 16, 17, 38, 43, 44], "flag": [1, 43], "enabl": [1, 5, 9, 13, 15, 17, 38, 44, 47], "clip": [1, 14, 16], "valu": [1, 2, 3, 4, 5, 6, 7, 10, 12, 20, 23, 24, 25, 26, 31, 39, 40, 41, 43, 44], "an": [1, 2, 3, 4, 8, 13, 20, 23, 26, 33, 35, 36, 39, 40, 41, 43, 44, 47], "enum": [1, 3, 4, 5, 6, 7, 10, 44], "choos": [1, 4, 39], "depend": [1, 20, 23, 26, 28, 30, 33], "when": [1, 2, 3, 4, 5, 7, 10, 11, 13, 15, 17, 20, 23, 25, 26, 38, 40, 42, 43, 44, 46], "imagewis": 1, "batchwis": [1, 14], "reduce_on_plateau": [1, 14], "reduceonplateau": 1, "step": [1, 4, 44], "l2_squar": [1, 14, 16], "l2": 1, "squar": [1, 10], "No": 1, "negative_min_max_diff": [1, 16], "mean": [1, 4, 10, 46], "neg": 1, 
"min": [1, 3, 9, 10, 20, 23, 26, 28, 30, 33, 46], "max": [1, 3, 9, 10, 20, 21, 23, 26, 28, 29, 30, 33, 46], "differ": [1, 5, 9, 13, 20, 23, 25, 26, 39, 43, 46], "inverse_min_max_diff": 1, "1": [1, 3, 4, 5, 8, 9, 17, 19, 20, 21, 23, 24, 25, 27, 28, 29, 30, 31, 32, 39, 47], "regularized_min_max_diff": [1, 14], "regular": [1, 4, 15, 17], "gaussian": [1, 14, 16], "divers": 1, "averag": [1, 5, 6, 14, 15, 16, 17], "same": [1, 39, 43], "per": [1, 3, 4, 20, 23, 26, 33, 43, 44, 46], "first_layer_multipli": 1, "first": [1, 20, 23, 26, 28, 30, 33, 39, 46], "all": [1, 3, 4, 6, 9, 41, 44, 46], "other": [1, 15, 17], "grad": 1, "gradient": [1, 4, 13, 30, 47], "base": [1, 4, 5, 9, 10, 13, 15, 17, 19, 24, 27, 30, 31, 44, 47], "smoothing_and_augment": [1, 14, 16], "smooth": [1, 44], "crop": 1, "flip": 1, "ident": 1, "do": [1, 46], "transform": [1, 20, 23, 26, 28, 30, 33], "torchvis": [1, 16, 28, 29, 30, 31, 32, 33, 39], "keras_appl": [1, 14], "imagenet": 1, "no_norm": 1, "known_dict": 2, "default_valu": 2, "dictionari": [2, 3, 4, 12, 25, 26, 35, 36, 39, 41, 42, 44], "It": [2, 12, 43, 44], "wrap": [2, 3, 22, 26, 33, 40, 43, 44], "given": [2, 20, 21, 23, 26, 28, 29, 30, 33], "return": [2, 4, 5, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 39], "its": [2, 3, 22, 24, 31, 40, 43, 46], "request": 2, "If": [2, 3, 4, 5, 15, 17, 20, 23, 25, 26, 28, 30, 37, 39, 40, 43], "kei": [2, 6, 12, 24, 31, 40], "present": [2, 46], "pass": [2, 3, 15, 17, 20, 23, 24, 25, 26, 28, 30, 31, 32, 33, 41], "gener": [2, 12, 13, 14, 16, 20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 43, 46, 47], "provid": [2, 5, 19, 24, 27, 31, 39, 43, 44, 46], "empti": 2, "get": [2, 3, 4, 5, 13, 20, 23, 25, 26, 28, 30, 32, 33, 43, 46], "inner": 2, "default_factori": 2, "exist": [2, 18, 41], "wa": [2, 39], "follow": [3, 4, 44, 46], "api": [3, 4, 18, 23, 26, 33, 42], "can": [3, 4, 6, 9, 13, 15, 17, 19, 21, 24, 27, 29, 31, 38, 39, 41, 43, 44, 46, 47], "mct": [3, 9, 13, 15, 17, 
18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 37, 38, 39, 41, 43, 44, 45, 46, 47], "framework": [3, 18, 44], "relat": [3, 8, 13, 43], "inform": [3, 4, 13, 15, 17, 18, 20, 23, 24, 26, 28, 30, 31, 33, 38, 43, 44, 45], "network": [3, 7, 32, 37, 38, 41, 46, 47], "activation_quantizer_map": 3, "kernel_channels_map": 3, "activation_min_max_map": 3, "layer_min_max_map": 3, "kernel_ops_attributes_map": 3, "out_channel_axis_map": 3, "about": [3, 4, 8, 13, 15, 17, 20, 23, 25, 26, 39, 43, 44], "librari": [3, 9], "need": [3, 20, 23, 26, 28, 30, 33, 39, 40, 44], "model": [3, 4, 5, 8, 9, 11, 12, 13, 18, 19, 20, 23, 24, 27, 28, 30, 31, 35, 36, 37, 41, 42, 43, 44, 46], "hold": [3, 37, 40, 43], "how": [3, 7, 20, 21, 23, 26, 28, 30, 33, 39, 44, 47], "thei": 3, "should": [3, 7, 15, 20, 21, 23, 24, 25, 26, 28, 30, 31, 33, 39, 43, 46], "multipl": [3, 5, 43], "map": [3, 43], "kernel": [3, 20, 23, 25, 26, 41, 44], "channel": [3, 7, 8, 13, 24, 31, 43, 44, 46], "indic": [3, 8, 24, 31, 43], "etc": [3, 11, 13, 20, 23, 26, 28, 30, 33, 46], "ar": [3, 12, 18, 20, 23, 24, 26, 28, 30, 31, 33, 39, 43, 44, 45, 46], "divid": 3, "three": 3, "group": [3, 7, 24, 31, 43], "kernel_op": 3, "have": [3, 39, 40, 46], "coeffici": [3, 20, 23, 25, 26, 28, 30, 43, 44], "e": [3, 20, 23, 26, 28, 30, 33, 47], "g": [3, 20, 23, 26, 28, 30, 33], "conv2d": [3, 19, 20, 23, 25, 26, 27, 41, 43], "dens": [3, 19], "activation_op": 3, "relu": 3, "no_quantization_op": 3, "reshap": [3, 19], "transpos": 3, "dict": [3, 8, 12, 35, 36, 39, 43, 44], "quantizationmethod": [3, 44], "callabl": [3, 5, 12, 15, 17, 20, 21, 23, 24, 26, 28, 29, 30, 31, 33, 35, 36, 39, 40], "from": [3, 4, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 39, 41, 43, 44, 45, 46, 47], "function": [3, 4, 5, 6, 13, 14, 15, 16, 17, 18, 19, 22, 24, 27, 31, 41, 43, 44], "defaultdict": [3, 13], "out": [3, 7], "str": [3, 12, 20, 21, 23, 24, 26, 28, 29, 30, 31, 33, 35, 36, 39, 40, 43], "activ": [3, 4, 9, 11, 20, 21, 23, 26, 28, 29, 30, 33, 
39, 41, 42, 43, 44, 46], "oper": [3, 11, 38, 40, 43], "attirbut": 3, "s": [3, 7, 9, 11, 20, 23, 24, 25, 26, 28, 30, 31, 33, 39, 40, 41, 43, 44, 47], "comput": [3, 4, 5, 6, 10, 12, 13, 15, 17, 21, 29, 35, 38, 46], "statist": [3, 20, 23, 26, 28, 30, 33, 46], "exampl": [3, 9, 15, 17, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 41, 43, 44, 47], "kera": [3, 13, 41, 44, 47], "we": [3, 19, 20, 23, 24, 26, 27, 31, 33, 39, 41, 43, 44, 46], "want": 3, "onli": [3, 4, 5, 6, 7, 18, 20, 23, 25, 26, 39], "set": [3, 6, 12, 13, 15, 17, 19, 20, 23, 24, 25, 26, 27, 28, 30, 31, 33, 34, 35, 36, 41, 43, 44, 46], "know": 3, "3": [3, 15, 17, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 39, 44], "2": [3, 9, 15, 19, 27, 43, 44, 47], "respectivli": 3, "import": [3, 7, 8, 9, 13, 15, 17, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 39, 41, 44, 46], "tensorflow": [3, 13, 15, 19, 20, 21, 23, 24, 25, 26, 39, 41, 43, 47], "tf": [3, 15, 19, 22, 25, 26], "Then": [3, 20, 23, 26, 28, 30, 33, 41, 46], "creat": [3, 4, 9, 13, 14, 15, 16, 17, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 39, 40, 41, 43], "rang": [3, 20, 23, 26, 28, 30, 33], "advanc": 3, "save": [3, 12, 26, 34, 39, 44], "collect": [3, 20, 23, 26, 28, 30, 33, 35, 36, 46], "time": [3, 7, 44], "For": [3, 9, 18, 19, 20, 23, 25, 26, 27, 33, 39, 43, 44, 45, 46, 47], "softmax": 3, "format": [3, 13], "index": [3, 13], "axi": [3, 44], "nhwc": 3, "last": [3, 4, 6], "nchw": 3, "instanc": [4, 13, 15, 17, 41, 43, 46], "which": [4, 7, 39, 40, 41, 43, 44], "post": [4, 13, 24, 26, 31, 33, 47], "train": [4, 13, 42, 44, 47], "knowledg": [4, 47], "distil": [4, 47], "teacher": 4, "student": 4, "gptq": [4, 15, 17, 20, 28], "n_epoch": [4, 15, 17, 20], "optimizer_rest": [4, 15, 17], "train_bia": 4, "hessian_weights_config": 4, "gradual_activation_quantization_config": 4, "regularization_factor": [4, 15, 17], "rounding_typ": 4, "softquant": 4, "optimizer_quantization_paramet": 4, "optimizer_bia": 4, "log_funct": [4, 15, 
17], "gptq_quantizer_params_overrid": 4, "gradientptq": [4, 13], "repres": [4, 5, 11, 15, 17, 20, 23, 24, 25, 26, 28, 30, 31, 32, 33, 35, 36, 39, 41, 43, 46], "dataset": [4, 15, 17, 20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 39, 46], "epoch": [4, 15, 17], "see": [4, 17, 47], "multiple_tensors_mse_loss": 4, "expect": [4, 46], "interfac": [4, 17], "bia": [4, 15, 17, 20, 23, 25, 26], "whether": [4, 5, 8, 14, 15, 16, 17, 22, 38, 39, 43, 44], "updat": 4, "dure": [4, 13, 14, 15, 16, 17, 18, 35, 36, 39, 41, 43, 44, 45, 46], "includ": [4, 8, 20, 23, 26, 28, 30, 33, 44], "necessari": [4, 39, 44], "argument": [4, 6, 43], "run": [4, 15, 17, 39, 46], "hessian": [4, 5, 7, 10, 15, 17, 24, 31, 47], "score": [4, 5, 7, 8, 10, 15, 17, 24, 31], "gradual": 4, "point": [4, 5, 15, 17, 20, 28, 30, 35, 36, 43, 46], "factor": [4, 5, 10, 15, 17], "round": 4, "overrid": [4, 42], "rest": 4, "log": [4, 12, 13, 15, 17, 34, 46], "process": [4, 5, 9, 13, 14, 15, 16, 17, 18, 19, 24, 27, 31, 37, 38, 41, 42, 43, 45, 46], "instanti": [4, 9, 42], "per_sampl": 4, "hessians_num_sampl": 4, "norm_scor": [4, 5], "log_norm": 4, "scale_log_norm": 4, "fals": [4, 5, 9, 14, 15, 17, 38, 44], "hessian_batch_s": [4, 5, 15, 17], "32": [4, 5], "metric": [4, 5, 6, 7, 12, 13, 35, 36], "sampl": [4, 15, 17, 46], "attent": [4, 15, 17, 44], "between": [4, 5, 12, 20, 28, 30, 43, 46], "scale": [4, 5, 43], "final": [4, 5, 13, 19, 27, 41, 46, 47], "vector": [4, 46], "method": [4, 5, 6, 7, 10, 13, 24, 31, 39, 41, 42, 43, 44], "ste": [4, 42, 44], "straight": [4, 44], "through": [4, 19, 24, 27, 44], "estim": [4, 44], "q_fraction_scheduler_polici": 4, "By": [4, 24, 28, 30, 31, 39, 46], "fraction": 4, "increas": [4, 5], "linearli": 4, "throughout": 4, "config": [4, 19, 20, 23, 24, 25, 26, 27, 28, 31, 32, 33, 37, 41, 44], "linear": [4, 27], "anneal": 4, "current": [4, 39], "support": [4, 39], "initial_q_fract": 4, "target_q_fract": 4, "start_step": 4, "end_step": 4, "target": [4, 13, 18, 20, 21, 23, 24, 25, 26, 29, 31, 
32, 33, 43], "begin": 4, "complet": 4, "mix": [5, 6, 11, 13, 20, 21, 23, 25, 26, 28, 29, 30, 33, 37, 43, 47], "precis": [5, 6, 11, 13, 20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 33, 37, 43, 47], "compute_distance_fn": 5, "distance_weighting_method": 5, "mpdistanceweight": [5, 13], "avg": [5, 6], "num_of_imag": [5, 20, 23], "configuration_overwrit": 5, "num_interest_points_factor": 5, "use_hessian_based_scor": 5, "refine_mp_solut": 5, "metric_normalization_threshold": 5, "10000000000": 5, "distanc": [5, 6, 13], "two": [5, 12, 20, 23, 26, 28, 30, 33, 39, 43, 46], "tensor": [5, 12, 15, 17, 19, 21, 27, 29, 43, 44, 46, 47], "pre": 5, "each": [5, 7, 8, 20, 23, 24, 26, 28, 30, 31, 33, 41, 43, 44, 46], "among": 5, "sensit": [5, 7, 24, 31], "evalu": [5, 35, 36], "compar": [5, 20, 28, 30, 46], "integ": [5, 39, 43], "overwrit": 5, "predefin": [5, 7], "one": [5, 40, 46], "zero": [5, 43], "percentag": 5, "reduc": [5, 24, 31], "interest": 5, "calcul": [5, 7, 13, 20, 21, 23, 24, 26, 28, 29, 30, 31, 33], "try": 5, "improv": [5, 24, 31], "greedi": [5, 7], "algorithm": 5, "search": [5, 11, 13, 20, 23, 26, 28, 30, 33], "threshold": [5, 9, 10, 20, 23, 26, 28, 30, 33, 43, 44], "check": [5, 39, 40, 41], "In": [5, 19, 20, 23, 26, 27, 28, 30, 33, 39, 40, 42], "case": 5, "larger": 5, "than": [5, 40], "thi": [5, 8, 9, 10, 13, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 30, 31, 33, 39, 43, 44, 47], "prevent": 5, "numer": 5, "issu": [5, 39], "call": [6, 21, 29, 43, 46], "take": [6, 23, 26, 33, 47], "last_lay": 6, "pruningconfig": [7, 13, 24, 31], "num_score_approxim": [7, 24, 31], "pruning_num_score_approxim": 7, "importance_metr": 7, "lfh": [7, 24, 31], "channels_filtering_strategi": 7, "specifi": [7, 14, 16, 19, 22, 24, 27, 31, 39], "neural": [7, 47], "approxim": [7, 24, 31], "perform": [7, 11, 19, 24, 27, 31], "strategi": [7, 24, 31], "constant": [7, 41, 44], "label": [7, 24, 31, 43, 47], "free": [7, 19, 24, 27, 31, 47], "approach": 7, "determin": [7, 24, 31, 43], "info": [7, 34], "measur": [7, 
11, 12, 46], "least": 7, "up": [7, 19, 27, 43, 46], "allow": [7, 12, 19, 27, 39], "resourc": [7, 11, 13, 20, 23, 24, 25, 26, 31, 32, 33, 46], "util": [7, 13, 20, 23, 24, 25, 26, 31, 32, 33, 44], "limit": [7, 20, 23, 25, 26, 28, 30, 33], "now": [7, 18, 33, 39, 43, 44, 45, 46], "weights_memori": [7, 11, 20, 23, 24, 26, 31, 33], "consid": [7, 14, 16, 24, 31, 43], "pruninginfo": [8, 13, 24, 31], "pruning_mask": 8, "importance_scor": 8, "store": [8, 44], "mask": 8, "act": 8, "contain": [8, 13, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 44], "access": 8, "metadata": [8, 43], "basenod": 8, "np": [8, 20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 39], "ndarrai": 8, "arrai": 8, "where": [8, 12, 39, 41, 46], "element": [8, 43], "correspond": 8, "neuron": 8, "ha": [8, 39, 40, 41], "been": 8, "kept": [8, 26, 33], "quantifi": [8, 46], "signific": 8, "properti": 8, "activation_error_method": 9, "quantizationerrormethod": [9, 13], "mse": [9, 10, 46], "weights_error_method": 9, "relu_bound_to_power_of_2": 9, "weights_bias_correct": 9, "weights_second_moment_correct": 9, "input_sc": 9, "softmax_shift": 9, "shift_negative_activation_correct": 9, "activation_channel_equ": 9, "z_threshold": 9, "inf": [9, 11], "min_threshold": [9, 44], "52587890625e": 9, "05": 9, "l_p_valu": 9, "linear_collaps": 9, "residual_collaps": 9, "shift_negative_ratio": 9, "shift_negative_threshold_recalcul": 9, "shift_negative_params_search": 9, "concat_threshold_upd": 9, "activation_bias_correct": 9, "activation_bias_correction_threshold": 9, "custom_tpc_opset_to_lay": 9, "you": [9, 39, 46, 47], "noclip": [9, 10], "while": [9, 20, 23, 25, 26, 33, 43], "like": [9, 43], "qc": 9, "error": 10, "minim": [10, 20, 24, 28, 30, 31], "nois": 10, "mae": [10, 46], "absolut": 10, "kl": [10, 46], "diverg": [10, 46], "make": 10, "signal": 10, "distribut": 10, "similar": [10, 12, 35, 36, 38, 47], "possibl": [10, 20, 23, 26, 33, 43, 46], "lp": 10, "norm": [10, 46], "hmse": 10, "more": [10, 18, 23, 24, 
26, 31, 33, 39, 43, 45, 46], "valuabl": 10, "induc": 10, "prune": [11, 47], "activation_memori": 11, "total_memori": 11, "bop": 11, "memori": [11, 24, 31, 46], "byte": [11, 20, 23, 24, 26, 31, 33, 46], "sum": [11, 21, 24, 29, 31], "total": [11, 21, 29], "xquant_config": [12, 35, 36], "xquantconfig": [12, 13, 35, 36], "report_dir": 12, "custom_similarity_metr": 12, "report": [12, 13], "dir": [12, 46], "explain": [12, 13, 35, 36, 44], "directori": [12, 13, 34], "custom": [12, 19, 22, 26, 27, 39], "name": [12, 18, 39, 41, 43, 46], "implement": [12, 44], "init": [13, 41, 47], "modul": [13, 27, 28, 29, 30, 31, 36], "pytorch_post_training_quant": [13, 30, 39], "pytorch": [13, 43, 44, 47], "keras_post_training_quant": [13, 23, 39, 41, 46], "pytorch_gradient_post_training_quant": [13, 17, 28], "get_pytorch_gptq_config": [13, 17], "gradientptqconfig": [13, 20, 28], "keras_gradient_post_training_quant": [13, 15, 20], "get_keras_gptq_config": [13, 15, 20], "option": [13, 22, 23, 24, 26, 28, 31, 33, 39, 43], "pytorch_quantization_aware_training_init_experiment": [13, 32, 33], "prepar": [13, 26, 33], "awar": [13, 42, 44, 47], "experiment": [13, 19, 27, 47], "pytorch_quantization_aware_training_finalize_experiment": [13, 32], "after": [13, 20, 22, 23, 26, 33, 47], "without": 13, "quantizewrapp": [13, 26, 32, 33], "keras_quantization_aware_training_init_experiment": [13, 25, 26], "keras_quantization_aware_training_finalize_experiment": [13, 25], "qat_config": [13, 26, 33], "coreconfig": [13, 20, 21, 23, 25, 26, 28, 29, 30, 32, 33], "entir": 13, "quantizationconfig": [13, 37], "mixedprecisionquantizationconfig": [13, 20, 21, 23, 25, 26, 37], "bitwidthconfig": [13, 37], "resourceutil": [13, 20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 33], "network_editor": [13, 38], "modifi": [13, 41], "troubleshoot": 13, "pytorch_resource_utilization_data": [13, 29], "data": [13, 14, 16, 21, 24, 29, 31, 35, 36, 39, 43, 46, 47], "desir": [13, 20, 21, 23, 25, 26, 28, 29, 30, 33], 
"keras_resource_utilization_data": [13, 21], "pytorch_data_generation_experiment": [13, 27], "get_pytorch_data_generation_config": [13, 16, 27], "load": [13, 25, 26, 39, 44], "keras_data_generation_experiment": [13, 19], "get_keras_data_generation_config": [13, 14, 19], "pytorch_pruning_experiment": [13, 31], "structur": [13, 47], "keras_pruning_experiment": [13, 24], "xquant_report_pytorch_experiment": [13, 36], "xquant_report_keras_experiment": [13, 35], "serial": 13, "abstract": [13, 44], "hardwar": [13, 24, 31, 43, 44, 47], "orient": [13, 44], "tool": [13, 44], "logger": [13, 38, 46], "path": [13, 22, 34, 39, 46], "accord": [13, 20, 21, 23, 24, 26, 28, 29, 30, 31, 33, 39, 40], "infer": [13, 18, 25, 32, 43, 44], "get_target_platform_cap": [13, 18, 43], "platform": [13, 18, 20, 23, 24, 25, 26, 29, 31, 43], "targetplatformcap": [13, 18, 20, 21, 23, 24, 26, 28, 29, 30, 31, 33], "page": 13, "document": [13, 23, 26, 33], "auto": 13, "sphinx": 13, "default_n_it": [14, 16], "adam": [14, 17], "default_data_gen_b": [14, 16], "default_keras_initial_lr": 14, "default_keras_output_loss_multipli": 14, "schedulertyp": [14, 16], "batchnormalignemntlosstyp": [14, 16], "outputlosstyp": [14, 16], "datainittyp": [14, 16], "bnlayerweightingtyp": [14, 16], "imagegranular": [14, 16], "imagepipelinetyp": [14, 16], "imagenormalizationtyp": [14, 16], "default_keras_extra_pixel": 14, "use_hessian_based_weight": [15, 17], "act_hessian_default_batch_s": [15, 17], "use_hessian_sample_attent": [15, 17], "gradual_activation_quant": [15, 17], "fine": [15, 17, 24, 25, 26, 31, 32, 33], "tune": [15, 17, 24, 25, 26, 31, 32], "optimizerv2": 15, "auxiliri": [15, 17], "variabl": [15, 17], "accept": [15, 43], "4": [15, 19, 20, 23, 24, 26, 27, 28, 30, 31, 33], "1st": 15, "2nd": 15, "3rd": 15, "4th": 15, "gradualactivationquantizationconfig": [15, 17], "gradualactivationquant": [15, 17], "disabl": [15, 17], "non": [15, 17, 43], "5": [15, 17, 24, 31], "gptq_conf": [15, 17, 28], "nadam": 15, "order": [15, 
17, 20, 23, 26, 33, 38, 39, 40, 42], "radam": 16, "default_pytorch_initial_lr": 16, "default_pytorch_output_loss_multipli": 16, "reduce_on_plateau_with_reset": 16, "default_pytorch_extra_pixel": 16, "default_pytorch_bn_layer_typ": 16, "default_pytorch_last_layer_typ": 16, "exact": 17, "dummi": 17, "param": [17, 38, 41, 44], "torch": [17, 27, 36, 39, 47], "fw_name": 18, "target_platform_nam": 18, "target_platform_vers": 18, "degener": 18, "compli": 18, "tpc": [18, 24, 31, 43], "capabl": [18, 24, 29, 31], "version": [18, 19, 27, 43, 47], "some": [18, 19, 27, 39, 43, 45, 46], "field": [18, 40, 43, 45], "opquantizationconfig": [18, 45], "ignor": [18, 43, 45], "quantization_preserv": [18, 43, 45], "fixed_scal": [18, 43, 45], "fixed_zero_point": [18, 43, 45], "futur": [18, 19, 27, 43, 45], "n_imag": [19, 27], "output_image_s": [19, 27], "data_generation_config": [19, 27], "ll": [19, 27], "walk": [19, 27], "simpl": [19, 27], "involv": [19, 24, 27, 31], "start": [19, 27, 39, 44, 47], "compress": [19, 24, 27, 28, 31], "toolkit": [19, 27, 28], "sequenti": [19, 27], "flatten": [19, 27], "next": [19, 27, 39, 40], "input_shap": 19, "8": [19, 20, 23, 25, 26, 27, 39, 43, 44], "10": [19, 20, 23, 26, 27, 28, 30, 33], "simplic": [19, 27], "notic": [19, 24, 27, 31, 39], "mai": [19, 20, 23, 26, 27, 28, 30, 33, 40, 46], "chang": [19, 27, 39, 41, 46], "generated_imag": [19, 27], "variou": [19, 27, 46], "purpos": [19, 27, 38], "in_model": [20, 21, 23, 25, 26, 29, 32, 33], "representative_data_gen": [20, 21, 23, 24, 26, 28, 29, 30, 31, 33, 39], "gptq_config": [20, 28, 30], "gptq_representative_data_gen": [20, 28], "target_resource_util": [20, 23, 24, 26, 28, 30, 31, 33], "core_config": [20, 21, 23, 25, 26, 28, 29, 30, 32, 33], "target_platform_cap": [20, 21, 23, 24, 26, 28, 29, 30, 31, 33, 40, 44], "default_keras_tpc": [20, 23, 24, 26], "symmetr": [20, 23, 26, 28, 30, 33, 43, 44], "constraint": [20, 23, 24, 28, 30, 31], "power": [20, 23, 26, 28, 30, 33, 43], "sever": [20, 23, 26, 28, 30, 
33, 46], "fold": [20, 23, 26, 28, 30, 33], "preced": [20, 23, 26, 28, 30, 33], "histogram": [20, 23, 26, 28, 30, 33, 46], "being": [20, 23, 26, 28, 30, 33, 43, 44], "ilp": [20, 23, 26, 33], "solver": [20, 23, 26, 33], "find": [20, 23, 26, 33], "both": [20, 23, 28, 30, 32, 44, 46], "maxim": [20, 23, 26, 33], "observ": [20, 28, 30, 43, 46], "calibr": [20, 21, 23, 26, 28, 29, 30, 33], "user": [20, 23, 25, 26, 28, 30, 32, 33], "handl": [20, 23, 26, 28, 30, 33], "applic": [20, 21, 23, 24, 25, 26, 39], "mobilenet": [20, 21], "random": [20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 39], "requir": [20, 23, 26, 28, 30, 33, 44, 46], "num_calibration_batch": [20, 23, 26, 28, 30, 33], "numpi": [20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 39], "def": [20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 39], "repr_datagen": [20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33], "_": [20, 23, 26, 28, 30, 33, 39], "yield": [20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 39], "224": [20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 39], "bitwidth": [20, 23, 25, 26], "candid": [20, 23, 25, 26, 41], "mixed_precision_config": [20, 21, 23, 25, 26, 37], "our": [20, 23, 25, 26, 33, 47], "note": [20, 23, 25, 26], "affect": [20, 23, 25, 26], "ru": [20, 23, 25, 26], "count_param": [20, 23, 24, 25, 26], "75": [20, 23, 25, 26], "quantized_model": [20, 23, 25, 26, 32, 33, 35, 36], "quantization_info": [20, 23, 25, 26, 28, 30, 32, 33], "userinform": 20, "keras_default_tpc": 21, "build": [21, 29, 44, 47], "graph": [21, 29, 41, 46], "hw": 21, "frameworkquantizationcap": [21, 28, 29, 30], "ru_data": [21, 29], "keras_load_quantized_model": 22, "filepath": 22, "custom_object": [22, 25, 26], "compil": 22, "trainabl": [22, 25, 44], "file": [22, 25, 26, 34, 39], "addit": [22, 39], "boolean": 22, "saved_model": 22, "loadopt": 22, "savedmodel": 22, "ptq": [23, 30, 39], "mobilenet_v2": [23, 25, 26, 28, 29, 30, 32, 33, 39], "mobilenetv2": [23, 25, 26, 39, 46], "pleas": [23, 26, 33, 39, 42, 47], "look": [23, 
26, 33, 43, 47], "pruning_config": [24, 31], "meet": [24, 31], "simd": [24, 31, 43], "assess": [24, 31], "togeth": [24, 31], "friendli": [24, 31, 47], "architectur": [24, 31], "analyz": [24, 31], "identifi": [24, 31, 43], "remov": [24, 31, 32], "impact": [24, 31], "must": [24, 31, 43], "retrain": [24, 31], "recov": [24, 31], "origin": [24, 35, 36, 46], "achiev": 24, "analysi": [24, 31], "standard": [24, 31, 44], "associ": [24, 31], "resnet50": [24, 31, 39], "here": [24, 31, 39, 43, 47], "aim": [24, 31], "footprint": [24, 31], "50": [24, 31], "assum": [24, 31], "float32": [24, 31, 39], "thu": [24, 31, 46], "dense_nparam": [24, 31], "l": [24, 47], "higher": [24, 31], "also": [24, 31, 47], "extend": [24, 31], "durat": [24, 31], "pruned_model": [24, 31], "pruning_info": [24, 31], "qat": [25, 26, 32, 33, 42], "convert": [25, 32], "replac": 25, "trainablequant": 25, "inferablequant": [25, 32], "load_model": [25, 26], "model_fil": [25, 26], "qatconfig": [26, 33], "built": [26, 33, 44], "fake_qu": [26, 33], "onlin": [26, 33], "wrapper": [26, 32, 33, 44], "ker": 26, "hight": 27, "nn": [27, 36], "batchnorm2d": 27, "6": 27, "default_pytorch_tpc": [28, 30, 31, 33], "quantized_modul": [28, 30], "pytorch_default_tpc": 29, "in_modul": 30, "clibrat": 30, "default_pyotrch_tpc": 31, "resnet50_weight": 31, "imagenet1k_v1": 31, "p": 31, "numel": 31, "state_dict": 31, "pretrain": [32, 33], "keep": [32, 47], "readi": 32, "tun": 33, "set_log_fold": [34, 46], "folder": 34, "level": 34, "verbos": 34, "facade_xquant_report": [35, 36], "float_model": [35, 36, 39], "repr_dataset": [35, 36, 39], "validation_dataset": [35, 36], "valid": [35, 36, 43, 44], "quantization_config": [37, 44], "debug_config": 37, "dataclass": [37, 38], "debugconfig": 37, "debug": [37, 38], "edit": [37, 38, 41], "analyze_similar": 38, "simulate_schedul": 38, "plot": [38, 46], "figur": [38, 46], "within": [38, 43, 47], "tensorboard": [38, 47], "pinpoint": 38, "problemat": 38, "editrul": 38, "rule": [38, 41], "action": 
38, "simul": 38, "behavior": 38, "cut": 38, "detail": [39, 43], "refer": 39, "project": [39, 47], "github": [39, 47], "readm": 39, "question": 39, "open": [39, 46, 47], "repositori": 39, "fakely_qu": 39, "int8": 39, "mct_quantiz": 39, "tflite": [39, 43], "via": [39, 47], "save_model_path": 39, "is_layer_exportable_fn": 39, "is_keras_layer_export": 39, "serialization_format": 39, "quantization_format": 39, "To": [39, 46], "demonstr": [39, 43], "quantized_exportable_model": 39, "lambda": 39, "dtype": 39, "avail": [39, 47], "tempfil": 39, "keras_file_path": 39, "mkstemp": 39, "torchscript": 39, "is_pytorch_layer_export": 39, "onnx_opset_vers": 39, "default_onnx_opset_vers": 39, "fake": 39, "trace": 39, "mandatori": 39, "quant": 39, "your": 39, "packag": [39, 44, 47], "so": 39, "part": 39, "skip": 39, "plan": 39, "pip": [39, 47], "instal": 39, "q": 39, "onnxruntim": 39, "extens": 39, "let": 39, "There": [39, 46], "onnx_file_path": 39, "model_format_onnx_mctq": 39, "15": 39, "16": 39, "get_ort_session_opt": 39, "session": 39, "creation": 39, "slowli": 39, "suffer": 39, "longer": 39, "latenc": 39, "howev": 39, "imx500": [39, 43], "ort": 39, "sess": 39, "inferencesess": 39, "cudaexecutionprovid": 39, "cpuexecutionprovid": 39, "_input_data": 39, "astyp": 39, "_model_output_nam": 39, "get_output": 39, "_model_input_nam": 39, "get_input": 39, "predict": 39, "layerfilterparam": 40, "attributefilt": 40, "attr": 40, "op": [40, 43], "match": [40, 41], "regard": 40, "eq": 40, "equal": 40, "noteq": 40, "greater": 40, "greatereq": 40, "smaller": 40, "smallereq": 40, "attribut": [41, 43, 44], "9": 41, "er_list": 41, "nodetypefilt": 41, "changecandidatesweightsquantconfigattr": 41, "attr_nam": 41, "weights_n_bit": [41, 43, 44], "new": [41, 43], "node_typ": 41, "nodenamefilt": 41, "node_nam": 41, "nodenamescopefilt": 41, "node_name_scop": 41, "string": 41, "changefinalweightsquantconfigattr": 41, "kwarg": 41, "attr_valu": 41, "changefinalactivationquantconfigattr": 41, 
"changecandidatesactivationquantconfigattr": 41, "changequantizationparamfunct": 41, "activation_quantization_params_fn": 41, "weights_quantization_params_fn": 41, "changefinalweightsquantizationmethod": 41, "weights_quantization_method": [41, 43, 44], "changecandidatesweightsquantizationmethod": 41, "changecandidatesactivationquantizationmethod": 41, "activation_quantization_method": [41, 43, 44], "changequantizationmethod": 41, "visit": [42, 47], "trainable_infrastructur": 42, "weight_training_method": 42, "activation_training_method": 42, "weight_quantizer_params_overrid": 42, "activation_quantizer_params_overrid": 42, "qnnpack": 43, "backend": 43, "address": 43, "accordingli": 43, "shortli": 43, "diagram": 43, "main": [43, 46], "compon": [43, 44], "power_of_two": 43, "uniform": [43, 44], "lut_pot_quant": 43, "lookup": 43, "tabl": 43, "lut_sym_quant": 43, "schema": 43, "mct_current_schema": 43, "default_weight_attr_config": 43, "attr_weights_configs_map": 43, "activation_n_bit": [43, 44], "supported_input_activation_n_bit": 43, "enable_activation_quant": [43, 44], "simd_siz": 43, "singl": 43, "instruct": 43, "fetch": 43, "simultan": 43, "signed": 43, "pars": 43, "keyword": 43, "rais": 43, "validationerror": 43, "cannot": 43, "form": 43, "weights_per_channel_threshold": [43, 44], "enable_weights_quant": [43, 44], "lut_values_bitwidth": 43, "hptq": [43, 47], "otherwis": 43, "quantization_configur": 43, "gather": [43, 46], "base_config": 43, "fallback": 43, "manner": 43, "default_qco": 43, "operator_set": 43, "fusing_pattern": 43, "pattern": 43, "tpc_minor_vers": 43, "minor": 43, "tpc_patch_vers": 43, "patch": 43, "tpc_platform_typ": 43, "add_metadata": 43, "ad": 43, "is_simd_pad": 43, "pad": 43, "schema_vers": 43, "uniqu": 43, "operatorsetnam": 43, "qc_option": 43, "fix": 43, "liter": 43, "combin": 43, "treat": 43, "henc": 43, "them": [43, 46], "operator_group": 43, "either": 43, "concaten": 43, "similarli": 43, "place": 43, "operators_set": 43, "infrastructur": 
44, "propos": 44, "emul": 44, "holder": 44, "upon": 44, "learnabl": 44, "basic": 44, "block": [44, 46], "get_config": 44, "from_config": 44, "freeze_quant_param": 44, "implment": 44, "freez": 44, "poweroftwo": 44, "dqa": 44, "dnn": 44, "introduc": 44, "lsq": 44, "http": [44, 47], "arxiv": [44, 47], "org": 44, "pdf": 44, "1902": 44, "08153": 44, "weights_quantization_param": 44, "weights_channels_axi": 44, "weights_quantization_candid": 44, "minimum": 44, "activation_quantization_param": 44, "activation_quantization_candid": 44, "One": 46, "phase": 46, "displai": 46, "ui": 46, "done": 46, "writer": 46, "stage": 46, "launch": 46, "logdir": 46, "seen": 46, "under": 46, "tab": 46, "click": 46, "deeper": 46, "view": 46, "tag": 46, "show": 46, "These": 46, "scalar": 46, "were": 46, "wai": [46, 47], "mathemat": 46, "divis": 46, "dot": 46, "product": 46, "euclidean": 46, "along": 46, "pair": 46, "would": 46, "mani": 46, "few": [46, 47], "As": 46, "20": 46, "insert": 46, "chosen": 46, "second": 46, "sourc": 47, "research": 47, "develop": 47, "engin": 47, "easili": 47, "state": 47, "art": 47, "work": 47, "soni": 47, "semiconductor": 47, "israel": 47, "git": 47, "clone": 47, "com": 47, "model_optim": 47, "python": 47, "setup": 47, "py": 47, "pypi": 47, "latest": 47, "stabl": 47, "releas": 47, "nightli": 47, "unstabl": 47, "visual": 47, "cosin": 47, "comparison": 47, "just": 47, "minut": 47, "notebook": 47, "doesn": 47, "t": 47, "out1": 47, "out2": 47, "out3": 47, "habi": 47, "h": 47, "v": 47, "peretz": 47, "r": 47, "cohen": 47, "dikstein": 47, "dror": 47, "o": 47, "diamant": 47, "i": 47, "jen": 47, "netzer": 47, "2021": 47, "preprint": 47, "gordon": 47, "2023": 47, "eptq": 47, "enhanc": 47}, "objects": {"model_compression_toolkit": [[2, 0, 1, "", "DefaultDict"], [18, 3, 1, "", "get_target_platform_capabilities"], [22, 3, 1, "", "keras_load_quantized_model"], [34, 3, 1, "", "set_log_folder"]], "model_compression_toolkit.DefaultDict": [[2, 1, 1, "", "get"], [2, 1, 1, "", 
"keys"]], "model_compression_toolkit.core": [[0, 0, 1, "", "BitWidthConfig"], [3, 0, 1, "", "ChannelAxis"], [37, 0, 1, "", "CoreConfig"], [38, 0, 1, "", "DebugConfig"], [3, 0, 1, "", "FrameworkInfo"], [5, 0, 1, "", "MixedPrecisionQuantizationConfig"], [6, 0, 1, "", "MpDistanceWeighting"], [9, 0, 1, "", "QuantizationConfig"], [10, 0, 1, "", "QuantizationErrorMethod"], [11, 0, 1, "", "ResourceUtilization"], [21, 3, 1, "", "keras_resource_utilization_data"], [29, 3, 1, "", "pytorch_resource_utilization_data"]], "model_compression_toolkit.core.BitWidthConfig": [[0, 2, 1, "", "manual_activation_bit_width_selection_list"]], "model_compression_toolkit.core.common.quantization.bit_width_config": [[0, 0, 1, "", "ManualBitWidthSelection"]], "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection": [[0, 2, 1, "", "bit_width"], [0, 2, 1, "", "filter"]], "model_compression_toolkit.core.network_editor": [[41, 0, 1, "", "ChangeCandidatesActivationQuantConfigAttr"], [41, 0, 1, "", "ChangeCandidatesActivationQuantizationMethod"], [41, 0, 1, "", "ChangeCandidatesWeightsQuantConfigAttr"], [41, 0, 1, "", "ChangeCandidatesWeightsQuantizationMethod"], [41, 0, 1, "", "ChangeFinalActivationQuantConfigAttr"], [41, 0, 1, "", "ChangeFinalWeightsQuantConfigAttr"], [41, 0, 1, "", "ChangeFinalWeightsQuantizationMethod"], [41, 0, 1, "", "ChangeQuantizationParamFunction"], [41, 0, 1, "", "EditRule"], [41, 0, 1, "", "NodeNameFilter"], [41, 0, 1, "", "NodeNameScopeFilter"], [41, 0, 1, "", "NodeTypeFilter"]], "model_compression_toolkit.data_generation": [[1, 0, 1, "", "BNLayerWeightingType"], [1, 0, 1, "", "BatchNormAlignemntLossType"], [1, 0, 1, "", "DataGenerationConfig"], [1, 0, 1, "", "DataInitType"], [1, 0, 1, "", "ImageGranularity"], [1, 0, 1, "", "ImageNormalizationType"], [1, 0, 1, "", "ImagePipelineType"], [1, 0, 1, "", "OutputLossType"], [1, 0, 1, "", "SchedulerType"], [14, 3, 1, "", "get_keras_data_generation_config"], [16, 3, 1, "", 
"get_pytorch_data_generation_config"], [19, 3, 1, "", "keras_data_generation_experimental"], [27, 3, 1, "", "pytorch_data_generation_experimental"]], "model_compression_toolkit.exporter": [[39, 0, 1, "", "KerasExportSerializationFormat"], [39, 0, 1, "", "PytorchExportSerializationFormat"], [39, 0, 1, "", "QuantizationFormat"], [39, 0, 1, "", "keras_export_model"], [39, 0, 1, "", "pytorch_export_model"]], "model_compression_toolkit.gptq": [[4, 0, 1, "", "GPTQHessianScoresConfig"], [4, 0, 1, "", "GradientPTQConfig"], [4, 0, 1, "", "GradualActivationQuantizationConfig"], [4, 0, 1, "", "QFractionLinearAnnealingConfig"], [4, 0, 1, "", "RoundingType"], [15, 3, 1, "", "get_keras_gptq_config"], [17, 3, 1, "", "get_pytorch_gptq_config"], [20, 3, 1, "", "keras_gradient_post_training_quantization"], [28, 3, 1, "", "pytorch_gradient_post_training_quantization"]], "model_compression_toolkit.pruning": [[7, 0, 1, "", "ChannelsFilteringStrategy"], [7, 0, 1, "", "ImportanceMetric"], [7, 0, 1, "", "PruningConfig"], [8, 0, 1, "", "PruningInfo"], [24, 3, 1, "", "keras_pruning_experimental"], [31, 3, 1, "", "pytorch_pruning_experimental"]], "model_compression_toolkit.pruning.PruningConfig": [[7, 2, 1, "", "channels_filtering_strategy"], [7, 2, 1, "", "importance_metric"], [7, 2, 1, "", "num_score_approximations"]], "model_compression_toolkit.pruning.PruningInfo": [[8, 4, 1, "", "importance_scores"], [8, 4, 1, "", "pruning_masks"]], "model_compression_toolkit.ptq": [[23, 3, 1, "", "keras_post_training_quantization"], [30, 3, 1, "", "pytorch_post_training_quantization"]], "model_compression_toolkit.qat": [[42, 0, 1, "", "QATConfig"], [25, 3, 1, "", "keras_quantization_aware_training_finalize_experimental"], [26, 3, 1, "", "keras_quantization_aware_training_init_experimental"], [32, 3, 1, "", "pytorch_quantization_aware_training_finalize_experimental"], [33, 3, 1, "", "pytorch_quantization_aware_training_init_experimental"]], "model_compression_toolkit.target_platform_capabilities": [[40, 
0, 1, "", "AttributeFilter"], [40, 0, 1, "", "Eq"], [40, 0, 1, "", "Greater"], [40, 0, 1, "", "GreaterEq"], [40, 0, 1, "", "NotEq"], [43, 0, 1, "", "QuantizationMethod"], [40, 0, 1, "", "Smaller"], [40, 0, 1, "", "SmallerEq"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema": [[43, 0, 1, "", "AttributeQuantizationConfig"], [43, 0, 1, "", "Fusing"], [43, 0, 1, "", "OpQuantizationConfig"], [43, 0, 1, "", "OperatorSetGroup"], [43, 0, 1, "", "OperatorsSet"], [43, 0, 1, "", "QuantizationConfigOptions"], [43, 0, 1, "", "TargetPlatformCapabilities"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig": [[43, 2, 1, "", "enable_weights_quantization"], [43, 2, 1, "", "lut_values_bitwidth"], [43, 2, 1, "", "weights_n_bits"], [43, 2, 1, "", "weights_per_channel_threshold"], [43, 2, 1, "", "weights_quantization_method"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing": [[43, 2, 1, "", "name"], [43, 2, 1, "", "operator_groups"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup": [[43, 2, 1, "", "name"], [43, 2, 1, "", "operators_set"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet": [[43, 2, 1, "", "name"], [43, 2, 1, "", "qc_options"], [43, 2, 1, "", "type"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions": [[43, 2, 1, "", "base_config"], [43, 2, 1, "", "quantization_configurations"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities": [[43, 2, 1, "", "SCHEMA_VERSION"], [43, 2, 1, "", "add_metadata"], [43, 2, 1, "", "default_qco"], [43, 2, 1, "", "fusing_patterns"], [43, 2, 1, "", "is_simd_padding"], [43, 2, 1, "", "name"], [43, 2, 1, "", "operator_set"], [43, 2, 1, "", "tpc_minor_version"], [43, 2, 1, "", 
"tpc_patch_version"], [43, 2, 1, "", "tpc_platform_type"]], "model_compression_toolkit.trainable_infrastructure": [[44, 0, 1, "", "BaseKerasTrainableQuantizer"], [44, 0, 1, "", "BasePytorchTrainableQuantizer"], [44, 0, 1, "", "TrainableQuantizerActivationConfig"], [44, 0, 1, "", "TrainableQuantizerWeightsConfig"], [44, 0, 1, "", "TrainingMethod"]], "model_compression_toolkit.xquant.common.xquant_config": [[12, 0, 1, "", "XQuantConfig"]], "model_compression_toolkit.xquant.keras.facade_xquant_report": [[35, 3, 1, "", "xquant_report_keras_experimental"]], "model_compression_toolkit.xquant.pytorch.facade_xquant_report": [[36, 3, 1, "", "xquant_report_pytorch_experimental"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function", "4": "py:property"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"], "4": ["py", "property", "Python property"]}, "titleterms": {"bitwidthconfig": 0, "manualbitwidthselect": 0, "data": [1, 19, 27], "gener": [1, 19, 27], "configur": [1, 7, 12, 46], "imagegranular": 1, "schedulertyp": 1, "batchnormalignemntlosstyp": 1, "outputlosstyp": 1, "datainittyp": 1, "bnlayerweightingtyp": 1, "imagepipelinetyp": 1, "imagenormalizationtyp": 1, "defaultdict": 2, "class": [2, 3, 4], "frameworkinfo": 3, "channelaxi": 3, "gradientptqconfig": [4, 15, 17], "gptqhessianscoresconfig": 4, "roundingtyp": 4, "gradualactivationquantizationconfig": 4, "qfractionlinearannealingconfig": 4, "mixedprecisionquantizationconfig": 5, "mpdistanceweight": 6, "prune": [7, 8, 13, 24, 31], "importancemetr": 7, "channelsfilteringstrategi": 7, "inform": [8, 21, 29], "quantizationconfig": 9, "quantizationerrormethod": 10, "resourceutil": 11, "xquant": [12, 13, 35, 36], "api": [13, 47], "doc": 13, "ptq": 13, "gptq": 13, "qat": 13, "core": 13, "data_gener": 13, "export": [13, 39], "trainable_infrastructur": [13, 44], 
"set_log_fold": 13, "keras_load_quantized_model": 13, "target_platform_cap": [13, 43], "indic": 13, "tabl": 13, "get": [14, 15, 16, 17, 18, 21, 29], "datagenerationconfig": [14, 16], "kera": [14, 15, 19, 20, 21, 22, 23, 24, 25, 26, 35, 39], "model": [14, 15, 16, 17, 21, 22, 25, 26, 29, 32, 33, 39, 47], "pytorch": [16, 17, 27, 28, 29, 30, 31, 32, 33, 36, 39], "frameworkquantizationcap": 18, "gradient": [20, 28], "base": [20, 28], "post": [20, 23, 28, 30], "train": [20, 23, 25, 26, 28, 30, 32, 33], "quantiz": [20, 22, 23, 25, 26, 28, 30, 32, 33, 39], "resourc": [21, 29], "util": [21, 29], "load": 22, "structur": [24, 31], "awar": [25, 26, 32, 33], "final": [25, 32], "init": [26, 33], "enabl": 34, "logger": 34, "report": [35, 36], "coreconfig": 37, "debug_config": 38, "modul": [38, 39, 41, 42, 43, 44], "debugconfig": 38, "quantizationformat": 39, "kerasexportserializationformat": 39, "keras_export_model": 39, "tutori": 39, "serial": 39, "format": 39, "mctq": 39, "pytorchexportserializationformat": 39, "pytorch_export_model": 39, "onnx": 39, "opset": 39, "version": 39, "us": 39, "infer": 39, "layer": 40, "attribut": 40, "filter": [40, 41], "network_editor": 41, "editrul": 41, "action": 41, "qat_config": 42, "trainingmethod": [42, 44], "qatconfig": 42, "quantizationmethod": 43, "opquantizationconfig": 43, "attributequantizationconfig": 43, "quantizationconfigopt": 43, "targetplatformcap": 43, "operatorsset": 43, "fuse": 43, "operatorsetgroup": 43, "basekerastrainablequant": 44, "basepytorchtrainablequant": 44, "trainablequantizerweightsconfig": 44, "trainablequantizeractivationconfig": 44, "visual": 46, "within": 46, "tensorboard": 46, "cosin": 46, "similar": 46, "comparison": 46, "mix": 46, "precis": 46, "bit": 46, "width": 46, "compress": 47, "toolkit": 47, "user": 47, "guid": 47, "overview": 47, "instal": 47, "support": 47, "featur": 47, "quickstart": 47, "document": 47, "technic": 47, "constraint": 47, "refer": 47}, "envversion": {"sphinx.domains.c": 2, 
"sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 56}}) \ No newline at end of file diff --git a/docs/static/bizstyle.js b/docs/static/bizstyle.js index f41af42bc..9057a3811 100644 --- a/docs/static/bizstyle.js +++ b/docs/static/bizstyle.js @@ -23,7 +23,7 @@ const initialiseBizStyle = () => { } window.addEventListener("resize", - () => (document.querySelector("li.nav-item-0 a").innerText = (window.innerWidth <= 776) ? "Top" : "MCT Documentation: ver 2.2.0") + () => (document.querySelector("li.nav-item-0 a").innerText = (window.innerWidth <= 776) ? "Top" : "MCT Documentation: ver 2.3.0") ) if (document.readyState !== "loading") initialiseBizStyle() diff --git a/docs/static/documentation_options.js b/docs/static/documentation_options.js index 15876331c..72ca52131 100644 --- a/docs/static/documentation_options.js +++ b/docs/static/documentation_options.js @@ -1,6 +1,6 @@ var DOCUMENTATION_OPTIONS = { URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), - VERSION: '2.2.0', + VERSION: '2.3.0', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html',