From 73f5e666a6a6a2b7df11786516b31c67bfabc02b Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:03 +0800
Subject: [PATCH 01/76] first commit

---
 .../torch/sparsify_activations/__init__.py    |  12 ++
 .../sparsify_activations_impl.py              | 160 ++++++++++++++++++
 .../sparsify_activations/torch_backend.py     | 156 +++++++++++++++++
 3 files changed, 328 insertions(+)
 create mode 100644 nncf/experimental/torch/sparsify_activations/__init__.py
 create mode 100644 nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
 create mode 100644 nncf/experimental/torch/sparsify_activations/torch_backend.py

diff --git a/nncf/experimental/torch/sparsify_activations/__init__.py b/nncf/experimental/torch/sparsify_activations/__init__.py
new file mode 100644
index 00000000000..ba1c31abb5b
--- /dev/null
+++ b/nncf/experimental/torch/sparsify_activations/__init__.py
@@ -0,0 +1,12 @@
+# Copyright (c) 2024 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import sparsify_activations
diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
new file mode 100644
index 00000000000..528c4838e78
--- /dev/null
+++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
@@ -0,0 +1,160 @@
+from abc import ABC
+from abc import abstractmethod
+from pathlib import Path
+from typing import List, Tuple, TypeVar
+
+import torch
+import torch.nn as nn
+
+import nncf
+from nncf.common.factory import NNCFGraphFactory
+from nncf.common.graph.graph import NNCFGraph
+from nncf.common.graph.graph import NNCFNode
+from nncf.common.graph.operator_metatypes import OperatorMetatype
+from nncf.common.graph.transformations.commands import TargetType
+from nncf.common.graph.transformations.layout import TransformationLayout
+from nncf.common.scopes import should_consider_scope
+from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
+from nncf.common.utils.backend import BackendType
+from nncf.common.utils.backend import get_backend
+from nncf.data import Dataset
+from nncf.experimental.tensor.tensor import Tensor
+from nncf.quantization.algorithms.algorithm import Algorithm
+from nncf.torch.graph import operator_metatypes as om
+from nncf.torch.graph.transformations.commands import PTInsertionCommand
+from nncf.torch.graph.transformations.commands import PTSharedFnInsertionCommand
+from nncf.torch.graph.transformations.commands import PTTargetPoint
+from nncf.torch.model_creation import is_wrapped_model
+from nncf.torch.model_creation import wrap_model
+from nncf.torch.model_graph_manager import find_const_node_in_constant_subgraph
+from nncf.torch.model_graph_manager import get_const_node
+from nncf.torch.model_graph_manager import get_module_by_name
+from nncf.torch.model_graph_manager import split_const_name
+from nncf.torch.model_transformer import PTModelTransformer
+from nncf.torch.utils import is_tracing_state
+
+TModel = TypeVar("TModel")
+
+
+def node_name_matches_module_name(node_name, module_name):
+    parts = module_name.split('.')
+    return all(f'[{p}]' in node_name for p in parts)
+
+
+class SparsifyActivationsAlgorithmBackend(ABC):
+    @property
+    @abstractmethod
+    def supported_metatypes(self) -> List[OperatorMetatype]:
+        pass
+
+    @abstractmethod
+    def do_sparsification(self, model, graph: NNCFGraph, nodes_to_sparsify: List[NNCFNode]):
+        pass
+
+    def transform_model(self,):
+        pass
+
+
+class SparsifyActivationsAlgorithm:
+
+    def __init__(self, sparse_config: dict[str, float]):
+        self.sparse_config = sparse_config
+
+    @property
+    def available_backends(self) -> List[BackendType]:
+        return [BackendType.TORCH]
+
+    def _set_backend_entity(self, model: TModel) -> None:
+        """
+        Creates a helper class with a backed-specific logic of the algorithm.
+
+        :param model: Backend-specific input model.
+        """
+        model_backend = get_backend(model)
+        if model_backend == BackendType.TORCH:
+            from nncf.experimental.torch.sparsify_activations.torch_backend import PTPruneActivationAlgorithmBackend
+            self._backend_entity = PTPruneActivationAlgorithmBackend()
+        else:
+            raise nncf.UnsupportedBackendError(
+                f"{model_backend.value} backend is not supported for `sparsify_activations`."
+            )
+
+    def _get_nodes_to_sparsify(self, nncf_graph: NNCFGraph) -> List[NNCFNode]:
+        """
+        Collects nodes in the model's graph corresponding to the layers for weight compression.
+
+        :param nncf_graph: NNCFGraph instance.
+        :return: List with the data for each layer.
+        """
+        supported_metatypes = self._backend_entity.supported_metatypes
+        ordered_nodes_to_sparsify = []
+        for node in nncf_graph.topological_sort():
+            if node.metatype in supported_metatypes:
+                ordered_nodes_to_sparsify.append(node)
+        return ordered_nodes_to_sparsify
+
+    def _get_activation_node_and_port(self, node: NNCFNode, nncf_graph: NNCFGraph) -> Tuple[NNCFNode, int]:
+        """
+        This method returns the activation layer and corresponding port id for the node.
+
+        :param node: NNCFGraph node for which the activation is sought.
+        :param nncf_graph: NNCFGraph instance with the node.
+        :return: Tuple with the activation node and port id.
+        """
+        activation_port = self._backend_entity.get_activation_port_id(
+            node, nncf_graph)
+        activation_edge = nncf_graph.get_input_edges(node)[activation_port]
+        activation_node = activation_edge.from_node
+        port_id = activation_edge.output_port_id
+        return activation_node, port_id
+
+    def apply(
+        self,
+        model: TModel,
+        graph: NNCFGraph,
+        dataset: Dataset,
+    ) -> TModel:
+        self._set_backend_entity(model)
+        nodes_to_sparsify = self._get_nodes_to_sparsify(graph)
+        transformed_model, activation_sparsifiers = self._backend_entity.do_sparsification(
+            model, graph, nodes_to_sparsify
+        )
+        for sparsifier in activation_sparsifiers:
+            sparsifier.reset_running_stats()
+        self._backend_entity.inference(transformed_model, dataset)
+        for sparsifier in activation_sparsifiers:
+            sparsifier.freeze(True)
+        transformed_model.nncf.rebuild_graph()
+        return transformed_model
+
+
+def sparsify_activations(
+    model: TModel,
+    dataset: Dataset,
+    sparsity_config: dict[str, float],
+    debug_folder=None,
+) -> TModel:
+    """
+    Implementation of the `compress_weights()` method.
+    """
+
+    backend = get_backend(model)
+    if backend == BackendType.TORCH and not is_wrapped_model(model):
+        example_input = next(iter(dataset.get_inference_data()))
+        model = wrap_model(
+            model,
+            example_input=example_input,
+            trace_parameters=True,
+        )
+
+    algorithm = SparsifyActivationsAlgorithm(sparsity_config)
+    graph = NNCFGraphFactory.create(model)
+    if debug_folder:
+        graph.dump_graph(
+            Path(debug_folder, './before-sparsification.dot').as_posix())
+    sparse_model = algorithm.apply(model, graph, dataset)
+    graph = NNCFGraphFactory.create(sparse_model)
+    if debug_folder:
+        graph.dump_graph(
+            Path(debug_folder, './after-sparsification.dot').as_posix())
+    return sparse_model
diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py
new file mode 100644
index 00000000000..1406c86af66
--- /dev/null
+++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py
@@ -0,0 +1,156 @@
+from pathlib import Path
+from typing import List, Tuple, TypeVar
+
+import torch
+import torch.nn as nn
+
+import nncf
+import nncf.experimental.torch.sparsify_activations
+from nncf.common.factory import NNCFGraphFactory
+from nncf.common.graph.graph import NNCFGraph
+from nncf.common.graph.graph import NNCFNode
+from nncf.common.graph.operator_metatypes import OperatorMetatype
+from nncf.common.graph.transformations.commands import TargetType
+from nncf.common.graph.transformations.layout import TransformationLayout
+from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
+from nncf.common.utils.backend import BackendType
+from nncf.common.utils.backend import get_backend
+from nncf.data import Dataset
+from nncf.experimental.tensor.tensor import Tensor
+from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgorithmBackend
+from nncf.quantization.algorithms.algorithm import Algorithm
+from nncf.torch.graph import operator_metatypes as om
+from nncf.torch.graph.transformations.commands import PTInsertionCommand
+from nncf.torch.graph.transformations.commands import PTSharedFnInsertionCommand
+from nncf.torch.graph.transformations.commands import PTTargetPoint
+from nncf.torch.model_creation import is_wrapped_model
+from nncf.torch.model_creation import wrap_model
+from nncf.torch.model_graph_manager import find_const_node_in_constant_subgraph
+from nncf.torch.model_graph_manager import get_const_node
+from nncf.torch.model_graph_manager import get_module_by_name
+from nncf.torch.model_graph_manager import split_const_name
+from nncf.torch.model_transformer import PTModelTransformer
+from nncf.torch.utils import is_tracing_state
+
+TModel = TypeVar("TModel")
+
+
+class ActivationSparsifier(nn.Module):
+    def __init__(self, target_sparsity: float, alpha: float = 0.1):
+        super().__init__()
+        self.alpha = alpha
+        self.register_buffer('target_sparsity', torch.tensor(target_sparsity))
+        self.register_buffer("running_threshold", torch.tensor(0.))
+        self.register_buffer("num_batches_tracked", torch.tensor(0))
+        self.running_threshold: torch.Tensor
+        self.num_batches_tracked: torch.Tensor
+        self._frozen = False
+
+    def forward(self, x):
+        threshold = None
+        if not self._frozen:
+            threshold = self._calculate_quantile(x.abs(), self.target_sparsity)
+            self._update(threshold)
+        assert self.num_batches_tracked > 0
+        mask = torch.le(x.abs(), self.running_threshold)
+        if '[1]' in self.node_name and 'up_proj' in self.node_name:
+            print('sparsity', mask.float().mean(), 'cur_threshold',
+                  threshold, 'threshold', self.running_threshold)
+        x = torch.masked_fill(x, mask, 0.)
+        return x
+
+    def reset_running_stats(self):
+        self.running_threshold.zero_()
+        self.num_batches_tracked.zero_()
+
+    def freeze(self, freeze: bool = True):
+        self._frozen = freeze
+
+    def extra_repr(self) -> str:
+        return f"target_sparsity={self.target_sparsity.item()}"
+
+    def _calculate_quantile(self, x: torch.Tensor, target_sparsity: float):
+        return x.view(-1).quantile(q=target_sparsity, dim=-1)
+
+    def _update(self, threshold: torch.Tensor):
+        beta = 1.0 - self.alpha
+        # Exponential Moving Average with decaying adjustment, similar to pandas.DataFrame.ewm(adjust=True).
+        self.running_threshold = (
+            threshold * self.alpha +
+            self.running_threshold * beta *
+            (1 - beta ** self.num_batches_tracked)
+        ) / (1 - beta ** (self.num_batches_tracked + 1))
+        self.num_batches_tracked += 1
+        return self.running_threshold
+
+
+def node_name_matches_module_name(node_name, module_name):
+    parts = module_name.split('.')
+    return all(f'[{p}]' in node_name for p in parts)
+
+
+class PTPruneActivationAlgorithmBackend(SparsifyActivationsAlgorithmBackend):
+    SUPPORTED_METATYPES = [om.PTLinearMetatype]
+
+    def __init__(self) -> None:
+        pass
+
+    def do_sparsification(self):
+        pass
+
+    @property
+    def supported_metatypes(self) -> List[OperatorMetatype]:
+        return PTPruneActivationAlgorithmBackend.SUPPORTED_METATYPES
+
+    @staticmethod
+    def get_activation_port_id(node: NNCFNode, graph: NNCFGraph) -> NNCFNode:
+        activation_ports = []
+        for prev_node in graph.get_previous_nodes(node):
+            if 'weight' in prev_node.node_name.lower() or 'bias' in prev_node.node_name:
+                # TODO: find activation
+                continue
+            edge = graph.get_edge(prev_node, node)
+            activation_ports.append(edge.input_port_id)
+        assert len(activation_ports) == 1
+        return activation_ports[0]
+
+    @staticmethod
+    def inference(model: nn.Module, dataset: Dataset) -> None:
+        model = model.eval()
+        with torch.no_grad():
+            for batch in dataset.get_inference_data():
+                model(**batch)
+
+    def do_sparsification(self, model, graph: NNCFGraph, nodes_to_sparsify: List[NNCFNode]):
+        transformation_layout = TransformationLayout()
+        activation_sparsifiers = []
+
+        for node in nodes_to_sparsify:
+            activation_node, activation_port_id = self._get_activation_node_and_port(
+                node, graph)
+            target_sparsity = None
+            for module_name in self.sparse_config:
+                if node_name_matches_module_name(node.node_name, module_name):
+                    target_sparsity = self.sparse_config[module_name]
+                    break
+            if target_sparsity is None:
+                continue
+            activation_sparsifier = ActivationSparsifier(
+                target_sparsity=target_sparsity)
+            activation_sparsifier.node_name = node.node_name
+            activation_sparsifiers.append(activation_sparsifier)
+            activation_sparsifier_name = f"activation_sparsifier_{node.node_name.replace('.', '_')}"
+            transformation_layout.register(PTSharedFnInsertionCommand(
+                [
+                    PTTargetPoint(
+                        TargetType.PRE_LAYER_OPERATION,
+                        target_node_name=node.node_name,
+                        input_port_id=activation_port_id)
+                ],
+                activation_sparsifier,
+                activation_sparsifier_name,
+            ))
+
+        transformed_model = PTModelTransformer(
+            model).transform(transformation_layout)
+        return transformed_model, activation_sparsifiers
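
[Note: the ActivationSparsifier introduced in this first commit implements magnitude-based activation sparsification: it thresholds |x| at the quantile matching the target sparsity and zeroes everything at or below it. A minimal standalone sketch of that masking step, using only public torch APIs — the function name and shapes here are illustrative, not part of the patch:

    import torch

    def sparsify_by_magnitude(x: torch.Tensor, target_sparsity: float) -> torch.Tensor:
        # Threshold at the `target_sparsity` quantile of |x|, mirroring
        # ActivationSparsifier._calculate_quantile in the patch above.
        threshold = x.abs().view(-1).quantile(q=target_sparsity)
        # Zero out entries whose magnitude falls at or below the threshold.
        return torch.masked_fill(x, x.abs() <= threshold, 0.0)

    x = torch.randn(4, 8)
    y = sparsify_by_magnitude(x, target_sparsity=0.5)
    assert (y == 0).float().mean() >= 0.5  # roughly half of the entries are zeroed
]
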
From a2b6f944444ddf5750b5ce56654556febed917a9 Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:03 +0800
Subject: [PATCH 02/76] can work except ignored_scope

---
 .../sparsify_activations_impl.py              | 136 +++++++++--------
 .../sparsify_activations/torch_backend.py     | 138 ++++++++----------
 2 files changed, 135 insertions(+), 139 deletions(-)

diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
index 528c4838e78..e6162a72aa0 100644
--- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
+++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
@@ -1,64 +1,67 @@
 from abc import ABC
 from abc import abstractmethod
 from pathlib import Path
-from typing import List, Tuple, TypeVar
-
-import torch
-import torch.nn as nn
+from typing import Dict, List, Optional, TypeVar
 
 import nncf
+from nncf.common import factory
 from nncf.common.factory import NNCFGraphFactory
 from nncf.common.graph.graph import NNCFGraph
 from nncf.common.graph.graph import NNCFNode
 from nncf.common.graph.operator_metatypes import OperatorMetatype
-from nncf.common.graph.transformations.commands import TargetType
-from nncf.common.graph.transformations.layout import TransformationLayout
+from nncf.common.logging.track_progress import track
+from nncf.common.scopes import matches_any
 from nncf.common.scopes import should_consider_scope
-from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
 from nncf.common.utils.backend import BackendType
 from nncf.common.utils.backend import get_backend
 from nncf.data import Dataset
-from nncf.experimental.tensor.tensor import Tensor
-from nncf.quantization.algorithms.algorithm import Algorithm
-from nncf.torch.graph import operator_metatypes as om
-from nncf.torch.graph.transformations.commands import PTInsertionCommand
-from nncf.torch.graph.transformations.commands import PTSharedFnInsertionCommand
-from nncf.torch.graph.transformations.commands import PTTargetPoint
+from nncf.scopes import IgnoredScope
+from nncf.scopes import get_ignored_node_names_from_ignored_scope
 from nncf.torch.model_creation import is_wrapped_model
 from nncf.torch.model_creation import wrap_model
-from nncf.torch.model_graph_manager import find_const_node_in_constant_subgraph
-from nncf.torch.model_graph_manager import get_const_node
-from nncf.torch.model_graph_manager import get_module_by_name
-from nncf.torch.model_graph_manager import split_const_name
-from nncf.torch.model_transformer import PTModelTransformer
-from nncf.torch.utils import is_tracing_state
 
 TModel = TypeVar("TModel")
 
 
-def node_name_matches_module_name(node_name, module_name):
-    parts = module_name.split('.')
-    return all(f'[{p}]' in node_name for p in parts)
+class SparsifyActivationsAlgoBackend(ABC):
 
+    def do_inference(self, model: TModel, dataset: Dataset):
+        engine = factory.EngineFactory.create(model)
+        for input_data in track(
+            dataset.get_inference_data(),
+            total=dataset.get_length(),
+            description="Activation sparsifier calibration",
+        ):
+            engine.infer(input_data)
 
-class SparsifyActivationsAlgorithmBackend(ABC):
     @property
     @abstractmethod
     def supported_metatypes(self) -> List[OperatorMetatype]:
         pass
 
     @abstractmethod
-    def do_sparsification(self, model, graph: NNCFGraph, nodes_to_sparsify: List[NNCFNode]):
+    def insert_sparsifiers(self, model: TModel, target_sparsity_by_node: Dict[NNCFNode, float]) -> TModel:
         pass
 
-    def transform_model(self,):
+    @abstractmethod
+    def calibrate_sparsifiers(self, model: TModel, dataset: Dataset) -> TModel:
+        pass
+
+    @abstractmethod
+    def freeze_sparsifiers(self, model: TModel) -> TModel:
         pass
 
 
 class SparsifyActivationsAlgorithm:
 
-    def __init__(self, sparse_config: dict[str, float]):
-        self.sparse_config = sparse_config
+    def __init__(
+        self,
+        target_sparsity_by_scope: dict[str, float],
+        ignored_scope: IgnoredScope,
+    ):
+        self._target_sparsity_by_scope = target_sparsity_by_scope
+        self._ignored_scope = ignored_scope
+        self._backend_entity = None
 
     @property
     def available_backends(self) -> List[BackendType]:
@@ -72,14 +75,14 @@ def _set_backend_entity(self, model: TModel) -> None:
         """
         model_backend = get_backend(model)
         if model_backend == BackendType.TORCH:
-            from nncf.experimental.torch.sparsify_activations.torch_backend import PTPruneActivationAlgorithmBackend
-            self._backend_entity = PTPruneActivationAlgorithmBackend()
+            from nncf.experimental.torch.sparsify_activations.torch_backend import PTSparsifyActivationsAlgoBackend
+            self._backend_entity = PTSparsifyActivationsAlgoBackend()
         else:
             raise nncf.UnsupportedBackendError(
                 f"{model_backend.value} backend is not supported for `sparsify_activations`."
             )
 
-    def _get_nodes_to_sparsify(self, nncf_graph: NNCFGraph) -> List[NNCFNode]:
+    def _get_target_sparsity_by_node(self, nncf_graph: NNCFGraph) -> Dict[NNCFNode, float]:
         """
         Collects nodes in the model's graph corresponding to the layers for weight compression.
 
@@ -87,26 +90,37 @@ def _get_nodes_to_sparsify(self, nncf_graph: NNCFGraph) -> List[NNCFNode]:
         :return: List with the data for each layer.
         """
         supported_metatypes = self._backend_entity.supported_metatypes
-        ordered_nodes_to_sparsify = []
+        ignored_names = get_ignored_node_names_from_ignored_scope(
+            self._ignored_scope, nncf_graph, strict=self._ignored_scope.validate
+        )
+        print(ignored_names)
+        target_sparsity_by_node = {}
         for node in nncf_graph.topological_sort():
-            if node.metatype in supported_metatypes:
-                ordered_nodes_to_sparsify.append(node)
-        return ordered_nodes_to_sparsify
-
-    def _get_activation_node_and_port(self, node: NNCFNode, nncf_graph: NNCFGraph) -> Tuple[NNCFNode, int]:
-        """
-        This method returns the activation layer and corresponding port id for the node.
-
-        :param node: NNCFGraph node for which the activation is sought.
-        :param nncf_graph: NNCFGraph instance with the node.
-        :return: Tuple with the activation node and port id.
-        """
-        activation_port = self._backend_entity.get_activation_port_id(
-            node, nncf_graph)
-        activation_edge = nncf_graph.get_input_edges(node)[activation_port]
-        activation_node = activation_edge.from_node
-        port_id = activation_edge.output_port_id
-        return activation_node, port_id
+            print(node.metatype, node.node_name, ignored_names,
+                  'should_consider_scope=',
+                  should_consider_scope(node.node_name, ignored_names))
+            if node.metatype not in supported_metatypes or not should_consider_scope(node.node_name, ignored_names):
+                continue
+            for scope, target_sparsity in self._target_sparsity_by_scope.items():
+                if matches_any(node.node_name, scope):
+                    if node.node_name in target_sparsity_by_node:
+                        raise nncf.ValidationError(
+                            f'"{node.node_name}" is matched by multiple items in `target_sparsity_by_scope`.')
+                    target_sparsity_by_node[node] = target_sparsity
+        return target_sparsity_by_node
+
+    def do_sparsification(
+        self,
+        model: TModel,
+        graph: NNCFGraph,
+        target_sparsity_by_node: Dict[NNCFNode, float],
+        dataset: Dataset,
+    ):
+        model = self._backend_entity.insert_sparsifiers(
+            model, graph, target_sparsity_by_node)
+        model = self._backend_entity.calibrate_sparsifiers(model, dataset)
+        model = self._backend_entity.freeze_sparsifiers(model)
+        return model
 
     def apply(
         self,
@@ -115,23 +129,18 @@ def apply(
         dataset: Dataset,
     ) -> TModel:
         self._set_backend_entity(model)
-        nodes_to_sparsify = self._get_nodes_to_sparsify(graph)
-        transformed_model, activation_sparsifiers = self._backend_entity.do_sparsification(
-            model, graph, nodes_to_sparsify
+        target_sparsity_by_node = self._get_target_sparsity_by_node(graph)
+        sparsified_model = self.do_sparsification(
+            model, graph, target_sparsity_by_node, dataset,
         )
-        for sparsifier in activation_sparsifiers:
-            sparsifier.reset_running_stats()
-        self._backend_entity.inference(transformed_model, dataset)
-        for sparsifier in activation_sparsifiers:
-            sparsifier.freeze(True)
-        transformed_model.nncf.rebuild_graph()
-        return transformed_model
+        return sparsified_model
 
 
 def sparsify_activations(
     model: TModel,
     dataset: Dataset,
-    sparsity_config: dict[str, float],
+    target_sparsity_by_scope: Dict[str, float],
+    ignored_scope: Optional[IgnoredScope] = None,
     debug_folder=None,
 ) -> TModel:
     """
@@ -147,7 +156,12 @@ def sparsify_activations(
             trace_parameters=True,
         )
 
-    algorithm = SparsifyActivationsAlgorithm(sparsity_config)
+    if ignored_scope is None:
+        ignored_scope = IgnoredScope()
+
+    algorithm = SparsifyActivationsAlgorithm(
+        target_sparsity_by_scope, ignored_scope)
+
     graph = NNCFGraphFactory.create(model)
     if debug_folder:
         graph.dump_graph(
diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py
index 1406c86af66..7ebc1242be4 100644
--- a/nncf/experimental/torch/sparsify_activations/torch_backend.py
+++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py
@@ -1,36 +1,20 @@
-from pathlib import Path
 from typing import List, Tuple, TypeVar
 
 import torch
 import torch.nn as nn
 
-import nncf
-import nncf.experimental.torch.sparsify_activations
-from nncf.common.factory import NNCFGraphFactory
 from nncf.common.graph.graph import NNCFGraph
 from nncf.common.graph.graph import NNCFNode
 from nncf.common.graph.operator_metatypes import OperatorMetatype
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.graph.transformations.layout import TransformationLayout
-from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
-from nncf.common.utils.backend import BackendType
-from nncf.common.utils.backend import get_backend
 from nncf.data import Dataset
-from nncf.experimental.tensor.tensor import Tensor
-from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgorithmBackend
-from nncf.quantization.algorithms.algorithm import Algorithm
+from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgoBackend
 from nncf.torch.graph import operator_metatypes as om
-from nncf.torch.graph.transformations.commands import PTInsertionCommand
 from nncf.torch.graph.transformations.commands import PTSharedFnInsertionCommand
 from nncf.torch.graph.transformations.commands import PTTargetPoint
-from nncf.torch.model_creation import is_wrapped_model
-from nncf.torch.model_creation import wrap_model
-from nncf.torch.model_graph_manager import find_const_node_in_constant_subgraph
-from nncf.torch.model_graph_manager import get_const_node
-from nncf.torch.model_graph_manager import get_module_by_name
-from nncf.torch.model_graph_manager import split_const_name
 from nncf.torch.model_transformer import PTModelTransformer
-from nncf.torch.utils import is_tracing_state
+from nncf.torch.nncf_network import NNCFNetwork
 
 TModel = TypeVar("TModel")
 
@@ -44,18 +28,17 @@ def __init__(self, target_sparsity: float, alpha: float = 0.1):
         self.register_buffer("num_batches_tracked", torch.tensor(0))
         self.running_threshold: torch.Tensor
         self.num_batches_tracked: torch.Tensor
-        self._frozen = False
+        self._frozen = True
 
     def forward(self, x):
         threshold = None
         if not self._frozen:
             threshold = self._calculate_quantile(x.abs(), self.target_sparsity)
             self._update(threshold)
-        assert self.num_batches_tracked > 0
         mask = torch.le(x.abs(), self.running_threshold)
-        if '[1]' in self.node_name and 'up_proj' in self.node_name:
-            print('sparsity', mask.float().mean(), 'cur_threshold',
-                  threshold, 'threshold', self.running_threshold)
+        # if '[1]' in self.node_name and 'up_proj' in self.node_name:
+        #     print('sparsity', mask.float().mean(), 'cur_threshold',
+        #           threshold, 'threshold', self.running_threshold)
         x = torch.masked_fill(x, mask, 0.)
         return x
 
@@ -67,10 +50,10 @@ def freeze(self, freeze: bool = True):
         self._frozen = freeze
 
     def extra_repr(self) -> str:
-        return f"target_sparsity={self.target_sparsity.item()}"
+        return f"target_sparsity={self.target_sparsity.item()},{self.running_threshold:=},{self.num_batches_tracked}"
 
     def _calculate_quantile(self, x: torch.Tensor, target_sparsity: float):
-        return x.view(-1).quantile(q=target_sparsity, dim=-1)
+        return x.float().view(-1).quantile(q=target_sparsity, dim=-1)
 
     def _update(self, threshold: torch.Tensor):
         beta = 1.0 - self.alpha
@@ -84,73 +67,72 @@ def _update(self, threshold: torch.Tensor):
         return self.running_threshold
 
 
-def node_name_matches_module_name(node_name, module_name):
-    parts = module_name.split('.')
-    return all(f'[{p}]' in node_name for p in parts)
-
-
-class PTPruneActivationAlgorithmBackend(SparsifyActivationsAlgorithmBackend):
+class PTSparsifyActivationsAlgoBackend(SparsifyActivationsAlgoBackend):
     SUPPORTED_METATYPES = [om.PTLinearMetatype]
 
-    def __init__(self) -> None:
-        pass
-
-    def do_sparsification(self):
-        pass
-
     @property
     def supported_metatypes(self) -> List[OperatorMetatype]:
-        return PTPruneActivationAlgorithmBackend.SUPPORTED_METATYPES
-
-    @staticmethod
-    def get_activation_port_id(node: NNCFNode, graph: NNCFGraph) -> NNCFNode:
-        activation_ports = []
-        for prev_node in graph.get_previous_nodes(node):
-            if 'weight' in prev_node.node_name.lower() or 'bias' in prev_node.node_name:
-                # TODO: find activation
-                continue
-            edge = graph.get_edge(prev_node, node)
-            activation_ports.append(edge.input_port_id)
-        assert len(activation_ports) == 1
-        return activation_ports[0]
+        return PTSparsifyActivationsAlgoBackend.SUPPORTED_METATYPES
 
-    @staticmethod
-    def inference(model: nn.Module, dataset: Dataset) -> None:
-        model = model.eval()
-        with torch.no_grad():
-            for batch in dataset.get_inference_data():
-                model(**batch)
-
-    def do_sparsification(self, model, graph: NNCFGraph, nodes_to_sparsify: List[NNCFNode]):
+    def insert_sparsifiers(self, model, graph: NNCFGraph, target_sparsity_by_node: dict[NNCFNode, float]) -> NNCFNetwork:
         transformation_layout = TransformationLayout()
-        activation_sparsifiers = []
-
-        for node in nodes_to_sparsify:
-            activation_node, activation_port_id = self._get_activation_node_and_port(
+        for node, target_sparsity in target_sparsity_by_node.items():
+            act_node, act_port_id = self._get_activation_node_and_port(
                 node, graph)
-            target_sparsity = None
-            for module_name in self.sparse_config:
-                if node_name_matches_module_name(node.node_name, module_name):
-                    target_sparsity = self.sparse_config[module_name]
-                    break
-            if target_sparsity is None:
-                continue
-            activation_sparsifier = ActivationSparsifier(
-                target_sparsity=target_sparsity)
-            activation_sparsifier.node_name = node.node_name
-            activation_sparsifiers.append(activation_sparsifier)
-            activation_sparsifier_name = f"activation_sparsifier_{node.node_name.replace('.', '_')}"
+            sparsifier = ActivationSparsifier(target_sparsity=target_sparsity)
+            sparsifier_name = f"activation_sparsifier_{node.node_name.replace('.', '_')}"
             transformation_layout.register(PTSharedFnInsertionCommand(
                 [
                     PTTargetPoint(
                         TargetType.PRE_LAYER_OPERATION,
                         target_node_name=node.node_name,
-                        input_port_id=activation_port_id)
+                        input_port_id=act_port_id)
                 ],
-                activation_sparsifier,
-                activation_sparsifier_name,
+                sparsifier,
+                sparsifier_name,
             ))
 
         transformed_model = PTModelTransformer(
             model).transform(transformation_layout)
-        return transformed_model, activation_sparsifiers
+        return transformed_model
+
+    def get_sparsifiers(self, model: NNCFNetwork) -> List[ActivationSparsifier]:
+        return [m for m in model.nncf.modules() if isinstance(m, ActivationSparsifier)]
+
+    def calibrate_sparsifiers(self, model: NNCFNetwork, dataset: Dataset) -> NNCFNetwork:
+        for sparsifier in self.get_sparsifiers(model):
+            sparsifier.reset_running_stats()
+            sparsifier.freeze(False)
+        self.do_inference(model, dataset)
+        return model
+
+    def freeze_sparsifiers(self, model: NNCFNetwork) -> NNCFNetwork:
+        for sparsifier in self.get_sparsifiers(model):
+            sparsifier.freeze(True)
+        model.nncf.rebuild_graph()
+        return model
+
+    def _get_activation_port_id(self, node: NNCFNode, graph: NNCFGraph) -> NNCFNode:
+        activation_ports = []
+        for prev_node in graph.get_previous_nodes(node):
+            if 'weight' in prev_node.node_name.lower() or 'bias' in prev_node.node_name:
+                # TODO: find activation
+                continue
+            edge = graph.get_edge(prev_node, node)
+            activation_ports.append(edge.input_port_id)
+        assert len(activation_ports) == 1
+        return activation_ports[0]
+
+    def _get_activation_node_and_port(self, node: NNCFNode, nncf_graph: NNCFGraph) -> Tuple[NNCFNode, int]:
+        """
+        This method returns the activation layer and corresponding port id for the node.
+
+        :param node: NNCFGraph node for which the activation is sought.
+        :param nncf_graph: NNCFGraph instance with the node.
+        :return: Tuple with the activation node and port id.
+        """
+        activation_port = self._get_activation_port_id(node, nncf_graph)
+        activation_edge = nncf_graph.get_input_edges(node)[activation_port]
+        activation_node = activation_edge.from_node
+        port_id = activation_edge.output_port_id
+        return activation_node, port_id
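
[Note: the `_update` method above maintains the running threshold via an exponential moving average with decaying adjustment. A quick numeric check of that recurrence against a directly computed weighted average — a sketch assuming scalar thresholds, not part of the patch:

    def ema_adjusted(values, alpha):
        # Weighted average with weights (1 - alpha)**k over past values,
        # i.e. pandas.DataFrame.ewm(adjust=True).mean() semantics.
        beta = 1.0 - alpha
        num = sum(v * beta**k for k, v in enumerate(reversed(values)))
        den = sum(beta**k for k in range(len(values)))
        return num / den

    alpha, beta = 0.1, 0.9
    running, n = 0.0, 0
    for v in [1.0, 2.0, 3.0]:
        # the recurrence from ActivationSparsifier._update
        running = (v * alpha + running * beta * (1 - beta**n)) / (1 - beta**(n + 1))
        n += 1
    print(running, ema_adjusted([1.0, 2.0, 3.0], alpha))  # both ~2.0701
]
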
+ """ + return [] @abstractmethod - def insert_sparsifiers(self, model: TModel, target_sparsity_by_node: Dict[NNCFNode, float]) -> TModel: - pass + def insert_sparsifiers( + self, model: TModel, graph: NNCFGraph, target_sparsity_by_node: Dict[NNCFNode, float], + ) -> TModel: + """ + Inserts the activation sparsifiers to the model. + + :param model: The model to conduct activation sparsification. + :param graph: The model's nncf graph. + :param target_sparsity_by_node: The target sparsity level for the input activation in each given node layer. + :return: The model with inserted activation sparsifiers. + """ + return model @abstractmethod - def calibrate_sparsifiers(self, model: TModel, dataset: Dataset) -> TModel: - pass + def calibrate_sparsifiers(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> TModel: + """ + Calibrates the thresholds in the activation sparsifiers. + + :param model: The model with inserted activation sparsifiers. + :param graph: The model's nncf graph. + :param dataset: The calibration dataset to update the thresholds in the sparsifiers. + :return: The model with calibrated activation sparsifiers. + """ + return model @abstractmethod - def freeze_sparsifiers(self, model: TModel) -> TModel: - pass + def freeze_sparsifiers(self, model: TModel, graph: NNCFGraph) -> TModel: + """ + Freezes the activation sparsifiers and applies the sparsification to the model. + + :param model: The model with activation sparsifiers. + :param graph: The model's nncf graph. + :return: The model with applied sparsification operations. + """ + return model class SparsifyActivationsAlgorithm: + """ + Implementation of activation sparsification algorithm. + """ def __init__( self, - target_sparsity_by_scope: dict[str, float], + target_sparsity_by_scope: Dict[str, float], ignored_scope: IgnoredScope, ): + """ + :param target_sparsity_by_scope: A dictionary that defines the target sparsity level for specified layers. + :param ignored_scope: An ignored scope that defines the list of model control flow + graph nodes to be ignored during activation sparsification. + """ self._target_sparsity_by_scope = target_sparsity_by_scope self._ignored_scope = ignored_scope self._backend_entity = None @property def available_backends(self) -> List[BackendType]: + """ + Supported backends for this algorithm. + """ return [BackendType.TORCH] def _set_backend_entity(self, model: TModel) -> None: """ - Creates a helper class with a backed-specific logic of the algorithm. + Creates a helper class with a backend-specific logic of the algorithm. :param model: Backend-specific input model. """ model_backend = get_backend(model) if model_backend == BackendType.TORCH: from nncf.experimental.torch.sparsify_activations.torch_backend import PTSparsifyActivationsAlgoBackend - self._backend_entity = PTSparsifyActivationsAlgoBackend() + self._backend_entity: SparsifyActivationsAlgoBackend = PTSparsifyActivationsAlgoBackend() else: raise nncf.UnsupportedBackendError( f"{model_backend.value} backend is not supported for `sparsify_activations`." ) - def _get_target_sparsity_by_node(self, nncf_graph: NNCFGraph) -> Dict[NNCFNode, float]: + def _get_target_sparsity_by_node(self, graph: NNCFGraph) -> Dict[NNCFNode, float]: """ - Collects nodes in the model's graph corresponding to the layers for weight compression. + Collects nodes in the model's graph corresponding to the layers for sparsification. :param nncf_graph: NNCFGraph instance. - :return: List with the data for each layer. 
+ :return: A dictionary with nodes and the corresponding target sparsity level. """ supported_metatypes = self._backend_entity.supported_metatypes ignored_names = get_ignored_node_names_from_ignored_scope( - self._ignored_scope, nncf_graph, strict=self._ignored_scope.validate + self._ignored_scope, graph, strict=self._ignored_scope.validate ) - print(ignored_names) target_sparsity_by_node = {} - for node in nncf_graph.topological_sort(): - print(node.metatype, node.node_name, ignored_names, - 'should_consider_scope=', - should_consider_scope(node.node_name, ignored_names)) + for node in graph.topological_sort(): if node.metatype not in supported_metatypes or not should_consider_scope(node.node_name, ignored_names): continue for scope, target_sparsity in self._target_sparsity_by_scope.items(): if matches_any(node.node_name, scope): if node.node_name in target_sparsity_by_node: raise nncf.ValidationError( - f'"{node.node_name}" is matched by multiple items in `target_sparsity_by_scope`.') + f'"{node.node_name}" is matched by multiple items in `target_sparsity_by_scope`.' + ) target_sparsity_by_node[node] = target_sparsity return target_sparsity_by_node @@ -116,10 +160,23 @@ def do_sparsification( target_sparsity_by_node: Dict[NNCFNode, float], dataset: Dataset, ): + """ + Transforms the model into a sparsified one with node-specific target activation sparsity levels. + + :param model: The model to be sparsified. + :param graph: The model's nncf graph. + :param target_sparsity_by_node: A dictionary that defines the target sparsity level + for specified node layers. + :param dataset: The dataset to calibrate the activation sparsifiers. + :return: The sparsified model. + """ model = self._backend_entity.insert_sparsifiers( - model, graph, target_sparsity_by_node) - model = self._backend_entity.calibrate_sparsifiers(model, dataset) - model = self._backend_entity.freeze_sparsifiers(model) + model, graph, target_sparsity_by_node, + ) + model = self._backend_entity.calibrate_sparsifiers( + model, graph, dataset, + ) + model = self._backend_entity.freeze_sparsifiers(model, graph) return model def apply( @@ -128,12 +185,24 @@ def apply( graph: NNCFGraph, dataset: Dataset, ) -> TModel: + """ + Applies the algorithm to the given model. + + :param model: The model to be sparsified. + :param graph: The model's nncf graph. + :param dataset: The dataset to calibrate the activation sparsifiers. + :return: The sparsified model. + """ self._set_backend_entity(model) target_sparsity_by_node = self._get_target_sparsity_by_node(graph) - sparsified_model = self.do_sparsification( + if not target_sparsity_by_node: + raise nncf.ValidationError( + 'No layers matched for activation sparsification.' + ) + sparse_model = self.do_sparsification( model, graph, target_sparsity_by_node, dataset, ) - return sparsified_model + return sparse_model def sparsify_activations( @@ -141,12 +210,31 @@ def sparsify_activations( dataset: Dataset, target_sparsity_by_scope: Dict[str, float], ignored_scope: Optional[IgnoredScope] = None, - debug_folder=None, ) -> TModel: """ - Implementation of the `compress_weights()` method. + Post-training activation sparsification on the given model. + + :param model: The model to be sparsified. + :param dataset: The dataset to calibrate the activation sparsifiers. + :param target_sparsity_by_scope: A dictionary that defines the target activation sparsity + level for specified layers. 
For each item, the key should be a complete scope name + in the nncf graph, or a regular expression specification starting with `{re}`; the + corresponding value should be a float number in the range [0, 1] representing the + target sparsity level. + :param ignored_scope: An ignored scope that defines the list of model control flow graph + nodes to be ignored during activation sparsification. + :return: The sparsified model. """ + for scope, target_sparsity in target_sparsity_by_scope.items(): + if target_sparsity < 0. or target_sparsity > 1.: + raise ValueError( + f'Target sparsity for scope "{scope}" should be in range [0, 1].' + ) + + if ignored_scope is None: + ignored_scope = IgnoredScope() + backend = get_backend(model) if backend == BackendType.TORCH and not is_wrapped_model(model): example_input = next(iter(dataset.get_inference_data())) @@ -156,19 +244,9 @@ def sparsify_activations( trace_parameters=True, ) - if ignored_scope is None: - ignored_scope = IgnoredScope() - algorithm = SparsifyActivationsAlgorithm( target_sparsity_by_scope, ignored_scope) graph = NNCFGraphFactory.create(model) - if debug_folder: - graph.dump_graph( - Path(debug_folder, './before-sparsification.dot').as_posix()) sparse_model = algorithm.apply(model, graph, dataset) - graph = NNCFGraphFactory.create(sparse_model) - if debug_folder: - graph.dump_graph( - Path(debug_folder, './after-sparsification.dot').as_posix()) return sparse_model diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index 7ebc1242be4..d63b3ad42d6 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -1,4 +1,4 @@ -from typing import List, Tuple, TypeVar +from typing import Dict, List, Tuple, TypeVar import torch import torch.nn as nn @@ -20,44 +20,68 @@ class ActivationSparsifier(nn.Module): - def __init__(self, target_sparsity: float, alpha: float = 0.1): + """ + Sparsifies input activations by masking out values around zero. + """ + + def __init__(self, target_sparsity: float, alpha: float = 0.2): + """ + :param target_sparsity: The target activation sparsity level. + :param alpha: The exponential moving average decay factor in range (0, 1) for calibrating + the threshold. A larger alpha will give more weight to the most recent batches. + """ super().__init__() + self.target_sparsity = target_sparsity + if alpha <= 0. or alpha >= 1.: + raise ValueError( + 'The decay factor `alpha` should be in range (0, 1).' + ) self.alpha = alpha - self.register_buffer('target_sparsity', torch.tensor(target_sparsity)) self.register_buffer("running_threshold", torch.tensor(0.)) self.register_buffer("num_batches_tracked", torch.tensor(0)) self.running_threshold: torch.Tensor self.num_batches_tracked: torch.Tensor - self._frozen = True + self._freeze = True - def forward(self, x): - threshold = None - if not self._frozen: - threshold = self._calculate_quantile(x.abs(), self.target_sparsity) + def forward(self, x: torch.Tensor) -> torch.Tensor: + if not self._freeze: + threshold = self._calculate_threshold(x, self.target_sparsity) self._update(threshold) + print(threshold) mask = torch.le(x.abs(), self.running_threshold) - # if '[1]' in self.node_name and 'up_proj' in self.node_name: - # print('sparsity', mask.float().mean(), 'cur_threshold', - # threshold, 'threshold', self.running_threshold) x = torch.masked_fill(x, mask, 0.) 
return x def reset_running_stats(self): + """ + Resets the running threshold and the number of tracked batches to the initial stage. + """ self.running_threshold.zero_() self.num_batches_tracked.zero_() def freeze(self, freeze: bool = True): - self._frozen = freeze + self._freeze = freeze - def extra_repr(self) -> str: - return f"target_sparsity={self.target_sparsity.item()},{self.running_threshold:=},{self.num_batches_tracked}" + def _calculate_threshold(self, x: torch.Tensor, target_sparsity: float) -> torch.Tensor: + """ + Calculates the threshold so that the target sparsity can be achieved. - def _calculate_quantile(self, x: torch.Tensor, target_sparsity: float): - return x.float().view(-1).quantile(q=target_sparsity, dim=-1) + :param x: The input tensor. + :param target_sparsity: The target sparsity level on the input tensor. + :return: The threshold value. + """ + return x.abs().float().view(-1).quantile(q=target_sparsity, dim=-1) + + def _update(self, threshold: torch.Tensor) -> torch.Tensor: + """ + Updates the running threshold by exponential moving average with decaying adjustment. - def _update(self, threshold: torch.Tensor): + The updating logic is similar to `pandas.DataFrame.ewm(adjust=True)`. + + :param threshold: The threshold value derived from this batch to update the running threshold. + :return: The updated running threshold. + """ beta = 1.0 - self.alpha - # Exponential Moving Average with decaying adjustment, similar to pandas.DataFrame.ewm(adjust=True). self.running_threshold = ( threshold * self.alpha + self.running_threshold * beta * @@ -68,16 +92,34 @@ def _update(self, threshold: torch.Tensor): class PTSparsifyActivationsAlgoBackend(SparsifyActivationsAlgoBackend): + """ + Torch backend for the activation sparsification algorithm. + """ + SUPPORTED_METATYPES = [om.PTLinearMetatype] @property - def supported_metatypes(self) -> List[OperatorMetatype]: + def supported_metatypes(self) -> List[type[OperatorMetatype]]: return PTSparsifyActivationsAlgoBackend.SUPPORTED_METATYPES - def insert_sparsifiers(self, model, graph: NNCFGraph, target_sparsity_by_node: dict[NNCFNode, float]) -> NNCFNetwork: + def get_sparsifiers(self, model: NNCFNetwork) -> List[ActivationSparsifier]: + """ + Finds all the activation sparsifiers in the model. + + :param model: The model with activation sparsifiers. + :return: List of activation sparsifiers. 
+ """ + return [m for m in model.nncf.modules() if isinstance(m, ActivationSparsifier)] + + def insert_sparsifiers( + self, + model: NNCFNetwork, + graph: NNCFGraph, + target_sparsity_by_node: Dict[NNCFNode, float], + ) -> NNCFNetwork: transformation_layout = TransformationLayout() for node, target_sparsity in target_sparsity_by_node.items(): - act_node, act_port_id = self._get_activation_node_and_port( + _, act_port_id = self._get_activation_node_and_port( node, graph) sparsifier = ActivationSparsifier(target_sparsity=target_sparsity) sparsifier_name = f"activation_sparsifier_{node.node_name.replace('.', '_')}" @@ -96,17 +138,14 @@ def insert_sparsifiers(self, model, graph: NNCFGraph, target_sparsity_by_node: d model).transform(transformation_layout) return transformed_model - def get_sparsifiers(self, model: NNCFNetwork) -> List[ActivationSparsifier]: - return [m for m in model.nncf.modules() if isinstance(m, ActivationSparsifier)] - - def calibrate_sparsifiers(self, model: NNCFNetwork, dataset: Dataset) -> NNCFNetwork: + def calibrate_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph, dataset: Dataset) -> NNCFNetwork: for sparsifier in self.get_sparsifiers(model): sparsifier.reset_running_stats() sparsifier.freeze(False) self.do_inference(model, dataset) return model - def freeze_sparsifiers(self, model: NNCFNetwork) -> NNCFNetwork: + def freeze_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph) -> NNCFNetwork: for sparsifier in self.get_sparsifiers(model): sparsifier.freeze(True) model.nncf.rebuild_graph() @@ -116,7 +155,7 @@ def _get_activation_port_id(self, node: NNCFNode, graph: NNCFGraph) -> NNCFNode: activation_ports = [] for prev_node in graph.get_previous_nodes(node): if 'weight' in prev_node.node_name.lower() or 'bias' in prev_node.node_name: - # TODO: find activation + # TODO(yujie): find activation continue edge = graph.get_edge(prev_node, node) activation_ports.append(edge.input_port_id) @@ -125,7 +164,7 @@ def _get_activation_port_id(self, node: NNCFNode, graph: NNCFGraph) -> NNCFNode: def _get_activation_node_and_port(self, node: NNCFNode, nncf_graph: NNCFGraph) -> Tuple[NNCFNode, int]: """ - This method returns the activation layer and corresponding port id for the node. + Returns the activation layer and corresponding port id for the node. :param node: NNCFGraph node for which the activation is sought. :param nncf_graph: NNCFGraph instance with the node. 
From 8f5bd9ffd2e91e9e682f07c05e1cdc441a514270 Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:03 +0800
Subject: [PATCH 04/76] disable grad in do_inference

---
 .../torch/sparsify_activations/torch_backend.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py
index d63b3ad42d6..789704093a6 100644
--- a/nncf/experimental/torch/sparsify_activations/torch_backend.py
+++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py
@@ -8,6 +8,7 @@
 from nncf.common.graph.operator_metatypes import OperatorMetatype
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.graph.transformations.layout import TransformationLayout
+from nncf.common.logging import nncf_logger
 from nncf.data import Dataset
 from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgoBackend
 from nncf.torch.graph import operator_metatypes as om
@@ -15,6 +16,7 @@
 from nncf.torch.graph.transformations.commands import PTTargetPoint
 from nncf.torch.model_transformer import PTModelTransformer
 from nncf.torch.nncf_network import NNCFNetwork
+from nncf.torch.utils import training_mode_switcher
 
 TModel = TypeVar("TModel")
 
@@ -47,7 +49,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         if not self._freeze:
             threshold = self._calculate_threshold(x, self.target_sparsity)
             self._update(threshold)
-        print(threshold)
+        nncf_logger.info('Cur %f, Averaged %f', threshold,
+                         self.running_threshold)
         mask = torch.le(x.abs(), self.running_threshold)
         x = torch.masked_fill(x, mask, 0.)
         return x
@@ -62,6 +65,9 @@ def reset_running_stats(self):
     def freeze(self, freeze: bool = True):
         self._freeze = freeze
 
+    def extra_repr(self) -> str:
+        return f'target_sparsity={self.target_sparsity}'
+
     def _calculate_threshold(self, x: torch.Tensor, target_sparsity: float) -> torch.Tensor:
         """
         Calculates the threshold so that the target sparsity can be achieved.
@@ -122,6 +128,8 @@ def insert_sparsifiers(
             _, act_port_id = self._get_activation_node_and_port(
                 node, graph)
             sparsifier = ActivationSparsifier(target_sparsity=target_sparsity)
+            # temporarily freeze it for model transformation
+            sparsifier.freeze(True)
             sparsifier_name = f"activation_sparsifier_{node.node_name.replace('.', '_')}"
             transformation_layout.register(PTSharedFnInsertionCommand(
                 [
                     PTTargetPoint(
                         TargetType.PRE_LAYER_OPERATION,
                         target_node_name=node.node_name,
                         input_port_id=act_port_id)
                 ],
                 sparsifier,
                 sparsifier_name,
             ))
@@ -142,7 +150,9 @@ def calibrate_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph, dataset: D
         for sparsifier in self.get_sparsifiers(model):
             sparsifier.reset_running_stats()
             sparsifier.freeze(False)
-        self.do_inference(model, dataset)
+        with training_mode_switcher(model, is_training=False):
+            with torch.no_grad():
+                self.do_inference(model, dataset)
         return model
 
     def freeze_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph) -> NNCFNetwork:

From 67734975ed526cb24069649f68ff986381fe707e Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:03 +0800
Subject: [PATCH 05/76] add copyright

---
 .../sparsify_activations/sparsify_activations_impl.py | 11 +++++++++++
 .../torch/sparsify_activations/torch_backend.py        | 11 +++++++++++
 2 files changed, 22 insertions(+)

diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
index 07718e871bf..0c7808c205b 100644
--- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
+++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
@@ -1,3 +1,14 @@
+# Copyright (c) 2024 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from abc import ABC
 from abc import abstractmethod
 from typing import Dict, List, Optional, TypeVar
diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py
index 789704093a6..ea62a2b7df4 100644
--- a/nncf/experimental/torch/sparsify_activations/torch_backend.py
+++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py
@@ -1,3 +1,14 @@
+# Copyright (c) 2024 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import Dict, List, Tuple, TypeVar
 
 import torch

From 9631b09377ef0ffa2beb59af4d7f5942daa3c0ac Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:04 +0800
Subject: [PATCH 06/76] minor style change

---
 .../torch/sparsify_activations/sparsify_activations_impl.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
index 0c7808c205b..ba54c37474f 100644
--- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
+++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
@@ -116,7 +116,7 @@ def __init__(
         """
         self._target_sparsity_by_scope = target_sparsity_by_scope
         self._ignored_scope = ignored_scope
-        self._backend_entity = None
+        self._backend_entity: SparsifyActivationsAlgoBackend = None
 
     @property
     def available_backends(self) -> List[BackendType]:
@@ -134,7 +134,7 @@ def _set_backend_entity(self, model: TModel) -> None:
         model_backend = get_backend(model)
         if model_backend == BackendType.TORCH:
             from nncf.experimental.torch.sparsify_activations.torch_backend import PTSparsifyActivationsAlgoBackend
-            self._backend_entity: SparsifyActivationsAlgoBackend = PTSparsifyActivationsAlgoBackend()
+            self._backend_entity = PTSparsifyActivationsAlgoBackend()
         else:
             raise nncf.UnsupportedBackendError(
                 f"{model_backend.value} backend is not supported for `sparsify_activations`."
@@ -208,7 +208,7 @@ def apply(
         target_sparsity_by_node = self._get_target_sparsity_by_node(graph)
         if not target_sparsity_by_node:
             raise nncf.ValidationError(
-                'No layers matched for activation sparsification.'
+                "No layers matched for activation sparsification."
             )
         sparse_model = self.do_sparsification(
             model, graph, target_sparsity_by_node, dataset,

From 43de3bcf428d9571183c2255e1709be3ac49cc2d Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:04 +0800
Subject: [PATCH 07/76] style fix

---
 .../torch/sparsify_activations/__init__.py   |  2 +-
 .../sparsify_activations_impl.py             | 32 +++++++-----
 .../sparsify_activations/torch_backend.py    | 52 +++++++++----------
 3 files changed, 44 insertions(+), 42 deletions(-)

diff --git a/nncf/experimental/torch/sparsify_activations/__init__.py b/nncf/experimental/torch/sparsify_activations/__init__.py
index ba1c31abb5b..41a2b7ebcea 100644
--- a/nncf/experimental/torch/sparsify_activations/__init__.py
+++ b/nncf/experimental/torch/sparsify_activations/__init__.py
@@ -9,4 +9,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import sparsify_activations
+from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import sparsify_activations  # noqa: F401
diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
index ba54c37474f..38cc9a3f922 100644
--- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
+++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
@@ -63,7 +63,10 @@ def supported_metatypes(self) -> List[type[OperatorMetatype]]:
 
     @abstractmethod
     def insert_sparsifiers(
-        self, model: TModel, graph: NNCFGraph, target_sparsity_by_node: Dict[NNCFNode, float],
+        self,
+        model: TModel,
+        graph: NNCFGraph,
+        target_sparsity_by_node: Dict[NNCFNode, float],
     ) -> TModel:
         """
         Inserts the activation sparsifiers to the model.
@@ -134,6 +137,7 @@ def _set_backend_entity(self, model: TModel) -> None:
         model_backend = get_backend(model)
         if model_backend == BackendType.TORCH:
             from nncf.experimental.torch.sparsify_activations.torch_backend import PTSparsifyActivationsAlgoBackend
+
             self._backend_entity = PTSparsifyActivationsAlgoBackend()
         else:
             raise nncf.UnsupportedBackendError(
@@ -182,10 +186,14 @@ def do_sparsification(
         :return: The sparsified model.
         """
         model = self._backend_entity.insert_sparsifiers(
-            model, graph, target_sparsity_by_node,
+            model,
+            graph,
+            target_sparsity_by_node,
         )
         model = self._backend_entity.calibrate_sparsifiers(
-            model, graph, dataset,
+            model,
+            graph,
+            dataset,
         )
         model = self._backend_entity.freeze_sparsifiers(model, graph)
         return model
@@ -207,11 +215,12 @@ def apply(
         self._set_backend_entity(model)
         target_sparsity_by_node = self._get_target_sparsity_by_node(graph)
         if not target_sparsity_by_node:
-            raise nncf.ValidationError(
-                "No layers matched for activation sparsification."
-            )
+            raise nncf.ValidationError("No layers matched for activation sparsification.")
         sparse_model = self.do_sparsification(
-            model, graph, target_sparsity_by_node, dataset,
+            model,
+            graph,
+            target_sparsity_by_node,
+            dataset,
         )
         return sparse_model
 
@@ -238,10 +247,8 @@ def sparsify_activations(
     """
 
     for scope, target_sparsity in target_sparsity_by_scope.items():
-        if target_sparsity < 0. or target_sparsity > 1.:
-            raise ValueError(
-                f'Target sparsity for scope "{scope}" should be in range [0, 1].'
-            )
+        if target_sparsity < 0.0 or target_sparsity > 1.0:
+            raise ValueError(f'Target sparsity for scope "{scope}" should be in range [0, 1].')
 
     if ignored_scope is None:
         ignored_scope = IgnoredScope()
@@ -255,8 +262,7 @@ def sparsify_activations(
             trace_parameters=True,
         )
 
-    algorithm = SparsifyActivationsAlgorithm(
-        target_sparsity_by_scope, ignored_scope)
+    algorithm = SparsifyActivationsAlgorithm(target_sparsity_by_scope, ignored_scope)
 
     graph = NNCFGraphFactory.create(model)
     sparse_model = algorithm.apply(model, graph, dataset)
diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py
index ea62a2b7df4..1597c0cb167 100644
--- a/nncf/experimental/torch/sparsify_activations/torch_backend.py
+++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py
@@ -45,12 +45,10 @@ def __init__(self, target_sparsity: float, alpha: float = 0.2):
         """
         super().__init__()
         self.target_sparsity = target_sparsity
-        if alpha <= 0. or alpha >= 1.:
-            raise ValueError(
-                'The decay factor `alpha` should be in range (0, 1).'
-            )
+        if alpha <= 0.0 or alpha >= 1.0:
+            raise ValueError("The decay factor `alpha` should be in range (0, 1).")
         self.alpha = alpha
-        self.register_buffer("running_threshold", torch.tensor(0.))
+        self.register_buffer("running_threshold", torch.tensor(0.0))
         self.register_buffer("num_batches_tracked", torch.tensor(0))
         self.running_threshold: torch.Tensor
         self.num_batches_tracked: torch.Tensor
@@ -60,10 +58,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         if not self._freeze:
             threshold = self._calculate_threshold(x, self.target_sparsity)
             self._update(threshold)
-            nncf_logger.info('Cur %f, Averaged %f', threshold,
-                             self.running_threshold)
+            nncf_logger.info("Cur %f, Averaged %f", threshold, self.running_threshold)
         mask = torch.le(x.abs(), self.running_threshold)
-        x = torch.masked_fill(x, mask, 0.)
+        x = torch.masked_fill(x, mask, 0.0)
         return x
 
     def reset_running_stats(self):
@@ -77,7 +74,7 @@ def freeze(self, freeze: bool = True):
         self._freeze = freeze
 
     def extra_repr(self) -> str:
-        return f'target_sparsity={self.target_sparsity}'
+        return f"target_sparsity={self.target_sparsity}"
 
     def _calculate_threshold(self, x: torch.Tensor, target_sparsity: float) -> torch.Tensor:
         """
@@ -100,9 +97,7 @@ def _update(self, threshold: torch.Tensor) -> torch.Tensor:
         """
         beta = 1.0 - self.alpha
         self.running_threshold = (
-            threshold * self.alpha +
-            self.running_threshold * beta *
-            (1 - beta ** self.num_batches_tracked)
+            threshold * self.alpha + self.running_threshold * beta * (1 - beta**self.num_batches_tracked)
         ) / (1 - beta ** (self.num_batches_tracked + 1))
         self.num_batches_tracked += 1
         return self.running_threshold
@@ -136,25 +131,26 @@ def insert_sparsifiers(
     ) -> NNCFNetwork:
         transformation_layout = TransformationLayout()
         for node, target_sparsity in target_sparsity_by_node.items():
-            _, act_port_id = self._get_activation_node_and_port(
-                node, graph)
+            _, act_port_id = self._get_activation_node_and_port(node, graph)
             sparsifier = ActivationSparsifier(target_sparsity=target_sparsity)
             # temporarily freeze it for model transformation
             sparsifier.freeze(True)
             sparsifier_name = f"activation_sparsifier_{node.node_name.replace('.', '_')}"
-            transformation_layout.register(PTSharedFnInsertionCommand(
-                [
-                    PTTargetPoint(
-                        TargetType.PRE_LAYER_OPERATION,
-                        target_node_name=node.node_name,
-                        input_port_id=act_port_id)
-                ],
-                sparsifier,
-                sparsifier_name,
-            ))
-
-        transformed_model = PTModelTransformer(
-            model).transform(transformation_layout)
+            transformation_layout.register(
+                PTSharedFnInsertionCommand(
+                    [
+                        PTTargetPoint(
+                            target_type=TargetType.PRE_LAYER_OPERATION,
+                            target_node_name=node.node_name,
+                            input_port_id=act_port_id,
+                        )
+                    ],
+                    sparsifier,
+                    sparsifier_name,
+                )
+            )
+
+        transformed_model = PTModelTransformer(model).transform(transformation_layout)
         return transformed_model
 
     def calibrate_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph, dataset: Dataset) -> NNCFNetwork:
@@ -175,7 +171,7 @@ def freeze_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph) -> NNCFNetwor
     def _get_activation_port_id(self, node: NNCFNode, graph: NNCFGraph) -> NNCFNode:
         activation_ports = []
         for prev_node in graph.get_previous_nodes(node):
-            if 'weight' in prev_node.node_name.lower() or 'bias' in prev_node.node_name:
+            if "weight" in prev_node.node_name.lower() or "bias" in prev_node.node_name:
                 # TODO(yujie): find activation
                 continue
             edge = graph.get_edge(prev_node, node)

From 2d7c1fcc9ba5b7e71059d85b0f3241ade6dfde14 Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:04 +0800
Subject: [PATCH 08/76] fix "find activation port id"

---
 .../sparsify_activations_impl.py             | 14 +++---
 .../sparsify_activations/torch_backend.py    | 45 ++++++-----------
 2 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
index 38cc9a3f922..7a39d3f37b2 100644
--- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
+++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py
@@ -72,7 +72,7 @@ def insert_sparsifiers(
         Inserts the activation sparsifiers to the model.
 
         :param model: The model to conduct activation sparsification.
-        :param graph: The model's nncf graph.
+        :param graph: The model's NNCF graph.
         :param target_sparsity_by_node: The target sparsity level for the input activation in each given node layer.
         :return: The model with inserted activation sparsifiers.
         """
@@ -84,7 +84,7 @@ def calibrate_sparsifiers(self, model: TModel, graph: NNCFGraph, dataset: Datase
         Calibrates the thresholds in the activation sparsifiers.
 
         :param model: The model with inserted activation sparsifiers.
-        :param graph: The model's nncf graph.
+        :param graph: The model's NNCF graph.
         :param dataset: The calibration dataset to update the thresholds in the sparsifiers.
         :return: The model with calibrated activation sparsifiers.
         """
@@ -96,7 +96,7 @@ def freeze_sparsifiers(self, model: TModel, graph: NNCFGraph) -> TModel:
         Freezes the activation sparsifiers and applies the sparsification to the model.
 
         :param model: The model with activation sparsifiers.
-        :param graph: The model's nncf graph.
+        :param graph: The model's NNCF graph.
         :return: The model with applied sparsification operations.
         """
         return model
@@ -148,7 +148,7 @@ def _get_target_sparsity_by_node(self, graph: NNCFGraph) -> Dict[NNCFNode, float
         """
         Collects nodes in the model's graph corresponding to the layers for sparsification.
 
-        :param nncf_graph: NNCFGraph instance.
+        :param graph: NNCFGraph instance.
         :return: A dictionary with nodes and the corresponding target sparsity level.
         """
         supported_metatypes = self._backend_entity.supported_metatypes
@@ -179,7 +179,7 @@ def do_sparsification(
         Transforms the model into a sparsified one with node-specific target activation sparsity levels.
 
         :param model: The model to be sparsified.
-        :param graph: The model's nncf graph.
+        :param graph: The model's NNCF graph.
         :param target_sparsity_by_node: A dictionary that defines the target sparsity level
             for specified node layers.
         :param dataset: The dataset to calibrate the activation sparsifiers.
@@ -208,7 +208,7 @@ def apply(
         Applies the algorithm to the given model.
 
         :param model: The model to be sparsified.
-        :param graph: The model's nncf graph.
+        :param graph: The model's NNCF graph.
         :param dataset: The dataset to calibrate the activation sparsifiers.
         :return: The sparsified model.
         """
@@ -238,7 +238,7 @@ def sparsify_activations(
     :param dataset: The dataset to calibrate the activation sparsifiers.
     :param target_sparsity_by_scope: A dictionary that defines the target activation sparsity
         level for specified layers.
For each item, the key should be a complete scope name - in the nncf graph, or a regular expression specification starting with `{re}`; the + in the NNCF graph, or a regular expression specification starting with `{re}`; the corresponding value should be a float number in the range [0, 1] representing the target sparsity level. :param ignored_scope: An ignored scope that defines the list of model control flow graph diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index 1597c0cb167..045f8a7f00b 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -9,17 +9,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Tuple, TypeVar +from typing import Dict, List, TypeVar import torch import torch.nn as nn +import nncf from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.graph import NNCFNode +from nncf.common.graph.operator_metatypes import CONST_NOOP_METATYPES from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout -from nncf.common.logging import nncf_logger from nncf.data import Dataset from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgoBackend from nncf.torch.graph import operator_metatypes as om @@ -52,13 +53,12 @@ def __init__(self, target_sparsity: float, alpha: float = 0.2): self.register_buffer("num_batches_tracked", torch.tensor(0)) self.running_threshold: torch.Tensor self.num_batches_tracked: torch.Tensor - self._freeze = True + self._freeze = False def forward(self, x: torch.Tensor) -> torch.Tensor: if not self._freeze: threshold = self._calculate_threshold(x, self.target_sparsity) self._update(threshold) - nncf_logger.info("Cur %f, Averaged %f", threshold, self.running_threshold) mask = torch.le(x.abs(), self.running_threshold) x = torch.masked_fill(x, mask, 0.0) return x @@ -131,18 +131,18 @@ def insert_sparsifiers( ) -> NNCFNetwork: transformation_layout = TransformationLayout() for node, target_sparsity in target_sparsity_by_node.items(): - _, act_port_id = self._get_activation_node_and_port(node, graph) + activation_port_id = self._get_activation_port_id(node, graph) sparsifier = ActivationSparsifier(target_sparsity=target_sparsity) # temporarily freeze it for model transformation sparsifier.freeze(True) - sparsifier_name = f"activation_sparsifier_{node.node_name.replace('.', '_')}" + sparsifier_name = f"activations_sparsifier_{node.node_name.replace('.', '_')}" transformation_layout.register( PTSharedFnInsertionCommand( [ PTTargetPoint( target_type=TargetType.PRE_LAYER_OPERATION, target_node_name=node.node_name, - input_port_id=act_port_id, + input_port_id=activation_port_id, ) ], sparsifier, @@ -168,27 +168,20 @@ def freeze_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph) -> NNCFNetwor model.nncf.rebuild_graph() return model - def _get_activation_port_id(self, node: NNCFNode, graph: NNCFGraph) -> NNCFNode: + def _get_activation_port_id(self, node: NNCFNode, graph: NNCFGraph) -> int: + """ + Finds the input activation port id for the node. + + :param node: The node to find its activation port id. + :param graph: The NNCF graph containing the node. + :return: The activation port id. 
+ """ activation_ports = [] for prev_node in graph.get_previous_nodes(node): - if "weight" in prev_node.node_name.lower() or "bias" in prev_node.node_name: - # TODO(yujie): find activation - continue edge = graph.get_edge(prev_node, node) + if prev_node.metatype in CONST_NOOP_METATYPES or edge.input_port_id in node.metatype.weight_port_ids: + continue activation_ports.append(edge.input_port_id) - assert len(activation_ports) == 1 + if len(activation_ports) != 1: + raise nncf.InternalError(f'Cannot find activation port for node "{node}".') return activation_ports[0] - - def _get_activation_node_and_port(self, node: NNCFNode, nncf_graph: NNCFGraph) -> Tuple[NNCFNode, int]: - """ - Returns the activation layer and corresponding port id for the node. - - :param node: NNCFGraph node for which the activation is sought. - :param nncf_graph: NNCFGraph instance with the node. - :return: Tuple with the activation node and port id. - """ - activation_port = self._get_activation_port_id(node, nncf_graph) - activation_edge = nncf_graph.get_input_edges(node)[activation_port] - activation_node = activation_edge.from_node - port_id = activation_edge.output_port_id - return activation_node, port_id From 0dcd0f243e3f11b7a21426dc66a9135af851edef Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:04 +0800 Subject: [PATCH 09/76] minor doc change --- nncf/experimental/torch/sparsify_activations/torch_backend.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index 045f8a7f00b..2f9cc7a3604 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -89,7 +89,6 @@ def _calculate_threshold(self, x: torch.Tensor, target_sparsity: float) -> torch def _update(self, threshold: torch.Tensor) -> torch.Tensor: """ Updates the running threshold by exponential moving average with decaying adjustment. - The updating logic is similar to `pandas.DataFrame.ewm(adjust=True)`. :param threshold: The threshold value derived from this batch to update the running threshold. 
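For readers checking the arithmetic, the calibration logic these patches touch can be reproduced in isolation. The sketch below is illustrative only (`demo_calibrate` and its inputs are invented for the example); the two formulas are the ones from `_calculate_threshold` and `_update` above: the per-batch threshold is the `target_sparsity` quantile of `|x|`, and the running threshold is a bias-corrected exponential moving average, i.e. the `pandas.DataFrame.ewm(adjust=True)` behavior the removed docstring line referred to.

import torch


def demo_calibrate(batches, target_sparsity=0.5, alpha=0.2):
    """Standalone replica of the sparsifier's threshold calibration (sketch)."""
    beta = 1.0 - alpha
    running_threshold = torch.tensor(0.0)
    for n, x in enumerate(batches):
        # Per-batch threshold: the magnitude below which a `target_sparsity`
        # fraction of the activation values fall.
        threshold = x.abs().float().view(-1).quantile(q=target_sparsity)
        # Bias-corrected EMA (adjust=True): dividing by 1 - beta**(n + 1)
        # keeps early estimates from being dragged toward the zero init.
        running_threshold = (
            threshold * alpha + running_threshold * beta * (1 - beta**n)
        ) / (1 - beta ** (n + 1))
    return running_threshold


batches = [torch.randn(4, 8) for _ in range(3)]
t = demo_calibrate(batches)
# Masking values with |x| <= t zeroes out roughly half of the last batch.
print((batches[-1].abs() <= t).float().mean())

Equivalently, after n batches the running threshold is the weighted average of all per-batch thresholds with weights beta**(n - 1 - i), which is what lets the unfrozen sparsifier track a slowly moving quantile estimate.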
From 7fa081d1e837138209d1b6cd3426a1c4702a33ab Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:04 +0800 Subject: [PATCH 10/76] use numpy's quantile --- .../sparsify_activations/sparsify_activations_impl.py | 2 +- .../torch/sparsify_activations/torch_backend.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index 7a39d3f37b2..a486dc4f34c 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -49,7 +49,7 @@ def do_inference(self, model: TModel, dataset: Dataset): for input_data in track( dataset.get_inference_data(), total=dataset.get_length(), - description="Activation sparsifier calibration", + description="Activation Sparsifier Calibration", ): engine.infer(input_data) diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index 2f9cc7a3604..7355c68cf50 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -11,6 +11,7 @@ from typing import Dict, List, TypeVar +import numpy as np import torch import torch.nn as nn @@ -84,7 +85,12 @@ def _calculate_threshold(self, x: torch.Tensor, target_sparsity: float) -> torch :param target_sparsity: The target sparsity level on the input tensor. :return: The threshold value. """ - return x.abs().float().view(-1).quantile(q=target_sparsity, dim=-1) + # uses numpy's quantile implementation as torch's cannot handle large tensor + value = np.quantile( + x.detach().abs().cpu().numpy(), + q=target_sparsity, + ) + return torch.tensor(value, device=x.device, dtype=x.dtype) def _update(self, threshold: torch.Tensor) -> torch.Tensor: """ From 668a434358cc7bca20d198fd955252cf8cae190e Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:04 +0800 Subject: [PATCH 11/76] change method order --- .../sparsify_activations_impl.py | 112 +++++++++--------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index a486dc4f34c..fa0fa9da534 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -128,45 +128,31 @@ def available_backends(self) -> List[BackendType]: """ return [BackendType.TORCH] - def _set_backend_entity(self, model: TModel) -> None: - """ - Creates a helper class with a backend-specific logic of the algorithm. - - :param model: Backend-specific input model. - """ - model_backend = get_backend(model) - if model_backend == BackendType.TORCH: - from nncf.experimental.torch.sparsify_activations.torch_backend import PTSparsifyActivationsAlgoBackend - - self._backend_entity = PTSparsifyActivationsAlgoBackend() - else: - raise nncf.UnsupportedBackendError( - f"{model_backend.value} backend is not supported for `sparsify_activations`." 
- ) - - def _get_target_sparsity_by_node(self, graph: NNCFGraph) -> Dict[NNCFNode, float]: + def apply( + self, + model: TModel, + graph: NNCFGraph, + dataset: Dataset, + ) -> TModel: """ - Collects nodes in the model's graph corresponding to the layers for sparsification. + Applies the algorithm to the given model. - :param graph: NNCFGraph instance. - :return: A dictionary with nodes and the corresponding target sparsity level. + :param model: The model to be sparsified. + :param graph: The model's NNCF graph. + :param dataset: The dataset to calibrate the activation sparsifiers. + :return: The sparsified model. """ - supported_metatypes = self._backend_entity.supported_metatypes - ignored_names = get_ignored_node_names_from_ignored_scope( - self._ignored_scope, graph, strict=self._ignored_scope.validate + self._set_backend_entity(model) + target_sparsity_by_node = self._get_target_sparsity_by_node(graph) + if not target_sparsity_by_node: + raise nncf.ValidationError("No layers matched for activation sparsification.") + sparse_model = self.do_sparsification( + model, + graph, + target_sparsity_by_node, + dataset, ) - target_sparsity_by_node = {} - for node in graph.topological_sort(): - if node.metatype not in supported_metatypes or not should_consider_scope(node.node_name, ignored_names): - continue - for scope, target_sparsity in self._target_sparsity_by_scope.items(): - if matches_any(node.node_name, scope): - if node.node_name in target_sparsity_by_node: - raise nncf.ValidationError( - f'"{node.node_name}" is matched by multiple items in `target_sparsity_by_scope`.' - ) - target_sparsity_by_node[node] = target_sparsity - return target_sparsity_by_node + return sparse_model def do_sparsification( self, @@ -198,31 +184,45 @@ def do_sparsification( model = self._backend_entity.freeze_sparsifiers(model, graph) return model - def apply( - self, - model: TModel, - graph: NNCFGraph, - dataset: Dataset, - ) -> TModel: + def _set_backend_entity(self, model: TModel) -> None: """ - Applies the algorithm to the given model. + Creates a helper class with a backend-specific logic of the algorithm. - :param model: The model to be sparsified. - :param graph: The model's NNCF graph. - :param dataset: The dataset to calibrate the activation sparsifiers. - :return: The sparsified model. + :param model: Backend-specific input model. """ - self._set_backend_entity(model) - target_sparsity_by_node = self._get_target_sparsity_by_node(graph) - if not target_sparsity_by_node: - raise nncf.ValidationError("No layers matched for activation sparsification.") - sparse_model = self.do_sparsification( - model, - graph, - target_sparsity_by_node, - dataset, + model_backend = get_backend(model) + if model_backend == BackendType.TORCH: + from nncf.experimental.torch.sparsify_activations.torch_backend import PTSparsifyActivationsAlgoBackend + + self._backend_entity = PTSparsifyActivationsAlgoBackend() + else: + raise nncf.UnsupportedBackendError( + f"{model_backend.value} backend is not supported for `sparsify_activations`." + ) + + def _get_target_sparsity_by_node(self, graph: NNCFGraph) -> Dict[NNCFNode, float]: + """ + Collects nodes in the model's graph corresponding to the layers for sparsification. + + :param graph: NNCFGraph instance. + :return: A dictionary with nodes and the corresponding target sparsity level. 
+ """ + supported_metatypes = self._backend_entity.supported_metatypes + ignored_names = get_ignored_node_names_from_ignored_scope( + self._ignored_scope, graph, strict=self._ignored_scope.validate ) - return sparse_model + target_sparsity_by_node = {} + for node in graph.topological_sort(): + if node.metatype not in supported_metatypes or not should_consider_scope(node.node_name, ignored_names): + continue + for scope, target_sparsity in self._target_sparsity_by_scope.items(): + if matches_any(node.node_name, scope): + if node.node_name in target_sparsity_by_node: + raise nncf.ValidationError( + f'"{node.node_name}" is matched by multiple items in `target_sparsity_by_scope`.' + ) + target_sparsity_by_node[node] = target_sparsity + return target_sparsity_by_node def sparsify_activations( From 371261076ac4d7ea76d5c5f32d9280284d7d0de2 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:04 +0800 Subject: [PATCH 12/76] algo test --- .../sparsify_activations/torch_backend.py | 23 ++- .../sparsify_activations/helpers.py | 60 ++++++ .../sparsify_activations/test_algo.py | 183 ++++++++++++++++++ 3 files changed, 257 insertions(+), 9 deletions(-) create mode 100644 tests/torch/experimental/sparsify_activations/helpers.py create mode 100644 tests/torch/experimental/sparsify_activations/test_algo.py diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index 7355c68cf50..befb792c101 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -31,10 +31,11 @@ from nncf.torch.nncf_network import NNCFNetwork from nncf.torch.utils import training_mode_switcher +ACTIVATIONS_SPARSIFIER_PREFIX = 'activations_sparsifier' TModel = TypeVar("TModel") -class ActivationSparsifier(nn.Module): +class ActivationsSparsifier(nn.Module): """ Sparsifies input activations by masking out values around zero. """ @@ -48,7 +49,8 @@ def __init__(self, target_sparsity: float, alpha: float = 0.2): super().__init__() self.target_sparsity = target_sparsity if alpha <= 0.0 or alpha >= 1.0: - raise ValueError("The decay factor `alpha` should be in range (0, 1).") + raise ValueError( + "The decay factor `alpha` should be in range (0, 1).") self.alpha = alpha self.register_buffer("running_threshold", torch.tensor(0.0)) self.register_buffer("num_batches_tracked", torch.tensor(0)) @@ -102,7 +104,8 @@ def _update(self, threshold: torch.Tensor) -> torch.Tensor: """ beta = 1.0 - self.alpha self.running_threshold = ( - threshold * self.alpha + self.running_threshold * beta * (1 - beta**self.num_batches_tracked) + threshold * self.alpha + self.running_threshold * + beta * (1 - beta**self.num_batches_tracked) ) / (1 - beta ** (self.num_batches_tracked + 1)) self.num_batches_tracked += 1 return self.running_threshold @@ -119,14 +122,14 @@ class PTSparsifyActivationsAlgoBackend(SparsifyActivationsAlgoBackend): def supported_metatypes(self) -> List[type[OperatorMetatype]]: return PTSparsifyActivationsAlgoBackend.SUPPORTED_METATYPES - def get_sparsifiers(self, model: NNCFNetwork) -> List[ActivationSparsifier]: + def get_sparsifiers(self, model: NNCFNetwork) -> List[ActivationsSparsifier]: """ Finds all the activation sparsifiers in the model. :param model: The model with activation sparsifiers. :return: List of activation sparsifiers. 
""" - return [m for m in model.nncf.modules() if isinstance(m, ActivationSparsifier)] + return [m for m in model.nncf.modules() if isinstance(m, ActivationsSparsifier)] def insert_sparsifiers( self, @@ -137,10 +140,10 @@ def insert_sparsifiers( transformation_layout = TransformationLayout() for node, target_sparsity in target_sparsity_by_node.items(): activation_port_id = self._get_activation_port_id(node, graph) - sparsifier = ActivationSparsifier(target_sparsity=target_sparsity) + sparsifier = ActivationsSparsifier(target_sparsity=target_sparsity) # temporarily freeze it for model transformation sparsifier.freeze(True) - sparsifier_name = f"activations_sparsifier_{node.node_name.replace('.', '_')}" + sparsifier_name = f"{ACTIVATIONS_SPARSIFIER_PREFIX}_{node.node_name.replace('.', '_')}" transformation_layout.register( PTSharedFnInsertionCommand( [ @@ -155,7 +158,8 @@ def insert_sparsifiers( ) ) - transformed_model = PTModelTransformer(model).transform(transformation_layout) + transformed_model = PTModelTransformer( + model).transform(transformation_layout) return transformed_model def calibrate_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph, dataset: Dataset) -> NNCFNetwork: @@ -188,5 +192,6 @@ def _get_activation_port_id(self, node: NNCFNode, graph: NNCFGraph) -> int: continue activation_ports.append(edge.input_port_id) if len(activation_ports) != 1: - raise nncf.InternalError(f'Cannot find activation port for node "{node}".') + raise nncf.InternalError( + f'Cannot find activation port for node "{node}".') return activation_ports[0] diff --git a/tests/torch/experimental/sparsify_activations/helpers.py b/tests/torch/experimental/sparsify_activations/helpers.py new file mode 100644 index 00000000000..2b54d5a4dcf --- /dev/null +++ b/tests/torch/experimental/sparsify_activations/helpers.py @@ -0,0 +1,60 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from dataclasses import dataclass +from pathlib import Path +from typing import Callable, Dict + +import openvino as ov +import pytest +import torch +import torch.nn as nn +import transformers.models + +import nncf +import nncf.experimental +import nncf.experimental.torch.sparsify_activations +from nncf.experimental.torch.sparsify_activations.torch_backend import ACTIVATIONS_SPARSIFIER_PREFIX +from nncf.experimental.torch.sparsify_activations.torch_backend import ActivationsSparsifier +from nncf.scopes import IgnoredScope +from nncf.torch.nncf_network import NNCFNetwork +from tests.shared.nx_graph import compare_nx_graph_with_reference +from tests.shared.paths import TEST_ROOT +from tests.torch.helpers import set_torch_seed + + +class TwoLinearModel(nn.Module): + def __init__(self) -> None: + super().__init__() + self.embedding = nn.Embedding(32, 2) + self.linear1 = nn.Linear(2, 3) + self.linear2 = nn.Linear(2, 4, bias=False) + + def forward(self, input_ids: torch.Tensor): + x = self.embedding(input_ids) + y0 = self.linear1(x) + y1 = self.linear2(x) + return y0, y1 + + +def dummy_llama_model(): + config = transformers.models.llama.configuration_llama.LlamaConfig( + vocab_size=32, + hidden_size=8, + intermediate_size=14, + num_attention_heads=2, + num_key_value_heads=1, + num_hidden_layers=2, + use_cache=False, + return_dict=False, + ) + model = transformers.AutoModelForCausalLM.from_config(config) + return model diff --git a/tests/torch/experimental/sparsify_activations/test_algo.py b/tests/torch/experimental/sparsify_activations/test_algo.py new file mode 100644 index 00000000000..77284dd2c9c --- /dev/null +++ b/tests/torch/experimental/sparsify_activations/test_algo.py @@ -0,0 +1,183 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from dataclasses import dataclass +from pathlib import Path +from typing import Callable, Dict + +import openvino as ov +import pytest +import torch +import torch.nn as nn + +import nncf +import nncf.experimental +import nncf.experimental.torch.sparsify_activations +from nncf.experimental.torch.sparsify_activations.torch_backend import ACTIVATIONS_SPARSIFIER_PREFIX +from nncf.experimental.torch.sparsify_activations.torch_backend import ActivationsSparsifier +from nncf.scopes import IgnoredScope +from nncf.torch.nncf_network import NNCFNetwork +from tests.shared.nx_graph import compare_nx_graph_with_reference +from tests.shared.paths import TEST_ROOT +from tests.torch.experimental.sparsify_activations.helpers import TwoLinearModel +from tests.torch.experimental.sparsify_activations.helpers import dummy_llama_model +from tests.torch.helpers import set_torch_seed + + +@dataclass +class AlgoTestDesc: + model_name: str + model_getter: Callable[[], nn.Module] + dataset_getter: Callable[[torch.device], nncf.Dataset] + compress_weights: bool + target_sparsity_by_scope: Dict[str, float] + ignored_scope: nncf.IgnoredScope | None + ref_sparsifier_target_sparsity: Dict[str, float] + ref_num_batches_tracked: int + + @property + def desc_id(self): + compress_weights_flag = "_compressed_weights" if self.compress_weights else "" + return f"{self.model_name}{compress_weights_flag}" + + @property + def ref_dot_path(self): + return TEST_ROOT / Path("torch/data/sparsify_activations", f"{self.desc_id}.dot") + + +AlgoTestDescs: list[AlgoTestDesc] = [] +for compress_weights in [False, True]: + AlgoTestDescs += [ + AlgoTestDesc( + model_name="linear", + model_getter=lambda: nn.Linear(4, 2), + dataset_getter=lambda device: nncf.Dataset(torch.randn([3, 2, 4]).to(device)), + compress_weights=compress_weights, + target_sparsity_by_scope={ + "{re}.*linear.*": 0.3, + }, + ignored_scope=None, + ref_sparsifier_target_sparsity={ + f"{ACTIVATIONS_SPARSIFIER_PREFIX}_Linear/linear_0": 0.3, + }, + ref_num_batches_tracked=3, + ), + AlgoTestDesc( + model_name="two_linear", + model_getter=TwoLinearModel, + dataset_getter=lambda device: nncf.Dataset(torch.randint(0, 30, (3, 2, 8)).to(device)), + compress_weights=compress_weights, + target_sparsity_by_scope={ + "{re}.*linear2.*": 0.4, + }, + ignored_scope=IgnoredScope(patterns=[".*linear1.*"]), + ref_sparsifier_target_sparsity={ + f"{ACTIVATIONS_SPARSIFIER_PREFIX}_TwoLinearModel/Linear[linear2]/linear_0": 0.4, + }, + ref_num_batches_tracked=3, + ), + AlgoTestDesc( + model_name="dummy_llama", + model_getter=dummy_llama_model, + dataset_getter=lambda device: nncf.Dataset(torch.randint(0, 30, (3, 2, 8)).to(device)), + compress_weights=compress_weights, + target_sparsity_by_scope={ + "{re}.*gate_proj.*": 0.2, + "{re}.*up_proj.*": 0.3, + "{re}.*down_proj.*": 0.4, + }, + ignored_scope=None, + ref_sparsifier_target_sparsity={ + ( + f"{ACTIVATIONS_SPARSIFIER_PREFIX}_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/" + f"LlamaDecoderLayer[{layer_id}]/LlamaMLP[mlp]/Linear[{name}]/linear_0" + ): sparsity + for name, sparsity in [("gate_proj", 0.2), ("up_proj", 0.3), ("down_proj", 0.4)] + for layer_id in [0, 1] + }, + ref_num_batches_tracked=3, + ), + ] + + +@pytest.mark.parametrize("desc", AlgoTestDescs, ids=[p.desc_id for p in AlgoTestDescs], scope="class") +@pytest.mark.parametrize("use_cuda", [False, True], ids=["cpu", "cuda"], scope="class") +class TestSparsifyActivationsAlgorithm: + + @pytest.fixture(autouse=True, scope="class") + def setup(self, request, desc: AlgoTestDesc, 
use_cuda: bool): + if use_cuda and not torch.cuda.is_available(): + pytest.skip("CUDA is not available") + request.cls.use_cuda = use_cuda + device = torch.device("cuda" if use_cuda else "cpu") + request.cls.device = device + request.cls.desc = desc + with set_torch_seed(): + model = desc.model_getter() + model = model.to(device).eval() + dataset = desc.dataset_getter(device) + if desc.compress_weights: + model = nncf.compress_weights( + model, + mode=nncf.CompressWeightsMode.INT8_SYM, + dataset=dataset, + ) + model = nncf.experimental.torch.sparsify_activations.sparsify_activations( + model=model, + dataset=dataset, + target_sparsity_by_scope=desc.target_sparsity_by_scope, + ignored_scope=desc.ignored_scope, + ) + request.cls.model = model + request.cls.dataset = dataset + + def test_inserted_sparsifier(self): + desc: AlgoTestDesc = self.desc + model = self.model + assert isinstance(model, NNCFNetwork) + num_sparsifiers = 0 + for name, op in model.nncf.external_op.items(): + if isinstance(op, ActivationsSparsifier): + assert op.target_sparsity == desc.ref_sparsifier_target_sparsity[name] + assert op.num_batches_tracked == desc.ref_num_batches_tracked + num_sparsifiers += 1 + assert num_sparsifiers == len(desc.ref_sparsifier_target_sparsity) + + def test_nncf_graph(self): + desc: AlgoTestDesc = self.desc + model: NNCFNetwork = self.model + graph = model.nncf.get_graph() + graph.dump_graph(desc.ref_dot_path) + graph = model.nncf.get_graph().get_graph_for_structure_analysis() + compare_nx_graph_with_reference(graph, desc.ref_dot_path) + + def test_export_openvino(self): + model: NNCFNetwork = self.model + example_input = next(iter(self.dataset.get_inference_data())) + with torch.no_grad(): + torch_outputs = model(example_input) + if isinstance(torch_outputs, dict): + torch_outputs = tuple(torch_outputs.values()) + if not isinstance(torch_outputs, tuple): + torch_outputs = (torch_outputs,) + + ov_model = ov.convert_model(model, example_input=example_input) + compiled_model = ov.compile_model(ov_model, "CPU") + ov_outputs = compiled_model(example_input.cpu()).to_tuple() + + assert len(torch_outputs) == len(ov_outputs) + for torch_output, ov_output in zip(torch_outputs, ov_outputs): + torch.testing.assert_close( + torch_output.cpu(), + torch.from_numpy(ov_output), + rtol=1e-3, + atol=1e-3, + ) From b1b66d6e91a53fd105a7acc5090306d2385eed71 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:05 +0800 Subject: [PATCH 13/76] fix test on newer cpu as it might use bf16 for ov inference --- tests/torch/experimental/sparsify_activations/test_algo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/torch/experimental/sparsify_activations/test_algo.py b/tests/torch/experimental/sparsify_activations/test_algo.py index 77284dd2c9c..cd6c4c8e95e 100644 --- a/tests/torch/experimental/sparsify_activations/test_algo.py +++ b/tests/torch/experimental/sparsify_activations/test_algo.py @@ -170,7 +170,7 @@ def test_export_openvino(self): torch_outputs = (torch_outputs,) ov_model = ov.convert_model(model, example_input=example_input) - compiled_model = ov.compile_model(ov_model, "CPU") + compiled_model = ov.compile_model(ov_model, "CPU", config={ov.properties.hint.inference_precision: "f32"}) ov_outputs = compiled_model(example_input.cpu()).to_tuple() assert len(torch_outputs) == len(ov_outputs) From 66f59c03ee7db0cb334ea4b535a84dd2bd04385f Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:05 +0800 Subject: [PATCH 14/76] add tests --- 
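A minimal usage sketch of the entry point these tests exercise — the model, dataset shape, and scope pattern below are taken from the `linear` test case in test_algo.py; the snippet itself is illustrative and not part of the patch:

import torch
import torch.nn as nn

import nncf
from nncf.experimental.torch.sparsify_activations import sparsify_activations

model = nn.Linear(4, 2).eval()
# Calibration samples; the inserted sparsifiers derive thresholds from these.
dataset = nncf.Dataset(torch.randn(3, 2, 4))

# The "{re}" prefix marks the key as a regex over NNCF node names.
sparse_model = sparsify_activations(
    model,
    dataset,
    target_sparsity_by_scope={"{re}.*linear.*": 0.3},
)

After calibration the sparsifiers are frozen, so `sparse_model` can be exported (e.g. via `ov.convert_model`) exactly as exercised in `test_export_openvino`.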
.../sparsify_activations_impl.py | 23 +- .../sparsify_activations/torch_backend.py | 15 +- .../sparsify_activations/helpers.py | 21 +- .../sparsify_activations/test_algo.py | 211 +++++++++++------- .../sparsify_activations/test_components.py | 185 +++++++++++++++ 5 files changed, 329 insertions(+), 126 deletions(-) create mode 100644 tests/torch/experimental/sparsify_activations/test_components.py diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index fa0fa9da534..3785784a7e6 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -49,7 +49,7 @@ def do_inference(self, model: TModel, dataset: Dataset): for input_data in track( dataset.get_inference_data(), total=dataset.get_length(), - description="Activation Sparsifier Calibration", + description="Activations Sparsifier Calibration", ): engine.infer(input_data) @@ -146,12 +146,7 @@ def apply( target_sparsity_by_node = self._get_target_sparsity_by_node(graph) if not target_sparsity_by_node: raise nncf.ValidationError("No layers matched for activation sparsification.") - sparse_model = self.do_sparsification( - model, - graph, - target_sparsity_by_node, - dataset, - ) + sparse_model = self.do_sparsification(model, graph, target_sparsity_by_node, dataset) return sparse_model def do_sparsification( @@ -171,16 +166,8 @@ def do_sparsification( :param dataset: The dataset to calibrate the activation sparsifiers. :return: The sparsified model. """ - model = self._backend_entity.insert_sparsifiers( - model, - graph, - target_sparsity_by_node, - ) - model = self._backend_entity.calibrate_sparsifiers( - model, - graph, - dataset, - ) + model = self._backend_entity.insert_sparsifiers(model, graph, target_sparsity_by_node) + model = self._backend_entity.calibrate_sparsifiers(model, graph, dataset) model = self._backend_entity.freeze_sparsifiers(model, graph) return model @@ -217,7 +204,7 @@ def _get_target_sparsity_by_node(self, graph: NNCFGraph) -> Dict[NNCFNode, float continue for scope, target_sparsity in self._target_sparsity_by_scope.items(): if matches_any(node.node_name, scope): - if node.node_name in target_sparsity_by_node: + if node in target_sparsity_by_node: raise nncf.ValidationError( f'"{node.node_name}" is matched by multiple items in `target_sparsity_by_scope`.' 
) diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index befb792c101..cc4be5170c7 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -31,7 +31,7 @@ from nncf.torch.nncf_network import NNCFNetwork from nncf.torch.utils import training_mode_switcher -ACTIVATIONS_SPARSIFIER_PREFIX = 'activations_sparsifier' +ACTIVATIONS_SPARSIFIER_PREFIX = "activations_sparsifier" TModel = TypeVar("TModel") @@ -49,8 +49,7 @@ def __init__(self, target_sparsity: float, alpha: float = 0.2): super().__init__() self.target_sparsity = target_sparsity if alpha <= 0.0 or alpha >= 1.0: - raise ValueError( - "The decay factor `alpha` should be in range (0, 1).") + raise ValueError("The decay factor `alpha` should be in range (0, 1).") self.alpha = alpha self.register_buffer("running_threshold", torch.tensor(0.0)) self.register_buffer("num_batches_tracked", torch.tensor(0)) @@ -104,9 +103,9 @@ def _update(self, threshold: torch.Tensor) -> torch.Tensor: """ beta = 1.0 - self.alpha self.running_threshold = ( - threshold * self.alpha + self.running_threshold * - beta * (1 - beta**self.num_batches_tracked) + threshold * self.alpha + self.running_threshold * beta * (1 - beta**self.num_batches_tracked) ) / (1 - beta ** (self.num_batches_tracked + 1)) + self.running_threshold = self.running_threshold.type(threshold.dtype) self.num_batches_tracked += 1 return self.running_threshold @@ -158,8 +157,7 @@ def insert_sparsifiers( ) ) - transformed_model = PTModelTransformer( - model).transform(transformation_layout) + transformed_model = PTModelTransformer(model).transform(transformation_layout) return transformed_model def calibrate_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph, dataset: Dataset) -> NNCFNetwork: @@ -192,6 +190,5 @@ def _get_activation_port_id(self, node: NNCFNode, graph: NNCFGraph) -> int: continue activation_ports.append(edge.input_port_id) if len(activation_ports) != 1: - raise nncf.InternalError( - f'Cannot find activation port for node "{node}".') + raise nncf.InternalError(f'Cannot find activation port for node "{node}".') return activation_ports[0] diff --git a/tests/torch/experimental/sparsify_activations/helpers.py b/tests/torch/experimental/sparsify_activations/helpers.py index 2b54d5a4dcf..558ee9b156c 100644 --- a/tests/torch/experimental/sparsify_activations/helpers.py +++ b/tests/torch/experimental/sparsify_activations/helpers.py @@ -9,38 +9,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from dataclasses import dataclass -from pathlib import Path -from typing import Callable, Dict -import openvino as ov -import pytest import torch import torch.nn as nn import transformers.models -import nncf -import nncf.experimental -import nncf.experimental.torch.sparsify_activations -from nncf.experimental.torch.sparsify_activations.torch_backend import ACTIVATIONS_SPARSIFIER_PREFIX -from nncf.experimental.torch.sparsify_activations.torch_backend import ActivationsSparsifier -from nncf.scopes import IgnoredScope -from nncf.torch.nncf_network import NNCFNetwork -from tests.shared.nx_graph import compare_nx_graph_with_reference -from tests.shared.paths import TEST_ROOT -from tests.torch.helpers import set_torch_seed - -class TwoLinearModel(nn.Module): +class ThreeLinearModel(nn.Module): def __init__(self) -> None: super().__init__() self.embedding = nn.Embedding(32, 2) self.linear1 = nn.Linear(2, 3) self.linear2 = nn.Linear(2, 4, bias=False) + self.linear3 = nn.Linear(3, 5) def forward(self, input_ids: torch.Tensor): x = self.embedding(input_ids) - y0 = self.linear1(x) + y0 = self.linear3(self.linear1(x)) y1 = self.linear2(x) return y0, y1 diff --git a/tests/torch/experimental/sparsify_activations/test_algo.py b/tests/torch/experimental/sparsify_activations/test_algo.py index cd6c4c8e95e..68393383e31 100644 --- a/tests/torch/experimental/sparsify_activations/test_algo.py +++ b/tests/torch/experimental/sparsify_activations/test_algo.py @@ -21,110 +21,105 @@ import nncf import nncf.experimental import nncf.experimental.torch.sparsify_activations +from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgorithm from nncf.experimental.torch.sparsify_activations.torch_backend import ACTIVATIONS_SPARSIFIER_PREFIX from nncf.experimental.torch.sparsify_activations.torch_backend import ActivationsSparsifier from nncf.scopes import IgnoredScope +from nncf.torch.model_creation import wrap_model from nncf.torch.nncf_network import NNCFNetwork from tests.shared.nx_graph import compare_nx_graph_with_reference from tests.shared.paths import TEST_ROOT -from tests.torch.experimental.sparsify_activations.helpers import TwoLinearModel +from tests.torch.experimental.sparsify_activations.helpers import ThreeLinearModel from tests.torch.experimental.sparsify_activations.helpers import dummy_llama_model from tests.torch.helpers import set_torch_seed @dataclass -class AlgoTestDesc: - model_name: str +class SparsifyActivationsAlgorithmTestDesc: + name: str model_getter: Callable[[], nn.Module] dataset_getter: Callable[[torch.device], nncf.Dataset] - compress_weights: bool target_sparsity_by_scope: Dict[str, float] ignored_scope: nncf.IgnoredScope | None ref_sparsifier_target_sparsity: Dict[str, float] ref_num_batches_tracked: int - @property - def desc_id(self): - compress_weights_flag = "_compressed_weights" if self.compress_weights else "" - return f"{self.model_name}{compress_weights_flag}" - - @property - def ref_dot_path(self): - return TEST_ROOT / Path("torch/data/sparsify_activations", f"{self.desc_id}.dot") - - -AlgoTestDescs: list[AlgoTestDesc] = [] -for compress_weights in [False, True]: - AlgoTestDescs += [ - AlgoTestDesc( - model_name="linear", - model_getter=lambda: nn.Linear(4, 2), - dataset_getter=lambda device: nncf.Dataset(torch.randn([3, 2, 4]).to(device)), - compress_weights=compress_weights, - target_sparsity_by_scope={ - "{re}.*linear.*": 0.3, - }, - ignored_scope=None, - ref_sparsifier_target_sparsity={ - 
f"{ACTIVATIONS_SPARSIFIER_PREFIX}_Linear/linear_0": 0.3, - }, - ref_num_batches_tracked=3, - ), - AlgoTestDesc( - model_name="two_linear", - model_getter=TwoLinearModel, - dataset_getter=lambda device: nncf.Dataset(torch.randint(0, 30, (3, 2, 8)).to(device)), - compress_weights=compress_weights, - target_sparsity_by_scope={ - "{re}.*linear2.*": 0.4, - }, - ignored_scope=IgnoredScope(patterns=[".*linear1.*"]), - ref_sparsifier_target_sparsity={ - f"{ACTIVATIONS_SPARSIFIER_PREFIX}_TwoLinearModel/Linear[linear2]/linear_0": 0.4, - }, - ref_num_batches_tracked=3, - ), - AlgoTestDesc( - model_name="dummy_llama", - model_getter=dummy_llama_model, - dataset_getter=lambda device: nncf.Dataset(torch.randint(0, 30, (3, 2, 8)).to(device)), - compress_weights=compress_weights, - target_sparsity_by_scope={ - "{re}.*gate_proj.*": 0.2, - "{re}.*up_proj.*": 0.3, - "{re}.*down_proj.*": 0.4, - }, - ignored_scope=None, - ref_sparsifier_target_sparsity={ - ( - f"{ACTIVATIONS_SPARSIFIER_PREFIX}_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/" - f"LlamaDecoderLayer[{layer_id}]/LlamaMLP[mlp]/Linear[{name}]/linear_0" - ): sparsity - for name, sparsity in [("gate_proj", 0.2), ("up_proj", 0.3), ("down_proj", 0.4)] - for layer_id in [0, 1] - }, - ref_num_batches_tracked=3, - ), - ] - -@pytest.mark.parametrize("desc", AlgoTestDescs, ids=[p.desc_id for p in AlgoTestDescs], scope="class") +sparsify_activations_algorithm_test_descs = [ + SparsifyActivationsAlgorithmTestDesc( + name="linear", + model_getter=lambda: nn.Linear(4, 2), + dataset_getter=lambda device: nncf.Dataset(torch.randn([3, 2, 4]).to(device)), + target_sparsity_by_scope={ + "{re}.*linear.*": 0.3, + }, + ignored_scope=None, + ref_sparsifier_target_sparsity={ + f"{ACTIVATIONS_SPARSIFIER_PREFIX}_Linear/linear_0": 0.3, + }, + ref_num_batches_tracked=3, + ), + SparsifyActivationsAlgorithmTestDesc( + name="three_linear", + model_getter=ThreeLinearModel, + dataset_getter=lambda device: nncf.Dataset(torch.randint(0, 30, (3, 2, 8)).to(device)), + target_sparsity_by_scope={ + "{re}.*linear.*": 0.4, + }, + ignored_scope=IgnoredScope(patterns=[".*linear1.*"]), + ref_sparsifier_target_sparsity={ + f"{ACTIVATIONS_SPARSIFIER_PREFIX}_ThreeLinearModel/Linear[linear2]/linear_0": 0.4, + f"{ACTIVATIONS_SPARSIFIER_PREFIX}_ThreeLinearModel/Linear[linear3]/linear_0": 0.4, + }, + ref_num_batches_tracked=3, + ), + SparsifyActivationsAlgorithmTestDesc( + name="dummy_llama", + model_getter=dummy_llama_model, + dataset_getter=lambda device: nncf.Dataset(torch.randint(0, 30, (3, 2, 8)).to(device)), + target_sparsity_by_scope={ + "{re}.*gate_proj.*": 0.2, + "{re}.*up_proj.*": 0.3, + "{re}.*down_proj.*": 0.4, + }, + ignored_scope=None, + ref_sparsifier_target_sparsity={ + ( + f"{ACTIVATIONS_SPARSIFIER_PREFIX}_LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/" + f"LlamaDecoderLayer[{layer_id}]/LlamaMLP[mlp]/Linear[{name}]/linear_0" + ): sparsity + for name, sparsity in [("gate_proj", 0.2), ("up_proj", 0.3), ("down_proj", 0.4)] + for layer_id in [0, 1] + }, + ref_num_batches_tracked=3, + ), +] + + +@pytest.mark.parametrize( + "desc", + sparsify_activations_algorithm_test_descs, + ids=[p.name for p in sparsify_activations_algorithm_test_descs], + scope="class", +) +@pytest.mark.parametrize("compress_weights", [False, True], scope="class") @pytest.mark.parametrize("use_cuda", [False, True], ids=["cpu", "cuda"], scope="class") class TestSparsifyActivationsAlgorithm: @pytest.fixture(autouse=True, scope="class") - def setup(self, request, desc: AlgoTestDesc, use_cuda: bool): + 
def setup(self, request, desc: SparsifyActivationsAlgorithmTestDesc, compress_weights: bool, use_cuda: bool): if use_cuda and not torch.cuda.is_available(): pytest.skip("CUDA is not available") request.cls.use_cuda = use_cuda device = torch.device("cuda" if use_cuda else "cpu") request.cls.device = device request.cls.desc = desc + request.cls.compress_weights = compress_weights with set_torch_seed(): model = desc.model_getter() model = model.to(device).eval() dataset = desc.dataset_getter(device) - if desc.compress_weights: + if compress_weights: model = nncf.compress_weights( model, mode=nncf.CompressWeightsMode.INT8_SYM, @@ -140,7 +135,7 @@ def setup(self, request, desc: AlgoTestDesc, use_cuda: bool): request.cls.dataset = dataset def test_inserted_sparsifier(self): - desc: AlgoTestDesc = self.desc + desc: SparsifyActivationsAlgorithmTestDesc = self.desc model = self.model assert isinstance(model, NNCFNetwork) num_sparsifiers = 0 @@ -152,12 +147,13 @@ def test_inserted_sparsifier(self): assert num_sparsifiers == len(desc.ref_sparsifier_target_sparsity) def test_nncf_graph(self): - desc: AlgoTestDesc = self.desc + desc: SparsifyActivationsAlgorithmTestDesc = self.desc model: NNCFNetwork = self.model graph = model.nncf.get_graph() - graph.dump_graph(desc.ref_dot_path) + file_name = f"{desc.name}_compressed_weights" if self.compress_weights else desc.name + ref_dot_path = Path(TEST_ROOT, "torch/data/sparsify_activations", f"{file_name}.dot") graph = model.nncf.get_graph().get_graph_for_structure_analysis() - compare_nx_graph_with_reference(graph, desc.ref_dot_path) + compare_nx_graph_with_reference(graph, ref_dot_path) def test_export_openvino(self): model: NNCFNetwork = self.model @@ -175,9 +171,62 @@ def test_export_openvino(self): assert len(torch_outputs) == len(ov_outputs) for torch_output, ov_output in zip(torch_outputs, ov_outputs): - torch.testing.assert_close( - torch_output.cpu(), - torch.from_numpy(ov_output), - rtol=1e-3, - atol=1e-3, - ) + torch.testing.assert_close(torch_output.cpu(), torch.from_numpy(ov_output), rtol=1e-3, atol=1e-3) + + +@dataclass +class TargetSparsityByNodeTestDesc: + target_sparsity_by_scope: Dict[str, float] + ignored_scope: IgnoredScope + ref_target_sparsity_by_node_name: Dict[str, float] = None + raise_error: bool = False + + +@pytest.mark.parametrize( + "desc", + [ + TargetSparsityByNodeTestDesc( + target_sparsity_by_scope={"{re}.*linear.*": 0.3}, + ignored_scope=IgnoredScope(), + ref_target_sparsity_by_node_name={ + "ThreeLinearModel/Linear[linear1]/linear_0": 0.3, + "ThreeLinearModel/Linear[linear2]/linear_0": 0.3, + "ThreeLinearModel/Linear[linear3]/linear_0": 0.3, + }, + ), + TargetSparsityByNodeTestDesc( + target_sparsity_by_scope={"{re}.*linear.*": 0.3}, + ignored_scope=IgnoredScope(patterns=[".*linear2.*"]), + ref_target_sparsity_by_node_name={ + "ThreeLinearModel/Linear[linear1]/linear_0": 0.3, + "ThreeLinearModel/Linear[linear3]/linear_0": 0.3, + }, + ), + TargetSparsityByNodeTestDesc( + target_sparsity_by_scope={"{re}.*nonexist.*": 0.3}, + ignored_scope=IgnoredScope(patterns=[".*linear2.*"]), + ref_target_sparsity_by_node_name=dict(), + ), + TargetSparsityByNodeTestDesc( + target_sparsity_by_scope={"{re}.*linear.*": 0.3, "{re}.*linear1.*": 0.4}, + ignored_scope=IgnoredScope(), + raise_error=True, # multiple matches for one layer + ), + ], +) +def test_get_target_sparsity_by_node(desc: TargetSparsityByNodeTestDesc): + model = wrap_model( + ThreeLinearModel(), + example_input=torch.ones((2, 4)).long(), + trace_parameters=True, + ) + graph = 
model.nncf.get_graph() + algo = SparsifyActivationsAlgorithm(desc.target_sparsity_by_scope, desc.ignored_scope) + algo._set_backend_entity(model) + if desc.raise_error: + with pytest.raises(nncf.ValidationError): + algo._get_target_sparsity_by_node(graph) + else: + target_sparsity_by_node = algo._get_target_sparsity_by_node(graph) + target_sparsity_by_node_name = {node.node_name: sparsity for node, sparsity in target_sparsity_by_node.items()} + assert sorted(target_sparsity_by_node_name.items()) == sorted(desc.ref_target_sparsity_by_node_name.items()) diff --git a/tests/torch/experimental/sparsify_activations/test_components.py b/tests/torch/experimental/sparsify_activations/test_components.py new file mode 100644 index 00000000000..571d39a16eb --- /dev/null +++ b/tests/torch/experimental/sparsify_activations/test_components.py @@ -0,0 +1,185 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import List + +import pytest +import torch + +import nncf +import nncf.experimental +import nncf.experimental.torch.sparsify_activations +from nncf.experimental.torch.sparsify_activations.torch_backend import ActivationsSparsifier +from nncf.experimental.torch.sparsify_activations.torch_backend import PTSparsifyActivationsAlgoBackend +from nncf.torch.model_creation import wrap_model +from nncf.torch.nncf_network import NNCFNetwork +from tests.torch.experimental.sparsify_activations.helpers import ThreeLinearModel + + +@dataclass +class SparsifierForwardTestDesc: + target_sparsity: float + alpha: float + input_batches: List[torch.Tensor] + ref_running_thresholds: List[torch.Tensor] + ref_outputs: List[torch.Tensor] + + +sparsifier_forward_test_descs = { + "fp16": SparsifierForwardTestDesc( + target_sparsity=0.4, + alpha=0.2, + input_batches=[ + torch.tensor([1.0, 3.0, 2.0, 4.0], dtype=torch.float16), + torch.tensor([4.0, 5.0, 4.5, -3.0], dtype=torch.float16), + ], + ref_running_thresholds=[ + torch.tensor(2.1992, dtype=torch.float16), + torch.tensor(3.2559, dtype=torch.float16), + ], + ref_outputs=[ + torch.tensor([0.0, 3.0, 0.0, 4.0], dtype=torch.float16), + torch.tensor([4.0, 5.0, 4.5, 0.0], dtype=torch.float16), + ], + ), + "fp32": SparsifierForwardTestDesc( + target_sparsity=0.8, + alpha=0.1, + input_batches=[ + torch.tensor([-1.0, 1.0, 2.5]), + torch.tensor([1.0, 2.0, 0.0]), + torch.tensor([2.0, 0.0, 3.0]), + ], + ref_running_thresholds=[ + torch.tensor(1.9000), + torch.tensor(1.7421), + torch.tensor(2.0587), + ], + ref_outputs=[ + torch.tensor([0.0, 0.0, 2.5]), + torch.tensor([0.0, 2.0, 0.0]), + torch.tensor([0.0, 0.0, 3.0]), + ], + ), + "varying_shape": SparsifierForwardTestDesc( + target_sparsity=0.6, + alpha=0.5, + input_batches=[ + torch.tensor([1.0, 2.0, 7.0]), + torch.tensor([[1.0, 2.0], [7.0, -3.0]]), + torch.tensor([[[1.0], [5.5], [8.5], [-3.0], [2.5]]]), + ], + ref_running_thresholds=[ + torch.tensor(3.0000), + torch.tensor(2.8667), + torch.tensor(3.5143), + ], + ref_outputs=[ + torch.tensor([0.0, 0.0, 7.0]), + 
torch.tensor([[0.0, 0.0], [7.0, -3.0]]), + torch.tensor([[[0.0], [5.5], [8.5], [0.0], [0.0]]]), + ], + ), +} + + +class TestActivationsSparsifier: + @pytest.mark.parametrize("desc", sparsifier_forward_test_descs.values(), ids=sparsifier_forward_test_descs.keys()) + def test_forward(self, use_cuda: bool, desc: SparsifierForwardTestDesc): + if use_cuda and not torch.cuda.is_available(): + pytest.skip("CUDA is not available") + device = torch.device("cuda" if use_cuda else "cpu") + sparsifier = ActivationsSparsifier(desc.target_sparsity, desc.alpha).to(device) + running_thresholds = [] + outputs = [] + with torch.no_grad(): + for batch in desc.input_batches: + output = sparsifier(batch.to(device)) + running_thresholds.append(sparsifier.running_threshold) + outputs.append(output) + assert sparsifier.num_batches_tracked == len(desc.input_batches) + assert len(running_thresholds) == len(desc.ref_running_thresholds) + for threshold, ref_threshold in zip(running_thresholds, desc.ref_running_thresholds): + assert threshold.device.type == device.type + torch.testing.assert_close(threshold, ref_threshold, rtol=1e-4, atol=1e-4, check_device=False) + assert len(outputs) == len(desc.ref_outputs) + for output, ref_output in zip(outputs, desc.ref_outputs): + assert output.device.type == device.type + torch.testing.assert_close(output, ref_output, rtol=1e-4, atol=1e-4, check_device=False) + + sparsifier.freeze() + with torch.no_grad(): + batch = desc.input_batches[-1] + output = sparsifier(batch.to(device)) + assert sparsifier.num_batches_tracked == len(desc.input_batches) + torch.testing.assert_close( + sparsifier.running_threshold, desc.ref_running_thresholds[-1], rtol=1e-4, atol=1e-4, check_device=False + ) + torch.testing.assert_close(output, desc.ref_outputs[-1], rtol=1e-4, atol=1e-4, check_device=False) + + +class TestPTSparsifyActivationsAlgoBackend: + def test_get_sparsifers(self): + model = ThreeLinearModel() + dataset = nncf.Dataset(torch.randint(0, 30, (3, 2, 8))) + sparse_model = nncf.experimental.torch.sparsify_activations.sparsify_activations( + model, dataset, target_sparsity_by_scope={"{re}.*": 0.5} + ) + backend = PTSparsifyActivationsAlgoBackend() + sparsifiers = backend.get_sparsifiers(sparse_model) + assert len(sparsifiers) == 3 + + @pytest.mark.parametrize("compress_weights", [False, True]) + def test_insert_sparsifiers(self, compress_weights: bool): + model, _ = self.create_model_and_dataset(compress_weights=compress_weights) + graph = model.nncf.get_graph() + nodes = graph.get_nodes_by_metatypes(PTSparsifyActivationsAlgoBackend.SUPPORTED_METATYPES) + backend = PTSparsifyActivationsAlgoBackend() + model_with_sparsifiers = backend.insert_sparsifiers(model, graph, {node: 0.5 for node in nodes}) + assert len(backend.get_sparsifiers(model_with_sparsifiers)) == len(nodes) + + def test_calibrate_sparsifiers(self, mocker): + model, dataset = self.create_model_and_dataset() + graph = model.nncf.get_graph() + backend = PTSparsifyActivationsAlgoBackend() + mock_sparsifer = ActivationsSparsifier(0.5, 0.1) + mock_sparsifer.freeze(True) + num_model_forward_calls = 0 + + def model_forward_pre_hook(model: NNCFNetwork, args): + nonlocal num_model_forward_calls + num_model_forward_calls += 1 + assert model.training is False + + model.register_forward_pre_hook(model_forward_pre_hook) + + with mocker.patch.object(backend, "get_sparsifiers", return_value=[mock_sparsifer]): + backend.calibrate_sparsifiers(model, graph, dataset) + assert mock_sparsifer._freeze is False + assert num_model_forward_calls == 
From e1d5f79fcc2378dc3a22f6a3d06e2f6f3d308dec Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:05 +0800
Subject: [PATCH 15/76] upload dots

---
 .../data/sparsify_activations/dummy_llama.dot | 424 +++++++++++++++
 .../dummy_llama_compressed_weights.dot        | 488 ++++++++++++++++++
 .../data/sparsify_activations/linear.dot      |  18 +
 .../linear_compressed_weights.dot             |  22 +
 .../sparsify_activations/three_linear.dot     |  41 ++
 .../three_linear_compressed_weights.dot       |  57 ++
 6 files changed, 1050 insertions(+)
 create mode 100644 tests/torch/data/sparsify_activations/dummy_llama.dot
 create mode 100644 tests/torch/data/sparsify_activations/dummy_llama_compressed_weights.dot
 create mode 100644 tests/torch/data/sparsify_activations/linear.dot
 create mode 100644 tests/torch/data/sparsify_activations/linear_compressed_weights.dot
 create mode 100644 tests/torch/data/sparsify_activations/three_linear.dot
 create mode 100644 tests/torch/data/sparsify_activations/three_linear_compressed_weights.dot

diff --git a/tests/torch/data/sparsify_activations/dummy_llama.dot b/tests/torch/data/sparsify_activations/dummy_llama.dot
new file mode 100644
index 00000000000..3995a93a591
--- /dev/null
+++ b/tests/torch/data/sparsify_activations/dummy_llama.dot
@@ -0,0 +1,424 @@
+strict digraph {
+"0 /nncf_model_input_0" [id=0, type=nncf_model_input];
+"1 model.embed_tokens.weight" [id=1, type=nncf_model_const];
+"2 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/embedding_0" [id=2, type=embedding];
+"3 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_0" [id=3, type=to];
+"4 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/pow_0" [id=4, type=pow];
+"5 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/mean_0" [id=5, type=mean];
+"6 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__add___0" [id=6, type=__add__];
+"7 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/rsqrt_0" [id=7, type=rsqrt];
+"8 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___0" [id=8, type=__mul__];
+"9 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_1" [id=9, type=to];
+"10 model.layers.0.input_layernorm.weight" [id=10, type=nncf_model_const];
+"11 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1" [id=11, type=__mul__];
+"12 model.layers.0.self_attn.q_proj.weight" [id=12, type=nncf_model_const];
+"13 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0" [id=13, type=linear];
+"14 model.layers.0.self_attn.k_proj.weight" [id=14, type=nncf_model_const];
+"15 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0" [id=15, type=linear]; +"16 model.layers.0.self_attn.v_proj.weight" [id=16, type=nncf_model_const]; +"17 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0" [id=17, type=linear]; +"18 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_0" [id=18, type=view]; +"19 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_0" [id=19, type=transpose]; +"20 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_1" [id=20, type=view]; +"21 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_1" [id=21, type=transpose]; +"22 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_2" [id=22, type=view]; +"23 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_2" [id=23, type=transpose]; +"24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" [id=24, type=cat]; +"25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" [id=25, type=cos]; +"26 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" [id=26, type=to]; +"27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" [id=27, type=sin]; +"28 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" [id=28, type=to]; +"29 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0" [id=29, type=unsqueeze]; +"30 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_1" [id=30, type=unsqueeze]; +"31 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___0" [id=31, type=__mul__]; +"32 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___0" [id=32, type=__getitem__]; +"33 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___1" [id=33, type=__getitem__]; +"34 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__neg___0" [id=34, type=__neg__]; +"35 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_0" [id=35, type=cat]; +"36 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___1" [id=36, type=__mul__]; +"37 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___0" [id=37, type=__add__]; +"38 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___2" [id=38, type=__mul__]; +"39 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___2" [id=39, type=__getitem__]; +"40 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___3" [id=40, type=__getitem__]; +"41 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__neg___1" [id=41, type=__neg__]; +"42 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_1" [id=42, type=cat]; +"43 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___3" [id=43, type=__mul__]; +"44 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___1" [id=44, type=__add__]; +"45 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___4" [id=45, type=__getitem__]; +"46 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/expand_0" [id=46, type=expand]; +"47 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_0" [id=47, type=reshape]; +"48 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___5" [id=48, type=__getitem__]; +"49 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/expand_1" [id=49, type=expand]; +"50 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_1" [id=50, type=reshape]; +"51 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_3" [id=51, type=transpose]; +"52 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_0" [id=52, type=matmul]; +"53 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__truediv___0" [id=53, type=__truediv__]; +"54 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___2" [id=54, type=__add__]; +"55 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/softmax_0" [id=55, type=softmax]; +"56 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/to_0" [id=56, type=to]; +"57 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/dropout_0" [id=57, type=dropout]; +"58 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_1" [id=58, type=matmul]; +"59 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_4" [id=59, type=transpose]; +"60 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/contiguous_0" [id=60, type=contiguous]; +"61 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_2" [id=61, type=reshape]; +"62 model.layers.0.self_attn.o_proj.weight" [id=62, type=nncf_model_const]; +"63 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0" [id=63, type=linear]; +"64 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___0" [id=64, type=__add__]; +"65 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_0" [id=65, type=to]; +"66 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/pow_0" [id=66, type=pow]; +"67 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/mean_0" [id=67, type=mean]; +"68 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__add___0" [id=68, type=__add__]; +"69 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/rsqrt_0" [id=69, type=rsqrt]; +"70 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___0" [id=70, type=__mul__]; +"71 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_1" [id=71, type=to]; +"72 model.layers.0.post_attention_layernorm.weight" [id=72, type=nncf_model_const]; +"73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" [id=73, type=__mul__]; +"74 model.layers.0.mlp.gate_proj.weight" [id=74, type=nncf_model_const]; +"75 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0" [id=75, type=abs]; +"76 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0" [id=76, type=le]; +"77 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0" [id=77, type=masked_fill]; +"78 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0" [id=78, type=linear]; +"79 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0" [id=79, type=silu]; +"80 model.layers.0.mlp.up_proj.weight" [id=80, type=nncf_model_const]; +"81 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0" [id=81, type=abs]; +"82 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0" [id=82, type=le]; +"83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0" [id=83, type=masked_fill]; +"84 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/linear_0" [id=84, type=linear]; +"85 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0" [id=85, type=__mul__]; +"86 model.layers.0.mlp.down_proj.weight" [id=86, type=nncf_model_const]; +"87 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0" [id=87, type=abs]; +"88 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0" [id=88, type=le]; +"89 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0" [id=89, type=masked_fill]; +"90 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/linear_0" [id=90, type=linear]; +"91 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___1" [id=91, type=__add__]; +"92 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_0" [id=92, type=to]; +"93 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/pow_0" [id=93, type=pow]; +"94 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/mean_0" [id=94, type=mean]; +"95 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__add___0" [id=95, type=__add__]; +"96 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/rsqrt_0" [id=96, type=rsqrt]; +"97 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___0" [id=97, type=__mul__]; +"98 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_1" [id=98, type=to]; +"99 model.layers.1.input_layernorm.weight" [id=99, type=nncf_model_const]; +"100 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1" [id=100, type=__mul__]; +"101 model.layers.1.self_attn.q_proj.weight" [id=101, type=nncf_model_const]; +"102 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0" [id=102, type=linear]; +"103 model.layers.1.self_attn.k_proj.weight" [id=103, type=nncf_model_const]; +"104 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0" [id=104, type=linear]; +"105 model.layers.1.self_attn.v_proj.weight" [id=105, type=nncf_model_const]; +"106 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0" [id=106, type=linear]; +"107 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_0" [id=107, type=view]; +"108 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_0" [id=108, type=transpose]; +"109 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_1" [id=109, type=view]; +"110 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_1" [id=110, type=transpose]; +"111 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_2" [id=111, type=view]; +"112 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_2" [id=112, type=transpose]; +"113 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" [id=113, type=cat]; +"114 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" [id=114, type=cos]; +"115 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" [id=115, type=to]; +"116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" [id=116, type=sin]; +"117 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" [id=117, type=to]; +"118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0" [id=118, type=unsqueeze]; +"119 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_1" [id=119, type=unsqueeze]; +"120 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___0" [id=120, type=__mul__]; +"121 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___0" [id=121, type=__getitem__]; +"122 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___1" [id=122, type=__getitem__]; +"123 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__neg___0" [id=123, type=__neg__]; +"124 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_0" [id=124, type=cat]; +"125 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___1" [id=125, type=__mul__]; +"126 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___0" [id=126, type=__add__]; +"127 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___2" [id=127, type=__mul__]; +"128 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___2" [id=128, type=__getitem__]; +"129 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___3" [id=129, type=__getitem__]; +"130 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__neg___1" [id=130, type=__neg__]; +"131 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_1" [id=131, type=cat]; +"132 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___3" [id=132, type=__mul__]; +"133 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___1" [id=133, type=__add__]; +"134 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___4" [id=134, type=__getitem__]; +"135 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/expand_0" [id=135, type=expand]; +"136 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_0" [id=136, type=reshape]; +"137 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___5" [id=137, type=__getitem__]; +"138 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/expand_1" [id=138, type=expand]; +"139 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_1" [id=139, type=reshape]; +"140 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_3" [id=140, type=transpose]; +"141 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_0" [id=141, type=matmul]; +"142 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__truediv___0" [id=142, type=__truediv__]; +"143 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___2" [id=143, type=__add__]; +"144 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/softmax_0" [id=144, type=softmax]; +"145 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/to_0" [id=145, type=to]; +"146 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/dropout_0" [id=146, type=dropout]; +"147 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_1" [id=147, type=matmul]; +"148 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_4" [id=148, type=transpose]; +"149 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/contiguous_0" [id=149, type=contiguous]; +"150 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_2" [id=150, type=reshape]; +"151 model.layers.1.self_attn.o_proj.weight" [id=151, type=nncf_model_const]; +"152 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0" [id=152, type=linear]; +"153 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___0" [id=153, type=__add__]; +"154 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_0" [id=154, type=to]; +"155 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/pow_0" [id=155, type=pow]; +"156 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/mean_0" [id=156, type=mean]; +"157 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__add___0" [id=157, type=__add__]; +"158 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/rsqrt_0" [id=158, type=rsqrt]; +"159 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___0" [id=159, type=__mul__]; +"160 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_1" [id=160, type=to]; +"161 model.layers.1.post_attention_layernorm.weight" [id=161, type=nncf_model_const]; +"162 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" [id=162, type=__mul__]; +"163 model.layers.1.mlp.gate_proj.weight" [id=163, type=nncf_model_const]; +"164 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0" [id=164, type=abs]; +"165 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0" [id=165, type=le]; +"166 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0" [id=166, type=masked_fill]; +"167 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0" [id=167, type=linear]; +"168 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0" [id=168, type=silu]; +"169 model.layers.1.mlp.up_proj.weight" [id=169, type=nncf_model_const]; +"170 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0" [id=170, type=abs]; +"171 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0" [id=171, type=le]; +"172 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0" [id=172, type=masked_fill]; +"173 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/linear_0" [id=173, type=linear]; +"174 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0" [id=174, type=__mul__]; +"175 model.layers.1.mlp.down_proj.weight" [id=175, type=nncf_model_const]; +"176 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0" [id=176, type=abs]; +"177 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0" [id=177, type=le]; +"178 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0" [id=178, type=masked_fill]; +"179 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/linear_0" [id=179, type=linear]; +"180 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___1" [id=180, type=__add__]; +"181 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_0" [id=181, type=to]; +"182 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/pow_0" [id=182, type=pow]; +"183 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/mean_0" [id=183, type=mean]; +"184 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__add___0" [id=184, type=__add__]; +"185 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/rsqrt_0" [id=185, type=rsqrt]; +"186 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___0" [id=186, type=__mul__]; +"187 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_1" [id=187, type=to]; +"188 model.norm.weight" [id=188, type=nncf_model_const]; +"189 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___1" [id=189, type=__mul__]; +"190 lm_head.weight" [id=190, type=nncf_model_const]; +"191 LlamaForCausalLM/Linear[lm_head]/linear_0" [id=191, type=linear]; +"192 LlamaForCausalLM/float_0" [id=192, type=float]; +"193 /nncf_model_output_0" [id=193, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "2 
LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/embedding_0"; +"1 model.embed_tokens.weight" -> "2 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/embedding_0"; +"2 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/embedding_0" -> "3 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_0"; +"3 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_0" -> "4 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/pow_0"; +"3 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_0" -> "8 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___0"; +"3 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_0" -> "64 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___0"; +"4 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/pow_0" -> "5 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/mean_0"; +"5 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/mean_0" -> "6 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__add___0"; +"6 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__add___0" -> "7 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/rsqrt_0"; +"7 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/rsqrt_0" -> "8 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___0"; +"8 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___0" -> "9 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_1"; +"9 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_1" -> "11 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1"; +"10 model.layers.0.input_layernorm.weight" -> "11 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1"; +"11 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "13 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0"; +"11 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "15 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0"; +"11 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "17 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0"; +"12 model.layers.0.self_attn.q_proj.weight" -> "13 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0"; +"13 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0" -> "18 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_0"; +"14 model.layers.0.self_attn.k_proj.weight" -> "15 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0"; +"15 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0" -> "20 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_1"; +"16 model.layers.0.self_attn.v_proj.weight" -> "17 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0"; +"17 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0" -> "22 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_2"; +"18 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_0" -> "19 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_0"; +"19 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_0" -> "31 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___0"; +"19 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_0" -> "32 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___0"; +"19 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_0" -> "33 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___1"; +"20 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_1" -> "21 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_1"; +"21 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_1" -> "38 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___2"; +"21 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_1" -> "39 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___2"; +"21 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_1" -> "40 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___3"; +"22 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_2" -> "23 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_2"; +"23 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_2" -> "48 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___5"; +"24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0"; +"24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0"; +"25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" -> "26 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0"; +"26 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" -> "29 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0"; +"27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" -> "28 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1"; +"28 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" -> "30 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_1"; +"29 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0" -> "31 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___0"; +"29 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0" -> "38 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___2"; +"30 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_1" -> "36 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___1"; +"30 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_1" -> "43 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___3"; +"31 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___0" -> "37 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___0"; +"32 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___0" -> "35 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_0"; +"33 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___1" -> "34 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__neg___0"; +"34 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__neg___0" -> "35 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_0"; +"35 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_0" -> "36 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___1"; +"36 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___1" -> "37 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___0"; +"37 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___0" -> "52 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_0"; +"38 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___2" -> "44 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___1"; +"39 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___2" -> "42 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_1"; +"40 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___3" -> "41 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__neg___1"; +"41 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__neg___1" -> "42 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_1"; +"42 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_1" -> "43 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___3"; +"43 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___3" -> "44 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___1"; +"44 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___1" -> "45 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___4"; +"45 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___4" -> "46 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/expand_0"; +"46 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/expand_0" -> "47 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_0"; +"47 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_0" -> "51 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_3"; +"48 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___5" -> "49 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/expand_1"; +"49 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/expand_1" -> "50 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_1"; +"50 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_1" -> "58 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_1"; +"51 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_3" -> "52 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_0"; +"52 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_0" -> "53 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__truediv___0"; +"53 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__truediv___0" -> "54 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___2"; +"54 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___2" -> "55 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/softmax_0"; +"55 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/softmax_0" -> "56 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/to_0"; +"56 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/to_0" -> "57 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/dropout_0"; +"57 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/dropout_0" -> "58 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_1"; +"58 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_1" -> "59 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_4"; +"59 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_4" -> "60 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/contiguous_0"; +"60 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/contiguous_0" -> "61 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_2"; +"61 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_2" -> "63 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0"; +"62 model.layers.0.self_attn.o_proj.weight" -> "63 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0"; +"63 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0" -> "64 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___0"; +"64 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___0" -> "65 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_0"; +"65 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_0" -> "66 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/pow_0"; +"65 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_0" -> "70 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___0"; +"65 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_0" -> "91 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___1"; +"66 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/pow_0" -> "67 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/mean_0"; +"67 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/mean_0" -> "68 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__add___0"; +"68 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__add___0" -> "69 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/rsqrt_0"; +"69 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/rsqrt_0" -> "70 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___0"; +"70 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___0" -> "71 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_1"; +"71 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_1" -> "73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1"; +"72 model.layers.0.post_attention_layernorm.weight" -> "73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1"; +"73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "75 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0"; +"73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "77 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0"; +"73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "81 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0"; +"73 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0"; +"74 model.layers.0.mlp.gate_proj.weight" -> "78 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0"; +"75 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0" -> "76 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0"; +"76 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0" -> "77 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0"; +"77 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0" -> "78 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0"; +"78 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0" -> "79 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0"; +"79 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0" -> "85 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0"; +"80 model.layers.0.mlp.up_proj.weight" -> "84 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/linear_0"; +"81 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0" -> "82 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0"; +"82 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0" -> "83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0"; +"83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0" -> "84 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/linear_0"; +"84 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/linear_0" -> "85 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0"; +"85 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0" -> "87 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0"; +"85 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0" -> "89 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0"; +"86 model.layers.0.mlp.down_proj.weight" -> "90 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/linear_0"; +"87 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0" -> "88 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0"; +"88 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0" -> "89 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0"; +"89 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0" -> "90 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/linear_0"; +"90 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/linear_0" -> "91 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___1"; +"91 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___1" -> "92 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_0"; +"92 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_0" -> "93 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/pow_0"; +"92 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_0" -> "97 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___0"; +"92 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_0" -> "153 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___0"; +"93 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/pow_0" -> "94 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/mean_0"; +"94 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/mean_0" -> "95 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__add___0"; +"95 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__add___0" -> "96 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/rsqrt_0"; +"96 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/rsqrt_0" -> "97 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___0"; +"97 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___0" -> "98 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_1"; +"98 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_1" -> "100 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1"; +"99 
model.layers.1.input_layernorm.weight" -> "100 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1"; +"100 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "102 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0"; +"100 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "104 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0"; +"100 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "106 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0"; +"101 model.layers.1.self_attn.q_proj.weight" -> "102 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0"; +"102 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0" -> "107 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_0"; +"103 model.layers.1.self_attn.k_proj.weight" -> "104 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0"; +"104 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0" -> "109 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_1"; +"105 model.layers.1.self_attn.v_proj.weight" -> "106 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0"; +"106 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0" -> "111 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_2"; +"107 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_0" -> "108 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_0"; +"108 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_0" -> "120 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___0"; +"108 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_0" -> "121 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___0"; +"108 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_0" -> "122 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___1"; +"109 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_1" -> "110 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_1"; +"110 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_1" 
-> "127 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___2"; +"110 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_1" -> "128 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___2"; +"110 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_1" -> "129 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___3"; +"111 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_2" -> "112 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_2"; +"112 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_2" -> "137 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___5"; +"113 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "114 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0"; +"113 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0"; +"114 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" -> "115 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0"; +"115 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" -> "118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0"; +"116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" -> "117 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1"; +"117 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" -> "119 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_1"; +"118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0" -> "120 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___0"; +"118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0" -> "127 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___2"; +"119 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_1" -> "125 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___1"; +"119 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_1" -> "132 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___3"; +"120 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___0" -> "126 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___0"; +"121 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___0" -> "124 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_0"; +"122 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___1" -> "123 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__neg___0"; +"123 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__neg___0" -> "124 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_0"; +"124 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_0" -> "125 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___1"; +"125 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___1" -> "126 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___0"; +"126 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___0" -> "141 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_0"; +"127 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___2" -> "133 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___1"; +"128 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___2" -> "131 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_1"; +"129 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___3" -> "130 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__neg___1"; +"130 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__neg___1" -> "131 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_1"; +"131 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_1" -> "132 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___3"; +"132 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___3" -> "133 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___1"; +"133 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___1" -> "134 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___4"; +"134 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___4" -> "135 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/expand_0"; +"135 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/expand_0" -> "136 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_0"; +"136 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_0" -> "140 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_3"; +"137 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___5" -> "138 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/expand_1"; +"138 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/expand_1" -> "139 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_1"; +"139 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_1" -> "147 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_1"; +"140 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_3" -> "141 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_0"; +"141 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_0" -> "142 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__truediv___0"; +"142 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__truediv___0" -> "143 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___2"; +"143 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___2" -> "144 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/softmax_0"; +"144 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/softmax_0" -> "145 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/to_0"; +"145 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/to_0" -> "146 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/dropout_0"; +"146 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/dropout_0" -> "147 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_1"; +"147 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_1" -> "148 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_4"; +"148 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_4" -> "149 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/contiguous_0"; +"149 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/contiguous_0" -> "150 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_2"; +"150 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_2" -> "152 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0"; +"151 model.layers.1.self_attn.o_proj.weight" -> "152 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0"; +"152 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0" -> "153 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___0"; +"153 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___0" -> "154 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_0"; +"154 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_0" -> "155 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/pow_0"; +"154 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_0" -> "159 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___0"; +"154 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_0" -> "180 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___1"; +"155 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/pow_0" -> "156 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/mean_0"; +"156 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/mean_0" -> "157 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__add___0"; +"157 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__add___0" -> "158 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/rsqrt_0"; +"158 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/rsqrt_0" -> "159 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___0"; +"159 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___0" -> "160 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_1"; +"160 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_1" -> "162 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1"; +"161 model.layers.1.post_attention_layernorm.weight" -> "162 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1"; +"162 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "164 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0"; +"162 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "166 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0"; +"162 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "170 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0"; +"162 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "172 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0"; +"163 model.layers.1.mlp.gate_proj.weight" -> "167 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0"; +"164 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0" -> "165 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0"; +"165 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0" -> "166 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0"; +"166 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0" -> "167 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0"; +"167 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0" -> "168 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0"; +"168 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0" -> "174 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0"; +"169 model.layers.1.mlp.up_proj.weight" -> "173 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/linear_0"; +"170 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0" -> "171 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0"; +"171 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0" -> "172 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0"; +"172 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0" -> "173 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/linear_0"; +"173 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/linear_0" -> "174 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0"; +"174 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0" -> "176 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0"; +"174 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0" -> "178 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0"; +"175 model.layers.1.mlp.down_proj.weight" -> "179 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/linear_0"; +"176 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0" -> "177 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0"; +"177 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0" -> "178 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0"; +"178 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0" -> "179 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/linear_0"; +"179 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/linear_0" -> "180 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___1"; +"180 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___1" -> "181 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_0"; +"181 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_0" -> "182 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/pow_0"; +"181 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_0" -> "186 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___0"; +"182 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/pow_0" -> "183 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/mean_0"; +"183 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/mean_0" -> "184 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__add___0"; +"184 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__add___0" -> "185 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/rsqrt_0"; +"185 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/rsqrt_0" -> "186 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___0"; +"186 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___0" -> "187 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_1"; +"187 
LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_1" -> "189 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___1"; +"188 model.norm.weight" -> "189 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___1"; +"189 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___1" -> "191 LlamaForCausalLM/Linear[lm_head]/linear_0"; +"190 lm_head.weight" -> "191 LlamaForCausalLM/Linear[lm_head]/linear_0"; +"191 LlamaForCausalLM/Linear[lm_head]/linear_0" -> "192 LlamaForCausalLM/float_0"; +"192 LlamaForCausalLM/float_0" -> "193 /nncf_model_output_0"; +} diff --git a/tests/torch/data/sparsify_activations/dummy_llama_compressed_weights.dot b/tests/torch/data/sparsify_activations/dummy_llama_compressed_weights.dot new file mode 100644 index 00000000000..18d9a10d615 --- /dev/null +++ b/tests/torch/data/sparsify_activations/dummy_llama_compressed_weights.dot @@ -0,0 +1,488 @@ +strict digraph { +"0 /nncf_model_input_0" [id=0, type=nncf_model_input]; +"1 model.embed_tokens.weight" [id=1, type=nncf_model_const]; +"2 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/WeightsDecompressor/decompress_0" [id=2, type=decompress]; +"3 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/WeightsDecompressor/type_0" [id=3, type=type]; +"4 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/embedding_0" [id=4, type=embedding]; +"5 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_0" [id=5, type=to]; +"6 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/pow_0" [id=6, type=pow]; +"7 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/mean_0" [id=7, type=mean]; +"8 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__add___0" [id=8, type=__add__]; +"9 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/rsqrt_0" [id=9, type=rsqrt]; +"10 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___0" [id=10, type=__mul__]; +"11 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_1" [id=11, type=to]; +"12 model.layers.0.input_layernorm.weight" [id=12, type=nncf_model_const]; +"13 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1" [id=13, type=__mul__]; +"14 model.layers.0.self_attn.q_proj.weight" [id=14, type=nncf_model_const]; +"15 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/decompress_0" [id=15, type=decompress]; +"16 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/type_0" [id=16, type=type]; +"17 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0" [id=17, type=linear]; +"18 model.layers.0.self_attn.k_proj.weight" [id=18, type=nncf_model_const]; +"19 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/decompress_0" [id=19, type=decompress]; +"20 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/type_0" [id=20, 
type=type]; +"21 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0" [id=21, type=linear]; +"22 model.layers.0.self_attn.v_proj.weight" [id=22, type=nncf_model_const]; +"23 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/decompress_0" [id=23, type=decompress]; +"24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/type_0" [id=24, type=type]; +"25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0" [id=25, type=linear]; +"26 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_0" [id=26, type=view]; +"27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_0" [id=27, type=transpose]; +"28 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_1" [id=28, type=view]; +"29 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_1" [id=29, type=transpose]; +"30 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_2" [id=30, type=view]; +"31 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_2" [id=31, type=transpose]; +"32 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" [id=32, type=cat]; +"33 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" [id=33, type=cos]; +"34 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" [id=34, type=to]; +"35 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" [id=35, type=sin]; +"36 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" [id=36, type=to]; +"37 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0" [id=37, type=unsqueeze]; +"38 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_1" [id=38, type=unsqueeze]; +"39 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___0" [id=39, type=__mul__]; +"40 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___0" [id=40, type=__getitem__]; +"41 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___1" [id=41, type=__getitem__]; +"42 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__neg___0" [id=42, type=__neg__]; +"43 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_0" [id=43, type=cat]; +"44 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___1" [id=44, type=__mul__]; +"45 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___0" [id=45, type=__add__]; +"46 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___2" [id=46, type=__mul__]; +"47 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___2" [id=47, type=__getitem__]; +"48 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___3" [id=48, type=__getitem__]; +"49 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__neg___1" [id=49, type=__neg__]; +"50 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_1" [id=50, type=cat]; +"51 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___3" [id=51, type=__mul__]; +"52 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___1" [id=52, type=__add__]; +"53 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___4" [id=53, type=__getitem__]; +"54 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/expand_0" [id=54, type=expand]; +"55 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_0" [id=55, type=reshape]; +"56 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___5" [id=56, type=__getitem__]; +"57 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/expand_1" [id=57, type=expand]; +"58 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_1" [id=58, type=reshape]; +"59 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_3" [id=59, type=transpose]; +"60 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_0" [id=60, type=matmul]; +"61 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__truediv___0" [id=61, type=__truediv__]; +"62 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___2" [id=62, type=__add__]; +"63 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/softmax_0" [id=63, type=softmax]; +"64 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/to_0" [id=64, type=to]; +"65 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/dropout_0" [id=65, type=dropout]; +"66 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_1" [id=66, type=matmul]; +"67 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_4" [id=67, type=transpose]; +"68 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/contiguous_0" [id=68, type=contiguous]; +"69 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_2" [id=69, type=reshape]; +"70 
model.layers.0.self_attn.o_proj.weight" [id=70, type=nncf_model_const]; +"71 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/decompress_0" [id=71, type=decompress]; +"72 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/type_0" [id=72, type=type]; +"73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0" [id=73, type=linear]; +"74 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___0" [id=74, type=__add__]; +"75 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_0" [id=75, type=to]; +"76 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/pow_0" [id=76, type=pow]; +"77 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/mean_0" [id=77, type=mean]; +"78 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__add___0" [id=78, type=__add__]; +"79 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/rsqrt_0" [id=79, type=rsqrt]; +"80 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___0" [id=80, type=__mul__]; +"81 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_1" [id=81, type=to]; +"82 model.layers.0.post_attention_layernorm.weight" [id=82, type=nncf_model_const]; +"83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" [id=83, type=__mul__]; +"84 model.layers.0.mlp.gate_proj.weight" [id=84, type=nncf_model_const]; +"85 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/decompress_0" [id=85, type=decompress]; +"86 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/type_0" [id=86, type=type]; +"87 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0" [id=87, type=abs]; +"88 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0" [id=88, type=le]; +"89 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0" [id=89, type=masked_fill]; +"90 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0" [id=90, type=linear]; +"91 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0" [id=91, type=silu]; +"92 model.layers.0.mlp.up_proj.weight" [id=92, type=nncf_model_const]; +"93 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/decompress_0" [id=93, type=decompress]; +"94 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/type_0" [id=94, type=type]; +"95 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0" [id=95, type=abs]; +"96 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0" [id=96, type=le]; +"97 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0" [id=97, type=masked_fill]; +"98 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/linear_0" [id=98, type=linear]; +"99 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0" [id=99, type=__mul__]; +"100 model.layers.0.mlp.down_proj.weight" [id=100, type=nncf_model_const]; +"101 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/decompress_0" [id=101, type=decompress]; +"102 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/type_0" [id=102, type=type]; +"103 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0" [id=103, type=abs]; +"104 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0" [id=104, type=le]; +"105 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0" [id=105, type=masked_fill]; +"106 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/linear_0" [id=106, type=linear]; +"107 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___1" [id=107, type=__add__]; +"108 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_0" [id=108, type=to]; +"109 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/pow_0" [id=109, type=pow]; +"110 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/mean_0" [id=110, type=mean]; +"111 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__add___0" [id=111, type=__add__]; +"112 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/rsqrt_0" [id=112, type=rsqrt]; +"113 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___0" [id=113, type=__mul__]; +"114 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_1" [id=114, type=to]; +"115 model.layers.1.input_layernorm.weight" [id=115, type=nncf_model_const]; +"116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1" [id=116, type=__mul__]; +"117 model.layers.1.self_attn.q_proj.weight" [id=117, type=nncf_model_const]; +"118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/decompress_0" [id=118, type=decompress]; +"119 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/type_0" 
[id=119, type=type]; +"120 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0" [id=120, type=linear]; +"121 model.layers.1.self_attn.k_proj.weight" [id=121, type=nncf_model_const]; +"122 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/decompress_0" [id=122, type=decompress]; +"123 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/type_0" [id=123, type=type]; +"124 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0" [id=124, type=linear]; +"125 model.layers.1.self_attn.v_proj.weight" [id=125, type=nncf_model_const]; +"126 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/decompress_0" [id=126, type=decompress]; +"127 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/type_0" [id=127, type=type]; +"128 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0" [id=128, type=linear]; +"129 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_0" [id=129, type=view]; +"130 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_0" [id=130, type=transpose]; +"131 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_1" [id=131, type=view]; +"132 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_1" [id=132, type=transpose]; +"133 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_2" [id=133, type=view]; +"134 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_2" [id=134, type=transpose]; +"135 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" [id=135, type=cat]; +"136 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" [id=136, type=cos]; +"137 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" [id=137, type=to]; +"138 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" [id=138, type=sin]; +"139 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" [id=139, type=to]; +"140 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0" [id=140, type=unsqueeze]; +"141 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_1" [id=141, type=unsqueeze]; +"142 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___0" [id=142, type=__mul__]; +"143 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___0" [id=143, type=__getitem__]; +"144 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___1" [id=144, type=__getitem__]; +"145 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__neg___0" [id=145, type=__neg__]; +"146 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_0" [id=146, type=cat]; +"147 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___1" [id=147, type=__mul__]; +"148 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___0" [id=148, type=__add__]; +"149 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___2" [id=149, type=__mul__]; +"150 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___2" [id=150, type=__getitem__]; +"151 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___3" [id=151, type=__getitem__]; +"152 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__neg___1" [id=152, type=__neg__]; +"153 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_1" [id=153, type=cat]; +"154 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___3" [id=154, type=__mul__]; +"155 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___1" [id=155, type=__add__]; +"156 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___4" [id=156, type=__getitem__]; +"157 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/expand_0" [id=157, type=expand]; +"158 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_0" [id=158, type=reshape]; +"159 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___5" [id=159, type=__getitem__]; +"160 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/expand_1" [id=160, type=expand]; +"161 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_1" [id=161, type=reshape]; +"162 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_3" [id=162, type=transpose]; +"163 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_0" [id=163, type=matmul]; +"164 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__truediv___0" [id=164, type=__truediv__]; +"165 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___2" [id=165, type=__add__]; +"166 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/softmax_0" [id=166, type=softmax]; +"167 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/to_0" 
[id=167, type=to]; +"168 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/dropout_0" [id=168, type=dropout]; +"169 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_1" [id=169, type=matmul]; +"170 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_4" [id=170, type=transpose]; +"171 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/contiguous_0" [id=171, type=contiguous]; +"172 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_2" [id=172, type=reshape]; +"173 model.layers.1.self_attn.o_proj.weight" [id=173, type=nncf_model_const]; +"174 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/decompress_0" [id=174, type=decompress]; +"175 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/type_0" [id=175, type=type]; +"176 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0" [id=176, type=linear]; +"177 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___0" [id=177, type=__add__]; +"178 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_0" [id=178, type=to]; +"179 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/pow_0" [id=179, type=pow]; +"180 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/mean_0" [id=180, type=mean]; +"181 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__add___0" [id=181, type=__add__]; +"182 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/rsqrt_0" [id=182, type=rsqrt]; +"183 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___0" [id=183, type=__mul__]; +"184 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_1" [id=184, type=to]; +"185 model.layers.1.post_attention_layernorm.weight" [id=185, type=nncf_model_const]; +"186 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" [id=186, type=__mul__]; +"187 model.layers.1.mlp.gate_proj.weight" [id=187, type=nncf_model_const]; +"188 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/decompress_0" [id=188, type=decompress]; +"189 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/type_0" [id=189, type=type]; +"190 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0" [id=190, type=abs]; +"191 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0" [id=191, type=le]; +"192 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0" [id=192, type=masked_fill]; +"193 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0" [id=193, type=linear]; +"194 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0" [id=194, type=silu]; +"195 model.layers.1.mlp.up_proj.weight" [id=195, type=nncf_model_const]; +"196 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/decompress_0" [id=196, type=decompress]; +"197 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/type_0" [id=197, type=type]; +"198 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0" [id=198, type=abs]; +"199 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0" [id=199, type=le]; +"200 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0" [id=200, type=masked_fill]; +"201 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/linear_0" [id=201, type=linear]; +"202 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0" [id=202, type=__mul__]; +"203 model.layers.1.mlp.down_proj.weight" [id=203, type=nncf_model_const]; +"204 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/decompress_0" [id=204, type=decompress]; +"205 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/type_0" [id=205, type=type]; +"206 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0" [id=206, type=abs]; +"207 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0" [id=207, type=le]; +"208 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0" [id=208, type=masked_fill]; +"209 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/linear_0" [id=209, type=linear]; +"210 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___1" [id=210, type=__add__]; +"211 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_0" [id=211, type=to]; +"212 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/pow_0" [id=212, type=pow]; +"213 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/mean_0" [id=213, type=mean]; +"214 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__add___0" [id=214, type=__add__]; +"215 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/rsqrt_0" [id=215, type=rsqrt]; +"216 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___0" [id=216, type=__mul__]; +"217 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_1" [id=217, type=to]; +"218 model.norm.weight" [id=218, type=nncf_model_const]; +"219 
LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___1" [id=219, type=__mul__]; +"220 lm_head.weight" [id=220, type=nncf_model_const]; +"221 LlamaForCausalLM/Linear[lm_head]/WeightsDecompressor/decompress_0" [id=221, type=decompress]; +"222 LlamaForCausalLM/Linear[lm_head]/WeightsDecompressor/type_0" [id=222, type=type]; +"223 LlamaForCausalLM/Linear[lm_head]/linear_0" [id=223, type=linear]; +"224 LlamaForCausalLM/float_0" [id=224, type=float]; +"225 /nncf_model_output_0" [id=225, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "4 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/embedding_0"; +"1 model.embed_tokens.weight" -> "2 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/WeightsDecompressor/decompress_0"; +"2 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/WeightsDecompressor/decompress_0" -> "3 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/WeightsDecompressor/type_0"; +"3 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/WeightsDecompressor/type_0" -> "4 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/embedding_0"; +"4 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/embedding_0" -> "5 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_0"; +"5 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_0" -> "6 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/pow_0"; +"5 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_0" -> "10 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___0"; +"5 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_0" -> "74 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___0"; +"6 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/pow_0" -> "7 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/mean_0"; +"7 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/mean_0" -> "8 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__add___0"; +"8 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__add___0" -> "9 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/rsqrt_0"; +"9 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/rsqrt_0" -> "10 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___0"; +"10 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___0" -> "11 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_1"; +"11 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_1" -> "13 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1"; +"12 model.layers.0.input_layernorm.weight" -> "13 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1"; +"13 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "17 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0"; +"13 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "21 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0"; +"13 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0"; +"14 model.layers.0.self_attn.q_proj.weight" -> "15 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/decompress_0"; +"15 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/decompress_0" -> "16 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/type_0"; +"16 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/type_0" -> "17 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0"; +"17 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0" -> "26 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_0"; +"18 model.layers.0.self_attn.k_proj.weight" -> "19 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/decompress_0"; +"19 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/decompress_0" -> "20 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/type_0"; +"20 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/type_0" -> "21 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0"; +"21 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0" -> "28 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_1"; +"22 model.layers.0.self_attn.v_proj.weight" -> "23 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/decompress_0"; +"23 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/decompress_0" -> "24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/type_0"; +"24 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/type_0" -> "25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0"; +"25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0" -> "30 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_2"; +"26 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_0" -> "27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_0"; +"27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_0" -> "39 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___0"; +"27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_0" -> "40 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___0"; +"27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_0" -> "41 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___1"; +"28 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_1" -> "29 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_1"; +"29 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_1" -> "46 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___2"; +"29 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_1" -> "47 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___2"; +"29 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_1" -> "48 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___3"; +"30 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_2" -> "31 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_2"; +"31 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_2" -> "56 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___5"; +"32 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "33 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0"; +"32 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "35 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0"; +"33 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" -> "34 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0"; +"34 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" -> "37 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0"; +"35 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" -> "36 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1"; +"36 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" -> "38 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_1"; +"37 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0" -> "39 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___0"; +"37 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0" -> "46 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___2"; +"38 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_1" -> "44 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___1"; +"38 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_1" -> "51 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___3"; +"39 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___0" -> "45 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___0"; +"40 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___0" -> "43 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_0"; +"41 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___1" -> "42 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__neg___0"; +"42 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__neg___0" -> "43 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_0"; +"43 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_0" -> "44 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___1"; +"44 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___1" -> "45 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___0"; +"45 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___0" -> "60 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_0"; +"46 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___2" -> "52 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___1"; +"47 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___2" -> "50 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_1"; +"48 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___3" -> "49 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__neg___1"; +"49 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__neg___1" -> "50 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_1"; +"50 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/cat_1" -> "51 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___3"; +"51 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___3" -> "52 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___1"; +"52 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___1" -> "53 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___4"; +"53 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___4" -> "54 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/expand_0"; +"54 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/expand_0" -> "55 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_0"; +"55 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_0" -> "59 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_3"; +"56 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___5" -> "57 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/expand_1"; +"57 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/expand_1" -> "58 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_1"; +"58 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_1" -> "66 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_1"; +"59 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_3" -> "60 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_0"; 
+"60 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_0" -> "61 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__truediv___0"; +"61 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__truediv___0" -> "62 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___2"; +"62 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__add___2" -> "63 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/softmax_0"; +"63 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/softmax_0" -> "64 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/to_0"; +"64 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/to_0" -> "65 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/dropout_0"; +"65 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/dropout_0" -> "66 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_1"; +"66 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/matmul_1" -> "67 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_4"; +"67 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_4" -> "68 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/contiguous_0"; +"68 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/contiguous_0" -> "69 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_2"; +"69 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_2" -> "73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0"; +"70 model.layers.0.self_attn.o_proj.weight" -> "71 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/decompress_0"; +"71 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/decompress_0" -> "72 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/type_0"; +"72 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/type_0" -> "73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0"; +"73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0" -> "74 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___0"; +"74 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___0" -> "75 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_0"; +"75 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_0" -> "76 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/pow_0"; +"75 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_0" -> "80 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___0"; +"75 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_0" -> "107 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___1"; +"76 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/pow_0" -> "77 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/mean_0"; +"77 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/mean_0" -> "78 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__add___0"; +"78 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__add___0" -> "79 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/rsqrt_0"; +"79 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/rsqrt_0" -> "80 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___0"; +"80 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___0" -> "81 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_1"; +"81 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_1" -> "83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1"; +"82 model.layers.0.post_attention_layernorm.weight" -> "83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1"; +"83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "87 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0"; +"83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "89 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0"; +"83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "95 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0"; +"83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> 
"97 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0"; +"84 model.layers.0.mlp.gate_proj.weight" -> "85 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/decompress_0"; +"85 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/decompress_0" -> "86 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/type_0"; +"86 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/type_0" -> "90 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0"; +"87 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0" -> "88 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0"; +"88 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0" -> "89 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0"; +"89 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0" -> "90 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0"; +"90 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0" -> "91 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0"; +"91 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0" -> "99 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0"; +"92 model.layers.0.mlp.up_proj.weight" -> "93 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/decompress_0"; +"93 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/decompress_0" -> "94 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/type_0"; +"94 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/type_0" -> "98 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/linear_0"; +"95 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0" -> "96 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0"; +"96 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0" -> "97 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0"; +"97 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0" -> "98 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/linear_0"; +"98 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/linear_0" -> "99 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0"; +"99 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0" -> "103 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0"; +"99 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0" -> "105 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0"; +"100 model.layers.0.mlp.down_proj.weight" -> "101 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/decompress_0"; +"101 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/decompress_0" -> "102 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/type_0"; +"102 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/type_0" -> "106 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/linear_0"; +"103 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0" -> "104 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0"; +"104 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0" -> "105 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0"; +"105 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0" -> "106 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/linear_0"; +"106 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/linear_0" -> "107 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___1"; +"107 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___1" -> "108 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_0"; +"108 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_0" -> "109 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/pow_0"; +"108 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_0" -> "113 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___0"; +"108 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_0" -> "177 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___0"; +"109 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/pow_0" -> "110 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/mean_0"; +"110 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/mean_0" -> "111 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__add___0"; +"111 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__add___0" -> "112 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/rsqrt_0"; +"112 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/rsqrt_0" -> "113 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___0"; +"113 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___0" -> "114 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_1"; +"114 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/to_1" -> "116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1"; +"115 model.layers.1.input_layernorm.weight" -> "116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1"; +"116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "120 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0"; +"116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "124 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0"; +"116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "128 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0"; +"117 model.layers.1.self_attn.q_proj.weight" -> "118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/decompress_0"; +"118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/decompress_0" -> "119 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/type_0"; +"119 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/type_0" -> "120 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0"; +"120 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0" -> "129 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_0"; +"121 model.layers.1.self_attn.k_proj.weight" -> "122 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/decompress_0"; +"122 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/decompress_0" -> "123 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/type_0"; +"123 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/type_0" -> "124 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0"; +"124 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0" -> "131 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_1"; +"125 model.layers.1.self_attn.v_proj.weight" -> "126 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/decompress_0"; +"126 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/decompress_0" -> "127 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/type_0"; +"127 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/type_0" -> "128 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0"; +"128 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0" -> "133 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_2"; +"129 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_0" -> "130 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_0"; +"130 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_0" -> "142 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___0"; +"130 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_0" -> "143 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___0"; +"130 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_0" -> "144 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___1"; +"131 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_1" -> "132 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_1"; +"132 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_1" -> "149 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___2"; +"132 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_1" -> "150 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___2"; +"132 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_1" -> "151 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___3"; +"133 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_2" -> "134 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_2"; +"134 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_2" -> "159 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___5"; +"135 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "136 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0"; +"135 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "138 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0"; +"136 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" -> "137 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0"; +"137 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" -> "140 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0"; +"138 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" -> "139 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1"; +"139 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" -> "141 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_1"; +"140 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0" -> "142 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___0"; +"140 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0" -> "149 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___2"; +"141 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_1" -> "147 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___1"; +"141 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_1" -> "154 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___3"; +"142 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___0" -> "148 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___0"; +"143 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___0" -> "146 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_0"; +"144 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___1" -> "145 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__neg___0"; +"145 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__neg___0" -> "146 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_0"; +"146 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_0" -> "147 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___1"; +"147 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___1" -> "148 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___0"; +"148 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___0" -> "163 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_0"; +"149 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___2" -> "155 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___1"; +"150 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___2" -> "153 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_1"; +"151 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___3" -> "152 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__neg___1"; +"152 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__neg___1" -> "153 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_1"; +"153 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/cat_1" -> "154 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___3"; +"154 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___3" -> "155 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___1"; +"155 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___1" -> "156 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___4"; +"156 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___4" -> "157 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/expand_0"; +"157 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/expand_0" -> "158 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_0"; +"158 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_0" -> "162 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_3"; +"159 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___5" -> "160 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/expand_1"; +"160 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/expand_1" -> "161 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_1"; +"161 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_1" -> "169 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_1"; +"162 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_3" -> "163 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_0"; +"163 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_0" -> "164 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__truediv___0"; +"164 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__truediv___0" -> "165 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___2"; +"165 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__add___2" -> "166 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/softmax_0"; +"166 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/softmax_0" -> "167 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/to_0"; +"167 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/to_0" -> "168 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/dropout_0"; +"168 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/dropout_0" -> "169 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_1"; +"169 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/matmul_1" -> "170 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_4"; +"170 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_4" -> "171 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/contiguous_0"; +"171 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/contiguous_0" -> "172 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_2"; +"172 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_2" -> "176 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0"; +"173 model.layers.1.self_attn.o_proj.weight" -> "174 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/decompress_0"; +"174 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/decompress_0" -> "175 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/type_0"; +"175 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/type_0" -> "176 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0"; +"176 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0" -> "177 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___0"; +"177 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___0" -> "178 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_0"; +"178 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_0" -> "179 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/pow_0"; +"178 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_0" -> "183 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___0"; +"178 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_0" -> "210 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___1"; +"179 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/pow_0" -> "180 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/mean_0"; +"180 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/mean_0" -> "181 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__add___0"; +"181 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__add___0" -> "182 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/rsqrt_0"; +"182 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/rsqrt_0" -> "183 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___0"; +"183 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___0" -> "184 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_1"; +"184 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_1" -> "186 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1"; +"185 model.layers.1.post_attention_layernorm.weight" -> "186 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1"; +"186 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "190 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0"; +"186 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "192 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0"; +"186 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "198 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0"; +"186 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "200 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0"; +"187 model.layers.1.mlp.gate_proj.weight" -> "188 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/decompress_0"; +"188 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/decompress_0" -> "189 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/type_0"; +"189 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/type_0" -> "193 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0"; +"190 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0" -> "191 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0"; +"191 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0" -> "192 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0"; +"192 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0" -> "193 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0"; +"193 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0" -> "194 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0"; +"194 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0" -> "202 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0"; +"195 model.layers.1.mlp.up_proj.weight" -> "196 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/decompress_0"; +"196 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/decompress_0" -> "197 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/type_0"; +"197 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/type_0" -> "201 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/linear_0"; +"198 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0" -> "199 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0"; +"199 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0" -> "200 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0"; +"200 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0" -> "201 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/linear_0"; +"201 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/linear_0" -> "202 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0"; +"202 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0" -> "206 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0"; +"202 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0" -> "208 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0"; +"203 model.layers.1.mlp.down_proj.weight" -> "204 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/decompress_0"; +"204 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/decompress_0" -> "205 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/type_0"; +"205 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/type_0" -> "209 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/linear_0"; +"206 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0" -> "207 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0"; +"207 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0" -> "208 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0"; +"208 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0" -> "209 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/linear_0"; +"209 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/linear_0" -> "210 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___1"; +"210 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___1" -> "211 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_0"; +"211 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_0" -> "212 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/pow_0"; +"211 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_0" -> "216 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___0"; +"212 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/pow_0" -> "213 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/mean_0"; +"213 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/mean_0" -> "214 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__add___0"; +"214 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__add___0" -> "215 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/rsqrt_0"; +"215 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/rsqrt_0" -> "216 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___0"; +"216 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___0" -> "217 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_1"; +"217 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_1" -> "219 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___1"; +"218 model.norm.weight" -> "219 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___1"; +"219 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___1" -> "223 LlamaForCausalLM/Linear[lm_head]/linear_0"; +"220 lm_head.weight" -> "221 LlamaForCausalLM/Linear[lm_head]/WeightsDecompressor/decompress_0"; +"221 LlamaForCausalLM/Linear[lm_head]/WeightsDecompressor/decompress_0" -> "222 LlamaForCausalLM/Linear[lm_head]/WeightsDecompressor/type_0"; +"222 LlamaForCausalLM/Linear[lm_head]/WeightsDecompressor/type_0" -> "223 LlamaForCausalLM/Linear[lm_head]/linear_0"; +"223 LlamaForCausalLM/Linear[lm_head]/linear_0" -> "224 LlamaForCausalLM/float_0"; +"224 LlamaForCausalLM/float_0" -> "225 /nncf_model_output_0"; +} diff --git a/tests/torch/data/sparsify_activations/linear.dot b/tests/torch/data/sparsify_activations/linear.dot new file mode 100644 index 00000000000..a3192dfff20 --- /dev/null +++ 
b/tests/torch/data/sparsify_activations/linear.dot
@@ -0,0 +1,18 @@
+strict digraph {
+"0 /nncf_model_input_0" [id=0, type=nncf_model_input];
+"1 weight" [id=1, type=nncf_model_const];
+"2 bias" [id=2, type=nncf_model_const];
+"3 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/abs_0" [id=3, type=abs];
+"4 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/le_0" [id=4, type=le];
+"5 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/masked_fill_0" [id=5, type=masked_fill];
+"6 Linear/linear_0" [id=6, type=linear];
+"7 /nncf_model_output_0" [id=7, type=nncf_model_output];
+"0 /nncf_model_input_0" -> "3 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/abs_0";
+"0 /nncf_model_input_0" -> "5 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/masked_fill_0";
+"1 weight" -> "6 Linear/linear_0";
+"2 bias" -> "6 Linear/linear_0";
+"3 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/abs_0" -> "4 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/le_0";
+"4 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/le_0" -> "5 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/masked_fill_0";
+"5 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/masked_fill_0" -> "6 Linear/linear_0";
+"6 Linear/linear_0" -> "7 /nncf_model_output_0";
+}
diff --git a/tests/torch/data/sparsify_activations/linear_compressed_weights.dot b/tests/torch/data/sparsify_activations/linear_compressed_weights.dot
new file mode 100644
index 00000000000..6c1ba9ca3a6
--- /dev/null
+++ b/tests/torch/data/sparsify_activations/linear_compressed_weights.dot
@@ -0,0 +1,22 @@
+strict digraph {
+"0 /nncf_model_input_0" [id=0, type=nncf_model_input];
+"1 weight" [id=1, type=nncf_model_const];
+"2 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/WeightsDecompressor[weights_decompressor_weight]/decompress_0" [id=2, type=decompress];
+"3 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/WeightsDecompressor[weights_decompressor_weight]/type_0" [id=3, type=type];
+"4 bias" [id=4, type=nncf_model_const];
+"5 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/abs_0" [id=5, type=abs];
+"6 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/le_0" [id=6, type=le];
+"7 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/masked_fill_0" [id=7, type=masked_fill];
+"8 Linear/linear_0" [id=8, type=linear];
+"9 /nncf_model_output_0" [id=9, type=nncf_model_output];
+"0 /nncf_model_input_0" -> "5 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/abs_0";
+"0 /nncf_model_input_0" -> "7 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/masked_fill_0";
+"1 weight" -> "2 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/WeightsDecompressor[weights_decompressor_weight]/decompress_0";
+"2 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/WeightsDecompressor[weights_decompressor_weight]/decompress_0" -> "3 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/WeightsDecompressor[weights_decompressor_weight]/type_0";
+"3 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/WeightsDecompressor[weights_decompressor_weight]/type_0" -> "8 Linear/linear_0";
+"4 bias" -> "8 Linear/linear_0";
+"5 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/abs_0" -> "6 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/le_0";
+"6 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/le_0" -> "7 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/masked_fill_0";
+"7 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/masked_fill_0" -> "8 Linear/linear_0";
+"8 Linear/linear_0" -> "9 /nncf_model_output_0";
+}
diff --git a/tests/torch/data/sparsify_activations/three_linear.dot b/tests/torch/data/sparsify_activations/three_linear.dot
new file mode 100644
index 00000000000..19a4b32561e
--- /dev/null
+++ b/tests/torch/data/sparsify_activations/three_linear.dot
@@ -0,0 +1,41 @@
+strict digraph {
+"0 /nncf_model_input_0" [id=0, type=nncf_model_input];
+"1 embedding.weight" [id=1, type=nncf_model_const];
+"2 ThreeLinearModel/Embedding[embedding]/embedding_0" [id=2, type=embedding];
+"3 linear1.weight" [id=3, type=nncf_model_const];
+"4 linear1.bias" [id=4, type=nncf_model_const];
+"5 ThreeLinearModel/Linear[linear1]/linear_0" [id=5, type=linear];
+"6 linear3.weight" [id=6, type=nncf_model_const];
+"7 linear3.bias" [id=7, type=nncf_model_const];
+"8 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" [id=8, type=abs];
+"9 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" [id=9, type=le];
+"10 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" [id=10, type=masked_fill];
+"11 ThreeLinearModel/Linear[linear3]/linear_0" [id=11, type=linear];
+"12 linear2.weight" [id=12, type=nncf_model_const];
+"13 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" [id=13, type=abs];
+"14 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" [id=14, type=le];
+"15 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" [id=15, type=masked_fill];
+"16 ThreeLinearModel/Linear[linear2]/linear_0" [id=16, type=linear];
+"17 /nncf_model_output_0" [id=17, type=nncf_model_output];
+"18 /nncf_model_output_1" [id=18, type=nncf_model_output];
+"0 /nncf_model_input_0" -> "2 ThreeLinearModel/Embedding[embedding]/embedding_0";
+"1 embedding.weight" -> "2 ThreeLinearModel/Embedding[embedding]/embedding_0";
+"2 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "5 ThreeLinearModel/Linear[linear1]/linear_0";
+"2 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "13 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0";
+"2 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "15 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0";
+"3 linear1.weight" -> "5 ThreeLinearModel/Linear[linear1]/linear_0";
+"4 linear1.bias" -> "5 ThreeLinearModel/Linear[linear1]/linear_0";
+"5 ThreeLinearModel/Linear[linear1]/linear_0" -> "8 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0";
+"5 ThreeLinearModel/Linear[linear1]/linear_0" -> "10 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0";
+"6 linear3.weight" -> "11 ThreeLinearModel/Linear[linear3]/linear_0";
+"7 linear3.bias" -> "11 ThreeLinearModel/Linear[linear3]/linear_0";
+"8 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" -> "9 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0";
+"9 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" -> "10 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0";
+"10 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" -> "11 ThreeLinearModel/Linear[linear3]/linear_0";
+"11 ThreeLinearModel/Linear[linear3]/linear_0" -> "17 /nncf_model_output_0";
+"12 linear2.weight" -> "16 ThreeLinearModel/Linear[linear2]/linear_0";
+"13 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" -> "14 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0";
+"14 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" -> "15 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0";
+"15 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" -> "16 ThreeLinearModel/Linear[linear2]/linear_0";
+"16 ThreeLinearModel/Linear[linear2]/linear_0" -> "18 /nncf_model_output_1";
+}
diff --git a/tests/torch/data/sparsify_activations/three_linear_compressed_weights.dot b/tests/torch/data/sparsify_activations/three_linear_compressed_weights.dot
new file mode 100644
index 00000000000..1ecd1f533d7
--- /dev/null
+++ b/tests/torch/data/sparsify_activations/three_linear_compressed_weights.dot
@@ -0,0 +1,57 @@
+strict digraph {
+"0 /nncf_model_input_0" [id=0, type=nncf_model_input];
+"1 embedding.weight" [id=1, type=nncf_model_const];
+"2 ThreeLinearModel/Embedding[embedding]/WeightsDecompressor/decompress_0" [id=2, type=decompress];
+"3 ThreeLinearModel/Embedding[embedding]/WeightsDecompressor/type_0" [id=3, type=type];
+"4 ThreeLinearModel/Embedding[embedding]/embedding_0" [id=4, type=embedding];
+"5 linear1.weight" [id=5, type=nncf_model_const];
+"6 ThreeLinearModel/Linear[linear1]/WeightsDecompressor/decompress_0" [id=6, type=decompress];
+"7 ThreeLinearModel/Linear[linear1]/WeightsDecompressor/type_0" [id=7, type=type];
+"8 linear1.bias" [id=8, type=nncf_model_const];
+"9 ThreeLinearModel/Linear[linear1]/linear_0" [id=9, type=linear];
+"10 linear3.weight" [id=10, type=nncf_model_const];
+"11 ThreeLinearModel/Linear[linear3]/WeightsDecompressor/decompress_0" [id=11, type=decompress];
+"12 ThreeLinearModel/Linear[linear3]/WeightsDecompressor/type_0" [id=12, type=type];
+"13 linear3.bias" [id=13, type=nncf_model_const];
+"14 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" [id=14, type=abs];
+"15 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" [id=15, type=le];
+"16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" [id=16, type=masked_fill];
+"17 ThreeLinearModel/Linear[linear3]/linear_0" [id=17, type=linear];
+"18 linear2.weight" [id=18, type=nncf_model_const];
+"19 ThreeLinearModel/Linear[linear2]/WeightsDecompressor/decompress_0" [id=19, type=decompress];
+"20 ThreeLinearModel/Linear[linear2]/WeightsDecompressor/type_0" [id=20,
type=type]; +"21 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" [id=21, type=abs]; +"22 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" [id=22, type=le]; +"23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" [id=23, type=masked_fill]; +"24 ThreeLinearModel/Linear[linear2]/linear_0" [id=24, type=linear]; +"25 /nncf_model_output_0" [id=25, type=nncf_model_output]; +"26 /nncf_model_output_1" [id=26, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "4 ThreeLinearModel/Embedding[embedding]/embedding_0"; +"1 embedding.weight" -> "2 ThreeLinearModel/Embedding[embedding]/WeightsDecompressor/decompress_0"; +"2 ThreeLinearModel/Embedding[embedding]/WeightsDecompressor/decompress_0" -> "3 ThreeLinearModel/Embedding[embedding]/WeightsDecompressor/type_0"; +"3 ThreeLinearModel/Embedding[embedding]/WeightsDecompressor/type_0" -> "4 ThreeLinearModel/Embedding[embedding]/embedding_0"; +"4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "9 ThreeLinearModel/Linear[linear1]/linear_0"; +"4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "21 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0"; +"4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; +"5 linear1.weight" -> "6 ThreeLinearModel/Linear[linear1]/WeightsDecompressor/decompress_0"; +"6 ThreeLinearModel/Linear[linear1]/WeightsDecompressor/decompress_0" -> "7 ThreeLinearModel/Linear[linear1]/WeightsDecompressor/type_0"; +"7 ThreeLinearModel/Linear[linear1]/WeightsDecompressor/type_0" -> "9 ThreeLinearModel/Linear[linear1]/linear_0"; +"8 linear1.bias" -> "9 ThreeLinearModel/Linear[linear1]/linear_0"; +"9 ThreeLinearModel/Linear[linear1]/linear_0" -> "14 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0"; +"9 ThreeLinearModel/Linear[linear1]/linear_0" -> "16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; +"10 linear3.weight" -> "11 ThreeLinearModel/Linear[linear3]/WeightsDecompressor/decompress_0"; +"11 ThreeLinearModel/Linear[linear3]/WeightsDecompressor/decompress_0" -> "12 ThreeLinearModel/Linear[linear3]/WeightsDecompressor/type_0"; +"12 ThreeLinearModel/Linear[linear3]/WeightsDecompressor/type_0" -> "17 ThreeLinearModel/Linear[linear3]/linear_0"; +"13 linear3.bias" -> "17 ThreeLinearModel/Linear[linear3]/linear_0"; +"14 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" -> "15 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0"; +"15 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" -> "16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; +"16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" -> "17 ThreeLinearModel/Linear[linear3]/linear_0"; +"17 ThreeLinearModel/Linear[linear3]/linear_0" -> "25 /nncf_model_output_0"; +"18 linear2.weight" -> "19 ThreeLinearModel/Linear[linear2]/WeightsDecompressor/decompress_0"; +"19 ThreeLinearModel/Linear[linear2]/WeightsDecompressor/decompress_0" -> "20 ThreeLinearModel/Linear[linear2]/WeightsDecompressor/type_0"; +"20 ThreeLinearModel/Linear[linear2]/WeightsDecompressor/type_0" -> "24 ThreeLinearModel/Linear[linear2]/linear_0"; +"21 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" -> "22 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0"; +"22 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" -> "23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; +"23 
ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" -> "24 ThreeLinearModel/Linear[linear2]/linear_0"; +"24 ThreeLinearModel/Linear[linear2]/linear_0" -> "26 /nncf_model_output_1"; +} From 0930870c717d0cab89a203363a730c66d7bfcf92 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:05 +0800 Subject: [PATCH 16/76] conformance test --- .../sparsify_activations_impl.py | 4 +- .../sparsify_activations/reference_data.yaml | 4 + .../test_sparsify_activations_conformance.py | 215 ++++++++++++++++++ 3 files changed, 222 insertions(+), 1 deletion(-) create mode 100644 tests/post_training/experimental/sparsify_activations/reference_data.yaml create mode 100644 tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index 3785784a7e6..0d81a5357de 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -38,6 +38,8 @@ class SparsifyActivationsAlgoBackend(ABC): Abstract class for activation sparsification algorithm backend. """ + CALIBRATION_TRACKING_DESC = "Activations Sparsifier Calibration" + def do_inference(self, model: TModel, dataset: Dataset): """ Conducts model inference on given dataset to calibrate the activation sparsifiers. @@ -49,7 +51,7 @@ def do_inference(self, model: TModel, dataset: Dataset): for input_data in track( dataset.get_inference_data(), total=dataset.get_length(), - description="Activations Sparsifier Calibration", + description=SparsifyActivationsAlgoBackend.CALIBRATION_TRACKING_DESC, ): engine.infer(input_data) diff --git a/tests/post_training/experimental/sparsify_activations/reference_data.yaml b/tests/post_training/experimental/sparsify_activations/reference_data.yaml new file mode 100644 index 00000000000..55d16cb637f --- /dev/null +++ b/tests/post_training/experimental/sparsify_activations/reference_data.yaml @@ -0,0 +1,4 @@ +tinyllama_int8_sparse20_ffn_backend_TORCH: + metric_value: 0.79924 + num_int4: 0 + num_int8: 312 diff --git a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py new file mode 100644 index 00000000000..4b85b82acae --- /dev/null +++ b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py @@ -0,0 +1,215 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import time +import traceback +from collections import OrderedDict +from collections import defaultdict +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Optional + +import pandas as pd +import pytest +import yaml +from datasets import load_dataset +from memory_profiler import memory_usage + +import nncf +import nncf.experimental +import nncf.experimental.torch +import nncf.experimental.torch.sparsify_activations +from nncf.experimental.torch.sparsify_activations.torch_backend import SparsifyActivationsAlgoBackend +from nncf.parameters import CompressWeightsMode +from tests.post_training.model_scope import generate_tests_scope +from tests.post_training.pipelines.base import BackendType +from tests.post_training.pipelines.base import BaseTestPipeline +from tests.post_training.pipelines.base import RunInfo +from tests.post_training.pipelines.lm_weight_compression import LMWeightCompression +from tests.post_training.pipelines.lm_weight_compression import WCTimeStats +from tests.post_training.test_quantize_conformance import create_pipeline_kwargs +from tests.post_training.test_quantize_conformance import create_short_run_info +from tests.post_training.test_quantize_conformance import fixture_batch_size # noqa: F401 +from tests.post_training.test_quantize_conformance import fixture_data # noqa: F401 +from tests.post_training.test_quantize_conformance import fixture_extra_columns # noqa: F401 +from tests.post_training.test_quantize_conformance import fixture_no_eval # noqa: F401 +from tests.post_training.test_quantize_conformance import fixture_output # noqa: F401 +from tests.post_training.test_quantize_conformance import fixture_run_benchmark_app # noqa: F401 +from tests.post_training.test_quantize_conformance import fixture_run_fp32_backend # noqa: F401 +from tests.post_training.test_quantize_conformance import fixture_run_torch_cuda_backend # noqa: F401 +from tests.post_training.test_quantize_conformance import fixture_subset_size # noqa: F401 +from tests.post_training.test_quantize_conformance import maybe_skip_test_case +from tests.post_training.test_quantize_conformance import write_logs + + +@pytest.fixture(scope="session", name="sparsify_activations_reference_data") +def fixture_sparsify_activations_reference_data(): + path_reference = Path(__file__).parent / "reference_data.yaml" + with path_reference.open() as f: + data = yaml.safe_load(f) + fp32_test_cases = defaultdict(dict) + for test_case_name in data: + if "atol" not in data[test_case_name]: + data[test_case_name]["atol"] = 1e-3 + reported_name = test_case_name.split("_backend_")[0] + fp32_case_name = f"{reported_name}_backend_FP32" + fp32_test_cases[fp32_case_name]["metric_value"] = 1 + if "atol" not in fp32_test_cases[fp32_case_name]: + fp32_test_cases[fp32_case_name]["atol"] = 1e-10 + data.update(fp32_test_cases) + return data + + +@pytest.fixture(scope="session", name="sparsify_activations_result_data") +def fixture_sparsify_activations_report_data(output_dir): + data: Dict[str, RunInfo] = {} + yield data + if data: + test_results = OrderedDict(sorted(data.items())) + df = pd.DataFrame(v.get_result_dict() for v in test_results.values()) + output_dir.mkdir(parents=True, exist_ok=True) + df.to_csv(output_dir / "results.csv", index=False) + + +@dataclass +class SparsifyActivationsTimeStats(WCTimeStats): + """ + Contains statistics that are parsed from the stdout of Sparsify Activations tests. 
+ """ + + time_sparsifier_calibration: Optional[str] = None + STAT_NAMES = [*WCTimeStats.STAT_NAMES, "Activations Sparsifer calibration time"] + VAR_NAMES = [*WCTimeStats.VAR_NAMES, "time_sparsifier_calibration"] + REGEX_PREFIX = [*WCTimeStats.REGEX_PREFIX, SparsifyActivationsAlgoBackend.CALIBRATION_TRACKING_DESC] + + +class LMSparsifyActivations(LMWeightCompression): + def collect_data_from_stdout(self, stdout: str): + stats = SparsifyActivationsTimeStats() + stats.fill(stdout) + self.run_info.stats_from_output = stats + + def compress(self) -> None: + if self.backend == BackendType.FP32: + return + start_time = time.perf_counter() + self.run_info.compression_memory_usage = memory_usage(self._compress, max_usage=True) + self.run_info.time_compression = time.perf_counter() - start_time + + def prepare_calibration_dataset(self): + dataset = load_dataset("wikitext", "wikitext-2-v1", split="train", revision="b08601e") + dataset = dataset.filter(lambda example: len(example["text"]) > 512) + dataset = dataset.select(range(64)) + self.calibration_dataset = nncf.Dataset(dataset, self.get_transform_calibration_fn()) + + def _compress(self): + self.compressed_model = self.model + if self.compression_params["compress_weights"] is not None: + self.compressed_model = nncf.compress_weights( + self.compressed_model, + dataset=self.calibration_dataset, + **self.compression_params["compress_weights"], + ) + self.compressed_model = nncf.experimental.torch.sparsify_activations.sparsify_activations( + self.compressed_model, + dataset=self.calibration_dataset, + **self.compression_params["sparsify_activations"], + ) + + +SPARSIFY_ACTIVATIONS_MODELS = [ + { + "reported_name": "tinyllama_int8_sparse20_ffn", + "model_id": "tinyllama/tinyllama-1.1b-step-50k-105b", + "pipeline_cls": LMSparsifyActivations, + "compression_params": { + "compress_weights": { + "mode": CompressWeightsMode.INT8_ASYM, + }, + "sparsify_activations": { + "target_sparsity_by_scope": { + "{re}up_proj": 0.2, + "{re}gate_proj": 0.2, + "{re}down_proj": 0.2, + } + }, + }, + "backends": [BackendType.TORCH], + }, +] + +SPARSIFY_ACTIVATIONS_TEST_CASES = generate_tests_scope(SPARSIFY_ACTIVATIONS_MODELS) + + +@pytest.mark.parametrize("test_case_name", SPARSIFY_ACTIVATIONS_TEST_CASES.keys()) +def test_sparsify_activations( + sparsify_activations_reference_data: dict, + test_case_name: str, + data_dir: Path, + output_dir: Path, + sparsify_activations_result_data: Dict[str, RunInfo], + no_eval: bool, + batch_size: int, + run_fp32_backend: bool, + run_torch_cuda_backend: bool, + subset_size: Optional[int], + run_benchmark_app: bool, + capsys: pytest.CaptureFixture, + extra_columns: bool, +): + pipeline = None + err_msg = None + test_model_param = None + start_time = time.perf_counter() + try: + if test_case_name not in sparsify_activations_reference_data: + raise RuntimeError(f"{test_case_name} is not defined in `sparsify_activations_reference_data` fixture") + test_model_param = SPARSIFY_ACTIVATIONS_TEST_CASES[test_case_name] + maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend, batch_size) + pipeline_cls = test_model_param["pipeline_cls"] + pipeline_kwargs = create_pipeline_kwargs( + test_model_param, subset_size, test_case_name, sparsify_activations_reference_data + ) + pipeline_kwargs.update( + { + "output_dir": output_dir, + "data_dir": data_dir, + "no_eval": no_eval, + "run_benchmark_app": run_benchmark_app, + "batch_size": batch_size, + } + ) + pipeline: BaseTestPipeline = pipeline_cls(**pipeline_kwargs) + 
pipeline.run() + except Exception as e: + err_msg = str(e) + traceback.print_exc() + + if pipeline is not None: + pipeline.cleanup_cache() + run_info = pipeline.run_info + if err_msg: + run_info.status = f"{run_info.status} | {err_msg}" if run_info.status else err_msg + + captured = capsys.readouterr() + write_logs(captured, pipeline) + + if extra_columns: + pipeline.collect_data_from_stdout(captured.out) + else: + run_info = create_short_run_info(test_model_param, err_msg, test_case_name) + + run_info.time_total = time.perf_counter() - start_time + sparsify_activations_result_data[test_case_name] = run_info + + if err_msg: + pytest.fail(err_msg) From fad6961cef182d5ea84d9d89b80088f67456ee1b Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:05 +0800 Subject: [PATCH 17/76] type hint fix for older python --- .../torch/sparsify_activations/sparsify_activations_impl.py | 4 ++-- nncf/experimental/torch/sparsify_activations/torch_backend.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index 0d81a5357de..9d9ef151257 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -11,7 +11,7 @@ from abc import ABC from abc import abstractmethod -from typing import Dict, List, Optional, TypeVar +from typing import Dict, List, Optional, Type, TypeVar import nncf from nncf.common import factory @@ -57,7 +57,7 @@ def do_inference(self, model: TModel, dataset: Dataset): @property @abstractmethod - def supported_metatypes(self) -> List[type[OperatorMetatype]]: + def supported_metatypes(self) -> List[Type[OperatorMetatype]]: """ Property for the backend-specific metatypes for supported layers. """ diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index cc4be5170c7..c4e31e2b1c4 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Dict, List, TypeVar +from typing import Dict, List, Type, TypeVar import numpy as np import torch @@ -118,7 +118,7 @@ class PTSparsifyActivationsAlgoBackend(SparsifyActivationsAlgoBackend): SUPPORTED_METATYPES = [om.PTLinearMetatype] @property - def supported_metatypes(self) -> List[type[OperatorMetatype]]: + def supported_metatypes(self) -> List[Type[OperatorMetatype]]: return PTSparsifyActivationsAlgoBackend.SUPPORTED_METATYPES def get_sparsifiers(self, model: NNCFNetwork) -> List[ActivationsSparsifier]: From 6c8c1e1611d0878d4a3ca735ec56026666a58bce Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:05 +0800 Subject: [PATCH 18/76] typo --- .../test_sparsify_activations_conformance.py | 2 +- .../sparsify_activations/test_components.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py index 4b85b82acae..15e25de60a5 100644 --- a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py +++ b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py @@ -87,7 +87,7 @@ class SparsifyActivationsTimeStats(WCTimeStats): """ time_sparsifier_calibration: Optional[str] = None - STAT_NAMES = [*WCTimeStats.STAT_NAMES, "Activations Sparsifer calibration time"] + STAT_NAMES = [*WCTimeStats.STAT_NAMES, "Activations Sparsifier calibration time"] VAR_NAMES = [*WCTimeStats.VAR_NAMES, "time_sparsifier_calibration"] REGEX_PREFIX = [*WCTimeStats.REGEX_PREFIX, SparsifyActivationsAlgoBackend.CALIBRATION_TRACKING_DESC] diff --git a/tests/torch/experimental/sparsify_activations/test_components.py b/tests/torch/experimental/sparsify_activations/test_components.py index 571d39a16eb..8337601efed 100644 --- a/tests/torch/experimental/sparsify_activations/test_components.py +++ b/tests/torch/experimental/sparsify_activations/test_components.py @@ -128,7 +128,7 @@ def test_forward(self, use_cuda: bool, desc: SparsifierForwardTestDesc): class TestPTSparsifyActivationsAlgoBackend: - def test_get_sparsifers(self): + def test_get_sparsifiers(self): model = ThreeLinearModel() dataset = nncf.Dataset(torch.randint(0, 30, (3, 2, 8))) sparse_model = nncf.experimental.torch.sparsify_activations.sparsify_activations( @@ -151,8 +151,8 @@ def test_calibrate_sparsifiers(self, mocker): model, dataset = self.create_model_and_dataset() graph = model.nncf.get_graph() backend = PTSparsifyActivationsAlgoBackend() - mock_sparsifer = ActivationsSparsifier(0.5, 0.1) - mock_sparsifer.freeze(True) + mock_sparsifier = ActivationsSparsifier(0.5, 0.1) + mock_sparsifier.freeze(True) num_model_forward_calls = 0 def model_forward_pre_hook(model: NNCFNetwork, args): @@ -162,9 +162,9 @@ def model_forward_pre_hook(model: NNCFNetwork, args): model.register_forward_pre_hook(model_forward_pre_hook) - with mocker.patch.object(backend, "get_sparsifiers", return_value=[mock_sparsifer]): + with mocker.patch.object(backend, "get_sparsifiers", return_value=[mock_sparsifier]): backend.calibrate_sparsifiers(model, graph, dataset) - assert mock_sparsifer._freeze is False + assert mock_sparsifier._freeze is False assert num_model_forward_calls == dataset.get_length() def create_model_and_dataset(self, compress_weights: bool = False): From 9158b58e551d84c36876e26b05089acd9fdb5188 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 
Jul 2024 16:39:05 +0800 Subject: [PATCH 19/76] leave abstractmethod empty --- .../torch/sparsify_activations/sparsify_activations_impl.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index 9d9ef151257..5c1f7bbc3d1 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -61,7 +61,6 @@ def supported_metatypes(self) -> List[Type[OperatorMetatype]]: """ Property for the backend-specific metatypes for supported layers. """ - return [] @abstractmethod def insert_sparsifiers( @@ -78,7 +77,6 @@ def insert_sparsifiers( :param target_sparsity_by_node: The target sparsity level for the input activation in each given node layer. :return: The model with inserted activation sparsifiers. """ - return model @abstractmethod def calibrate_sparsifiers(self, model: TModel, graph: NNCFGraph, dataset: Dataset) -> TModel: """ @@ -90,7 +88,6 @@ def calibrate_sparsifiers(self, model: TModel, graph: NNCFGraph, dataset: Datase :param dataset: The calibration dataset to update the thresholds in the sparsifiers. :return: The model with calibrated activation sparsifiers. """ - return model @abstractmethod def freeze_sparsifiers(self, model: TModel, graph: NNCFGraph) -> TModel: """ @@ -101,7 +98,6 @@ def freeze_sparsifiers(self, model: TModel, graph: NNCFGraph) -> TModel: :param graph: The model's NNCF graph. :return: The model with applied sparsification operations. """ - return model class SparsifyActivationsAlgorithm: From b6353cdf7ebaff37514c4c7a1b4135d2a58d5cb9 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:06 +0800 Subject: [PATCH 20/76] move ref dot files to "experimental" subfolder --- .../{ => experimental}/sparsify_activations/dummy_llama.dot | 0 .../sparsify_activations/dummy_llama_compressed_weights.dot | 0 .../data/{ => experimental}/sparsify_activations/linear.dot | 0 .../sparsify_activations/linear_compressed_weights.dot | 0 .../{ => experimental}/sparsify_activations/three_linear.dot | 0 .../sparsify_activations/three_linear_compressed_weights.dot | 0 tests/torch/experimental/sparsify_activations/test_algo.py | 2 +- 7 files changed, 1 insertion(+), 1 deletion(-) rename tests/torch/data/{ => experimental}/sparsify_activations/dummy_llama.dot (100%) rename tests/torch/data/{ => experimental}/sparsify_activations/dummy_llama_compressed_weights.dot (100%) rename tests/torch/data/{ => experimental}/sparsify_activations/linear.dot (100%) rename tests/torch/data/{ => experimental}/sparsify_activations/linear_compressed_weights.dot (100%) rename tests/torch/data/{ => experimental}/sparsify_activations/three_linear.dot (100%) rename tests/torch/data/{ => experimental}/sparsify_activations/three_linear_compressed_weights.dot (100%) diff --git a/tests/torch/data/sparsify_activations/dummy_llama.dot b/tests/torch/data/experimental/sparsify_activations/dummy_llama.dot similarity index 100% rename from tests/torch/data/sparsify_activations/dummy_llama.dot rename to tests/torch/data/experimental/sparsify_activations/dummy_llama.dot diff --git a/tests/torch/data/sparsify_activations/dummy_llama_compressed_weights.dot b/tests/torch/data/experimental/sparsify_activations/dummy_llama_compressed_weights.dot similarity index 100% rename from tests/torch/data/sparsify_activations/dummy_llama_compressed_weights.dot rename to
tests/torch/data/experimental/sparsify_activations/dummy_llama_compressed_weights.dot diff --git a/tests/torch/data/sparsify_activations/linear.dot b/tests/torch/data/experimental/sparsify_activations/linear.dot similarity index 100% rename from tests/torch/data/sparsify_activations/linear.dot rename to tests/torch/data/experimental/sparsify_activations/linear.dot diff --git a/tests/torch/data/sparsify_activations/linear_compressed_weights.dot b/tests/torch/data/experimental/sparsify_activations/linear_compressed_weights.dot similarity index 100% rename from tests/torch/data/sparsify_activations/linear_compressed_weights.dot rename to tests/torch/data/experimental/sparsify_activations/linear_compressed_weights.dot diff --git a/tests/torch/data/sparsify_activations/three_linear.dot b/tests/torch/data/experimental/sparsify_activations/three_linear.dot similarity index 100% rename from tests/torch/data/sparsify_activations/three_linear.dot rename to tests/torch/data/experimental/sparsify_activations/three_linear.dot diff --git a/tests/torch/data/sparsify_activations/three_linear_compressed_weights.dot b/tests/torch/data/experimental/sparsify_activations/three_linear_compressed_weights.dot similarity index 100% rename from tests/torch/data/sparsify_activations/three_linear_compressed_weights.dot rename to tests/torch/data/experimental/sparsify_activations/three_linear_compressed_weights.dot diff --git a/tests/torch/experimental/sparsify_activations/test_algo.py b/tests/torch/experimental/sparsify_activations/test_algo.py index 68393383e31..177acd32842 100644 --- a/tests/torch/experimental/sparsify_activations/test_algo.py +++ b/tests/torch/experimental/sparsify_activations/test_algo.py @@ -151,7 +151,7 @@ def test_nncf_graph(self): model: NNCFNetwork = self.model graph = model.nncf.get_graph() file_name = f"{desc.name}_compressed_weights" if self.compress_weights else desc.name - ref_dot_path = Path(TEST_ROOT, "torch/data/sparsify_activations", f"{file_name}.dot") + ref_dot_path = Path(TEST_ROOT, "torch", "data", "experimental", "sparsify_activations", f"{file_name}.dot") graph = model.nncf.get_graph().get_graph_for_structure_analysis() compare_nx_graph_with_reference(graph, ref_dot_path) From d1a99bba28830152e524538bef7a6e98dfad4660 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:06 +0800 Subject: [PATCH 21/76] fix type hint for older python --- tests/torch/experimental/sparsify_activations/test_algo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/torch/experimental/sparsify_activations/test_algo.py b/tests/torch/experimental/sparsify_activations/test_algo.py index 177acd32842..d8d201baaa2 100644 --- a/tests/torch/experimental/sparsify_activations/test_algo.py +++ b/tests/torch/experimental/sparsify_activations/test_algo.py @@ -11,7 +11,7 @@ from dataclasses import dataclass from pathlib import Path -from typing import Callable, Dict +from typing import Callable, Dict, Optional import openvino as ov import pytest @@ -40,7 +40,7 @@ class SparsifyActivationsAlgorithmTestDesc: model_getter: Callable[[], nn.Module] dataset_getter: Callable[[torch.device], nncf.Dataset] target_sparsity_by_scope: Dict[str, float] - ignored_scope: nncf.IgnoredScope | None + ignored_scope: Optional[nncf.IgnoredScope] ref_sparsifier_target_sparsity: Dict[str, float] ref_num_batches_tracked: int @@ -178,7 +178,7 @@ def test_export_openvino(self): class TargetSparsityByNodeTestDesc: target_sparsity_by_scope: Dict[str, float] ignored_scope: IgnoredScope - 
ref_target_sparsity_by_node_name: Dict[str, float] = None + ref_target_sparsity_by_node_name: Optional[Dict[str, float]] = None raise_error: bool = False From 0702b4a12388435861b2a96f4a6e9f97539dfcfe Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:06 +0800 Subject: [PATCH 22/76] tests for "no layers matched" --- .../sparsify_activations/sparsify_activations_impl.py | 4 ++-- .../experimental/sparsify_activations/test_algo.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index 5c1f7bbc3d1..e06fb787880 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -142,8 +142,6 @@ def apply( """ self._set_backend_entity(model) target_sparsity_by_node = self._get_target_sparsity_by_node(graph) - if not target_sparsity_by_node: - raise nncf.ValidationError("No layers matched for activation sparsification.") sparse_model = self.do_sparsification(model, graph, target_sparsity_by_node, dataset) return sparse_model @@ -207,6 +205,8 @@ def _get_target_sparsity_by_node(self, graph: NNCFGraph) -> Dict[NNCFNode, float f'"{node.node_name}" is matched by multiple items in `target_sparsity_by_scope`.' ) target_sparsity_by_node[node] = target_sparsity + if not target_sparsity_by_node: + raise nncf.ValidationError("No layers matched for activation sparsification.") return target_sparsity_by_node diff --git a/tests/torch/experimental/sparsify_activations/test_algo.py b/tests/torch/experimental/sparsify_activations/test_algo.py index d8d201baaa2..7ad6ea39a75 100644 --- a/tests/torch/experimental/sparsify_activations/test_algo.py +++ b/tests/torch/experimental/sparsify_activations/test_algo.py @@ -179,7 +179,7 @@ class TargetSparsityByNodeTestDesc: target_sparsity_by_scope: Dict[str, float] ignored_scope: IgnoredScope ref_target_sparsity_by_node_name: Optional[Dict[str, float]] = None - raise_error: bool = False + raised_error_message: Optional[str] = None @pytest.mark.parametrize( @@ -205,12 +205,12 @@ class TargetSparsityByNodeTestDesc: TargetSparsityByNodeTestDesc( target_sparsity_by_scope={"{re}.*nonexist.*": 0.3}, ignored_scope=IgnoredScope(patterns=[".*linear2.*"]), - ref_target_sparsity_by_node_name=dict(), + raised_error_message="No layers matched", ), TargetSparsityByNodeTestDesc( target_sparsity_by_scope={"{re}.*linear.*": 0.3, "{re}.*linear1.*": 0.4}, ignored_scope=IgnoredScope(), - raise_error=True, # multiple matches for one layer + raised_error_message="matched by multiple items", ), ], ) @@ -223,8 +223,8 @@ def test_get_target_sparsity_by_node(desc: TargetSparsityByNodeTestDesc): graph = model.nncf.get_graph() algo = SparsifyActivationsAlgorithm(desc.target_sparsity_by_scope, desc.ignored_scope) algo._set_backend_entity(model) - if desc.raise_error: - with pytest.raises(nncf.ValidationError): + if desc.raised_error_message is not None: + with pytest.raises(nncf.ValidationError, match=desc.raised_error_message): algo._get_target_sparsity_by_node(graph) else: target_sparsity_by_node = algo._get_target_sparsity_by_node(graph) From c52c022c276158b470ccb616e18abb9c9968d237 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:06 +0800 Subject: [PATCH 23/76] use PTTransformationLayout instead of TransformationLayout --- 
nncf/experimental/torch/sparsify_activations/torch_backend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index c4e31e2b1c4..beb2245cc14 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -21,12 +21,12 @@ from nncf.common.graph.operator_metatypes import CONST_NOOP_METATYPES from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.graph.transformations.commands import TargetType -from nncf.common.graph.transformations.layout import TransformationLayout from nncf.data import Dataset from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgoBackend from nncf.torch.graph import operator_metatypes as om from nncf.torch.graph.transformations.commands import PTSharedFnInsertionCommand from nncf.torch.graph.transformations.commands import PTTargetPoint +from nncf.torch.graph.transformations.layout import PTTransformationLayout from nncf.torch.model_transformer import PTModelTransformer from nncf.torch.nncf_network import NNCFNetwork from nncf.torch.utils import training_mode_switcher @@ -136,7 +136,7 @@ def insert_sparsifiers( graph: NNCFGraph, target_sparsity_by_node: Dict[NNCFNode, float], ) -> NNCFNetwork: - transformation_layout = TransformationLayout() + transformation_layout = PTTransformationLayout() for node, target_sparsity in target_sparsity_by_node.items(): activation_port_id = self._get_activation_port_id(node, graph) sparsifier = ActivationsSparsifier(target_sparsity=target_sparsity) From 5cff914bc0236a926c47ec3b7e3fd616cd588075 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:06 +0800 Subject: [PATCH 24/76] initialize sparsifier as frozen, with running_threshold set to -inf --- .../sparsify_activations/torch_backend.py | 21 ++++++++++--------- .../sparsify_activations/test_components.py | 2 ++ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index beb2245cc14..dec8f89e604 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -51,11 +51,11 @@ def __init__(self, target_sparsity: float, alpha: float = 0.2): if alpha <= 0.0 or alpha >= 1.0: raise ValueError("The decay factor `alpha` should be in range (0, 1).") self.alpha = alpha - self.register_buffer("running_threshold", torch.tensor(0.0)) + self.register_buffer("running_threshold", torch.tensor(float("-inf"))) self.register_buffer("num_batches_tracked", torch.tensor(0)) self.running_threshold: torch.Tensor self.num_batches_tracked: torch.Tensor - self._freeze = False + self._freeze = True def forward(self, x: torch.Tensor) -> torch.Tensor: if not self._freeze: @@ -69,7 +69,7 @@ def reset_running_stats(self): """ Resets the running threshold and the number of tracked batches to the initial stage. """ - self.running_threshold.zero_() + self.running_threshold.fill_(float("-inf")) self.num_batches_tracked.zero_() def freeze(self, freeze: bool = True): @@ -101,11 +101,14 @@ def _update(self, threshold: torch.Tensor) -> torch.Tensor: :param threshold: The threshold value derived from this batch to update the running threshold. :return: The updated running threshold.
""" - beta = 1.0 - self.alpha - self.running_threshold = ( - threshold * self.alpha + self.running_threshold * beta * (1 - beta**self.num_batches_tracked) - ) / (1 - beta ** (self.num_batches_tracked + 1)) - self.running_threshold = self.running_threshold.type(threshold.dtype) + if self.num_batches_tracked == 0: + self.running_threshold = threshold + else: + beta = 1.0 - self.alpha + self.running_threshold = ( + threshold * self.alpha + self.running_threshold * beta * (1 - beta**self.num_batches_tracked) + ) / (1 - beta ** (self.num_batches_tracked + 1)) + self.running_threshold = self.running_threshold.type(threshold.dtype) self.num_batches_tracked += 1 return self.running_threshold @@ -140,8 +143,6 @@ def insert_sparsifiers( for node, target_sparsity in target_sparsity_by_node.items(): activation_port_id = self._get_activation_port_id(node, graph) sparsifier = ActivationsSparsifier(target_sparsity=target_sparsity) - # temporarily freeze it for model transformation - sparsifier.freeze(True) sparsifier_name = f"{ACTIVATIONS_SPARSIFIER_PREFIX}_{node.node_name.replace('.', '_')}" transformation_layout.register( PTSharedFnInsertionCommand( diff --git a/tests/torch/experimental/sparsify_activations/test_components.py b/tests/torch/experimental/sparsify_activations/test_components.py index 8337601efed..6dad9f7b117 100644 --- a/tests/torch/experimental/sparsify_activations/test_components.py +++ b/tests/torch/experimental/sparsify_activations/test_components.py @@ -99,6 +99,8 @@ def test_forward(self, use_cuda: bool, desc: SparsifierForwardTestDesc): pytest.skip("CUDA is not available") device = torch.device("cuda" if use_cuda else "cpu") sparsifier = ActivationsSparsifier(desc.target_sparsity, desc.alpha).to(device) + sparsifier.freeze(False) + running_thresholds = [] outputs = [] with torch.no_grad(): From 796e5bc450492910b75a7ad9bbc85d837bcd4e87 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:06 +0800 Subject: [PATCH 25/76] clean unnecessary codes --- .../experimental/torch/sparsify_activations/torch_backend.py | 5 +++-- tests/torch/experimental/sparsify_activations/test_algo.py | 1 - .../experimental/sparsify_activations/test_components.py | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index dec8f89e604..c9f890e07da 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -59,7 +59,7 @@ def __init__(self, target_sparsity: float, alpha: float = 0.2): def forward(self, x: torch.Tensor) -> torch.Tensor: if not self._freeze: - threshold = self._calculate_threshold(x, self.target_sparsity) + threshold = self.calculate_threshold(x, self.target_sparsity) self._update(threshold) mask = torch.le(x.abs(), self.running_threshold) x = torch.masked_fill(x, mask, 0.0) @@ -78,7 +78,8 @@ def freeze(self, freeze: bool = True): def extra_repr(self) -> str: return f"target_sparsity={self.target_sparsity}" - def _calculate_threshold(self, x: torch.Tensor, target_sparsity: float) -> torch.Tensor: + @staticmethod + def calculate_threshold(x: torch.Tensor, target_sparsity: float) -> torch.Tensor: """ Calculates the threshold so that the target sparsity can be achieved. 
diff --git a/tests/torch/experimental/sparsify_activations/test_algo.py b/tests/torch/experimental/sparsify_activations/test_algo.py index 7ad6ea39a75..281c92fda05 100644 --- a/tests/torch/experimental/sparsify_activations/test_algo.py +++ b/tests/torch/experimental/sparsify_activations/test_algo.py @@ -149,7 +149,6 @@ def test_inserted_sparsifier(self): def test_nncf_graph(self): desc: SparsifyActivationsAlgorithmTestDesc = self.desc model: NNCFNetwork = self.model - graph = model.nncf.get_graph() file_name = f"{desc.name}_compressed_weights" if self.compress_weights else desc.name ref_dot_path = Path(TEST_ROOT, "torch", "data", "experimental", "sparsify_activations", f"{file_name}.dot") graph = model.nncf.get_graph().get_graph_for_structure_analysis() diff --git a/tests/torch/experimental/sparsify_activations/test_components.py b/tests/torch/experimental/sparsify_activations/test_components.py index 6dad9f7b117..a714df6b1ab 100644 --- a/tests/torch/experimental/sparsify_activations/test_components.py +++ b/tests/torch/experimental/sparsify_activations/test_components.py @@ -131,8 +131,7 @@ class TestPTSparsifyActivationsAlgoBackend: def test_get_sparsifiers(self): - model = ThreeLinearModel() - dataset = nncf.Dataset(torch.randint(0, 30, (3, 2, 8))) + model, dataset = self.create_model_and_dataset() sparse_model = nncf.experimental.torch.sparsify_activations.sparsify_activations( model, dataset, target_sparsity_by_scope={"{re}.*": 0.5} ) From 1f4fe99af50f08a3a474e066267611b704ccc66c Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:06 +0800 Subject: [PATCH 26/76] use get_nodes_by_metatypes instead of topological sort --- .../torch/sparsify_activations/sparsify_activations_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index e06fb787880..9b328459a19 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -195,8 +195,8 @@ def _get_target_sparsity_by_node(self, graph: NNCFGraph) -> Dict[NNCFNode, float self._ignored_scope, graph, strict=self._ignored_scope.validate ) target_sparsity_by_node = {} - for node in graph.topological_sort(): - if node.metatype not in supported_metatypes or not should_consider_scope(node.node_name, ignored_names): + for node in graph.get_nodes_by_metatypes(supported_metatypes): + if not should_consider_scope(node.node_name, ignored_names): continue for scope, target_sparsity in self._target_sparsity_by_scope.items(): if matches_any(node.node_name, scope): From 5838d9d0a6054c226dc865b18afaf512b0fb7dc7 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:07 +0800 Subject: [PATCH 27/76] check sparsifier does not change model output before calibration --- .../sparsify_activations/test_components.py | 68 ++++++++++++++----- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/tests/torch/experimental/sparsify_activations/test_components.py b/tests/torch/experimental/sparsify_activations/test_components.py index a714df6b1ab..97447932467 100644 --- a/tests/torch/experimental/sparsify_activations/test_components.py +++ b/tests/torch/experimental/sparsify_activations/test_components.py @@ -34,7 +34,7 @@ class SparsifierForwardTestDesc: ref_outputs:
List[torch.Tensor] -sparsifier_forward_test_descs = { +sparsifier_forward_during_calibration_test_descs = { "fp16": SparsifierForwardTestDesc( target_sparsity=0.4, alpha=0.2, @@ -93,14 +93,34 @@ class SparsifierForwardTestDesc: class TestActivationsSparsifier: - @pytest.mark.parametrize("desc", sparsifier_forward_test_descs.values(), ids=sparsifier_forward_test_descs.keys()) - def test_forward(self, use_cuda: bool, desc: SparsifierForwardTestDesc): + @pytest.fixture(autouse=True) + def setup(self, use_cuda: bool): if use_cuda and not torch.cuda.is_available(): pytest.skip("CUDA is not available") - device = torch.device("cuda" if use_cuda else "cpu") + self.device = torch.device("cuda" if use_cuda else "cpu") + + @pytest.mark.parametrize("dtype", [torch.float32, torch.float16]) + def test_forward_before_calibration(self, use_cuda: bool, dtype: torch.dtype): + device = self.device + input_tensor = torch.rand([3, 3], device=device, dtype=dtype) + sparsifier = ActivationsSparsifier(target_sparsity=0.9).to(device) + assert sparsifier._freeze is True + assert not sparsifier.num_batches_tracked.is_nonzero() + assert sparsifier.running_threshold.isneginf() + output_tensor = sparsifier(input_tensor) + assert not output_tensor.is_set_to(input_tensor) # The output tensor is a new tensor + # Before calibration, the sparsifier does not change the input + torch.testing.assert_close(output_tensor, input_tensor, rtol=1e-4, atol=1e-4) + + @pytest.mark.parametrize( + "desc", + sparsifier_forward_during_calibration_test_descs.values(), + ids=sparsifier_forward_during_calibration_test_descs.keys(), + ) + def test_forward_during_calibration(self, use_cuda: bool, desc: SparsifierForwardTestDesc): + device = self.device sparsifier = ActivationsSparsifier(desc.target_sparsity, desc.alpha).to(device) sparsifier.freeze(False) - running_thresholds = [] outputs = [] with torch.no_grad(): @@ -118,15 +138,23 @@ def test_forward(self, use_cuda: bool, desc: SparsifierForwardTestDesc): assert output.device.type == device.type torch.testing.assert_close(output, ref_output, rtol=1e-4, atol=1e-4, check_device=False) - sparsifier.freeze() - with torch.no_grad(): - batch = desc.input_batches[-1] - output = sparsifier(batch.to(device)) - assert sparsifier.num_batches_tracked == len(desc.input_batches) - torch.testing.assert_close( - sparsifier.running_threshold, desc.ref_running_thresholds[-1], rtol=1e-4, atol=1e-4, check_device=False - ) - torch.testing.assert_close(output, desc.ref_outputs[-1], rtol=1e-4, atol=1e-4, check_device=False) + @pytest.mark.parametrize("dtype", [torch.float32, torch.float16]) + def test_forward_after_calibration(self, use_cuda: bool, dtype: torch.dtype): + device = self.device + sparsifier = ActivationsSparsifier(target_sparsity=0.9).to(device) + sparsifier.running_threshold.fill_(0.1) + sparsifier.num_batches_tracked.fill_(100) + + for _ in range(2): + # The sparsifier does not change in the following forwards + input_tensor = torch.rand([2, 10], device=device, dtype=dtype) + ref_output = torch.where(input_tensor.abs() <= 0.1, 0.0, input_tensor) + output_tensor = sparsifier(ref_output) + assert sparsifier.num_batches_tracked == 100 + torch.testing.assert_close( + sparsifier.running_threshold, torch.tensor(0.1, device=device), rtol=1e-4, atol=1e-4 + ) + torch.testing.assert_close(output_tensor, ref_output, rtol=1e-4, atol=1e-4) class TestPTSparsifyActivationsAlgoBackend: @@ -141,13 +169,21 @@ def test_get_sparsifiers(self): @pytest.mark.parametrize("compress_weights", [False, True]) def 
test_insert_sparsifiers(self, compress_weights: bool): - model, _ = self.create_model_and_dataset(compress_weights=compress_weights) + model, dataset = self.create_model_and_dataset(compress_weights=compress_weights) + example_input = next(iter(dataset.get_inference_data())) + ref_output = model(example_input) + graph = model.nncf.get_graph() nodes = graph.get_nodes_by_metatypes(PTSparsifyActivationsAlgoBackend.SUPPORTED_METATYPES) backend = PTSparsifyActivationsAlgoBackend() - model_with_sparsifiers = backend.insert_sparsifiers(model, graph, {node: 0.5 for node in nodes}) + model_with_sparsifiers = backend.insert_sparsifiers(model, graph, {node: 0.9 for node in nodes}) assert len(backend.get_sparsifiers(model_with_sparsifiers)) == len(nodes) + output = model_with_sparsifiers(example_input) + torch.testing.assert_close( + output, ref_output, rtol=1e-4, atol=1e-4 + ) # At this time the sparsifiers do not change the output + def test_calibrate_sparsifiers(self, mocker): model, dataset = self.create_model_and_dataset() graph = model.nncf.get_graph() From 03085861e17382639bc65487ddf01ab1e59b0c97 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:07 +0800 Subject: [PATCH 28/76] runnable conformance test for lm --- .../sparsify_activations_impl.py | 2 +- .../sparsify_activations/reference_data.yaml | 4 + .../test_sparsify_activations_conformance.py | 107 ++++++++++++++---- 3 files changed, 96 insertions(+), 25 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index 9b328459a19..010215b151b 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -38,7 +38,7 @@ class SparsifyActivationsAlgoBackend(ABC): Abstract class for activation sparsification algorithm backend.
""" - CALIBRATION_TRACKING_DESC = "Activations Sparsifier Calibration" + CALIBRATION_TRACKING_DESC = "Conducting Activations Sparsifier Calibration" def do_inference(self, model: TModel, dataset: Dataset): """ diff --git a/tests/post_training/experimental/sparsify_activations/reference_data.yaml b/tests/post_training/experimental/sparsify_activations/reference_data.yaml index 55d16cb637f..ea0086a3333 100644 --- a/tests/post_training/experimental/sparsify_activations/reference_data.yaml +++ b/tests/post_training/experimental/sparsify_activations/reference_data.yaml @@ -1,4 +1,12 @@ -tinyllama_int8_sparse20_ffn_backend_TORCH: - metric_value: 0.79924 +tinyllama_backend_FP32: + metric_value: 1 + num_int4: 0 + num_int8: 0 +tinyllama_ffn_sparse20_backend_TORCH: + metric_value: 0.7852 + num_int4: 0 + num_int8: 0 +tinyllama_int8_asym_data_free_ffn_sparse20_backend_TORCH: + metric_value: 0.7997 num_int4: 0 num_int8: 312 diff --git a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py index 15e25de60a5..8c783168c45 100644 --- a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py +++ b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py @@ -15,14 +15,18 @@ from collections import OrderedDict from collections import defaultdict from dataclasses import dataclass +from functools import partial from pathlib import Path from typing import Dict, Optional import pandas as pd import pytest +import torch import yaml from datasets import load_dataset from memory_profiler import memory_usage +from optimum.intel.openvino import OVModelForCausalLM +from transformers import AutoModelForCausalLM import nncf import nncf.experimental @@ -49,6 +53,7 @@ from tests.post_training.test_quantize_conformance import fixture_subset_size # noqa: F401 from tests.post_training.test_quantize_conformance import maybe_skip_test_case from tests.post_training.test_quantize_conformance import write_logs +from tests.torch.helpers import set_torch_seed @pytest.fixture(scope="session", name="sparsify_activations_reference_data") @@ -57,15 +62,16 @@ def fixture_sparsify_activations_reference_data(): with path_reference.open() as f: data = yaml.safe_load(f) fp32_test_cases = defaultdict(dict) - for test_case_name in data: - if "atol" not in data[test_case_name]: - data[test_case_name]["atol"] = 1e-3 + for test_case_name, test_case in data.items(): + fp32_case = dict(metric_value=1.0) + fp32_case["num_int4"] = test_case.get("num_int4", 0) + fp32_case["num_int8"] = test_case.get("num_int8", 0) reported_name = test_case_name.split("_backend_")[0] fp32_case_name = f"{reported_name}_backend_FP32" - fp32_test_cases[fp32_case_name]["metric_value"] = 1 - if "atol" not in fp32_test_cases[fp32_case_name]: - fp32_test_cases[fp32_case_name]["atol"] = 1e-10 + fp32_test_cases[fp32_case_name] = fp32_case data.update(fp32_test_cases) + for test_case in data.values(): + test_case["atol"] = test_case.get("atol", 1e-5) return data @@ -93,10 +99,42 @@ class SparsifyActivationsTimeStats(WCTimeStats): class LMSparsifyActivations(LMWeightCompression): - def collect_data_from_stdout(self, stdout: str): - stats = SparsifyActivationsTimeStats() - stats.fill(stdout) - self.run_info.stats_from_output = stats + def prepare_model(self) -> None: + is_stateful = self.params.get("is_stateful", False) + + if self.backend == BackendType.TORCH: + if 
is_stateful: + raise RuntimeError(f"is_stateful={is_stateful} is not supported for PyTorch backend.") + + self.model_hf = AutoModelForCausalLM.from_pretrained( + self.model_id, torch_dtype=torch.float32, device_map="cpu" + ) + self.model = self.model_hf + elif self.backend in [BackendType.OV, BackendType.FP32]: + if is_stateful: + self.fp32_model_dir = self.fp32_model_dir.parent / (self.fp32_model_dir.name + "_sf") + if not (self.fp32_model_dir / self.OV_MODEL_NAME).exists(): + # export by model_id + self.model_hf = OVModelForCausalLM.from_pretrained( + self.model_id, export=True, load_in_8bit=False, compile=False, stateful=is_stateful + ) + else: + # no export, load from IR. Applicable for sequential run of test cases in local environment. + self.model_hf = OVModelForCausalLM.from_pretrained( + self.fp32_model_dir, trust_remote_code=True, load_in_8bit=False, compile=False, stateful=is_stateful + ) + self.model = self.model_hf.model + else: + raise RuntimeError(f"backend={self.backend.value} is not supported.") + + if not (self.fp32_model_dir / self.OV_MODEL_NAME).exists(): + self._dump_model_fp32() + + def prepare_calibration_dataset(self): + dataset = load_dataset("wikitext", "wikitext-2-v1", split="train", revision="b08601e") + dataset = dataset.filter(lambda example: len(example["text"].split()) > 256) + dataset = dataset.select(range(64)) + self.calibration_dataset = nncf.Dataset(dataset, partial(self.get_transform_calibration_fn(), max_tokens=256)) def compress(self) -> None: if self.backend == BackendType.FP32: @@ -105,30 +143,54 @@ def compress(self) -> None: self.run_info.compression_memory_usage = memory_usage(self._compress, max_usage=True) self.run_info.time_compression = time.perf_counter() - start_time - def prepare_calibration_dataset(self): - dataset = load_dataset("wikitext", "wikitext-2-v1", split="train", revision="b08601e") - dataset = dataset.filter(lambda example: len(example["text"]) > 512) - dataset = dataset.select(range(64)) - self.calibration_dataset = nncf.Dataset(dataset, self.get_transform_calibration_fn()) + def collect_data_from_stdout(self, stdout: str): + stats = SparsifyActivationsTimeStats() + stats.fill(stdout) + self.run_info.stats_from_output = stats + @set_torch_seed(seed=42) def _compress(self): self.compressed_model = self.model - if self.compression_params["compress_weights"] is not None: + if self.compression_params.get("compress_weights", None) is not None: self.compressed_model = nncf.compress_weights( self.compressed_model, dataset=self.calibration_dataset, **self.compression_params["compress_weights"], ) - self.compressed_model = nncf.experimental.torch.sparsify_activations.sparsify_activations( - self.compressed_model, - dataset=self.calibration_dataset, - **self.compression_params["sparsify_activations"], - ) + if self.compression_params.get("sparsify_activations", None) is not None: + self.compressed_model = nncf.experimental.torch.sparsify_activations.sparsify_activations( + self.compressed_model, + dataset=self.calibration_dataset, + **self.compression_params["sparsify_activations"], + ) SPARSIFY_ACTIVATIONS_MODELS = [ { - "reported_name": "tinyllama_int8_sparse20_ffn", + "reported_name": "tinyllama", + "model_id": "tinyllama/tinyllama-1.1b-step-50k-105b", + "pipeline_cls": LMSparsifyActivations, + "compression_params": None, + "backends": [BackendType.FP32], + }, + { + "reported_name": "tinyllama_ffn_sparse20", + "model_id": "tinyllama/tinyllama-1.1b-step-50k-105b", + "pipeline_cls": LMSparsifyActivations, + "compression_params": { + 
"compress_weights": None, + "sparsify_activations": { + "target_sparsity_by_scope": { + "{re}up_proj": 0.2, + "{re}gate_proj": 0.2, + "{re}down_proj": 0.2, + } + }, + }, + "backends": [BackendType.TORCH], + }, + { + "reported_name": "tinyllama_int8_asym_data_free_ffn_sparse20", "model_id": "tinyllama/tinyllama-1.1b-step-50k-105b", "pipeline_cls": LMSparsifyActivations, "compression_params": { @@ -147,6 +209,7 @@ def _compress(self): }, ] + SPARSIFY_ACTIVATIONS_TEST_CASES = generate_tests_scope(SPARSIFY_ACTIVATIONS_MODELS) From 35196615954b6bb2b40452ef1e1427105c93fba6 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:07 +0800 Subject: [PATCH 29/76] add conformance test for deit-small --- .../sparsify_activations/model_scope.py | 88 ++++++++++ .../sparsify_activations/pipelines.py | 162 ++++++++++++++++++ .../sparsify_activations/reference_data.yaml | 6 + .../test_sparsify_activations_conformance.py | 146 +--------------- 4 files changed, 257 insertions(+), 145 deletions(-) create mode 100644 tests/post_training/experimental/sparsify_activations/model_scope.py create mode 100644 tests/post_training/experimental/sparsify_activations/pipelines.py diff --git a/tests/post_training/experimental/sparsify_activations/model_scope.py b/tests/post_training/experimental/sparsify_activations/model_scope.py new file mode 100644 index 00000000000..02634ba52dc --- /dev/null +++ b/tests/post_training/experimental/sparsify_activations/model_scope.py @@ -0,0 +1,88 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from nncf.parameters import CompressWeightsMode +from tests.post_training.experimental.sparsify_activations.pipelines import ImageClassificationTimmSparsifyActivations +from tests.post_training.experimental.sparsify_activations.pipelines import LMSparsifyActivations +from tests.post_training.model_scope import generate_tests_scope +from tests.post_training.pipelines.base import BackendType + +SPARSIFY_ACTIVATIONS_MODELS = [ + { + "reported_name": "tinyllama", + "model_id": "tinyllama/tinyllama-1.1b-step-50k-105b", + "pipeline_cls": LMSparsifyActivations, + "compression_params": None, + "backends": [BackendType.FP32], + }, + { + "reported_name": "tinyllama_ffn_sparse20", + "model_id": "tinyllama/tinyllama-1.1b-step-50k-105b", + "pipeline_cls": LMSparsifyActivations, + "compression_params": { + "compress_weights": None, + "sparsify_activations": { + "target_sparsity_by_scope": { + "{re}up_proj": 0.2, + "{re}gate_proj": 0.2, + "{re}down_proj": 0.2, + } + }, + }, + "backends": [BackendType.TORCH], + }, + { + "reported_name": "tinyllama_int8_asym_data_free_ffn_sparse20", + "model_id": "tinyllama/tinyllama-1.1b-step-50k-105b", + "pipeline_cls": LMSparsifyActivations, + "compression_params": { + "compress_weights": { + "mode": CompressWeightsMode.INT8_ASYM, + }, + "sparsify_activations": { + "target_sparsity_by_scope": { + "{re}up_proj": 0.2, + "{re}gate_proj": 0.2, + "{re}down_proj": 0.2, + } + }, + }, + "backends": [BackendType.TORCH], + }, + { + "reported_name": "timm/deit3_small_patch16_224", + "model_id": "deit3_small_patch16_224", + "pipeline_cls": ImageClassificationTimmSparsifyActivations, + "compression_params": {"sparsify_activations": None}, + "backends": [BackendType.FP32], + "batch_size": 128, + }, + { + "reported_name": "timm/deit3_small_patch16_224_qkv_sparse20_fc1_sparse20_fc2_sparse30", + "model_id": "deit3_small_patch16_224", + "pipeline_cls": ImageClassificationTimmSparsifyActivations, + "compression_params": { + "sparsify_activations": { + "target_sparsity_by_scope": { + "{re}qkv": 0.2, + "{re}fc1": 0.2, + "{re}fc2": 0.3, + } + }, + }, + "backends": [BackendType.TORCH, BackendType.CUDA_TORCH], + "batch_size": 128, + }, +] + + +SPARSIFY_ACTIVATIONS_TEST_CASES = generate_tests_scope(SPARSIFY_ACTIVATIONS_MODELS) diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py new file mode 100644 index 00000000000..1398a58654e --- /dev/null +++ b/tests/post_training/experimental/sparsify_activations/pipelines.py @@ -0,0 +1,162 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
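+
+# Test pipelines for activation sparsification. LMSparsifyActivations builds on the
+# LM weight compression pipeline and calibrates on a wikitext subset;
+# ImageClassificationTimmSparsifyActivations builds on the timm image classification
+# pipeline. Both call sparsify_activations() in _compress(); the LM pipeline may run
+# nncf.compress_weights() first.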
+ + +import os +import time +from dataclasses import dataclass +from functools import partial +from typing import Optional + +import torch +from datasets import load_dataset +from memory_profiler import memory_usage +from optimum.intel.openvino import OVModelForCausalLM +from transformers import AutoModelForCausalLM + +import nncf +import nncf.experimental +import nncf.experimental.torch +import nncf.experimental.torch.sparsify_activations +from nncf.experimental.torch.sparsify_activations.torch_backend import SparsifyActivationsAlgoBackend +from tests.post_training.pipelines.base import PT_BACKENDS +from tests.post_training.pipelines.base import BackendType +from tests.post_training.pipelines.image_classification_timm import ImageClassificationTimm +from tests.post_training.pipelines.lm_weight_compression import LMWeightCompression +from tests.post_training.pipelines.lm_weight_compression import WCTimeStats +from tests.torch.helpers import set_torch_seed + + +@dataclass +class SparsifyActivationsTimeStats(WCTimeStats): + """ + Contains statistics that are parsed from the stdout of Sparsify Activations tests. + """ + + time_sparsifier_calibration: Optional[str] = None + STAT_NAMES = [*WCTimeStats.STAT_NAMES, "Activations Sparsifier calibration time"] + VAR_NAMES = [*WCTimeStats.VAR_NAMES, "time_sparsifier_calibration"] + REGEX_PREFIX = [*WCTimeStats.REGEX_PREFIX, SparsifyActivationsAlgoBackend.CALIBRATION_TRACKING_DESC] + + +class LMSparsifyActivations(LMWeightCompression): + def prepare_model(self) -> None: + is_stateful = self.params.get("is_stateful", False) + + if self.backend == BackendType.TORCH: + if is_stateful: + raise RuntimeError(f"is_stateful={is_stateful} is not supported for PyTorch backend.") + + self.model_hf = AutoModelForCausalLM.from_pretrained( + self.model_id, torch_dtype=torch.float32, device_map="cpu" + ) + self.model = self.model_hf + elif self.backend in [BackendType.OV, BackendType.FP32]: + if is_stateful: + self.fp32_model_dir = self.fp32_model_dir.parent / (self.fp32_model_dir.name + "_sf") + if not (self.fp32_model_dir / self.OV_MODEL_NAME).exists(): + # export by model_id + self.model_hf = OVModelForCausalLM.from_pretrained( + self.model_id, + trust_remote_code=True, + export=True, + load_in_8bit=False, + compile=False, + stateful=is_stateful, + ) + else: + # no export, load from IR. Applicable for sequential run of test cases in local environment. 
+ self.model_hf = OVModelForCausalLM.from_pretrained( + self.fp32_model_dir, load_in_8bit=False, compile=False, stateful=is_stateful + ) + self.model = self.model_hf.model + else: + raise RuntimeError(f"backend={self.backend.value} is not supported.") + + if not (self.fp32_model_dir / self.OV_MODEL_NAME).exists(): + self._dump_model_fp32() + + def prepare_calibration_dataset(self): + dataset = load_dataset("wikitext", "wikitext-2-v1", split="train", revision="b08601e") + dataset = dataset.filter(lambda example: len(example["text"].split()) > 256) + dataset = dataset.select(range(64)) + self.calibration_dataset = nncf.Dataset(dataset, partial(self.get_transform_calibration_fn(), max_tokens=256)) + + def compress(self) -> None: + if self.backend == BackendType.FP32: + return + start_time = time.perf_counter() + self.run_info.compression_memory_usage = memory_usage(self._compress, max_usage=True) + self.run_info.time_compression = time.perf_counter() - start_time + + def collect_data_from_stdout(self, stdout: str): + stats = SparsifyActivationsTimeStats() + stats.fill(stdout) + self.run_info.stats_from_output = stats + + @set_torch_seed(seed=42) + def _compress(self): + """ + Actual call of weight compression and/or activation sparsification. + """ + self.compressed_model = self.model + if self.compression_params.get("compress_weights", None) is not None: + self.compressed_model = nncf.compress_weights( + self.compressed_model, + dataset=self.calibration_dataset, + **self.compression_params["compress_weights"], + ) + if self.compression_params.get("sparsify_activations", None) is not None: + self.compressed_model = nncf.experimental.torch.sparsify_activations.sparsify_activations( + self.compressed_model, + dataset=self.calibration_dataset, + **self.compression_params["sparsify_activations"], + ) + + +class ImageClassificationTimmSparsifyActivations(ImageClassificationTimm): + def compress(self) -> None: + """ + Run compression of the model and collect time and memory usage information. + """ + if self.backend == BackendType.FP32: + # To validate not compressed model + self.path_compressed_ir = self.fp32_model_dir / "model_fp32.xml" + return + + if self.backend in PT_BACKENDS: + inference_num_threads = os.environ.get("INFERENCE_NUM_THREADS") + if inference_num_threads is not None: + torch.set_num_threads(int(inference_num_threads)) + else: + raise RuntimeError(f"backend={self.backend.value} is not supported.") + + start_time = time.perf_counter() + self.run_info.compression_memory_usage = memory_usage(self._compress, max_usage=True) + self.run_info.time_compression = time.perf_counter() - start_time + + def collect_data_from_stdout(self, stdout: str): + stats = SparsifyActivationsTimeStats() + stats.fill(stdout) + self.run_info.stats_from_output = stats + + @set_torch_seed(seed=42) + def _compress(self): + """ + Actual call of activation sparsification. 
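+        The torch seed is fixed by the set_torch_seed decorator so that the
+        calibration-based compression results are reproducible across runs.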
+ """ + self.compressed_model = self.model + if self.compression_params.get("sparsify_activations", None) is not None: + self.compressed_model = nncf.experimental.torch.sparsify_activations.sparsify_activations( + self.compressed_model, + dataset=self.calibration_dataset, + **self.compression_params["sparsify_activations"], + ) diff --git a/tests/post_training/experimental/sparsify_activations/reference_data.yaml b/tests/post_training/experimental/sparsify_activations/reference_data.yaml index ea0086a3333..7219a2b666d 100644 --- a/tests/post_training/experimental/sparsify_activations/reference_data.yaml +++ b/tests/post_training/experimental/sparsify_activations/reference_data.yaml @@ -10,3 +10,9 @@ tinyllama_int8_asym_data_free_ffn_sparse20_backend_TORCH: metric_value: 0.7997 num_int4: 0 num_int8: 312 +timm/deit3_small_patch16_224_backend_FP32: + metric_value: 0.81358 +timm/deit3_small_patch16_224_qkv_sparse20_fc1_sparse20_fc2_sparse30_backend_TORCH: + metric_value: 0.6 +timm/deit3_small_patch16_224_qkv_sparse20_fc1_sparse20_fc2_sparse30_backend_CUDA_TORCH: + metric_value: 0.6 \ No newline at end of file diff --git a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py index 8c783168c45..83de61fdb57 100644 --- a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py +++ b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py @@ -14,32 +14,16 @@ import traceback from collections import OrderedDict from collections import defaultdict -from dataclasses import dataclass -from functools import partial from pathlib import Path from typing import Dict, Optional import pandas as pd import pytest -import torch import yaml -from datasets import load_dataset -from memory_profiler import memory_usage -from optimum.intel.openvino import OVModelForCausalLM -from transformers import AutoModelForCausalLM -import nncf -import nncf.experimental -import nncf.experimental.torch -import nncf.experimental.torch.sparsify_activations -from nncf.experimental.torch.sparsify_activations.torch_backend import SparsifyActivationsAlgoBackend -from nncf.parameters import CompressWeightsMode -from tests.post_training.model_scope import generate_tests_scope -from tests.post_training.pipelines.base import BackendType +from tests.post_training.experimental.sparsify_activations.model_scope import SPARSIFY_ACTIVATIONS_TEST_CASES from tests.post_training.pipelines.base import BaseTestPipeline from tests.post_training.pipelines.base import RunInfo -from tests.post_training.pipelines.lm_weight_compression import LMWeightCompression -from tests.post_training.pipelines.lm_weight_compression import WCTimeStats from tests.post_training.test_quantize_conformance import create_pipeline_kwargs from tests.post_training.test_quantize_conformance import create_short_run_info from tests.post_training.test_quantize_conformance import fixture_batch_size # noqa: F401 @@ -53,7 +37,6 @@ from tests.post_training.test_quantize_conformance import fixture_subset_size # noqa: F401 from tests.post_training.test_quantize_conformance import maybe_skip_test_case from tests.post_training.test_quantize_conformance import write_logs -from tests.torch.helpers import set_torch_seed @pytest.fixture(scope="session", name="sparsify_activations_reference_data") @@ -86,133 +69,6 @@ def fixture_sparsify_activations_report_data(output_dir): 
df.to_csv(output_dir / "results.csv", index=False) -@dataclass -class SparsifyActivationsTimeStats(WCTimeStats): - """ - Contains statistics that are parsed from the stdout of Sparsify Activations tests. - """ - - time_sparsifier_calibration: Optional[str] = None - STAT_NAMES = [*WCTimeStats.STAT_NAMES, "Activations Sparsifier calibration time"] - VAR_NAMES = [*WCTimeStats.VAR_NAMES, "time_sparsifier_calibration"] - REGEX_PREFIX = [*WCTimeStats.REGEX_PREFIX, SparsifyActivationsAlgoBackend.CALIBRATION_TRACKING_DESC] - - -class LMSparsifyActivations(LMWeightCompression): - def prepare_model(self) -> None: - is_stateful = self.params.get("is_stateful", False) - - if self.backend == BackendType.TORCH: - if is_stateful: - raise RuntimeError(f"is_stateful={is_stateful} is not supported for PyTorch backend.") - - self.model_hf = AutoModelForCausalLM.from_pretrained( - self.model_id, torch_dtype=torch.float32, device_map="cpu" - ) - self.model = self.model_hf - elif self.backend in [BackendType.OV, BackendType.FP32]: - if is_stateful: - self.fp32_model_dir = self.fp32_model_dir.parent / (self.fp32_model_dir.name + "_sf") - if not (self.fp32_model_dir / self.OV_MODEL_NAME).exists(): - # export by model_id - self.model_hf = OVModelForCausalLM.from_pretrained( - self.model_id, export=True, load_in_8bit=False, compile=False, stateful=is_stateful - ) - else: - # no export, load from IR. Applicable for sequential run of test cases in local environment. - self.model_hf = OVModelForCausalLM.from_pretrained( - self.fp32_model_dir, trust_remote_code=True, load_in_8bit=False, compile=False, stateful=is_stateful - ) - self.model = self.model_hf.model - else: - raise RuntimeError(f"backend={self.backend.value} is not supported.") - - if not (self.fp32_model_dir / self.OV_MODEL_NAME).exists(): - self._dump_model_fp32() - - def prepare_calibration_dataset(self): - dataset = load_dataset("wikitext", "wikitext-2-v1", split="train", revision="b08601e") - dataset = dataset.filter(lambda example: len(example["text"].split()) > 256) - dataset = dataset.select(range(64)) - self.calibration_dataset = nncf.Dataset(dataset, partial(self.get_transform_calibration_fn(), max_tokens=256)) - - def compress(self) -> None: - if self.backend == BackendType.FP32: - return - start_time = time.perf_counter() - self.run_info.compression_memory_usage = memory_usage(self._compress, max_usage=True) - self.run_info.time_compression = time.perf_counter() - start_time - - def collect_data_from_stdout(self, stdout: str): - stats = SparsifyActivationsTimeStats() - stats.fill(stdout) - self.run_info.stats_from_output = stats - - @set_torch_seed(seed=42) - def _compress(self): - self.compressed_model = self.model - if self.compression_params.get("compress_weights", None) is not None: - self.compressed_model = nncf.compress_weights( - self.compressed_model, - dataset=self.calibration_dataset, - **self.compression_params["compress_weights"], - ) - if self.compression_params.get("sparsify_activations", None) is not None: - self.compressed_model = nncf.experimental.torch.sparsify_activations.sparsify_activations( - self.compressed_model, - dataset=self.calibration_dataset, - **self.compression_params["sparsify_activations"], - ) - - -SPARSIFY_ACTIVATIONS_MODELS = [ - { - "reported_name": "tinyllama", - "model_id": "tinyllama/tinyllama-1.1b-step-50k-105b", - "pipeline_cls": LMSparsifyActivations, - "compression_params": None, - "backends": [BackendType.FP32], - }, - { - "reported_name": "tinyllama_ffn_sparse20", - "model_id": 
"tinyllama/tinyllama-1.1b-step-50k-105b", - "pipeline_cls": LMSparsifyActivations, - "compression_params": { - "compress_weights": None, - "sparsify_activations": { - "target_sparsity_by_scope": { - "{re}up_proj": 0.2, - "{re}gate_proj": 0.2, - "{re}down_proj": 0.2, - } - }, - }, - "backends": [BackendType.TORCH], - }, - { - "reported_name": "tinyllama_int8_asym_data_free_ffn_sparse20", - "model_id": "tinyllama/tinyllama-1.1b-step-50k-105b", - "pipeline_cls": LMSparsifyActivations, - "compression_params": { - "compress_weights": { - "mode": CompressWeightsMode.INT8_ASYM, - }, - "sparsify_activations": { - "target_sparsity_by_scope": { - "{re}up_proj": 0.2, - "{re}gate_proj": 0.2, - "{re}down_proj": 0.2, - } - }, - }, - "backends": [BackendType.TORCH], - }, -] - - -SPARSIFY_ACTIVATIONS_TEST_CASES = generate_tests_scope(SPARSIFY_ACTIVATIONS_MODELS) - - @pytest.mark.parametrize("test_case_name", SPARSIFY_ACTIVATIONS_TEST_CASES.keys()) def test_sparsify_activations( sparsify_activations_reference_data: dict, From b6791305299f6fe7020ad0f116470c2654d628e2 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:07 +0800 Subject: [PATCH 30/76] add tests --- .../sparsify_activations/model_scope.py | 2 + .../sparsify_activations/pipelines.py | 63 +++++++++++++++++++ .../sparsify_activations/reference_data.yaml | 6 +- .../test_sparsify_activations_conformance.py | 3 +- 4 files changed, 70 insertions(+), 4 deletions(-) diff --git a/tests/post_training/experimental/sparsify_activations/model_scope.py b/tests/post_training/experimental/sparsify_activations/model_scope.py index 02634ba52dc..ecc9672a75e 100644 --- a/tests/post_training/experimental/sparsify_activations/model_scope.py +++ b/tests/post_training/experimental/sparsify_activations/model_scope.py @@ -39,6 +39,7 @@ }, }, "backends": [BackendType.TORCH], + "batch_size": 4, }, { "reported_name": "tinyllama_int8_asym_data_free_ffn_sparse20", @@ -57,6 +58,7 @@ }, }, "backends": [BackendType.TORCH], + "batch_size": 4, }, { "reported_name": "timm/deit3_small_patch16_224", diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py index 1398a58654e..d293995771d 100644 --- a/tests/post_training/experimental/sparsify_activations/pipelines.py +++ b/tests/post_training/experimental/sparsify_activations/pipelines.py @@ -17,6 +17,7 @@ from typing import Optional import torch +import torch.utils from datasets import load_dataset from memory_profiler import memory_usage from optimum.intel.openvino import OVModelForCausalLM @@ -160,3 +161,65 @@ def _compress(self): dataset=self.calibration_dataset, **self.compression_params["sparsify_activations"], ) + + def prepare_calibration_dataset(self): + # TODO: for debugging only + import torch.utils + import torch.utils.data + + hf_dataset = load_dataset("imagenet-1k", split="validation") + + class Dataset(torch.utils.data.Dataset): + def __init__(self, hf_dataset, transform): + super().__init__() + self.hf_dataset = hf_dataset + self.transform = transform # will be assigned in timm internally + + def __getitem__(self, index): + sample = self.hf_dataset[index] + image = sample["image"] + image = image.convert("RGB") + return self.transform(image), sample["label"] + + def __len__(self): + return 512 + return len(self.hf_dataset) + + dataset = Dataset(hf_dataset, self.transform) + generator = torch.Generator() + generator.manual_seed(42) + loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, 
num_workers=2, shuffle=True, generator=generator) + self.calibration_dataset = nncf.Dataset(loader, self.get_transform_calibration_fn()) + + + def _get_imagenet(self): + # TODO: for debugging only + import torch.utils + import torch.utils.data + import numpy as np + + hf_dataset = load_dataset("imagenet-1k", split="validation") + + class Dataset(torch.utils.data.Dataset): + def __init__(self, hf_dataset, transform): + super().__init__() + self.hf_dataset = hf_dataset + self.transform = transform # will be assigned in timm internally + + def __getitem__(self, index): + sample = self.hf_dataset[index] + image = sample["image"] + image = image.convert("RGB") + return self.transform(image), sample["label"] + + def __len__(self): + # return 24 + return len(self.hf_dataset) + + dataset = Dataset(hf_dataset, self.transform) + return dataset + + def _validate(self): + return super()._validate() + with torch.autocast(device_type="cuda"): + return super()._validate() \ No newline at end of file diff --git a/tests/post_training/experimental/sparsify_activations/reference_data.yaml b/tests/post_training/experimental/sparsify_activations/reference_data.yaml index 7219a2b666d..ede9ff2295d 100644 --- a/tests/post_training/experimental/sparsify_activations/reference_data.yaml +++ b/tests/post_training/experimental/sparsify_activations/reference_data.yaml @@ -11,8 +11,8 @@ tinyllama_int8_asym_data_free_ffn_sparse20_backend_TORCH: num_int4: 0 num_int8: 312 timm/deit3_small_patch16_224_backend_FP32: - metric_value: 0.81358 + metric_value: 0.8135 timm/deit3_small_patch16_224_qkv_sparse20_fc1_sparse20_fc2_sparse30_backend_TORCH: - metric_value: 0.6 + metric_value: 0.8097 timm/deit3_small_patch16_224_qkv_sparse20_fc1_sparse20_fc2_sparse30_backend_CUDA_TORCH: - metric_value: 0.6 \ No newline at end of file + metric_value: 0.8097 \ No newline at end of file diff --git a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py index 83de61fdb57..67f6e2214b2 100644 --- a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py +++ b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py @@ -98,13 +98,14 @@ def test_sparsify_activations( pipeline_kwargs = create_pipeline_kwargs( test_model_param, subset_size, test_case_name, sparsify_activations_reference_data ) + calibration_batch_size = batch_size or test_model_param.get("batch_size", 1) pipeline_kwargs.update( { "output_dir": output_dir, "data_dir": data_dir, "no_eval": no_eval, "run_benchmark_app": run_benchmark_app, - "batch_size": batch_size, + "batch_size": calibration_batch_size, } ) pipeline: BaseTestPipeline = pipeline_cls(**pipeline_kwargs) From 91bb3f79bff29b4078c805ece1b995e3bf273cc9 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:07 +0800 Subject: [PATCH 31/76] support subset_size --- .../sparsify_activations/model_scope.py | 38 +++++++++++-- .../sparsify_activations/pipelines.py | 42 ++++----------- .../sparsify_activations/reference_data.yaml | 2 +- .../test_sparsify_activations_conformance.py | 54 ++++++++++++++----- 4 files changed, 83 insertions(+), 53 deletions(-) diff --git a/tests/post_training/experimental/sparsify_activations/model_scope.py b/tests/post_training/experimental/sparsify_activations/model_scope.py index ecc9672a75e..4c5c143d990 100644 --- 
a/tests/post_training/experimental/sparsify_activations/model_scope.py +++ b/tests/post_training/experimental/sparsify_activations/model_scope.py @@ -9,11 +9,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy +from typing import Dict, List +import nncf from nncf.parameters import CompressWeightsMode from tests.post_training.experimental.sparsify_activations.pipelines import ImageClassificationTimmSparsifyActivations from tests.post_training.experimental.sparsify_activations.pipelines import LMSparsifyActivations -from tests.post_training.model_scope import generate_tests_scope from tests.post_training.pipelines.base import BackendType SPARSIFY_ACTIVATIONS_MODELS = [ @@ -21,7 +23,7 @@ "reported_name": "tinyllama", "model_id": "tinyllama/tinyllama-1.1b-step-50k-105b", "pipeline_cls": LMSparsifyActivations, - "compression_params": None, + "compression_params": {}, "backends": [BackendType.FP32], }, { @@ -39,7 +41,6 @@ }, }, "backends": [BackendType.TORCH], - "batch_size": 4, }, { "reported_name": "tinyllama_int8_asym_data_free_ffn_sparse20", @@ -58,13 +59,12 @@ }, }, "backends": [BackendType.TORCH], - "batch_size": 4, }, { "reported_name": "timm/deit3_small_patch16_224", "model_id": "deit3_small_patch16_224", "pipeline_cls": ImageClassificationTimmSparsifyActivations, - "compression_params": {"sparsify_activations": None}, + "compression_params": {}, "backends": [BackendType.FP32], "batch_size": 128, }, @@ -87,4 +87,32 @@ ] +def generate_tests_scope(models_list: List[Dict]) -> Dict[str, Dict]: + """ + Generate tests by names "{reported_name}_backend_{backend}" + """ + tests_scope = {} + fp32_models = set() + for test_model_param in models_list: + model_id = test_model_param["model_id"] + reported_name = test_model_param["reported_name"] + + for backend in test_model_param["backends"]: + model_param = copy.deepcopy(test_model_param) + if "is_batch_size_supported" not in model_param: # Set default value of is_batch_size_supported. 
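+                # Assume batch size is supported unless the entry says otherwise;
+                # maybe_skip_test_case consults this flag together with the requested batch size.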
+ model_param["is_batch_size_supported"] = True + test_case_name = f"{reported_name}_backend_{backend.value}" + model_param["backend"] = backend + model_param.pop("backends") + if backend == BackendType.FP32: + if model_id in fp32_models: + raise nncf.ValidationError(f"Duplicate test case for {model_id} with FP32 backend") + fp32_models.add(model_id) + if test_case_name in tests_scope: + raise nncf.ValidationError(f"{test_case_name} already in tests_scope") + tests_scope[test_case_name] = model_param + + return tests_scope + + SPARSIFY_ACTIVATIONS_TEST_CASES = generate_tests_scope(SPARSIFY_ACTIVATIONS_MODELS) diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py index d293995771d..70daf20e75e 100644 --- a/tests/post_training/experimental/sparsify_activations/pipelines.py +++ b/tests/post_training/experimental/sparsify_activations/pipelines.py @@ -88,7 +88,8 @@ def prepare_model(self) -> None: def prepare_calibration_dataset(self): dataset = load_dataset("wikitext", "wikitext-2-v1", split="train", revision="b08601e") dataset = dataset.filter(lambda example: len(example["text"].split()) > 256) - dataset = dataset.select(range(64)) + subset_size = self.compression_params.get("subset_size") or 64 + dataset = dataset.select(range(subset_size)) self.calibration_dataset = nncf.Dataset(dataset, partial(self.get_transform_calibration_fn(), max_tokens=256)) def compress(self) -> None: @@ -164,39 +165,20 @@ def _compress(self): def prepare_calibration_dataset(self): # TODO: for debugging only - import torch.utils - import torch.utils.data - - hf_dataset = load_dataset("imagenet-1k", split="validation") + subset_size = self.compression_params.get("subset_size") or 512 + dataset = self._get_imagenet(subset_size=subset_size) - class Dataset(torch.utils.data.Dataset): - def __init__(self, hf_dataset, transform): - super().__init__() - self.hf_dataset = hf_dataset - self.transform = transform # will be assigned in timm internally - - def __getitem__(self, index): - sample = self.hf_dataset[index] - image = sample["image"] - image = image.convert("RGB") - return self.transform(image), sample["label"] - - def __len__(self): - return 512 - return len(self.hf_dataset) - - dataset = Dataset(hf_dataset, self.transform) generator = torch.Generator() generator.manual_seed(42) - loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, num_workers=2, shuffle=True, generator=generator) + loader = torch.utils.data.DataLoader( + dataset, batch_size=self.batch_size, num_workers=4, shuffle=True, generator=generator + ) self.calibration_dataset = nncf.Dataset(loader, self.get_transform_calibration_fn()) - - def _get_imagenet(self): + def _get_imagenet(self, subset_size=None): # TODO: for debugging only import torch.utils import torch.utils.data - import numpy as np hf_dataset = load_dataset("imagenet-1k", split="validation") @@ -213,13 +195,7 @@ def __getitem__(self, index): return self.transform(image), sample["label"] def __len__(self): - # return 24 - return len(self.hf_dataset) + return subset_size or len(self.hf_dataset) dataset = Dataset(hf_dataset, self.transform) return dataset - - def _validate(self): - return super()._validate() - with torch.autocast(device_type="cuda"): - return super()._validate() \ No newline at end of file diff --git a/tests/post_training/experimental/sparsify_activations/reference_data.yaml b/tests/post_training/experimental/sparsify_activations/reference_data.yaml index 
ede9ff2295d..b22139e16ed 100644
--- a/tests/post_training/experimental/sparsify_activations/reference_data.yaml
+++ b/tests/post_training/experimental/sparsify_activations/reference_data.yaml
@@ -1,5 +1,5 @@
 tinyllama_backend_FP32:
-  metric_value: 1
+  metric_value: 1.0
   num_int4: 0
   num_int8: 0
 tinyllama_ffn_sparse20_backend_TORCH:
diff --git a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py
index 67f6e2214b2..7b5827e9c37 100644
--- a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py
+++ b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py
@@ -13,7 +13,6 @@
 import time
 import traceback
 from collections import OrderedDict
-from collections import defaultdict
 from pathlib import Path
 from typing import Dict, Optional

@@ -22,9 +21,9 @@
 import yaml

 from tests.post_training.experimental.sparsify_activations.model_scope import SPARSIFY_ACTIVATIONS_TEST_CASES
+from tests.post_training.pipelines.base import BackendType
 from tests.post_training.pipelines.base import BaseTestPipeline
 from tests.post_training.pipelines.base import RunInfo
-from tests.post_training.test_quantize_conformance import create_pipeline_kwargs
 from tests.post_training.test_quantize_conformance import create_short_run_info
 from tests.post_training.test_quantize_conformance import fixture_batch_size  # noqa: F401
 from tests.post_training.test_quantize_conformance import fixture_data  # noqa: F401
@@ -44,17 +43,8 @@ def fixture_sparsify_activations_reference_data():
     path_reference = Path(__file__).parent / "reference_data.yaml"
     with path_reference.open() as f:
         data = yaml.safe_load(f)
-    fp32_test_cases = defaultdict(dict)
-    for test_case_name, test_case in data.items():
-        fp32_case = dict(metric_value=1.0)
-        fp32_case["num_int4"] = test_case.get("num_int4", 0)
-        fp32_case["num_int8"] = test_case.get("num_int8", 0)
-        reported_name = test_case_name.split("_backend_")[0]
-        fp32_case_name = f"{reported_name}_backend_FP32"
-        fp32_test_cases[fp32_case_name] = fp32_case
-    data.update(fp32_test_cases)
-    for test_case in data.values():
-        test_case["atol"] = test_case.get("atol", 1e-5)
+    for test_case in data.values():
+        test_case["atol"] = test_case.get("atol", 1e-5)
     return data


@@ -69,6 +59,39 @@ def fixture_sparsify_activations_report_data(output_dir):
     df.to_csv(output_dir / "results.csv", index=False)


+def create_pipeline_kwargs(
+    test_model_param: Dict,
+    subset_size,
+    test_case_name: str,
+    reference_data: Dict[str, Dict],
+    fp32_model_params: Dict[str, Dict],
+):
+    if subset_size:
+        if "compression_params" not in test_model_param:
+            test_model_param["compression_params"] = {}
+        test_model_param["compression_params"]["subset_size"] = subset_size
+
+    print("\n")
+    print(f"Model: {test_model_param['reported_name']}")
+    print(f"Backend: {test_model_param['backend']}")
+    print(f"Compression params: {test_model_param['compression_params']}")
+
+    # Get target fp32 metric value
+    model_id = test_model_param["model_id"]
+    fp32_test_case_name = fp32_model_params[model_id]["reported_name"] + f"_backend_{BackendType.FP32.value}"
+    test_reference = reference_data[test_case_name]
+    test_reference["metric_value_fp32"] = reference_data[fp32_test_case_name]["metric_value"]
+
+    return {
+        "reported_name": test_model_param["reported_name"],
+        "model_id": test_model_param["model_id"],
+        "backend": 
test_model_param["backend"], + "compression_params": test_model_param["compression_params"], + "params": test_model_param.get("params"), + "reference_data": test_reference, + } + + @pytest.mark.parametrize("test_case_name", SPARSIFY_ACTIVATIONS_TEST_CASES.keys()) def test_sparsify_activations( sparsify_activations_reference_data: dict, @@ -94,9 +117,12 @@ def test_sparsify_activations( raise RuntimeError(f"{test_case_name} is not defined in `sparsify_activations_reference_data` fixture") test_model_param = SPARSIFY_ACTIVATIONS_TEST_CASES[test_case_name] maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend, batch_size) + fp32_model_params = { + tc["model_id"]: tc for tc in SPARSIFY_ACTIVATIONS_TEST_CASES.values() if tc["backend"] == BackendType.FP32 + } pipeline_cls = test_model_param["pipeline_cls"] pipeline_kwargs = create_pipeline_kwargs( - test_model_param, subset_size, test_case_name, sparsify_activations_reference_data + test_model_param, subset_size, test_case_name, sparsify_activations_reference_data, fp32_model_params ) calibration_batch_size = batch_size or test_model_param.get("batch_size", 1) pipeline_kwargs.update( From 90bd84958cdc397bff012154fc73a7bb1ea9500b Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:07 +0800 Subject: [PATCH 32/76] fix graph match for weightsdecompressor and eager mode attention --- .../sparsify_activations/pipelines.py | 2 +- .../dummy_llama_compressed_weights.dot | 160 +++++++++--------- .../linear_compressed_weights.dot | 10 +- .../three_linear_compressed_weights.dot | 40 ++--- .../sparsify_activations/helpers.py | 2 +- 5 files changed, 107 insertions(+), 107 deletions(-) diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py index 70daf20e75e..61d3ae43744 100644 --- a/tests/post_training/experimental/sparsify_activations/pipelines.py +++ b/tests/post_training/experimental/sparsify_activations/pipelines.py @@ -57,7 +57,7 @@ def prepare_model(self) -> None: raise RuntimeError(f"is_stateful={is_stateful} is not supported for PyTorch backend.") self.model_hf = AutoModelForCausalLM.from_pretrained( - self.model_id, torch_dtype=torch.float32, device_map="cpu" + self.model_id, torch_dtype=torch.float32, device_map="cpu", attn_implementation="eager", ) self.model = self.model_hf elif self.backend in [BackendType.OV, BackendType.FP32]: diff --git a/tests/torch/data/experimental/sparsify_activations/dummy_llama_compressed_weights.dot b/tests/torch/data/experimental/sparsify_activations/dummy_llama_compressed_weights.dot index 18d9a10d615..71d762b822d 100644 --- a/tests/torch/data/experimental/sparsify_activations/dummy_llama_compressed_weights.dot +++ b/tests/torch/data/experimental/sparsify_activations/dummy_llama_compressed_weights.dot @@ -1,8 +1,8 @@ strict digraph { "0 /nncf_model_input_0" [id=0, type=nncf_model_input]; "1 model.embed_tokens.weight" [id=1, type=nncf_model_const]; -"2 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/WeightsDecompressor/decompress_0" [id=2, type=decompress]; -"3 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/WeightsDecompressor/type_0" [id=3, type=type]; +"2 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=2, type=decompress_symmetric]; +"3 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/SymmetricWeightsDecompressor/type_0" [id=3, type=type]; "4 
LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/embedding_0" [id=4, type=embedding]; "5 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_0" [id=5, type=to]; "6 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/pow_0" [id=6, type=pow]; @@ -14,16 +14,16 @@ strict digraph { "12 model.layers.0.input_layernorm.weight" [id=12, type=nncf_model_const]; "13 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1" [id=13, type=__mul__]; "14 model.layers.0.self_attn.q_proj.weight" [id=14, type=nncf_model_const]; -"15 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/decompress_0" [id=15, type=decompress]; -"16 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/type_0" [id=16, type=type]; +"15 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=15, type=decompress_symmetric]; +"16 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/SymmetricWeightsDecompressor/type_0" [id=16, type=type]; "17 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0" [id=17, type=linear]; "18 model.layers.0.self_attn.k_proj.weight" [id=18, type=nncf_model_const]; -"19 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/decompress_0" [id=19, type=decompress]; -"20 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/type_0" [id=20, type=type]; +"19 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=19, type=decompress_symmetric]; +"20 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/SymmetricWeightsDecompressor/type_0" [id=20, type=type]; "21 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0" [id=21, type=linear]; "22 model.layers.0.self_attn.v_proj.weight" [id=22, type=nncf_model_const]; -"23 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/decompress_0" [id=23, type=decompress]; -"24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/type_0" [id=24, type=type]; +"23 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=23, type=decompress_symmetric]; +"24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/SymmetricWeightsDecompressor/type_0" [id=24, type=type]; "25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0" [id=25, type=linear]; "26 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_0" [id=26, type=view]; "27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_0" [id=27, type=transpose]; @@ -70,8 +70,8 @@ strict digraph { "68 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/contiguous_0" [id=68, type=contiguous]; "69 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_2" [id=69, type=reshape]; "70 model.layers.0.self_attn.o_proj.weight" [id=70, type=nncf_model_const]; -"71 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/decompress_0" [id=71, type=decompress]; -"72 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/type_0" [id=72, type=type]; +"71 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=71, type=decompress_symmetric]; +"72 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/SymmetricWeightsDecompressor/type_0" [id=72, type=type]; "73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0" [id=73, type=linear]; "74 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___0" [id=74, type=__add__]; "75 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_0" [id=75, type=to]; @@ -84,24 +84,24 @@ strict digraph { "82 model.layers.0.post_attention_layernorm.weight" [id=82, type=nncf_model_const]; "83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" [id=83, type=__mul__]; "84 model.layers.0.mlp.gate_proj.weight" [id=84, type=nncf_model_const]; -"85 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/decompress_0" [id=85, type=decompress]; -"86 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/type_0" [id=86, type=type]; +"85 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=85, type=decompress_symmetric]; +"86 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/SymmetricWeightsDecompressor/type_0" [id=86, type=type]; "87 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0" [id=87, type=abs]; "88 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0" [id=88, type=le]; "89 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0" [id=89, type=masked_fill]; "90 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0" [id=90, type=linear]; "91 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0" [id=91, type=silu]; "92 model.layers.0.mlp.up_proj.weight" [id=92, type=nncf_model_const]; -"93 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/decompress_0" [id=93, type=decompress]; -"94 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/type_0" [id=94, type=type]; +"93 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=93, type=decompress_symmetric]; +"94 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/SymmetricWeightsDecompressor/type_0" [id=94, type=type]; "95 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0" [id=95, type=abs]; "96 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0" [id=96, type=le]; "97 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0" [id=97, type=masked_fill]; "98 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/linear_0" [id=98, type=linear]; "99 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0" [id=99, type=__mul__]; "100 model.layers.0.mlp.down_proj.weight" [id=100, type=nncf_model_const]; -"101 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/decompress_0" [id=101, type=decompress]; -"102 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/type_0" [id=102, type=type]; +"101 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=101, type=decompress_symmetric]; +"102 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/SymmetricWeightsDecompressor/type_0" [id=102, type=type]; "103 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0" [id=103, type=abs]; "104 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0" [id=104, type=le]; "105 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0" [id=105, type=masked_fill]; @@ -117,16 +117,16 @@ strict digraph { "115 model.layers.1.input_layernorm.weight" [id=115, type=nncf_model_const]; "116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1" [id=116, type=__mul__]; "117 model.layers.1.self_attn.q_proj.weight" [id=117, type=nncf_model_const]; -"118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/decompress_0" [id=118, type=decompress]; -"119 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/type_0" [id=119, type=type]; +"118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=118, type=decompress_symmetric]; +"119 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/SymmetricWeightsDecompressor/type_0" [id=119, type=type]; "120 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0" [id=120, type=linear]; "121 model.layers.1.self_attn.k_proj.weight" [id=121, type=nncf_model_const]; -"122 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/decompress_0" [id=122, type=decompress]; -"123 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/type_0" [id=123, type=type]; +"122 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=122, type=decompress_symmetric]; +"123 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/SymmetricWeightsDecompressor/type_0" [id=123, type=type]; "124 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0" [id=124, type=linear]; "125 model.layers.1.self_attn.v_proj.weight" [id=125, type=nncf_model_const]; -"126 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/decompress_0" [id=126, type=decompress]; -"127 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/type_0" [id=127, type=type]; +"126 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=126, type=decompress_symmetric]; +"127 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/SymmetricWeightsDecompressor/type_0" [id=127, type=type]; "128 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0" [id=128, type=linear]; "129 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_0" [id=129, type=view]; "130 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_0" [id=130, type=transpose]; @@ -173,8 +173,8 @@ strict digraph { "171 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/contiguous_0" [id=171, type=contiguous]; "172 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_2" [id=172, type=reshape]; "173 model.layers.1.self_attn.o_proj.weight" [id=173, type=nncf_model_const]; -"174 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/decompress_0" [id=174, type=decompress]; -"175 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/type_0" [id=175, type=type]; +"174 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=174, type=decompress_symmetric]; +"175 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/SymmetricWeightsDecompressor/type_0" [id=175, type=type]; "176 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0" [id=176, type=linear]; "177 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___0" [id=177, type=__add__]; "178 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_0" [id=178, type=to]; @@ -187,24 +187,24 @@ strict digraph { "185 model.layers.1.post_attention_layernorm.weight" [id=185, type=nncf_model_const]; "186 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" [id=186, type=__mul__]; "187 model.layers.1.mlp.gate_proj.weight" [id=187, type=nncf_model_const]; -"188 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/decompress_0" [id=188, type=decompress]; -"189 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/type_0" [id=189, type=type]; +"188 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=188, type=decompress_symmetric]; +"189 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/SymmetricWeightsDecompressor/type_0" [id=189, type=type]; "190 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0" [id=190, type=abs]; "191 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0" [id=191, type=le]; "192 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0" [id=192, type=masked_fill]; "193 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0" [id=193, type=linear]; "194 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0" [id=194, type=silu]; "195 model.layers.1.mlp.up_proj.weight" [id=195, type=nncf_model_const]; -"196 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/decompress_0" [id=196, type=decompress]; -"197 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/type_0" [id=197, type=type]; +"196 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=196, type=decompress_symmetric]; +"197 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/SymmetricWeightsDecompressor/type_0" [id=197, type=type]; "198 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0" [id=198, type=abs]; "199 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0" [id=199, type=le]; "200 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0" [id=200, type=masked_fill]; "201 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/linear_0" [id=201, type=linear]; "202 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0" [id=202, type=__mul__]; "203 model.layers.1.mlp.down_proj.weight" [id=203, type=nncf_model_const]; -"204 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/decompress_0" [id=204, type=decompress]; -"205 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/type_0" [id=205, type=type]; +"204 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=204, type=decompress_symmetric]; +"205 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/SymmetricWeightsDecompressor/type_0" [id=205, type=type]; "206 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0" [id=206, type=abs]; "207 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0" [id=207, type=le]; "208 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0" [id=208, type=masked_fill]; @@ -220,15 +220,15 @@ strict digraph { "218 model.norm.weight" [id=218, type=nncf_model_const]; "219 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___1" [id=219, type=__mul__]; "220 lm_head.weight" [id=220, type=nncf_model_const]; -"221 LlamaForCausalLM/Linear[lm_head]/WeightsDecompressor/decompress_0" [id=221, type=decompress]; -"222 LlamaForCausalLM/Linear[lm_head]/WeightsDecompressor/type_0" [id=222, type=type]; +"221 LlamaForCausalLM/Linear[lm_head]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=221, type=decompress_symmetric]; +"222 LlamaForCausalLM/Linear[lm_head]/SymmetricWeightsDecompressor/type_0" [id=222, type=type]; "223 LlamaForCausalLM/Linear[lm_head]/linear_0" [id=223, type=linear]; "224 LlamaForCausalLM/float_0" [id=224, type=float]; "225 /nncf_model_output_0" [id=225, type=nncf_model_output]; "0 /nncf_model_input_0" -> "4 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/embedding_0"; -"1 model.embed_tokens.weight" -> "2 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/WeightsDecompressor/decompress_0"; -"2 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/WeightsDecompressor/decompress_0" -> "3 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/WeightsDecompressor/type_0"; -"3 
LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/WeightsDecompressor/type_0" -> "4 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/embedding_0"; +"1 model.embed_tokens.weight" -> "2 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"2 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "3 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/SymmetricWeightsDecompressor/type_0"; +"3 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/SymmetricWeightsDecompressor/type_0" -> "4 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/embedding_0"; "4 LlamaForCausalLM/LlamaModel[model]/Embedding[embed_tokens]/embedding_0" -> "5 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_0"; "5 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_0" -> "6 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/pow_0"; "5 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/to_0" -> "10 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___0"; @@ -243,17 +243,17 @@ strict digraph { "13 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "17 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0"; "13 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "21 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0"; "13 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0"; -"14 model.layers.0.self_attn.q_proj.weight" -> "15 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/decompress_0"; -"15 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/decompress_0" -> "16 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/type_0"; -"16 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/type_0" -> "17 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0"; +"14 model.layers.0.self_attn.q_proj.weight" -> "15 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"15 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "16 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/SymmetricWeightsDecompressor/type_0"; +"16 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/SymmetricWeightsDecompressor/type_0" -> "17 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0"; "17 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0" -> "26 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_0"; -"18 model.layers.0.self_attn.k_proj.weight" -> "19 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/decompress_0"; -"19 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/decompress_0" -> "20 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/type_0"; -"20 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/type_0" -> "21 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0"; +"18 model.layers.0.self_attn.k_proj.weight" -> "19 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"19 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "20 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/SymmetricWeightsDecompressor/type_0"; +"20 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/SymmetricWeightsDecompressor/type_0" -> "21 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0"; "21 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0" -> "28 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_1"; -"22 model.layers.0.self_attn.v_proj.weight" -> "23 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/decompress_0"; -"23 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/decompress_0" -> "24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/type_0"; -"24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/type_0" -> "25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0"; +"22 model.layers.0.self_attn.v_proj.weight" -> "23 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"23 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/SymmetricWeightsDecompressor/type_0"; +"24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/SymmetricWeightsDecompressor/type_0" -> "25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0"; "25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0" -> "30 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_2"; "26 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_0" -> "27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_0"; "27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_0" -> "39 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___0"; @@ -306,9 +306,9 @@ strict digraph { "67 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_4" -> "68 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/contiguous_0"; "68 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/contiguous_0" -> "69 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_2"; "69 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/reshape_2" -> "73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0"; -"70 model.layers.0.self_attn.o_proj.weight" -> "71 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/decompress_0"; -"71 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/decompress_0" -> "72 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/type_0"; -"72 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/type_0" -> "73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0"; +"70 model.layers.0.self_attn.o_proj.weight" -> "71 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"71 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "72 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/SymmetricWeightsDecompressor/type_0"; +"72 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/SymmetricWeightsDecompressor/type_0" -> "73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0"; "73 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0" -> "74 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___0"; "74 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/__add___0" -> "75 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_0"; "75 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/to_0" -> "76 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/pow_0"; @@ -325,26 +325,26 @@ strict digraph { "83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "89 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0"; "83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "95 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0"; "83 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "97 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0"; -"84 model.layers.0.mlp.gate_proj.weight" -> "85 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/decompress_0"; -"85 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/decompress_0" -> "86 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/type_0"; -"86 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/type_0" -> "90 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0"; +"84 model.layers.0.mlp.gate_proj.weight" -> "85 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"85 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "86 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/SymmetricWeightsDecompressor/type_0"; +"86 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/SymmetricWeightsDecompressor/type_0" -> "90 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0"; "87 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0" -> "88 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0"; "88 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0" -> "89 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0"; "89 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0" -> "90 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0"; "90 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0" -> "91 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0"; "91 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0" -> "99 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0"; -"92 model.layers.0.mlp.up_proj.weight" -> "93 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/decompress_0"; -"93 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/decompress_0" -> "94 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/type_0"; -"94 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/type_0" -> "98 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/linear_0"; +"92 model.layers.0.mlp.up_proj.weight" -> "93 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"93 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "94 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/SymmetricWeightsDecompressor/type_0"; +"94 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/SymmetricWeightsDecompressor/type_0" -> "98 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/linear_0"; "95 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0" -> "96 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0"; "96 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0" -> "97 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0"; "97 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0" -> "98 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/linear_0"; "98 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[up_proj]/linear_0" -> "99 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0"; "99 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0" -> "103 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0"; "99 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/__mul___0" -> "105 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0"; -"100 model.layers.0.mlp.down_proj.weight" -> "101 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/decompress_0"; -"101 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/decompress_0" -> "102 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/type_0"; -"102 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/type_0" -> "106 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/linear_0"; +"100 model.layers.0.mlp.down_proj.weight" -> "101 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"101 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "102 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/SymmetricWeightsDecompressor/type_0"; +"102 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/SymmetricWeightsDecompressor/type_0" -> "106 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/linear_0"; "103 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0" -> "104 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0"; "104 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0" -> "105 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0"; "105 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0" -> "106 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaMLP[mlp]/Linear[down_proj]/linear_0"; @@ -363,17 +363,17 @@ strict digraph { "116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "120 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0"; "116 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "124 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0"; "116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[input_layernorm]/__mul___1" -> "128 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0"; -"117 model.layers.1.self_attn.q_proj.weight" -> "118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/decompress_0"; -"118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/decompress_0" -> "119 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/type_0"; -"119 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/WeightsDecompressor/type_0" -> "120 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0"; +"117 model.layers.1.self_attn.q_proj.weight" -> "118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "119 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/SymmetricWeightsDecompressor/type_0"; +"119 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/SymmetricWeightsDecompressor/type_0" -> "120 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0"; "120 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[q_proj]/linear_0" -> "129 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_0"; -"121 model.layers.1.self_attn.k_proj.weight" -> "122 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/decompress_0"; -"122 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/decompress_0" -> "123 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/type_0"; -"123 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/WeightsDecompressor/type_0" -> "124 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0"; +"121 model.layers.1.self_attn.k_proj.weight" -> "122 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"122 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "123 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/SymmetricWeightsDecompressor/type_0"; +"123 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/SymmetricWeightsDecompressor/type_0" -> "124 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0"; "124 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[k_proj]/linear_0" -> "131 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_1"; -"125 model.layers.1.self_attn.v_proj.weight" -> "126 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/decompress_0"; -"126 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/decompress_0" -> "127 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/type_0"; -"127 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/WeightsDecompressor/type_0" -> "128 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0"; +"125 model.layers.1.self_attn.v_proj.weight" -> "126 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"126 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "127 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/SymmetricWeightsDecompressor/type_0"; +"127 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/SymmetricWeightsDecompressor/type_0" -> "128 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0"; "128 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[v_proj]/linear_0" -> "133 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_2"; "129 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_0" -> "130 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_0"; "130 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_0" -> "142 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___0"; @@ -426,9 +426,9 @@ strict digraph { "170 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_4" -> "171 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/contiguous_0"; "171 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/contiguous_0" -> "172 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_2"; "172 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/reshape_2" -> "176 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0"; -"173 model.layers.1.self_attn.o_proj.weight" -> "174 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/decompress_0"; -"174 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/decompress_0" -> "175 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/type_0"; -"175 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/WeightsDecompressor/type_0" -> "176 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0"; +"173 model.layers.1.self_attn.o_proj.weight" -> "174 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"174 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "175 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/SymmetricWeightsDecompressor/type_0"; +"175 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/SymmetricWeightsDecompressor/type_0" -> "176 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0"; "176 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/Linear[o_proj]/linear_0" -> "177 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___0"; "177 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/__add___0" -> "178 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_0"; "178 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/to_0" -> "179 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/pow_0"; @@ -445,26 +445,26 @@ strict digraph { "186 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "192 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0"; "186 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "198 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0"; "186 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaRMSNorm[post_attention_layernorm]/__mul___1" -> "200 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0"; -"187 model.layers.1.mlp.gate_proj.weight" -> "188 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/decompress_0"; -"188 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/decompress_0" -> "189 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/type_0"; -"189 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/WeightsDecompressor/type_0" -> "193 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0"; +"187 model.layers.1.mlp.gate_proj.weight" -> "188 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"188 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "189 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/SymmetricWeightsDecompressor/type_0"; +"189 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/SymmetricWeightsDecompressor/type_0" -> "193 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0"; "190 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/abs_0" -> "191 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0"; "191 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/le_0" -> "192 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0"; "192 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/ActivationsSparsifier/masked_fill_0" -> "193 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0"; "193 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[gate_proj]/linear_0" -> "194 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0"; "194 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/SiLU[act_fn]/silu_0" -> "202 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0"; -"195 model.layers.1.mlp.up_proj.weight" -> "196 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/decompress_0"; -"196 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/decompress_0" -> "197 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/type_0"; -"197 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/WeightsDecompressor/type_0" -> "201 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/linear_0"; +"195 model.layers.1.mlp.up_proj.weight" -> "196 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"196 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "197 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/SymmetricWeightsDecompressor/type_0"; +"197 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/SymmetricWeightsDecompressor/type_0" -> "201 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/linear_0"; "198 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/abs_0" -> "199 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0"; "199 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/le_0" -> "200 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0"; "200 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/ActivationsSparsifier/masked_fill_0" -> "201 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/linear_0"; "201 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[up_proj]/linear_0" -> "202 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0"; "202 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0" -> "206 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0"; "202 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/__mul___0" -> "208 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0"; -"203 model.layers.1.mlp.down_proj.weight" -> "204 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/decompress_0"; -"204 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/decompress_0" -> "205 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/type_0"; -"205 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/WeightsDecompressor/type_0" -> "209 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/linear_0"; +"203 
model.layers.1.mlp.down_proj.weight" -> "204 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"204 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "205 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/SymmetricWeightsDecompressor/type_0"; +"205 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/SymmetricWeightsDecompressor/type_0" -> "209 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/linear_0"; "206 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/abs_0" -> "207 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0"; "207 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/le_0" -> "208 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0"; "208 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/ActivationsSparsifier/masked_fill_0" -> "209 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaMLP[mlp]/Linear[down_proj]/linear_0"; @@ -480,9 +480,9 @@ strict digraph { "217 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/to_1" -> "219 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___1"; "218 model.norm.weight" -> "219 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___1"; "219 LlamaForCausalLM/LlamaModel[model]/LlamaRMSNorm[norm]/__mul___1" -> "223 LlamaForCausalLM/Linear[lm_head]/linear_0"; -"220 lm_head.weight" -> "221 LlamaForCausalLM/Linear[lm_head]/WeightsDecompressor/decompress_0"; -"221 LlamaForCausalLM/Linear[lm_head]/WeightsDecompressor/decompress_0" -> "222 LlamaForCausalLM/Linear[lm_head]/WeightsDecompressor/type_0"; -"222 LlamaForCausalLM/Linear[lm_head]/WeightsDecompressor/type_0" -> "223 LlamaForCausalLM/Linear[lm_head]/linear_0"; +"220 lm_head.weight" -> "221 LlamaForCausalLM/Linear[lm_head]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"221 LlamaForCausalLM/Linear[lm_head]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "222 LlamaForCausalLM/Linear[lm_head]/SymmetricWeightsDecompressor/type_0"; +"222 LlamaForCausalLM/Linear[lm_head]/SymmetricWeightsDecompressor/type_0" -> "223 LlamaForCausalLM/Linear[lm_head]/linear_0"; "223 LlamaForCausalLM/Linear[lm_head]/linear_0" -> "224 LlamaForCausalLM/float_0"; "224 LlamaForCausalLM/float_0" -> "225 /nncf_model_output_0"; } diff --git a/tests/torch/data/experimental/sparsify_activations/linear_compressed_weights.dot b/tests/torch/data/experimental/sparsify_activations/linear_compressed_weights.dot index 6c1ba9ca3a6..aa24d54a2e0 100644 --- a/tests/torch/data/experimental/sparsify_activations/linear_compressed_weights.dot +++ b/tests/torch/data/experimental/sparsify_activations/linear_compressed_weights.dot @@ -1,8 +1,8 @@ strict digraph { "0 /nncf_model_input_0" [id=0, type=nncf_model_input]; "1 weight" [id=1, type=nncf_model_const]; -"2 
Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/WeightsDecompressor[weights_decompressor_weight]/decompress_0" [id=2, type=decompress]; -"3 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/WeightsDecompressor[weights_decompressor_weight]/type_0" [id=3, type=type]; +"2 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/SymmetricWeightsDecompressor[weights_decompressor_weight]/decompress_symmetric_0" [id=2, type=decompress_symmetric]; +"3 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/SymmetricWeightsDecompressor[weights_decompressor_weight]/type_0" [id=3, type=type]; "4 bias" [id=4, type=nncf_model_const]; "5 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/abs_0" [id=5, type=abs]; "6 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/le_0" [id=6, type=le]; @@ -11,9 +11,9 @@ strict digraph { "9 /nncf_model_output_0" [id=9, type=nncf_model_output]; "0 /nncf_model_input_0" -> "5 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/abs_0"; "0 /nncf_model_input_0" -> "7 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/masked_fill_0"; -"1 weight" -> "2 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/WeightsDecompressor[weights_decompressor_weight]/decompress_0"; -"2 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/WeightsDecompressor[weights_decompressor_weight]/decompress_0" -> "3 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/WeightsDecompressor[weights_decompressor_weight]/type_0"; -"3 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/WeightsDecompressor[weights_decompressor_weight]/type_0" -> "8 Linear/linear_0"; +"1 weight" -> "2 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/SymmetricWeightsDecompressor[weights_decompressor_weight]/decompress_symmetric_0"; +"2 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/SymmetricWeightsDecompressor[weights_decompressor_weight]/decompress_symmetric_0" -> "3 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/SymmetricWeightsDecompressor[weights_decompressor_weight]/type_0"; +"3 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/SymmetricWeightsDecompressor[weights_decompressor_weight]/type_0" -> "8 Linear/linear_0"; "4 bias" -> "8 Linear/linear_0"; "5 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/abs_0" -> "6 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/le_0"; "6 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/le_0" -> "7 Linear/NNCFNetworkInterface[_nncf]/ModuleDict[external_op]/ActivationsSparsifier[activations_sparsifier_Linear/linear_0]/masked_fill_0"; diff --git a/tests/torch/data/experimental/sparsify_activations/three_linear_compressed_weights.dot b/tests/torch/data/experimental/sparsify_activations/three_linear_compressed_weights.dot index 1ecd1f533d7..ae3f667ff3a 100644 --- a/tests/torch/data/experimental/sparsify_activations/three_linear_compressed_weights.dot +++ b/tests/torch/data/experimental/sparsify_activations/three_linear_compressed_weights.dot @@ -1,25 +1,25 @@ strict digraph { "0 /nncf_model_input_0" [id=0, 
type=nncf_model_input]; "1 embedding.weight" [id=1, type=nncf_model_const]; -"2 ThreeLinearModel/Embedding[embedding]/WeightsDecompressor/decompress_0" [id=2, type=decompress]; -"3 ThreeLinearModel/Embedding[embedding]/WeightsDecompressor/type_0" [id=3, type=type]; +"2 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=2, type=decompress_symmetric]; +"3 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/type_0" [id=3, type=type]; "4 ThreeLinearModel/Embedding[embedding]/embedding_0" [id=4, type=embedding]; "5 linear1.weight" [id=5, type=nncf_model_const]; -"6 ThreeLinearModel/Linear[linear1]/WeightsDecompressor/decompress_0" [id=6, type=decompress]; -"7 ThreeLinearModel/Linear[linear1]/WeightsDecompressor/type_0" [id=7, type=type]; +"6 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=6, type=decompress_symmetric]; +"7 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/type_0" [id=7, type=type]; "8 linear1.bias" [id=8, type=nncf_model_const]; "9 ThreeLinearModel/Linear[linear1]/linear_0" [id=9, type=linear]; "10 linear3.weight" [id=10, type=nncf_model_const]; -"11 ThreeLinearModel/Linear[linear3]/WeightsDecompressor/decompress_0" [id=11, type=decompress]; -"12 ThreeLinearModel/Linear[linear3]/WeightsDecompressor/type_0" [id=12, type=type]; +"11 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=11, type=decompress_symmetric]; +"12 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/type_0" [id=12, type=type]; "13 linear3.bias" [id=13, type=nncf_model_const]; "14 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" [id=14, type=abs]; "15 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" [id=15, type=le]; "16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" [id=16, type=masked_fill]; "17 ThreeLinearModel/Linear[linear3]/linear_0" [id=17, type=linear]; "18 linear2.weight" [id=18, type=nncf_model_const]; -"19 ThreeLinearModel/Linear[linear2]/WeightsDecompressor/decompress_0" [id=19, type=decompress]; -"20 ThreeLinearModel/Linear[linear2]/WeightsDecompressor/type_0" [id=20, type=type]; +"19 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=19, type=decompress_symmetric]; +"20 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/type_0" [id=20, type=type]; "21 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" [id=21, type=abs]; "22 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" [id=22, type=le]; "23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" [id=23, type=masked_fill]; @@ -27,29 +27,29 @@ strict digraph { "25 /nncf_model_output_0" [id=25, type=nncf_model_output]; "26 /nncf_model_output_1" [id=26, type=nncf_model_output]; "0 /nncf_model_input_0" -> "4 ThreeLinearModel/Embedding[embedding]/embedding_0"; -"1 embedding.weight" -> "2 ThreeLinearModel/Embedding[embedding]/WeightsDecompressor/decompress_0"; -"2 ThreeLinearModel/Embedding[embedding]/WeightsDecompressor/decompress_0" -> "3 ThreeLinearModel/Embedding[embedding]/WeightsDecompressor/type_0"; -"3 ThreeLinearModel/Embedding[embedding]/WeightsDecompressor/type_0" -> "4 ThreeLinearModel/Embedding[embedding]/embedding_0"; +"1 embedding.weight" -> "2 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"2 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "3 
ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/type_0"; +"3 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/type_0" -> "4 ThreeLinearModel/Embedding[embedding]/embedding_0"; "4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "9 ThreeLinearModel/Linear[linear1]/linear_0"; "4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "21 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0"; "4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; -"5 linear1.weight" -> "6 ThreeLinearModel/Linear[linear1]/WeightsDecompressor/decompress_0"; -"6 ThreeLinearModel/Linear[linear1]/WeightsDecompressor/decompress_0" -> "7 ThreeLinearModel/Linear[linear1]/WeightsDecompressor/type_0"; -"7 ThreeLinearModel/Linear[linear1]/WeightsDecompressor/type_0" -> "9 ThreeLinearModel/Linear[linear1]/linear_0"; +"5 linear1.weight" -> "6 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"6 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "7 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/type_0"; +"7 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/type_0" -> "9 ThreeLinearModel/Linear[linear1]/linear_0"; "8 linear1.bias" -> "9 ThreeLinearModel/Linear[linear1]/linear_0"; "9 ThreeLinearModel/Linear[linear1]/linear_0" -> "14 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0"; "9 ThreeLinearModel/Linear[linear1]/linear_0" -> "16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; -"10 linear3.weight" -> "11 ThreeLinearModel/Linear[linear3]/WeightsDecompressor/decompress_0"; -"11 ThreeLinearModel/Linear[linear3]/WeightsDecompressor/decompress_0" -> "12 ThreeLinearModel/Linear[linear3]/WeightsDecompressor/type_0"; -"12 ThreeLinearModel/Linear[linear3]/WeightsDecompressor/type_0" -> "17 ThreeLinearModel/Linear[linear3]/linear_0"; +"10 linear3.weight" -> "11 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"11 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "12 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/type_0"; +"12 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/type_0" -> "17 ThreeLinearModel/Linear[linear3]/linear_0"; "13 linear3.bias" -> "17 ThreeLinearModel/Linear[linear3]/linear_0"; "14 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" -> "15 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0"; "15 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" -> "16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; "16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" -> "17 ThreeLinearModel/Linear[linear3]/linear_0"; "17 ThreeLinearModel/Linear[linear3]/linear_0" -> "25 /nncf_model_output_0"; -"18 linear2.weight" -> "19 ThreeLinearModel/Linear[linear2]/WeightsDecompressor/decompress_0"; -"19 ThreeLinearModel/Linear[linear2]/WeightsDecompressor/decompress_0" -> "20 ThreeLinearModel/Linear[linear2]/WeightsDecompressor/type_0"; -"20 ThreeLinearModel/Linear[linear2]/WeightsDecompressor/type_0" -> "24 ThreeLinearModel/Linear[linear2]/linear_0"; +"18 linear2.weight" -> "19 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"19 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "20 
ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/type_0"; +"20 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/type_0" -> "24 ThreeLinearModel/Linear[linear2]/linear_0"; "21 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" -> "22 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0"; "22 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" -> "23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; "23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" -> "24 ThreeLinearModel/Linear[linear2]/linear_0"; diff --git a/tests/torch/experimental/sparsify_activations/helpers.py b/tests/torch/experimental/sparsify_activations/helpers.py index 558ee9b156c..a9fa94d6ac1 100644 --- a/tests/torch/experimental/sparsify_activations/helpers.py +++ b/tests/torch/experimental/sparsify_activations/helpers.py @@ -41,5 +41,5 @@ def dummy_llama_model(): use_cache=False, return_dict=False, ) - model = transformers.AutoModelForCausalLM.from_config(config) + model = transformers.AutoModelForCausalLM.from_config(config, attn_implementation="eager") return model From 3113e66d9097e3c78e54ec6b36e51abc077f6d7f Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:07 +0800 Subject: [PATCH 33/76] temporarily fix the timm & graph match issues --- .../sparsify_activations/pipelines.py | 45 +++++-------------- .../sparsify_activations/dummy_llama.dot | 24 +++++----- .../dummy_llama_compressed_weights.dot | 24 +++++----- .../sparsify_activations/test_algo.py | 4 +- 4 files changed, 39 insertions(+), 58 deletions(-) diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py index 61d3ae43744..97806db0307 100644 --- a/tests/post_training/experimental/sparsify_activations/pipelines.py +++ b/tests/post_training/experimental/sparsify_activations/pipelines.py @@ -16,8 +16,11 @@ from functools import partial from typing import Optional +import numpy as np import torch import torch.utils +import torch.utils.data +import torchvision from datasets import load_dataset from memory_profiler import memory_usage from optimum.intel.openvino import OVModelForCausalLM @@ -57,7 +60,10 @@ def prepare_model(self) -> None: raise RuntimeError(f"is_stateful={is_stateful} is not supported for PyTorch backend.") self.model_hf = AutoModelForCausalLM.from_pretrained( - self.model_id, torch_dtype=torch.float32, device_map="cpu", attn_implementation="eager", + self.model_id, + torch_dtype=torch.float32, + device_map="cpu", + attn_implementation="eager", ) self.model = self.model_hf elif self.backend in [BackendType.OV, BackendType.FP32]: @@ -164,38 +170,11 @@ def _compress(self): ) def prepare_calibration_dataset(self): - # TODO: for debugging only subset_size = self.compression_params.get("subset_size") or 512 - dataset = self._get_imagenet(subset_size=subset_size) - - generator = torch.Generator() - generator.manual_seed(42) - loader = torch.utils.data.DataLoader( - dataset, batch_size=self.batch_size, num_workers=4, shuffle=True, generator=generator + val_dataset = torchvision.datasets.ImageFolder( + root=self.data_dir / "imagenet" / "val", transform=self.transform ) + indices = np.random.default_rng(42).choice(len(val_dataset), size=subset_size, replace=False) + subset = torch.utils.data.Subset(val_dataset, indices=indices) + loader = torch.utils.data.DataLoader(subset, batch_size=self.batch_size, num_workers=2, shuffle=False) self.calibration_dataset = 
nncf.Dataset(loader, self.get_transform_calibration_fn()) - - def _get_imagenet(self, subset_size=None): - # TODO: for debugging only - import torch.utils - import torch.utils.data - - hf_dataset = load_dataset("imagenet-1k", split="validation") - - class Dataset(torch.utils.data.Dataset): - def __init__(self, hf_dataset, transform): - super().__init__() - self.hf_dataset = hf_dataset - self.transform = transform # will be assigned in timm internally - - def __getitem__(self, index): - sample = self.hf_dataset[index] - image = sample["image"] - image = image.convert("RGB") - return self.transform(image), sample["label"] - - def __len__(self): - return subset_size or len(self.hf_dataset) - - dataset = Dataset(hf_dataset, self.transform) - return dataset diff --git a/tests/torch/data/experimental/sparsify_activations/dummy_llama.dot b/tests/torch/data/experimental/sparsify_activations/dummy_llama.dot index 3995a93a591..05ba7d8f87c 100644 --- a/tests/torch/data/experimental/sparsify_activations/dummy_llama.dot +++ b/tests/torch/data/experimental/sparsify_activations/dummy_llama.dot @@ -25,8 +25,8 @@ strict digraph { "23 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_2" [id=23, type=transpose]; "24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" [id=24, type=cat]; "25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" [id=25, type=cos]; -"26 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" [id=26, type=to]; -"27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" [id=27, type=sin]; +"26 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" [id=26, type=sin]; +"27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" [id=27, type=to]; "28 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" [id=28, type=to]; "29 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0" [id=29, type=unsqueeze]; "30 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_1" [id=30, type=unsqueeze]; @@ -114,8 +114,8 @@ strict digraph { "112 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_2" [id=112, type=transpose]; "113 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" [id=113, type=cat]; "114 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" [id=114, type=cos]; -"115 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" [id=115, type=to]; -"116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" [id=116, type=sin]; 
+"115 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" [id=115, type=sin]; +"116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" [id=116, type=to]; "117 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" [id=117, type=to]; "118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0" [id=118, type=unsqueeze]; "119 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_1" [id=119, type=unsqueeze]; @@ -226,10 +226,10 @@ strict digraph { "22 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_2" -> "23 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_2"; "23 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_2" -> "48 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___5"; "24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0"; -"24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0"; -"25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" -> "26 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0"; -"26 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" -> "29 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0"; -"27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" -> "28 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1"; +"24 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "26 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0"; +"25 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" -> "27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0"; +"26 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" -> "28 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1"; +"27 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" -> "29 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0"; "28 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" -> "30 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_1"; "29 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0" -> "31 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___0"; "29 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0" -> "38 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___2"; @@ -332,10 +332,10 @@ strict digraph { "111 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_2" -> "112 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_2"; "112 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_2" -> "137 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___5"; "113 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "114 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0"; -"113 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0"; -"114 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" -> "115 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0"; -"115 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" -> "118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0"; -"116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" -> "117 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1"; +"113 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "115 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0"; +"114 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" -> "116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0"; +"115 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" -> "117 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1"; +"116 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" -> "118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0"; "117 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" -> "119 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_1"; "118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0" -> "120 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___0"; "118 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0" -> "127 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___2"; diff --git a/tests/torch/data/experimental/sparsify_activations/dummy_llama_compressed_weights.dot b/tests/torch/data/experimental/sparsify_activations/dummy_llama_compressed_weights.dot index 71d762b822d..c3e5cf0d0c9 100644 --- a/tests/torch/data/experimental/sparsify_activations/dummy_llama_compressed_weights.dot +++ b/tests/torch/data/experimental/sparsify_activations/dummy_llama_compressed_weights.dot @@ -33,8 +33,8 @@ strict digraph { "31 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_2" [id=31, type=transpose]; "32 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" [id=32, type=cat]; "33 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" [id=33, type=cos]; -"34 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" [id=34, type=to]; -"35 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" [id=35, type=sin]; +"34 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" [id=34, type=sin]; +"35 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" [id=35, type=to]; "36 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" [id=36, type=to]; "37 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0" [id=37, type=unsqueeze]; "38 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_1" [id=38, type=unsqueeze]; @@ -136,8 +136,8 @@ strict digraph { "134 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_2" [id=134, type=transpose]; "135 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" [id=135, type=cat]; "136 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" [id=136, type=cos]; -"137 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" [id=137, type=to]; -"138 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" [id=138, type=sin]; +"137 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" [id=137, type=sin]; +"138 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" [id=138, type=to]; "139 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" [id=139, type=to]; "140 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0" [id=140, type=unsqueeze]; "141 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_1" [id=141, type=unsqueeze]; @@ -266,10 +266,10 @@ strict digraph { "30 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/view_2" -> "31 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_2"; "31 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/transpose_2" -> "56 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__getitem___5"; "32 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "33 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0"; -"32 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "35 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0"; -"33 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" -> "34 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0"; -"34 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" -> "37 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0"; -"35 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" -> "36 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1"; +"32 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "34 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0"; +"33 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" -> "35 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0"; +"34 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" -> "36 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1"; +"35 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" -> "37 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0"; "36 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" -> "38 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_1"; "37 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0" -> "39 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___0"; "37 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/unsqueeze_0" -> "46 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[0]/LlamaAttention[self_attn]/__mul___2"; @@ -386,10 +386,10 @@ strict digraph { "133 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/view_2" -> "134 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_2"; "134 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/transpose_2" -> "159 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__getitem___5"; "135 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "136 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0"; -"135 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "138 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0"; -"136 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" -> "137 
LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0"; -"137 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" -> "140 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0"; -"138 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" -> "139 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1"; +"135 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cat_0" -> "137 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0"; +"136 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/cos_0" -> "138 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0"; +"137 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/sin_0" -> "139 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1"; +"138 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_0" -> "140 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0"; "139 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/LlamaRotaryEmbedding[rotary_emb]/to_1" -> "141 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_1"; "140 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0" -> "142 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___0"; "140 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/unsqueeze_0" -> "149 LlamaForCausalLM/LlamaModel[model]/ModuleList[layers]/LlamaDecoderLayer[1]/LlamaAttention[self_attn]/__mul___2"; diff --git a/tests/torch/experimental/sparsify_activations/test_algo.py b/tests/torch/experimental/sparsify_activations/test_algo.py index 281c92fda05..50cff0d3a5f 100644 --- a/tests/torch/experimental/sparsify_activations/test_algo.py +++ b/tests/torch/experimental/sparsify_activations/test_algo.py @@ -152,7 +152,9 @@ def test_nncf_graph(self): file_name = f"{desc.name}_compressed_weights" if self.compress_weights else desc.name ref_dot_path = Path(TEST_ROOT, "torch", "data", "experimental", "sparsify_activations", f"{file_name}.dot") graph = model.nncf.get_graph().get_graph_for_structure_analysis() - compare_nx_graph_with_reference(graph, ref_dot_path) + if desc.name != "dummy_llama": + # TODO(yujie): full graph comparison is fragile + compare_nx_graph_with_reference(graph, ref_dot_path) def test_export_openvino(self): model: NNCFNetwork = self.model From e307ac81d34c080857f8c35f888b461bc8f36d68 Mon Sep 17 00:00:00 2001 From: 
"Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:08 +0800 Subject: [PATCH 34/76] update metric --- .../experimental/sparsify_activations/reference_data.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/post_training/experimental/sparsify_activations/reference_data.yaml b/tests/post_training/experimental/sparsify_activations/reference_data.yaml index b22139e16ed..96999fae811 100644 --- a/tests/post_training/experimental/sparsify_activations/reference_data.yaml +++ b/tests/post_training/experimental/sparsify_activations/reference_data.yaml @@ -3,11 +3,11 @@ tinyllama_backend_FP32: num_int4: 0 num_int8: 0 tinyllama_ffn_sparse20_backend_TORCH: - metric_value: 0.7852 + metric_value: 0.7929 num_int4: 0 num_int8: 0 tinyllama_int8_asym_data_free_ffn_sparse20_backend_TORCH: - metric_value: 0.7997 + metric_value: 0.7915 num_int4: 0 num_int8: 312 timm/deit3_small_patch16_224_backend_FP32: From ff00a1e93c3b03a80d669457486fae57858aba4a Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:08 +0800 Subject: [PATCH 35/76] re-enable graph comparison for dummy llama --- tests/torch/experimental/sparsify_activations/test_algo.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/torch/experimental/sparsify_activations/test_algo.py b/tests/torch/experimental/sparsify_activations/test_algo.py index 50cff0d3a5f..281c92fda05 100644 --- a/tests/torch/experimental/sparsify_activations/test_algo.py +++ b/tests/torch/experimental/sparsify_activations/test_algo.py @@ -152,9 +152,7 @@ def test_nncf_graph(self): file_name = f"{desc.name}_compressed_weights" if self.compress_weights else desc.name ref_dot_path = Path(TEST_ROOT, "torch", "data", "experimental", "sparsify_activations", f"{file_name}.dot") graph = model.nncf.get_graph().get_graph_for_structure_analysis() - if desc.name != "dummy_llama": - # TODO(yujie): full graph comparison is fragile - compare_nx_graph_with_reference(graph, ref_dot_path) + compare_nx_graph_with_reference(graph, ref_dot_path) def test_export_openvino(self): model: NNCFNetwork = self.model From 982024d625f6f0a5286d8ff13294aa20aa2735ab Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:08 +0800 Subject: [PATCH 36/76] re-order staticmethods --- .../sparsify_activations_impl.py | 7 +-- .../sparsify_activations/torch_backend.py | 54 ++++++++++--------- 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index 010215b151b..dfb8236a586 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -40,7 +40,8 @@ class SparsifyActivationsAlgoBackend(ABC): CALIBRATION_TRACKING_DESC = "Conducting Activations Sparsifier Calibration" - def do_inference(self, model: TModel, dataset: Dataset): + @staticmethod + def do_inference(model: TModel, dataset: Dataset): """ Conducts model inference on given dataset to calibrate the activation sparsifiers. @@ -90,7 +91,7 @@ def calibrate_sparsifiers(self, model: TModel, graph: NNCFGraph, dataset: Datase """ @abstractmethod - def freeze_sparsifiers(self, model: TModel, graph: NNCFGraph) -> TModel: + def apply_sparsifiers(self, model: TModel, graph: NNCFGraph) -> TModel: """ Freezes the activation sparsifiers and applies the sparsification to the model. 
@@ -164,7 +165,7 @@ def do_sparsification( """ model = self._backend_entity.insert_sparsifiers(model, graph, target_sparsity_by_node) model = self._backend_entity.calibrate_sparsifiers(model, graph, dataset) - model = self._backend_entity.freeze_sparsifiers(model, graph) + model = self._backend_entity.apply_sparsifiers(model, graph) return model def _set_backend_entity(self, model: TModel) -> None: diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index c9f890e07da..0cc2ba01e9c 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -57,6 +57,22 @@ def __init__(self, target_sparsity: float, alpha: float = 0.2): self.num_batches_tracked: torch.Tensor self._freeze = True + @staticmethod + def calculate_threshold(x: torch.Tensor, target_sparsity: float) -> torch.Tensor: + """ + Calculates the threshold value such that zeroing all locations where `x.abs() <= threshold` achieves the target sparsity. + + :param x: The input tensor. + :param target_sparsity: The target sparsity level on the input tensor. + :return: The threshold value. + """ + # uses numpy's quantile implementation as torch's cannot handle large tensors + value = np.quantile( + x.detach().abs().cpu().numpy(), + q=target_sparsity, + ) + return torch.tensor(value, device=x.device, dtype=x.dtype) + def forward(self, x: torch.Tensor) -> torch.Tensor: if not self._freeze: threshold = self.calculate_threshold(x, self.target_sparsity) @@ -65,6 +81,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: x = torch.masked_fill(x, mask, 0.0) return x + def freeze(self, freeze: bool = True): + self._freeze = freeze + def reset_running_stats(self): """ Resets the running threshold and the number of tracked batches to the initial stage. @@ -72,28 +91,9 @@ def reset_running_stats(self): self.running_threshold.fill_(float("-inf")) self.num_batches_tracked.zero_() - def freeze(self, freeze: bool = True): - self._freeze = freeze - def extra_repr(self) -> str: return f"target_sparsity={self.target_sparsity}" - @staticmethod - def calculate_threshold(x: torch.Tensor, target_sparsity: float) -> torch.Tensor: - """ - Calculates the threshold so that the target sparsity can be achieved. - - :param x: The input tensor. - :param target_sparsity: The target sparsity level on the input tensor. - :return: The threshold value. - """ - # uses numpy's quantile implementation as torch's cannot handle large tensor - value = np.quantile( - x.detach().abs().cpu().numpy(), - q=target_sparsity, - ) - return torch.tensor(value, device=x.device, dtype=x.dtype) - def _update(self, threshold: torch.Tensor) -> torch.Tensor: """ Updates the running threshold by exponential moving average with decaying adjustment. @@ -121,11 +121,8 @@ class PTSparsifyActivationsAlgoBackend(SparsifyActivationsAlgoBackend): SUPPORTED_METATYPES = [om.PTLinearMetatype] - @property - def supported_metatypes(self) -> List[Type[OperatorMetatype]]: - return PTSparsifyActivationsAlgoBackend.SUPPORTED_METATYPES - - def get_sparsifiers(self, model: NNCFNetwork) -> List[ActivationsSparsifier]: + @staticmethod + def get_sparsifiers(model: NNCFNetwork) -> List[ActivationsSparsifier]: """ Finds all the activation sparsifiers in the model.
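The `_update` hunk header above refers to a running threshold kept as an "exponential moving average with decaying adjustment". The exact formula is outside this diff, so the following is only a plausible sketch of such a bias-corrected EMA, not a transcription of `ActivationsSparsifier._update`:

def ema_threshold(batch_thresholds, alpha: float = 0.2) -> float:
    # Assumed form, for illustration: `alpha` weights the newest batch
    # threshold; the raw average `m` starts biased toward zero, and dividing
    # by (1 - beta**t) decays that bias away over the first batches.
    beta = 1.0 - alpha
    m = 0.0
    corrected = float("-inf")
    for t, threshold in enumerate(batch_thresholds, start=1):
        m = beta * m + alpha * threshold
        corrected = m / (1.0 - beta**t)
    return corrected

# With a constant per-batch threshold, the corrected average recovers it exactly:
assert abs(ema_threshold([0.5, 0.5, 0.5]) - 0.5) < 1e-9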
@@ -134,6 +131,10 @@ def get_sparsifiers(self, model: NNCFNetwork) -> List[ActivationsSparsifier]: """ return [m for m in model.nncf.modules() if isinstance(m, ActivationsSparsifier)] + @property + def supported_metatypes(self) -> List[Type[OperatorMetatype]]: + return PTSparsifyActivationsAlgoBackend.SUPPORTED_METATYPES + def insert_sparsifiers( self, model: NNCFNetwork, @@ -171,13 +172,14 @@ def calibrate_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph, dataset: D self.do_inference(model, dataset) return model - def freeze_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph) -> NNCFNetwork: + def apply_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph) -> NNCFNetwork: for sparsifier in self.get_sparsifiers(model): sparsifier.freeze(True) model.nncf.rebuild_graph() return model - def _get_activation_port_id(self, node: NNCFNode, graph: NNCFGraph) -> int: + @staticmethod + def _get_activation_port_id(node: NNCFNode, graph: NNCFGraph) -> int: """ Finds the input activation port id for the node. From 8f500f7c647a15e1252212e1ee393a60af97b910 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:08 +0800 Subject: [PATCH 37/76] refactor pipelines with added cuda_torch backend tests --- .../sparsify_activations/model_scope.py | 4 +- .../sparsify_activations/pipelines.py | 252 +++++++++++++----- .../sparsify_activations/reference_data.yaml | 8 + .../test_sparsify_activations_conformance.py | 6 +- 4 files changed, 192 insertions(+), 78 deletions(-) diff --git a/tests/post_training/experimental/sparsify_activations/model_scope.py b/tests/post_training/experimental/sparsify_activations/model_scope.py index 4c5c143d990..54026c9f1a8 100644 --- a/tests/post_training/experimental/sparsify_activations/model_scope.py +++ b/tests/post_training/experimental/sparsify_activations/model_scope.py @@ -40,7 +40,7 @@ } }, }, - "backends": [BackendType.TORCH], + "backends": [BackendType.TORCH, BackendType.CUDA_TORCH], }, { "reported_name": "tinyllama_int8_asym_data_free_ffn_sparse20", @@ -58,7 +58,7 @@ } }, }, - "backends": [BackendType.TORCH], + "backends": [BackendType.TORCH, BackendType.CUDA_TORCH], }, { "reported_name": "timm/deit3_small_patch16_224", diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py index 97806db0307..755a5dd5477 100644 --- a/tests/post_training/experimental/sparsify_activations/pipelines.py +++ b/tests/post_training/experimental/sparsify_activations/pipelines.py @@ -10,19 +10,19 @@ # limitations under the License. 
-import os -import time from dataclasses import dataclass -from functools import partial +from dataclasses import field +from pathlib import Path from typing import Optional import numpy as np +import openvino as ov import torch import torch.utils import torch.utils.data import torchvision from datasets import load_dataset -from memory_profiler import memory_usage +from optimum.exporters.openvino.convert import export_from_model from optimum.intel.openvino import OVModelForCausalLM from transformers import AutoModelForCausalLM @@ -31,8 +31,11 @@ import nncf.experimental.torch import nncf.experimental.torch.sparsify_activations from nncf.experimental.torch.sparsify_activations.torch_backend import SparsifyActivationsAlgoBackend +from tests.post_training.pipelines.base import LIMIT_LENGTH_OF_STATUS from tests.post_training.pipelines.base import PT_BACKENDS from tests.post_training.pipelines.base import BackendType +from tests.post_training.pipelines.base import NumCompressNodes +from tests.post_training.pipelines.base import RunInfo from tests.post_training.pipelines.image_classification_timm import ImageClassificationTimm from tests.post_training.pipelines.lm_weight_compression import LMWeightCompression from tests.post_training.pipelines.lm_weight_compression import WCTimeStats @@ -40,7 +43,7 @@ @dataclass -class SparsifyActivationsTimeStats(WCTimeStats): +class SATimeStats(WCTimeStats): """ Contains statistics that are parsed from the stdout of Sparsify Activations tests. """ @@ -51,18 +54,143 @@ class SparsifyActivationsTimeStats(WCTimeStats): REGEX_PREFIX = [*WCTimeStats.REGEX_PREFIX, SparsifyActivationsAlgoBackend.CALIBRATION_TRACKING_DESC] -class LMSparsifyActivations(LMWeightCompression): - def prepare_model(self) -> None: +@dataclass +class SANumCompressNodes(NumCompressNodes): + num_sparse_activations: Optional[int] = None + + +@dataclass +class SARunInfo(RunInfo): + num_compress_nodes: SANumCompressNodes = field(default_factory=SANumCompressNodes) + + def get_result_dict(self): + return { + "Model": self.model, + "Backend": self.backend.value if self.backend else None, + "Metric name": self.metric_name, + "Metric value": self.metric_value, + "Metric diff": self.metric_diff, + "Num FQ": self.num_compress_nodes.num_fq_nodes, + "Num int4": self.num_compress_nodes.num_int4, + "Num int8": self.num_compress_nodes.num_int8, + "Num sparse activations": self.num_compress_nodes.num_sparse_activations, + "RAM MiB": self.format_memory_usage(self.compression_memory_usage), + "Compr. time": self.format_time(self.time_compression), + **self.stats_from_output.get_stats(), + "Total time": self.format_time(self.time_total), + "FPS": self.fps, + "Status": self.status[:LIMIT_LENGTH_OF_STATUS] if self.status is not None else None, + } + + +class SAPipelineMixin: + """ + Common methods in the test pipeline for Sparsify Activations. 
+ """ + + def __init__( + self, + reported_name: str, + model_id: str, + backend: BackendType, + compression_params: dict, + output_dir: Path, + data_dir: Path, + reference_data: dict, + no_eval: bool, + run_benchmark_app: bool, + params: dict = None, + batch_size: int = 1, + ): + super().__init__( + reported_name=reported_name, + model_id=model_id, + backend=backend, + compression_params=compression_params, + output_dir=output_dir, + data_dir=data_dir, + reference_data=reference_data, + no_eval=no_eval, + run_benchmark_app=run_benchmark_app, + params=params, + batch_size=batch_size, + ) + self.run_info = SARunInfo(model=reported_name, backend=backend) + + @staticmethod + def count_compressed_nodes_from_ir(model: ov.Model) -> SANumCompressNodes: + """ + Get number of compressed nodes in the compressed IR. + """ + num_fq_nodes = 0 + num_int8 = 0 + num_int4 = 0 + num_sparse_activations = 0 + + for node in model.get_ops(): + if node.type_info.name == "FakeQuantize": + num_fq_nodes += 1 + for i in range(node.get_output_size()): + if node.get_output_element_type(i).get_type_name() in ["i8", "u8"]: + num_int8 += 1 + if node.get_output_element_type(i).get_type_name() in ["i4", "u4"]: + num_int4 += 1 + return SANumCompressNodes( + num_fq_nodes=num_fq_nodes, + num_int8=num_int8, + num_int4=num_int4, + num_sparse_activations=num_sparse_activations, + ) + + def collect_data_from_stdout(self, stdout: str): + stats = SATimeStats() + stats.fill(stdout) + self.run_info.stats_from_output = stats + + def _validate(self): + super()._validate() + ref_num_sparse_activations = self.reference_data.get("num_sparse_activations", 0) + num_sparse_activations = self.run_info.num_compress_nodes.num_sparse_activations + if num_sparse_activations != ref_num_sparse_activations: + status_msg = f"Regression: The number of sparse activations is {num_sparse_activations}, \ + which differs from reference {ref_num_sparse_activations}." + # raise ValueError(status_msg) + print(status_msg) + + @set_torch_seed(seed=42) + def _compress(self): + """ + Actual call of weight compression and/or activation sparsification. 
+ """ + self.compressed_model = self.model + if self.compression_params.get("compress_weights", None) is not None: + self.compressed_model = nncf.compress_weights( + self.compressed_model, + dataset=self.calibration_dataset, + **self.compression_params["compress_weights"], + ) + if self.compression_params.get("sparsify_activations", None) is not None: + self.compressed_model = nncf.experimental.torch.sparsify_activations.sparsify_activations( + self.compressed_model, + dataset=self.calibration_dataset, + **self.compression_params["sparsify_activations"], + ) + + +class LMSparsifyActivations(SAPipelineMixin, LMWeightCompression): + DEFAULT_SUBSET_SIZE = 32 + + def prepare_model(self): is_stateful = self.params.get("is_stateful", False) - if self.backend == BackendType.TORCH: + if self.backend in PT_BACKENDS: if is_stateful: raise RuntimeError(f"is_stateful={is_stateful} is not supported for PyTorch backend.") self.model_hf = AutoModelForCausalLM.from_pretrained( self.model_id, torch_dtype=torch.float32, - device_map="cpu", + device_map="cuda" if self.backend == BackendType.CUDA_TORCH else "cpu", attn_implementation="eager", ) self.model = self.model_hf @@ -91,86 +219,57 @@ def prepare_model(self) -> None: if not (self.fp32_model_dir / self.OV_MODEL_NAME).exists(): self._dump_model_fp32() + def get_transform_calibration_fn(self): + original_fn = super().get_transform_calibration_fn() + + def transform_fn(data): + inputs = original_fn(data, max_tokens=256) + if self.backend == BackendType.CUDA_TORCH: + for input_name in inputs: + inputs[input_name] = torch.from_numpy(inputs[input_name]).cuda() + return inputs + + return transform_fn + def prepare_calibration_dataset(self): dataset = load_dataset("wikitext", "wikitext-2-v1", split="train", revision="b08601e") dataset = dataset.filter(lambda example: len(example["text"].split()) > 256) - subset_size = self.compression_params.get("subset_size") or 64 + subset_size = self.compression_params.get("subset_size") or self.DEFAULT_SUBSET_SIZE dataset = dataset.select(range(subset_size)) - self.calibration_dataset = nncf.Dataset(dataset, partial(self.get_transform_calibration_fn(), max_tokens=256)) - - def compress(self) -> None: - if self.backend == BackendType.FP32: - return - start_time = time.perf_counter() - self.run_info.compression_memory_usage = memory_usage(self._compress, max_usage=True) - self.run_info.time_compression = time.perf_counter() - start_time - - def collect_data_from_stdout(self, stdout: str): - stats = SparsifyActivationsTimeStats() - stats.fill(stdout) - self.run_info.stats_from_output = stats + self.calibration_dataset = nncf.Dataset(dataset, self.get_transform_calibration_fn()) - @set_torch_seed(seed=42) - def _compress(self): - """ - Actual call of weight compression and/or activation sparsification. 
- """ - self.compressed_model = self.model - if self.compression_params.get("compress_weights", None) is not None: - self.compressed_model = nncf.compress_weights( - self.compressed_model, - dataset=self.calibration_dataset, - **self.compression_params["compress_weights"], - ) - if self.compression_params.get("sparsify_activations", None) is not None: - self.compressed_model = nncf.experimental.torch.sparsify_activations.sparsify_activations( - self.compressed_model, - dataset=self.calibration_dataset, - **self.compression_params["sparsify_activations"], + def save_compressed_model(self): + if self.backend == BackendType.CUDA_TORCH: + export_from_model( + self.model_hf, self.output_model_dir, stateful=False, compression_option="fp32", device="cuda" ) + else: + super().__init__() - -class ImageClassificationTimmSparsifyActivations(ImageClassificationTimm): - def compress(self) -> None: + def get_num_compressed(self): """ - Run compression of the model and collect time and memory usage information. + Get number of quantization ops and sparsifier ops in the compressed IR. """ - if self.backend == BackendType.FP32: - # To validate not compressed model - self.path_compressed_ir = self.fp32_model_dir / "model_fp32.xml" - return - if self.backend in PT_BACKENDS: - inference_num_threads = os.environ.get("INFERENCE_NUM_THREADS") - if inference_num_threads is not None: - torch.set_num_threads(int(inference_num_threads)) + model = ov.Core().read_model(self.output_model_dir / self.OV_MODEL_NAME) else: - raise RuntimeError(f"backend={self.backend.value} is not supported.") + model = self.model + self.run_info.num_compress_nodes = self.count_compressed_nodes_from_ir(model) - start_time = time.perf_counter() - self.run_info.compression_memory_usage = memory_usage(self._compress, max_usage=True) - self.run_info.time_compression = time.perf_counter() - start_time + def _dump_model_fp32(self): + if self.backend == BackendType.TORCH: + export_from_model( + self.model_hf, self.fp32_model_dir, stateful=False, compression_option="fp32", device="cuda" + ) + else: + super()._dump_model_fp32() - def collect_data_from_stdout(self, stdout: str): - stats = SparsifyActivationsTimeStats() - stats.fill(stdout) - self.run_info.stats_from_output = stats - @set_torch_seed(seed=42) - def _compress(self): - """ - Actual call of activation sparsification. - """ - self.compressed_model = self.model - if self.compression_params.get("sparsify_activations", None) is not None: - self.compressed_model = nncf.experimental.torch.sparsify_activations.sparsify_activations( - self.compressed_model, - dataset=self.calibration_dataset, - **self.compression_params["sparsify_activations"], - ) +class ImageClassificationTimmSparsifyActivations(SAPipelineMixin, ImageClassificationTimm): + DEFAULT_SUBSET_SIZE = 256 def prepare_calibration_dataset(self): - subset_size = self.compression_params.get("subset_size") or 512 + subset_size = self.compression_params.get("subset_size") or self.DEFAULT_SUBSET_SIZE val_dataset = torchvision.datasets.ImageFolder( root=self.data_dir / "imagenet" / "val", transform=self.transform ) @@ -178,3 +277,10 @@ def prepare_calibration_dataset(self): subset = torch.utils.data.Subset(val_dataset, indices=indices) loader = torch.utils.data.DataLoader(subset, batch_size=self.batch_size, num_workers=2, shuffle=False) self.calibration_dataset = nncf.Dataset(loader, self.get_transform_calibration_fn()) + + def get_num_compressed(self): + """ + Get number of quantization ops and sparsifier ops in the compressed IR. 
+ """ + model = ov.Core().read_model(model=self.path_compressed_ir) + self.run_info.num_compress_nodes = self.count_compressed_nodes_from_ir(model) diff --git a/tests/post_training/experimental/sparsify_activations/reference_data.yaml b/tests/post_training/experimental/sparsify_activations/reference_data.yaml index 96999fae811..48629e9b2a8 100644 --- a/tests/post_training/experimental/sparsify_activations/reference_data.yaml +++ b/tests/post_training/experimental/sparsify_activations/reference_data.yaml @@ -6,10 +6,18 @@ tinyllama_ffn_sparse20_backend_TORCH: metric_value: 0.7929 num_int4: 0 num_int8: 0 +tinyllama_ffn_sparse20_backend_CUDA_TORCH: + metric_value: 0.7929 + num_int4: 0 + num_int8: 0 tinyllama_int8_asym_data_free_ffn_sparse20_backend_TORCH: metric_value: 0.7915 num_int4: 0 num_int8: 312 +tinyllama_int8_asym_data_free_ffn_sparse20_backend_CUDA_TORCH: + metric_value: 0.7915 + num_int4: 0 + num_int8: 312 timm/deit3_small_patch16_224_backend_FP32: metric_value: 0.8135 timm/deit3_small_patch16_224_qkv_sparse20_fc1_sparse20_fc2_sparse30_backend_TORCH: diff --git a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py index 7b5827e9c37..712dcc8960a 100644 --- a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py +++ b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py @@ -21,9 +21,9 @@ import yaml from tests.post_training.experimental.sparsify_activations.model_scope import SPARSIFY_ACTIVATIONS_TEST_CASES +from tests.post_training.experimental.sparsify_activations.pipelines import SARunInfo from tests.post_training.pipelines.base import BackendType from tests.post_training.pipelines.base import BaseTestPipeline -from tests.post_training.pipelines.base import RunInfo from tests.post_training.test_quantize_conformance import create_short_run_info from tests.post_training.test_quantize_conformance import fixture_batch_size # noqa: F401 from tests.post_training.test_quantize_conformance import fixture_data # noqa: F401 @@ -50,7 +50,7 @@ def fixture_sparsify_activations_reference_data(): @pytest.fixture(scope="session", name="sparsify_activations_result_data") def fixture_sparsify_activations_report_data(output_dir): - data: Dict[str, RunInfo] = {} + data: Dict[str, SARunInfo] = {} yield data if data: test_results = OrderedDict(sorted(data.items())) @@ -98,7 +98,7 @@ def test_sparsify_activations( test_case_name: str, data_dir: Path, output_dir: Path, - sparsify_activations_result_data: Dict[str, RunInfo], + sparsify_activations_result_data: Dict[str, SARunInfo], no_eval: bool, batch_size: int, run_fp32_backend: bool, From 109d305c36a7da0e9f6f23419e49c29bf03c2a3d Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:08 +0800 Subject: [PATCH 38/76] bugfix --- .../experimental/sparsify_activations/pipelines.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py index 755a5dd5477..cd9be02f7fa 100644 --- a/tests/post_training/experimental/sparsify_activations/pipelines.py +++ b/tests/post_training/experimental/sparsify_activations/pipelines.py @@ -244,7 +244,7 @@ def save_compressed_model(self): self.model_hf, self.output_model_dir, stateful=False, compression_option="fp32", device="cuda" ) else: - 
super().__init__() + super().save_compressed_model() def get_num_compressed(self): """ @@ -257,7 +257,7 @@ def get_num_compressed(self): self.run_info.num_compress_nodes = self.count_compressed_nodes_from_ir(model) def _dump_model_fp32(self): - if self.backend == BackendType.TORCH: + if self.backend == BackendType.CUDA_TORCH: export_from_model( self.model_hf, self.fp32_model_dir, stateful=False, compression_option="fp32", device="cuda" ) From f54682688992ac05d67b8839bf90da7d87616984 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:08 +0800 Subject: [PATCH 39/76] add tests on sparsifier pattern count --- .../sparsify_activations/pipelines.py | 8 +- .../sparsify_activations/reference_data.yaml | 16 +++- .../sparsify_activations/three_linear.dot | 71 ++++++++------- .../three_linear_compressed_weights.dot | 87 ++++++++++--------- .../three_linear_ignore1.dot | 41 +++++++++ ...hree_linear_ignore1_compressed_weights.dot | 57 ++++++++++++ .../sparsify_activations/helpers.py | 26 ++++++ .../sparsify_activations/test_algo.py | 24 ++++- 8 files changed, 252 insertions(+), 78 deletions(-) create mode 100644 tests/torch/data/experimental/sparsify_activations/three_linear_ignore1.dot create mode 100644 tests/torch/data/experimental/sparsify_activations/three_linear_ignore1_compressed_weights.dot diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py index cd9be02f7fa..55a0d8b410a 100644 --- a/tests/post_training/experimental/sparsify_activations/pipelines.py +++ b/tests/post_training/experimental/sparsify_activations/pipelines.py @@ -39,6 +39,7 @@ from tests.post_training.pipelines.image_classification_timm import ImageClassificationTimm from tests.post_training.pipelines.lm_weight_compression import LMWeightCompression from tests.post_training.pipelines.lm_weight_compression import WCTimeStats +from tests.torch.experimental.sparsify_activations.helpers import count_sparsifier_patterns_in_ov from tests.torch.helpers import set_torch_seed @@ -125,8 +126,6 @@ def count_compressed_nodes_from_ir(model: ov.Model) -> SANumCompressNodes: num_fq_nodes = 0 num_int8 = 0 num_int4 = 0 - num_sparse_activations = 0 - for node in model.get_ops(): if node.type_info.name == "FakeQuantize": num_fq_nodes += 1 @@ -135,6 +134,8 @@ def count_compressed_nodes_from_ir(model: ov.Model) -> SANumCompressNodes: num_int8 += 1 if node.get_output_element_type(i).get_type_name() in ["i4", "u4"]: num_int4 += 1 + + num_sparse_activations = count_sparsifier_patterns_in_ov(model) return SANumCompressNodes( num_fq_nodes=num_fq_nodes, num_int8=num_int8, @@ -154,8 +155,7 @@ def _validate(self): if num_sparse_activations != ref_num_sparse_activations: status_msg = f"Regression: The number of sparse activations is {num_sparse_activations}, \ which differs from reference {ref_num_sparse_activations}." 
- # raise ValueError(status_msg) - print(status_msg) + raise ValueError(status_msg) @set_torch_seed(seed=42) def _compress(self): diff --git a/tests/post_training/experimental/sparsify_activations/reference_data.yaml b/tests/post_training/experimental/sparsify_activations/reference_data.yaml index 48629e9b2a8..d237bf5986b 100644 --- a/tests/post_training/experimental/sparsify_activations/reference_data.yaml +++ b/tests/post_training/experimental/sparsify_activations/reference_data.yaml @@ -2,25 +2,39 @@ tinyllama_backend_FP32: metric_value: 1.0 num_int4: 0 num_int8: 0 + num_sparse_activations: 0 tinyllama_ffn_sparse20_backend_TORCH: metric_value: 0.7929 num_int4: 0 num_int8: 0 + num_sparse_activations: 44 tinyllama_ffn_sparse20_backend_CUDA_TORCH: metric_value: 0.7929 num_int4: 0 num_int8: 0 + num_sparse_activations: 44 tinyllama_int8_asym_data_free_ffn_sparse20_backend_TORCH: metric_value: 0.7915 num_int4: 0 num_int8: 312 + num_sparse_activations: 44 tinyllama_int8_asym_data_free_ffn_sparse20_backend_CUDA_TORCH: metric_value: 0.7915 num_int4: 0 num_int8: 312 + num_sparse_activations: 44 timm/deit3_small_patch16_224_backend_FP32: metric_value: 0.8135 + num_int4: 0 + num_int8: 0 + num_sparse_activations: 36 timm/deit3_small_patch16_224_qkv_sparse20_fc1_sparse20_fc2_sparse30_backend_TORCH: metric_value: 0.8097 + num_int4: 0 + num_int8: 0 + num_sparse_activations: 36 timm/deit3_small_patch16_224_qkv_sparse20_fc1_sparse20_fc2_sparse30_backend_CUDA_TORCH: - metric_value: 0.8097 \ No newline at end of file + metric_value: 0.8097 + num_int4: 0 + num_int8: 0 + num_sparse_activations: 36 \ No newline at end of file diff --git a/tests/torch/data/experimental/sparsify_activations/three_linear.dot b/tests/torch/data/experimental/sparsify_activations/three_linear.dot index 19a4b32561e..36779fe7f61 100644 --- a/tests/torch/data/experimental/sparsify_activations/three_linear.dot +++ b/tests/torch/data/experimental/sparsify_activations/three_linear.dot @@ -4,38 +4,45 @@ strict digraph { "2 ThreeLinearModel/Embedding[embedding]/embedding_0" [id=2, type=embedding]; "3 linear1.weight" [id=3, type=nncf_model_const]; "4 linear1.bias" [id=4, type=nncf_model_const]; -"5 ThreeLinearModel/Linear[linear1]/linear_0" [id=5, type=linear]; -"6 linear3.weight" [id=6, type=nncf_model_const]; -"7 linear3.bias" [id=7, type=nncf_model_const]; -"8 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" [id=8, type=abs]; -"9 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" [id=9, type=le]; -"10 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" [id=10, type=masked_fill]; -"11 ThreeLinearModel/Linear[linear3]/linear_0" [id=11, type=linear]; -"12 linear2.weight" [id=12, type=nncf_model_const]; -"13 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" [id=13, type=abs]; -"14 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" [id=14, type=le]; -"15 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" [id=15, type=masked_fill]; -"16 ThreeLinearModel/Linear[linear2]/linear_0" [id=16, type=linear]; -"17 /nncf_model_output_0" [id=17, type=nncf_model_output]; -"18 /nncf_model_output_1" [id=18, type=nncf_model_output]; +"5 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/abs_0" [id=5, type=abs]; +"6 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/le_0" [id=6, type=le]; +"7 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/masked_fill_0" [id=7, type=masked_fill]; +"8 ThreeLinearModel/Linear[linear1]/linear_0" [id=8, type=linear]; +"9 
linear3.weight" [id=9, type=nncf_model_const]; +"10 linear3.bias" [id=10, type=nncf_model_const]; +"11 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" [id=11, type=abs]; +"12 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" [id=12, type=le]; +"13 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" [id=13, type=masked_fill]; +"14 ThreeLinearModel/Linear[linear3]/linear_0" [id=14, type=linear]; +"15 linear2.weight" [id=15, type=nncf_model_const]; +"16 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" [id=16, type=abs]; +"17 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" [id=17, type=le]; +"18 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" [id=18, type=masked_fill]; +"19 ThreeLinearModel/Linear[linear2]/linear_0" [id=19, type=linear]; +"20 /nncf_model_output_0" [id=20, type=nncf_model_output]; +"21 /nncf_model_output_1" [id=21, type=nncf_model_output]; "0 /nncf_model_input_0" -> "2 ThreeLinearModel/Embedding[embedding]/embedding_0"; "1 embedding.weight" -> "2 ThreeLinearModel/Embedding[embedding]/embedding_0"; -"2 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "5 ThreeLinearModel/Linear[linear1]/linear_0"; -"2 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "13 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0"; -"2 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "15 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; -"3 linear1.weight" -> "5 ThreeLinearModel/Linear[linear1]/linear_0"; -"4 linear1.bias" -> "5 ThreeLinearModel/Linear[linear1]/linear_0"; -"5 ThreeLinearModel/Linear[linear1]/linear_0" -> "8 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0"; -"5 ThreeLinearModel/Linear[linear1]/linear_0" -> "10 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; -"6 linear3.weight" -> "11 ThreeLinearModel/Linear[linear3]/linear_0"; -"7 linear3.bias" -> "11 ThreeLinearModel/Linear[linear3]/linear_0"; -"8 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" -> "9 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0"; -"9 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" -> "10 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; -"10 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" -> "11 ThreeLinearModel/Linear[linear3]/linear_0"; -"11 ThreeLinearModel/Linear[linear3]/linear_0" -> "17 /nncf_model_output_0"; -"12 linear2.weight" -> "16 ThreeLinearModel/Linear[linear2]/linear_0"; -"13 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" -> "14 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0"; -"14 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" -> "15 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; -"15 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" -> "16 ThreeLinearModel/Linear[linear2]/linear_0"; -"16 ThreeLinearModel/Linear[linear2]/linear_0" -> "18 /nncf_model_output_1"; +"2 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "5 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/abs_0"; +"2 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "7 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/masked_fill_0"; +"2 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "16 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0"; +"2 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "18 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; +"3 
linear1.weight" -> "8 ThreeLinearModel/Linear[linear1]/linear_0"; +"4 linear1.bias" -> "8 ThreeLinearModel/Linear[linear1]/linear_0"; +"5 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/abs_0" -> "6 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/le_0"; +"6 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/le_0" -> "7 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/masked_fill_0"; +"7 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/masked_fill_0" -> "8 ThreeLinearModel/Linear[linear1]/linear_0"; +"8 ThreeLinearModel/Linear[linear1]/linear_0" -> "11 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0"; +"8 ThreeLinearModel/Linear[linear1]/linear_0" -> "13 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; +"9 linear3.weight" -> "14 ThreeLinearModel/Linear[linear3]/linear_0"; +"10 linear3.bias" -> "14 ThreeLinearModel/Linear[linear3]/linear_0"; +"11 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" -> "12 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0"; +"12 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" -> "13 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; +"13 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" -> "14 ThreeLinearModel/Linear[linear3]/linear_0"; +"14 ThreeLinearModel/Linear[linear3]/linear_0" -> "20 /nncf_model_output_0"; +"15 linear2.weight" -> "19 ThreeLinearModel/Linear[linear2]/linear_0"; +"16 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" -> "17 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0"; +"17 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" -> "18 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; +"18 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" -> "19 ThreeLinearModel/Linear[linear2]/linear_0"; +"19 ThreeLinearModel/Linear[linear2]/linear_0" -> "21 /nncf_model_output_1"; } diff --git a/tests/torch/data/experimental/sparsify_activations/three_linear_compressed_weights.dot b/tests/torch/data/experimental/sparsify_activations/three_linear_compressed_weights.dot index ae3f667ff3a..c6488f1131b 100644 --- a/tests/torch/data/experimental/sparsify_activations/three_linear_compressed_weights.dot +++ b/tests/torch/data/experimental/sparsify_activations/three_linear_compressed_weights.dot @@ -8,50 +8,57 @@ strict digraph { "6 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=6, type=decompress_symmetric]; "7 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/type_0" [id=7, type=type]; "8 linear1.bias" [id=8, type=nncf_model_const]; -"9 ThreeLinearModel/Linear[linear1]/linear_0" [id=9, type=linear]; -"10 linear3.weight" [id=10, type=nncf_model_const]; -"11 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=11, type=decompress_symmetric]; -"12 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/type_0" [id=12, type=type]; -"13 linear3.bias" [id=13, type=nncf_model_const]; -"14 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" [id=14, type=abs]; -"15 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" [id=15, type=le]; -"16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" [id=16, type=masked_fill]; -"17 ThreeLinearModel/Linear[linear3]/linear_0" [id=17, type=linear]; -"18 linear2.weight" [id=18, type=nncf_model_const]; -"19 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/decompress_symmetric_0" 
[id=19, type=decompress_symmetric]; -"20 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/type_0" [id=20, type=type]; -"21 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" [id=21, type=abs]; -"22 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" [id=22, type=le]; -"23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" [id=23, type=masked_fill]; -"24 ThreeLinearModel/Linear[linear2]/linear_0" [id=24, type=linear]; -"25 /nncf_model_output_0" [id=25, type=nncf_model_output]; -"26 /nncf_model_output_1" [id=26, type=nncf_model_output]; +"9 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/abs_0" [id=9, type=abs]; +"10 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/le_0" [id=10, type=le]; +"11 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/masked_fill_0" [id=11, type=masked_fill]; +"12 ThreeLinearModel/Linear[linear1]/linear_0" [id=12, type=linear]; +"13 linear3.weight" [id=13, type=nncf_model_const]; +"14 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=14, type=decompress_symmetric]; +"15 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/type_0" [id=15, type=type]; +"16 linear3.bias" [id=16, type=nncf_model_const]; +"17 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" [id=17, type=abs]; +"18 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" [id=18, type=le]; +"19 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" [id=19, type=masked_fill]; +"20 ThreeLinearModel/Linear[linear3]/linear_0" [id=20, type=linear]; +"21 linear2.weight" [id=21, type=nncf_model_const]; +"22 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=22, type=decompress_symmetric]; +"23 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/type_0" [id=23, type=type]; +"24 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" [id=24, type=abs]; +"25 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" [id=25, type=le]; +"26 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" [id=26, type=masked_fill]; +"27 ThreeLinearModel/Linear[linear2]/linear_0" [id=27, type=linear]; +"28 /nncf_model_output_0" [id=28, type=nncf_model_output]; +"29 /nncf_model_output_1" [id=29, type=nncf_model_output]; "0 /nncf_model_input_0" -> "4 ThreeLinearModel/Embedding[embedding]/embedding_0"; "1 embedding.weight" -> "2 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/decompress_symmetric_0"; "2 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "3 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/type_0"; "3 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/type_0" -> "4 ThreeLinearModel/Embedding[embedding]/embedding_0"; -"4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "9 ThreeLinearModel/Linear[linear1]/linear_0"; -"4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "21 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0"; -"4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; +"4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "9 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/abs_0"; +"4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "11 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/masked_fill_0"; +"4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "24 
ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0"; +"4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "26 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; "5 linear1.weight" -> "6 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/decompress_symmetric_0"; "6 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "7 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/type_0"; -"7 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/type_0" -> "9 ThreeLinearModel/Linear[linear1]/linear_0"; -"8 linear1.bias" -> "9 ThreeLinearModel/Linear[linear1]/linear_0"; -"9 ThreeLinearModel/Linear[linear1]/linear_0" -> "14 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0"; -"9 ThreeLinearModel/Linear[linear1]/linear_0" -> "16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; -"10 linear3.weight" -> "11 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/decompress_symmetric_0"; -"11 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "12 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/type_0"; -"12 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/type_0" -> "17 ThreeLinearModel/Linear[linear3]/linear_0"; -"13 linear3.bias" -> "17 ThreeLinearModel/Linear[linear3]/linear_0"; -"14 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" -> "15 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0"; -"15 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" -> "16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; -"16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" -> "17 ThreeLinearModel/Linear[linear3]/linear_0"; -"17 ThreeLinearModel/Linear[linear3]/linear_0" -> "25 /nncf_model_output_0"; -"18 linear2.weight" -> "19 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/decompress_symmetric_0"; -"19 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "20 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/type_0"; -"20 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/type_0" -> "24 ThreeLinearModel/Linear[linear2]/linear_0"; -"21 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" -> "22 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0"; -"22 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" -> "23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; -"23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" -> "24 ThreeLinearModel/Linear[linear2]/linear_0"; -"24 ThreeLinearModel/Linear[linear2]/linear_0" -> "26 /nncf_model_output_1"; +"7 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/type_0" -> "12 ThreeLinearModel/Linear[linear1]/linear_0"; +"8 linear1.bias" -> "12 ThreeLinearModel/Linear[linear1]/linear_0"; +"9 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/abs_0" -> "10 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/le_0"; +"10 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/le_0" -> "11 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/masked_fill_0"; +"11 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/masked_fill_0" -> "12 ThreeLinearModel/Linear[linear1]/linear_0"; +"12 ThreeLinearModel/Linear[linear1]/linear_0" -> "17 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0"; +"12 ThreeLinearModel/Linear[linear1]/linear_0" -> "19 
ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; +"13 linear3.weight" -> "14 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"14 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "15 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/type_0"; +"15 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/type_0" -> "20 ThreeLinearModel/Linear[linear3]/linear_0"; +"16 linear3.bias" -> "20 ThreeLinearModel/Linear[linear3]/linear_0"; +"17 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" -> "18 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0"; +"18 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" -> "19 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; +"19 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" -> "20 ThreeLinearModel/Linear[linear3]/linear_0"; +"20 ThreeLinearModel/Linear[linear3]/linear_0" -> "28 /nncf_model_output_0"; +"21 linear2.weight" -> "22 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"22 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "23 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/type_0"; +"23 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/type_0" -> "27 ThreeLinearModel/Linear[linear2]/linear_0"; +"24 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" -> "25 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0"; +"25 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" -> "26 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; +"26 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" -> "27 ThreeLinearModel/Linear[linear2]/linear_0"; +"27 ThreeLinearModel/Linear[linear2]/linear_0" -> "29 /nncf_model_output_1"; } diff --git a/tests/torch/data/experimental/sparsify_activations/three_linear_ignore1.dot b/tests/torch/data/experimental/sparsify_activations/three_linear_ignore1.dot new file mode 100644 index 00000000000..19a4b32561e --- /dev/null +++ b/tests/torch/data/experimental/sparsify_activations/three_linear_ignore1.dot @@ -0,0 +1,41 @@ +strict digraph { +"0 /nncf_model_input_0" [id=0, type=nncf_model_input]; +"1 embedding.weight" [id=1, type=nncf_model_const]; +"2 ThreeLinearModel/Embedding[embedding]/embedding_0" [id=2, type=embedding]; +"3 linear1.weight" [id=3, type=nncf_model_const]; +"4 linear1.bias" [id=4, type=nncf_model_const]; +"5 ThreeLinearModel/Linear[linear1]/linear_0" [id=5, type=linear]; +"6 linear3.weight" [id=6, type=nncf_model_const]; +"7 linear3.bias" [id=7, type=nncf_model_const]; +"8 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" [id=8, type=abs]; +"9 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" [id=9, type=le]; +"10 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" [id=10, type=masked_fill]; +"11 ThreeLinearModel/Linear[linear3]/linear_0" [id=11, type=linear]; +"12 linear2.weight" [id=12, type=nncf_model_const]; +"13 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" [id=13, type=abs]; +"14 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" [id=14, type=le]; +"15 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" [id=15, type=masked_fill]; +"16 ThreeLinearModel/Linear[linear2]/linear_0" [id=16, type=linear]; +"17 /nncf_model_output_0" [id=17, type=nncf_model_output]; +"18 
/nncf_model_output_1" [id=18, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "2 ThreeLinearModel/Embedding[embedding]/embedding_0"; +"1 embedding.weight" -> "2 ThreeLinearModel/Embedding[embedding]/embedding_0"; +"2 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "5 ThreeLinearModel/Linear[linear1]/linear_0"; +"2 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "13 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0"; +"2 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "15 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; +"3 linear1.weight" -> "5 ThreeLinearModel/Linear[linear1]/linear_0"; +"4 linear1.bias" -> "5 ThreeLinearModel/Linear[linear1]/linear_0"; +"5 ThreeLinearModel/Linear[linear1]/linear_0" -> "8 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0"; +"5 ThreeLinearModel/Linear[linear1]/linear_0" -> "10 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; +"6 linear3.weight" -> "11 ThreeLinearModel/Linear[linear3]/linear_0"; +"7 linear3.bias" -> "11 ThreeLinearModel/Linear[linear3]/linear_0"; +"8 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" -> "9 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0"; +"9 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" -> "10 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; +"10 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" -> "11 ThreeLinearModel/Linear[linear3]/linear_0"; +"11 ThreeLinearModel/Linear[linear3]/linear_0" -> "17 /nncf_model_output_0"; +"12 linear2.weight" -> "16 ThreeLinearModel/Linear[linear2]/linear_0"; +"13 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" -> "14 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0"; +"14 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" -> "15 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; +"15 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" -> "16 ThreeLinearModel/Linear[linear2]/linear_0"; +"16 ThreeLinearModel/Linear[linear2]/linear_0" -> "18 /nncf_model_output_1"; +} diff --git a/tests/torch/data/experimental/sparsify_activations/three_linear_ignore1_compressed_weights.dot b/tests/torch/data/experimental/sparsify_activations/three_linear_ignore1_compressed_weights.dot new file mode 100644 index 00000000000..ae3f667ff3a --- /dev/null +++ b/tests/torch/data/experimental/sparsify_activations/three_linear_ignore1_compressed_weights.dot @@ -0,0 +1,57 @@ +strict digraph { +"0 /nncf_model_input_0" [id=0, type=nncf_model_input]; +"1 embedding.weight" [id=1, type=nncf_model_const]; +"2 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=2, type=decompress_symmetric]; +"3 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/type_0" [id=3, type=type]; +"4 ThreeLinearModel/Embedding[embedding]/embedding_0" [id=4, type=embedding]; +"5 linear1.weight" [id=5, type=nncf_model_const]; +"6 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=6, type=decompress_symmetric]; +"7 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/type_0" [id=7, type=type]; +"8 linear1.bias" [id=8, type=nncf_model_const]; +"9 ThreeLinearModel/Linear[linear1]/linear_0" [id=9, type=linear]; +"10 linear3.weight" [id=10, type=nncf_model_const]; +"11 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=11, type=decompress_symmetric]; +"12 
ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/type_0" [id=12, type=type]; +"13 linear3.bias" [id=13, type=nncf_model_const]; +"14 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" [id=14, type=abs]; +"15 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" [id=15, type=le]; +"16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" [id=16, type=masked_fill]; +"17 ThreeLinearModel/Linear[linear3]/linear_0" [id=17, type=linear]; +"18 linear2.weight" [id=18, type=nncf_model_const]; +"19 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/decompress_symmetric_0" [id=19, type=decompress_symmetric]; +"20 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/type_0" [id=20, type=type]; +"21 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" [id=21, type=abs]; +"22 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" [id=22, type=le]; +"23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" [id=23, type=masked_fill]; +"24 ThreeLinearModel/Linear[linear2]/linear_0" [id=24, type=linear]; +"25 /nncf_model_output_0" [id=25, type=nncf_model_output]; +"26 /nncf_model_output_1" [id=26, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "4 ThreeLinearModel/Embedding[embedding]/embedding_0"; +"1 embedding.weight" -> "2 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"2 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "3 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/type_0"; +"3 ThreeLinearModel/Embedding[embedding]/SymmetricWeightsDecompressor/type_0" -> "4 ThreeLinearModel/Embedding[embedding]/embedding_0"; +"4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "9 ThreeLinearModel/Linear[linear1]/linear_0"; +"4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "21 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0"; +"4 ThreeLinearModel/Embedding[embedding]/embedding_0" -> "23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; +"5 linear1.weight" -> "6 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"6 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "7 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/type_0"; +"7 ThreeLinearModel/Linear[linear1]/SymmetricWeightsDecompressor/type_0" -> "9 ThreeLinearModel/Linear[linear1]/linear_0"; +"8 linear1.bias" -> "9 ThreeLinearModel/Linear[linear1]/linear_0"; +"9 ThreeLinearModel/Linear[linear1]/linear_0" -> "14 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0"; +"9 ThreeLinearModel/Linear[linear1]/linear_0" -> "16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; +"10 linear3.weight" -> "11 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"11 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "12 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/type_0"; +"12 ThreeLinearModel/Linear[linear3]/SymmetricWeightsDecompressor/type_0" -> "17 ThreeLinearModel/Linear[linear3]/linear_0"; +"13 linear3.bias" -> "17 ThreeLinearModel/Linear[linear3]/linear_0"; +"14 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/abs_0" -> "15 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0"; +"15 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/le_0" -> "16 
ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0"; +"16 ThreeLinearModel/Linear[linear3]/ActivationsSparsifier/masked_fill_0" -> "17 ThreeLinearModel/Linear[linear3]/linear_0"; +"17 ThreeLinearModel/Linear[linear3]/linear_0" -> "25 /nncf_model_output_0"; +"18 linear2.weight" -> "19 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/decompress_symmetric_0"; +"19 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/decompress_symmetric_0" -> "20 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/type_0"; +"20 ThreeLinearModel/Linear[linear2]/SymmetricWeightsDecompressor/type_0" -> "24 ThreeLinearModel/Linear[linear2]/linear_0"; +"21 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/abs_0" -> "22 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0"; +"22 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/le_0" -> "23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0"; +"23 ThreeLinearModel/Linear[linear2]/ActivationsSparsifier/masked_fill_0" -> "24 ThreeLinearModel/Linear[linear2]/linear_0"; +"24 ThreeLinearModel/Linear[linear2]/linear_0" -> "26 /nncf_model_output_1"; +} diff --git a/tests/torch/experimental/sparsify_activations/helpers.py b/tests/torch/experimental/sparsify_activations/helpers.py index a9fa94d6ac1..c2bf1a61e1e 100644 --- a/tests/torch/experimental/sparsify_activations/helpers.py +++ b/tests/torch/experimental/sparsify_activations/helpers.py @@ -10,6 +10,9 @@ # limitations under the License. +from collections import defaultdict + +import openvino as ov import torch import torch.nn as nn import transformers.models @@ -43,3 +46,26 @@ def dummy_llama_model(): ) model = transformers.AutoModelForCausalLM.from_config(config, attn_implementation="eager") return model + + +def count_sparsifier_patterns_in_ov(model: ov.Model): + pattern = ("Abs", "LessEqual", "Select") + result = 0 + connections = defaultdict(list) + for node in model.get_ops(): + for output in node.outputs(): + for input_ in output.get_target_inputs(): + connections[node].append(input_.get_node()) + + def dfs(node, location=0): + nonlocal result + if location < len(pattern) and node.get_type_name() == pattern[location]: + if location == len(pattern) - 1: + result += 1 + else: + for next_node in connections[node]: + dfs(next_node, location + 1) + + for node in model.get_ops(): + dfs(node) + return result diff --git a/tests/torch/experimental/sparsify_activations/test_algo.py b/tests/torch/experimental/sparsify_activations/test_algo.py index 281c92fda05..af1e5470cff 100644 --- a/tests/torch/experimental/sparsify_activations/test_algo.py +++ b/tests/torch/experimental/sparsify_activations/test_algo.py @@ -30,6 +30,7 @@ from tests.shared.nx_graph import compare_nx_graph_with_reference from tests.shared.paths import TEST_ROOT from tests.torch.experimental.sparsify_activations.helpers import ThreeLinearModel +from tests.torch.experimental.sparsify_activations.helpers import count_sparsifier_patterns_in_ov from tests.torch.experimental.sparsify_activations.helpers import dummy_llama_model from tests.torch.helpers import set_torch_seed @@ -43,6 +44,7 @@ class SparsifyActivationsAlgorithmTestDesc: ignored_scope: Optional[nncf.IgnoredScope] ref_sparsifier_target_sparsity: Dict[str, float] ref_num_batches_tracked: int + ref_num_patterns_in_ov: int sparsify_activations_algorithm_test_descs = [ @@ -58,6 +60,7 @@ class SparsifyActivationsAlgorithmTestDesc: f"{ACTIVATIONS_SPARSIFIER_PREFIX}_Linear/linear_0": 0.3, }, ref_num_batches_tracked=3, + 
ref_num_patterns_in_ov=1, ), SparsifyActivationsAlgorithmTestDesc( name="three_linear", @@ -66,12 +69,29 @@ class SparsifyActivationsAlgorithmTestDesc: target_sparsity_by_scope={ "{re}.*linear.*": 0.4, }, + ignored_scope=None, + ref_sparsifier_target_sparsity={ + f"{ACTIVATIONS_SPARSIFIER_PREFIX}_ThreeLinearModel/Linear[linear1]/linear_0": 0.4, + f"{ACTIVATIONS_SPARSIFIER_PREFIX}_ThreeLinearModel/Linear[linear2]/linear_0": 0.4, + f"{ACTIVATIONS_SPARSIFIER_PREFIX}_ThreeLinearModel/Linear[linear3]/linear_0": 0.4, + }, + ref_num_batches_tracked=3, + ref_num_patterns_in_ov=2, # Sparsifiers are combined in linear1 and linear2 + ), + SparsifyActivationsAlgorithmTestDesc( + name="three_linear_ignore1", + model_getter=ThreeLinearModel, + dataset_getter=lambda device: nncf.Dataset(torch.randint(0, 30, (3, 2, 8)).to(device)), + target_sparsity_by_scope={ + "{re}.*linear.*": 0.4, + }, ignored_scope=IgnoredScope(patterns=[".*linear1.*"]), ref_sparsifier_target_sparsity={ f"{ACTIVATIONS_SPARSIFIER_PREFIX}_ThreeLinearModel/Linear[linear2]/linear_0": 0.4, f"{ACTIVATIONS_SPARSIFIER_PREFIX}_ThreeLinearModel/Linear[linear3]/linear_0": 0.4, }, ref_num_batches_tracked=3, + ref_num_patterns_in_ov=2, ), SparsifyActivationsAlgorithmTestDesc( name="dummy_llama", @@ -92,6 +112,7 @@ class SparsifyActivationsAlgorithmTestDesc: for layer_id in [0, 1] }, ref_num_batches_tracked=3, + ref_num_patterns_in_ov=6, ), ] @@ -165,9 +186,10 @@ def test_export_openvino(self): torch_outputs = (torch_outputs,) ov_model = ov.convert_model(model, example_input=example_input) + assert count_sparsifier_patterns_in_ov(ov_model) == self.desc.ref_num_patterns_in_ov + compiled_model = ov.compile_model(ov_model, "CPU", config={ov.properties.hint.inference_precision: "f32"}) ov_outputs = compiled_model(example_input.cpu()).to_tuple() - assert len(torch_outputs) == len(ov_outputs) for torch_output, ov_output in zip(torch_outputs, ov_outputs): torch.testing.assert_close(torch_output.cpu(), torch.from_numpy(ov_output), rtol=1e-3, atol=1e-3) From 85b2683a991fc3cccc40d9253b9f0a92f56e55ec Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:08 +0800 Subject: [PATCH 40/76] update metric --- .../experimental/sparsify_activations/reference_data.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/post_training/experimental/sparsify_activations/reference_data.yaml b/tests/post_training/experimental/sparsify_activations/reference_data.yaml index d237bf5986b..a6ae06c4e04 100644 --- a/tests/post_training/experimental/sparsify_activations/reference_data.yaml +++ b/tests/post_training/experimental/sparsify_activations/reference_data.yaml @@ -4,22 +4,22 @@ tinyllama_backend_FP32: num_int8: 0 num_sparse_activations: 0 tinyllama_ffn_sparse20_backend_TORCH: - metric_value: 0.7929 + metric_value: 0.7916 num_int4: 0 num_int8: 0 num_sparse_activations: 44 tinyllama_ffn_sparse20_backend_CUDA_TORCH: - metric_value: 0.7929 + metric_value: 0.7687 num_int4: 0 num_int8: 0 num_sparse_activations: 44 tinyllama_int8_asym_data_free_ffn_sparse20_backend_TORCH: - metric_value: 0.7915 + metric_value: 0.7864 num_int4: 0 num_int8: 312 num_sparse_activations: 44 tinyllama_int8_asym_data_free_ffn_sparse20_backend_CUDA_TORCH: - metric_value: 0.7915 + metric_value: 0.7906 num_int4: 0 num_int8: 312 num_sparse_activations: 44 From e2fee6a370c6955d692ad5fa05aa89f986e8660b Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:08 +0800 Subject: [PATCH 41/76] adjust atol --- 
.../test_sparsify_activations_conformance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py index 712dcc8960a..ebcb1921981 100644 --- a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py +++ b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py @@ -44,7 +44,7 @@ def fixture_sparsify_activations_reference_data(): with path_reference.open() as f: data = yaml.safe_load(f) for test_case in data.values(): - test_case["atol"] = test_case.get("atol", 1e-5) + test_case["atol"] = test_case.get("atol", 1e-3) return data From c4993ac4c0e6238ea44baf3ea04b8665194680b8 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:09 +0800 Subject: [PATCH 42/76] ref metric fix --- .../experimental/sparsify_activations/reference_data.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/post_training/experimental/sparsify_activations/reference_data.yaml b/tests/post_training/experimental/sparsify_activations/reference_data.yaml index a6ae06c4e04..6f467a6d228 100644 --- a/tests/post_training/experimental/sparsify_activations/reference_data.yaml +++ b/tests/post_training/experimental/sparsify_activations/reference_data.yaml @@ -27,7 +27,7 @@ timm/deit3_small_patch16_224_backend_FP32: metric_value: 0.8135 num_int4: 0 num_int8: 0 - num_sparse_activations: 36 + num_sparse_activations: 0 timm/deit3_small_patch16_224_qkv_sparse20_fc1_sparse20_fc2_sparse30_backend_TORCH: metric_value: 0.8097 num_int4: 0 From 510128233b92d113a6142b624a84bd3d51101494 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:09 +0800 Subject: [PATCH 43/76] attempt to solve the "import file mismatch" error in testing --- .../experimental/sparsify_activations/__init__.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 tests/torch/experimental/sparsify_activations/__init__.py diff --git a/tests/torch/experimental/sparsify_activations/__init__.py b/tests/torch/experimental/sparsify_activations/__init__.py new file mode 100644 index 00000000000..2e49d63977d --- /dev/null +++ b/tests/torch/experimental/sparsify_activations/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
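The reference graphs earlier in this series all show the same three-node chain inserted in front of each sparsified linear layer: abs_0 -> le_0 -> masked_fill_0. A minimal sketch of the masking these nodes perform (the function and variable names here are illustrative, not part of the patch; the real ActivationsSparsifier module additionally calibrates a running threshold from the dataset):

.. code-block:: python

    import torch

    def apply_activation_sparsifier(x: torch.Tensor, threshold: float) -> torch.Tensor:
        magnitude = torch.abs(x)       # abs_0 node in the reference graphs
        mask = magnitude <= threshold  # le_0 node: flag small-magnitude activations
        return x.masked_fill(mask, 0.0)  # masked_fill_0 node: zero them out

After `ov.convert_model`, this chain is exported as Abs -> LessEqual -> Select, which is exactly the pattern that `count_sparsifier_patterns_in_ov` in `helpers.py` matches with its depth-first search.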
From 3806ef4f5441c9f93bc6c24a735ea17ea392556c Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:09 +0800 Subject: [PATCH 44/76] switch to "TargetScope" in interface --- .../torch/sparsify_activations/__init__.py | 1 + .../sparsify_activations_impl.py | 18 ++- .../sparsify_activations/target_scope.py | 107 ++++++++++++++++++ .../sparsify_activations/model_scope.py | 14 +-- .../sparsify_activations/pipelines.py | 6 +- .../sparsify_activations/helpers.py | 19 +++- .../sparsify_activations/test_algo.py | 75 +++++++++--- .../sparsify_activations/test_components.py | 77 ++++++++++++- 8 files changed, 281 insertions(+), 36 deletions(-) create mode 100644 nncf/experimental/torch/sparsify_activations/target_scope.py diff --git a/nncf/experimental/torch/sparsify_activations/__init__.py b/nncf/experimental/torch/sparsify_activations/__init__.py index 41a2b7ebcea..ecfaa78cc4f 100644 --- a/nncf/experimental/torch/sparsify_activations/__init__.py +++ b/nncf/experimental/torch/sparsify_activations/__init__.py @@ -10,3 +10,4 @@ # limitations under the License. from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import sparsify_activations # noqa: F401 +from nncf.experimental.torch.sparsify_activations.target_scope import TargetScope # noqa: F401 diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index dfb8236a586..1db99f1eaa5 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -20,11 +20,12 @@ from nncf.common.graph.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.common.logging.track_progress import track -from nncf.common.scopes import matches_any from nncf.common.scopes import should_consider_scope from nncf.common.utils.backend import BackendType from nncf.common.utils.backend import get_backend from nncf.data import Dataset +from nncf.experimental.torch.sparsify_activations.target_scope import TargetScope +from nncf.experimental.torch.sparsify_activations.target_scope import get_target_node_names_from_target_scope from nncf.scopes import IgnoredScope from nncf.scopes import get_ignored_node_names_from_ignored_scope from nncf.torch.model_creation import is_wrapped_model @@ -108,7 +109,7 @@ class SparsifyActivationsAlgorithm: def __init__( self, - target_sparsity_by_scope: Dict[str, float], + target_sparsity_by_scope: Dict[TargetScope, float], ignored_scope: IgnoredScope, ): """ @@ -195,26 +196,31 @@ def _get_target_sparsity_by_node(self, graph: NNCFGraph) -> Dict[NNCFNode, float ignored_names = get_ignored_node_names_from_ignored_scope( self._ignored_scope, graph, strict=self._ignored_scope.validate ) + target_scope_vs_target_names = { + scope: get_target_node_names_from_target_scope(scope, graph, strict=scope.validate) + for scope in self._target_sparsity_by_scope + } target_sparsity_by_node = {} for node in graph.get_nodes_by_metatypes(supported_metatypes): - if not should_consider_scope(node.node_name, ignored_names): + if not should_consider_scope(node.node_name, ignored_scopes=ignored_names): continue for scope, target_sparsity in self._target_sparsity_by_scope.items(): - if matches_any(node.node_name, scope): + target_names = target_scope_vs_target_names[scope] + if should_consider_scope(node.node_name, ignored_scopes=[], target_scopes=target_names): if node in 
target_sparsity_by_node: raise nncf.ValidationError( f'"{node.node_name}" is matched by multiple items in `target_sparsity_by_scope`.' ) target_sparsity_by_node[node] = target_sparsity if not target_sparsity_by_node: - raise nncf.ValidationError("No layers matched for activation sparsification.") + raise nncf.ValidationError("No layers to conduct activation sparsification.") return target_sparsity_by_node def sparsify_activations( model: TModel, dataset: Dataset, - target_sparsity_by_scope: Dict[str, float], + target_sparsity_by_scope: Dict[TargetScope, float], ignored_scope: Optional[IgnoredScope] = None, ) -> TModel: """ diff --git a/nncf/experimental/torch/sparsify_activations/target_scope.py b/nncf/experimental/torch/sparsify_activations/target_scope.py new file mode 100644 index 00000000000..a76767308cf --- /dev/null +++ b/nncf/experimental/torch/sparsify_activations/target_scope.py @@ -0,0 +1,107 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import Set + +import nncf +from nncf.common.graph.graph import NNCFGraph +from nncf.scopes import IgnoredScope +from nncf.scopes import get_difference_ignored_scope +from nncf.scopes import get_matched_ignored_scope_info + + +@dataclass +class TargetScope(IgnoredScope): + """ + Specifies the target portions of model to conduct activation sparsification. + + Example: + + .. code-block:: python + # Specified by node name: + node_names = ['node_1', 'node_2', 'node_3'] + target_scope = TargetScope(names=node_names) + + # Specified using regular expressions: + patterns = ['.*node_\\d'] + target_scope = TargetScope(patterns=patterns) + + # Specified by operation type: + + # OpenVINO opset https://docs.openvino.ai/latest/openvino_docs_ops_opset.html + operation_types = ['Multiply', 'GroupConvolution', 'Interpolate'] + target_scope = TargetScope(types=operation_types) + + # ONNX opset https://github.com/onnx/onnx/blob/main/docs/Operators.md + operation_types = ['Mul', 'Conv', 'Resize'] + target_scope = TargetScope(types=operation_types) + + **Note:** Operation types must be specified according to the model framework. + + :param names: List of target node names. + :type names: List[str] + :param patterns: List of regular expressions that define patterns for names of target nodes. + :type patterns: List[str] + :param types: List of target operation types. + :type types: List[str] + :param subgraphs: List of target subgraphs. + :type subgraphs: List[Subgraph] + :param validate: If set to True, then a RuntimeError will be raised if any target scope does not match + in the model graph. 
+    :type validate: bool
+    """
+
+    def __hash__(self) -> int:
+        return hash(
+            (
+                frozenset(self.names),
+                frozenset(self.patterns),
+                frozenset(self.types),
+                frozenset((frozenset(subgraph.inputs), frozenset(subgraph.outputs)) for subgraph in self.subgraphs),
+                self.validate,
+            )
+        )
+
+
+def get_target_node_names_from_target_scope(
+    target_scope: TargetScope, nncf_graph: NNCFGraph, strict: bool = True
+) -> Set[str]:
+    """
+    Returns NNCF node names from the graph that are matched by target scope.
+    If strict is True, raises nncf.ValidationError if no rule is matched.
+
+    :param target_scope: Target scope specifying the matching rules.
+    :param nncf_graph: The graph.
+    :param strict: Whether target_scope must match at least one node or not.
+    :return: NNCF node names from the given graph matched by target scope.
+    """
+    matched_target_scope, matches = get_matched_ignored_scope_info(target_scope, [nncf_graph])
+    if strict:
+        _check_target_scope_strictly_matched(target_scope, matched_target_scope)
+    return set().union(*matches.values())
+
+
+def _check_target_scope_strictly_matched(target_scope: TargetScope, matched_target_scope: TargetScope):
+    """
+    Passes when target_scope and matched_target_scope are equal, otherwise raises ValidationError.
+
+    :param target_scope: The given target scope.
+    :param matched_target_scope: The actual target scope matched in a graph.
+    """
+    unmatched_scope = get_difference_ignored_scope(target_scope, matched_target_scope)
+    error_messages = []
+    for match_type in ("names", "types", "patterns", "subgraphs"):
+        unmatched_rules = getattr(unmatched_scope, match_type)
+        if unmatched_rules:
+            error_messages.append(f"The following {match_type} are not found in the graph: {unmatched_rules}.")
+    if error_messages:
+        raise nncf.ValidationError("\n".join(error_messages))
diff --git a/tests/post_training/experimental/sparsify_activations/model_scope.py b/tests/post_training/experimental/sparsify_activations/model_scope.py
index 54026c9f1a8..190b81c843c 100644
--- a/tests/post_training/experimental/sparsify_activations/model_scope.py
+++ b/tests/post_training/experimental/sparsify_activations/model_scope.py
@@ -13,6 +13,7 @@
 from typing import Dict, List
 
 import nncf
+from nncf.experimental.torch.sparsify_activations import TargetScope
 from nncf.parameters import CompressWeightsMode
 from tests.post_training.experimental.sparsify_activations.pipelines import ImageClassificationTimmSparsifyActivations
 from tests.post_training.experimental.sparsify_activations.pipelines import LMSparsifyActivations
@@ -34,9 +35,7 @@
         "compress_weights": None,
         "sparsify_activations": {
             "target_sparsity_by_scope": {
-                "{re}up_proj": 0.2,
-                "{re}gate_proj": 0.2,
-                "{re}down_proj": 0.2,
+                TargetScope(patterns=[".*up_proj.*", ".*gate_proj.*", ".*down_proj.*"]): 0.2,
             }
         },
     },
@@ -52,9 +51,7 @@
         },
         "sparsify_activations": {
             "target_sparsity_by_scope": {
-                "{re}up_proj": 0.2,
-                "{re}gate_proj": 0.2,
-                "{re}down_proj": 0.2,
+                TargetScope(patterns=[".*up_proj.*", ".*gate_proj.*", ".*down_proj.*"]): 0.2,
             }
         },
     },
@@ -75,9 +72,8 @@
     "compression_params": {
         "sparsify_activations": {
             "target_sparsity_by_scope": {
-                "{re}qkv": 0.2,
-                "{re}fc1": 0.2,
-                "{re}fc2": 0.3,
+                TargetScope(patterns=[".*qkv.*", ".*fc1.*"]): 0.2,
+                TargetScope(patterns=[".*fc2.*"]): 0.3,
             }
         },
     },
diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py
index 55a0d8b410a..c39205952da 100644
---
a/tests/post_training/experimental/sparsify_activations/pipelines.py
+++ b/tests/post_training/experimental/sparsify_activations/pipelines.py
@@ -27,9 +27,7 @@
 from transformers import AutoModelForCausalLM
 
 import nncf
-import nncf.experimental
-import nncf.experimental.torch
-import nncf.experimental.torch.sparsify_activations
+from nncf.experimental.torch.sparsify_activations import sparsify_activations
 from nncf.experimental.torch.sparsify_activations.torch_backend import SparsifyActivationsAlgoBackend
 from tests.post_training.pipelines.base import LIMIT_LENGTH_OF_STATUS
 from tests.post_training.pipelines.base import PT_BACKENDS
@@ -170,7 +168,7 @@ def _compress(self):
                 **self.compression_params["compress_weights"],
             )
         if self.compression_params.get("sparsify_activations", None) is not None:
-            self.compressed_model = nncf.experimental.torch.sparsify_activations.sparsify_activations(
+            self.compressed_model = sparsify_activations(
                 self.compressed_model,
                 dataset=self.calibration_dataset,
                 **self.compression_params["sparsify_activations"],
diff --git a/tests/torch/experimental/sparsify_activations/helpers.py b/tests/torch/experimental/sparsify_activations/helpers.py
index c2bf1a61e1e..437103ec166 100644
--- a/tests/torch/experimental/sparsify_activations/helpers.py
+++ b/tests/torch/experimental/sparsify_activations/helpers.py
@@ -17,6 +17,9 @@
 import torch.nn as nn
 import transformers.models
 
+from nncf import IgnoredScope
+from nncf.experimental.torch.sparsify_activations import TargetScope
+
 
 class ThreeLinearModel(nn.Module):
     def __init__(self) -> None:
@@ -48,7 +51,11 @@ def dummy_llama_model():
     return model
 
 
-def count_sparsifier_patterns_in_ov(model: ov.Model):
+def count_sparsifier_patterns_in_ov(model: ov.Model) -> int:
+    """
+    Counts the number of occurrences of the activation sparsification pattern "Abs -> LessEqual -> Select"
+    in the OpenVINO model.
+ """ pattern = ("Abs", "LessEqual", "Select") result = 0 connections = defaultdict(list) @@ -69,3 +76,13 @@ def dfs(node, location=0): for node in model.get_ops(): dfs(node) return result + + +def convert_ignored_scope_to_target_scope(ignored_scope: IgnoredScope) -> TargetScope: + return TargetScope( + ignored_scope.names, + ignored_scope.patterns, + ignored_scope.types, + ignored_scope.subgraphs, + ignored_scope.validate, + ) diff --git a/tests/torch/experimental/sparsify_activations/test_algo.py b/tests/torch/experimental/sparsify_activations/test_algo.py index af1e5470cff..35fb6093f1d 100644 --- a/tests/torch/experimental/sparsify_activations/test_algo.py +++ b/tests/torch/experimental/sparsify_activations/test_algo.py @@ -22,6 +22,7 @@ import nncf.experimental import nncf.experimental.torch.sparsify_activations from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgorithm +from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import TargetScope from nncf.experimental.torch.sparsify_activations.torch_backend import ACTIVATIONS_SPARSIFIER_PREFIX from nncf.experimental.torch.sparsify_activations.torch_backend import ActivationsSparsifier from nncf.scopes import IgnoredScope @@ -40,7 +41,7 @@ class SparsifyActivationsAlgorithmTestDesc: name: str model_getter: Callable[[], nn.Module] dataset_getter: Callable[[torch.device], nncf.Dataset] - target_sparsity_by_scope: Dict[str, float] + target_sparsity_by_scope: Dict[TargetScope, float] ignored_scope: Optional[nncf.IgnoredScope] ref_sparsifier_target_sparsity: Dict[str, float] ref_num_batches_tracked: int @@ -53,7 +54,7 @@ class SparsifyActivationsAlgorithmTestDesc: model_getter=lambda: nn.Linear(4, 2), dataset_getter=lambda device: nncf.Dataset(torch.randn([3, 2, 4]).to(device)), target_sparsity_by_scope={ - "{re}.*linear.*": 0.3, + TargetScope(names=["Linear/linear_0"]): 0.3, }, ignored_scope=None, ref_sparsifier_target_sparsity={ @@ -67,7 +68,7 @@ class SparsifyActivationsAlgorithmTestDesc: model_getter=ThreeLinearModel, dataset_getter=lambda device: nncf.Dataset(torch.randint(0, 30, (3, 2, 8)).to(device)), target_sparsity_by_scope={ - "{re}.*linear.*": 0.4, + TargetScope(types=["linear"]): 0.4, }, ignored_scope=None, ref_sparsifier_target_sparsity={ @@ -83,7 +84,8 @@ class SparsifyActivationsAlgorithmTestDesc: model_getter=ThreeLinearModel, dataset_getter=lambda device: nncf.Dataset(torch.randint(0, 30, (3, 2, 8)).to(device)), target_sparsity_by_scope={ - "{re}.*linear.*": 0.4, + TargetScope(names=["ThreeLinearModel/Linear[linear2]/linear_0"]): 0.4, + TargetScope(patterns=[".*linear3.*"]): 0.4, }, ignored_scope=IgnoredScope(patterns=[".*linear1.*"]), ref_sparsifier_target_sparsity={ @@ -98,9 +100,9 @@ class SparsifyActivationsAlgorithmTestDesc: model_getter=dummy_llama_model, dataset_getter=lambda device: nncf.Dataset(torch.randint(0, 30, (3, 2, 8)).to(device)), target_sparsity_by_scope={ - "{re}.*gate_proj.*": 0.2, - "{re}.*up_proj.*": 0.3, - "{re}.*down_proj.*": 0.4, + TargetScope(patterns=[".*gate_proj.*"]): 0.2, + TargetScope(patterns=[".*up_proj.*"]): 0.3, + TargetScope(patterns=[".*down_proj.*"]): 0.4, }, ignored_scope=None, ref_sparsifier_target_sparsity={ @@ -197,7 +199,7 @@ def test_export_openvino(self): @dataclass class TargetSparsityByNodeTestDesc: - target_sparsity_by_scope: Dict[str, float] + target_sparsity_by_scope: Dict[TargetScope, float] ignored_scope: IgnoredScope ref_target_sparsity_by_node_name: Optional[Dict[str, float]] = None 
raised_error_message: Optional[str] = None @@ -207,7 +209,7 @@ class TargetSparsityByNodeTestDesc: "desc", [ TargetSparsityByNodeTestDesc( - target_sparsity_by_scope={"{re}.*linear.*": 0.3}, + target_sparsity_by_scope={TargetScope(patterns=[".*linear.*"]): 0.3}, ignored_scope=IgnoredScope(), ref_target_sparsity_by_node_name={ "ThreeLinearModel/Linear[linear1]/linear_0": 0.3, @@ -216,20 +218,65 @@ class TargetSparsityByNodeTestDesc: }, ), TargetSparsityByNodeTestDesc( - target_sparsity_by_scope={"{re}.*linear.*": 0.3}, - ignored_scope=IgnoredScope(patterns=[".*linear2.*"]), + target_sparsity_by_scope={TargetScope(patterns=[".*linear[23].*"], types=["linear"]): 0.3}, + ignored_scope=IgnoredScope(), ref_target_sparsity_by_node_name={ "ThreeLinearModel/Linear[linear1]/linear_0": 0.3, + "ThreeLinearModel/Linear[linear2]/linear_0": 0.3, + "ThreeLinearModel/Linear[linear3]/linear_0": 0.3, + }, + ), + TargetSparsityByNodeTestDesc( + target_sparsity_by_scope={ + TargetScope( + subgraphs=[nncf.Subgraph(inputs=["/nncf_model_input_0"], outputs=["/nncf_model_output_0"])] + ): 0.1, + }, + ignored_scope=IgnoredScope(), + ref_target_sparsity_by_node_name={ + "ThreeLinearModel/Linear[linear1]/linear_0": 0.1, + "ThreeLinearModel/Linear[linear3]/linear_0": 0.1, + }, + ), + TargetSparsityByNodeTestDesc( + target_sparsity_by_scope={ + TargetScope(names=["ThreeLinearModel/Linear[linear1]/linear_0"]): 0.1, + TargetScope(patterns=[".*linear[23].*"]): 0.3, + }, + ignored_scope=IgnoredScope(patterns=[".*linear2.*"]), + ref_target_sparsity_by_node_name={ + "ThreeLinearModel/Linear[linear1]/linear_0": 0.1, "ThreeLinearModel/Linear[linear3]/linear_0": 0.3, }, ), TargetSparsityByNodeTestDesc( - target_sparsity_by_scope={"{re}.*nonexist.*": 0.3}, + target_sparsity_by_scope={ + TargetScope(patterns=[".*nonexist.*"], validate=False): 0.3, + TargetScope(names=["ThreeLinearModel/Linear[linear1]/linear_0"]): 0.3, + }, + ignored_scope=IgnoredScope(), + ref_target_sparsity_by_node_name={ + "ThreeLinearModel/Linear[linear1]/linear_0": 0.3, + }, + ), + TargetSparsityByNodeTestDesc( + target_sparsity_by_scope={TargetScope(patterns=[".*nonexist.*"]): 0.3}, + ignored_scope=IgnoredScope(), + raised_error_message="not found in the graph", + ), + TargetSparsityByNodeTestDesc( + target_sparsity_by_scope={ + TargetScope(patterns=[".*linear2.*"]): 0.3, + TargetScope(types=["embedding"]): 0.3, # Embedding is not supported + }, ignored_scope=IgnoredScope(patterns=[".*linear2.*"]), - raised_error_message="No layers matched", + raised_error_message="No layers to conduct activation sparsification", ), TargetSparsityByNodeTestDesc( - target_sparsity_by_scope={"{re}.*linear.*": 0.3, "{re}.*linear1.*": 0.4}, + target_sparsity_by_scope={ + TargetScope(names=["ThreeLinearModel/Linear[linear1]/linear_0"]): 0.3, + TargetScope(patterns=[".*linear1.*"]): 0.4, + }, ignored_scope=IgnoredScope(), raised_error_message="matched by multiple items", ), diff --git a/tests/torch/experimental/sparsify_activations/test_components.py b/tests/torch/experimental/sparsify_activations/test_components.py index 97447932467..64acafd26e2 100644 --- a/tests/torch/experimental/sparsify_activations/test_components.py +++ b/tests/torch/experimental/sparsify_activations/test_components.py @@ -18,11 +18,19 @@ import nncf import nncf.experimental import nncf.experimental.torch.sparsify_activations +from nncf.experimental.torch.sparsify_activations.target_scope import TargetScope +from nncf.experimental.torch.sparsify_activations.target_scope import 
get_target_node_names_from_target_scope from nncf.experimental.torch.sparsify_activations.torch_backend import ActivationsSparsifier from nncf.experimental.torch.sparsify_activations.torch_backend import PTSparsifyActivationsAlgoBackend from nncf.torch.model_creation import wrap_model from nncf.torch.nncf_network import NNCFNetwork +from tests.common.test_ignored_scope import CONV_TYPE +from tests.common.test_ignored_scope import IGNORED_SCOPES_TEST_DATA +from tests.common.test_ignored_scope import LINEAR_TYPE +from tests.common.test_ignored_scope import WRONG_IGNORED_SCOPES_TEST_DATA +from tests.common.test_ignored_scope import NNCFGraphToTestIgnoredScope from tests.torch.experimental.sparsify_activations.helpers import ThreeLinearModel +from tests.torch.experimental.sparsify_activations.helpers import convert_ignored_scope_to_target_scope @dataclass @@ -108,7 +116,8 @@ def test_forward_before_calibration(self, use_cuda: bool, dtype: torch.dtype): assert not sparsifier.num_batches_tracked.is_nonzero() assert sparsifier.running_threshold.isneginf() output_tensor = sparsifier(input_tensor) - assert not output_tensor.is_set_to(input_tensor) # The output tensor is a new tensor + # The output tensor is a new tensor + assert not output_tensor.is_set_to(input_tensor) # Before calibration, the sparsifier does not change the input torch.testing.assert_close(output_tensor, input_tensor, rtol=1e-4, atol=1e-4) @@ -161,7 +170,7 @@ class TestPTSparsifyActivationsAlgoBackend: def test_get_sparsifiers(self): model, dataset = self.create_model_and_dataset() sparse_model = nncf.experimental.torch.sparsify_activations.sparsify_activations( - model, dataset, target_sparsity_by_scope={"{re}.*": 0.5} + model, dataset, target_sparsity_by_scope={TargetScope(patterns=[".*"]): 0.5} ) backend = PTSparsifyActivationsAlgoBackend() sparsifiers = backend.get_sparsifiers(sparse_model) @@ -220,3 +229,67 @@ def create_model_and_dataset(self, compress_weights: bool = False): trace_parameters=True, ) return model, dataset + + +class TestTargetScope: + SAME_HASH_PAIRS = [ + (TargetScope(), TargetScope()), + ( + TargetScope( + names=["node_1", "node_2"], + patterns=["node\\d", "layer\\d"], + types=["Conv", "MatMul"], + subgraphs=[ + nncf.Subgraph(inputs=["node_1", "node_2"], outputs=["node_3", "node_4"]), + nncf.Subgraph(inputs=["layer_1", "layer_2"], outputs=["layer_3", "layer_4", "layer_5"]), + ], + ), + TargetScope( + names=["node_2", "node_1"], + patterns=["layer\\d", "node\\d"], + types=["MatMul", "Conv"], + subgraphs=[ + nncf.Subgraph(inputs=["layer_2", "layer_1"], outputs=["layer_5", "layer_4", "layer_3"]), + nncf.Subgraph(inputs=["node_2", "node_1"], outputs=["node_4", "node_3"]), + ], + ), + ), + ] + + DIFFERENT_HASH_PAIRS = [ + (TargetScope(), TargetScope(types=["Conv"])), + ( + TargetScope(names=["node_1"]), + TargetScope(names=["node_1"], patterns=["layer\\d"]), + ), + ( + TargetScope(subgraphs=[nncf.Subgraph(inputs=["node_1"], outputs=["node_2"])]), + TargetScope(subgraphs=[nncf.Subgraph(inputs=["node_1"], outputs=["node_3"])]), + ), + ] + + TARGET_SCOPE_MATCH_DATA = [ + (convert_ignored_scope_to_target_scope(ignored_scope), ref_ignored_names) + for ignored_scope, ref_ignored_names in IGNORED_SCOPES_TEST_DATA + ] + WRONG_TARGET_SCOPE_MATCH_DATA = list(map(convert_ignored_scope_to_target_scope, WRONG_IGNORED_SCOPES_TEST_DATA)) + + @pytest.mark.parametrize("target_scope1,target_scope2", SAME_HASH_PAIRS) + def test_same_hash(self, target_scope1: TargetScope, target_scope2: TargetScope): + assert hash(target_scope1) 
== hash(target_scope2) + + @pytest.mark.parametrize("target_scope1,target_scope2", DIFFERENT_HASH_PAIRS) + def test_different_hash(self, target_scope1: TargetScope, target_scope2: TargetScope): + assert hash(target_scope1) != hash(target_scope2) + + @pytest.mark.parametrize("target_scope,ref_target_names", TARGET_SCOPE_MATCH_DATA) + def test_get_target_node_names_from_target_scope(self, target_scope: TargetScope, ref_target_names: List[str]): + nncf_graph = NNCFGraphToTestIgnoredScope(CONV_TYPE, LINEAR_TYPE).nncf_graph + ignored_names = get_target_node_names_from_target_scope(target_scope, nncf_graph) + assert sorted(ignored_names) == sorted(ref_target_names) + + @pytest.mark.parametrize("target_scope", WRONG_TARGET_SCOPE_MATCH_DATA) + def test_wrong_target_scope(self, target_scope: TargetScope): + nncf_graph = NNCFGraphToTestIgnoredScope(CONV_TYPE, LINEAR_TYPE).nncf_graph + with pytest.raises(nncf.ValidationError): + get_target_node_names_from_target_scope(target_scope, nncf_graph) From 400d1dac5bd8b024ee9c1acf63ba28c8665a52a8 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:09 +0800 Subject: [PATCH 45/76] rename ref dot files --- ...ot => dummy_llama_int8_sym_weights_sparse_activations.dot} | 0 .../{dummy_llama.dot => dummy_llama_sparse_activations.dot} | 0 ...hts.dot => linear_int8_sym_weights_sparse_activations.dot} | 0 .../{linear.dot => linear_sparse_activations.dot} | 0 ...ee_linear_ignore1_int8_sym_weights_sparse_activations.dot} | 0 ...gnore1.dot => three_linear_ignore1_sparse_activations.dot} | 0 ...t => three_linear_int8_sym_weights_sparse_activations.dot} | 0 .../{three_linear.dot => three_linear_sparse_activations.dot} | 0 tests/torch/experimental/sparsify_activations/test_algo.py | 4 +++- 9 files changed, 3 insertions(+), 1 deletion(-) rename tests/torch/data/experimental/sparsify_activations/{dummy_llama_compressed_weights.dot => dummy_llama_int8_sym_weights_sparse_activations.dot} (100%) rename tests/torch/data/experimental/sparsify_activations/{dummy_llama.dot => dummy_llama_sparse_activations.dot} (100%) rename tests/torch/data/experimental/sparsify_activations/{linear_compressed_weights.dot => linear_int8_sym_weights_sparse_activations.dot} (100%) rename tests/torch/data/experimental/sparsify_activations/{linear.dot => linear_sparse_activations.dot} (100%) rename tests/torch/data/experimental/sparsify_activations/{three_linear_ignore1_compressed_weights.dot => three_linear_ignore1_int8_sym_weights_sparse_activations.dot} (100%) rename tests/torch/data/experimental/sparsify_activations/{three_linear_ignore1.dot => three_linear_ignore1_sparse_activations.dot} (100%) rename tests/torch/data/experimental/sparsify_activations/{three_linear_compressed_weights.dot => three_linear_int8_sym_weights_sparse_activations.dot} (100%) rename tests/torch/data/experimental/sparsify_activations/{three_linear.dot => three_linear_sparse_activations.dot} (100%) diff --git a/tests/torch/data/experimental/sparsify_activations/dummy_llama_compressed_weights.dot b/tests/torch/data/experimental/sparsify_activations/dummy_llama_int8_sym_weights_sparse_activations.dot similarity index 100% rename from tests/torch/data/experimental/sparsify_activations/dummy_llama_compressed_weights.dot rename to tests/torch/data/experimental/sparsify_activations/dummy_llama_int8_sym_weights_sparse_activations.dot diff --git a/tests/torch/data/experimental/sparsify_activations/dummy_llama.dot b/tests/torch/data/experimental/sparsify_activations/dummy_llama_sparse_activations.dot similarity 
index 100% rename from tests/torch/data/experimental/sparsify_activations/dummy_llama.dot rename to tests/torch/data/experimental/sparsify_activations/dummy_llama_sparse_activations.dot diff --git a/tests/torch/data/experimental/sparsify_activations/linear_compressed_weights.dot b/tests/torch/data/experimental/sparsify_activations/linear_int8_sym_weights_sparse_activations.dot similarity index 100% rename from tests/torch/data/experimental/sparsify_activations/linear_compressed_weights.dot rename to tests/torch/data/experimental/sparsify_activations/linear_int8_sym_weights_sparse_activations.dot diff --git a/tests/torch/data/experimental/sparsify_activations/linear.dot b/tests/torch/data/experimental/sparsify_activations/linear_sparse_activations.dot similarity index 100% rename from tests/torch/data/experimental/sparsify_activations/linear.dot rename to tests/torch/data/experimental/sparsify_activations/linear_sparse_activations.dot diff --git a/tests/torch/data/experimental/sparsify_activations/three_linear_ignore1_compressed_weights.dot b/tests/torch/data/experimental/sparsify_activations/three_linear_ignore1_int8_sym_weights_sparse_activations.dot similarity index 100% rename from tests/torch/data/experimental/sparsify_activations/three_linear_ignore1_compressed_weights.dot rename to tests/torch/data/experimental/sparsify_activations/three_linear_ignore1_int8_sym_weights_sparse_activations.dot diff --git a/tests/torch/data/experimental/sparsify_activations/three_linear_ignore1.dot b/tests/torch/data/experimental/sparsify_activations/three_linear_ignore1_sparse_activations.dot similarity index 100% rename from tests/torch/data/experimental/sparsify_activations/three_linear_ignore1.dot rename to tests/torch/data/experimental/sparsify_activations/three_linear_ignore1_sparse_activations.dot diff --git a/tests/torch/data/experimental/sparsify_activations/three_linear_compressed_weights.dot b/tests/torch/data/experimental/sparsify_activations/three_linear_int8_sym_weights_sparse_activations.dot similarity index 100% rename from tests/torch/data/experimental/sparsify_activations/three_linear_compressed_weights.dot rename to tests/torch/data/experimental/sparsify_activations/three_linear_int8_sym_weights_sparse_activations.dot diff --git a/tests/torch/data/experimental/sparsify_activations/three_linear.dot b/tests/torch/data/experimental/sparsify_activations/three_linear_sparse_activations.dot similarity index 100% rename from tests/torch/data/experimental/sparsify_activations/three_linear.dot rename to tests/torch/data/experimental/sparsify_activations/three_linear_sparse_activations.dot diff --git a/tests/torch/experimental/sparsify_activations/test_algo.py b/tests/torch/experimental/sparsify_activations/test_algo.py index 35fb6093f1d..b7214aaa5fa 100644 --- a/tests/torch/experimental/sparsify_activations/test_algo.py +++ b/tests/torch/experimental/sparsify_activations/test_algo.py @@ -172,7 +172,9 @@ def test_inserted_sparsifier(self): def test_nncf_graph(self): desc: SparsifyActivationsAlgorithmTestDesc = self.desc model: NNCFNetwork = self.model - file_name = f"{desc.name}_compressed_weights" if self.compress_weights else desc.name + file_name = "_".join( + filter(None, [desc.name, "int8_sym_weights" if self.compress_weights else None, "sparse_activations"]) + ) ref_dot_path = Path(TEST_ROOT, "torch", "data", "experimental", "sparsify_activations", f"{file_name}.dot") graph = model.nncf.get_graph().get_graph_for_structure_analysis() compare_nx_graph_with_reference(graph, ref_dot_path) 
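Since `TargetScope.__hash__` (introduced in the patch above) folds every rule list into a `frozenset`, two scopes that declare the same rules in a different order produce the same hash, which is what the `SAME_HASH_PAIRS` cases in `test_components.py` verify. A small illustration, assuming only the `TargetScope` import added in this series (note that dataclass equality still compares the underlying lists in order, so only the hash is order-insensitive):

.. code-block:: python

    from nncf.experimental.torch.sparsify_activations import TargetScope

    scope_a = TargetScope(names=["node_1", "node_2"], types=["Conv", "MatMul"])
    scope_b = TargetScope(names=["node_2", "node_1"], types=["MatMul", "Conv"])

    # Rule order does not affect the hash value.
    assert hash(scope_a) == hash(scope_b)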
From fe1cedf1e794c839ecccdaffb3dd6ae296f2db36 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:09 +0800 Subject: [PATCH 46/76] misc order change --- .../sparsify_activations/pipelines.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py index c39205952da..0a56fb9b9fe 100644 --- a/tests/post_training/experimental/sparsify_activations/pipelines.py +++ b/tests/post_training/experimental/sparsify_activations/pipelines.py @@ -146,15 +146,6 @@ def collect_data_from_stdout(self, stdout: str): stats.fill(stdout) self.run_info.stats_from_output = stats - def _validate(self): - super()._validate() - ref_num_sparse_activations = self.reference_data.get("num_sparse_activations", 0) - num_sparse_activations = self.run_info.num_compress_nodes.num_sparse_activations - if num_sparse_activations != ref_num_sparse_activations: - status_msg = f"Regression: The number of sparse activations is {num_sparse_activations}, \ - which differs from reference {ref_num_sparse_activations}." - raise ValueError(status_msg) - @set_torch_seed(seed=42) def _compress(self): """ @@ -174,6 +165,15 @@ def _compress(self): **self.compression_params["sparsify_activations"], ) + def _validate(self): + super()._validate() + ref_num_sparse_activations = self.reference_data.get("num_sparse_activations", 0) + num_sparse_activations = self.run_info.num_compress_nodes.num_sparse_activations + if num_sparse_activations != ref_num_sparse_activations: + status_msg = f"Regression: The number of sparse activations is {num_sparse_activations}, \ + which differs from reference {ref_num_sparse_activations}." + raise ValueError(status_msg) + class LMSparsifyActivations(SAPipelineMixin, LMWeightCompression): DEFAULT_SUBSET_SIZE = 32 From c3835efd999f904ae210257729d5de503e529f6f Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:09 +0800 Subject: [PATCH 47/76] add documentation --- .../sparsify_activations_impl.py | 30 +++++++++++++----- .../sparsify_activations/target_scope.py | 31 ++++++++----------- 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index 1db99f1eaa5..aacdc7fee2c 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -226,15 +226,31 @@ def sparsify_activations( """ Post-training activation sparsification on the given model. + This algorithm sparsifies the input activations in supported layers based on a calibration + dataset. The goal is to zero out neurons with small activation values around 0, thereby + roughly achieving the target sparsity at a statistical level. + + Note that currently only linear layers are supported. + :param model: The model to be sparsified. :param dataset: The dataset to calibrate the activation sparsifiers. - :param target_sparsity_by_scope: A dictionary that defines the target activation sparsity - level for specified layers. For each item, the key should be a complete scope name - in the NNCF graph, or a regular expression specification starting with `{re}`; the - corresponding value should be a float number in the range [0, 1] representing the - target sparsity level. 
- :param ignored_scope: An ignored scope that defines the list of model control flow graph - nodes to be ignored during activation sparsification. + :param target_sparsity_by_scope: Defines the target activation sparsity level + for specified layers. For each item, the key is an instance of `TargetScope` class + representing the layers to match in the model's NNCF graph; the corresponding value + is a float number in the range [0, 1] representing the target sparsity level. + + Example: + .. code-block:: python + { + # Target sparsity is 60% for node "Dummy/Linear[layer]/linear_0" in the model graph + TargetScope(names=["Dummy/Linear[layer]/linear_0"]): 0.6, + # Target sparsity is 30% for the layers whose name contains "up_proj" or "down_proj". + TargetScope(patterns=[".*up_proj.*", ".*down_proj.*"]): 0.3, + } + + :param ignored_scope: Optional. It defines the nodes in the model graph that should be be ignored + during activation sparsification. Note that layers other than linear type are already filtered + out internally, so there is no need to mention them in `ignored_scope`. :return: The sparsified model. """ diff --git a/nncf/experimental/torch/sparsify_activations/target_scope.py b/nncf/experimental/torch/sparsify_activations/target_scope.py index a76767308cf..6d3cff3ad01 100644 --- a/nncf/experimental/torch/sparsify_activations/target_scope.py +++ b/nncf/experimental/torch/sparsify_activations/target_scope.py @@ -27,37 +27,32 @@ class TargetScope(IgnoredScope): Example: .. code-block:: python - # Specified by node name: - node_names = ['node_1', 'node_2', 'node_3'] - target_scope = TargetScope(names=node_names) + # Specified by node name: + node_names = ['node_1', 'node_2', 'node_3'] + target_scope = TargetScope(names=node_names) - # Specified using regular expressions: - patterns = ['.*node_\\d'] - target_scope = TargetScope(patterns=patterns) + # Specified using regular expressions: + patterns = ['.*node_\\d'] + target_scope = TargetScope(patterns=patterns) - # Specified by operation type: + # Specified by operation type: - # OpenVINO opset https://docs.openvino.ai/latest/openvino_docs_ops_opset.html - operation_types = ['Multiply', 'GroupConvolution', 'Interpolate'] - target_scope = TargetScope(types=operation_types) + # OpenVINO opset https://docs.openvino.ai/latest/openvino_docs_ops_opset.html + operation_types = ['Multiply', 'GroupConvolution', 'Interpolate'] + target_scope = TargetScope(types=operation_types) - # ONNX opset https://github.com/onnx/onnx/blob/main/docs/Operators.md - operation_types = ['Mul', 'Conv', 'Resize'] - target_scope = TargetScope(types=operation_types) + # ONNX opset https://github.com/onnx/onnx/blob/main/docs/Operators.md + operation_types = ['Mul', 'Conv', 'Resize'] + target_scope = TargetScope(types=operation_types) **Note:** Operation types must be specified according to the model framework. :param names: List of target node names. - :type names: List[str] :param patterns: List of regular expressions that define patterns for names of target nodes. - :type patterns: List[str] :param types: List of target operation types. - :type types: List[str] :param subgraphs: List of target subgraphs. - :type subgraphs: List[Subgraph] :param validate: If set to True, then a RuntimeError will be raised if any target scope does not match in the model graph. 
- :type types: bool """ def __hash__(self) -> int: From 1c3c10e6e25abe2bc9d881a53cb0af32550cc24d Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:10 +0800 Subject: [PATCH 48/76] update metric --- .../sparsify_activations/pipelines.py | 2 +- .../sparsify_activations/reference_data.yaml | 31 ++++++++++++------- .../pipelines/lm_weight_compression.py | 10 +++--- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py index 0a56fb9b9fe..d169da2aa30 100644 --- a/tests/post_training/experimental/sparsify_activations/pipelines.py +++ b/tests/post_training/experimental/sparsify_activations/pipelines.py @@ -221,7 +221,7 @@ def get_transform_calibration_fn(self): original_fn = super().get_transform_calibration_fn() def transform_fn(data): - inputs = original_fn(data, max_tokens=256) + inputs = original_fn(data, max_tokens=128, filter_bad_tokens=False) if self.backend == BackendType.CUDA_TORCH: for input_name in inputs: inputs[input_name] = torch.from_numpy(inputs[input_name]).cuda() diff --git a/tests/post_training/experimental/sparsify_activations/reference_data.yaml b/tests/post_training/experimental/sparsify_activations/reference_data.yaml index 6f467a6d228..da91609696c 100644 --- a/tests/post_training/experimental/sparsify_activations/reference_data.yaml +++ b/tests/post_training/experimental/sparsify_activations/reference_data.yaml @@ -3,38 +3,45 @@ tinyllama_backend_FP32: num_int4: 0 num_int8: 0 num_sparse_activations: 0 -tinyllama_ffn_sparse20_backend_TORCH: - metric_value: 0.7916 +tinyllama_ffn_sparse20_backend_CUDA_TORCH: + metric_value: 0.7970 + atol: 0.005 num_int4: 0 num_int8: 0 num_sparse_activations: 44 -tinyllama_ffn_sparse20_backend_CUDA_TORCH: - metric_value: 0.7687 +tinyllama_ffn_sparse20_backend_TORCH: + metric_value: 0.7697 + atol: 0.005 num_int4: 0 num_int8: 0 num_sparse_activations: 44 -tinyllama_int8_asym_data_free_ffn_sparse20_backend_TORCH: - metric_value: 0.7864 +tinyllama_int8_asym_data_free_ffn_sparse20_backend_CUDA_TORCH: + metric_value: 0.7952 + atol: 0.005 num_int4: 0 num_int8: 312 num_sparse_activations: 44 -tinyllama_int8_asym_data_free_ffn_sparse20_backend_CUDA_TORCH: - metric_value: 0.7906 +tinyllama_int8_asym_data_free_ffn_sparse20_backend_TORCH: + metric_value: 0.8013 + atol: 0.005 num_int4: 0 num_int8: 312 num_sparse_activations: 44 timm/deit3_small_patch16_224_backend_FP32: metric_value: 0.8135 + atol: 0.001 num_int4: 0 num_int8: 0 num_sparse_activations: 0 -timm/deit3_small_patch16_224_qkv_sparse20_fc1_sparse20_fc2_sparse30_backend_TORCH: - metric_value: 0.8097 +timm/deit3_small_patch16_224_qkv_sparse20_fc1_sparse20_fc2_sparse30_backend_CUDA_TORCH: + metric_value: 0.8102 + atol: 0.001 num_int4: 0 num_int8: 0 num_sparse_activations: 36 -timm/deit3_small_patch16_224_qkv_sparse20_fc1_sparse20_fc2_sparse30_backend_CUDA_TORCH: - metric_value: 0.8097 +timm/deit3_small_patch16_224_qkv_sparse20_fc1_sparse20_fc2_sparse30_backend_TORCH: + metric_value: 0.8102 + atol: 0.001 num_int4: 0 num_int8: 0 num_sparse_activations: 36 \ No newline at end of file diff --git a/tests/post_training/pipelines/lm_weight_compression.py b/tests/post_training/pipelines/lm_weight_compression.py index 31266d172f9..27479fe6a50 100644 --- a/tests/post_training/pipelines/lm_weight_compression.py +++ b/tests/post_training/pipelines/lm_weight_compression.py @@ -110,12 +110,14 @@ def prepare_preprocessor(self) -> None: 
self.preprocessor = AutoTokenizer.from_pretrained(self.model_id) def get_transform_calibration_fn(self): - def transform_fn(data, max_tokens=128): + def transform_fn(data, max_tokens=128, filter_bad_tokens=True): tokenized_text = self.preprocessor(data["text"], return_tensors="np") - - bad_tokens = self.preprocessor("", return_tensors="np")["input_ids"] raw_tokens = tokenized_text["input_ids"][0, :] - filtered_tokens = np.array(list(filter(lambda x: x not in bad_tokens, raw_tokens))) + if filter_bad_tokens: + bad_tokens = self.preprocessor("", return_tensors="np")["input_ids"] + filtered_tokens = np.array(list(filter(lambda x: x not in bad_tokens, raw_tokens))) + else: + filtered_tokens = raw_tokens tokenized_text["input_ids"] = np.expand_dims(filtered_tokens, 0) tokenized_text["attention_mask"] = tokenized_text["attention_mask"][:, : filtered_tokens.shape[0]] From 2201d88032e6aebc26b9a7d1f4fc8d03f9049d1d Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:10 +0800 Subject: [PATCH 49/76] update documentation --- .../torch/sparsify_activations/sparsify_activations_impl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index aacdc7fee2c..1d0d7e1c243 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -248,9 +248,9 @@ def sparsify_activations( TargetScope(patterns=[".*up_proj.*", ".*down_proj.*"]): 0.3, } - :param ignored_scope: Optional. It defines the nodes in the model graph that should be be ignored - during activation sparsification. Note that layers other than linear type are already filtered - out internally, so there is no need to mention them in `ignored_scope`. + :param ignored_scope: Optional. It defines the nodes in the model graph that should be be + ignored during activation sparsification. Note that unsupported layer types are already + filtered out internally, so there is no need to mention them in `ignored_scope`. :return: The sparsified model. 
""" From b6b2a3563ca106cfed581403ac70b450ac11a457 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:10 +0800 Subject: [PATCH 50/76] minor variable name fix --- .../experimental/sparsify_activations/test_components.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/torch/experimental/sparsify_activations/test_components.py b/tests/torch/experimental/sparsify_activations/test_components.py index 64acafd26e2..b2135febd8f 100644 --- a/tests/torch/experimental/sparsify_activations/test_components.py +++ b/tests/torch/experimental/sparsify_activations/test_components.py @@ -285,8 +285,8 @@ def test_different_hash(self, target_scope1: TargetScope, target_scope2: TargetS @pytest.mark.parametrize("target_scope,ref_target_names", TARGET_SCOPE_MATCH_DATA) def test_get_target_node_names_from_target_scope(self, target_scope: TargetScope, ref_target_names: List[str]): nncf_graph = NNCFGraphToTestIgnoredScope(CONV_TYPE, LINEAR_TYPE).nncf_graph - ignored_names = get_target_node_names_from_target_scope(target_scope, nncf_graph) - assert sorted(ignored_names) == sorted(ref_target_names) + target_names = get_target_node_names_from_target_scope(target_scope, nncf_graph) + assert sorted(target_names) == sorted(ref_target_names) @pytest.mark.parametrize("target_scope", WRONG_TARGET_SCOPE_MATCH_DATA) def test_wrong_target_scope(self, target_scope: TargetScope): From acf651035380a7d71e2abcef5d7df0b1f4feaa11 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:10 +0800 Subject: [PATCH 51/76] simplify _get_target_sparsity_by_node --- .../sparsify_activations_impl.py | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index 1d0d7e1c243..20c46cfa819 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -196,22 +196,20 @@ def _get_target_sparsity_by_node(self, graph: NNCFGraph) -> Dict[NNCFNode, float ignored_names = get_ignored_node_names_from_ignored_scope( self._ignored_scope, graph, strict=self._ignored_scope.validate ) - target_scope_vs_target_names = { - scope: get_target_node_names_from_target_scope(scope, graph, strict=scope.validate) - for scope in self._target_sparsity_by_scope - } target_sparsity_by_node = {} - for node in graph.get_nodes_by_metatypes(supported_metatypes): - if not should_consider_scope(node.node_name, ignored_scopes=ignored_names): - continue - for scope, target_sparsity in self._target_sparsity_by_scope.items(): - target_names = target_scope_vs_target_names[scope] - if should_consider_scope(node.node_name, ignored_scopes=[], target_scopes=target_names): - if node in target_sparsity_by_node: - raise nncf.ValidationError( - f'"{node.node_name}" is matched by multiple items in `target_sparsity_by_scope`.' 
- ) - target_sparsity_by_node[node] = target_sparsity + for scope, target_sparsity in self._target_sparsity_by_scope.items(): + target_names = get_target_node_names_from_target_scope(scope, graph, strict=scope.validate) + for node_name in target_names: + node = graph.get_node_by_name(node_name) + if node.metatype not in supported_metatypes or not should_consider_scope( + node.node_name, ignored_scopes=ignored_names + ): + continue + if node in target_sparsity_by_node: + raise nncf.ValidationError( + f'"{node.node_name}" is matched by multiple items in `target_sparsity_by_scope`.' + ) + target_sparsity_by_node[node] = target_sparsity if not target_sparsity_by_node: raise nncf.ValidationError("No layers to conduct activation sparsification.") return target_sparsity_by_node @@ -248,7 +246,7 @@ def sparsify_activations( TargetScope(patterns=[".*up_proj.*", ".*down_proj.*"]): 0.3, } - :param ignored_scope: Optional. It defines the nodes in the model graph that should be be + :param ignored_scope: Optional. It defines the nodes in the model graph that should be ignored during activation sparsification. Note that unsupported layer types are already filtered out internally, so there is no need to mention them in `ignored_scope`. :return: The sparsified model. From 9dbeea7de2d8eebb10235174c5db24141d6badfa Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:10 +0800 Subject: [PATCH 52/76] use nncf's quantile impl --- .../torch/sparsify_activations/torch_backend.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index 0cc2ba01e9c..7d26910b259 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -11,7 +11,6 @@ from typing import Dict, List, Type, TypeVar -import numpy as np import torch import torch.nn as nn @@ -23,6 +22,7 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.data import Dataset from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgoBackend +from nncf.tensor.functions.torch_numeric import quantile from nncf.torch.graph import operator_metatypes as om from nncf.torch.graph.transformations.commands import PTSharedFnInsertionCommand from nncf.torch.graph.transformations.commands import PTTargetPoint @@ -60,18 +60,15 @@ def __init__(self, target_sparsity: float, alpha: float = 0.2): @staticmethod def calculate_threshold(x: torch.Tensor, target_sparsity: float) -> torch.Tensor: """ - Calculates the threshold for sparsifying the input tensor if locations of `x.abs() <= threshold` are zeroed. + Calculates the threshold to sparsify the input tensor with target sparsity if locations of + `x.abs() <= threshold` are zeroed out. :param x: The input tensor. :param target_sparsity: The target sparsity level on the input tensor. :return: The threshold value. 
""" - # uses numpy's quantile implementation as torch's cannot handle large tensor - value = np.quantile( - x.detach().abs().cpu().numpy(), - q=target_sparsity, - ) - return torch.tensor(value, device=x.device, dtype=x.dtype) + value = quantile(x.detach().abs().view(-1), q=target_sparsity, axis=0) + return value.to(dtype=x.dtype) def forward(self, x: torch.Tensor) -> torch.Tensor: if not self._freeze: From 591e4db147f0dfcefe939e6ab0db901c506f6e88 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:10 +0800 Subject: [PATCH 53/76] refine TargetScope docstring --- .../torch/sparsify_activations/target_scope.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/target_scope.py b/nncf/experimental/torch/sparsify_activations/target_scope.py index 6d3cff3ad01..eb09718224a 100644 --- a/nncf/experimental/torch/sparsify_activations/target_scope.py +++ b/nncf/experimental/torch/sparsify_activations/target_scope.py @@ -22,12 +22,12 @@ @dataclass class TargetScope(IgnoredScope): """ - Specifies the target portions of model to conduct activation sparsification. + Specifies the target portions in a model graph. Example: .. code-block:: python - # Specified by node name: + # Specified by node names: node_names = ['node_1', 'node_2', 'node_3'] target_scope = TargetScope(names=node_names) @@ -35,7 +35,7 @@ class TargetScope(IgnoredScope): patterns = ['.*node_\\d'] target_scope = TargetScope(patterns=patterns) - # Specified by operation type: + # Specified by operation types, e.g., # OpenVINO opset https://docs.openvino.ai/latest/openvino_docs_ops_opset.html operation_types = ['Multiply', 'GroupConvolution', 'Interpolate'] @@ -45,6 +45,12 @@ class TargetScope(IgnoredScope): operation_types = ['Mul', 'Conv', 'Resize'] target_scope = TargetScope(types=operation_types) + # Specifies by subgraphs: + from nncf import Subgraph + target_scope = TargetScope(subgraphs=[ + Subgraph(inputs=["node_1"], outputs=["node_3"]) + ]) + **Note:** Operation types must be specified according to the model framework. :param names: List of target node names. From 9439b8297e7dad2e2aa6d57f77b55d19d6873914 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:10 +0800 Subject: [PATCH 54/76] use higher precision to calculate running_threshold --- .../sparsify_activations/torch_backend.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index 7d26910b259..ff165c84704 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -67,13 +67,12 @@ def calculate_threshold(x: torch.Tensor, target_sparsity: float) -> torch.Tensor :param target_sparsity: The target sparsity level on the input tensor. :return: The threshold value. 
""" - value = quantile(x.detach().abs().view(-1), q=target_sparsity, axis=0) - return value.to(dtype=x.dtype) + return quantile(x.detach().abs().view(-1), q=target_sparsity, axis=0) def forward(self, x: torch.Tensor) -> torch.Tensor: if not self._freeze: threshold = self.calculate_threshold(x, self.target_sparsity) - self._update(threshold) + self._update(threshold, dtype=x.dtype) mask = torch.le(x.abs(), self.running_threshold) x = torch.masked_fill(x, mask, 0.0) return x @@ -91,22 +90,25 @@ def reset_running_stats(self): def extra_repr(self) -> str: return f"target_sparsity={self.target_sparsity}" - def _update(self, threshold: torch.Tensor) -> torch.Tensor: + def _update(self, threshold: torch.Tensor, dtype: torch.dtype) -> torch.Tensor: """ Updates the running threshold by exponential moving average with decaying adjustment. The updating logic is similar to `pandas.DataFrame.ewm(adjust=True)`. :param threshold: The threshold value derived from this batch to update the running threshold. + :param dtype: Data type of the updated running threshold. :return: The updated running threshold. """ if self.num_batches_tracked == 0: - self.running_threshold = threshold + running_threshold = threshold else: beta = 1.0 - self.alpha - self.running_threshold = ( - threshold * self.alpha + self.running_threshold * beta * (1 - beta**self.num_batches_tracked) + old_running_threshold = self.running_threshold.to(device=threshold.device, dtype=torch.float64) + running_threshold = ( + threshold.to(torch.float64) * self.alpha + + old_running_threshold * beta * (1 - beta**self.num_batches_tracked) ) / (1 - beta ** (self.num_batches_tracked + 1)) - self.running_threshold = self.running_threshold.type(threshold.dtype) + self.running_threshold = running_threshold.type(dtype) self.num_batches_tracked += 1 return self.running_threshold From a90dc18db5a3f691fe96160729ff7f8170c95a68 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:11 +0800 Subject: [PATCH 55/76] delete `apply_sparsifiers` as it is not needed --- .../sparsify_activations/sparsify_activations_impl.py | 10 ---------- .../torch/sparsify_activations/torch_backend.py | 9 +++------ .../sparsify_activations/test_components.py | 2 +- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index 20c46cfa819..eeeee3c0469 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -91,16 +91,6 @@ def calibrate_sparsifiers(self, model: TModel, graph: NNCFGraph, dataset: Datase :return: The model with calibrated activation sparsifiers. """ - @abstractmethod - def apply_sparsifiers(self, model: TModel, graph: NNCFGraph) -> TModel: - """ - Freezes the activation sparsifiers and applies the sparsification to the model. - - :param model: The model with activation sparsifiers. - :param graph: The model's NNCF graph. - :return: The model with applied sparsification operations. 
- """ - class SparsifyActivationsAlgorithm: """ diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index ff165c84704..2ef8e3853c5 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -163,18 +163,15 @@ def insert_sparsifiers( return transformed_model def calibrate_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph, dataset: Dataset) -> NNCFNetwork: - for sparsifier in self.get_sparsifiers(model): + sparsifiers = self.get_sparsifiers(model) + for sparsifier in sparsifiers: sparsifier.reset_running_stats() sparsifier.freeze(False) with training_mode_switcher(model, is_training=False): with torch.no_grad(): self.do_inference(model, dataset) - return model - - def apply_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph) -> NNCFNetwork: - for sparsifier in self.get_sparsifiers(model): + for sparsifier in sparsifiers: sparsifier.freeze(True) - model.nncf.rebuild_graph() return model @staticmethod diff --git a/tests/torch/experimental/sparsify_activations/test_components.py b/tests/torch/experimental/sparsify_activations/test_components.py index b2135febd8f..9ded37dc298 100644 --- a/tests/torch/experimental/sparsify_activations/test_components.py +++ b/tests/torch/experimental/sparsify_activations/test_components.py @@ -210,7 +210,7 @@ def model_forward_pre_hook(model: NNCFNetwork, args): with mocker.patch.object(backend, "get_sparsifiers", return_value=[mock_sparsifier]): backend.calibrate_sparsifiers(model, graph, dataset) - assert mock_sparsifier._freeze is False + assert mock_sparsifier._freeze is True assert num_model_forward_calls == dataset.get_length() def create_model_and_dataset(self, compress_weights: bool = False): From 53f6b26a846d7d6eaf80ee79c1e8dc3438d466d3 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:11 +0800 Subject: [PATCH 56/76] make `freeze` a property --- .../sparsify_activations_impl.py | 1 - .../torch/sparsify_activations/torch_backend.py | 17 +++++++++++------ .../sparsify_activations/test_components.py | 8 ++++---- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py index eeeee3c0469..83a7a418911 100644 --- a/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py +++ b/nncf/experimental/torch/sparsify_activations/sparsify_activations_impl.py @@ -156,7 +156,6 @@ def do_sparsification( """ model = self._backend_entity.insert_sparsifiers(model, graph, target_sparsity_by_node) model = self._backend_entity.calibrate_sparsifiers(model, graph, dataset) - model = self._backend_entity.apply_sparsifiers(model, graph) return model def _set_backend_entity(self, model: TModel) -> None: diff --git a/nncf/experimental/torch/sparsify_activations/torch_backend.py b/nncf/experimental/torch/sparsify_activations/torch_backend.py index 2ef8e3853c5..a10f12c6518 100644 --- a/nncf/experimental/torch/sparsify_activations/torch_backend.py +++ b/nncf/experimental/torch/sparsify_activations/torch_backend.py @@ -69,17 +69,22 @@ def calculate_threshold(x: torch.Tensor, target_sparsity: float) -> torch.Tensor """ return quantile(x.detach().abs().view(-1), q=target_sparsity, axis=0) + @property + def freeze(self): + return self._freeze + + @freeze.setter + def freeze(self, value: bool): + self._freeze = value + 
def forward(self, x: torch.Tensor) -> torch.Tensor: - if not self._freeze: + if not self.freeze: threshold = self.calculate_threshold(x, self.target_sparsity) self._update(threshold, dtype=x.dtype) mask = torch.le(x.abs(), self.running_threshold) x = torch.masked_fill(x, mask, 0.0) return x - def freeze(self, freeze: bool = True): - self._freeze = freeze - def reset_running_stats(self): """ Resets the running threshold and the number of tracked batches to the initial stage. @@ -166,12 +171,12 @@ def calibrate_sparsifiers(self, model: NNCFNetwork, graph: NNCFGraph, dataset: D sparsifiers = self.get_sparsifiers(model) for sparsifier in sparsifiers: sparsifier.reset_running_stats() - sparsifier.freeze(False) + sparsifier.freeze = False with training_mode_switcher(model, is_training=False): with torch.no_grad(): self.do_inference(model, dataset) for sparsifier in sparsifiers: - sparsifier.freeze(True) + sparsifier.freeze = True return model @staticmethod diff --git a/tests/torch/experimental/sparsify_activations/test_components.py b/tests/torch/experimental/sparsify_activations/test_components.py index 9ded37dc298..9c5fde1c9e5 100644 --- a/tests/torch/experimental/sparsify_activations/test_components.py +++ b/tests/torch/experimental/sparsify_activations/test_components.py @@ -112,7 +112,7 @@ def test_forward_before_calibration(self, use_cuda: bool, dtype: torch.dtype): device = self.device input_tensor = torch.rand([3, 3], device=device, dtype=dtype) sparsifier = ActivationsSparsifier(target_sparsity=0.9).to(device) - assert sparsifier._freeze is True + assert sparsifier.freeze is True assert not sparsifier.num_batches_tracked.is_nonzero() assert sparsifier.running_threshold.isneginf() output_tensor = sparsifier(input_tensor) @@ -129,7 +129,7 @@ def test_forward_before_calibration(self, use_cuda: bool, dtype: torch.dtype): def test_forward_during_calibration(self, use_cuda: bool, desc: SparsifierForwardTestDesc): device = self.device sparsifier = ActivationsSparsifier(desc.target_sparsity, desc.alpha).to(device) - sparsifier.freeze(False) + sparsifier.freeze = False running_thresholds = [] outputs = [] with torch.no_grad(): @@ -198,7 +198,7 @@ def test_calibrate_sparsifiers(self, mocker): graph = model.nncf.get_graph() backend = PTSparsifyActivationsAlgoBackend() mock_sparsifier = ActivationsSparsifier(0.5, 0.1) - mock_sparsifier.freeze(True) + mock_sparsifier.freeze = True num_model_forward_calls = 0 def model_forward_pre_hook(model: NNCFNetwork, args): @@ -210,7 +210,7 @@ def model_forward_pre_hook(model: NNCFNetwork, args): with mocker.patch.object(backend, "get_sparsifiers", return_value=[mock_sparsifier]): backend.calibrate_sparsifiers(model, graph, dataset) - assert mock_sparsifier._freeze is True + assert mock_sparsifier.freeze is True assert num_model_forward_calls == dataset.get_length() def create_model_and_dataset(self, compress_weights: bool = False): From 8a5fc4acfaf04df917f87e0d45a80601477dab51 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:11 +0800 Subject: [PATCH 57/76] enhanace reproducibility --- .../sparsify_activations/model_scope.py | 2 + .../sparsify_activations/pipelines.py | 48 +++++++++++++++---- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/tests/post_training/experimental/sparsify_activations/model_scope.py b/tests/post_training/experimental/sparsify_activations/model_scope.py index 190b81c843c..5a89847829d 100644 --- a/tests/post_training/experimental/sparsify_activations/model_scope.py +++ 
b/tests/post_training/experimental/sparsify_activations/model_scope.py @@ -40,6 +40,7 @@ }, }, "backends": [BackendType.TORCH, BackendType.CUDA_TORCH], + "batch_size": 8, }, { "reported_name": "tinyllama_int8_asym_data_free_ffn_sparse20", @@ -56,6 +57,7 @@ }, }, "backends": [BackendType.TORCH, BackendType.CUDA_TORCH], + "batch_size": 8, }, { "reported_name": "timm/deit3_small_patch16_224", diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py index d169da2aa30..8627e295b66 100644 --- a/tests/post_training/experimental/sparsify_activations/pipelines.py +++ b/tests/post_training/experimental/sparsify_activations/pipelines.py @@ -13,7 +13,7 @@ from dataclasses import dataclass from dataclasses import field from pathlib import Path -from typing import Optional +from typing import Dict, List, Optional import numpy as np import openvino as ov @@ -28,7 +28,8 @@ import nncf from nncf.experimental.torch.sparsify_activations import sparsify_activations -from nncf.experimental.torch.sparsify_activations.torch_backend import SparsifyActivationsAlgoBackend +from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgoBackend +from nncf.experimental.torch.sparsify_activations.torch_backend import PTSparsifyActivationsAlgoBackend from tests.post_training.pipelines.base import LIMIT_LENGTH_OF_STATUS from tests.post_training.pipelines.base import PT_BACKENDS from tests.post_training.pipelines.base import BackendType @@ -218,10 +219,24 @@ def prepare_model(self): self._dump_model_fp32() def get_transform_calibration_fn(self): - original_fn = super().get_transform_calibration_fn() - - def transform_fn(data): - inputs = original_fn(data, max_tokens=128, filter_bad_tokens=False) + process_one = super().get_transform_calibration_fn() + + def transform_fn(chunk: List[Dict]): + samples = [process_one(data, max_tokens=128, filter_bad_tokens=False) for data in chunk] + inputs = {} + for input_name, sample_value in samples[0].items(): + if isinstance(sample_value, torch.Tensor): + inputs[input_name] = torch.cat([sample[input_name] for sample in samples], dim=0) + elif isinstance(sample_value, np.ndarray): + inputs[input_name] = np.concatenate([sample[input_name] for sample in samples], axis=0) + elif isinstance(sample_value, ov.Tensor): + shape = sample_value.get_shape() + shape[0] = len(samples) + inputs[input_name] = ov.Tensor(sample_value.get_element_type(), shape) + else: + raise RuntimeError( + f"Failed to generate calibration set for {input_name} in type {type(sample_value)}" + ) if self.backend == BackendType.CUDA_TORCH: for input_name in inputs: inputs[input_name] = torch.from_numpy(inputs[input_name]).cuda() @@ -230,11 +245,16 @@ def transform_fn(data): return transform_fn def prepare_calibration_dataset(self): - dataset = load_dataset("wikitext", "wikitext-2-v1", split="train", revision="b08601e") - dataset = dataset.filter(lambda example: len(example["text"].split()) > 256) subset_size = self.compression_params.get("subset_size") or self.DEFAULT_SUBSET_SIZE - dataset = dataset.select(range(subset_size)) - self.calibration_dataset = nncf.Dataset(dataset, self.get_transform_calibration_fn()) + dataset = ( + load_dataset("wikitext", "wikitext-2-v1", split="train", revision="b08601e") + .filter(lambda example: len(example["text"].split()) > 256) + .shuffle(seed=42) + .select(range(subset_size)) + .to_list() + ) + chunks = [dataset[i : i + self.batch_size] for i in 
range(0, subset_size, self.batch_size)] + self.calibration_dataset = nncf.Dataset(chunks, self.get_transform_calibration_fn()) def save_compressed_model(self): if self.backend == BackendType.CUDA_TORCH: @@ -262,6 +282,14 @@ def _dump_model_fp32(self): else: super()._dump_model_fp32() + def _compress(self): + super()._compress() + if self.backend in PT_BACKENDS: + # This helps reproducibility but is not needed in actual usage. + for sparsifier in PTSparsifyActivationsAlgoBackend.get_sparsifiers(self.compressed_model): + original_dtype = sparsifier.running_threshold.dtype + sparsifier.running_threshold = sparsifier.running_threshold.half().to(original_dtype) + class ImageClassificationTimmSparsifyActivations(SAPipelineMixin, ImageClassificationTimm): DEFAULT_SUBSET_SIZE = 256 From 6e358514254d54d07f8efef630bb9907843d23f2 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:11 +0800 Subject: [PATCH 58/76] use fp16 on cuda --- .../experimental/sparsify_activations/pipelines.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py index 8627e295b66..f51ee4a74bd 100644 --- a/tests/post_training/experimental/sparsify_activations/pipelines.py +++ b/tests/post_training/experimental/sparsify_activations/pipelines.py @@ -148,6 +148,7 @@ def collect_data_from_stdout(self, stdout: str): self.run_info.stats_from_output = stats @set_torch_seed(seed=42) + @torch.no_grad() def _compress(self): """ Actual call of weight compression and/or activation sparsification. @@ -218,6 +219,10 @@ def prepare_model(self): if not (self.fp32_model_dir / self.OV_MODEL_NAME).exists(): self._dump_model_fp32() + # Use FP16 for CUDA_TORCH backend as it is more common when running LLM on CUDA. 
+ if self.backend == BackendType.CUDA_TORCH: + self.model_hf.half() + def get_transform_calibration_fn(self): process_one = super().get_transform_calibration_fn() @@ -259,7 +264,7 @@ def prepare_calibration_dataset(self): def save_compressed_model(self): if self.backend == BackendType.CUDA_TORCH: export_from_model( - self.model_hf, self.output_model_dir, stateful=False, compression_option="fp32", device="cuda" + self.model_hf.float(), self.output_model_dir, stateful=False, compression_option="fp32", device="cuda" ) else: super().save_compressed_model() From 9171f0c9e0487e701f9332271cb6f4188d22c459 Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:11 +0800 Subject: [PATCH 59/76] fix int8+sparse export --- .../experimental/sparsify_activations/pipelines.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py index f51ee4a74bd..a48e17cff6e 100644 --- a/tests/post_training/experimental/sparsify_activations/pipelines.py +++ b/tests/post_training/experimental/sparsify_activations/pipelines.py @@ -30,6 +30,8 @@ from nncf.experimental.torch.sparsify_activations import sparsify_activations from nncf.experimental.torch.sparsify_activations.sparsify_activations_impl import SparsifyActivationsAlgoBackend from nncf.experimental.torch.sparsify_activations.torch_backend import PTSparsifyActivationsAlgoBackend +from nncf.torch.quantization.layers import AsymmetricWeightsDecompressor +from nncf.torch.quantization.layers import SymmetricWeightsDecompressor from tests.post_training.pipelines.base import LIMIT_LENGTH_OF_STATUS from tests.post_training.pipelines.base import PT_BACKENDS from tests.post_training.pipelines.base import BackendType @@ -263,8 +265,12 @@ def prepare_calibration_dataset(self): def save_compressed_model(self): if self.backend == BackendType.CUDA_TORCH: + self.model_hf.float() + for module in self.model_hf.nncf.modules(): + if isinstance(module, (AsymmetricWeightsDecompressor, SymmetricWeightsDecompressor)): + module.result_dtype = torch.float32 export_from_model( - self.model_hf.float(), self.output_model_dir, stateful=False, compression_option="fp32", device="cuda" + self.model_hf, self.output_model_dir, stateful=False, compression_option="fp32", device="cuda" ) else: super().save_compressed_model() From b652785c5341422fd0e6b2c6d5d0d6f722c7adbd Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:11 +0800 Subject: [PATCH 60/76] update metric --- .../sparsify_activations/pipelines.py | 2 +- .../sparsify_activations/reference_data.yaml | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/post_training/experimental/sparsify_activations/pipelines.py b/tests/post_training/experimental/sparsify_activations/pipelines.py index a48e17cff6e..82da57caa86 100644 --- a/tests/post_training/experimental/sparsify_activations/pipelines.py +++ b/tests/post_training/experimental/sparsify_activations/pipelines.py @@ -296,7 +296,7 @@ def _dump_model_fp32(self): def _compress(self): super()._compress() if self.backend in PT_BACKENDS: - # This helps reproducibility but is not needed in actual usage. + # This helps reproducibility but is not needed in actual use. 
for sparsifier in PTSparsifyActivationsAlgoBackend.get_sparsifiers(self.compressed_model): original_dtype = sparsifier.running_threshold.dtype sparsifier.running_threshold = sparsifier.running_threshold.half().to(original_dtype) diff --git a/tests/post_training/experimental/sparsify_activations/reference_data.yaml b/tests/post_training/experimental/sparsify_activations/reference_data.yaml index da91609696c..e379cf45ad0 100644 --- a/tests/post_training/experimental/sparsify_activations/reference_data.yaml +++ b/tests/post_training/experimental/sparsify_activations/reference_data.yaml @@ -4,26 +4,26 @@ tinyllama_backend_FP32: num_int8: 0 num_sparse_activations: 0 tinyllama_ffn_sparse20_backend_CUDA_TORCH: - metric_value: 0.7970 - atol: 0.005 + metric_value: 0.7858 + atol: 0.02 num_int4: 0 num_int8: 0 num_sparse_activations: 44 tinyllama_ffn_sparse20_backend_TORCH: - metric_value: 0.7697 - atol: 0.005 + metric_value: 0.7882 + atol: 0.02 num_int4: 0 num_int8: 0 num_sparse_activations: 44 tinyllama_int8_asym_data_free_ffn_sparse20_backend_CUDA_TORCH: - metric_value: 0.7952 - atol: 0.005 + metric_value: 0.8044 + atol: 0.02 num_int4: 0 num_int8: 312 num_sparse_activations: 44 tinyllama_int8_asym_data_free_ffn_sparse20_backend_TORCH: - metric_value: 0.8013 - atol: 0.005 + metric_value: 0.7977 + atol: 0.02 num_int4: 0 num_int8: 312 num_sparse_activations: 44 From c67753f504745403299c922ca2cbe1d47b0abc3f Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:11 +0800 Subject: [PATCH 61/76] update ref metric --- .../sparsify_activations/reference_data.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/post_training/experimental/sparsify_activations/reference_data.yaml b/tests/post_training/experimental/sparsify_activations/reference_data.yaml index e379cf45ad0..3e368a9c185 100644 --- a/tests/post_training/experimental/sparsify_activations/reference_data.yaml +++ b/tests/post_training/experimental/sparsify_activations/reference_data.yaml @@ -4,26 +4,26 @@ tinyllama_backend_FP32: num_int8: 0 num_sparse_activations: 0 tinyllama_ffn_sparse20_backend_CUDA_TORCH: - metric_value: 0.7858 - atol: 0.02 + metric_value: 0.7818 + atol: 0.025 num_int4: 0 num_int8: 0 num_sparse_activations: 44 tinyllama_ffn_sparse20_backend_TORCH: - metric_value: 0.7882 - atol: 0.02 + metric_value: 0.7879 + atol: 0.025 num_int4: 0 num_int8: 0 num_sparse_activations: 44 tinyllama_int8_asym_data_free_ffn_sparse20_backend_CUDA_TORCH: metric_value: 0.8044 - atol: 0.02 + atol: 0.025 num_int4: 0 num_int8: 312 num_sparse_activations: 44 tinyllama_int8_asym_data_free_ffn_sparse20_backend_TORCH: - metric_value: 0.7977 - atol: 0.02 + metric_value: 0.7846 + atol: 0.030 num_int4: 0 num_int8: 312 num_sparse_activations: 44 From 303a66a98092f32b3c10e7ab8f201bbc18cdcc5d Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:11 +0800 Subject: [PATCH 62/76] Initial documentation of sparsify_activations algorithm --- .../ActivationSparsity.md | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 nncf/experimental/torch/sparsify_activations/ActivationSparsity.md diff --git a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md new file mode 100644 index 00000000000..0d27019bfbe --- /dev/null +++ b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md @@ -0,0 +1,62 @@ +# Sparsify Activations + +The `Sparsify Activations` algorithm introduces sparsity into the 
activations of a neural network, reducing the number of active neurons during inference when using optimized sparse kernels. + +This algorithm sparsifies the input of a layer by zeroing out neuron \( x \) if \( \text{abs}(x) \leq \tau \), where \( \tau \) is a static threshold determined based on statistical information from a calibration dataset to meet the desired level of sparsity. + +## Example Usage + +Below is an example of applying Activation Sparsity algorithm to a torch model. Optionally, you can also call `nncf.compress_weights()` before sparsification to get an optimized model with quantized weights and sparse activations. + +```python +import nncf +from nncf.experimental.torch.sparsify_activations import sparsify_activations, TargetScope + +model = ... # Your model +dataset = ... # Calibration set + +# (Optional) Weight-only quantization +model = nncf.compress_weights( + model=model, + mode=nncf.CompressWeightsMode.INT8_ASYM, + dataset=dataset, +) + +# Activation sparsification +model = sparsify_activations( + model=model, + dataset=dataset, + target_sparsity_by_scope={ + TargetScope(patterns=[".*up_proj.*", ".*gate_proj.*"]): 0.3, + TargetScope(patterns=[".*down_proj.*",]): 0.5, + }, + ignored_scope=nncf.IgnoredScope(), +) +``` + +In this example, we first conduct data-free INT8 asymmetric weight only quantization on the model. Then we do activation sparsification, setting the target activation sparsity as 30% for all the layers named "up_proj" and "gate_proj", and 50% for layers named "down_proj". + +## Interface Details + +- `model`: The model to be sparsified. Currently only Torch backend is supported. +- `dataset`: A dataset to calibrate the thresholds that this algorithm uses to sparsify the neurons. +- `target_sparsity_by_scope`: Defines the target activation sparsity level for specified layers. For each item in this dict, the key is an instance of `TargetScope` class representing the layers to match in the model's NNCF graph; the corresponding value is a float number in the range [0, 1] representing the target sparsity level. + + - Example: + + ```python + { + # Target sparsity is 60% for node "Dummy/Linear[layer]/linear_0" in the model graph + TargetScope(names=["Dummy/Linear[layer]/linear_0"]): 0.6, + # Target sparsity is 30% for the layers whose name contains "up_proj" or "down_proj". + TargetScope(patterns=[".*up_proj.*", ".*down_proj.*"]): 0.3, + } + ``` + +- `ignored_scope`: Optional. It defines the nodes in the model graph that should be ignored by this algorithm. Note that unsupported layer types are already filtered out internally, so there is no need to mention them in `ignored_scope`. + +## Known Limitations + +1. Activation sparsification currently supports only linear layers in the Torch backend. +2. The actual activation sparsity during inference might deviate from the target. This is because the algorithm uses static thresholds, which inevitably cannot accommodate all possible inputs. A good estimate of the thresholds may depend on the size of the calibration set, the batch size, and the quality of samples compared with the actual inference data. +3. There is a tradeoff between model accuracy and activation sparsity. For large language models like [Llama](https://llama.meta.com), it is recommended to start with 30%~50% sparsity for the linear layers in feed-forward networks. 
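The sparsifier mechanics introduced in the patches above (quantile-based threshold, exponential moving average with decaying adjustment, and the freeze switch) fit in a few lines. Below is a condensed, self-contained sketch of the same idea; `ToySparsifier` is an illustrative stand-in, not the NNCF `ActivationsSparsifier` class:

```python
import torch


class ToySparsifier(torch.nn.Module):
    """Calibrates a static magnitude threshold on sample batches, then freezes it."""

    def __init__(self, target_sparsity: float, alpha: float = 0.2):
        super().__init__()
        self.target_sparsity = target_sparsity
        self.alpha = alpha  # smoothing factor of the moving average
        self.freeze = True
        self.num_batches_tracked = 0
        self.running_threshold = torch.tensor(float("-inf"))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not self.freeze:
            # tau = quantile of |x| at the target sparsity level; torch.quantile is
            # fine at toy sizes (NNCF swaps in its own quantile for large tensors).
            tau = torch.quantile(x.detach().abs().flatten().float(), q=self.target_sparsity)
            self._update(tau.to(x.dtype))
        # Zero out every element whose magnitude is at or below the threshold; with the
        # initial -inf threshold the input passes through unchanged.
        return x.masked_fill(x.abs() <= self.running_threshold, 0.0)

    def _update(self, threshold: torch.Tensor) -> None:
        # Exponential moving average with decaying adjustment, as in pandas ewm(adjust=True).
        if self.num_batches_tracked == 0:
            self.running_threshold = threshold
        else:
            beta = 1.0 - self.alpha
            n = self.num_batches_tracked
            self.running_threshold = (
                threshold * self.alpha + self.running_threshold * beta * (1 - beta**n)
            ) / (1 - beta ** (n + 1))
        self.num_batches_tracked += 1
```

After a few calibration batches run with `freeze = False`, roughly `target_sparsity` of the elements of inputs similar to the calibration data fall at or below the frozen threshold and are zeroed.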
From e90f39ac1117ed991ecf54e3e772867a8e6d4b70 Mon Sep 17 00:00:00 2001 From: Vui Seng Chua Date: Tue, 16 Jul 2024 16:39:12 +0800 Subject: [PATCH 63/76] Revise ActivationSparsity.md --- .../ActivationSparsity.md | 48 +++++++++++++------ 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md index 0d27019bfbe..20d7af82237 100644 --- a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md +++ b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md @@ -1,12 +1,28 @@ -# Sparsify Activations +### Activation Sparsity -The `Sparsify Activations` algorithm introduces sparsity into the activations of a neural network, reducing the number of active neurons during inference when using optimized sparse kernels. +The `sparsify_activations` algorithm is a post-training method designed to introduce sparsity into the activations of a neural network. This process reduces the number of active neurons during inference by masking out neurons based on their magnitude relative to a calibrated static threshold. -This algorithm sparsifies the input of a layer by zeroing out neuron \( x \) if \( \text{abs}(x) \leq \tau \), where \( \tau \) is a static threshold determined based on statistical information from a calibration dataset to meet the desired level of sparsity. +The algorithm sparsifies the input of a layer by applying the following function: -## Example Usage +$$ +sparsify(X) = +\begin{cases} +\cdot & \text{if } |\cdot| \ge \tau \\ +0 & \text{if } |\cdot| < \tau +\end{cases} +$$ -Below is an example of applying Activation Sparsity algorithm to a torch model. Optionally, you can also call `nncf.compress_weights()` before sparsification to get an optimized model with quantized weights and sparse activations. +The magnitude threshold $\tau$ that corresponds to a desired level of sparsity is determined by the statistical quantile value of activations collected via an input dataset: + +$$ +\tau = Quantile(|X|,\ target\ sparsity) +$$ + +`sparsify_activations` automates the process of identifying the pruning thresholds based on user-specified layers, target sparsities and input dataset. + +#### Example Usage + +Below is an example of applying `sparsify_activations` algorithm to a torch model. Optionally, you can also call `nncf.compress_weights()` before sparsification to get an optimized model with quantized weights and sparse activations. ```python import nncf @@ -34,13 +50,13 @@ model = sparsify_activations( ) ``` -In this example, we first conduct data-free INT8 asymmetric weight only quantization on the model. Then we do activation sparsification, setting the target activation sparsity as 30% for all the layers named "up_proj" and "gate_proj", and 50% for layers named "down_proj". +In this example, we first conduct data-free INT8 asymmetric weight quantization on the model. Then we do activation sparsification, setting the target activation sparsity to 30% for all the layers containing the keywords "up_proj" and "gate_proj", and 50% for layers with "down_proj" keyword. -## Interface Details +#### Interface Details - `model`: The model to be sparsified. Currently only Torch backend is supported. -- `dataset`: A dataset to calibrate the thresholds that this algorithm uses to sparsify the neurons. -- `target_sparsity_by_scope`: Defines the target activation sparsity level for specified layers. 
For each item in this dict, the key is an instance of `TargetScope` class representing the layers to match in the model's NNCF graph; the corresponding value is a float number in the range [0, 1] representing the target sparsity level. +- `dataset`: A dataset to calibrate the pruning thresholds. **TODO** NNCF Dataset +- `target_sparsity_by_scope`: A dictionary defines the target activation sparsity level for specified layers. For each item, the key is an instance of `TargetScope` class representing the layers to match in the model's NNCF graph; the corresponding value is a float number in the range [0, 1] representing the target sparsity level. `TargetScope` supports absolute and REGEX-based name matching. - Example: @@ -53,10 +69,14 @@ In this example, we first conduct data-free INT8 asymmetric weight only quantiza } ``` -- `ignored_scope`: Optional. It defines the nodes in the model graph that should be ignored by this algorithm. Note that unsupported layer types are already filtered out internally, so there is no need to mention them in `ignored_scope`. +- `ignored_scope`: Optional. It defines the nodes in the model graph that should be ignored by this algorithm. Note that unsupported layer types are already filtered out internally, so there is no need to mention them in `ignored_scope`. The algorithm currently only supports Linear layer. + + +#### Evaluation results +> TODO -## Known Limitations +#### Known Limitations -1. Activation sparsification currently supports only linear layers in the Torch backend. -2. The actual activation sparsity during inference might deviate from the target. This is because the algorithm uses static thresholds, which inevitably cannot accommodate all possible inputs. A good estimate of the thresholds may depend on the size of the calibration set, the batch size, and the quality of samples compared with the actual inference data. -3. There is a tradeoff between model accuracy and activation sparsity. For large language models like [Llama](https://llama.meta.com), it is recommended to start with 30%~50% sparsity for the linear layers in feed-forward networks. +1. When used with `nncf.compress_weight`, only int8 is supported. can it work before or after? **TODO** +2. Actual activation sparsity during inference is dynamic and per input basis, deviation from the target should be expected. In our local experiments, the statistical mean of actual activation sparsity aligned to the target when thresholds are calibrated on datasets similar to the final task. +3. Similar to other compression methods, model accuracy and activation sparsity are trade-off at play. For large language models like [Llama](https://llama.meta.com), it is recommended to start with 30%~50% sparsity for the linear layers in feed-forward networks. 
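Because the thresholds are static, the realized sparsity at inference time can drift from the target, as the known limitations above note. One way to sanity-check a sparsified model is to record the fraction of zeroed input elements per layer with forward pre-hooks. The helper below is an illustrative sketch (not an NNCF API), assuming the model takes a single input tensor:

```python
import torch


def measure_input_sparsity(model: torch.nn.Module, layer_names, batches):
    """Returns the average observed input sparsity for each selected layer."""
    stats = {name: [] for name in layer_names}

    def make_hook(name):
        def hook(module, args):
            x = args[0]  # first positional input of the layer
            stats[name].append((x == 0).float().mean().item())

        return hook

    handles = [
        module.register_forward_pre_hook(make_hook(name))
        for name, module in model.named_modules()
        if name in layer_names
    ]
    try:
        with torch.no_grad():
            for batch in batches:
                model(batch)
    finally:
        for handle in handles:
            handle.remove()
    return {name: sum(values) / max(len(values), 1) for name, values in stats.items()}
```

Comparing these averages against the values passed in `target_sparsity_by_scope` on held-out data gives a quick estimate of how far the calibrated thresholds deviate on the actual inference distribution.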
From 4b8eb09a84f95da0d1f6a7a5c3a5dd960a7a063e Mon Sep 17 00:00:00 2001 From: "Pan, Yujie" Date: Tue, 16 Jul 2024 16:39:12 +0800 Subject: [PATCH 64/76] update readme --- .../ActivationSparsity.md | 74 +++++++++++++++++-- 1 file changed, 68 insertions(+), 6 deletions(-) diff --git a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md index 20d7af82237..d3e4d9f7ca3 100644 --- a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md +++ b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md @@ -7,8 +7,8 @@ The algorithm sparsifies the input of a layer by applying the following function $$ sparsify(X) = \begin{cases} -\cdot & \text{if } |\cdot| \ge \tau \\ -0 & \text{if } |\cdot| < \tau +X & \text{if } |X| \ge \tau \\ +0 & \text{if } |X| < \tau \end{cases} $$ @@ -55,8 +55,8 @@ In this example, we first conduct data-free INT8 asymmetric weight quantization #### Interface Details - `model`: The model to be sparsified. Currently only Torch backend is supported. -- `dataset`: A dataset to calibrate the pruning thresholds. **TODO** NNCF Dataset -- `target_sparsity_by_scope`: A dictionary defines the target activation sparsity level for specified layers. For each item, the key is an instance of `TargetScope` class representing the layers to match in the model's NNCF graph; the corresponding value is a float number in the range [0, 1] representing the target sparsity level. `TargetScope` supports absolute and REGEX-based name matching. +- `dataset`: An `nncf.Dataset` instance used to calibrate the pruning thresholds. +- `target_sparsity_by_scope`: A dictionary that defines the target activation sparsity level for specified layers. For each item, the key is an instance of `TargetScope` class representing the layers to match in the model's NNCF graph; the corresponding value is a float number in the range [0, 1] representing the target sparsity level. `TargetScope` supports absolute and REGEX-based name matching. - Example: @@ -73,10 +73,72 @@ In this example, we first conduct data-free INT8 asymmetric weight quantization #### Evaluation results -> TODO +Here is the word perplexity for different language models on a subset of [wikitext dataset](https://arxiv.org/pdf/1609.07843.pdf), with maximum context length set as 2048. In the table, "int8_asym" means the model weights are asymmetrically quantized to int8. "Avg. Activation Sparsity" column shows the average activation sparsity on the evaluation samples. "up/gate/down" means the up, gate, and down projection layers in the [GLU](https://arxiv.org/abs/1612.08083)-style feed forward networks. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+| Model                                | Mode                          | Avg. Activation Sparsity | Word Perplexity (↓) |
+| :----------------------------------- | :---------------------------- | :----------------------- | :------------------ |
+| meta-llama/Llama-2-7b-hf              | fp32                          | -                        | 9.242               |
+| meta-llama/Llama-2-7b-hf              | sparse_activation             | up/gate30% + down50%     | 9.508               |
+| meta-llama/Llama-2-7b-hf              | int8_asym + sparse_activation | up/gate30% + down50%     | 9.511               |
+| meta-llama/Meta-Llama-3-8B-Instruct   | fp32                          | -                        | 10.802              |
+| meta-llama/Meta-Llama-3-8B-Instruct   | sparse_activation             | up/gate30% + down50%     | 11.294              |
+| meta-llama/Meta-Llama-3-8B-Instruct   | int8_asym + sparse_activation | up/gate30% + down50%     | 11.302              |
+| mistralai/Mixtral-8x7B-Instruct-v0.1  | fp32                          | -                        | 6.224               |
+| mistralai/Mixtral-8x7B-Instruct-v0.1  | sparse_activation             | up/gate40% + down50%     | 6.561               |
+| mistralai/Mixtral-8x7B-Instruct-v0.1  | int8_asym + sparse_activation | up/gate40% + down50%     | 6.579               |
 
 #### Known Limitations
 
-1. When used with `nncf.compress_weight`, only int8 is supported. can it work before or after? **TODO**
+1. Currently activation sparsity only supports Torch backend. Consequently, this restricts the available compression modes to 8-bit integer modes when using `nncf.compress_weight` before activation sparsification. More information on supported modes can be found at [Weights Compression](../../../../docs/usage/post_training_compression/weights_compression/Usage.md#limitations).
 2. Actual activation sparsity during inference is dynamic and per input basis, deviation from the target should be expected. In our local experiments, the statistical mean of actual activation sparsity aligned to the target when thresholds are calibrated on datasets similar to the final task.
 3. Similar to other compression methods, model accuracy and activation sparsity are trade-off at play. For large language models like [Llama](https://llama.meta.com), it is recommended to start with 30%~50% sparsity for the linear layers in feed-forward networks.

From 238d6eb70c370fc9b129b4ad6f0630049d3857b4 Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:12 +0800
Subject: [PATCH 65/76] style fix

---
 .../torch/sparsify_activations/ActivationSparsity.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
index d3e4d9f7ca3..b803ffbb7ed 100644
--- a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
+++ b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
@@ -4,11 +4,11 @@ The `sparsify_activations` algorithm is a post-training method designed to intro
 
 The algorithm sparsifies the input of a layer by applying the following function:
 
-$$ 
-sparsify(X) = 
-\begin{cases} 
+$$
+sparsify(X) =
+\begin{cases}
 X & \text{if } |X| \ge \tau \\
-0 & \text{if } |X| < \tau 
+0 & \text{if } |X| < \tau
 \end{cases}
 $$
@@ -71,8 +71,8 @@ In this example, we first conduct data-free INT8 asymmetric weight quantization
 
 - `ignored_scope`: Optional. It defines the nodes in the model graph that should be ignored by this algorithm. Note that unsupported layer types are already filtered out internally, so there is no need to mention them in `ignored_scope`. The algorithm currently only supports Linear layer.
 
-
 #### Evaluation results
+
 Here is the word perplexity for different language models on a subset of [wikitext dataset](https://arxiv.org/pdf/1609.07843.pdf), with maximum context length set as 2048. In the table, "int8_asym" means the model weights are asymmetrically quantized to int8. "Avg. Activation Sparsity" column shows the average activation sparsity on the evaluation samples. "up/gate/down" means the up, gate, and down projection layers in the [GLU](https://arxiv.org/abs/1612.08083)-style feed forward networks.

From cb39cfd071cbaf9fd0d055ee2e8ec09274d6e157 Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:12 +0800
Subject: [PATCH 66/76] update main readme

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 9929b92255c..3e2c5c983cb 100644
--- a/README.md
+++ b/README.md
@@ -37,6 +37,7 @@ learning frameworks.
 | :------------------------------------------------------------------------------------------------------- | :-------: | :-------: | :-----------: | :-----------: |
 | [Post-Training Quantization](./docs/usage/post_training_compression/post_training_quantization/Usage.md)  | Supported | Supported |   Supported   |   Supported   |
 | [Weights Compression](./docs/usage/post_training_compression/weights_compression/Usage.md)                | Supported | Supported | Not supported | Not supported |
+| [Activation Sparsity](./nncf/experimental/torch/sparsify_activations/ActivationSparsity.md)               | Not supported | Experimental | Not supported | Not supported |
 
 ### Training-Time Compression Algorithms

From 893947f83ea5dee1529dc6dbd49569e162305cf5 Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:12 +0800
Subject: [PATCH 67/76] fix equation

---
 .../torch/sparsify_activations/ActivationSparsity.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
index b803ffbb7ed..866352fc5b6 100644
--- a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
+++ b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
@@ -7,8 +7,8 @@ The algorithm sparsifies the input of a layer by applying the following function
 $$
 sparsify(X) =
 \begin{cases}
-X & \text{if } |X| \ge \tau \\
-0 & \text{if } |X| < \tau
+X & \text{if } |X| > \tau \\
+0 & \text{if } |X| \le \tau
 \end{cases}
 $$

From 93844a9399943567b4dde50f4bfc513d0e594641 Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:12 +0800
Subject: [PATCH 68/76] update readme

---
 .../torch/sparsify_activations/ActivationSparsity.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
index 866352fc5b6..724463c8ed3 100644
--- a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
+++ b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
@@ -73,7 +73,8 @@ In this example, we first conduct data-free INT8 asymmetric weight quantization
 
 #### Evaluation results
 
-Here is the word perplexity for different language models on a subset of [wikitext dataset](https://arxiv.org/pdf/1609.07843.pdf), with maximum context length set as 2048. In the table, "int8_asym" means the model weights are asymmetrically quantized to int8. "Avg. Activation Sparsity" column shows the average activation sparsity on the evaluation samples. "up/gate/down" means the up, gate, and down projection layers in the [GLU](https://arxiv.org/abs/1612.08083)-style feed forward networks.
+Here is the word perplexity for different language models on a subset of [wikitext dataset](https://arxiv.org/pdf/1609.07843.pdf), with maximum context length set as 2048. In the table, "int8_asym" means the model weights are asymmetrically quantized to INT8. "up/gate/down" means the up, gate, and down projection layers in the [Gated Linear Units](https://arxiv.org/abs/1612.08083) (GLU) style feed forward networks. "Avg. Activation Sparsity" column shows the average activation sparsity on the evaluation samples. For example, "down50%" means that on average the input activations of all "down" layers have a sparsity of 50%.
+
 <table>
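As a side note on the equation corrected in the patch above (keep values strictly above τ, zero out everything else), the masking rule can be sketched in a few lines of PyTorch. This is an illustrative stand-in, not the actual NNCF sparsifier module:

```python
import torch


def sparsify(x: torch.Tensor, tau: float) -> torch.Tensor:
    """Keep entries with |x| > tau; zero out entries with |x| <= tau."""
    return torch.where(x.abs() > tau, x, torch.zeros_like(x))


x = torch.tensor([-0.8, -0.1, 0.05, 0.3])
print(sparsify(x, tau=0.2))  # tensor([-0.8000,  0.0000,  0.0000,  0.3000])
```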
From 35e279c55c03acd682ec304e27c0f66c9ccb4119 Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:12 +0800
Subject: [PATCH 69/76] documentation update

---
 .../torch/sparsify_activations/ActivationSparsity.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
index 724463c8ed3..4b4e59d845a 100644
--- a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
+++ b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
@@ -1,4 +1,4 @@
-### Activation Sparsity
+### Activation Sparsity (experimental feature)
 
 The `sparsify_activations` algorithm is a post-training method designed to introduce sparsity into the activations of a neural network. This process reduces the number of active neurons during inference by masking out neurons based on their magnitude relative to a calibrated static threshold.
 
@@ -46,7 +46,7 @@ model = sparsify_activations(
         TargetScope(patterns=[".*up_proj.*", ".*gate_proj.*"]): 0.3,
         TargetScope(patterns=[".*down_proj.*",]): 0.5,
     },
-    ignored_scope=nncf.IgnoredScope(),
+    ignored_scope=None,
 )
 ```
 
@@ -69,7 +69,7 @@ In this example, we first conduct data-free INT8 asymmetric weight quantization
 }
 ```
 
-- `ignored_scope`: Optional. It defines the nodes in the model graph that should be ignored by this algorithm. Note that unsupported layer types are already filtered out internally, so there is no need to mention them in `ignored_scope`. The algorithm currently only supports Linear layer.
+- `ignored_scope`: Optional. If specified, it should be an instance of `nncf.IgnoredScope` class that defines the nodes in the model graph to be ignored by this algorithm. Note that unsupported layer types are already filtered out internally, so there is no need to mention them in `ignored_scope`. The algorithm currently only supports Linear layers.
 
 #### Evaluation results
 
@@ -142,4 +142,4 @@ Here is the word perplexity for different language models on a subset of [wikite
 
 1. Currently activation sparsity only supports Torch backend. Consequently, this restricts the available compression modes to 8-bit integer modes when using `nncf.compress_weight` before activation sparsification. More information on supported modes can be found at [Weights Compression](../../../../docs/usage/post_training_compression/weights_compression/Usage.md#limitations).
 2. Actual activation sparsity during inference is dynamic and per input basis, deviation from the target should be expected. In our local experiments, the statistical mean of actual activation sparsity aligned to the target when thresholds are calibrated on datasets similar to the final task.
-3. Similar to other compression methods, model accuracy and activation sparsity are trade-off at play. For large language models like [Llama](https://llama.meta.com), it is recommended to start with 30%~50% sparsity for the linear layers in feed-forward networks.
+3. Similar to other compression methods, model accuracy and activation sparsity are trade-off at play. For Large Language Models like [Llama](https://llama.meta.com), it is recommended to start with 30%~50% sparsity for the linear layers in feed-forward networks.
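As a usage note on the `ignored_scope` wording refined in the patch above, a caller might construct the scope as in the sketch below, using the public `nncf.IgnoredScope` class; the layer names are hypothetical placeholders:

```python
import nncf

# Exclude one layer by exact node name and a family of layers by a regex pattern.
# The names below are hypothetical; real NNCF graph node names depend on the model.
ignored = nncf.IgnoredScope(
    names=["model.layers.0.mlp.down_proj"],
    patterns=[".*lm_head.*"],
)
```

The resulting object would then be passed as the `ignored_scope` argument of `sparsify_activations`.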
From 6491ba8e65b8a148e271729911c3844a65b4a867 Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:12 +0800
Subject: [PATCH 70/76] update arxiv link

---
 .../torch/sparsify_activations/ActivationSparsity.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
index 4b4e59d845a..a7aceb0d1fb 100644
--- a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
+++ b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
@@ -73,7 +73,7 @@ In this example, we first conduct data-free INT8 asymmetric weight quantization
 
 #### Evaluation results
 
-Here is the word perplexity for different language models on a subset of [wikitext dataset](https://arxiv.org/pdf/1609.07843.pdf), with maximum context length set as 2048. In the table, "int8_asym" means the model weights are asymmetrically quantized to INT8. "up/gate/down" means the up, gate, and down projection layers in the [Gated Linear Units](https://arxiv.org/abs/1612.08083) (GLU) style feed forward networks. "Avg. Activation Sparsity" column shows the average activation sparsity on the evaluation samples. For example, "down50%" means that on average the input activations of all "down" layers have a sparsity of 50%.
+Here is the word perplexity for different language models on a subset of [wikitext dataset](https://arxiv.org/abs/1609.07843), with maximum context length set as 2048. In the table, "int8_asym" means the model weights are asymmetrically quantized to INT8. "up/gate/down" means the up, gate, and down projection layers in the [Gated Linear Units](https://arxiv.org/abs/1612.08083) (GLU) style feed forward networks. "Avg. Activation Sparsity" column shows the average activation sparsity on the evaluation samples. For example, "down50%" means that on average the input activations of all "down" layers have a sparsity of 50%.
 
 <table>
     <thead>
         <tr>
             <th>Model</th>
From aa672519d760759472e5bd1d93a20d41ae80de03 Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:13 +0800
Subject: [PATCH 71/76] mention dejavu for acceleration example

---
 .../torch/sparsify_activations/ActivationSparsity.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
index a7aceb0d1fb..96292321963 100644
--- a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
+++ b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
@@ -1,6 +1,6 @@
 ### Activation Sparsity (experimental feature)
 
-The `sparsify_activations` algorithm is a post-training method designed to introduce sparsity into the activations of a neural network. This process reduces the number of active neurons during inference by masking out neurons based on their magnitude relative to a calibrated static threshold.
+The `sparsify_activations` algorithm is a post-training method designed to introduce sparsity into the activations of a neural network. This process reduces the number of active neurons during inference by masking out neurons based on their magnitude relative to a calibrated static threshold. Typically this can help accelerate inference for Transformer-based Large Language Models on edge devices; one such example is [Liu et al., 2023](https://arxiv.org/abs/2310.17157).
 
 The algorithm sparsifies the input of a layer by applying the following function:

From b1ea92947261ea432744a470f5e76bc2860f4d35 Mon Sep 17 00:00:00 2001
From: Vui Seng Chua
Date: Tue, 16 Jul 2024 16:39:13 +0800
Subject: [PATCH 72/76] Revise ActivationSparsity.md

* remove ignore_scope argument sparsify_activations call in example snippet
---
 .../torch/sparsify_activations/ActivationSparsity.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
index 96292321963..38beb44ccaa 100644
--- a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
+++ b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
@@ -45,8 +45,7 @@ model = sparsify_activations(
     target_sparsity_by_scope={
         TargetScope(patterns=[".*up_proj.*", ".*gate_proj.*"]): 0.3,
         TargetScope(patterns=[".*down_proj.*",]): 0.5,
-    },
-    ignored_scope=None,
+    }
 )
 ```

From d8a0ef6b1a533b4eeb534e35bcc154ae5d5a805d Mon Sep 17 00:00:00 2001
From: Vui Seng Chua
Date: Tue, 16 Jul 2024 16:39:13 +0800
Subject: [PATCH 73/76] Revise ActivationSparsity.md

* Add notes about experimental features and in-development of runtime kernel
* Elaborate on target support only on Linear layers for LLMs
---
 .../torch/sparsify_activations/ActivationSparsity.md | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
index 38beb44ccaa..56ad7459faf 100644
--- a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
+++ b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
@@ -1,6 +1,6 @@
 ### Activation Sparsity (experimental feature)
 
-The `sparsify_activations` algorithm is a post-training method designed to introduce sparsity into the activations of a neural network. This process reduces the number of active neurons during inference by masking out neurons based on their magnitude relative to a calibrated static threshold. Typically this can help accelerate inference for Transformer-based Large Language Models on edge devices; one such example is [Liu et al., 2023](https://arxiv.org/abs/2310.17157).
+The `sparsify_activations` algorithm is a post-training method designed to introduce sparsity into the activations of a neural network. This process reduces the number of active neurons during inference by masking out neurons based on their magnitude relative to a calibrated static threshold.
 
 The algorithm sparsifies the input of a layer by applying the following function:
 
@@ -20,6 +20,9 @@ $$
 
 `sparsify_activations` automates the process of identifying the pruning thresholds based on user-specified layers, target sparsities and input dataset.
 
+> Note: This feature is **experimental** and intended solely for evaluation of sparsity-task performance. While activation sparsity can improve inference efficiency of LLM decoding phase [Liu et al., 2023](https://arxiv.org/abs/2310.17157), it necessitates optimized runtime kernels, which are in development.
+
+
 #### Example Usage
 
 Below is an example of applying `sparsify_activations` algorithm to a torch model. Optionally, you can also call `nncf.compress_weights()` before sparsification to get an optimized model with quantized weights and sparse activations.
 
@@ -51,6 +54,7 @@ model = sparsify_activations(
 
 In this example, we first conduct data-free INT8 asymmetric weight quantization on the model. Then we do activation sparsification, setting the target activation sparsity to 30% for all the layers containing the keywords "up_proj" and "gate_proj", and 50% for layers with "down_proj" keyword.
 
+
 #### Interface Details
 
 - `model`: The model to be sparsified. Currently only Torch backend is supported.
@@ -68,7 +72,8 @@ In this example, we first conduct data-free INT8 asymmetric weight quantization
 }
 ```
 
-- `ignored_scope`: Optional. If specified, it should be an instance of `nncf.IgnoredScope` class that defines the nodes in the model graph to be ignored by this algorithm. Note that unsupported layer types are already filtered out internally, so there is no need to mention them in `ignored_scope`. The algorithm currently only supports Linear layers.
+- `ignored_scope`: Optional. If specified, it should be an instance of `nncf.IgnoredScope` class that defines the nodes in the model graph to be ignored by this algorithm. Note that unsupported layer types are already filtered out internally, so there is no need to mention them in `ignored_scope`. The algorithm currently only supports Linear layers, as they benefit most from dynamic sparse activations by reducing memory read bandwidth for the large Linear weights used in LLMs.
+
 
 #### Evaluation results
 
@@ -137,8 +142,9 @@ Here is the word perplexity for different language models on a subset of [wikite
 
+
 #### Known Limitations
 
 1. Currently activation sparsity only supports Torch backend. Consequently, this restricts the available compression modes to 8-bit integer modes when using `nncf.compress_weight` before activation sparsification. More information on supported modes can be found at [Weights Compression](../../../../docs/usage/post_training_compression/weights_compression/Usage.md#limitations).
 2. Actual activation sparsity during inference is dynamic and per input basis, deviation from the target should be expected. In our local experiments, the statistical mean of actual activation sparsity aligned to the target when thresholds are calibrated on datasets similar to the final task.
-3. Similar to other compression methods, model accuracy and activation sparsity are trade-off at play. For Large Language Models like [Llama](https://llama.meta.com), it is recommended to start with 30%~50% sparsity for the linear layers in feed-forward networks.
+3. Similar to other compression methods, model accuracy and activation sparsity are trade-off at play. For large language models like [Llama](https://llama.meta.com), it is recommended to start with 30%~50% sparsity for the linear layers in feed-forward networks.

From 98bd3bab12c5b637042a1012e2929aa35d74b46e Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:13 +0800
Subject: [PATCH 74/76] style fix

---
 .../torch/sparsify_activations/ActivationSparsity.md | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
index 56ad7459faf..601db5a73a7 100644
--- a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
+++ b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
@@ -1,6 +1,6 @@
 ### Activation Sparsity (experimental feature)
 
-The `sparsify_activations` algorithm is a post-training method designed to introduce sparsity into the activations of a neural network. This process reduces the number of active neurons during inference by masking out neurons based on their magnitude relative to a calibrated static threshold. 
+The `sparsify_activations` algorithm is a post-training method designed to introduce sparsity into the activations of a neural network. This process reduces the number of active neurons during inference by masking out neurons based on their magnitude relative to a calibrated static threshold.
 
 The algorithm sparsifies the input of a layer by applying the following function:
 
@@ -20,8 +20,7 @@ $$
 
 `sparsify_activations` automates the process of identifying the pruning thresholds based on user-specified layers, target sparsities and input dataset.
 
-> Note: This feature is **experimental** and intended solely for evaluation of sparsity-task performance. While activation sparsity can improve inference efficiency of LLM decoding phase [Liu et al., 2023](https://arxiv.org/abs/2310.17157), it necessitates optimized runtime kernels, which are in development.
-
+> Note: This feature is **experimental** and intended solely for evaluation of sparsity-task performance. While activation sparsity can improve inference efficiency of decoding phase for Large Language Models (LLMs) [Liu et al., 2023](https://arxiv.org/abs/2310.17157), it necessitates optimized runtime kernels, which are in development.
 
 #### Example Usage
 
@@ -54,7 +53,6 @@ model = sparsify_activations(
 
 In this example, we first conduct data-free INT8 asymmetric weight quantization on the model. Then we do activation sparsification, setting the target activation sparsity to 30% for all the layers containing the keywords "up_proj" and "gate_proj", and 50% for layers with "down_proj" keyword.
 
-
 #### Interface Details
 
 - `model`: The model to be sparsified. Currently only Torch backend is supported.
@@ -74,7 +72,6 @@ In this example, we first conduct data-free INT8 asymmetric weight quantization
 
 - `ignored_scope`: Optional. If specified, it should be an instance of `nncf.IgnoredScope` class that defines the nodes in the model graph to be ignored by this algorithm. Note that unsupported layer types are already filtered out internally, so there is no need to mention them in `ignored_scope`. The algorithm currently only supports Linear layers, as they benefit most from dynamic sparse activations by reducing memory read bandwidth for the large Linear weights used in LLMs.
 
-
 #### Evaluation results
 
 Here is the word perplexity for different language models on a subset of [wikitext dataset](https://arxiv.org/abs/1609.07843), with maximum context length set as 2048. In the table, "int8_asym" means the model weights are asymmetrically quantized to INT8. "up/gate/down" means the up, gate, and down projection layers in the [Gated Linear Units](https://arxiv.org/abs/1612.08083) (GLU) style feed forward networks. "Avg. Activation Sparsity" column shows the average activation sparsity on the evaluation samples. For example, "down50%" means that on average the input activations of all "down" layers have a sparsity of 50%.
@@ -142,9 +139,8 @@ Here is the word perplexity for different language models on a subset of [wikite
 
-
 #### Known Limitations
 
 1. Currently activation sparsity only supports Torch backend. Consequently, this restricts the available compression modes to 8-bit integer modes when using `nncf.compress_weight` before activation sparsification. More information on supported modes can be found at [Weights Compression](../../../../docs/usage/post_training_compression/weights_compression/Usage.md#limitations).
 2. Actual activation sparsity during inference is dynamic and per input basis, deviation from the target should be expected. In our local experiments, the statistical mean of actual activation sparsity aligned to the target when thresholds are calibrated on datasets similar to the final task.
-3. Similar to other compression methods, model accuracy and activation sparsity are trade-off at play. For large language models like [Llama](https://llama.meta.com), it is recommended to start with 30%~50% sparsity for the linear layers in feed-forward networks.
+3. Similar to other compression methods, model accuracy and activation sparsity are trade-off at play. For LLMs like [Llama](https://llama.meta.com), it is recommended to start with 30%~50% sparsity for the linear layers in feed-forward networks.
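Because the realized sparsity noted in limitation 2 above is dynamic and varies per input, one quick empirical sanity check is to record the zero fraction seen by each Linear layer with forward pre-hooks. A minimal sketch for a plain `torch.nn` model; the helper name is invented for illustration:

```python
import torch
from torch import nn


def measure_input_sparsity(model: nn.Module) -> dict:
    """Attach forward pre-hooks that record the zero fraction of each Linear layer's input."""
    stats = {}

    def make_hook(name):
        def hook(module, inputs):
            x = inputs[0]
            stats[name] = (x == 0).float().mean().item()
        return hook

    for name, module in model.named_modules():
        if isinstance(module, nn.Linear):
            module.register_forward_pre_hook(make_hook(name))
    return stats


model = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 4))
stats = measure_input_sparsity(model)
model(torch.randn(2, 8))
print(stats)  # the second entry reflects zeros created by ReLU, roughly 0.5
```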
From 0613dcefa36acb717971ebf6f85d699ec50c2907 Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:13 +0800
Subject: [PATCH 75/76] fix compress_weights name

---
 .../torch/sparsify_activations/ActivationSparsity.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
index 601db5a73a7..d94d143ff1c 100644
--- a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
+++ b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
@@ -141,6 +141,6 @@ Here is the word perplexity for different language models on a subset of [wikite
 
 #### Known Limitations
 
-1. Currently activation sparsity only supports Torch backend. Consequently, this restricts the available compression modes to 8-bit integer modes when using `nncf.compress_weight` before activation sparsification. More information on supported modes can be found at [Weights Compression](../../../../docs/usage/post_training_compression/weights_compression/Usage.md#limitations).
+1. Currently activation sparsity only supports Torch backend. Consequently, this restricts the available compression modes to 8-bit integer modes when using `nncf.compress_weights()` before activation sparsification. More information on supported modes can be found at [Weights Compression](../../../../docs/usage/post_training_compression/weights_compression/Usage.md#limitations).
 2. Actual activation sparsity during inference is dynamic and per input basis, deviation from the target should be expected. In our local experiments, the statistical mean of actual activation sparsity aligned to the target when thresholds are calibrated on datasets similar to the final task.
 3. Similar to other compression methods, model accuracy and activation sparsity are trade-off at play. For LLMs like [Llama](https://llama.meta.com), it is recommended to start with 30%~50% sparsity for the linear layers in feed-forward networks.

From 5f04275dd53c0dfee8babb066cccab5da55c4a6b Mon Sep 17 00:00:00 2001
From: "Pan, Yujie"
Date: Tue, 16 Jul 2024 16:39:13 +0800
Subject: [PATCH 76/76] minor fix for "L"inear and parentheses for citation

---
 .../torch/sparsify_activations/ActivationSparsity.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
index d94d143ff1c..c0d94ceac05 100644
--- a/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
+++ b/nncf/experimental/torch/sparsify_activations/ActivationSparsity.md
@@ -20,7 +20,7 @@ $$
 
 `sparsify_activations` automates the process of identifying the pruning thresholds based on user-specified layers, target sparsities and input dataset.
 
-> Note: This feature is **experimental** and intended solely for evaluation of sparsity-task performance. While activation sparsity can improve inference efficiency of decoding phase for Large Language Models (LLMs) [Liu et al., 2023](https://arxiv.org/abs/2310.17157), it necessitates optimized runtime kernels, which are in development.
+> Note: This feature is **experimental** and intended solely for evaluation of sparsity-task performance. While activation sparsity can improve inference efficiency of decoding phase for Large Language Models (LLMs) ([Liu et al., 2023](https://arxiv.org/abs/2310.17157)), it necessitates optimized runtime kernels, which are in development.
 
 #### Example Usage
 
@@ -70,7 +70,7 @@ In this example, we first conduct data-free INT8 asymmetric weight quantization
 }
 ```
 
-- `ignored_scope`: Optional. If specified, it should be an instance of `nncf.IgnoredScope` class that defines the nodes in the model graph to be ignored by this algorithm. Note that unsupported layer types are already filtered out internally, so there is no need to mention them in `ignored_scope`. The algorithm currently only supports Linear layers, as they benefit most from dynamic sparse activations by reducing memory read bandwidth for the large Linear weights used in LLMs.
+- `ignored_scope`: Optional. If specified, it should be an instance of `nncf.IgnoredScope` class that defines the nodes in the model graph to be ignored by this algorithm. Note that unsupported layer types are already filtered out internally, so there is no need to mention them in `ignored_scope`. The algorithm currently only supports Linear layers, as they benefit most from dynamic sparse activations by reducing memory read bandwidth for the large Linear layer weights used in LLMs.
 
 #### Evaluation results
 
@@ -143,4 +143,4 @@ Here is the word perplexity for different language models on a subset of [wikite
 
 1. Currently activation sparsity only supports Torch backend. Consequently, this restricts the available compression modes to 8-bit integer modes when using `nncf.compress_weights()` before activation sparsification. More information on supported modes can be found at [Weights Compression](../../../../docs/usage/post_training_compression/weights_compression/Usage.md#limitations).
 2. Actual activation sparsity during inference is dynamic and per input basis, deviation from the target should be expected. In our local experiments, the statistical mean of actual activation sparsity aligned to the target when thresholds are calibrated on datasets similar to the final task.
-3. Similar to other compression methods, model accuracy and activation sparsity are trade-off at play. For LLMs like [Llama](https://llama.meta.com), it is recommended to start with 30%~50% sparsity for the linear layers in feed-forward networks.
+3. Similar to other compression methods, model accuracy and activation sparsity are trade-off at play. For LLMs like [Llama](https://llama.meta.com), it is recommended to start with 30%~50% sparsity for the Linear layers in feed-forward networks.
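As a closing illustration of the calibration step this series documents: a static threshold that hits a target sparsity is, in essence, a quantile of the absolute activations over the calibration data. A minimal sketch, assuming the calibration activations fit in a single tensor (the NNCF implementation maintains running statistics during calibration instead of materializing all activations):

```python
import torch


def calibrate_threshold(calib_activations: torch.Tensor, target_sparsity: float) -> float:
    """Pick tau so that roughly `target_sparsity` of |X| falls at or below it."""
    return calib_activations.abs().flatten().float().quantile(target_sparsity).item()


acts = torch.randn(10_000)
tau = calibrate_threshold(acts, target_sparsity=0.5)
realized = (acts.abs() <= tau).float().mean().item()
print(f"tau={tau:.4f}, realized sparsity={realized:.2%}")  # realized sparsity is ~50%
```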