diff --git a/.github/workflows/run_keras_sony_custom_layers.yml b/.github/workflows/run_keras_sony_custom_layers.yml deleted file mode 100644 index 92c0a6d7b..000000000 --- a/.github/workflows/run_keras_sony_custom_layers.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: Run Keras Sony Custom Layers Tests -on: - workflow_dispatch: # Allow manual triggers - schedule: - - cron: 0 0 * * * - pull_request: - branches: - - main - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} - -jobs: - run-tensorflow-tests: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Install Python 3 - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - pip install tensorflow==2.13.* - pip install sony-custom-layers - - name: Run unittests - run: | - python -m unittest discover tests/keras_tests/custom_layers_tests -v diff --git a/.github/workflows/run_keras_tests.yml b/.github/workflows/run_keras_tests.yml index 073a72bb3..0bcda518f 100644 --- a/.github/workflows/run_keras_tests.yml +++ b/.github/workflows/run_keras_tests.yml @@ -23,8 +23,9 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements.txt - pip install tensorflow==${{ inputs.tf-version }} sony-custom-layers pytest - + pip install tensorflow==${{ inputs.tf-version }} sony-custom-layers + pip install pytest pytest-mock + pip check - name: Run unittests run: | python -m unittest discover tests/keras_tests -v diff --git a/.github/workflows/run_pytorch_tests.yml b/.github/workflows/run_pytorch_tests.yml index f976d2d31..5f61a610d 100644 --- a/.github/workflows/run_pytorch_tests.yml +++ b/.github/workflows/run_pytorch_tests.yml @@ -24,7 +24,8 @@ jobs: python -m pip install --upgrade pip pip install -r requirements.txt pip install torch==${{ inputs.torch-version }} torchvision onnx onnxruntime onnxruntime-extensions - pip install pytest + pip install pytest pytest-mock + pip check - name: Run unittests run: | python -m unittest discover tests/pytorch_tests -v diff --git a/.github/workflows/run_tests_suite_coverage.yml b/.github/workflows/run_tests_suite_coverage.yml index 49577de96..f1fe68e74 100644 --- a/.github/workflows/run_tests_suite_coverage.yml +++ b/.github/workflows/run_tests_suite_coverage.yml @@ -30,43 +30,48 @@ jobs: with: python-version: '3.10' - - name: Set up Coverage + - name: Set up environment for common tests run: | python -m pip install --upgrade pip - pip install coverage + pip install -r requirements.txt coverage pytest pytest-mock + + - name: Run common tests (unittest) + run: coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" unittest discover tests/common_tests -v + + - name: Run common tests (pytest) + run: coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" pytest tests_pytest/common - name: Set up TensorFlow environment run: | python -m venv tf_env source tf_env/bin/activate python -m pip install --upgrade pip - pip install -r requirements.txt - pip install tensorflow==2.13.* coverage pytest + pip install -r requirements.txt tensorflow==2.13.* sony-custom-layers coverage pytest pytest-mock - - name: Run TensorFlow testsuite + - name: Run TensorFlow tests (unittest) run: | source tf_env/bin/activate - coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" 
unittest tests/test_suite.py -v - - - name: Run TensorFlow pytest + coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" unittest discover tests/keras_tests -v + + - name: Run TensorFlow tests (pytest) run: | source tf_env/bin/activate coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" pytest tests_pytest/keras - - name: Set up Pytorch environment + - name: Set up PyTorch environment run: | python -m venv torch_env source torch_env/bin/activate python -m pip install --upgrade pip pip install -r requirements.txt - pip install torch==2.0.* torchvision onnx onnxruntime onnxruntime-extensions coverage pytest + pip install torch==2.0.* torchvision onnx onnxruntime onnxruntime-extensions sony-custom-layers coverage pytest pytest-mock - - name: Run torch testsuite + - name: Run PyTorch tests (unittest) run: | source torch_env/bin/activate - coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" unittest tests/test_suite.py -v + coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" unittest discover tests/pytorch_tests -v - - name: Run torch pytest + - name: Run PyTorch tests (pytest) run: | source torch_env/bin/activate coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" pytest tests_pytest/pytorch diff --git a/.github/workflows/tests_common.yml b/.github/workflows/tests_common.yml index 89090d949..563d803d5 100644 --- a/.github/workflows/tests_common.yml +++ b/.github/workflows/tests_common.yml @@ -23,6 +23,13 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt + pip install -r requirements.txt + pip install pytest pytest-mock + pip check + - name: Run unittests run: python -m unittest discover tests/common_tests -v + + - name: Run pytest + run: pytest tests_pytest/common + diff --git a/model_compression_toolkit/core/common/graph/base_node.py b/model_compression_toolkit/core/common/graph/base_node.py index 097ed6dbb..2455ad2dc 100644 --- a/model_compression_toolkit/core/common/graph/base_node.py +++ b/model_compression_toolkit/core/common/graph/base_node.py @@ -30,6 +30,9 @@ FrameworkQuantizationCapabilities +WeightAttrT = Union[str, int] + + class BaseNode: """ Class to represent a node in a graph that represents the model. @@ -40,7 +43,7 @@ def __init__(self, framework_attr: Dict[str, Any], input_shape: Tuple[Any], output_shape: Tuple[Any], - weights: Dict[Union[str, int], np.ndarray], + weights: Dict[WeightAttrT, np.ndarray], layer_class: type, reuse: bool = False, reuse_group: str = None, @@ -189,7 +192,7 @@ def is_reused(self) -> bool: """ return self.reuse or self.reuse_group is not None - def _get_weight_name(self, name: Union[str, int]) -> List[Union[str, int]]: + def _get_weight_name(self, name: WeightAttrT) -> List[WeightAttrT]: """ Get weight names that match argument name (either string weights or integer for positional weights). @@ -203,7 +206,7 @@ def _get_weight_name(self, name: Union[str, int]) -> List[Union[str, int]]: return [k for k in self.weights.keys() if (isinstance(k, int) and name == k) or (isinstance(k, str) and name in k)] - def get_weights_by_keys(self, name: Union[str, int]) -> np.ndarray: + def get_weights_by_keys(self, name: WeightAttrT) -> np.ndarray: """ Get a node's weight by its name. 
Args: diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py index e4767b19b..a469a52e9 100644 --- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py @@ -15,13 +15,14 @@ from collections import defaultdict from copy import deepcopy from enum import Enum, auto -from typing import Dict, NamedTuple, Optional, Tuple, List, Iterable, Union, Literal, Sequence, Set +from typing import Dict, NamedTuple, Optional, Tuple, List, Iterable, Union, Literal, Sequence from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import FLOAT_BITWIDTH from model_compression_toolkit.core import FrameworkInfo from model_compression_toolkit.core.common import Graph, BaseNode from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation +from model_compression_toolkit.core.common.graph.base_node import WeightAttrT from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import compute_graph_max_cut from model_compression_toolkit.core.common.graph.memory_graph.cut import Cut @@ -79,24 +80,25 @@ class Utilization(NamedTuple): bytes: memory utilization. """ size: int - bytes: Optional[float] + bytes: float def __add__(self, other: 'Utilization') -> 'Utilization': + """ Add another Utilization object. """ return Utilization(self.size + other.size, self.bytes + other.bytes) - def __radd__(self, other: Union['Utilization', Literal[0]]): - # Needed for sum (with default start_value=0). - if other == 0: - return self - return self + other # pragma: no cover + def __radd__(self, other: Literal[0]): + """ Right add is only supported with 0 to allow the sum operator (with the default start_value=0) """ + if other != 0: + raise ValueError('radd is only supported with 0') + return self def __gt__(self, other: 'Utilization'): - # Needed for max. Compare by bytes. + """ Greater than operator by bytes. Needed for max. """ return self.bytes > other.bytes def __lt__(self, other: 'Utilization'): - # Needed for min. Compare by bytes. - return self.bytes < other.bytes # pragma: no cover + """ Less than operator by bytes. Needed for min. """ + return self.bytes < other.bytes class ResourceUtilizationCalculator: @@ -107,6 +109,8 @@ class ResourceUtilizationCalculator: BitwidthMode.QMinBit: min, } + unexpected_qc_error = 'Custom quantization configuration is not expected for non-custom bit mode.' + def __init__(self, graph: Graph, fw_impl: FrameworkImplementation, fw_info: FrameworkInfo): self.graph = graph self.fw_impl = fw_impl @@ -118,17 +122,17 @@ def __init__(self, graph: Graph, fw_impl: FrameworkImplementation, fw_info: Fram self._params_cnt = {} for n in graph.nodes: self._act_tensors_size[n] = n.get_total_output_params() - self._params_cnt[n] = {k: v.size for k, v in n.weights.items()} + if n.weights: + self._params_cnt[n] = {k: v.size for k, v in n.weights.items()} self._cuts: Optional[Dict[Cut, List[BaseNode]]] = None @property def cuts(self) -> Dict[Cut, List[BaseNode]]: """ Compute if needed and return graph cuts and their memory element nodes. 
""" if self._cuts is None: - memory_graph = MemoryGraph(deepcopy(self.graph)) - _, _, cuts = compute_graph_max_cut(memory_graph) + cuts = self._compute_cuts() if cuts is None: # pragma: no cover - raise RuntimeError("Failed to calculate activation memory cuts for graph.") # pragma: no cover + raise RuntimeError("Failed to calculate activation memory cuts for graph.") cuts = [cut for cut in cuts if cut.mem_elements.elements] # cache cuts nodes for future use, so do not filter by target self._cuts = {cut: [self.graph.find_node_by_name(m.node_name)[0] for m in cut.mem_elements.elements] @@ -140,7 +144,8 @@ def compute_resource_utilization(self, bitwidth_mode: BitwidthMode, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]] = None, - ru_targets: Iterable[RUTarget] = None) -> ResourceUtilization: + ru_targets: Iterable[RUTarget] = None, + allow_unused_qcs: bool = False) -> ResourceUtilization: """ Compute network's resource utilization. @@ -154,16 +159,26 @@ def compute_resource_utilization(self, In custom mode, must provide configuration for all configurable weights. For non-configurable weights, if not provided, the default configuration will be extracted from the node. ru_targets: metrics to include for computation. If None, all metrics are calculated. + allow_unused_qcs: by default, if custom quantization configs are passed, but are not going to be used for + any of the requested targets, an error is raised. To disable the validation, pass True. Returns: Resource utilization object. """ ru_targets = set(ru_targets) if ru_targets else set(RUTarget) - if w_qcs is not None and not self.is_custom_weights_config_applicable(ru_targets): - raise ValueError('Weight configuration passed but no relevant metric requested.') - if act_qcs is not None and not self.is_custom_activation_config_applicable(ru_targets): - raise ValueError('Activation configuration passed but no relevant metric requested.') + if (w_qcs or act_qcs) and bitwidth_mode != BitwidthMode.QCustom: + raise ValueError(self.unexpected_qc_error) + + if w_qcs and not {RUTarget.WEIGHTS, RUTarget.TOTAL, RUTarget.BOPS}.intersection(ru_targets): + if not allow_unused_qcs: + raise ValueError('Weight configuration passed but no relevant ru_targets requested.') + w_qcs = None + + if act_qcs and not {RUTarget.ACTIVATION, RUTarget.TOTAL, RUTarget.BOPS}.intersection(ru_targets): + if not allow_unused_qcs: + raise ValueError('Activation configuration passed but no relevant ru_targets requested.') + act_qcs = None w_total, a_total = None, None if {RUTarget.WEIGHTS, RUTarget.TOTAL}.intersection(ru_targets): @@ -180,8 +195,7 @@ def compute_resource_utilization(self, if RUTarget.TOTAL in ru_targets: ru.total_memory = w_total + a_total if RUTarget.BOPS in ru_targets: - ru.bops, _ = self.compute_bops(target_criterion=target_criterion, - bitwidth_mode=bitwidth_mode, act_qcs=act_qcs, w_qcs=w_qcs) + ru.bops, _ = self.compute_bops(target_criterion, bitwidth_mode, act_qcs=act_qcs, w_qcs=w_qcs) assert ru.get_restricted_targets() == set(ru_targets), 'Mismatch between the number of requested and computed metrics' return ru @@ -206,35 +220,35 @@ def compute_weights_utilization(self, - Per node total weights utilization. Dict keys are nodes in a topological order. - Detailed per node per weight attribute utilization. Dict keys are nodes in a topological order. 
""" - nodes = self._get_target_weight_nodes(target_criterion, include_reused=False) - if not nodes: - return 0, {}, {} + if w_qcs and bitwidth_mode != BitwidthMode.QCustom: + raise ValueError(self.unexpected_qc_error) + + node_attrs = self._collect_target_nodes_w_attrs(target_criterion, include_reused=False) util_per_node: Dict[BaseNode, Utilization] = {} util_per_node_per_weight = {} - - for n in self._topo_sort(nodes): + for n in self._topo_sort(list(node_attrs.keys())): w_qc = w_qcs.get(n) if w_qcs else None - node_weights_util, per_weight_util = self.compute_node_weights_utilization(n, target_criterion, + node_weights_util, per_weight_util = self.compute_node_weights_utilization(n, node_attrs[n], bitwidth_mode, w_qc) util_per_node[n] = node_weights_util util_per_node_per_weight[n] = per_weight_util - total_util = sum(util_per_node.values()) + total_util = sum(util_per_node.values()) if util_per_node else Utilization(0, 0) return total_util.bytes, util_per_node, util_per_node_per_weight def compute_node_weights_utilization(self, n: BaseNode, - target_criterion: TargetInclusionCriterion, + target_criterion: Union[TargetInclusionCriterion, List[str]], bitwidth_mode: BitwidthMode, - qc: NodeWeightsQuantizationConfig)\ + qc: Optional[NodeWeightsQuantizationConfig] = None)\ -> Tuple[Utilization, Dict[str, Utilization]]: """ Compute resource utilization for weights of a node. Args: n: node. - target_criterion: criterion to include weights for computation. + target_criterion: criterion to include weights for computation, or explicit attributes list (full names). bitwidth_mode: bit-width mode for the computation. qc: custom weights quantization configuration. Should be provided for custom bit mode only. In custom mode, must provide configuration for all configurable weights. For non-configurable @@ -244,9 +258,21 @@ def compute_node_weights_utilization(self, - Node's total weights utilization. - Detailed per weight attribute utilization. """ - weight_attrs = self._get_target_weight_attrs(n, target_criterion) - if not weight_attrs: # pragma: no cover - return Utilization(0, 0), {} + if qc: + if bitwidth_mode != BitwidthMode.QCustom: + raise ValueError(self.unexpected_qc_error) + if set(qc.all_weight_attrs) - set(n.get_node_weights_attributes()): + raise ValueError(f'Custom configuration contains unexpected weight attrs {qc.all_weight_attrs} for ' + f'node {n} containing weight attrs {n.get_node_weights_attributes()}.') + + # If target criterion is passed, weights_attrs may return empty, that's fine. + # However, if an explicit list is passed, it must be non-empty. 
+        if isinstance(target_criterion, TargetInclusionCriterion):
+            weight_attrs = self._get_target_weight_attrs(n, target_criterion)
+        else:
+            weight_attrs = target_criterion
+            if not weight_attrs:
+                raise ValueError('Explicit list of attributes to compute cannot be empty.')

         attr_util = {}
         for attr in weight_attrs:
@@ -255,7 +281,7 @@ def compute_node_weights_utilization(self,
             bytes_ = size * nbits / 8
             attr_util[attr] = Utilization(size, bytes_)

-        total_weights: Utilization = sum(attr_util.values())  # type: ignore
+        total_weights: Utilization = sum(attr_util.values()) if attr_util else Utilization(0, 0)
         return total_weights, attr_util

     def compute_activations_utilization(self,
@@ -280,7 +306,7 @@ def compute_activations_utilization(self,
     def compute_activation_utilization_by_cut(self,
                                               target_criterion: TargetInclusionCriterion,
                                               bitwidth_mode: BitwidthMode,
-                                              act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
+                                              act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None) \
            -> Tuple[float, Dict[Cut, Utilization], Dict[Cut, Dict[BaseNode, Utilization]]]:
         """
         Compute graph activation cuts utilization.
@@ -297,15 +323,15 @@ def compute_activation_utilization_by_cut(self,
             - Total activation utilization per cut.
             - Detailed activation utilization per cut per node.
         """
-        if target_criterion != TargetInclusionCriterion.AnyQuantized:  # pragma: no cover
-            raise NotImplementedError('Computing MaxCut activation utilization is currently only supported for quantized targets.')
+        if act_qcs and bitwidth_mode != BitwidthMode.QCustom:
+            raise ValueError(self.unexpected_qc_error)

         graph_target_nodes = self._get_target_activation_nodes(target_criterion, include_reused=True)
         # if there are no target activations in the graph, don't waste time looking for cuts
         if not graph_target_nodes:
             return 0, {}, {}

-        util_per_cut: Dict[Cut, Utilization] = {}  # type: ignore
+        util_per_cut: Dict[Cut, Utilization] = {}
         util_per_cut_per_node = defaultdict(dict)
         for cut in self.cuts:
             cut_target_nodes = self._get_cut_target_nodes(cut, target_criterion)
@@ -325,7 +351,7 @@ def compute_activation_tensors_utilization(self,
                                                bitwidth_mode: BitwidthMode,
                                                act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None,
                                                include_reused=False) \
-            -> Tuple[float, Dict[BaseNode, Utilization]]:  # pragma: no cover
+            -> Tuple[float, Dict[BaseNode, Utilization]]:
         """
         Compute resource utilization for graph's activations tensors.
@@ -341,9 +367,10 @@ def compute_activation_tensors_utilization(self,
             - Detailed utilization per node. Dict keys are nodes in a topological order.
""" + if act_qcs and bitwidth_mode != BitwidthMode.QCustom: + raise ValueError(self.unexpected_qc_error) + nodes = self._get_target_activation_nodes(target_criterion, include_reused=include_reused) - if not nodes: - return 0, {} util_per_node: Dict[BaseNode, Utilization] = {} for n in self._topo_sort(nodes): @@ -351,14 +378,14 @@ def compute_activation_tensors_utilization(self, util = self.compute_node_activation_tensor_utilization(n, None, bitwidth_mode, qc) util_per_node[n] = util - total_util = max(util_per_node.values()) - return total_util.bytes, util_per_node + total_util = max(util_per_node.values()).bytes if util_per_node else 0 + return total_util, util_per_node def compute_node_activation_tensor_utilization(self, n: BaseNode, target_criterion: Optional[TargetInclusionCriterion], bitwidth_mode: BitwidthMode, - qc: Optional[NodeActivationQuantizationConfig]) -> Utilization: + qc: Optional[NodeActivationQuantizationConfig] = None) -> Utilization: """ Compute activation resource utilization for a node. @@ -372,9 +399,13 @@ def compute_node_activation_tensor_utilization(self, Returns: Node's activation utilization. """ + if qc and bitwidth_mode != BitwidthMode.QCustom: + raise ValueError(self.unexpected_qc_error) + if target_criterion: + # only check whether the node meets the criterion nodes = self._get_target_activation_nodes(target_criterion=target_criterion, include_reused=True, nodes=[n]) - if not nodes: # pragma: no cover + if not nodes: return Utilization(0, 0) size = self._act_tensors_size[n] @@ -410,7 +441,7 @@ def compute_bops(self, if target_criterion != TargetInclusionCriterion.AnyQuantized: # pragma: no cover raise NotImplementedError('BOPS computation is currently only supported for quantized targets.') - nodes = self._get_target_weight_nodes(target_criterion, include_reused=True) + nodes = self._collect_target_nodes_w_attrs(target_criterion, include_reused=True) # filter out nodes with only positional weights # TODO add as arg to get target nodes nodes = [n for n in nodes if n.has_kernel_weight_to_quantize(self.fw_info)] @@ -448,7 +479,7 @@ def compute_node_bops(self, incoming_edges = self.graph.incoming_edges(n, sort_by_attr=EDGE_SINK_INDEX) # TODO temporary adding this for const_representation test in torch which has Linear with const input - if not incoming_edges: + if not incoming_edges: # pragma: no cover return 0 assert len(incoming_edges) == 1, \ f'Unexpected number of inputs {len(incoming_edges)} for BOPS calculation. Expected 1.' @@ -465,13 +496,11 @@ def compute_node_bops(self, node_bops = a_nbits * w_nbits * node_mac return node_bops - def is_custom_weights_config_applicable(self, ru_targets: Set[RUTarget]) -> bool: - """ Whether custom configuration for weights is compatible with the requested targets.""" - return bool({RUTarget.WEIGHTS, RUTarget.TOTAL, RUTarget.BOPS}.intersection(ru_targets)) - - def is_custom_activation_config_applicable(self, ru_targets: Set[RUTarget]) -> bool: - """ Whether custom configuration for activations is compatible with the requested targets.""" - return bool({RUTarget.ACTIVATION, RUTarget.TOTAL, RUTarget.BOPS}.intersection(ru_targets)) + def _compute_cuts(self): + """ Compute activation cuts of the graph. 
""" + memory_graph = MemoryGraph(deepcopy(self.graph)) + _, _, cuts = compute_graph_max_cut(memory_graph) + return cuts def _get_cut_target_nodes(self, cut: Cut, target_criterion: TargetInclusionCriterion) -> List[BaseNode]: """ @@ -487,37 +516,23 @@ def _get_cut_target_nodes(self, cut: Cut, target_criterion: TargetInclusionCrite cut_nodes = self.cuts[cut] return self._get_target_activation_nodes(target_criterion, include_reused=True, nodes=cut_nodes) - def _get_target_weight_nodes(self, - target_criterion: TargetInclusionCriterion, - include_reused: bool) -> List[BaseNode]: + def _collect_target_nodes_w_attrs(self, + target_criterion: TargetInclusionCriterion, + include_reused: bool) -> Dict[BaseNode, List[WeightAttrT]]: """ - Collect nodes to include in weights utilization computation. + Collect nodes and their weight attributes to include in weights utilization computation. Args: target_criterion: criterion to include weights for computation. include_reused: whether to include reused nodes. Returns: - Target nodes. + A mapping from nodes to their weights attributes. """ - if target_criterion == TargetInclusionCriterion.QConfigurable: - nodes = self.graph.get_weights_configurable_nodes(self.fw_info, include_reused_nodes=include_reused) - elif target_criterion == TargetInclusionCriterion.AnyQuantized: - nodes = [n for n in self.graph if n.has_any_weight_attr_to_quantize()] - elif target_criterion == TargetInclusionCriterion.QNonConfigurable: - # TODO this is wrong. Need to look at specific weights and not the whole node (if w1 is configurable and w2 - # is non-configurable we want to discover the node both as configurable and non-configurable) - quantized = [n for n in self.graph if n.has_any_weight_attr_to_quantize()] - configurable = self.graph.get_weights_configurable_nodes(self.fw_info, include_reused_nodes=include_reused) - nodes = [n for n in quantized if n not in configurable] - elif target_criterion == TargetInclusionCriterion.Any: # pragma: no cover - nodes = list(self.graph.nodes) - else: # pragma: no cover - raise ValueError(f'Unknown {target_criterion}.') - - if not include_reused: - nodes = [n for n in nodes if not n.reuse] - return nodes + nodes_attrs = {n: attrs for n in self.graph.nodes + if (attrs := self._get_target_weight_attrs(n, target_criterion)) + and (include_reused or not n.reuse)} + return nodes_attrs def _get_target_weight_attrs(self, n: BaseNode, target_criterion: TargetInclusionCriterion) -> List[str]: """ @@ -530,6 +545,7 @@ def _get_target_weight_attrs(self, n: BaseNode, target_criterion: TargetInclusio Returns: Selected weight attributes names. """ + # weight_attrs are the full names in the layer, e.g. 'conv2d_1/kernel:0' (or an integer for positional attrs) weight_attrs = n.get_node_weights_attributes() if target_criterion == TargetInclusionCriterion.QConfigurable: weight_attrs = [attr for attr in weight_attrs if n.is_configurable_weight(attr)] @@ -548,14 +564,17 @@ def _topo_sort(self, nodes: Sequence[BaseNode]) -> List[BaseNode]: Sort nodes in a topological order (based on graph's nodes). Args: - nodes: nodes to sort. + nodes: nodes to sort. Allowed to be empty. Returns: Nodes in topological order. 
""" + if not nodes: + return list(nodes) + graph_topo_nodes = self.graph.get_topo_sorted_nodes() topo_nodes = [n for n in graph_topo_nodes if n in nodes] - if len(topo_nodes) != len(nodes): # pragma: no cover + if len(topo_nodes) != len(nodes): missing_nodes = [n for n in nodes if n not in topo_nodes] raise ValueError(f'Could not topo-sort, nodes {missing_nodes} do not match the graph nodes.') return topo_nodes @@ -576,15 +595,15 @@ def _get_target_activation_nodes(self, Selected nodes. """ nodes = nodes or self.graph.nodes - if target_criterion == TargetInclusionCriterion.QConfigurable: # pragma: no cover + if target_criterion == TargetInclusionCriterion.QConfigurable: nodes = [n for n in nodes if n.has_configurable_activation()] elif target_criterion == TargetInclusionCriterion.AnyQuantized: nodes = [n for n in nodes if n.is_activation_quantization_enabled()] - elif target_criterion == TargetInclusionCriterion.QNonConfigurable: # pragma: no cover + elif target_criterion == TargetInclusionCriterion.QNonConfigurable: nodes = [n for n in nodes if n.is_activation_quantization_enabled() and not n.has_configurable_activation()] elif target_criterion != TargetInclusionCriterion.Any: # pragma: no cover raise ValueError(f'Unknown {target_criterion}.') - if not include_reused: # pragma: no cover + if not include_reused: nodes = [n for n in nodes if not n.reuse] return nodes @@ -607,8 +626,7 @@ def _get_activation_nbits(cls, Activation bit-width. """ if act_qc: - if bitwidth_mode != BitwidthMode.QCustom: # pragma: no cover - raise ValueError(f'Activation config is not expected for non-custom bit mode {bitwidth_mode}') + assert bitwidth_mode == BitwidthMode.QCustom return act_qc.activation_n_bits if act_qc.enable_activation_quantization else FLOAT_BITWIDTH if bitwidth_mode == BitwidthMode.Float or not n.is_activation_quantization_enabled(): @@ -623,8 +641,8 @@ def _get_activation_nbits(cls, if bitwidth_mode in [BitwidthMode.QCustom, BitwidthMode.QDefaultSP]: qcs = n.get_unique_activation_candidates() - if len(qcs) != 1: # pragma: no cover - raise ValueError(f'Could not retrieve the activation quantization candidate for node {n.name} ' + if len(qcs) != 1: + raise ValueError(f'Could not retrieve the activation quantization candidate for node {n} ' f'as it has {len(qcs)}!=1 unique candidates .') return qcs[0].activation_quantization_cfg.activation_n_bits @@ -650,9 +668,8 @@ def _get_weight_nbits(cls, Returns: Weight bit-width. 
""" + assert not (w_qc and bitwidth_mode != BitwidthMode.QCustom) if w_qc and w_qc.has_attribute_config(w_attr): - if bitwidth_mode != BitwidthMode.QCustom: # pragma: no cover - raise ValueError('Weight config is not expected for non-custom bit mode {bitwidth_mode}') attr_cfg = w_qc.get_attr_config(w_attr) return attr_cfg.weights_n_bits if attr_cfg.enable_weights_quantization else FLOAT_BITWIDTH @@ -669,9 +686,9 @@ def _get_weight_nbits(cls, if bitwidth_mode in [BitwidthMode.QCustom, BitwidthMode.QDefaultSP]: # if configuration was not passed and the weight has only one candidate, use it - if len(w_qcs) != 1: # pragma: no cover - raise ValueError(f'Could not retrieve the quantization candidate for attr {w_attr} of node {n.name} ' - f'as it {len(w_qcs)}!=1 unique candidates.') + if len(w_qcs) != 1: + raise ValueError(f'Could not retrieve the quantization candidate for attr {w_attr} of node {n} ' + f'as it has {len(w_qcs)}!=1 unique candidates.') return w_qcs[0].weights_n_bits raise ValueError(f'Unknown mode {bitwidth_mode.name}') # pragma: no cover diff --git a/model_compression_toolkit/core/common/quantization/node_quantization_config.py b/model_compression_toolkit/core/common/quantization/node_quantization_config.py index cad9c510a..607903bcf 100644 --- a/model_compression_toolkit/core/common/quantization/node_quantization_config.py +++ b/model_compression_toolkit/core/common/quantization/node_quantization_config.py @@ -14,7 +14,7 @@ # ============================================================================== -from typing import Callable, Any, List, Tuple, Union, Dict +from typing import Callable, Any, List, Tuple, Union, Dict, TYPE_CHECKING import numpy as np @@ -28,6 +28,8 @@ from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import AttributeQuantizationConfig, \ OpQuantizationConfig +if TYPE_CHECKING: + from model_compression_toolkit.core.common.graph.base_node import WeightAttrT ########################################## # Every node holds a quantization configuration @@ -482,6 +484,15 @@ def has_attribute_config(self, attr_name: Union[str, int]) -> bool: return False + @property + def all_weight_attrs(self) -> List['WeightAttrT']: + """ Fetch all weight attributes keys (positional and named). + + Returns: + List of attributes. + """ + return list(self.pos_attributes_config_mapping.keys()) + list(self.attributes_config_mapping.keys()) + def _extract_config_for_attributes_with_name(self, attr_name) -> Dict[str, WeightsAttrQuantizationConfig]: """ Extract the saved attributes that contain the given attribute name. 
diff --git a/model_compression_toolkit/core/runner.py b/model_compression_toolkit/core/runner.py index 69b658a12..ab0322df7 100644 --- a/model_compression_toolkit/core/runner.py +++ b/model_compression_toolkit/core/runner.py @@ -229,14 +229,11 @@ def _set_final_resource_utilization(graph: Graph, final_ru = None if ru_targets: ru_calculator = ResourceUtilizationCalculator(graph, fw_impl, fw_info) - w_qcs, a_qcs = None, None - if ru_calculator.is_custom_weights_config_applicable(ru_targets): - w_qcs = {n: n.final_weights_quantization_cfg for n in graph.nodes} - if ru_calculator.is_custom_activation_config_applicable(ru_targets): - a_qcs = {n: n.final_activation_quantization_cfg for n in graph.nodes} + w_qcs = {n: n.final_weights_quantization_cfg for n in graph.nodes} + a_qcs = {n: n.final_activation_quantization_cfg for n in graph.nodes} final_ru = ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized, - BitwidthMode.QCustom, - act_qcs=a_qcs, w_qcs=w_qcs, ru_targets=ru_targets) + BitwidthMode.QCustom, act_qcs=a_qcs, w_qcs=w_qcs, + ru_targets=ru_targets, allow_unused_qcs=True) summary = final_ru.get_summary_str(restricted=True) Logger.info(f'Resource utilization for quantized mixed-precision targets:\n {summary}.') graph.user_info.final_resource_utilization = final_ru diff --git a/model_compression_toolkit/logger.py b/model_compression_toolkit/logger.py index 03b6b6fae..713d3f4cb 100644 --- a/model_compression_toolkit/logger.py +++ b/model_compression_toolkit/logger.py @@ -136,7 +136,6 @@ def info(msg: str): msg: Message to log. """ - print(msg) Logger.get_logger().info(msg) @staticmethod @@ -148,7 +147,6 @@ def warning(msg: str): msg: Message to log. """ - print(msg) Logger.get_logger().warning(msg) @staticmethod diff --git a/tests/common_tests/function_tests/test_logger.py b/tests/common_tests/function_tests/test_logger.py index be69c7c3a..c01770fa8 100644 --- a/tests/common_tests/function_tests/test_logger.py +++ b/tests/common_tests/function_tests/test_logger.py @@ -82,18 +82,14 @@ def test_debug(self, mock_get_logger): def test_info(self, mock_get_logger): logger_mock = MagicMock() mock_get_logger.return_value = logger_mock - with patch('sys.stdout', new=StringIO()) as fake_out: - Logger.info(self.log_message) - self.assertEqual(fake_out.getvalue().strip(), self.log_message) + Logger.info(self.log_message) logger_mock.info.assert_called_once_with(self.log_message) @patch('model_compression_toolkit.logger.Logger.get_logger') def test_warning(self, mock_get_logger): logger_mock = MagicMock() mock_get_logger.return_value = logger_mock - with patch('sys.stdout', new=StringIO()) as fake_out: - Logger.warning(self.log_message) - self.assertEqual(fake_out.getvalue().strip(), self.log_message) + Logger.warning(self.log_message) logger_mock.warning.assert_called_once_with(self.log_message) @patch('model_compression_toolkit.logger.Logger.get_logger') diff --git a/tests/test_suite.py b/tests/test_suite.py deleted file mode 100644 index 11ce0f22a..000000000 --- a/tests/test_suite.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -# ----------------- Unit test framework -import importlib -import unittest -from packaging import version - -from tests.common_tests.function_tests.test_collectors_manipulation import TestCollectorsManipulations -from tests.common_tests.function_tests.test_edge_matcher import TestEdgeMatcher -# ---------------- Individual test suites -from tests.common_tests.function_tests.test_histogram_collector import TestHistogramCollector -from tests.common_tests.function_tests.test_immutable_class import TestImmutableClass -from tests.common_tests.function_tests.test_logger import TestLogger -from tests.common_tests.function_tests.test_resource_utilization_object import TestResourceUtilizationObject -from tests.common_tests.function_tests.test_threshold_selection import TestThresholdSelection -from tests.common_tests.test_doc_examples import TestCommonDocsExamples -from tests.common_tests.test_tpc import TargetPlatformModelingTest, OpsetTest, QCOptionsTest, FusingTest, \ - TPModelInputOutputTests - -found_tf = importlib.util.find_spec("tensorflow") is not None -if found_tf: - import tensorflow as tf - # MCT doesn't support TensorFlow version 2.16 or higher - if version.parse(tf.__version__) >= version.parse("2.16"): - found_tf = False -found_pytorch = importlib.util.find_spec("torch") is not None and importlib.util.find_spec( - "torchvision") is not None - -if found_tf: - from tests.keras_tests.xquant_tests.test_xquant_end2end import BaseTestEnd2EndKerasXQuant - from tests.keras_tests.function_tests.test_activation_quantization_functions import TestActivationQuantizationFunctions as TestActivationQuantizationFunctionsKeras - from tests.keras_tests.function_tests.test_custom_layer import TestCustomLayer - from tests.keras_tests.function_tests.test_hessian_info_calculator import TestHessianInfoCalculatorWeights, \ - TestHessianInfoCalculatorActivation - from tests.keras_tests.function_tests.test_hessian_service import TestHessianService - from tests.keras_tests.feature_networks_tests.test_features_runner import FeatureNetworkTest - from tests.keras_tests.function_tests.test_quantization_configurations import TestQuantizationConfigurations - from tests.keras_tests.non_parallel_tests.test_tensorboard_writer import TestFileLogger - from tests.keras_tests.function_tests.test_lut_quanitzer_params import TestLUTQuantizerParams - from tests.keras_tests.function_tests.test_lut_activation_quanitzer_params import TestLUTActivationsQuantizerParams - from tests.keras_tests.function_tests.test_lut_activation_quanitzer_fake_quant import TestLUTQuantizerFakeQuant - from tests.keras_tests.non_parallel_tests.test_lp_search_bitwidth import TestLpSearchBitwidth, \ - TestSearchBitwidthConfiguration - from tests.keras_tests.function_tests.test_bn_info_collection import TestBNInfoCollection - from tests.keras_tests.graph_tests.test_graph_reading import TestGraphReading - from tests.keras_tests.layer_tests.test_layers_runner import LayerTest as TFLayerTest - from 
tests.keras_tests.function_tests.test_symmetric_threshold_selection_weights import \ - TestSymmetricThresholdSelectionWeights - from tests.keras_tests.function_tests.test_uniform_quantize_tensor import TestUniformQuantizeTensor - from tests.keras_tests.function_tests.test_uniform_range_selection_weights import TestUniformRangeSelectionWeights - from tests.keras_tests.non_parallel_tests.test_keras_tpc import TestKerasTPModel - from tests.keras_tests.function_tests.test_sensitivity_metric_interest_points import \ - TestSensitivityMetricInterestPoints - from tests.keras_tests.function_tests.test_weights_activation_split_substitution import TestWeightsActivationSplit - from tests.keras_tests.function_tests.test_activation_weights_composition_substitution import \ - TestActivationWeightsComposition - from tests.keras_tests.function_tests.test_graph_max_cut import TestGraphMaxCut - from tests.keras_tests.function_tests.test_sensitivity_eval_non_suppoerted_output import \ - TestSensitivityEvalWithNonSupportedOutputNodes - from tests.keras_tests.function_tests.test_set_layer_to_bitwidth import TestKerasSetLayerToBitwidth - from tests.keras_tests.function_tests.test_export_keras_fully_quantized_model import TestKerasFakeQuantExporter - from tests.keras_tests.function_tests.test_resource_utilization_data import TestResourceUtilizationData - from tests.keras_tests.exporter_tests.test_runner import ExporterTestsRunner - from tests.keras_tests.function_tests.test_get_gptq_config import TestGetGPTQConfig - from tests.keras_tests.function_tests.test_gptq_loss_functions import TestGPTQLossFunctions - from tests.keras_tests.trainable_infrastructure_tests.test_keras_trainable_infra_runner import \ - KerasTrainableInfrastructureTestRunner - from tests.keras_tests.function_tests.test_gptq_soft_quantizer import TestGPTQSoftQuantizer as keras_gptq_soft_quantizer_test - from tests.keras_tests.function_tests.test_activation_quantization_holder_gptq import TestGPTQModelBuilderWithActivationHolder - from tests.keras_tests.data_generation_tests.test_keras_data_generation_runner import KerasDataGenerationTestRunner - from tests.keras_tests.pruning_tests.test_memory_calculator import TestParameterCounter - from tests.keras_tests.pruning_tests.test_pretrained_models import PruningPretrainedModelsTest - from tests.keras_tests.pruning_tests.feature_networks.test_pruning_feature_networks import PruningFeatureNetworksTest - from tests.keras_tests.function_tests.test_hmse_error_method import TestParamSelectionWithHMSE - from tests.keras_tests.data_generation_tests.test_scheduler_step import TestReduceLROnPlateau - from tests.keras_tests.function_tests.test_node_quantization_configurations import TestNodeQuantizationConfigurations - from tests.keras_tests.function_tests.test_quant_config_filtering import TestKerasQuantConfigFiltering - -if found_pytorch: - from tests.pytorch_tests.xquant_tests.test_xquant_end2end import BaseTestEnd2EndPytorchXQuant - from tests.pytorch_tests.function_tests.test_activation_quantization_functions import TestActivationQuantizationFunctions as TestActivationQuantizationFunctionsPytorch - from tests.pytorch_tests.function_tests.test_torch_utils import TestTorchUtils - from tests.pytorch_tests.function_tests.test_device_manager import TestDeviceManager - from tests.pytorch_tests.layer_tests.test_layers_runner import LayerTest as TorchLayerTest - from tests.pytorch_tests.model_tests.test_feature_models_runner import FeatureModelsTestRunner - # from tests.pytorch_tests.model_tests.test_models_runner 
import ModelTest - from tests.pytorch_tests.function_tests.test_function_runner import FunctionTestRunner - from tests.pytorch_tests.function_tests.test_pytorch_tpc import TestPytorchTPModel - from tests.pytorch_tests.trainable_infrastructure_tests.test_pytorch_trainable_infra_runner import PytorchTrainableInfrastructureTestRunner - from tests.pytorch_tests.function_tests.test_gptq_soft_quantizer import TestGPTQSoftQuantizer as pytorch_gptq_soft_quantier_test - from tests.pytorch_tests.function_tests.test_activation_quantization_holder_gptq import \ - TestGPTQModelBuilderWithActivationHolder as TestGPTQModelBuilderWithActivationHolderPytorch - from tests.pytorch_tests.exporter_tests.test_runner import PytorchExporterTestsRunner - from tests.pytorch_tests.data_generation_tests.test_pytorch_data_generation_runner import PytorchDataGenerationTestRunner - from tests.pytorch_tests.graph_tests.test_fx_errors import TestGraphReading - from tests.pytorch_tests.pruning_tests.feature_networks.test_pruning_feature_networks import PruningFeatureNetworksTest - from tests.pytorch_tests.exporter_tests.test_exporting_qat_models import TestExportingQATModelTorchscript - from tests.pytorch_tests.function_tests.test_quant_config_filtering import TestTorchQuantConfigFiltering - -if __name__ == '__main__': - # ----------------- Load all the test cases - suiteList = [] - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestHistogramCollector)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestCollectorsManipulations)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestThresholdSelection)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TPModelInputOutputTests)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TargetPlatformModelingTest)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(OpsetTest)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(QCOptionsTest)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(FusingTest)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestCommonDocsExamples)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestResourceUtilizationObject)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestEdgeMatcher)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestLogger)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestImmutableClass)) - - # Add TF tests only if tensorflow is installed - if found_tf: - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(BaseTestEnd2EndKerasXQuant)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestActivationQuantizationFunctionsKeras)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestReduceLROnPlateau)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestCustomLayer)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestParameterCounter)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(PruningPretrainedModelsTest)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(PruningFeatureNetworksTest)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestHessianInfoCalculatorWeights)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestHessianInfoCalculatorActivation)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestHessianService)) - 
suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestGPTQModelBuilderWithActivationHolder)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(ExporterTestsRunner)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestSensitivityMetricInterestPoints)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestQuantizationConfigurations)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestNodeQuantizationConfigurations)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(FeatureNetworkTest)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestLpSearchBitwidth)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestSearchBitwidthConfiguration)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestBNInfoCollection)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestLUTQuantizerParams)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestLUTActivationsQuantizerParams)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestLUTQuantizerFakeQuant)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestGraphReading)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestSymmetricThresholdSelectionWeights)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestUniformQuantizeTensor)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestUniformRangeSelectionWeights)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestKerasTPModel)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestWeightsActivationSplit)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestActivationWeightsComposition)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestGraphMaxCut)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestKerasSetLayerToBitwidth)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestSensitivityEvalWithNonSupportedOutputNodes)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestKerasFakeQuantExporter)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestResourceUtilizationData)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestFileLogger)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestGetGPTQConfig)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestGPTQLossFunctions)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(KerasTrainableInfrastructureTestRunner)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(keras_gptq_soft_quantizer_test)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TFLayerTest)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(KerasDataGenerationTestRunner)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestParamSelectionWithHMSE)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestKerasQuantConfigFiltering)) - - if found_pytorch: - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(BaseTestEnd2EndPytorchXQuant)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestActivationQuantizationFunctionsPytorch)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestTorchUtils)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestDeviceManager)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestGPTQModelBuilderWithActivationHolderPytorch)) - 
suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TorchLayerTest)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(FeatureModelsTestRunner)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(FunctionTestRunner)) - # Exporter test of pytorch must have ONNX installed - # suiteList.append(unittest.TestLoader().loadTestsFromName('test_mobilenet_v2', ModelTest)) - # suiteList.append(unittest.TestLoader().loadTestsFromName('test_mobilenet_v3', ModelTest)) - # suiteList.append(unittest.TestLoader().loadTestsFromName('test_efficientnet_b0', ModelTest)) - # suiteList.append(unittest.TestLoader().loadTestsFromName('test_resnet18', ModelTest)) - # suiteList.append(unittest.TestLoader().loadTestsFromName('test_shufflenet_v2_x1_0', ModelTest)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestPytorchTPModel)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(pytorch_gptq_soft_quantier_test)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(PytorchTrainableInfrastructureTestRunner)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(PytorchExporterTestsRunner)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(PytorchDataGenerationTestRunner)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestGraphReading)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(PruningFeatureNetworksTest)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestExportingQATModelTorchscript)) - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestTorchQuantConfigFiltering)) - - # ---------------- Join them together and run them - comboSuite = unittest.TestSuite(suiteList) - unittest.TextTestRunner(verbosity=0).run(comboSuite) diff --git a/tests_pytest/core/__init__.py b/tests_pytest/common/__init__.py similarity index 100% rename from tests_pytest/core/__init__.py rename to tests_pytest/common/__init__.py diff --git a/tests_pytest/core/common/__init__.py b/tests_pytest/common/core/__init__.py similarity index 100% rename from tests_pytest/core/common/__init__.py rename to tests_pytest/common/core/__init__.py diff --git a/tests_pytest/core/common/mixed_precision/__init__.py b/tests_pytest/common/core/common/__init__.py similarity index 100% rename from tests_pytest/core/common/mixed_precision/__init__.py rename to tests_pytest/common/core/common/__init__.py diff --git a/tests_pytest/core/common/mixed_precision/resource_utilization_tools/__init__.py b/tests_pytest/common/core/common/mixed_precision/__init__.py similarity index 100% rename from tests_pytest/core/common/mixed_precision/resource_utilization_tools/__init__.py rename to tests_pytest/common/core/common/mixed_precision/__init__.py diff --git a/tests_pytest/common/core/common/mixed_precision/resource_utilization_tools/__init__.py b/tests_pytest/common/core/common/mixed_precision/resource_utilization_tools/__init__.py new file mode 100644 index 000000000..5397dea24 --- /dev/null +++ b/tests_pytest/common/core/common/mixed_precision/resource_utilization_tools/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== diff --git a/tests_pytest/common/core/common/mixed_precision/resource_utilization_tools/test_resource_utilization_calculator.py b/tests_pytest/common/core/common/mixed_precision/resource_utilization_tools/test_resource_utilization_calculator.py new file mode 100644 index 000000000..b7560a176 --- /dev/null +++ b/tests_pytest/common/core/common/mixed_precision/resource_utilization_tools/test_resource_utilization_calculator.py @@ -0,0 +1,947 @@ +# Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from types import MethodType + +from typing import Iterable, Union + +from unittest.mock import Mock + +import numpy as np +import pytest +from mct_quantizers import QuantizationMethod + +from model_compression_toolkit.constants import FLOAT_BITWIDTH +from model_compression_toolkit.core import FrameworkInfo, QuantizationConfig, ResourceUtilization +from model_compression_toolkit.core.common import Graph, BaseNode +from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation +from model_compression_toolkit.core.common.graph.edge import Edge +from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import compute_graph_max_cut +from model_compression_toolkit.core.common.graph.memory_graph.cut import Cut +from model_compression_toolkit.core.common.graph.memory_graph.memory_element import MemoryElements, ActivationMemoryTensor +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \ + RUTarget +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \ + Utilization, ResourceUtilizationCalculator, TargetInclusionCriterion, BitwidthMode +from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \ + CandidateNodeQuantizationConfig +from model_compression_toolkit.core.common.quantization.node_quantization_config import \ + NodeActivationQuantizationConfig, NodeWeightsQuantizationConfig +from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import OpQuantizationConfig, \ + AttributeQuantizationConfig, Signedness + + +@pytest.fixture +def graph_mock(): + return Mock(spec_set=Graph, nodes=[]) + + +@pytest.fixture +def fw_impl_mock(): + return Mock(spec_set=FrameworkImplementation) + + +@pytest.fixture +def 
fw_info_mock(): + return Mock(spec_set=FrameworkInfo) + + +def full_attr_name(canonical_name: Union[str, dict, Iterable]): + """ Convert canonical attr (such as 'kernel') into a full name originated from the layer (e.g. 'conv2d_1/kernel:0') + We just need the names to differ from canonical to make sure we call the correct apis. We use the same + template for simplicity, so we don't have to explicitly synchronize names between node and weight configs.""" + convert = lambda name: f'{name[0]}/{name}/{name[-1]}' if isinstance(name, str) else name + if isinstance(canonical_name, str): + return convert(canonical_name) + assert isinstance(canonical_name, (list, tuple, set)) + return canonical_name.__class__([convert(name) for name in canonical_name]) + + +def build_qc(a_nbits=8, a_enable=True, w_attr=None, pos_attr=(32, False, ())): + """ Build quantization config for tests. + w_attr contains {canonical name: (nbits, q_enabled)} + pos_attr: (nbits, q enabled, indices) """ + w_attr = w_attr or {} + attr_weights_configs_mapping = { + k: AttributeQuantizationConfig(weights_n_bits=v[0], enable_weights_quantization=v[1]) + for k, v in w_attr.items() + } + qc = QuantizationConfig() + # positional attrs are set via default weight config (so all pos attrs have the same q config) + op_cfg = OpQuantizationConfig( + # canonical names (as 'kernel') + attr_weights_configs_mapping=attr_weights_configs_mapping, + activation_n_bits=a_nbits, + enable_activation_quantization=a_enable, + default_weight_attr_config=AttributeQuantizationConfig(weights_n_bits=pos_attr[0], + enable_weights_quantization=pos_attr[1]), + activation_quantization_method=QuantizationMethod.POWER_OF_TWO, + quantization_preserving=False, + supported_input_activation_n_bits=[2, 4, 8], + fixed_scale=None, + fixed_zero_point=None, + simd_size=None, + signedness=Signedness.AUTO + ) + a_qcfg = NodeActivationQuantizationConfig(qc=qc, op_cfg=op_cfg, + activation_quantization_fn=None, + activation_quantization_params_fn=None) + # full names from the layers + attr_names = [full_attr_name(k) for k in w_attr.keys()] + w_qcfg = NodeWeightsQuantizationConfig(qc=qc, op_cfg=op_cfg, + weights_channels_axis=None, + node_attrs_list=attr_names + list(pos_attr[2])) + qc = CandidateNodeQuantizationConfig(activation_quantization_cfg=a_qcfg, + weights_quantization_cfg=w_qcfg) + + # we generate q configs via constructors to follow the real code as closely as reasonably possible. + # verify that we actually got the configurations we want + assert qc.activation_quantization_cfg.activation_n_bits == a_nbits + assert qc.activation_quantization_cfg.enable_activation_quantization is a_enable + for k, v in w_attr.items(): + # get_attr_config accepts canonical attr names + assert qc.weights_quantization_cfg.get_attr_config(k).weights_n_bits == v[0] + assert qc.weights_quantization_cfg.get_attr_config(k).enable_weights_quantization == v[1] + for pos in pos_attr[2]: + assert qc.weights_quantization_cfg.get_attr_config(pos).weights_n_bits == pos_attr[0] + assert qc.weights_quantization_cfg.get_attr_config(pos).enable_weights_quantization == pos_attr[1] + + return qc + + +class DummyLayer: + """ Only needed for repr(node) to work. """ + pass + + +def build_node(name='node', canonical_weights: dict=None, qcs=None, input_shape=(4, 5, 6), output_shape=(4, 5, 6), + layer_class=DummyLayer, reuse=False): + """ Build a node for tests. + Canonical weights are converted into full unique names. 
+        candidates_quantization_cfg is set if qcs is passed. """
+    weights = canonical_weights or {}
+    weights = {k if isinstance(k, int) else full_attr_name(k): w for k, w in weights.items()}
+    node = BaseNode(name=name,
+                    framework_attr={},
+                    input_shape=input_shape,
+                    output_shape=output_shape,
+                    weights=weights,
+                    layer_class=layer_class,
+                    reuse=reuse)
+    if qcs:
+        node.candidates_quantization_cfg = qcs
+    return node
+
+
+BM = BitwidthMode
+TIC = TargetInclusionCriterion
+
+
+class TestUtilization:
+    def test_operations(self):
+        u = [Utilization(10, 15), Utilization(25, 10), Utilization(35, 5)]
+        assert u[0] + u[1] == Utilization(35, 25)
+        assert sum(u) == Utilization(70, 30)
+        # min/max is by bytes, not size
+        assert max(u) == u[0]
+        assert min(u) == u[2]
+
+    def test_invalid_radd(self):
+        with pytest.raises(ValueError, match='radd is only supported with 0'):
+            1 + Utilization(5, 5)
+
+
+class TestComputeResourceUtilization:
+    """ Test the compute_resource_utilization public api. """
+    @pytest.fixture(autouse=True)
+    def setup(self, graph_mock, fw_impl_mock, fw_info_mock):
+        n1 = build_node('n1', qcs=[build_qc()], output_shape=(None, 5, 10))
+        n2 = build_node('n2', output_shape=(None, 10, 20, 3),
+                        canonical_weights={'foo': np.zeros((3, 14))},
+                        qcs=[build_qc(w_attr={'foo': (4, True)})])
+        n3 = build_node('n3', qcs=[build_qc(4)], output_shape=(None, 2, 71))
+        graph = Graph('g', input_nodes=[n1], nodes=[n2], output_nodes=[n3],
+                      edge_list=[Edge(n1, n2, 0, 0), Edge(n2, n3, 0, 0)])
+
+        fw_info_mock.get_kernel_op_attributes = Mock(return_value=['foo'])    # for bops
+        fw_impl_mock.get_node_mac_operations = lambda n, fw_info: 42 if n == n2 else 0    # for bops
+
+        ru_calc = ResourceUtilizationCalculator(graph, fw_impl_mock, fw_info_mock)
+        # wrap the real methods
+        ru_calc.compute_activations_utilization = Mock(wraps=ru_calc.compute_activations_utilization)
+        ru_calc.compute_weights_utilization = Mock(wraps=ru_calc.compute_weights_utilization)
+        ru_calc.compute_bops = Mock(wraps=ru_calc.compute_bops)
+
+        self.ru_calc = ru_calc
+        self.nodes = [n1, n2, n3]
+
+    def test_compute_ru_all_targets(self):
+        # default targets
+        ru = self.ru_calc.compute_resource_utilization(TIC.AnyQuantized, BM.QDefaultSP)
+
+        self.ru_calc.compute_activations_utilization.assert_called_once_with(TIC.AnyQuantized, BM.QDefaultSP, None)
+        self.ru_calc.compute_weights_utilization.assert_called_once_with(TIC.AnyQuantized, BM.QDefaultSP, None)
+        self.ru_calc.compute_bops.assert_called_once_with(TIC.AnyQuantized, BM.QDefaultSP, act_qcs=None, w_qcs=None)
+        assert ru == ResourceUtilization(weights_memory=21,
+                                         activation_memory=671,
+                                         total_memory=692,
+                                         bops=42*4*8)
+        # explicit targets
+        ru2 = self.ru_calc.compute_resource_utilization(TIC.AnyQuantized, BM.QDefaultSP, ru_targets=list(RUTarget))
+        assert ru2 == ru
+
+    def test_compute_ru_w(self):
+        ru = self.ru_calc.compute_resource_utilization(TIC.Any, BM.Q8Bit, ru_targets=[RUTarget.WEIGHTS])
+
+        self.ru_calc.compute_weights_utilization.assert_called_once_with(TIC.Any, BM.Q8Bit, None)
+        self.ru_calc.compute_activations_utilization.assert_not_called()
+        self.ru_calc.compute_bops.assert_not_called()
+        assert ru == ResourceUtilization(weights_memory=42)
+
+    def test_compute_ru_act(self):
+        ru = self.ru_calc.compute_resource_utilization(TIC.Any, BM.Q8Bit, ru_targets=[RUTarget.ACTIVATION])
+
+        self.ru_calc.compute_activations_utilization.assert_called_once_with(TIC.Any, BM.Q8Bit, None)
+        self.ru_calc.compute_weights_utilization.assert_not_called()
+        self.ru_calc.compute_bops.assert_not_called()
+        assert ru == ResourceUtilization(activation_memory=742)
+
+    def test_compute_ru_total(self):
+        ru = self.ru_calc.compute_resource_utilization(TIC.Any, BM.Q8Bit, ru_targets=[RUTarget.TOTAL])
+
+        self.ru_calc.compute_activations_utilization.assert_called_once_with(TIC.Any, BM.Q8Bit, None)
+        self.ru_calc.compute_weights_utilization.assert_called_once_with(TIC.Any, BM.Q8Bit, None)
+        self.ru_calc.compute_bops.assert_not_called()
+        assert ru == ResourceUtilization(total_memory=742+42)
+
+    def test_compute_ru_bops(self):
+        ru = self.ru_calc.compute_resource_utilization(TIC.AnyQuantized, BM.Q8Bit, ru_targets=[RUTarget.BOPS])
+
+        self.ru_calc.compute_bops.assert_called_once_with(TIC.AnyQuantized, BM.Q8Bit, act_qcs=None, w_qcs=None)
+        self.ru_calc.compute_activations_utilization.assert_not_called()
+        self.ru_calc.compute_weights_utilization.assert_not_called()
+        assert ru == ResourceUtilization(bops=42*8*8)
+
+    def test_compute_ru_custom_w_qc(self):
+        w_qcs = {self.nodes[1]: build_qc(w_attr={'foo': (16, True)}).weights_quantization_cfg}
+
+        ru = self.ru_calc.compute_resource_utilization(TIC.AnyQuantized, BM.QCustom, w_qcs=w_qcs)
+
+        self.ru_calc.compute_activations_utilization.assert_called_once_with(TIC.AnyQuantized, BM.QCustom, None)
+        self.ru_calc.compute_weights_utilization.assert_called_once_with(TIC.AnyQuantized, BM.QCustom, w_qcs)
+        self.ru_calc.compute_bops.assert_called_once_with(TIC.AnyQuantized, BM.QCustom, act_qcs=None, w_qcs=w_qcs)
+
+    def test_compute_ru_custom_a_qc(self):
+        a_qcs = {self.nodes[1]: build_qc(w_attr={'foo': (16, True)}).activation_quantization_cfg}
+
+        ru = self.ru_calc.compute_resource_utilization(TIC.AnyQuantized, BM.QCustom, act_qcs=a_qcs)
+
+        self.ru_calc.compute_activations_utilization.assert_called_once_with(TIC.AnyQuantized, BM.QCustom, a_qcs)
+        self.ru_calc.compute_weights_utilization.assert_called_once_with(TIC.AnyQuantized, BM.QCustom, None)
+        self.ru_calc.compute_bops.assert_called_once_with(TIC.AnyQuantized, BM.QCustom, act_qcs=a_qcs, w_qcs=None)
+
+    @pytest.mark.parametrize('bm', set(BM)-{BM.QCustom})
+    def test_unexpected_qc_for_bitmode(self, bm):
+        with pytest.raises(ValueError, match=self.ru_calc.unexpected_qc_error):
+            self.ru_calc.compute_resource_utilization(TIC.Any, bm, act_qcs=Mock())
+
+        with pytest.raises(ValueError, match=self.ru_calc.unexpected_qc_error):
+            self.ru_calc.compute_resource_utilization(TIC.Any, bm, w_qcs=Mock())
+
+    def test_unexpected_qc_for_targets(self):
+        with pytest.raises(ValueError, match='Activation configuration passed but no relevant ru_targets requested.'):
+            self.ru_calc.compute_resource_utilization(TIC.Any, BM.QCustom, act_qcs=Mock(),
+                                                      ru_targets=[RUTarget.WEIGHTS])
+
+        with pytest.raises(ValueError, match='Weight configuration passed but no relevant ru_targets requested.'):
+            self.ru_calc.compute_resource_utilization(TIC.Any, BM.QCustom, w_qcs=Mock(),
+                                                      ru_targets=[RUTarget.ACTIVATION])
+
+    def test_allowed_unexpected_qc_for_targets(self):
+        self.ru_calc.compute_resource_utilization(TIC.Any, BM.QCustom, act_qcs=Mock(), ru_targets=[RUTarget.WEIGHTS],
+                                                  allow_unused_qcs=True)
+        # the unexpected config is converted to None
+        self.ru_calc.compute_weights_utilization.assert_called_once_with(TIC.Any, BM.QCustom, None)
+
+        self.ru_calc.compute_resource_utilization(TIC.Any, BM.QCustom, w_qcs=Mock(), ru_targets=[RUTarget.ACTIVATION],
+                                                  allow_unused_qcs=True)
+        # the unexpected config is converted to None
+        self.ru_calc.compute_activations_utilization.assert_called_once_with(TIC.Any, BM.QCustom, None)
+
+
+class TestActivationUtilizationMethods:
+ """ Tests for non-public activation utilization api. """ + def test_get_a_nbits_configurable(self, graph_mock, fw_impl_mock, fw_info_mock): + ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock) + node = build_node(qcs=[build_qc(7), build_qc(4), build_qc(2)]) + assert ru_calc._get_activation_nbits(node, BM.Float, None) == FLOAT_BITWIDTH + assert ru_calc._get_activation_nbits(node, BM.QMinBit, None) == 2 + assert ru_calc._get_activation_nbits(node, BM.QMaxBit, None) == 7 + assert ru_calc._get_activation_nbits(node, BM.Q8Bit, None) == 8 + + @pytest.mark.parametrize('node', [ + build_node(qcs=[build_qc(42)]), + build_node(qcs=[build_qc(42, w_attr={'foo': (4, True)}), build_qc(42, w_attr={'foo': (2, False)})]) + ]) + def test_get_a_nbits_nonconfigurable(self, graph_mock, fw_impl_mock, fw_info_mock, node): + ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock) + for bm in set(BitwidthMode) - {BM.Float, BM.Q8Bit}: + assert ru_calc._get_activation_nbits(node, bm, None) == 42 + assert ru_calc._get_activation_nbits(node, BM.Float, None) == FLOAT_BITWIDTH + assert ru_calc._get_activation_nbits(node, BM.Q8Bit, None) == 8 + + @pytest.mark.parametrize('node, qc, exp_nbit', [ + (build_node(qcs=[build_qc(4)]), build_qc(17), 17), + (build_node(qcs=[build_qc(4)]), build_qc(17, False), 32), + (build_node(qcs=[build_qc(4, False)]), build_qc(17, True), 17) + ]) + def test_get_a_nbits_custom(self, graph_mock, fw_impl_mock, fw_info_mock, node, qc, exp_nbit): + ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock) + assert ru_calc._get_activation_nbits(node, BM.QCustom, qc.activation_quantization_cfg) == exp_nbit + + @pytest.mark.parametrize('bm', list(BM)) + def test_get_a_nbits_non_q(self, graph_mock, fw_impl_mock, fw_info_mock, bm): + node = build_node(qcs=[build_qc(a_enable=False)]) + ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock) + assert ru_calc._get_activation_nbits(node, bm, None) == FLOAT_BITWIDTH + + def test_get_a_nbits_errors(self, graph_mock, fw_impl_mock, fw_info_mock): + ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock) + node = build_node(qcs=[build_qc(8), build_qc(4)]) + + with pytest.raises(ValueError, match=f'Could not retrieve the activation quantization candidate for node {node}'): + ru_calc._get_activation_nbits(node, BM.QCustom, act_qc=None) + + with pytest.raises(ValueError, match='Could not retrieve the activation quantization candidate'): + ru_calc._get_activation_nbits(node, BM.QDefaultSP, act_qc=None) + + def test_get_target_activation_nodes(self, graph_mock, fw_impl_mock, fw_info_mock): + sp1 = build_node('n1', qcs=[build_qc(8), build_qc(4)]) + sp2 = build_node('n2', qcs=[build_qc(4, w_attr={'foo': (8, True)}), + build_qc(4, w_attr={'foo': (4, True)})]) + sp3 = build_node('n3', qcs=[build_qc(4)], reuse=True) + mp = build_node('n4', qcs=[build_qc(4), build_qc(2)], reuse=True) + noq = build_node('noq', qcs=[build_qc(4, False, w_attr={'foo': (8, True)}), + build_qc(4, False, w_attr={'foo': (4, True)})]) + + graph_mock.nodes = [sp1, sp2, sp3, mp, noq] + ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock) + + assert len(TIC) == 4, 'enum changed, update tests' + assert ru_calc._get_target_activation_nodes(TIC.QConfigurable, include_reused=True) == [sp1, mp] + assert ru_calc._get_target_activation_nodes(TIC.QConfigurable, include_reused=False) == [sp1] + + assert ru_calc._get_target_activation_nodes(TIC.QNonConfigurable, 
include_reused=True) == [sp2, sp3] + assert ru_calc._get_target_activation_nodes(TIC.QNonConfigurable, include_reused=False) == [sp2] + + assert ru_calc._get_target_activation_nodes(TIC.AnyQuantized, include_reused=True) == [sp1, sp2, sp3, mp] + assert ru_calc._get_target_activation_nodes(TIC.AnyQuantized, include_reused=False) == [sp1, sp2] + + assert ru_calc._get_target_activation_nodes(TIC.Any, include_reused=True) == [sp1, sp2, sp3, mp, noq] + assert ru_calc._get_target_activation_nodes(TIC.Any, include_reused=False) == [sp1, sp2, noq] + # explicit nodes list + assert ru_calc._get_target_activation_nodes(TIC.QNonConfigurable, + include_reused=True, nodes=[sp1, sp2, sp3]) == [sp2, sp3] + # no nodes found + assert ru_calc._get_target_activation_nodes(TIC.AnyQuantized, + include_reused=False, nodes=[sp3, mp, noq]) == [] + + +class TestComputeActivationTensorsUtilization: + """ Tests for activation tensors utilization public apis. """ + def test_compute_node_activation_tensor_utilization(self, graph_mock, fw_impl_mock, fw_info_mock): + mp_reuse = build_node(output_shape=(None, 3, 14), qcs=[build_qc(4), build_qc(16)], reuse=True) + noq = build_node(output_shape=(None, 15, 9), qcs=[build_qc(a_enable=False)]) + graph_mock.nodes = [mp_reuse, noq] + + ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock) + # _get_activation_nbits is already fully checked, just make sure we use it, and use correctly + ru_calc._get_activation_nbits = Mock(wraps=ru_calc._get_activation_nbits) + + custom_qc = build_qc(16, True).activation_quantization_cfg + res = ru_calc.compute_node_activation_tensor_utilization(noq, TIC.Any, BM.QCustom, custom_qc) + ru_calc._get_activation_nbits.assert_called_once_with(noq, BM.QCustom, custom_qc) + assert res == Utilization(135, 270.) + # reused is not ignored + res = ru_calc.compute_node_activation_tensor_utilization(mp_reuse, TIC.QConfigurable, BM.QMinBit) + assert res == Utilization(42, 21.) + # not a target node + res = ru_calc.compute_node_activation_tensor_utilization(noq, TIC.AnyQuantized, BM.QCustom, custom_qc) + assert res == Utilization(0, 0) + # no target + res = ru_calc.compute_node_activation_tensor_utilization(noq, None, BM.Q8Bit) + assert res == Utilization(135, 540.) 
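+
+    # A small illustrative sketch (our own annotation, not part of the calculator's
+    # api): the expected values above follow from bytes = #elements * nbits / 8,
+    # e.g. the (None, 15, 9) output has 135 elements, so 16 bits give
+    # 135 * 16 / 8 = 270.0 bytes and the 32-bit float fallback gives 540.0 bytes.
+    @staticmethod
+    def _expected_utilization(output_shape, nbits):
+        """ Hypothetical helper, for illustration only. """
+        size = int(np.prod([d for d in output_shape if d is not None]))
+        return Utilization(size, size * nbits / 8)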
+ + @pytest.mark.parametrize('bitmode', set(BM)-{BM.QCustom}) + def test_compute_node_activation_tensor_utilization_errors(self, graph_mock, fw_impl_mock, fw_info_mock, bitmode): + node = build_node(qcs=[build_qc()]) + graph_mock.nodes = [node] + ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock) + with pytest.raises(ValueError, match=ResourceUtilizationCalculator.unexpected_qc_error): + ru_calc.compute_node_activation_tensor_utilization(node, TIC.Any, bitmode, qc=build_qc()) + + def test_compute_act_tensors_utilization(self, fw_impl_mock, fw_info_mock): + mp = build_node('mp', output_shape=(None, 3, 14), qcs=[build_qc(4), build_qc(2)]) + noq = build_node('noq', output_shape=(None, 2, 71), qcs=[build_qc(a_enable=False)]) + sp = build_node('sp', output_shape=(None, 59), qcs=[build_qc()], reuse=True) + + g = Graph('g', input_nodes=[mp], nodes=[noq], output_nodes=[sp], + edge_list=[Edge(mp, noq, 0, 0), Edge(noq, sp, 0, 0)]) + ru_calc = ResourceUtilizationCalculator(g, fw_impl_mock, fw_info_mock) + ru_calc._topo_sort = Mock(wraps=ru_calc._topo_sort) + # wrap the methods that were fully tested separately to verify we use them and use correctly + ru_calc._get_target_activation_nodes = Mock(wraps=ru_calc._get_target_activation_nodes) + ru_calc.compute_node_activation_tensor_utilization = Mock(wraps=ru_calc.compute_node_activation_tensor_utilization) + + qcs = { + mp: build_qc(a_enable=False).activation_quantization_cfg, + noq: build_qc(4, True).activation_quantization_cfg + } + # include reuse + custom qc + total, per_node = ru_calc.compute_activation_tensors_utilization(TIC.Any, BM.QCustom, act_qcs=qcs, + include_reused=True) + assert per_node == {mp: Utilization(42, 168.), noq: Utilization(142, 71.), sp: Utilization(59, 59.)} + assert total == 168. + ru_calc._get_target_activation_nodes.assert_called_once_with(TIC.Any, include_reused=True) + + ru_calc._topo_sort.assert_called_once() + assert sorted(ru_calc._topo_sort.call_args.args[0], key=lambda n: n.name) == [mp, noq, sp] + + calls = sorted(ru_calc.compute_node_activation_tensor_utilization.call_args_list, + key=lambda call: call.args[0].name) + assert len(calls) == 3 + assert calls[0].args == (mp, None, BM.QCustom, qcs[mp]) + assert calls[1].args == (noq, None, BM.QCustom, qcs[noq]) + assert calls[2].args == (sp, None, BM.QCustom, None) + + # no reused + no custom + total, per_node = ru_calc.compute_activation_tensors_utilization(TIC.AnyQuantized, BM.QMinBit, + include_reused=False) + ru_calc._get_target_activation_nodes.assert_called_with(TIC.AnyQuantized, include_reused=False) + assert per_node == {mp: Utilization(42, 10.5)} + assert total == 10.5 + + # no target nodes + total, per_node = ru_calc.compute_activation_tensors_utilization(TIC.QNonConfigurable, BM.QMinBit, + include_reused=False) + assert total == 0 + assert per_node == {} + + @pytest.mark.parametrize('bitmode', set(BM) - {BM.QCustom}) + def test_compute_act_tensors_utilization_errors(self, graph_mock, fw_impl_mock, fw_info_mock, bitmode): + ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock) + with pytest.raises(ValueError, match=ResourceUtilizationCalculator.unexpected_qc_error): + ru_calc.compute_activation_tensors_utilization(TIC.Any, bitmode, act_qcs=Mock()) + + +class TestActivationMaxCutUtilization: + """ Tests for activation max cut utilization. """ + def test_compute_cuts_integration(self, graph_mock, fw_impl_mock, fw_info_mock, mocker): + """ Test integration with max cut computation. 
""" + # Test a simple linear dummy graph with the real max cut computation. + n1 = build_node('n1', qcs=[build_qc()], input_shape=(None, 10, 20, 3), output_shape=(None, 10, 20, 3)) + n2 = build_node('n2', qcs=[build_qc()], input_shape=(None, 10, 20, 3), output_shape=(None, 5, 10)) + n3 = build_node('n3', qcs=[build_qc()], input_shape=(None, 5, 10), output_shape=(None, 5, 10)) + n4 = build_node('n4', qcs=[build_qc()], input_shape=(None, 5, 10, 32), output_shape=(None, 5, 10, 32)) + edges = [Edge(n1, n2, 0, 0), Edge(n2, n3, 0, 0), Edge(n3, n4, 0, 0)] + graph = Graph('g', input_nodes=[n1], nodes=[n2, n3], output_nodes=[n4], edge_list=edges) + ru_calc = ResourceUtilizationCalculator(graph, fw_impl_mock, fw_info_mock) + # wrap the real implementation + maxcut_spy = mocker.patch('model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.' + 'resource_utilization_calculator.compute_graph_max_cut', wraps=compute_graph_max_cut) + + # trigger cuts cache computation + cuts_cache = ru_calc.cuts + + # verify the cache + assert len(cuts_cache) == 5 + assert all(isinstance(k, Cut) for k in cuts_cache.keys()) + # for each cut we save a list of its nodes + cuts_nodes = {tuple(sorted(n.name for n in nodes)) for nodes in cuts_cache.values()} + assert cuts_nodes == {('n1',), ('n4',), ('n1', 'n2'), ('n2', 'n3'), ('n3', 'n4')} + + # verify cuts computation only happens the first time + cuts_cache2 = ru_calc.cuts + maxcut_spy.assert_called_once() + assert cuts_cache2 == cuts_cache + + # map from node names to cuts to retrieve the cuts + nodes_to_cuts = {tuple(sorted(elem.node_name for elem in cut.mem_elements.elements)): cut + for cut in cuts_cache.keys()} + cut1 = nodes_to_cuts[('n1',)] + cut12 = nodes_to_cuts[('n1', 'n2')] + cut23 = nodes_to_cuts[('n2', 'n3')] + cut34 = nodes_to_cuts[('n3', 'n4')] + cut4 = nodes_to_cuts[('n4',)] + + # compute utilization to check everything works together with real maxcut + total, per_cut, per_cut_per_node = ru_calc.compute_activation_utilization_by_cut(target_criterion=TIC.AnyQuantized, + bitwidth_mode=BM.QDefaultSP) + + assert per_cut_per_node == {cut1: {n1: Utilization(10 * 20 * 3, 600)}, + cut12: {n1: Utilization(10 * 20 * 3, 600), + n2: Utilization(5 * 10, 50)}, + cut23: {n2: Utilization(5*10, 50), + n3: Utilization(5*10, 50)}, + cut34: {n3: Utilization(5*10, 50), + n4: Utilization(5*10*32, 1600)}, + cut4: {n4: Utilization(5 * 10 * 32, 1600)}} + assert per_cut == { + nodes_to_cuts[('n1',)]: Utilization(600, 600), + nodes_to_cuts[('n1', 'n2')]: Utilization(650, 650), + nodes_to_cuts[('n2', 'n3')]: Utilization(100, 100), + nodes_to_cuts[('n3', 'n4')]: Utilization(1650, 1650), + nodes_to_cuts[('n4',)]: Utilization(1600, 1600) + } + assert total == 1650 + + @pytest.fixture + def prepare_compute_cuts(self, graph_mock, fw_impl_mock, fw_info_mock, mocker): + # reused nodes should be always included + mp_reuse = build_node('mp_reuse', qcs=[build_qc(5), build_qc(2)], output_shape=(None, 24), reuse=True) + mp = build_node('mp', qcs=[build_qc(4), build_qc(2)], output_shape=(None, 5, 10)) + noq = build_node('noq', qcs=[build_qc(6, False)], output_shape=(None, 300)) + sp = build_node('sp', qcs=[build_qc(3)], output_shape=(None, 20, 10)) + mp2 = build_node('mp2', qcs=[build_qc(2), build_qc(4)], output_shape=(None, 150)) + + nodes = [mp_reuse, mp, noq, sp, mp2] + graph_mock.nodes = nodes + # use the Graph original method (need to bind it to graph_mock instance) + graph_mock.find_node_by_name = MethodType(Graph.find_node_by_name, graph_mock) + + # we should not use 
+        cut_elems1 = MemoryElements(elements={ActivationMemoryTensor(mp_reuse.output_shape, 'mp_reuse', 0)}, total_size=-1)
+        cut_elems2 = MemoryElements(elements={ActivationMemoryTensor(mp_reuse.output_shape, 'mp_reuse', 0),
+                                              ActivationMemoryTensor(mp.output_shape, 'mp', 0),
+                                              ActivationMemoryTensor(noq.output_shape, 'noq', 0),
+                                              ActivationMemoryTensor(sp.output_shape, 'sp', 0)}, total_size=-1)
+        cut_elems3 = MemoryElements(elements={ActivationMemoryTensor(sp.output_shape, 'sp', 0),
+                                              ActivationMemoryTensor(noq.output_shape, 'noq', 0)}, total_size=-1)
+        cut_elems4 = MemoryElements(elements={ActivationMemoryTensor(mp2.output_shape, 'mp2', 0)}, total_size=-1)
+
+        cuts = [Cut([], set(), mem_elements=cut_elems)
+                for cut_elems in [cut_elems1, cut_elems2, cut_elems3, cut_elems4]]
+        mocker.patch.object(ResourceUtilizationCalculator, '_compute_cuts', Mock(return_value=cuts))
+        ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock)
+        return ru_calc, cuts, nodes
+
+    def test_get_cut_target_nodes(self, prepare_compute_cuts):
+        ru_calc, (cut1, cut2, cut3, cut4), (mp_reuse, mp, noq, sp, mp2) = prepare_compute_cuts
+        assert len(TIC) == 4
+        sorted_res = lambda res: sorted(res, key=lambda n: n.name)
+        assert sorted_res(ru_calc._get_cut_target_nodes(cut2, TIC.Any)) == [mp, mp_reuse, noq, sp]
+        assert sorted_res(ru_calc._get_cut_target_nodes(cut2, TIC.AnyQuantized)) == [mp, mp_reuse, sp]
+        assert sorted_res(ru_calc._get_cut_target_nodes(cut2, TIC.QConfigurable)) == [mp, mp_reuse]
+        assert sorted_res(ru_calc._get_cut_target_nodes(cut2, TIC.QNonConfigurable)) == [sp]
+
+    def test_compute_act_utilization_by_cut(self, prepare_compute_cuts):
+        ru_calc, (cut1, cut2, cut3, cut4), (mp_reuse, mp, noq, sp, mp2) = prepare_compute_cuts
+
+        ru_calc.compute_node_activation_tensor_utilization = Mock(wraps=ru_calc.compute_node_activation_tensor_utilization)
+        ru_calc._get_cut_target_nodes = Mock(wraps=ru_calc._get_cut_target_nodes)
+
+        qcs = {mp_reuse: build_qc(7), mp: build_qc(10), noq: build_qc(4, True), mp2: build_qc(4, False)}
+        qcs = {k: v.activation_quantization_cfg for k, v in qcs.items()}
+        total, per_cut, per_cut_node = ru_calc.compute_activation_utilization_by_cut(TIC.AnyQuantized, BM.QCustom, qcs)
+
+        cut_nodes_calls = ru_calc._get_cut_target_nodes.call_args_list
+        assert len(cut_nodes_calls) == 4
+        assert {call.args[0] for call in cut_nodes_calls} == {cut1, cut2, cut3, cut4}
+        assert {call.args[1] for call in cut_nodes_calls} == {TIC.AnyQuantized}
+
+        compute_tensor_calls = sorted(ru_calc.compute_node_activation_tensor_utilization.call_args_list,
+                                      key=lambda call: call.args[0].name)
+        assert len(compute_tensor_calls) == 6
+        assert compute_tensor_calls[0].args == (mp, TIC.AnyQuantized, BM.QCustom, qcs[mp])
+        assert compute_tensor_calls[-1].args == (sp, TIC.AnyQuantized, BM.QCustom, None)
+
+        assert len(per_cut_node) == 4
+        assert per_cut_node[cut1] == {mp_reuse: Utilization(24, 21.)}
+        assert per_cut_node[cut2] == {mp_reuse: Utilization(24, 21.),
+                                      mp: Utilization(50, 62.5),
+                                      sp: Utilization(200, 75.)}
+        assert per_cut_node[cut3] == {sp: Utilization(200, 75.)}
+        assert per_cut_node[cut4] == {mp2: Utilization(150, 600.)}
+
+        assert per_cut == {cut1: Utilization(24, 21.),
+                           cut2: Utilization(274, 158.5),
+                           cut3: Utilization(200, 75.),
+                           cut4: Utilization(150, 600.),
+                           }
+        assert total == 600.
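+        # Illustrative consistency check (our own addition, based on the expectations
+        # above): the per-cut utilization equals the sum over that cut's target nodes,
+        # and the returned total appears to be the max over the cuts' byte sizes,
+        # e.g. max(21., 158.5, 75., 600.) == 600.
+        assert per_cut[cut2] == sum(per_cut_node[cut2].values())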
+
+    def test_compute_act_utilization_by_cut_no_cut_nodes(self, prepare_compute_cuts):
+        ru_calc, (cut1, cut2, cut3, cut4), (mp_reuse, mp, noq, sp, mp2) = prepare_compute_cuts
+
+        total, per_cut, per_cut_node = ru_calc.compute_activation_utilization_by_cut(TIC.QNonConfigurable, BM.QDefaultSP)
+        assert len(per_cut_node) == 2
+        assert per_cut_node[cut2] == {sp: Utilization(200, 75.)}
+        assert per_cut_node[cut3] == {sp: Utilization(200, 75.)}
+        assert per_cut == {cut2: Utilization(200, 75.),
+                           cut3: Utilization(200, 75.)}
+        assert total == 75.
+
+    def test_compute_act_utilization_by_cut_no_target_nodes(self, graph_mock, fw_impl_mock, fw_info_mock):
+        node = build_node(qcs=[build_qc(a_enable=False)])
+        graph_mock.nodes = [node]
+        ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock)
+
+        ru_calc._compute_cuts = Mock()
+        ru_calc._get_target_activation_nodes = Mock(wraps=ru_calc._get_target_activation_nodes)
+
+        assert ru_calc.compute_activation_utilization_by_cut(TIC.AnyQuantized, BM.Float) == (0, {}, {})
+        ru_calc._compute_cuts.assert_not_called()
+        ru_calc._get_target_activation_nodes.assert_called_with(TIC.AnyQuantized, include_reused=True)
+
+        # make sure _compute_cuts is indeed called when cuts are accessed, otherwise the test is meaningless
+        ru_calc.cuts
+        ru_calc._compute_cuts.assert_called()
+
+    @pytest.mark.parametrize('bitmode', set(BM)-{BM.QCustom})
+    def test_compute_act_utilization_by_cut_errors(self, graph_mock, fw_impl_mock, fw_info_mock, bitmode):
+        ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock)
+        with pytest.raises(ValueError, match=ResourceUtilizationCalculator.unexpected_qc_error):
+            ru_calc.compute_activation_utilization_by_cut(TIC.Any, bitmode, act_qcs=Mock())
+
+
+class TestWeightUtilizationMethods:
+    """ Tests for the weights utilization non-public api. """
""" + + def test_get_w_nbits(self, graph_mock, fw_impl_mock, fw_info_mock): + ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock) + node = build_node(canonical_weights={'mp': 1, 'sp': 2, 'noq': 3}) + node.candidates_quantization_cfg = [ + build_qc(1, w_attr={'mp': (2, True), 'sp': (5, True), 'noq': (12, False)}, pos_attr=(6, True, [2])), + build_qc(10, w_attr={'mp': (4, True), 'sp': (5, True), 'noq': (1, False)}, pos_attr=(6, True, [2])), + build_qc(8, False, w_attr={'mp': (7, True), 'sp': (5, True), 'noq': (2, False)}, pos_attr=(6, True, [2])) + ] + + # configurable attr + assert ru_calc._get_weight_nbits(node, 'mp', BM.Float, w_qc=None) == FLOAT_BITWIDTH + assert ru_calc._get_weight_nbits(node, 'mp', BM.QMinBit, w_qc=None) == 2 + assert ru_calc._get_weight_nbits(node, 'mp', BM.QMaxBit, w_qc=None) == 7 + assert ru_calc._get_weight_nbits(node, 'mp', BM.Q8Bit, w_qc=None) == 8 + + # non-configurable attr with multiple qcs with same w precision + for bm in set(BitwidthMode) - {BM.Float, BM.Q8Bit}: + assert ru_calc._get_weight_nbits(node, 'sp', bm, w_qc=None) == 5 + assert ru_calc._get_weight_nbits(node, 'sp', BM.Float, w_qc=None) == FLOAT_BITWIDTH + assert ru_calc._get_weight_nbits(node, 'sp', BM.Q8Bit, w_qc=None) == 8 + # positional + assert ru_calc._get_weight_nbits(node, 2, BM.QMaxBit, w_qc=None) == 6 + + # for un-quantized, all modes return float + for bm in set(BitwidthMode): + assert ru_calc._get_weight_nbits(node, 'noq', bm, w_qc=None) == FLOAT_BITWIDTH + + # qc is passed but doesn't contain the weight, retrieve from the node + qc = build_qc(w_attr={'foo': (10, True)}) + assert ru_calc._get_weight_nbits(node, 'sp', BM.QCustom, w_qc=qc.weights_quantization_cfg) == 5 + assert ru_calc._get_weight_nbits(node, 2, BM.QCustom, w_qc=qc.weights_quantization_cfg) == 6 + + # custom qc + custom_qc = build_qc(w_attr={'foo': (42, True), 'sp': (43, False), 'noq': (44, True)}, pos_attr=(11, True, [2])) + wqc = custom_qc.weights_quantization_cfg + assert ru_calc._get_weight_nbits(node, 'foo', BM.QCustom, w_qc=wqc) == 42 + assert ru_calc._get_weight_nbits(node, 2, BM.QCustom, w_qc=wqc) == 11 + # non-quantized qc for quantized weight + assert ru_calc._get_weight_nbits(node, 'sp', BM.QCustom, w_qc=wqc) == FLOAT_BITWIDTH + # quantized qc for non-quantized weight + assert ru_calc._get_weight_nbits(node, 'noq', BM.QCustom, w_qc=wqc) == 44 + + def test_get_w_nbits_errors(self, graph_mock, fw_impl_mock, fw_info_mock): + ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock) + node = build_node(canonical_weights={'foo': 1, 1: 2}, + qcs=[build_qc(w_attr={'foo': (4, True)}, pos_attr=(4, True, [1])), + build_qc(w_attr={'foo': (8, True)}, pos_attr=(8, True, [1]))]) + # qc not passed for configurable attr + with pytest.raises(ValueError, match='Could not retrieve the quantization candidate for attr foo'): + ru_calc._get_weight_nbits(node, 'foo', BM.QCustom, w_qc=None) + + # qc passed but doesnt contain all configurable attrs + qc = build_qc(w_attr={'foo': (8, True)}).weights_quantization_cfg + with pytest.raises(ValueError, match='Could not retrieve the quantization candidate for attr 1'): + ru_calc._get_weight_nbits(node, 1, BM.QCustom, w_qc=qc) + + # default bit mode cannot be requested for configurable attrs. 
+        with pytest.raises(ValueError, match='Could not retrieve the quantization candidate for attr foo'):
+            ru_calc._get_weight_nbits(node, 'foo', BM.QDefaultSP, w_qc=None)
+
+    def test_get_target_weight_attrs(self, graph_mock, fw_impl_mock, fw_info_mock):
+        weights = {
+            'foo': np.array(1.),
+            'bar': np.array(2.),
+            'baz': np.array(3.),
+            1: np.array(4.),
+            2: np.array(5.)
+        }
+        # the default weight cfg is used for positional weights
+        qcs = [
+            build_qc(w_attr={'foo': (8, True), 'bar': (2, True), 'baz': (4, False)}, pos_attr=(10, True, [1, 2])),
+            build_qc(w_attr={'foo': (4, True), 'bar': (2, True), 'baz': (2, False)}, pos_attr=(10, True, [2, 1]))
+        ]
+        node = build_node(canonical_weights=weights, qcs=qcs)
+        assert node.has_positional_weights
+
+        ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock)
+        assert len(TIC) == 4, 'enum changed, update the test'
+        assert ru_calc._get_target_weight_attrs(node, TIC.QConfigurable) == full_attr_name(['foo'])
+        assert ru_calc._get_target_weight_attrs(node, TIC.QNonConfigurable) == full_attr_name(['bar', 1, 2])
+        assert ru_calc._get_target_weight_attrs(node, TIC.AnyQuantized) == full_attr_name(['foo', 'bar', 1, 2])
+        assert ru_calc._get_target_weight_attrs(node, TIC.Any) == full_attr_name(['foo', 'bar', 'baz', 1, 2])
+
+    def test_collect_target_nodes_w_attrs(self, graph_mock, fw_impl_mock, fw_info_mock):
+        node = build_node('mixed', canonical_weights={'foo': np.array(1.), 'bar': np.array(2.), 3: np.array(3.)},
+                          qcs=[build_qc(w_attr={'foo': (8, True), 'bar': (2, True)}, pos_attr=(4, False, [3])),
+                               build_qc(w_attr={'foo': (4, True), 'bar': (2, True)}, pos_attr=(2, False, [3]))])
+
+        # should never be selected
+        node_no_weights = build_node('no_w', qcs=[build_qc()])
+
+        node_reuse = build_node('reuse', canonical_weights={'foo': np.array(1.), 1: np.array(2.)},
+                                qcs=[build_qc(w_attr={'foo': (8, True)}, pos_attr=(4, True, [1]))], reuse=True)
+
+        graph_mock.nodes = [node, node_no_weights, node_reuse]
+        ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock)
+
+        # we only cover the options relevant to this level, as test_get_target_weight_attrs fully covers a node's attrs
+        assert (ru_calc._collect_target_nodes_w_attrs(TIC.Any, include_reused=True) ==
+                {node: full_attr_name(['foo', 'bar', 3]),
+                 node_reuse: [full_attr_name('foo'), 1]})
+
+        assert (ru_calc._collect_target_nodes_w_attrs(TIC.Any, include_reused=False) ==
+                {node: full_attr_name(['foo', 'bar', 3])})
+
+        assert (ru_calc._collect_target_nodes_w_attrs(TIC.QConfigurable, include_reused=True) ==
+                {node: [full_attr_name('foo')]})
+
+
+class TestComputeNodeWeightsUtilization:
+    """ Tests for the compute_node_weights_utilization public method.
""" + + @pytest.fixture + def setup_node_w_test(self, graph_mock, fw_impl_mock, fw_info_mock): + weights = { + 'mp': np.ones((3, 4, 5, 6)), + 'sp': np.full((10, 20), 2), + 'noq': np.full((15,), 3), + 1: np.full((2, 3, 5), 4), + 2: np.full((2, 3), 5) + } + qcs = [ + build_qc(w_attr={'mp': (16, True), 'sp': (4, True), 'noq': (5, False)}, pos_attr=(4, True, [1, 2])), + build_qc(w_attr={'mp': (4, True), 'sp': (4, True), 'noq': (6, False)}, pos_attr=(8, True, [1, 2])) + ] + node = build_node(canonical_weights=weights, qcs=qcs) + graph_mock.nodes = [node] + ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock) + # wrap the original methods to verify integration + ru_calc._get_weight_nbits = Mock(wraps=ru_calc._get_weight_nbits) + ru_calc._get_target_weight_attrs = Mock(wraps=ru_calc._get_target_weight_attrs) + return node, ru_calc + + def test_compute_node_w_utilization_custom_qc(self, setup_node_w_test): + node, ru_calc = setup_node_w_test + # _get_weight_nbits and _get_target_weight_attrs are fully tested separately, we wrap the implementation + # to verify integration. No need to test all cases again. + custom_qc = build_qc(w_attr={'mp': (3, True), 'noq': (4, True)}, + pos_attr=(2, True, [1, 2])).weights_quantization_cfg + total, per_attr = ru_calc.compute_node_weights_utilization(node, TIC.QConfigurable, BM.QCustom, qc=custom_qc) + + ru_calc._get_target_weight_attrs.assert_called_once_with(node, TIC.QConfigurable) + call_args = [call.args for call in ru_calc._get_weight_nbits.call_args_list] + assert len(call_args) == 3 + assert set(call_args) == {(node, full_attr_name('mp'), BM.QCustom, custom_qc), + (node, 1, BM.QCustom, custom_qc), + (node, 2, BM.QCustom, custom_qc)} + assert per_attr == {full_attr_name('mp'): Utilization(360, 135.), + 1: Utilization(30, 7.5), + 2: Utilization(6, 1.5)} + assert total == Utilization(396, 144.) 
+
+    def test_compute_node_w_utilization_explicit_attrs_non_custom(self, setup_node_w_test):
+        node, ru_calc = setup_node_w_test
+        # explicit attrs list, no custom qc
+        total, per_attr = ru_calc.compute_node_weights_utilization(node, full_attr_name(['mp', 'noq', 2]),
+                                                                   BM.QMinBit)
+        ru_calc._get_target_weight_attrs.assert_not_called()
+        call_args = [call.args for call in ru_calc._get_weight_nbits.call_args_list]
+        assert len(call_args) == 3
+        assert set(call_args) == {(node, full_attr_name('mp'), BM.QMinBit, None),
+                                  (node, full_attr_name('noq'), BM.QMinBit, None),
+                                  (node, 2, BM.QMinBit, None)}
+        assert per_attr == {full_attr_name('mp'): Utilization(360, 180.),
+                            full_attr_name('noq'): Utilization(15, 60.),
+                            2: Utilization(6, 3.)}
+
+    @pytest.mark.parametrize('node', [
+        build_node(qcs=[build_qc()]),
+        build_node(qcs=[build_qc(w_attr={'foo': (4, False)})])
+    ])
+    def test_compute_node_w_utilization_no_weights(self, graph_mock, fw_impl_mock, fw_info_mock, node):
+        ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock)
+
+        total, detailed = ru_calc.compute_node_weights_utilization(node, TIC.AnyQuantized, BM.Float)
+        assert total == Utilization(0, 0) and detailed == {}
+
+    def test_compute_node_w_utilization_errors(self, graph_mock, fw_impl_mock, fw_info_mock):
+        node = build_node(canonical_weights={'foo': 1, 1: 2}, qcs=[build_qc(w_attr={'foo': (4, True)}),
+                                                                   build_qc(w_attr={'foo': (8, True)})])
+        ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock)
+
+        # qc passed for a non-custom mode
+        qc = build_qc().weights_quantization_cfg
+        with pytest.raises(ValueError, match=ResourceUtilizationCalculator.unexpected_qc_error):
+            ru_calc.compute_node_weights_utilization(node, TIC.AnyQuantized, BM.QMaxBit, qc)
+
+        qc = build_qc(w_attr={'whoisit': (4, True)}, pos_attr=(4, True, [77])).weights_quantization_cfg
+        with pytest.raises(ValueError, match='Custom configuration contains unexpected weight attr'):
+            ru_calc.compute_node_weights_utilization(node, TIC.Any, BM.QCustom, qc=qc)
+
+        with pytest.raises(ValueError, match='Explicit list of attributes to compute cannot be empty'):
+            ru_calc.compute_node_weights_utilization(node, target_criterion=[], bitwidth_mode=BM.QMaxBit, qc=None)
+
+
+class TestComputeWeightUtilization:
+    """ Tests for the compute_weights_utilization public method. """
""" + @pytest.fixture + def prepare_compute_w_util(self): + n1 = build_node('n1', + canonical_weights={'mp': np.ones((5, 10)), 'sp': np.zeros((42,)), 'noq': np.ones((3, 1, 4))}, + qcs=[build_qc(w_attr={'mp': (6, True), 'sp': (4, True), 'noq': (8, False)}), + build_qc(w_attr={'mp': (2, True), 'sp': (4, True), 'noq': (4, False)})]) + n2 = build_node('n2', canonical_weights={1: np.ones((2, 3, 4, 5, 6)), 'mp': np.ones((31, 4))}, + qcs=[build_qc(a_enable=False, w_attr={'mp': (4, True)}, pos_attr=(4, True, [1])), + build_qc(a_enable=False, w_attr={'mp': (16, True)}, pos_attr=(8, True, [1]))]) + n3 = build_node('n3', canonical_weights={'sp': np.ones((123,))}, qcs=[build_qc(w_attr={'sp': (2, True)})]) + # reused - should never be collected + n_reuse = build_node('reused', canonical_weights={'sp': np.ones((31, 4))}, + qcs=[build_qc(w_attr={'sp': (4, True)})], reuse=True) + # no weights - should never be collected + n_no_w = build_node('no_w', qcs=[build_qc()]) + + g = Graph('g', nodes=[n_reuse, n_no_w], input_nodes=[n1], output_nodes=[n3], + edge_list=[Edge(*ns, 0, 0) for ns in [(n1, n_reuse), (n_reuse, n_no_w), (n_no_w, n2), (n2, n3)]]) + + ru_calc = ResourceUtilizationCalculator(g, fw_impl_mock, fw_info_mock) + # wrap original methods for api checks + ru_calc._topo_sort = Mock(wraps=ru_calc._topo_sort) + ru_calc._collect_target_nodes_w_attrs = Mock(wraps=ru_calc._collect_target_nodes_w_attrs) + ru_calc.compute_node_weights_utilization = Mock(wraps=ru_calc.compute_node_weights_utilization) + return ru_calc, {n.name: n for n in [n1, n2, n3, n_reuse, n_no_w]} + + def test_compute_weights_utilization_custom(self, prepare_compute_w_util): + ru_calc, nodes = prepare_compute_w_util + n1, n2, n3 = nodes['n1'], nodes['n2'], nodes['n3'] + # n3 - not in qc (but should be considered) + custom_qc = {n1: build_qc(w_attr={'mp': (5, False), 'noq': (16, True)}).weights_quantization_cfg, + n2: build_qc(w_attr={'mp': (6, True)}, pos_attr=(2, True, [1])).weights_quantization_cfg, + nodes['no_w']: build_qc().weights_quantization_cfg, + nodes['reused']: build_qc(w_attr={'sp': (8, True)})} + + total, per_node, per_weight = ru_calc.compute_weights_utilization(TIC.Any, BM.QCustom, custom_qc) + + ru_calc._collect_target_nodes_w_attrs.assert_called_once_with(TIC.Any, include_reused=False) + + ru_calc._topo_sort.assert_called_once() + assert sorted(ru_calc._topo_sort.call_args.args[0], key=lambda n:n.name) == [n1, n2, n3] + + calls = [call for call in ru_calc.compute_node_weights_utilization.call_args_list] + assert len(calls) == 3 + calls = sorted(calls, key=lambda call: call.args[0].name) + # first call + assert (calls[0].args[0], *calls[0].args[2:]) == (n1, BitwidthMode.QCustom, custom_qc[n1]) + assert sorted(calls[0].args[1]) == full_attr_name(['mp', 'noq', 'sp']) + # second call + assert (calls[1].args[0], *calls[1].args[2:]) == (n2, BitwidthMode.QCustom, custom_qc[n2]) + assert calls[1].args[1] in (full_attr_name(['mp', 1]), full_attr_name([1, 'mp'])) + # third call + assert (calls[2].args[0], *calls[2].args[2:]) == (n3, BitwidthMode.QCustom, None) + assert calls[2].args[1] == [full_attr_name('sp')] + + # check the actual results + assert len(per_weight) == len(per_node) == 3 + assert per_weight[n1] == {full_attr_name('mp'): Utilization(50, 200.), + full_attr_name('sp'): Utilization(42, 21), + full_attr_name('noq'): Utilization(12, 24.)} + assert per_weight[n2] == {full_attr_name('mp'): Utilization(124, 93.), + 1: Utilization(720, 180.)} + assert per_weight[n3] == {full_attr_name('sp'): Utilization(123, 30.75)} + + 
+        assert per_node == {n1: Utilization(104, 245.),
+                            n2: Utilization(844, 273.),
+                            n3: Utilization(123, 30.75)}
+        assert total == 245+273+30.75
+
+    def test_compute_w_utilization_non_custom(self, prepare_compute_w_util):
+        ru_calc, nodes = prepare_compute_w_util
+        n1, n2 = nodes['n1'], nodes['n2']
+        total, per_node, per_weight = ru_calc.compute_weights_utilization(TIC.QConfigurable, BM.QMaxBit)
+
+        ru_calc._collect_target_nodes_w_attrs.assert_called_once_with(TIC.QConfigurable, include_reused=False)
+        calls = [call for call in ru_calc.compute_node_weights_utilization.call_args_list]
+        assert len(calls) == 2
+        calls = sorted(calls, key=lambda call: call.args[0].name)
+        assert calls[0].args == (n1, [full_attr_name('mp')], BM.QMaxBit, None)
+        assert calls[1].args in [(n2, full_attr_name(attrs), BM.QMaxBit, None) for attrs in (['mp', 1], [1, 'mp'])]
+
+    def test_compute_w_utilization_no_targets(self, graph_mock, fw_impl_mock, fw_info_mock):
+        graph_mock.nodes = [
+            build_node('n1', qcs=[build_qc()]),
+            build_node('n2', canonical_weights={'foo': np.ones((5,))}, qcs=[build_qc(w_attr={'foo': (8, True)})])
+        ]
+        ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock)
+        total, per_node, per_weight = ru_calc.compute_weights_utilization(TIC.QConfigurable, BM.Float)
+        assert total == 0
+        assert per_node == {}
+        assert per_weight == {}
+
+    @pytest.mark.parametrize('bm', set(BM)-{BM.QCustom})
+    def test_compute_w_utilization_errors(self, graph_mock, fw_impl_mock, fw_info_mock, bm):
+        ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock)
+        with pytest.raises(ValueError, match=ResourceUtilizationCalculator.unexpected_qc_error):
+            ru_calc.compute_weights_utilization(TIC.Any, bm, w_qcs={Mock(): Mock()})
+
+
+class TestCalculatorMisc:
+    """ Calculator tests that don't belong in the other test classes. """
+    def test_calculator_init(self, fw_impl_mock, fw_info_mock):
+        n1 = build_node('n1', qcs=[build_qc(a_enable=False)], output_shape=(None, 5, 10))
+        n2 = build_node('n2', output_shape=(None, 2, 111, 3),
+                        canonical_weights={'foo': np.zeros((3, 14)),
+                                           'bar': np.zeros((15, 9, 2, 6)),
+                                           2: np.zeros((2, 71))},
+                        qcs=[build_qc(w_attr={'foo': (8, False), 'bar': (8, True)}, pos_attr=(8, True, [2]))])
+        n3 = build_node('n3', qcs=[build_qc(4)], output_shape=(None, 17))
+        graph = Graph('g', input_nodes=[n1], nodes=[n2], output_nodes=[n3],
+                      edge_list=[Edge(n1, n2, 0, 0), Edge(n2, n3, 0, 0)])
+
+        ru_calc = ResourceUtilizationCalculator(graph, fw_impl_mock, fw_info_mock)
+        assert ru_calc._act_tensors_size == {n1: 50, n2: 666, n3: 17}
+        assert ru_calc._params_cnt == {n2: {full_attr_name('foo'): 42,
+                                            full_attr_name('bar'): 1620,
+                                            2: 142}}
+
+    def test_topo_sort(self, graph_mock, fw_impl_mock, fw_info_mock):
+        n1, n2, n3, n4, n5 = [build_node(f'n{i}') for i in range(5)]
+        graph_mock.get_topo_sorted_nodes.return_value = [n3, n4, n2, n5, n1]
+        ru_calc = ResourceUtilizationCalculator(graph_mock, fw_impl_mock, fw_info_mock)
+
+        assert ru_calc._topo_sort([]) == []
+        assert ru_calc._topo_sort([n5, n4, n3, n2, n1]) == [n3, n4, n2, n5, n1]
+        assert ru_calc._topo_sort([n1, n3, n5]) == [n3, n5, n1]
+        n6 = build_node('n6')
+        with pytest.raises(ValueError, match=fr'Could not topo-sort, nodes \[{n6}\] do not match the graph nodes'):
+            ru_calc._topo_sort([n1, n2, n6])
diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_activation_threshold_search.ipynb b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_activation_threshold_search.ipynb index 
ae28d66f8..25d3850e0 100644 --- a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_activation_threshold_search.ipynb +++ b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_activation_threshold_search.ipynb @@ -74,8 +74,8 @@ }, "outputs": [], "source": [ - "TF_VER = '2.14'\n", - "!pip install -q tensorflow[and-cuda]~={TF_VER}" + "TF_VER = '2.14.0'\n", + "!pip install -q tensorflow~={TF_VER}" ] }, { diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_activation_z_score_threshold.ipynb b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_activation_z_score_threshold.ipynb index fd3cc7d47..562f4fce1 100644 --- a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_activation_z_score_threshold.ipynb +++ b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_activation_z_score_threshold.ipynb @@ -58,8 +58,8 @@ }, "outputs": [], "source": [ - "TF_VER = '2.14'\n", - "!pip install -q tensorflow[and-cuda]~={TF_VER}" + "TF_VER = '2.14.0'\n", + "!pip install -q tensorflow~={TF_VER}" ] }, { diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_mobilenet_gptq.ipynb b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_mobilenet_gptq.ipynb index eb1fc3aa6..5f8d19785 100644 --- a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_mobilenet_gptq.ipynb +++ b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_mobilenet_gptq.ipynb @@ -44,7 +44,7 @@ "source": [ "TF_VER = '2.14.0'\n", "\n", - "!pip install -q tensorflow[and-cuda]~={TF_VER}" + "!pip install -q tensorflow~={TF_VER}" ] }, { diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_mobilenet_mixed_precision.ipynb b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_mobilenet_mixed_precision.ipynb index dff46ce45..df9c90c86 100644 --- a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_mobilenet_mixed_precision.ipynb +++ b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_mobilenet_mixed_precision.ipynb @@ -39,8 +39,8 @@ }, "outputs": [], "source": [ - "TF_VER = '2.14'\n", - "!pip install -q tensorflow[and-cuda]~={TF_VER}" + "TF_VER = '2.14.0'\n", + "!pip install -q tensorflow~={TF_VER}" ] }, { diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_network_editor.ipynb b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_network_editor.ipynb index 28e880cab..14780ffb6 100644 --- a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_network_editor.ipynb +++ b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_network_editor.ipynb @@ -47,8 +47,8 @@ }, "outputs": [], "source": [ - "TF_VER = '2.14'\n", - "!pip install -q tensorflow[and-cuda]~={TF_VER}" + "TF_VER = '2.14.0'\n", + "!pip install -q tensorflow~={TF_VER}" ] }, { diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_post-training_quantization.ipynb b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_post-training_quantization.ipynb index 6ebf07f72..2d1464025 100644 --- a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_post-training_quantization.ipynb +++ b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_post-training_quantization.ipynb @@ -31,8 +31,8 @@ "execution_count": null, "outputs": [], "source": [ - "TF_VER = '2.14'\n", - "!pip install -q tensorflow[and-cuda]~={TF_VER}" + "TF_VER = '2.14.0'\n", + "!pip install -q tensorflow~={TF_VER}" ], "metadata": { "collapsed": false 
diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_pruning_mnist.ipynb b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_pruning_mnist.ipynb index 8ed23bc1a..2f2bf0793 100644 --- a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_pruning_mnist.ipynb +++ b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_pruning_mnist.ipynb @@ -44,7 +44,7 @@ "cell_type": "code", "source": [ "TF_VER = '2.14.0'\n", - "!pip install -q tensorflow[and-cuda]~={TF_VER}" + "!pip install -q tensorflow~={TF_VER}" ], "metadata": { "id": "xTvVA__4NItc" diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_qat.ipynb b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_qat.ipynb index 435976096..df20b9b51 100644 --- a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_qat.ipynb +++ b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_qat.ipynb @@ -33,7 +33,7 @@ "outputs": [], "source": [ "TF_VER = '2.14.0'\n", - "!pip install -q tensorflow[and-cuda]~={TF_VER}" + "!pip install -q tensorflow~={TF_VER}" ] }, { diff --git a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_xquant.ipynb b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_xquant.ipynb index 229b3bce6..3fc9888ac 100644 --- a/tutorials/notebooks/mct_features_notebooks/keras/example_keras_xquant.ipynb +++ b/tutorials/notebooks/mct_features_notebooks/keras/example_keras_xquant.ipynb @@ -41,7 +41,7 @@ "source": [ "TF_VER = '2.14.0'\n", "\n", - "!pip install -q tensorflow[and-cuda]~={TF_VER}" + "!pip install -q tensorflow~={TF_VER}" ], "metadata": { "id": "kCLHJUhTlPDi"