Support PyTorch model in the weight compression algorithm
alexsu52 committed Jan 7, 2024
1 parent d285f47 commit 1b1e4fa
Showing 33 changed files with 1,143 additions and 828 deletions.
6 changes: 4 additions & 2 deletions nncf/common/graph/layer_attributes.py
@@ -271,9 +271,11 @@ class ConvertDtypeLayerAttributes(BaseLayerAttributes):
 
 
 @dataclass
-class ParameterLayerAttributes(BaseLayerAttributes):
+class ConstantLayerAttributes(BaseLayerAttributes):
     """
-    :param name: Parameter name.
+    :param name: Constant name.
+    :param shape: Constant shape.
     """
 
     name: str
+    shape: List[int]
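
For illustration, a minimal sketch of constructing the renamed attributes class. The constant name and shape here are hypothetical values, not taken from the commit:

    from nncf.common.graph.layer_attributes import ConstantLayerAttributes

    # Describe a hypothetical 2D weight constant: `name` identifies the constant
    # in the original framework model, `shape` holds its dimensions.
    attrs = ConstantLayerAttributes(name="linear.weight", shape=[256, 128])
    assert attrs.name == "linear.weight" and attrs.shape == [256, 128]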
13 changes: 0 additions & 13 deletions nncf/common/graph/transformations/commands.py
@@ -214,16 +214,3 @@ def __init__(self, command_type: TransformationType, target_point: TargetPoint):
     @property
     def target_point(self) -> TargetPoint:
         return self._target_point
-
-    def check_command_compatibility(self, command: "TransformationCommand") -> bool:
-        return (
-            isinstance(command, TransformationCommand)
-            and self.type == command.type
-            and self.target_point == command.target_point
-        )
-
-    def union(self, other: "TransformationCommand") -> "TransformationCommand":
-        raise NotImplementedError()
-
-    def __add__(self, other: "TransformationCommand") -> "TransformationCommand":
-        return self.union(other)
6 changes: 3 additions & 3 deletions nncf/experimental/tensor/__init__.py
@@ -9,8 +9,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from nncf.experimental.tensor.enums import TensorBackendType as TensorBackendType
-from nncf.experimental.tensor.enums import TensorDataType as TensorDataType
-from nncf.experimental.tensor.enums import TensorDeviceType as TensorDeviceType
+from nncf.experimental.tensor.definitions import TensorBackendType as TensorBackendType
+from nncf.experimental.tensor.definitions import TensorDataType as TensorDataType
+from nncf.experimental.tensor.definitions import TensorDeviceType as TensorDeviceType
 from nncf.experimental.tensor.tensor import Tensor as Tensor
 from nncf.experimental.tensor.tensor import unwrap_tensor_data as unwrap_tensor_data
nncf/experimental/tensor/{enums.py → definitions.py}
@@ -9,6 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from dataclasses import dataclass
 from enum import Enum
 from enum import auto
 
@@ -32,6 +33,7 @@ class TensorDataType(Enum):
     float64 = auto()
     int8 = auto()
     uint8 = auto()
+    int32 = auto()
 
 
 class TensorDeviceType(Enum):
@@ -41,3 +43,18 @@ class TensorDeviceType(Enum):
 
     CPU = auto()
     GPU = auto()
+
+
+@dataclass
+class TypeInfo:
+    """
+    This class represents the numerical properties of a floating point type.
+    :param eps: The smallest representable number such that 1.0 + eps != 1.0.
+    :param max: The largest representable number.
+    :param min: The smallest representable number (typically -max).
+    """
+
+    eps: float
+    max: float
+    min: float
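
A quick sketch of what the backend-agnostic finfo (defined in nncf/experimental/tensor/functions.py below) now returns; the printed values are the usual float32 properties:

    import numpy as np

    from nncf.experimental.tensor import Tensor
    from nncf.experimental.tensor import functions as fns

    # finfo on a NumPy-backed Tensor yields the plain TypeInfo dataclass
    # instead of a backend-specific object such as np.finfo.
    ti = fns.finfo(Tensor(np.zeros(1, dtype=np.float32)))
    print(ti.eps, ti.max, ti.min)  # ~1.19e-07, ~3.40e+38, ~-3.40e+38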
53 changes: 48 additions & 5 deletions nncf/experimental/tensor/functions.py
@@ -10,15 +10,14 @@
 # limitations under the License.
 
 import functools
-from typing import Callable, List, Optional, Tuple, TypeVar, Union
+from typing import Any, Callable, List, Optional, Tuple, Union
 
-from nncf.experimental.tensor.enums import TensorDataType
-from nncf.experimental.tensor.enums import TensorDeviceType
+from nncf.experimental.tensor.definitions import TensorDataType
+from nncf.experimental.tensor.definitions import TensorDeviceType
+from nncf.experimental.tensor.definitions import TypeInfo
 from nncf.experimental.tensor.tensor import Tensor
 from nncf.experimental.tensor.tensor import unwrap_tensor_data
 
-TypeInfo = TypeVar("TypeInfo")
-
 
 def _tensor_guard(func: callable):
     """
@@ -442,6 +441,50 @@ def finfo(a: Tensor) -> TypeInfo:
     return finfo(a.data)
 
 
+@functools.singledispatch
+@_tensor_guard
+def clip(a: Tensor, a_min: Union[Tensor, float], a_max: Union[Tensor, float]) -> Tensor:
+    """
+    Clips all elements of the input into the range [a_min, a_max].
+    :param a: Tensor.
+    :param a_min: A lower bound of the range to be clamped to.
+    :param a_max: An upper bound of the range to be clamped to.
+    :return: A clipped tensor with the elements of a, but where values < a_min are replaced with a_min,
+        and those > a_max with a_max.
+    """
+    return Tensor(clip(a.data, unwrap_tensor_data(a_min), unwrap_tensor_data(a_max)))
+
+
+@functools.singledispatch
+@_tensor_guard
+def as_tensor_like(a: Tensor, data: Any) -> Tensor:
+    """
+    Converts the data into a tensor with the same data representation and hosted on the same device
+    as the given tensor.
+    :param a: A tensor defining the data representation and the host device of the output tensor.
+    :param data: Initial data for the tensor. Can be a list, tuple, NumPy ndarray, scalar, or another type.
+    :return: A tensor with the same data representation and host device as a, initialized with data.
+    """
+    return Tensor(as_tensor_like(a.data, data))
+
+
+@functools.singledispatch
+@_tensor_guard
+def item(a: Tensor) -> Union[int, float, bool]:
+    """
+    Returns the value of this tensor as a standard Python number. This only works for tensors with one element.
+    :param a: Tensor.
+    :return: The value of this tensor as a standard Python number.
+    """
+    if isinstance(a.data, (int, float, bool)):
+        return a.data
+    return item(a.data)
+
+
 def _dispatch_list(fn: "functools._SingleDispatchCallable", tensor_list: List[Tensor], *args, **kwargs):
     """
     Dispatches the function to the type of the wrapped data of the first element in tensor_list.
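
A short usage sketch of the three new dispatched functions with a NumPy-backed Tensor; each wrapped call resolves to the backend registrations shown in the next two files:

    import numpy as np

    from nncf.experimental.tensor import Tensor
    from nncf.experimental.tensor import functions as fns

    t = Tensor(np.array([-1.5, 0.25, 3.0]))

    clipped = fns.clip(t, 0.0, 1.0)           # Tensor([0.0, 0.25, 1.0])
    like = fns.as_tensor_like(t, [1, 2, 3])   # new Tensor with the same backend
    value = fns.item(Tensor(np.array([42])))  # plain Python int 42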
32 changes: 27 additions & 5 deletions nncf/experimental/tensor/numpy_functions.py
@@ -9,20 +9,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Callable, List, Optional, Tuple, Union
+from typing import Any, Callable, List, Optional, Tuple, Union
 
 import numpy as np
 
 from nncf.experimental.tensor import functions as fns
-from nncf.experimental.tensor.enums import TensorDataType
-from nncf.experimental.tensor.enums import TensorDeviceType
+from nncf.experimental.tensor.definitions import TensorDataType
+from nncf.experimental.tensor.definitions import TensorDeviceType
+from nncf.experimental.tensor.definitions import TypeInfo
 
 DTYPE_MAP = {
     TensorDataType.float16: np.dtype(np.float16),
     TensorDataType.float32: np.dtype(np.float32),
     TensorDataType.float64: np.dtype(np.float64),
     TensorDataType.int8: np.dtype(np.int8),
     TensorDataType.uint8: np.dtype(np.uint8),
+    TensorDataType.int32: np.dtype(np.int32),
 }
 
 DTYPE_MAP_REV = {v: k for k, v in DTYPE_MAP.items()}
@@ -209,5 +211,25 @@
 
 
 @_register_numpy_types(fns.finfo)
-def _(a: np.ndarray) -> np.finfo:
-    return np.finfo(a.dtype)
+def _(a: np.ndarray) -> TypeInfo:
+    ti = np.finfo(a.dtype)
+    return TypeInfo(ti.eps, ti.max, ti.min)
+
+
+@_register_numpy_types(fns.clip)
+def _(
+    a: Union[np.ndarray, np.generic],
+    a_min: Union[np.ndarray, np.generic, float],
+    a_max: Union[np.ndarray, np.generic, float],
+) -> Union[np.ndarray, np.generic]:
+    return np.clip(a, a_min, a_max)
+
+
+@_register_numpy_types(fns.as_tensor_like)
+def _(a: Union[np.ndarray, np.generic], data: Any) -> Union[np.ndarray, np.generic]:
+    return np.array(data)
+
+
+@_register_numpy_types(fns.item)
+def _(a: Union[np.ndarray, np.generic]) -> Union[int, float, bool]:
+    return a.item()
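
Note that the NumPy registration of as_tensor_like can ignore the reference tensor entirely: NumPy arrays are always host-resident, so materializing the data with np.array is sufficient. A sketch of the resulting behavior:

    import numpy as np

    from nncf.experimental.tensor import Tensor
    from nncf.experimental.tensor import functions as fns

    ref = Tensor(np.zeros((2, 2), dtype=np.float32))
    out = fns.as_tensor_like(ref, [1.0, 2.0])
    assert isinstance(out.data, np.ndarray)  # backend follows the reference tensor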
9 changes: 6 additions & 3 deletions nncf/experimental/tensor/tensor.py
@@ -13,8 +13,8 @@
 import operator
 from typing import Any, Optional, Tuple, TypeVar, Union
 
-from nncf.experimental.tensor.enums import TensorDataType
-from nncf.experimental.tensor.enums import TensorDeviceType
+from nncf.experimental.tensor.definitions import TensorDataType
+from nncf.experimental.tensor.definitions import TensorDeviceType
 
 TTensor = TypeVar("TTensor")
 
@@ -44,7 +44,7 @@ def device(self) -> TensorDeviceType:
         return _call_function("device", self)
 
     @property
-    def dtype(self) -> TensorDeviceType:
+    def dtype(self) -> TensorDataType:
         return _call_function("dtype", self)
 
     def __bool__(self) -> bool:
@@ -146,6 +146,9 @@ def astype(self, dtype: TensorDataType) -> Tensor:
     def reshape(self, shape: Tuple[int, ...]) -> Tensor:
         return _call_function("reshape", self, shape)
 
+    def item(self) -> float:
+        return _call_function("item", self)
+
 
 def _call_function(func_name: str, *args):
     """
25 changes: 24 additions & 1 deletion nncf/experimental/tensor/torch_functions.py
@@ -9,20 +9,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Callable, List, Optional, Tuple, Union
+from typing import Any, Callable, List, Optional, Tuple, Union
 
 import torch
 
 from nncf.experimental.tensor import TensorDataType
 from nncf.experimental.tensor import TensorDeviceType
 from nncf.experimental.tensor import functions as fns
+from nncf.experimental.tensor.definitions import TypeInfo
 
 DTYPE_MAP = {
     TensorDataType.float16: torch.float16,
     TensorDataType.float32: torch.float32,
     TensorDataType.float64: torch.float64,
     TensorDataType.int8: torch.int8,
     TensorDataType.uint8: torch.uint8,
+    TensorDataType.int32: torch.int32,
 }
 
 DTYPE_MAP_REV = {v: k for k, v in DTYPE_MAP.items()}
@@ -200,3 +202,24 @@
 @fns._binary_reverse_op_nowarn.register(torch.Tensor)
 def _(a: torch.Tensor, b: Union[torch.Tensor, float], operator_fn: Callable) -> torch.Tensor:
     return operator_fn(b, a)
+
+
+@fns.clip.register(torch.Tensor)
+def _(a: torch.Tensor, a_min: Union[torch.Tensor, float], a_max: Union[torch.Tensor, float]) -> torch.Tensor:
+    return torch.clip(a, a_min, a_max)
+
+
+@fns.finfo.register(torch.Tensor)
+def _(a: torch.Tensor) -> TypeInfo:
+    ti = torch.finfo(a.dtype)
+    return TypeInfo(ti.eps, ti.max, ti.min)
+
+
+@fns.as_tensor_like.register(torch.Tensor)
+def _(a: torch.Tensor, data: Any) -> torch.Tensor:
+    return torch.as_tensor(data, device=a.device)
+
+
+@fns.item.register(torch.Tensor)
+def _(a: torch.Tensor) -> Union[int, float, bool]:
+    return a.item()
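
A sketch of the behavior the PyTorch registrations enable (assuming a CUDA device is available; CPU tensors work the same way):

    import torch

    from nncf.experimental.tensor import Tensor
    from nncf.experimental.tensor import functions as fns

    t = Tensor(torch.tensor([0.1, 0.9], device="cuda"))

    # as_tensor_like places the new data on the same device as the reference.
    mask = fns.as_tensor_like(t, [1.0, 0.0])
    assert mask.data.device == t.data.device

    # item() extracts a plain Python scalar from a one-element tensor.
    assert fns.item(Tensor(torch.tensor([3]))) == 3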
29 changes: 2 additions & 27 deletions nncf/onnx/graph/transformations/commands.py
@@ -56,10 +56,6 @@ def __init__(self, target_point: ONNXTargetPoint, input_edges_mapping: Dict[str,
         # need to keep the mapping NNCF input nodes to the following ONNX nodes.
         self.input_edges_mapping = input_edges_mapping
 
-    def union(self, other: "TransformationCommand") -> "TransformationCommand":
-        # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand
-        raise NotImplementedError()
-
 
 class ONNXQuantizerInsertionCommand(ONNXInsertionCommand):
     def __init__(
@@ -71,15 +67,10 @@ def __init__(
         super().__init__(target_point, nncf_input_node_next_onnx_nodes)
         self.quantizer_parameters = quantizer_parameters
 
-    def union(self, other: "TransformationCommand") -> "TransformationCommand":
-        # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand
-        raise NotImplementedError()
-
 
 class ONNXOutputInsertionCommand(ONNXInsertionCommand):
-    def union(self, other: "TransformationCommand") -> "TransformationCommand":
-        # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand
-        raise NotImplementedError()
+    def __init__(self, target_point: ONNXTargetPoint, input_edges_mapping: Dict[str, Tuple[str, int]]):
+        super().__init__(TransformationType.INSERT, target_point, input_edges_mapping)
 
 
 class ONNXBiasCorrectionCommand(TransformationCommand):
@@ -95,10 +86,6 @@ def __init__(self, target_point: ONNXTargetPoint, bias_value: np.ndarray):
         super().__init__(TransformationType.CHANGE, target_point)
         self.bias_value = bias_value
 
-    def union(self, other: "TransformationCommand") -> "TransformationCommand":
-        # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand
-        raise NotImplementedError()
-
 
 class ONNXModelExtractionCommand(Command):
     """
@@ -114,10 +101,6 @@ def __init__(self, inputs: List[str], outputs: List[str]):
         self.inputs = inputs
         self.outputs = outputs
 
-    def union(self, other: "Command") -> "Command":
-        # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand
-        raise NotImplementedError()
-
 
 class ONNXQDQNodeRemovingCommand(TransformationCommand):
     """
@@ -130,10 +113,6 @@ def __init__(self, target_point: ONNXTargetPoint):
         """
         super().__init__(TransformationType.REMOVE, target_point)
 
-    def union(self, other: "TransformationCommand") -> "TransformationCommand":
-        # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand
-        raise NotImplementedError()
-
 
 class ONNXNullBiasInsertionCommand(TransformationCommand):
     """
@@ -145,7 +124,3 @@ def __init__(self, target_point: ONNXTargetPoint):
         :param target_point: The TargetPoint instance for the insertion that contains layer's information.
         """
         super().__init__(TransformationType.INSERT, target_point)
-
-    def union(self, other: "TransformationCommand") -> "TransformationCommand":
-        # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand
-        raise NotImplementedError()
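
With union() gone from the base classes, a backend command now only defines its constructor. A hypothetical minimal subclass, modeled on the commands above and shown purely for illustration:

    class ONNXExampleRemovingCommand(TransformationCommand):
        """Hypothetical command illustrating the slimmed-down base class."""

        def __init__(self, target_point: ONNXTargetPoint):
            super().__init__(TransformationType.REMOVE, target_point)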
9 changes: 5 additions & 4 deletions nncf/onnx/quantization/quantizer_parameters.py
@@ -14,6 +14,7 @@
 
 import numpy as np
 
+from nncf.experimental.tensor import functions as fns
 from nncf.quantization.fake_quantize import FakeQuantizeParameters
 from nncf.quantization.fake_quantize import calculate_scale_zero_point
 
@@ -54,17 +55,17 @@ def convert_fq_params_to_onnx_params(
     if levels not in [255, 256]:
         raise ValueError("Can only export to INT8/UINT8 256-level ONNX Quantize/Dequantize pairs.")
 
-    input_low, input_high = parameters.input_low.data, parameters.input_high.data
-    output_low, output_high = parameters.output_low.data, parameters.output_high.data
-    if not np.allclose(input_high, output_high) or not np.allclose(input_low, output_low):
+    input_low, input_high = parameters.input_low, parameters.input_high
+    output_low, output_high = parameters.output_low, parameters.output_high
+    if not fns.allclose(input_high, output_high) or not fns.allclose(input_low, output_low):
         raise ValueError(
             "ONNX Quantize/Dequantize pairs only support input_high == output_high and input_low == output_low."
         )
 
     level_low, level_high = get_level_low_level_high(tensor_type)
     narrow_range = levels == 2**num_bits - 1
     scale, zero_point = calculate_scale_zero_point(input_low, input_high, level_low, level_high, narrow_range)
-    return ONNXQuantizerLayerParameters(scale, zero_point, tensor_type, axis)
+    return ONNXQuantizerLayerParameters(scale.data, zero_point.data, tensor_type, axis)
 
 
 def get_level_low_level_high(tensor_type: np.dtype) -> Tuple[int, int]:
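
For context, the affine relationship that calculate_scale_zero_point encodes, in a simplified sketch that ignores the narrow_range adjustment and is not NNCF's exact implementation:

    def affine_scale_zero_point(input_low, input_high, level_low, level_high):
        # Map the float range [input_low, input_high] linearly onto the integer
        # range [level_low, level_high]: q = round(x / scale) + zero_point.
        scale = (input_high - input_low) / (level_high - level_low)
        zero_point = round(level_low - input_low / scale)
        return scale, zero_point

    # Float range [-1, 1] onto signed INT8 levels [-128, 127]:
    print(affine_scale_zero_point(-1.0, 1.0, -128, 127))  # ~0.00784, 0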