Skip to content

Commit

Permalink
feat: implement quantized unfold
Browse files Browse the repository at this point in the history
  • Loading branch information
tguerand authored Feb 2, 2024
1 parent df81aca commit fa3ef88
Show file tree
Hide file tree
Showing 5 changed files with 304 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/deep-learning/onnx_support.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ The following operators are supported for evaluation and conversion to an equiva
- Tanh
- ThresholdedRelu
- Transpose
- Unfold
- Unsqueeze
- Where
- onnx.brevitas.Quant
Expand Down
2 changes: 2 additions & 0 deletions src/concrete/ml/onnx/onnx_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@
numpy_tanh,
numpy_thresholdedrelu,
numpy_transpose,
numpy_unfold,
numpy_unsqueeze,
numpy_where,
rounded_numpy_equal_for_trees,
Expand Down Expand Up @@ -382,6 +383,7 @@
"Shape": numpy_shape,
"ConstantOfShape": numpy_constant_of_shape,
"Expand": numpy_expand,
"Unfold": numpy_unfold,
}


Expand Down
71 changes: 71 additions & 0 deletions src/concrete/ml/onnx/ops_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2072,3 +2072,74 @@ def numpy_expand(x: numpy.ndarray, shape: Optional[Tuple[int]] = None) -> Tuple[
assert_true(shape_difference >= 0, "Target shape cannot have fewer dimensions than input shape")

return (numpy.broadcast_to(x, target_shape),)


def numpy_unfold(
    x: numpy.ndarray,
    *,
    kernel_shape: Tuple[int, ...],
    pads: Optional[Tuple[int, ...]] = None,
    strides: Optional[Tuple[int, ...]] = None,
) -> Tuple[numpy.ndarray]:
    """Compute Unfold using a grouped convolution with one-hot kernels.

    Currently supports 2d Unfold with torch semantics. This function is ONNX compatible.

    See: https://github.com/onnx/onnx/blob/main/docs/Operators.md

    Args:
        x (numpy.ndarray): input data (many dtypes are supported). Shape is N x C x H x W for 2d
        kernel_shape (Tuple[int, ...]): shape of the kernel. Should have 2 elements for 2d
        pads (Optional[Tuple[int, ...]]): padding in ONNX format (begin, end) on each axis.
            Must have 4 elements when given. Defaults to no padding
        strides (Optional[Tuple[int, ...]]): stride of the sliding window on each axis.
            Must have 2 elements when given. Defaults to a stride of 1 on each axis

    Returns:
        res (numpy.ndarray): a tensor of size
            (N x (C * kernel_shape[0] * kernel_shape[1]) x (OutHeight * OutWidth)).
            See https://pytorch.org/docs/stable/generated/torch.nn.Unfold.html

    Raises:
        AssertionError: if the unfold arguments are wrong
    """

    assert_true(len(kernel_shape) == 2, "The unfold operator currently supports only 2-d")

    # For mypy
    assert pads is None or len(pads) == 4
    assert strides is None or len(strides) == 2

    # Use default values if the ONNX did not set these parameters
    pads = (0, 0, 0, 0) if pads is None else pads
    strides = (1, 1) if strides is None else strides

    # Compute the unfold using a grouped convolution (groups = input channels)
    # This means that each slice of the kernel is applied on each input channel respectively
    # We create kernels with a single one at each position, which will redirect the kernel
    # outputs to the output channels
    n_in_channels = x.shape[1]
    kernel_height, kernel_width = kernel_shape
    n_positions = kernel_height * kernel_width

    # Row i of the identity matrix, seen as a (kernel_height, kernel_width) array in row-major
    # order, is the one-hot kernel selecting position (i // kernel_width, i % kernel_width)
    one_hot_kernels = numpy.eye(n_positions, dtype=numpy.int64).reshape(
        (n_positions, 1, kernel_height, kernel_width)
    )

    # The same set of one-hot kernels is repeated for every input channel (one group each)
    kernels = numpy.tile(one_hot_kernels, (n_in_channels, 1, 1, 1))

    # Pad the input tensor
    pool_pads = compute_onnx_pool_padding(x.shape, kernel_shape, pads, strides, ceil_mode=0)
    q_input_pad = numpy_onnx_pad(x, pool_pads)

    # Compute the kernels of input values for each kernel position.
    # The padding was already applied above, so the convolution itself uses zero padding
    res = fhe_conv(q_input_pad, kernels, None, [0, 0, 0, 0], strides, None, None, n_in_channels)

    # Reshape to fit the torch.F.unfold function output shapes: the spatial axes are flattened
    res = res.reshape((res.shape[0], res.shape[1], -1))

    return (res,)
125 changes: 125 additions & 0 deletions src/concrete/ml/quantization/quantized_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -2479,3 +2479,128 @@ def __init__(
# We do not support testing a == b where a,b are encrypted
# only comparing to a constant is supported
assert_true(constant_inputs is not None and len(constant_inputs) >= 1)


class QuantizedUnfold(QuantizedMixingOp):
    """Quantized Unfold op.

    The unfold is computed as a grouped convolution of the quantized input with one-hot
    kernels: each output channel copies one position of the sliding window for one input
    channel. Only 2d kernels are supported.
    """

    _impl_for_op_named: str = "Unfold"

    # Since this op takes a single input, we can set int_input_names to a single default id
    def __init__(
        self,
        n_bits_output: int,
        op_instance_name: str,
        int_input_names: Optional[Set[str]] = None,
        constant_inputs: Optional[Union[Dict[str, Any], Dict[int, Any]]] = None,
        input_quant_opts: Optional[QuantizationOptions] = None,
        **attrs,
    ) -> None:
        """Construct the quantized Unfold operator and validate its ONNX attributes.

        Args:
            n_bits_output (int): number of bits, forwarded to the parent class
            op_instance_name (str): name of this operator instance, also used to tag the
                computation
            int_input_names (Optional[Set[str]]): forwarded to the parent class
            constant_inputs (Optional[Union[Dict[str, Any], Dict[int, Any]]]): forwarded to
                the parent class
            input_quant_opts (Optional[QuantizationOptions]): forwarded to the parent class
            attrs: ONNX attributes. "kernel_shape" is required. "pads", "dilations" and
                "strides" are optional and default to no padding and unit dilation / stride
        """

        super().__init__(
            n_bits_output,
            op_instance_name,
            int_input_names,
            constant_inputs,
            input_quant_opts,
            **attrs,
        )

        # Get the ONNX parameters
        self.kernel_shape = attrs.get("kernel_shape", None)

        # The kernel shape has no default: fail with a clear message instead of a TypeError
        # when computing the defaults below
        assert_true(
            self.kernel_shape is not None,
            "The Unfold operator requires the kernel_shape attribute",
        )

        n_kernel_dims = len(self.kernel_shape)

        # ONNX pads contain a begin and an end value for each kernel dimension, so the
        # default (no padding) needs 2 * n_kernel_dims zeros to pass the validation below
        self.pads = attrs.get("pads", tuple([0] * 2 * n_kernel_dims))
        self.dilations = attrs.get("dilations", tuple([1] * n_kernel_dims))
        self.strides = attrs.get("strides", tuple([1] * n_kernel_dims))

        # Validate the parameters
        assert_true(
            n_kernel_dims == 2,
            "The Unfold operator currently supports only 2d",
        )
        assert_true(
            n_kernel_dims == len(self.strides),
            "The Unfold operator requires the number of strides to "
            "be the same as the number of kernel dimensions",
        )
        assert_true(
            len(self.pads) == 2 * n_kernel_dims,
            "The Unfold operator in Concrete ML requires padding to be specified as "
            " (pad_left_dim1, pad_right_dim1, pad_left_dim2, pad_right_dim2, ...), following ONNX"
            " standard",
        )

        self.kernel: Union[numpy.ndarray, None] = None
        self.norm_const: Union[float, None] = None

    def q_impl(
        self,
        *q_inputs: ONNXOpInputOutputType,
        **attrs,
    ) -> ONNXOpInputOutputType:
        """Compute the unfold of the quantized input.

        The input is padded with its zero-point, unfolded on integer values with a grouped
        convolution, then de-quantized using the input zero-point and scale.

        Args:
            q_inputs (ONNXOpInputOutputType): the inputs of the operator, the first one being
                the quantized tensor to unfold
            attrs: unused in this implementation, the attributes stored at construction time
                are used instead

        Returns:
            ONNXOpInputOutputType: a QuantizedArray built from the de-quantized unfold result,
                with the spatial axes flattened into a single last axis
        """

        # Retrieve the quantized inputs
        prepared_inputs = self._prepare_inputs_with_constants(
            *q_inputs, calibrate=False, quantize_actual_values=True
        )
        q_input: QuantizedArray = prepared_inputs[0]

        n_in_channels = q_input.qvalues.shape[1]
        kernel_height, kernel_width = self.kernel_shape
        n_positions = kernel_height * kernel_width

        # Row i of the identity matrix, seen as a (kernel_height, kernel_width) array in
        # row-major order, is the one-hot kernel selecting window position
        # (i // kernel_width, i % kernel_width)
        one_hot_kernels = numpy.eye(n_positions, dtype=numpy.int64).reshape(
            (n_positions, 1, kernel_height, kernel_width)
        )

        # The same set of one-hot kernels is repeated for every input channel (one group each)
        kernels = numpy.tile(one_hot_kernels, (n_in_channels, 1, 1, 1))

        # for mypy: The Quantized ops can only run on QuantizedArray that have quantization
        # parameters (i.e., were fully constructed). This should always be the case, except
        # during the UniformQuantizer initialization when the zero_point can exist as None
        assert q_input.quantizer.zero_point is not None

        # Compute padding with floor and apply it to the input, pad with the input zero-point
        pool_pads = compute_onnx_pool_padding(
            q_input.qvalues.shape, self.kernel_shape, self.pads, self.strides, ceil_mode=0
        )

        # Can only pad with scalar zero-points, but zero-points can be float in special cases
        # for output layers
        _check_op_input_zero_point(q_input.quantizer.zero_point, self.op_instance_name)
        pad_value = int(q_input.quantizer.zero_point)
        q_input_pad = numpy_onnx_pad(q_input.qvalues, pool_pads, pad_value, int_only=True)

        # Remark that here, we are _not_ using Concrete pad, since it would pad with
        # 0's while we want to pad with zero-point's. So, instead, we have done the padding
        # on our side, with q_input_pad
        fake_pads = [0] * len(self.pads)

        with tag(self.op_instance_name + ".unfold"):
            sum_result = fhe_conv(
                q_input_pad, kernels, None, fake_pads, self.strides, None, None, n_in_channels
            )

        if self.debug_value_tracker is not None:
            # pylint: disable-next=unsubscriptable-object
            self.debug_value_tracker[self.op_instance_name][
                "output"
            ] = sum_result  # pragma: no cover

        # Each kernel holds a single one, so every output value is a copy of one (padded)
        # input value and can be de-quantized with the input quantization parameters
        result = (
            sum_result.astype(numpy.float64) - q_input.quantizer.zero_point
        ) * q_input.quantizer.scale

        # Reshape to fit the same shape output as unfold
        result = result.reshape((result.shape[0], result.shape[1], -1))

        return QuantizedArray(
            self.n_bits,
            result,
            value_is_float=True,
            options=self._get_output_quant_opts(),
            stats=self.output_quant_stats,
            params=self.output_quant_params,
        )
105 changes: 105 additions & 0 deletions tests/quantization/test_quantized_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
QuantizedSub,
QuantizedTanh,
QuantizedTranspose,
QuantizedUnfold,
QuantizedUnsqueeze,
QuantizedWhere,
)
Expand Down Expand Up @@ -1492,6 +1493,7 @@ def test_all_ops_were_tested():
QuantizedSqueeze: test_quantized_squeeze,
QuantizedExpand: test_quantized_expand,
QuantizedEqual: test_quantized_comparators_and_where,
QuantizedUnfold: test_quantized_unfold,
ONNXSlice: test_quantized_slice,
ONNXGather: test_quantized_gather,
ONNXShape: test_quantized_shape,
Expand Down Expand Up @@ -1980,3 +1982,106 @@ def test_quantized_shape(shape):
check_serialization(
q_op, ONNXShape, equal_method=partial(quantized_op_results_are_equal, q_input=q_input)
)


@pytest.mark.parametrize("n_bits", [16])
@pytest.mark.parametrize(
    "params",
    [
        (
            numpy.random.uniform(low=-2.0, high=2.0, size=(1, 1, 32, 32)),
            (3, 3),
            (2, 2),
            (0, 0, 0, 0),
        ),
        (
            numpy.random.uniform(low=-1.2, high=0.2, size=(10, 1, 16, 16)),
            (2, 2),
            (1, 1),
            (0, 0, 0, 0),
        ),
        (
            numpy.random.uniform(low=-2.0, high=2.0, size=(2, 32, 4, 4)),
            (2, 2),
            (1, 1),
            (0, 0, 0, 0),
        ),
        (
            numpy.random.uniform(low=-2.0, high=2.0, size=(2, 32, 4, 4)),
            (2, 4),
            (1, 1),
            (1, 2, 1, 2),
        ),
        (
            numpy.random.uniform(low=-2.0, high=2.0, size=(2, 32, 4, 4)),
            (2, 4),
            (1, 1),
            (0, 2, 0, 2),
        ),
        (
            numpy.random.uniform(low=-2.0, high=2.0, size=(2, 32, 5, 5)),
            (3, 3),
            (1, 1),
            (1, 1, 1, 1),
        ),
        (
            numpy.random.uniform(low=-2.0, high=2.0, size=(2, 1, 7, 5)),
            (5, 1),
            (1, 1),
            (1, 2, 0, 4),
        ),
        (
            numpy.random.uniform(low=-2.0, high=2.0, size=(1, 1, 16, 16)),
            (2, 2),
            (4, 4),
            (1, 2, 0, 4),
        ),
    ],
)
@pytest.mark.parametrize("is_signed", [True, False])
def test_quantized_unfold(params, n_bits, is_signed, check_r2_score, check_float_array_equal):
    """Test the quantized unfold operator against torch.nn.functional.unfold."""

    # Each parameter set is (input tensor, kernel shape, strides, ONNX-style pads)
    net_input, kernel_shape, strides, pads = params

    # Quantize the input data
    q_input = QuantizedArray(n_bits, net_input, is_signed=is_signed)

    # Build the operator under test, re-using the input quantization options
    q_op = QuantizedUnfold(
        n_bits,
        OP_DEBUG_NAME + "QuantizedUnfold",
        strides=strides,
        pads=pads,
        kernel_shape=kernel_shape,
        input_quant_opts=q_input.quantizer.quant_options,
    )

    # Calibration returns the floating point result of the operator
    expected_result = q_op.calibrate(net_input)

    # Build the torch reference: padding is applied explicitly, then unfold runs without padding
    torch_input = torch.Tensor(net_input.copy())

    # Torch uses padding (padding_left, padding_right, padding_top, padding_bottom)
    # While ONNX and Concrete ML use (padding_top, padding_left, padding_bottom, padding_right)
    pad_top, pad_left, pad_bottom, pad_right = pads
    padded_input = torch.nn.functional.pad(
        torch_input, (pad_left, pad_right, pad_top, pad_bottom)
    )

    # Compute the torch unfold
    reference = torch.nn.functional.unfold(
        padded_input, kernel_shape, dilation=1, padding=0, stride=strides
    )
    reference = reference.numpy()

    # The float implementation must match torch
    check_float_array_equal(reference, expected_result)

    # Compute the quantized result and de-quantize it
    dequantized_result = q_op(q_input).dequant()

    # The fp32 and quantized results should be very similar when quantization precision is high
    check_r2_score(expected_result, dequantized_result)

    # Test the serialization of QuantizedUnfold
    results_are_equal = partial(quantized_op_results_are_equal, q_input=q_input)
    check_serialization(
        q_op,
        QuantizedUnfold,
        equal_method=results_are_equal,
    )

0 comments on commit fa3ef88

Please sign in to comment.