From a9996e2dcedfa50cc72c97b9c61561b0f2deeeb2 Mon Sep 17 00:00:00 2001 From: Adrian Lizarraga Date: Mon, 11 Nov 2024 19:49:27 -0800 Subject: [PATCH] [Quant Tool] Flaky test due to Pad reflect bug (#22798) ### Description Fixes a unit test that would fail intermittently due to an existing bug with Pad (reflect mode). When the number of padded values is >= the inner dimension size, the ORT Pad implementation accesses invalid memory. This PR makes the number of padding values less than the inner dimension size to avoid triggering the bug. ### Motivation and Context See related issues: https://github.com/microsoft/onnxruntime/issues/8265 https://github.com/microsoft/onnxruntime/issues/11828 https://github.com/microsoft/onnxruntime/issues/20801 Here's a valgrind trace obtained on a Linux machine (with `sess_options.enable_cpu_mem_arena = False`) ``` ==864228== Invalid read of size 4 ==864228== at 0x2716272A: void onnxruntime::PadInnermostAxis(unsigned int*, unsigned int*, long, unsigned long) (pad.cc:370) ==864228== by 0x2715D213: onnxruntime::common::Status onnxruntime::PadImpl(onnxruntime::OpKernelContext*, absl::lts_20240722::InlinedVector > const&, absl::lts_20240722::InlinedVector > const&, onnxruntime::Mode const&, unsigned int) (pad.cc:551) ==864228== by 0x2715B2BB: onnxruntime::Pad::Compute(onnxruntime::OpKernelContext*) const (pad.cc:725) ==864228== by 0x276FF6A7: onnxruntime::ExecuteKernel(onnxruntime::StreamExecutionContext&, unsigned long, unsigned long, bool const&, onnxruntime::SessionScope&) (sequential_executor.cc:484) ==864228== by 0x276F4A04: onnxruntime::LaunchKernelStep::Execute(onnxruntime::StreamExecutionContext&, unsigned long, onnxruntime::SessionScope&, bool const&, bool&) (execution_steps.cc:73) ... ``` The above is obtained with the basic Pad(reflect) example on the [ONNX Pad operator spec page](https://onnx.ai/onnx/operators/onnx__Pad.html#summary): ```python data = [ [1.0, 1.2], [2.3, 3.4], [4.5, 5.7], ] pads = [0, 2, 0, 0] mode = 'reflect' # Expected output by ONNX spec expected_output = [ [1.0, 1.2, 1.0, 1.2], [2.3, 3.4, 2.3, 3.4], [4.5, 5.7, 4.5, 5.7], ] # Bugged output from onnxruntime has invalid/uninitialized data for the first element in the inner dimension # invalid data may be 0.0, inf, nan, etc. ort_output = [ [inf, 1.2, 1.0, 1.2], [inf, 3.4, 2.3, 3.4], [inf, 5.7, 4.5, 5.7], ] ``` --- onnxruntime/test/python/quantization/test_op_pad.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/python/quantization/test_op_pad.py b/onnxruntime/test/python/quantization/test_op_pad.py index 05736019cd7c8..755c7fae5e3e8 100644 --- a/onnxruntime/test/python/quantization/test_op_pad.py +++ b/onnxruntime/test/python/quantization/test_op_pad.py @@ -548,14 +548,15 @@ def build_pad_model( opset: int = 21, float_type: onnx.TensorProto.DataType = onnx.TensorProto.FLOAT, ) -> onnx.ModelProto: + num_pads_start = 1 input_0 = onnx.helper.make_tensor_value_info("input_0", float_type, (3, 2)) - output_0 = onnx.helper.make_tensor_value_info("output_0", float_type, (3, 4)) + output_0 = onnx.helper.make_tensor_value_info("output_0", float_type, (3, 2 + num_pads_start)) initializers = [] pad_input_names = ["input_0"] attrs = {"mode": mode} - pads_data = np.array([0, 2, 0, 0], dtype=np.int64) # Pad two vals at beginning of axis 1. + pads_data = np.array([0, num_pads_start, 0, 0], dtype=np.int64) # Pad one val at beginning of axis 1. if opset >= 11: initializers.append(onnx.numpy_helper.from_array(pads_data, "pads")) pad_input_names.append("pads")