From b1e889e23454c6ae44a66229df4d5ce577d7f741 Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Mon, 4 Nov 2024 13:31:34 +0100 Subject: [PATCH] fix: decimal stat rounding overflow Signed-off-by: Marko Grujic Signed-off-by: Thomas <12407096+thomas-chauvet@users.noreply.github.com> --- crates/core/src/writer/stats.rs | 36 ++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/crates/core/src/writer/stats.rs b/crates/core/src/writer/stats.rs index c1f0363083..ae763f7b72 100644 --- a/crates/core/src/writer/stats.rs +++ b/crates/core/src/writer/stats.rs @@ -343,7 +343,15 @@ impl StatsScalar { }); }; - let val = val / 10.0_f64.powi(*scale); + let mut val = val / 10.0_f64.powi(*scale); + + if val.is_normal() { + if (val.trunc() as i128).to_string().len() > (precision - scale) as usize { + // For normal values with integer parts that get rounded to a number beyond + // the precision - scale range take the next smaller (by magnitude) value + val = f64::from_bits(val.to_bits() - 1); + } + } Ok(Self::Decimal(val)) } (Statistics::FixedLenByteArray(v), Some(LogicalType::Uuid)) => { @@ -740,6 +748,32 @@ mod tests { }), Value::from(10.0), ), + ( + simple_parquet_stat!( + Statistics::FixedLenByteArray, + FixedLenByteArray::from(vec![ + 75, 59, 76, 168, 90, 134, 196, 122, 9, 138, 34, 63, 255, 255, 255, 255 + ]) + ), + Some(LogicalType::Decimal { + scale: 6, + precision: 38, + }), + Value::from(9.999999999999999e31), + ), + ( + simple_parquet_stat!( + Statistics::FixedLenByteArray, + FixedLenByteArray::from(vec![ + 180, 196, 179, 87, 165, 121, 59, 133, 246, 117, 221, 192, 0, 0, 0, 1 + ]) + ), + Some(LogicalType::Decimal { + scale: 6, + precision: 38, + }), + Value::from(-9.999999999999999e31), + ), ( simple_parquet_stat!( Statistics::FixedLenByteArray,