From 65209a327c2650a5606837825f1bd174d9173412 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 17 Feb 2022 10:47:01 -0500 Subject: [PATCH 01/11] Clean up decimal array creation --- datafusion-common/src/scalar.rs | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/datafusion-common/src/scalar.rs b/datafusion-common/src/scalar.rs index d7c6c6bc710f..eb4d7a7045e8 100644 --- a/datafusion-common/src/scalar.rs +++ b/datafusion-common/src/scalar.rs @@ -985,26 +985,15 @@ impl ScalarValue { precision: &usize, scale: &usize, ) -> Result { - // collect the value as Option let array = scalars .into_iter() .map(|element: ScalarValue| match element { ScalarValue::Decimal128(v1, _, _) => v1, _ => unreachable!(), }) - .collect::>>(); - - // build the decimal array using the Decimal Builder - let mut builder = DecimalBuilder::new(array.len(), *precision, *scale); - array.iter().for_each(|element| match element { - None => { - builder.append_null().unwrap(); - } - Some(v) => { - builder.append_value(*v).unwrap(); - } - }); - Ok(builder.finish()) + .collect::() + .with_precision_and_scale(*precision, *scale)?; + Ok(array) } fn iter_to_array_list( From 6324846757f1fb30c8779e429c61cdba296ba456 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 17 Feb 2022 13:43:44 -0500 Subject: [PATCH 02/11] Refactor a bit more --- datafusion-common/src/scalar.rs | 20 ++--- .../src/expressions/cast.rs | 81 +++++++++++++------ datafusion/src/physical_plan/hash_utils.rs | 16 ++-- 3 files changed, 68 insertions(+), 49 deletions(-) diff --git a/datafusion-common/src/scalar.rs b/datafusion-common/src/scalar.rs index eb4d7a7045e8..57bc565982c2 100644 --- a/datafusion-common/src/scalar.rs +++ b/datafusion-common/src/scalar.rs @@ -1069,21 +1069,11 @@ impl ScalarValue { scale: &usize, size: usize, ) -> DecimalArray { - let mut builder = DecimalBuilder::new(size, *precision, *scale); - match value { - None => { - for _i in 0..size { - builder.append_null().unwrap(); - } - } - Some(v) => { - let v = *v; - for _i in 0..size { - builder.append_value(v).unwrap(); - } - } - }; - builder.finish() + std::iter::repeat(value) + .take(size) + .collect::() + .with_precision_and_scale(*precision, *scale) + .unwrap() } /// Converts a scalar value into an array of `size` rows. diff --git a/datafusion-physical-expr/src/expressions/cast.rs b/datafusion-physical-expr/src/expressions/cast.rs index 1b157e36d189..bd18d16cbfa2 100644 --- a/datafusion-physical-expr/src/expressions/cast.rs +++ b/datafusion-physical-expr/src/expressions/cast.rs @@ -161,8 +161,8 @@ mod tests { use crate::expressions::col; use arrow::{ array::{ - Array, DecimalArray, DecimalBuilder, Float32Array, Float64Array, Int16Array, - Int32Array, Int64Array, Int8Array, StringArray, Time64NanosecondArray, + Array, DecimalArray, Float32Array, Float64Array, Int16Array, Int32Array, + Int64Array, Int8Array, StringArray, Time64NanosecondArray, TimestampNanosecondArray, UInt32Array, }, datatypes::*, @@ -268,23 +268,16 @@ mod tests { }}; } - fn create_decimal_array( - array: &[i128], - precision: usize, - scale: usize, - ) -> Result { - let mut decimal_builder = DecimalBuilder::new(array.len(), precision, scale); - for value in array { - decimal_builder.append_value(*value)? - } - decimal_builder.append_null()?; - Ok(decimal_builder.finish()) - } - #[test] fn test_cast_decimal_to_decimal() -> Result<()> { - let array: Vec = vec![1234, 2222, 3, 4000, 5000]; - let decimal_array = create_decimal_array(&array, 10, 3)?; + let array = vec![1234, 2222, 3, 4000, 5000]; + + let decimal_array = array + .iter() + .map(|v| Some(*v)) + .collect::() + .with_precision_and_scale(10, 3)?; + generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 3), @@ -301,7 +294,12 @@ mod tests { DEFAULT_DATAFUSION_CAST_OPTIONS ); - let decimal_array = create_decimal_array(&array, 10, 3)?; + let decimal_array = array + .iter() + .map(|v| Some(*v)) + .collect::() + .with_precision_and_scale(10, 3)?; + generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 3), @@ -323,9 +321,13 @@ mod tests { #[test] fn test_cast_decimal_to_numeric() -> Result<()> { - let array: Vec = vec![1, 2, 3, 4, 5]; + let array = vec![Some(1), Some(2), Some(3), Some(4), Some(5), None]; // decimal to i8 - let decimal_array = create_decimal_array(&array, 10, 0)?; + let decimal_array = array + .clone() + .iter() + .collect::() + .with_precision_and_scale(10, 0)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 0), @@ -341,8 +343,12 @@ mod tests { ], DEFAULT_DATAFUSION_CAST_OPTIONS ); + // decimal to i16 - let decimal_array = create_decimal_array(&array, 10, 0)?; + let decimal_array = array + .iter() + .collect::() + .with_precision_and_scale(10, 0)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 0), @@ -358,8 +364,12 @@ mod tests { ], DEFAULT_DATAFUSION_CAST_OPTIONS ); + // decimal to i32 - let decimal_array = create_decimal_array(&array, 10, 0)?; + let decimal_array = array + .iter() + .collect::() + .with_precision_and_scale(10, 0)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 0), @@ -375,8 +385,12 @@ mod tests { ], DEFAULT_DATAFUSION_CAST_OPTIONS ); + // decimal to i64 - let decimal_array = create_decimal_array(&array, 10, 0)?; + let decimal_array = array + .iter() + .collect::() + .with_precision_and_scale(10, 0)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 0), @@ -392,9 +406,20 @@ mod tests { ], DEFAULT_DATAFUSION_CAST_OPTIONS ); + // decimal to float32 - let array: Vec = vec![1234, 2222, 3, 4000, 5000]; - let decimal_array = create_decimal_array(&array, 10, 3)?; + let array = vec![ + Some(1234), + Some(2222), + Some(3), + Some(4000), + Some(5000), + None, + ]; + let decimal_array = array + .iter() + .collect::() + .with_precision_and_scale(10, 3)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(10, 3), @@ -410,8 +435,12 @@ mod tests { ], DEFAULT_DATAFUSION_CAST_OPTIONS ); + // decimal to float64 - let decimal_array = create_decimal_array(&array, 20, 6)?; + let decimal_array = array + .into_iter() + .collect::() + .with_precision_and_scale(20, 6)?; generic_decimal_to_other_test_cast!( decimal_array, DataType::Decimal(20, 6), diff --git a/datafusion/src/physical_plan/hash_utils.rs b/datafusion/src/physical_plan/hash_utils.rs index 00073a6592ce..4e503b19e7bf 100644 --- a/datafusion/src/physical_plan/hash_utils.rs +++ b/datafusion/src/physical_plan/hash_utils.rs @@ -564,7 +564,6 @@ pub fn create_hashes<'a>( #[cfg(test)] mod tests { use crate::from_slice::FromSlice; - use arrow::array::DecimalBuilder; use arrow::{array::DictionaryArray, datatypes::Int8Type}; use std::sync::Arc; @@ -572,14 +571,15 @@ mod tests { #[test] fn create_hashes_for_decimal_array() -> Result<()> { - let mut builder = DecimalBuilder::new(4, 20, 3); - let array: Vec = vec![1, 2, 3, 4]; - for value in &array { - builder.append_value(*value)?; - } - let array_ref = Arc::new(builder.finish()); + let array = vec![1, 2, 3, 4] + .into_iter() + .map(Some) + .collect::() + .with_precision_and_scale(20, 3) + .unwrap(); + let array_ref = Arc::new(array); let random_state = RandomState::with_seeds(0, 0, 0, 0); - let hashes_buff = &mut vec![0; array.len()]; + let hashes_buff = &mut vec![0; array_ref.len()]; let hashes = create_hashes(&[array_ref], &random_state, hashes_buff)?; assert_eq!(hashes.len(), 4); Ok(()) From 7cf30eb5010a15d50bb50aa9d5113bc2405d76b4 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 17 Feb 2022 15:30:02 -0500 Subject: [PATCH 03/11] Update the next ! --- .../src/expressions/average.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/datafusion-physical-expr/src/expressions/average.rs b/datafusion-physical-expr/src/expressions/average.rs index acdbf8b72cb1..50efa5571116 100644 --- a/datafusion-physical-expr/src/expressions/average.rs +++ b/datafusion-physical-expr/src/expressions/average.rs @@ -237,11 +237,12 @@ mod tests { #[test] fn avg_decimal() -> Result<()> { // test agg - let mut decimal_builder = DecimalBuilder::new(6, 10, 0); - for i in 1..7 { - decimal_builder.append_value(i as i128)?; - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + (1..7) + .map(Some) + .collect::() + .with_precision_and_scale(10, 0)?, + ); generic_test_op!( array, @@ -262,7 +263,12 @@ mod tests { decimal_builder.append_value(i)?; } } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + (1..6) + .map(|i| if i == 2 { None } else { Some(i) }) + .collect::() + .with_precision_and_scale(10, 0)?, + ); generic_test_op!( array, DataType::Decimal(10, 0), From 9c8f7e2ada4c007c4354494ec3fe17aa166dae8a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 17 Feb 2022 15:31:16 -0500 Subject: [PATCH 04/11] cleanup --- datafusion-physical-expr/src/expressions/average.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/datafusion-physical-expr/src/expressions/average.rs b/datafusion-physical-expr/src/expressions/average.rs index 50efa5571116..44f997d8e72f 100644 --- a/datafusion-physical-expr/src/expressions/average.rs +++ b/datafusion-physical-expr/src/expressions/average.rs @@ -281,11 +281,12 @@ mod tests { #[test] fn avg_decimal_all_nulls() -> Result<()> { // test agg - let mut decimal_builder = DecimalBuilder::new(5, 10, 0); - for _i in 1..6 { - decimal_builder.append_null()?; - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + std::iter::repeat(None) + .take(6) + .collect::() + .with_precision_and_scale(10, 0)?, + ); generic_test_op!( array, DataType::Decimal(10, 0), From 613364ca9a6eac666929125c4b38fda5fdfb4fae Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 17 Feb 2022 15:32:50 -0500 Subject: [PATCH 05/11] update --- datafusion-physical-expr/src/expressions/average.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/datafusion-physical-expr/src/expressions/average.rs b/datafusion-physical-expr/src/expressions/average.rs index 44f997d8e72f..0dfb4aaadc2a 100644 --- a/datafusion-physical-expr/src/expressions/average.rs +++ b/datafusion-physical-expr/src/expressions/average.rs @@ -255,14 +255,6 @@ mod tests { #[test] fn avg_decimal_with_nulls() -> Result<()> { - let mut decimal_builder = DecimalBuilder::new(5, 10, 0); - for i in 1..6 { - if i == 2 { - decimal_builder.append_null()?; - } else { - decimal_builder.append_value(i)?; - } - } let array: ArrayRef = Arc::new( (1..6) .map(|i| if i == 2 { None } else { Some(i) }) From 3eadf00d3257889e6b45b00a68bf02f7b89514c5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 18 Feb 2022 02:26:55 -0500 Subject: [PATCH 06/11] port over min/max --- .../src/expressions/min_max.rs | 135 +++++++++--------- 1 file changed, 68 insertions(+), 67 deletions(-) diff --git a/datafusion-physical-expr/src/expressions/min_max.rs b/datafusion-physical-expr/src/expressions/min_max.rs index be3ea95004c6..c34c43cce70b 100644 --- a/datafusion-physical-expr/src/expressions/min_max.rs +++ b/datafusion-physical-expr/src/expressions/min_max.rs @@ -553,10 +553,11 @@ impl Accumulator for MinAccumulator { #[cfg(test)] mod tests { use super::*; - use crate::expressions::col; - use crate::expressions::tests::aggregate; - use crate::generic_test_op; - use arrow::array::DecimalBuilder; + use crate::from_slice::FromSlice; + use crate::physical_plan::expressions::col; + use crate::physical_plan::expressions::tests::aggregate; + use crate::scalar::ScalarValue::Decimal128; + use crate::{error::Result, generic_test_op}; use arrow::datatypes::*; use arrow::record_batch::RecordBatch; use datafusion_common::Result; @@ -572,32 +573,33 @@ mod tests { assert_eq!(result, left); // min batch - let mut decimal_builder = DecimalBuilder::new(5, 10, 0); - for i in 1..6 { - decimal_builder.append_value(i as i128)?; - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + (1..6) + .map(Some) + .collect::() + .with_precision_and_scale(10, 0)?, + ); let result = min_batch(&array)?; assert_eq!(result, ScalarValue::Decimal128(Some(1), 10, 0)); - // min batch without values - let mut decimal_builder = DecimalBuilder::new(5, 10, 0); - let array: ArrayRef = Arc::new(decimal_builder.finish()); - let result = min_batch(&array)?; - assert_eq!(ScalarValue::Decimal128(None, 10, 0), result); - let mut decimal_builder = DecimalBuilder::new(0, 10, 0); - let array: ArrayRef = Arc::new(decimal_builder.finish()); + // min batch without values + let array: ArrayRef = Arc::new( + std::iter::repeat(None) + .take(0) + .collect::() + .with_precision_and_scale(10, 0)?, + ); let result = min_batch(&array)?; assert_eq!(ScalarValue::Decimal128(None, 10, 0), result); // min batch with agg - let mut decimal_builder = DecimalBuilder::new(6, 10, 0); - decimal_builder.append_null().unwrap(); - for i in 1..6 { - decimal_builder.append_value(i as i128)?; - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + (1..6) + .map(Some) + .collect::() + .with_precision_and_scale(10, 0)?, + ); generic_test_op!( array, DataType::Decimal(10, 0), @@ -610,11 +612,12 @@ mod tests { #[test] fn min_decimal_all_nulls() -> Result<()> { // min batch all nulls - let mut decimal_builder = DecimalBuilder::new(5, 10, 0); - for _i in 1..6 { - decimal_builder.append_null()?; - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + std::iter::repeat(None) + .take(6) + .collect::() + .with_precision_and_scale(10, 0)?, + ); generic_test_op!( array, DataType::Decimal(10, 0), @@ -627,15 +630,13 @@ mod tests { #[test] fn min_decimal_with_nulls() -> Result<()> { // min batch with nulls - let mut decimal_builder = DecimalBuilder::new(5, 10, 0); - for i in 1..6 { - if i == 2 { - decimal_builder.append_null()?; - } else { - decimal_builder.append_value(i as i128)?; - } - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + (1..6) + .map(|i| if i == 2 { None } else { Some(i) }) + .collect::() + .with_precision_and_scale(10, 0)?, + ); + generic_test_op!( array, DataType::Decimal(10, 0), @@ -662,30 +663,32 @@ mod tests { assert_eq!(expect.to_string(), result.unwrap_err().to_string()); // max batch - let mut decimal_builder = DecimalBuilder::new(5, 10, 5); - for i in 1..6 { - decimal_builder.append_value(i as i128)?; - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + (1..6) + .map(Some) + .collect::() + .with_precision_and_scale(10, 5)?, + ); let result = max_batch(&array)?; assert_eq!(result, ScalarValue::Decimal128(Some(5), 10, 5)); + // max batch without values - let mut decimal_builder = DecimalBuilder::new(5, 10, 0); - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + std::iter::repeat(None) + .take(0) + .collect::() + .with_precision_and_scale(10, 0)?, + ); let result = max_batch(&array)?; assert_eq!(ScalarValue::Decimal128(None, 10, 0), result); - let mut decimal_builder = DecimalBuilder::new(0, 10, 0); - let array: ArrayRef = Arc::new(decimal_builder.finish()); - let result = max_batch(&array)?; - assert_eq!(ScalarValue::Decimal128(None, 10, 0), result); // max batch with agg - let mut decimal_builder = DecimalBuilder::new(6, 10, 0); - decimal_builder.append_null().unwrap(); - for i in 1..6 { - decimal_builder.append_value(i as i128)?; - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + (1..6) + .map(Some) + .collect::() + .with_precision_and_scale(10, 0)?, + ); generic_test_op!( array, DataType::Decimal(10, 0), @@ -697,15 +700,12 @@ mod tests { #[test] fn max_decimal_with_nulls() -> Result<()> { - let mut decimal_builder = DecimalBuilder::new(5, 10, 0); - for i in 1..6 { - if i == 2 { - decimal_builder.append_null()?; - } else { - decimal_builder.append_value(i as i128)?; - } - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + (1..6) + .map(|i| if i == 2 { None } else { Some(i) }) + .collect::() + .with_precision_and_scale(10, 0)?, + ); generic_test_op!( array, DataType::Decimal(10, 0), @@ -717,11 +717,12 @@ mod tests { #[test] fn max_decimal_all_nulls() -> Result<()> { - let mut decimal_builder = DecimalBuilder::new(5, 10, 0); - for _i in 1..6 { - decimal_builder.append_null()?; - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + std::iter::repeat(None) + .take(6) + .collect::() + .with_precision_and_scale(10, 0)?, + ); generic_test_op!( array, DataType::Decimal(10, 0), From 9757ffc1c634345ba9f1868e65c220c7c666f725 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 18 Feb 2022 02:36:23 -0500 Subject: [PATCH 07/11] Update sum --- .../src/expressions/sum.rs | 74 +++++++++---------- 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/datafusion-physical-expr/src/expressions/sum.rs b/datafusion-physical-expr/src/expressions/sum.rs index f2c19f56e335..636bea98cfc8 100644 --- a/datafusion-physical-expr/src/expressions/sum.rs +++ b/datafusion-physical-expr/src/expressions/sum.rs @@ -375,9 +375,9 @@ impl Accumulator for SumAccumulator { #[cfg(test)] mod tests { use super::*; - use crate::expressions::col; - use crate::generic_test_op; - use arrow::array::DecimalBuilder; + use crate::from_slice::FromSlice; + use crate::physical_plan::expressions::col; + use crate::{error::Result, generic_test_op}; use arrow::datatypes::*; use arrow::record_batch::RecordBatch; use datafusion_common::Result; @@ -419,20 +419,22 @@ mod tests { ); // test sum batch - let mut decimal_builder = DecimalBuilder::new(5, 10, 0); - for i in 1..6 { - decimal_builder.append_value(i as i128)?; - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + (1..6) + .map(Some) + .collect::() + .with_precision_and_scale(10, 0)?, + ); let result = sum_batch(&array)?; assert_eq!(ScalarValue::Decimal128(Some(15), 10, 0), result); // test agg - let mut decimal_builder = DecimalBuilder::new(5, 10, 0); - for i in 1..6 { - decimal_builder.append_value(i as i128)?; - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + (1..6) + .map(Some) + .collect::() + .with_precision_and_scale(10, 0)?, + ); generic_test_op!( array, @@ -452,28 +454,22 @@ mod tests { assert_eq!(ScalarValue::Decimal128(Some(123), 10, 2), result); // test with batch - let mut decimal_builder = DecimalBuilder::new(5, 10, 0); - for i in 1..6 { - if i == 2 { - decimal_builder.append_null()?; - } else { - decimal_builder.append_value(i)?; - } - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + (1..6) + .map(|i| if i == 2 { None } else { Some(i) }) + .collect::() + .with_precision_and_scale(10, 0)?, + ); let result = sum_batch(&array)?; assert_eq!(ScalarValue::Decimal128(Some(13), 10, 0), result); // test agg - let mut decimal_builder = DecimalBuilder::new(5, 35, 0); - for i in 1..6 { - if i == 2 { - decimal_builder.append_null()?; - } else { - decimal_builder.append_value(i)?; - } - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + (1..6) + .map(|i| if i == 2 { None } else { Some(i) }) + .collect::() + .with_precision_and_scale(35, 0)?, + ); generic_test_op!( array, DataType::Decimal(35, 0), @@ -492,20 +488,16 @@ mod tests { assert_eq!(ScalarValue::Decimal128(None, 10, 2), result); // test with batch - let mut decimal_builder = DecimalBuilder::new(5, 10, 0); - for _i in 1..6 { - decimal_builder.append_null()?; - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); + let array: ArrayRef = Arc::new( + std::iter::repeat(None) + .take(6) + .collect::() + .with_precision_and_scale(10, 0)?, + ); let result = sum_batch(&array)?; assert_eq!(ScalarValue::Decimal128(None, 10, 0), result); // test agg - let mut decimal_builder = DecimalBuilder::new(5, 10, 0); - for _i in 1..6 { - decimal_builder.append_null()?; - } - let array: ArrayRef = Arc::new(decimal_builder.finish()); generic_test_op!( array, DataType::Decimal(10, 0), From 0dd9cc297792515a946b59cd9ec23b2e17f0c71c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 18 Feb 2022 02:41:49 -0500 Subject: [PATCH 08/11] Use max scale / precision from arrow --- datafusion-common/src/lib.rs | 4 +--- datafusion-common/src/scalar.rs | 8 ++------ .../src/coercion_rule/binary_rule.rs | 7 +++---- datafusion-physical-expr/src/expressions/average.rs | 10 ++++------ datafusion-physical-expr/src/expressions/sum.rs | 7 +++---- datafusion/src/scalar.rs | 4 +--- datafusion/src/sql/utils.rs | 6 +++--- 7 files changed, 17 insertions(+), 29 deletions(-) diff --git a/datafusion-common/src/lib.rs b/datafusion-common/src/lib.rs index fdcb7d4b5f74..d39020f72132 100644 --- a/datafusion-common/src/lib.rs +++ b/datafusion-common/src/lib.rs @@ -25,6 +25,4 @@ mod scalar; pub use column::Column; pub use dfschema::{DFField, DFSchema, DFSchemaRef, ExprSchema, ToDFSchema}; pub use error::{DataFusionError, Result}; -pub use scalar::{ - ScalarType, ScalarValue, MAX_PRECISION_FOR_DECIMAL128, MAX_SCALE_FOR_DECIMAL128, -}; +pub use scalar::{ScalarType, ScalarValue}; diff --git a/datafusion-common/src/scalar.rs b/datafusion-common/src/scalar.rs index 57bc565982c2..4a1dde18b203 100644 --- a/datafusion-common/src/scalar.rs +++ b/datafusion-common/src/scalar.rs @@ -26,6 +26,7 @@ use arrow::{ Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalUnit, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, + DECIMAL_MAX_PRECISION, }, }; use ordered_float::OrderedFloat; @@ -34,11 +35,6 @@ use std::convert::{Infallible, TryInto}; use std::str::FromStr; use std::{convert::TryFrom, fmt, iter::repeat, sync::Arc}; -// TODO may need to be moved to arrow-rs -/// The max precision and scale for decimal128 -pub const MAX_PRECISION_FOR_DECIMAL128: usize = 38; -pub const MAX_SCALE_FOR_DECIMAL128: usize = 38; - /// Represents a dynamically typed, nullable single value. /// This is the single-valued counter-part of arrow’s `Array`. #[derive(Clone)] @@ -542,7 +538,7 @@ impl ScalarValue { scale: usize, ) -> Result { // make sure the precision and scale is valid - if precision <= MAX_PRECISION_FOR_DECIMAL128 && scale <= precision { + if precision <= DECIMAL_MAX_PRECISION && scale <= precision { return Ok(ScalarValue::Decimal128(Some(value), precision, scale)); } return Err(DataFusionError::Internal(format!( diff --git a/datafusion-physical-expr/src/coercion_rule/binary_rule.rs b/datafusion-physical-expr/src/coercion_rule/binary_rule.rs index 22070794bda0..ac23f2b1b78a 100644 --- a/datafusion-physical-expr/src/coercion_rule/binary_rule.rs +++ b/datafusion-physical-expr/src/coercion_rule/binary_rule.rs @@ -17,10 +17,9 @@ //! Coercion rules for matching argument types for binary operators -use arrow::datatypes::DataType; +use arrow::datatypes::{DataType, DECIMAL_MAX_PRECISION, DECIMAL_MAX_SCALE}; use datafusion_common::DataFusionError; use datafusion_common::Result; -use datafusion_common::{MAX_PRECISION_FOR_DECIMAL128, MAX_SCALE_FOR_DECIMAL128}; use datafusion_expr::Operator; /// Coercion rules for all binary operators. Returns the output type @@ -261,8 +260,8 @@ fn mathematics_numerical_coercion( fn create_decimal_type(precision: usize, scale: usize) -> DataType { DataType::Decimal( - MAX_PRECISION_FOR_DECIMAL128.min(precision), - MAX_SCALE_FOR_DECIMAL128.min(scale), + DECIMAL_MAX_PRECISION.min(precision), + DECIMAL_MAX_SCALE.min(scale), ) } diff --git a/datafusion-physical-expr/src/expressions/average.rs b/datafusion-physical-expr/src/expressions/average.rs index 0dfb4aaadc2a..c6318704d0aa 100644 --- a/datafusion-physical-expr/src/expressions/average.rs +++ b/datafusion-physical-expr/src/expressions/average.rs @@ -23,15 +23,13 @@ use std::sync::Arc; use crate::{AggregateExpr, PhysicalExpr}; use arrow::compute; -use arrow::datatypes::DataType; +use arrow::datatypes::{DataType, DECIMAL_MAX_PRECISION, DECIMAL_MAX_SCALE}; use arrow::{ array::{ArrayRef, UInt64Array}, datatypes::Field, }; use datafusion_common::{DataFusionError, Result}; -use datafusion_common::{ - ScalarValue, MAX_PRECISION_FOR_DECIMAL128, MAX_SCALE_FOR_DECIMAL128, -}; +use datafusion_common::ScalarValue; use datafusion_expr::Accumulator; use super::{format_state_name, sum}; @@ -50,8 +48,8 @@ pub fn avg_return_type(arg_type: &DataType) -> Result { DataType::Decimal(precision, scale) => { // in the spark, the result type is DECIMAL(min(38,precision+4), min(38,scale+4)). // ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66 - let new_precision = MAX_PRECISION_FOR_DECIMAL128.min(*precision + 4); - let new_scale = MAX_SCALE_FOR_DECIMAL128.min(*scale + 4); + let new_precision = DECIMAL_MAX_PRECISION.min(*precision + 4); + let new_scale = DECIMAL_MAX_SCALE.min(*scale + 4); Ok(DataType::Decimal(new_precision, new_scale)) } DataType::Int8 diff --git a/datafusion-physical-expr/src/expressions/sum.rs b/datafusion-physical-expr/src/expressions/sum.rs index 636bea98cfc8..cc4172638a0b 100644 --- a/datafusion-physical-expr/src/expressions/sum.rs +++ b/datafusion-physical-expr/src/expressions/sum.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use crate::{AggregateExpr, PhysicalExpr}; use arrow::compute; -use arrow::datatypes::DataType; +use arrow::datatypes::{DataType, DECIMAL_MAX_PRECISION}; use arrow::{ array::{ ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, @@ -31,8 +31,7 @@ use arrow::{ }, datatypes::Field, }; -use datafusion_common::{DataFusionError, Result}; -use datafusion_common::{ScalarValue, MAX_PRECISION_FOR_DECIMAL128}; +use datafusion_common::{ScalarValue, DataFusionError, Result}; use datafusion_expr::Accumulator; use super::format_state_name; @@ -63,7 +62,7 @@ pub fn sum_return_type(arg_type: &DataType) -> Result { DataType::Decimal(precision, scale) => { // in the spark, the result type is DECIMAL(min(38,precision+10), s) // ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66 - let new_precision = MAX_PRECISION_FOR_DECIMAL128.min(*precision + 10); + let new_precision = DECIMAL_MAX_PRECISION.min(*precision + 10); Ok(DataType::Decimal(new_precision, *scale)) } other => Err(DataFusionError::Plan(format!( diff --git a/datafusion/src/scalar.rs b/datafusion/src/scalar.rs index 7dc947565728..774b8ebf86dc 100644 --- a/datafusion/src/scalar.rs +++ b/datafusion/src/scalar.rs @@ -17,9 +17,7 @@ //! ScalarValue reimported from datafusion-common -pub use datafusion_common::{ - ScalarType, ScalarValue, MAX_PRECISION_FOR_DECIMAL128, MAX_SCALE_FOR_DECIMAL128, -}; +pub use datafusion_common::{ScalarType, ScalarValue}; #[cfg(test)] mod tests { diff --git a/datafusion/src/sql/utils.rs b/datafusion/src/sql/utils.rs index cbe40d6dc51d..8ec0a49a2fd6 100644 --- a/datafusion/src/sql/utils.rs +++ b/datafusion/src/sql/utils.rs @@ -17,12 +17,12 @@ //! SQL Utility Functions -use arrow::datatypes::DataType; +use arrow::datatypes::{DataType, DECIMAL_MAX_PRECISION}; use sqlparser::ast::Ident; use crate::logical_plan::ExprVisitable; use crate::logical_plan::{Expr, LogicalPlan}; -use crate::scalar::{ScalarValue, MAX_PRECISION_FOR_DECIMAL128}; +use crate::scalar::ScalarValue; use crate::{ error::{DataFusionError, Result}, logical_plan::{Column, ExpressionVisitor, Recursion}, @@ -522,7 +522,7 @@ pub(crate) fn make_decimal_type( } (Some(p), Some(s)) => { // Arrow decimal is i128 meaning 38 maximum decimal digits - if (p as usize) > MAX_PRECISION_FOR_DECIMAL128 || s > p { + if (p as usize) > DECIMAL_MAX_PRECISION || s > p { return Err(DataFusionError::Internal(format!( "For decimal(precision, scale) precision must be less than or equal to 38 and scale can't be greater than precision. Got ({}, {})", p, s From c0a478448574985e9bc080712b679c72662d86e3 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 5 Mar 2022 09:14:29 -0500 Subject: [PATCH 09/11] fmt --- datafusion-physical-expr/src/expressions/average.rs | 2 +- datafusion-physical-expr/src/expressions/sum.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion-physical-expr/src/expressions/average.rs b/datafusion-physical-expr/src/expressions/average.rs index c6318704d0aa..8888ee99366d 100644 --- a/datafusion-physical-expr/src/expressions/average.rs +++ b/datafusion-physical-expr/src/expressions/average.rs @@ -28,8 +28,8 @@ use arrow::{ array::{ArrayRef, UInt64Array}, datatypes::Field, }; -use datafusion_common::{DataFusionError, Result}; use datafusion_common::ScalarValue; +use datafusion_common::{DataFusionError, Result}; use datafusion_expr::Accumulator; use super::{format_state_name, sum}; diff --git a/datafusion-physical-expr/src/expressions/sum.rs b/datafusion-physical-expr/src/expressions/sum.rs index cc4172638a0b..08648e403a03 100644 --- a/datafusion-physical-expr/src/expressions/sum.rs +++ b/datafusion-physical-expr/src/expressions/sum.rs @@ -31,7 +31,7 @@ use arrow::{ }, datatypes::Field, }; -use datafusion_common::{ScalarValue, DataFusionError, Result}; +use datafusion_common::{DataFusionError, Result, ScalarValue}; use datafusion_expr::Accumulator; use super::format_state_name; From b42933a5e348e376895f38cd56bfb41e8b2ab71d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 5 Mar 2022 09:45:59 -0500 Subject: [PATCH 10/11] Fixup --- datafusion-physical-expr/src/expressions/min_max.rs | 8 +++----- datafusion-physical-expr/src/expressions/sum.rs | 5 ++--- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/datafusion-physical-expr/src/expressions/min_max.rs b/datafusion-physical-expr/src/expressions/min_max.rs index c34c43cce70b..a599d65c40a6 100644 --- a/datafusion-physical-expr/src/expressions/min_max.rs +++ b/datafusion-physical-expr/src/expressions/min_max.rs @@ -553,11 +553,9 @@ impl Accumulator for MinAccumulator { #[cfg(test)] mod tests { use super::*; - use crate::from_slice::FromSlice; - use crate::physical_plan::expressions::col; - use crate::physical_plan::expressions::tests::aggregate; - use crate::scalar::ScalarValue::Decimal128; - use crate::{error::Result, generic_test_op}; + use crate::expressions::col; + use crate::expressions::tests::aggregate; + use crate::generic_test_op; use arrow::datatypes::*; use arrow::record_batch::RecordBatch; use datafusion_common::Result; diff --git a/datafusion-physical-expr/src/expressions/sum.rs b/datafusion-physical-expr/src/expressions/sum.rs index 08648e403a03..9945620443ac 100644 --- a/datafusion-physical-expr/src/expressions/sum.rs +++ b/datafusion-physical-expr/src/expressions/sum.rs @@ -374,9 +374,8 @@ impl Accumulator for SumAccumulator { #[cfg(test)] mod tests { use super::*; - use crate::from_slice::FromSlice; - use crate::physical_plan::expressions::col; - use crate::{error::Result, generic_test_op}; + use crate::expressions::col; + use crate::generic_test_op; use arrow::datatypes::*; use arrow::record_batch::RecordBatch; use datafusion_common::Result; From 009dd7c3dbb5ff67a0fd4d2ccfb2f7e22a2cae8e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 5 Mar 2022 16:35:43 -0500 Subject: [PATCH 11/11] clippy --- datafusion-physical-expr/src/expressions/cast.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion-physical-expr/src/expressions/cast.rs b/datafusion-physical-expr/src/expressions/cast.rs index bd18d16cbfa2..2870c9b2654d 100644 --- a/datafusion-physical-expr/src/expressions/cast.rs +++ b/datafusion-physical-expr/src/expressions/cast.rs @@ -324,7 +324,6 @@ mod tests { let array = vec![Some(1), Some(2), Some(3), Some(4), Some(5), None]; // decimal to i8 let decimal_array = array - .clone() .iter() .collect::() .with_precision_and_scale(10, 0)?;