diff --git a/native/Cargo.lock b/native/Cargo.lock index 5807b61b65..b6d89f7a1d 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -811,7 +811,7 @@ dependencies = [ [[package]] name = "datafusion" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "ahash", "arrow", @@ -842,7 +842,7 @@ dependencies = [ "half", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "num_cpus", "object_store", @@ -860,7 +860,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "arrow-schema", "async-trait", @@ -956,7 +956,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "ahash", "arrow", @@ -972,20 +972,22 @@ dependencies = [ "object_store", "paste", "sqlparser", + "tokio", ] [[package]] name = "datafusion-common-runtime" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ + "log", "tokio", ] [[package]] name = "datafusion-execution" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "arrow", "chrono", @@ -1005,7 +1007,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "ahash", "arrow", @@ -1026,7 +1028,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "arrow", "datafusion-common", @@ -1036,7 +1038,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "arrow", "arrow-buffer", @@ -1049,7 +1051,7 @@ dependencies = [ "datafusion-expr", "hashbrown", "hex", - "itertools 0.12.1", + "itertools 0.13.0", "log", "md-5", "rand", @@ -1062,7 +1064,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "ahash", "arrow", @@ -1082,7 +1084,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "ahash", "arrow", @@ -1095,7 +1097,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "arrow", "arrow-array", @@ -1107,7 +1109,8 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", - "itertools 0.12.1", + "datafusion-physical-expr-common", + "itertools 0.13.0", "log", "paste", "rand", @@ -1116,7 +1119,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "datafusion-common", "datafusion-expr", @@ -1127,7 +1130,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "arrow", "async-trait", @@ -1137,7 +1140,7 @@ dependencies = [ "datafusion-physical-expr", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "paste", "regex-syntax", @@ -1146,7 +1149,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "ahash", "arrow", @@ -1167,7 +1170,7 @@ dependencies = [ "hashbrown", "hex", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "paste", "petgraph", @@ -1177,7 +1180,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "ahash", "arrow", @@ -1190,7 +1193,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-functions-aggregate" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "ahash", "arrow", @@ -1205,19 +1208,19 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-physical-expr", "datafusion-physical-plan", - "itertools 0.12.1", + "itertools 0.13.0", ] [[package]] name = "datafusion-physical-plan" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "ahash", "arrow", @@ -1240,7 +1243,7 @@ dependencies = [ "half", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "once_cell", "parking_lot", @@ -1252,7 +1255,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "41.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=f98693e#f98693e16fa7494f2521096ee3c35aae7dac98fc" +source = "git+https://github.com/apache/datafusion.git?rev=dff590b#dff590bfd2bb9993b2c8ce6f76a3bdd973e520a8" dependencies = [ "arrow", "arrow-array", diff --git a/native/Cargo.toml b/native/Cargo.toml index 4f6a6bcfc5..7faf1de239 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -39,14 +39,14 @@ arrow-buffer = { version = "52.2.0" } arrow-data = { version = "52.2.0" } arrow-schema = { version = "52.2.0" } parquet = { version = "52.2.0", default-features = false, features = ["experimental"] } -datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "f98693e" } -datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "f98693e", features = ["unicode_expressions", "crypto_expressions"] } -datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "f98693e", features = ["crypto_expressions"] } -datafusion-functions-nested = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "f98693e", default-features = false } -datafusion-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "f98693e", default-features = false } -datafusion-execution = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "f98693e", default-features = false } -datafusion-physical-plan = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "f98693e", default-features = false } -datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "f98693e", default-features = false } +datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "dff590b" } +datafusion = { default-features = false, git = "https://github.com/apache/datafusion.git", rev = "dff590b", features = ["unicode_expressions", "crypto_expressions"] } +datafusion-functions = { git = "https://github.com/apache/datafusion.git", rev = "dff590b", features = ["crypto_expressions"] } +datafusion-functions-nested = { git = "https://github.com/apache/datafusion.git", rev = "dff590b", default-features = false } +datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "dff590b", default-features = false } +datafusion-execution = { git = "https://github.com/apache/datafusion.git", rev = "dff590b", default-features = false } +datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "dff590b", default-features = false } +datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "dff590b", default-features = false } datafusion-comet-spark-expr = { path = "spark-expr", version = "0.2.0" } datafusion-comet-proto = { path = "proto", version = "0.2.0" } chrono = { version = "0.4", default-features = false, features = ["clock"] } diff --git a/native/core/src/execution/datafusion/expressions/avg.rs b/native/core/src/execution/datafusion/expressions/avg.rs index c3277bf7af..5e7b555c8a 100644 --- a/native/core/src/execution/datafusion/expressions/avg.rs +++ b/native/core/src/execution/datafusion/expressions/avg.rs @@ -122,6 +122,10 @@ impl AggregateExpr for Avg { ), } } + + fn default_value(&self, _data_type: &DataType) -> Result { + Ok(ScalarValue::Float64(None)) + } } impl PartialEq for Avg { diff --git a/native/core/src/execution/datafusion/expressions/avg_decimal.rs b/native/core/src/execution/datafusion/expressions/avg_decimal.rs index d29dc6dab0..7eab6a0084 100644 --- a/native/core/src/execution/datafusion/expressions/avg_decimal.rs +++ b/native/core/src/execution/datafusion/expressions/avg_decimal.rs @@ -142,6 +142,22 @@ impl AggregateExpr for AvgDecimal { ), } } + + fn default_value(&self, _data_type: &DataType) -> Result { + match &self.result_data_type { + Decimal128(target_precision, target_scale) => { + Ok(make_decimal128( + None, + *target_precision, + *target_scale, + )) + } + _ => not_impl_err!( + "The result_data_type of AvgDecimal should be Decimal128 but got{}", + self.result_data_type + ), + } + } } impl PartialEq for AvgDecimal { @@ -211,6 +227,10 @@ impl AvgDecimalAccumulator { } } +fn make_decimal128(value: Option, precision: u8, scale: i8) -> ScalarValue { + ScalarValue::Decimal128(value, precision, scale) +} + impl Accumulator for AvgDecimalAccumulator { fn state(&mut self) -> Result> { Ok(vec![ @@ -265,10 +285,6 @@ impl Accumulator for AvgDecimalAccumulator { } fn evaluate(&mut self) -> Result { - fn make_decimal128(value: Option, precision: u8, scale: i8) -> ScalarValue { - ScalarValue::Decimal128(value, precision, scale) - } - let scaler = 10_i128.pow(self.target_scale.saturating_sub(self.sum_scale) as u32); let target_min = MIN_DECIMAL_FOR_EACH_PRECISION[self.target_precision as usize - 1]; let target_max = MAX_DECIMAL_FOR_EACH_PRECISION[self.target_precision as usize - 1]; diff --git a/native/core/src/execution/datafusion/expressions/correlation.rs b/native/core/src/execution/datafusion/expressions/correlation.rs index 642c7d8664..3dcf6cca8b 100644 --- a/native/core/src/execution/datafusion/expressions/correlation.rs +++ b/native/core/src/execution/datafusion/expressions/correlation.rs @@ -121,6 +121,10 @@ impl AggregateExpr for Correlation { fn name(&self) -> &str { &self.name } + + fn default_value(&self, _data_type: &DataType) -> Result { + Ok(ScalarValue::Float64(None)) + } } impl PartialEq for Correlation { diff --git a/native/core/src/execution/datafusion/expressions/covariance.rs b/native/core/src/execution/datafusion/expressions/covariance.rs index 20a6fab94c..11b345a22b 100644 --- a/native/core/src/execution/datafusion/expressions/covariance.rs +++ b/native/core/src/execution/datafusion/expressions/covariance.rs @@ -118,6 +118,10 @@ impl AggregateExpr for Covariance { fn name(&self) -> &str { &self.name } + + fn default_value(&self, _data_type: &DataType) -> Result { + Ok(ScalarValue::Float64(None)) + } } impl PartialEq for Covariance { diff --git a/native/core/src/execution/datafusion/expressions/stddev.rs b/native/core/src/execution/datafusion/expressions/stddev.rs index 50d66463a8..bc96a56808 100644 --- a/native/core/src/execution/datafusion/expressions/stddev.rs +++ b/native/core/src/execution/datafusion/expressions/stddev.rs @@ -108,6 +108,10 @@ impl AggregateExpr for Stddev { fn name(&self) -> &str { &self.name } + + fn default_value(&self, _data_type: &DataType) -> Result { + Ok(ScalarValue::Float64(None)) + } } impl PartialEq for Stddev { diff --git a/native/core/src/execution/datafusion/expressions/sum_decimal.rs b/native/core/src/execution/datafusion/expressions/sum_decimal.rs index 4eb36cbd08..37030b67a8 100644 --- a/native/core/src/execution/datafusion/expressions/sum_decimal.rs +++ b/native/core/src/execution/datafusion/expressions/sum_decimal.rs @@ -112,6 +112,13 @@ impl AggregateExpr for SumDecimal { self.scale, ))) } + + fn default_value(&self, _data_type: &DataType) -> DFResult { + ScalarValue::new_primitive::( + None, + &DataType::Decimal128(self.precision, self.scale), + ) + } } impl PartialEq for SumDecimal { diff --git a/native/core/src/execution/datafusion/expressions/variance.rs b/native/core/src/execution/datafusion/expressions/variance.rs index 19322aa630..5cfbf29471 100644 --- a/native/core/src/execution/datafusion/expressions/variance.rs +++ b/native/core/src/execution/datafusion/expressions/variance.rs @@ -106,6 +106,10 @@ impl AggregateExpr for Variance { fn name(&self) -> &str { &self.name } + + fn default_value(&self, _data_type: &DataType) -> Result { + Ok(ScalarValue::Float64(None)) + } } impl PartialEq for Variance {