diff --git a/datafusion/core/src/dataframe.rs b/datafusion/core/src/dataframe.rs index 5f6fa6b4b8fd..30e14a2afa59 100644 --- a/datafusion/core/src/dataframe.rs +++ b/datafusion/core/src/dataframe.rs @@ -323,7 +323,8 @@ impl DataFrame { /// ``` pub async fn describe(self) -> Result { //the functions now supported - let supported_describe_functions = vec!["count", "null_count", "max", "min"]; + let supported_describe_functions = + vec!["count", "null_count", "mean", "min", "max"]; let fields_iter = self.schema().fields().iter(); @@ -369,7 +370,19 @@ impl DataFrame { )? .collect() .await?, - // max aggregation + // mean aggregation + self.clone() + .aggregate( + vec![], + fields_iter + .clone() + .filter(|f| f.data_type().is_numeric()) + .map(|f| datafusion_expr::avg(col(f.name())).alias(f.name())) + .collect::>(), + )? + .collect() + .await?, + // min aggregation self.clone() .aggregate( vec![], @@ -378,12 +391,12 @@ impl DataFrame { .filter(|f| { !matches!(f.data_type(), DataType::Binary | DataType::Boolean) }) - .map(|f| datafusion_expr::max(col(f.name())).alias(f.name())) + .map(|f| datafusion_expr::min(col(f.name())).alias(f.name())) .collect::>(), )? .collect() .await?, - // min aggregation + // max aggregation self.clone() .aggregate( vec![], @@ -392,7 +405,7 @@ impl DataFrame { .filter(|f| { !matches!(f.data_type(), DataType::Binary | DataType::Boolean) }) - .map(|f| datafusion_expr::min(col(f.name())).alias(f.name())) + .map(|f| datafusion_expr::max(col(f.name())).alias(f.name())) .collect::>(), )? .collect() diff --git a/datafusion/core/tests/dataframe.rs b/datafusion/core/tests/dataframe.rs index 55076fe52469..85d968398efd 100644 --- a/datafusion/core/tests/dataframe.rs +++ b/datafusion/core/tests/dataframe.rs @@ -54,8 +54,9 @@ async fn describe() -> Result<()> { "+------------+-----+----------+-------------+--------------+---------+------------+-------------------+------------+-----------------+------------+---------------------+", "| count | 8.0 | 8 | 8.0 | 8.0 | 8.0 | 8.0 | 8.0 | 8.0 | 8 | 8 | 8 |", "| null_count | 8.0 | 8 | 8.0 | 8.0 | 8.0 | 8.0 | 8.0 | 8.0 | 8 | 8 | 8 |", - "| max | 7.0 | null | 1.0 | 1.0 | 1.0 | 10.0 | 1.100000023841858 | 10.1 | null | null | 2009-04-01T00:01:00 |", + "| mean | 3.5 | null | 0.5 | 0.5 | 0.5 | 5.0 | 0.550000011920929 | 5.05 | null | null | null |", "| min | 0.0 | null | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | null | null | 2009-01-01T00:00:00 |", + "| max | 7.0 | null | 1.0 | 1.0 | 1.0 | 10.0 | 1.100000023841858 | 10.1 | null | null | 2009-04-01T00:01:00 |", "+------------+-----+----------+-------------+--------------+---------+------------+-------------------+------------+-----------------+------------+---------------------+", ]; assert_batches_eq!(expected, &describe_record_batch);