Skip to content

Commit

Permalink
add mean result for describe method (#5435)
Browse files: browse the repository at this point in the history
  • Loading branch information
jiangzhx authored Mar 1, 2023
1 parent eef0464 commit 793feda
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 6 deletions.
23 changes: 18 additions & 5 deletions datafusion/core/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,8 @@ impl DataFrame {
/// ```
pub async fn describe(self) -> Result<Self> {
//the functions now supported
let supported_describe_functions = vec!["count", "null_count", "max", "min"];
let supported_describe_functions =
vec!["count", "null_count", "mean", "min", "max"];

let fields_iter = self.schema().fields().iter();

Expand Down Expand Up @@ -369,7 +370,19 @@ impl DataFrame {
)?
.collect()
.await?,
// max aggregation
// mean aggregation
self.clone()
.aggregate(
vec![],
fields_iter
.clone()
.filter(|f| f.data_type().is_numeric())
.map(|f| datafusion_expr::avg(col(f.name())).alias(f.name()))
.collect::<Vec<_>>(),
)?
.collect()
.await?,
// min aggregation
self.clone()
.aggregate(
vec![],
Expand All @@ -378,12 +391,12 @@ impl DataFrame {
.filter(|f| {
!matches!(f.data_type(), DataType::Binary | DataType::Boolean)
})
.map(|f| datafusion_expr::max(col(f.name())).alias(f.name()))
.map(|f| datafusion_expr::min(col(f.name())).alias(f.name()))
.collect::<Vec<_>>(),
)?
.collect()
.await?,
// min aggregation
// max aggregation
self.clone()
.aggregate(
vec![],
Expand All @@ -392,7 +405,7 @@ impl DataFrame {
.filter(|f| {
!matches!(f.data_type(), DataType::Binary | DataType::Boolean)
})
.map(|f| datafusion_expr::min(col(f.name())).alias(f.name()))
.map(|f| datafusion_expr::max(col(f.name())).alias(f.name()))
.collect::<Vec<_>>(),
)?
.collect()
Expand Down
3 changes: 2 additions & 1 deletion datafusion/core/tests/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,9 @@ async fn describe() -> Result<()> {
"+------------+-----+----------+-------------+--------------+---------+------------+-------------------+------------+-----------------+------------+---------------------+",
"| count | 8.0 | 8 | 8.0 | 8.0 | 8.0 | 8.0 | 8.0 | 8.0 | 8 | 8 | 8 |",
"| null_count | 8.0 | 8 | 8.0 | 8.0 | 8.0 | 8.0 | 8.0 | 8.0 | 8 | 8 | 8 |",
"| max | 7.0 | null | 1.0 | 1.0 | 1.0 | 10.0 | 1.100000023841858 | 10.1 | null | null | 2009-04-01T00:01:00 |",
"| mean | 3.5 | null | 0.5 | 0.5 | 0.5 | 5.0 | 0.550000011920929 | 5.05 | null | null | null |",
"| min | 0.0 | null | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | null | null | 2009-01-01T00:00:00 |",
"| max | 7.0 | null | 1.0 | 1.0 | 1.0 | 10.0 | 1.100000023841858 | 10.1 | null | null | 2009-04-01T00:01:00 |",
"+------------+-----+----------+-------------+--------------+---------+------------+-------------------+------------+-----------------+------------+---------------------+",
];
assert_batches_eq!(expected, &describe_record_batch);
Expand Down

0 comments on commit 793feda

Please sign in to comment.