Skip to content

Commit

Permalink
Fix count(null) and count(distinct null)
Browse files Browse the repository at this point in the history
Use `logical_nulls` when the array data type is `Null`.
  • Loading branch information
joroKr21 committed Dec 12, 2023
1 parent 95ba48b commit 9d3a3e9
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 18 deletions.
44 changes: 27 additions & 17 deletions datafusion/physical-expr/src/aggregate/count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,20 @@ impl GroupsAccumulator for CountGroupsAccumulator {
// Add one to each group's counter for each non null, non
// filtered value
self.counts.resize(total_num_groups, 0);
accumulate_indices(
group_indices,
values.nulls(), // ignore values
opt_filter,
|group_index| {
self.counts[group_index] += 1;
},
);
let index_fn = |group_index| {
self.counts[group_index] += 1;
};

if values.data_type() == &DataType::Null {
accumulate_indices(
group_indices,
values.logical_nulls().as_ref(),
opt_filter,
index_fn,
);
} else {
accumulate_indices(group_indices, values.nulls(), opt_filter, index_fn);
}

Ok(())
}
Expand Down Expand Up @@ -196,18 +202,22 @@ impl GroupsAccumulator for CountGroupsAccumulator {
/// for each row if one column value is null, then null_count + 1
fn null_count_for_multiple_cols(values: &[ArrayRef]) -> usize {
if values.len() > 1 {
let result_bool_buf: Option<BooleanBuffer> = values
.iter()
.map(|a| a.nulls())
.fold(None, |acc, b| match (acc, b) {
(Some(acc), Some(b)) => Some(acc.bitand(b.inner())),
(Some(acc), None) => Some(acc),
(None, Some(b)) => Some(b.inner().clone()),
_ => None,
let result_bool_buf: Option<BooleanBuffer> =
values.iter().fold(None, |acc, a| match (acc, a) {
(Some(acc), a) if a.data_type() == &DataType::Null => {
Some(acc.bitand(a.logical_nulls()?.inner()))
}
(Some(acc), a) => Some(acc.bitand(a.nulls()?.inner())),
(None, a) => Some(a.logical_nulls()?.into_inner()),
});
result_bool_buf.map_or(0, |b| values[0].len() - b.count_set_bits())
} else {
values[0].null_count()
let values = &values[0];
if values.data_type() == &DataType::Null {
values.len()
} else {
values.null_count()
}
}
}

Expand Down
5 changes: 5 additions & 0 deletions datafusion/physical-expr/src/aggregate/count_distinct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,12 @@ impl Accumulator for DistinctCountAccumulator {
if values.is_empty() {
return Ok(());
}

let arr = &values[0];
if arr.data_type() == &DataType::Null {
return Ok(());
}

(0..arr.len()).try_for_each(|index| {
if !arr.is_null(index) {
let scalar = ScalarValue::try_from_array(arr, index)?;
Expand Down
17 changes: 16 additions & 1 deletion datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1492,6 +1492,12 @@ SELECT count(c1, c2) FROM test
----
3

# count_null
query III
SELECT count(null), count(null, null), count(distinct null) FROM test
----
0 0 0

# count_multi_expr_group_by
query I
SELECT count(c1, c2) FROM test group by c1 order by c1
Expand All @@ -1501,6 +1507,15 @@ SELECT count(c1, c2) FROM test group by c1 order by c1
2
0

# count_null_group_by
query III
SELECT count(null), count(null, null), count(distinct null) FROM test group by c1 order by c1
----
0 0 0
0 0 0
0 0 0
0 0 0

# aggreggte_with_alias
query II
select c1, sum(c2) as `Total Salary` from test group by c1 order by c1
Expand Down Expand Up @@ -3241,4 +3256,4 @@ select count(*) from (select count(*) from (select 1));
query I
select count(*) from (select count(*) a, count(*) b from (select 1));
----
1
1

0 comments on commit 9d3a3e9

Please sign in to comment.