Skip to content

Commit

Permalink
Count distinct boolean (#230)
Browse files Browse the repository at this point in the history
* Added boolean support for count distinct.

* Added boolean support for COUNT DISTINCT

* Corrected macro call

* Added test for boolean COUNT DISTINCT

* Ran cargo fmt

* Corrected test assertion for boolean COUNT DISTINCT

* Fixed clippy warnings

* Fixed cargo fmt
  • Loading branch information
pjmore authored May 3, 2021
1 parent 47bd3fa commit b9b3d86
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 3 deletions.
60 changes: 57 additions & 3 deletions datafusion/src/physical_plan/distinct_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,10 +195,9 @@ impl Accumulator for DistinctCountAccumulator {
mod tests {
use super::*;

use arrow::array::ArrayRef;
use arrow::array::{
Int16Array, Int32Array, Int64Array, Int8Array, ListArray, UInt16Array,
UInt32Array, UInt64Array, UInt8Array,
ArrayRef, BooleanArray, Int16Array, Int32Array, Int64Array, Int8Array, ListArray,
UInt16Array, UInt32Array, UInt64Array, UInt8Array,
};
use arrow::array::{Int32Builder, ListBuilder, UInt64Builder};
use arrow::datatypes::DataType;
Expand Down Expand Up @@ -396,6 +395,61 @@ mod tests {
test_count_distinct_update_batch_numeric!(UInt64Array, UInt64, u64)
}

#[test]
fn count_distinct_update_batch_boolean() -> Result<()> {
    // Runs a single boolean column through `run_update_batch` and hands back
    // the accumulator's first state (sorted) together with the unwrapped count.
    let distinct_state_and_count =
        |input: BooleanArray| -> Result<(Vec<Option<bool>>, u64)> {
            let columns = vec![Arc::new(input) as ArrayRef];
            let (states, scalar) = run_update_batch(&columns)?;

            // Sort so the comparisons below do not depend on the order in
            // which the state happens to list its values.
            let mut seen = state_to_vec!(&states[0], Boolean, bool).unwrap();
            seen.sort_unstable();

            // The final value must be a non-null UInt64; anything else is
            // reported as an internal error rather than a panic.
            let total = match scalar {
                ScalarValue::UInt64(Some(n)) => n,
                ScalarValue::UInt64(None) => {
                    return Err(DataFusionError::Internal(
                        "Found None count".to_string(),
                    ))
                }
                other => {
                    return Err(DataFusionError::Internal(format!(
                        "Found non Uint64 scalar value from count: {}",
                        other
                    )))
                }
            };

            Ok((seen, total))
        };

    // Empty input: empty state and a count of zero.
    assert_eq!(
        distinct_state_and_count(BooleanArray::from(Vec::<bool>::new()))?,
        (Vec::<Option<bool>>::new(), 0)
    );

    // A single distinct value, repeated.
    assert_eq!(
        distinct_state_and_count(BooleanArray::from(vec![false, false]))?,
        (vec![Some(false)], 1)
    );

    // A single distinct value mixed with nulls: the expected state and count
    // contain only the non-null value.
    assert_eq!(
        distinct_state_and_count(BooleanArray::from(vec![
            Some(true),
            Some(true),
            None,
            None,
        ]))?,
        (vec![Some(true)], 1)
    );

    // Both boolean values present, without nulls.
    assert_eq!(
        distinct_state_and_count(BooleanArray::from(vec![
            true, false, true, false, true,
        ]))?,
        (vec![Some(false), Some(true)], 2)
    );

    // Both boolean values present, interleaved with nulls.
    assert_eq!(
        distinct_state_and_count(BooleanArray::from(vec![
            Some(true),
            Some(false),
            None,
            None,
            Some(true),
            Some(false),
        ]))?,
        (vec![Some(false), Some(true)], 2)
    );

    Ok(())
}

#[test]
fn count_distinct_update_batch_all_nulls() -> Result<()> {
let arrays = vec![Arc::new(Int32Array::from(
Expand Down
1 change: 1 addition & 0 deletions datafusion/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ impl ScalarValue {
),
},
ScalarValue::List(values, data_type) => Arc::new(match data_type {
DataType::Boolean => build_list!(BooleanBuilder, Boolean, values, size),
DataType::Int8 => build_list!(Int8Builder, Int8, values, size),
DataType::Int16 => build_list!(Int16Builder, Int16, values, size),
DataType::Int32 => build_list!(Int32Builder, Int32, values, size),
Expand Down

0 comments on commit b9b3d86

Please sign in to comment.