Skip to content

Commit

Permalink
fix length error with array_has
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin committed Sep 13, 2024
1 parent 389f7f7 commit 05cf5fc
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 1 deletion.
2 changes: 2 additions & 0 deletions datafusion/functions-nested/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ rand = "0.8.5"

[dev-dependencies]
criterion = { version = "0.5", features = ["async_tokio"] }
datafusion = { workspace = true }
tokio = { workspace = true, features = ["macros", "rt", "sync"] }

[[bench]]
harness = false
Expand Down
46 changes: 45 additions & 1 deletion datafusion/functions-nested/src/array_has.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use arrow::array::{Array, ArrayRef, BooleanArray, OffsetSizeTrait};
use arrow::datatypes::DataType;
use arrow::row::{RowConverter, Rows, SortField};
use arrow_array::{Datum, GenericListArray, Scalar};
use arrow_buffer::BooleanBuffer;
use datafusion_common::cast::as_generic_list_array;
use datafusion_common::utils::string_utils::string_array_to_vec;
use datafusion_common::{exec_err, Result, ScalarValue};
Expand Down Expand Up @@ -200,7 +201,10 @@ fn array_has_dispatch_for_scalar<O: OffsetSizeTrait>(
// If the first argument is an empty list (and the second argument is non-null),
// return false for each row of the haystack,
// i.e. array_has([], non-null element) -> false
if values.len() == 0 {
return Ok(Arc::new(BooleanArray::from(vec![Some(false)])));
return Ok(Arc::new(BooleanArray::new(
BooleanBuffer::new_unset(haystack.len()),
None,
)));
}
let eq_array = compare_with_eq(values, needle, is_nested)?;
let mut final_contained = vec![None; haystack.len()];
Expand Down Expand Up @@ -457,3 +461,43 @@ fn general_array_has_all_and_any_kernel(
}),
}
}

#[cfg(test)]
mod test {
    //! End-to-end checks for `array_has`, driven through SQL on a `SessionContext`.

    use arrow::datatypes::{DataType, Field, Schema, UInt32Type};
    use arrow::record_batch::RecordBatch;
    use arrow_array::ListArray;
    use datafusion::logical_expr::ScalarUDF;
    use datafusion::prelude::*;

    use super::*;

    /// Filtering with `array_has(items, 1)` over a batch whose three rows are
    /// all empty (non-null) lists must select no rows.
    #[tokio::test]
    async fn test_empty_haystack() {
        let ctx = SessionContext::new();
        ctx.register_udf(ScalarUDF::from(ArrayHas::default()));

        // Single nullable column "items" of type List<UInt32> with nullable elements.
        let element_field = Arc::new(Field::new_list_field(DataType::UInt32, true));
        let items_field = Field::new("items", DataType::List(element_field), true);
        let schema = Arc::new(Schema::new(vec![items_field]));

        // Three rows, each one an empty list rather than a null.
        let empty_lists: Vec<Option<Vec<Option<u32>>>> =
            vec![Some(vec![]), Some(vec![]), Some(vec![])];
        let items: ArrayRef =
            Arc::new(ListArray::from_iter_primitive::<UInt32Type, _, _>(empty_lists));

        let batch = RecordBatch::try_new(schema, vec![items]).unwrap();
        ctx.register_batch("test", batch).unwrap();

        let frame = ctx
            .sql("SELECT 1 from test where array_has(items, 1)")
            .await
            .unwrap();
        let matched = frame.count().await.unwrap();
        assert_eq!(matched, 0);
    }
}

0 comments on commit 05cf5fc

Please sign in to comment.