-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Introduce Boolean Coercion #8331
Changes from all commits
9fd8f76
570e475
720e925
aefb39c
2af4259
7171bd6
c14820d
268b69b
4b0bfbc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -799,69 +799,6 @@ mod tests { | |
Ok(batch) | ||
} | ||
|
||
#[test] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. move to slt |
||
fn case_test_incompatible() -> Result<()> { | ||
// 1 then is int64 | ||
// 2 then is boolean | ||
let batch = case_test_batch()?; | ||
let schema = batch.schema(); | ||
|
||
// CASE WHEN a = 'foo' THEN 123 WHEN a = 'bar' THEN true END | ||
let when1 = binary( | ||
col("a", &schema)?, | ||
Operator::Eq, | ||
lit("foo"), | ||
&batch.schema(), | ||
)?; | ||
let then1 = lit(123i32); | ||
let when2 = binary( | ||
col("a", &schema)?, | ||
Operator::Eq, | ||
lit("bar"), | ||
&batch.schema(), | ||
)?; | ||
let then2 = lit(true); | ||
|
||
let expr = generate_case_when_with_type_coercion( | ||
None, | ||
vec![(when1, then1), (when2, then2)], | ||
None, | ||
schema.as_ref(), | ||
); | ||
assert!(expr.is_err()); | ||
|
||
// then 1 is int32 | ||
// then 2 is int64 | ||
// else is float | ||
// CASE WHEN a = 'foo' THEN 123 WHEN a = 'bar' THEN 456 ELSE 1.23 END | ||
let when1 = binary( | ||
col("a", &schema)?, | ||
Operator::Eq, | ||
lit("foo"), | ||
&batch.schema(), | ||
)?; | ||
let then1 = lit(123i32); | ||
let when2 = binary( | ||
col("a", &schema)?, | ||
Operator::Eq, | ||
lit("bar"), | ||
&batch.schema(), | ||
)?; | ||
let then2 = lit(456i64); | ||
let else_expr = lit(1.23f64); | ||
|
||
let expr = generate_case_when_with_type_coercion( | ||
None, | ||
vec![(when1, then1), (when2, then2)], | ||
Some(else_expr), | ||
schema.as_ref(), | ||
); | ||
assert!(expr.is_ok()); | ||
let result_type = expr.unwrap().data_type(schema.as_ref())?; | ||
assert_eq!(DataType::Float64, result_type); | ||
Ok(()) | ||
} | ||
|
||
#[test] | ||
fn case_eq() -> Result<()> { | ||
let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2190,18 +2190,6 @@ fn union_with_aliases() { | |
quick_test(sql, expected); | ||
} | ||
|
||
#[test] | ||
fn union_with_incompatible_data_types() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. move to slt |
||
let sql = "SELECT 'a' a UNION ALL SELECT true a"; | ||
let err = logical_plan(sql) | ||
.expect_err("query should have failed") | ||
.strip_backtrace(); | ||
assert_eq!( | ||
"Error during planning: UNION Column a (type: Boolean) is not compatible with column a (type: Utf8)", | ||
err | ||
); | ||
} | ||
|
||
#[test] | ||
fn empty_over() { | ||
let sql = "SELECT order_id, MAX(order_id) OVER () from orders"; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -557,10 +557,16 @@ select column1[0:5], column2[0:3], column3[0:9] from arrays; | |
## make_array (aliases: `make_list`) | ||
|
||
# make_array scalar function #1 | ||
query ??? | ||
select make_array(1, 2, 3), make_array(1.0, 2.0, 3.0), make_array('h', 'e', 'l', 'l', 'o'); | ||
---- | ||
[1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o] | ||
query ?????? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you also add some tests for other uses of this logic (not just in make_array), such as comparisons? I notice that Postgres doesn't handle boolean coercion.
However, after this PR datafusion does: DataFusion CLI v33.0.0
❯ select true = 1;
+--------------------------+
| Boolean(true) = Int64(1) |
+--------------------------+
| true |
+--------------------------+
1 row in set. Query took 0.006 seconds. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. DuckDB supports boolean coercion, so we can consider following it. If it does not break the overall design, I think we can support it too. |
||
select | ||
make_array(1, 2, 3), | ||
make_array(1.0, 2.0, 3.0), | ||
make_array('h', 'e', 'l', 'l', 'o'), | ||
make_array(true, 1, 2, false), | ||
make_array(true, 1, 2.3, false), | ||
make_array(true, 1, 2.3, false, '4'); | ||
---- | ||
[1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o] [1, 1, 2, 0] [1.0, 1.0, 2.3, 0.0] [true, 1, 2.3, false, 4] | ||
|
||
# make_array scalar function #2 | ||
query ??? | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1296,6 +1296,27 @@ NULL | |
123 | ||
NULL | ||
|
||
# integer with float | ||
query I | ||
select case when a = 'rust' then arrow_cast(1, 'Int32') when a == 'c++' then arrow_cast(2, 'Int64') else 1.5 end from (values('python')) as t(a); | ||
---- | ||
1.5 | ||
|
||
# integer with boolean | ||
query I | ||
select case when a = 'rust' then 1 when a == 'c++' then false end from (values('c++')) as t(a); | ||
---- | ||
0 | ||
|
||
# type coercion not supported in case expr (boolean <-> timestamp) | ||
# | ||
# DataFusion error: type_coercion | ||
# caused by | ||
# Error during planning: Failed to coerce then ([Timestamp(Nanosecond, None), Boolean]) and else (None) to common types in CASE WHEN expression | ||
query error | ||
select case when a = 'foo' then arrow_cast(500, 'Timestamp(Nanosecond, None)') when a = 'bar' then true end from (values('foo')) as t(a); | ||
|
||
|
||
# csv_query_sum_cast() { | ||
|
||
statement ok | ||
|
@@ -1926,3 +1947,14 @@ A true | |
B false | ||
C false | ||
D false | ||
|
||
# bool_coercion | ||
query BBBB | ||
select 1 == true, false == 0, 1.0 == true, false == 'false'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
---- | ||
true true true true | ||
|
||
query BBBB | ||
select 2 > true, false is not distinct from 0, 1.0 >= true, true is distinct from 'false'; | ||
---- | ||
true true true true |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this also needs to check when `rhs_type` is a `DataType::Boolean` as well.
I would expect both of the following queries to work and return the same thing