Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Huaxin Gao committed Mar 10, 2024
1 parent e75dd35 commit 9d9364f
Show file tree
Hide file tree
Showing 2 changed files with 182 additions and 102 deletions.
32 changes: 18 additions & 14 deletions datafusion/physical-expr/src/window/nth_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,29 +226,33 @@ impl PartitionEvaluator for NthValueEvaluator {
return ScalarValue::try_from(arr.data_type());
}

let mut valid_indices = Vec::new();
if self.ignore_nulls {
valid_indices = arr.nulls().unwrap().valid_indices().collect::<Vec<_>>();
// Extract valid indices if ignoring nulls.
let (slice, valid_indices) = if self.ignore_nulls {
let slice = arr.slice(range.start, n_range);
let valid_indices = slice.nulls().unwrap().valid_indices().collect::<Vec<_>>();
if valid_indices.is_empty() {
return ScalarValue::try_from(arr.data_type());
}
}
(Some(slice), Some(valid_indices))
} else {
(None, None)
};
match self.state.kind {
NthValueKind::First => {
let index: usize = if self.ignore_nulls {
valid_indices[0]
if let Some(slice) = &slice {
let valid_indices = valid_indices.unwrap();
ScalarValue::try_from_array(slice, valid_indices[0])
} else {
range.start
};
ScalarValue::try_from_array(arr, index)
ScalarValue::try_from_array(arr, range.start)
}
}
NthValueKind::Last => {
let index = if self.ignore_nulls {
valid_indices[valid_indices.len() - 1]
if let Some(slice) = &slice {
let valid_indices = valid_indices.unwrap();
ScalarValue::try_from_array(slice, valid_indices[valid_indices.len() - 1])
} else {
range.end - 1
};
ScalarValue::try_from_array(arr, index)
ScalarValue::try_from_array(arr, range.end - 1)
}
}
NthValueKind::Nth(n) => {
match n.cmp(&0) {
Expand Down
252 changes: 164 additions & 88 deletions datafusion/sqllogictest/test_files/window.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4300,70 +4300,98 @@ DROP TABLE t;

# Test for ignore nulls with ORDER BY in FIRST_VALUE
statement ok
CREATE TABLE t AS VALUES (3, 4), (4, 3), (null::bigint, 1), (null::bigint, 1);
CREATE TABLE t AS VALUES (3, 4), (4, 3), (null::bigint, 1), (null::bigint, 2), (5, 5), (6, 6);

query I
SELECT column1 FROM t ORDER BY column2;
query II
SELECT column1, column2 FROM t ORDER BY column2;
----
NULL
NULL
4
3
NULL 1
NULL 2
4 3
3 4
5 5
6 6

query I
SELECT FIRST_VALUE(column1) OVER(ORDER BY column2) FROM t;
query II
SELECT FIRST_VALUE(column1) OVER(ORDER BY column2), column2 FROM t;
----
NULL
NULL
NULL
NULL
NULL 1
NULL 2
NULL 3
NULL 4
NULL 5
NULL 6

query I
SELECT FIRST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2) FROM t;
query II
SELECT FIRST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2), column2 FROM t;
----
NULL
NULL
NULL
NULL
NULL 1
NULL 2
NULL 3
NULL 4
NULL 5
NULL 6

query I
SELECT FIRST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2) FROM t;
query II
SELECT FIRST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2), column2 FROM t;
----
NULL 1
NULL 2
4 3
4 4
4 5
4 6

query II
SELECT FIRST_VALUE(column1)OVER(ORDER BY column2 RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING), column2 FROM t;
----
4
4
4
4
NULL 1
NULL 2
NULL 3
4 4
3 5
5 6

query II
SELECT FIRST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING), column2 FROM t;
----
NULL 1
4 2
4 3
4 4
3 5
5 6

statement ok
DROP TABLE t;

# Test for ignore nulls with ORDER BY in FIRST_VALUE with all NULL values
statement ok
CREATE TABLE t AS VALUES (null::bigint, 4), (null::bigint, 3), (null::bigint, 1), (null::bigint, 1);
CREATE TABLE t AS VALUES (null::bigint, 4), (null::bigint, 3), (null::bigint, 1), (null::bigint, 2);

query I
SELECT FIRST_VALUE(column1) OVER(ORDER BY column2) FROM t;
query II
SELECT FIRST_VALUE(column1) OVER(ORDER BY column2), column2 FROM t;
----
NULL
NULL
NULL
NULL
NULL 1
NULL 2
NULL 3
NULL 4

query I
SELECT FIRST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2) FROM t;
query II
SELECT FIRST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2), column2 FROM t;
----
NULL
NULL
NULL
NULL
NULL 1
NULL 2
NULL 3
NULL 4

query I
SELECT FIRST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2) FROM t;
query II
SELECT FIRST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2), column2 FROM t;
----
NULL
NULL
NULL
NULL
NULL 1
NULL 2
NULL 3
NULL 4

statement ok
DROP TABLE t;
Expand Down Expand Up @@ -4398,70 +4426,118 @@ DROP TABLE t;

# Test for ignore nulls with ORDER BY in LAST_VALUE
statement ok
CREATE TABLE t AS VALUES (3, 4), (4, 3), (null::bigint, 1), (null::bigint, 1);
CREATE TABLE t AS VALUES (3, 4), (4, 3), (null::bigint, 1), (null::bigint, 2), (5, 5), (6, 6);

query I
SELECT column1 FROM t ORDER BY column2 DESC NULLS LAST;
query II
SELECT column1, column2 FROM t ORDER BY column2 DESC NULLS LAST;
----
3
4
NULL
NULL
6 6
5 5
3 4
4 3
NULL 2
NULL 1

query I
SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM t;
query II
SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST), column2 FROM t;
----
6 6
5 5
3 4
4 3
NULL 2
NULL 1

# query II
# SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST), column2 FROM t;
# ----
# 6 6
# 5 5
# 3 4
# 4 3
# 4 2
# 4 1

query II
SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t;
----
NULL
NULL
NULL
NULL
NULL 6
NULL 5
NULL 4
NULL 3
NULL 2
NULL 1

query I
SELECT LAST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM t;
query II
SELECT LAST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t;
----
NULL
NULL
NULL
NULL
NULL 6
NULL 5
NULL 4
NULL 3
NULL 2
NULL 1

query I
SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM t;
query II
SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t;
----
4 6
4 5
4 4
4 3
4 2
4 1

query II
SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING), column2 FROM t;
----
5 6
3 5
4 4
NULL 3
NULL 2
NULL 1

query II
SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING), column2 FROM t;
----
4
4
4
4
5 6
3 5
4 4
4 3
4 2
NULL 1

statement ok
DROP TABLE t;

# Test for ignore nulls with ORDER BY in LAST_VALUE with all NULLs
statement ok
CREATE TABLE t AS VALUES (null::bigint, 4), (null::bigint, 3), (null::bigint, 1), (null::bigint, 1);
CREATE TABLE t AS VALUES (null::bigint, 4), (null::bigint, 3), (null::bigint, 1), (null::bigint, 2);

query I
SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM t;
query II
SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t;
----
NULL
NULL
NULL
NULL
NULL 4
NULL 3
NULL 2
NULL 1

query I
SELECT LAST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM t;
query II
SELECT LAST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t;
----
NULL
NULL
NULL
NULL
NULL 4
NULL 3
NULL 2
NULL 1

query I
SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM t;
query II
SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t;
----
NULL
NULL
NULL
NULL
NULL 4
NULL 3
NULL 2
NULL 1

statement ok
DROP TABLE t;

0 comments on commit 9d9364f

Please sign in to comment.