From 9d9364fd7f3e3f4f21e836375add47364fbe5bf8 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Sat, 9 Mar 2024 17:23:20 -0800 Subject: [PATCH] address comments --- .../physical-expr/src/window/nth_value.rs | 32 ++- datafusion/sqllogictest/test_files/window.slt | 252 ++++++++++++------ 2 files changed, 182 insertions(+), 102 deletions(-) diff --git a/datafusion/physical-expr/src/window/nth_value.rs b/datafusion/physical-expr/src/window/nth_value.rs index 647409b089821..5e64c96e5d1fd 100644 --- a/datafusion/physical-expr/src/window/nth_value.rs +++ b/datafusion/physical-expr/src/window/nth_value.rs @@ -226,29 +226,33 @@ impl PartitionEvaluator for NthValueEvaluator { return ScalarValue::try_from(arr.data_type()); } - let mut valid_indices = Vec::new(); - if self.ignore_nulls { - valid_indices = arr.nulls().unwrap().valid_indices().collect::>(); + // Extract valid indices if ignoring nulls. + let (slice, valid_indices) = if self.ignore_nulls { + let slice = arr.slice(range.start, n_range); + let valid_indices = slice.nulls().unwrap().valid_indices().collect::>(); if valid_indices.is_empty() { return ScalarValue::try_from(arr.data_type()); } - } + (Some(slice), Some(valid_indices)) + } else { + (None, None) + }; match self.state.kind { NthValueKind::First => { - let index: usize = if self.ignore_nulls { - valid_indices[0] + if let Some(slice) = &slice { + let valid_indices = valid_indices.unwrap(); + ScalarValue::try_from_array(slice, valid_indices[0]) } else { - range.start - }; - ScalarValue::try_from_array(arr, index) + ScalarValue::try_from_array(arr, range.start) + } } NthValueKind::Last => { - let index = if self.ignore_nulls { - valid_indices[valid_indices.len() - 1] + if let Some(slice) = &slice { + let valid_indices = valid_indices.unwrap(); + ScalarValue::try_from_array(slice, valid_indices[valid_indices.len() - 1]) } else { - range.end - 1 - }; - ScalarValue::try_from_array(arr, index) + ScalarValue::try_from_array(arr, range.end - 1) + } } NthValueKind::Nth(n) => { match n.cmp(&0) { diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index b20e112b6d8d8..b5e358c047235 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -4300,70 +4300,98 @@ DROP TABLE t; # Test for ignore nulls with ORDER BY in FIRST_VALUE statement ok -CREATE TABLE t AS VALUES (3, 4), (4, 3), (null::bigint, 1), (null::bigint, 1); +CREATE TABLE t AS VALUES (3, 4), (4, 3), (null::bigint, 1), (null::bigint, 2), (5, 5), (6, 6); -query I -SELECT column1 FROM t ORDER BY column2; +query II +SELECT column1, column2 FROM t ORDER BY column2; ---- -NULL -NULL -4 -3 +NULL 1 +NULL 2 +4 3 +3 4 +5 5 +6 6 -query I -SELECT FIRST_VALUE(column1) OVER(ORDER BY column2) FROM t; +query II +SELECT FIRST_VALUE(column1) OVER(ORDER BY column2), column2 FROM t; ---- -NULL -NULL -NULL -NULL +NULL 1 +NULL 2 +NULL 3 +NULL 4 +NULL 5 +NULL 6 -query I -SELECT FIRST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2) FROM t; +query II +SELECT FIRST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2), column2 FROM t; ---- -NULL -NULL -NULL -NULL +NULL 1 +NULL 2 +NULL 3 +NULL 4 +NULL 5 +NULL 6 -query I -SELECT FIRST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2) FROM t; +query II +SELECT FIRST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2), column2 FROM t; +---- +NULL 1 +NULL 2 +4 3 +4 4 +4 5 +4 6 + +query II +SELECT FIRST_VALUE(column1)OVER(ORDER BY column2 RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING), column2 FROM t; ---- -4 -4 -4 -4 +NULL 1 +NULL 2 +NULL 3 +4 4 +3 5 +5 6 + +query II +SELECT FIRST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING), column2 FROM t; +---- +NULL 1 +4 2 +4 3 +4 4 +3 5 +5 6 statement ok DROP TABLE t; # Test for ignore nulls with ORDER BY in FIRST_VALUE with all NULL values statement ok -CREATE TABLE t AS VALUES (null::bigint, 4), (null::bigint, 3), (null::bigint, 1), (null::bigint, 1); +CREATE TABLE t AS VALUES (null::bigint, 4), (null::bigint, 3), (null::bigint, 1), (null::bigint, 2); -query I -SELECT FIRST_VALUE(column1) OVER(ORDER BY column2) FROM t; +query II +SELECT FIRST_VALUE(column1) OVER(ORDER BY column2), column2 FROM t; ---- -NULL -NULL -NULL -NULL +NULL 1 +NULL 2 +NULL 3 +NULL 4 -query I -SELECT FIRST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2) FROM t; +query II +SELECT FIRST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2), column2 FROM t; ---- -NULL -NULL -NULL -NULL +NULL 1 +NULL 2 +NULL 3 +NULL 4 -query I -SELECT FIRST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2) FROM t; +query II +SELECT FIRST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2), column2 FROM t; ---- -NULL -NULL -NULL -NULL +NULL 1 +NULL 2 +NULL 3 +NULL 4 statement ok DROP TABLE t; @@ -4398,70 +4426,118 @@ DROP TABLE t; # Test for ignore nulls with ORDER BY in LAST_VALUE statement ok -CREATE TABLE t AS VALUES (3, 4), (4, 3), (null::bigint, 1), (null::bigint, 1); +CREATE TABLE t AS VALUES (3, 4), (4, 3), (null::bigint, 1), (null::bigint, 2), (5, 5), (6, 6); -query I -SELECT column1 FROM t ORDER BY column2 DESC NULLS LAST; +query II +SELECT column1, column2 FROM t ORDER BY column2 DESC NULLS LAST; ---- -3 -4 -NULL -NULL +6 6 +5 5 +3 4 +4 3 +NULL 2 +NULL 1 -query I -SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM t; +query II +SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST), column2 FROM t; +---- +6 6 +5 5 +3 4 +4 3 +NULL 2 +NULL 1 + +# query II +# SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST), column2 FROM t; +# ---- +# 6 6 +# 5 5 +# 3 4 +# 4 3 +# 4 2 +# 4 1 + +query II +SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t; ---- -NULL -NULL -NULL -NULL +NULL 6 +NULL 5 +NULL 4 +NULL 3 +NULL 2 +NULL 1 -query I -SELECT LAST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM t; +query II +SELECT LAST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t; ---- -NULL -NULL -NULL -NULL +NULL 6 +NULL 5 +NULL 4 +NULL 3 +NULL 2 +NULL 1 -query I -SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM t; +query II +SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t; +---- +4 6 +4 5 +4 4 +4 3 +4 2 +4 1 + +query II +SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING), column2 FROM t; +---- +5 6 +3 5 +4 4 +NULL 3 +NULL 2 +NULL 1 + +query II +SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING), column2 FROM t; ---- -4 -4 -4 -4 +5 6 +3 5 +4 4 +4 3 +4 2 +NULL 1 statement ok DROP TABLE t; # Test for ignore nulls with ORDER BY in LAST_VALUE with all NULLs statement ok -CREATE TABLE t AS VALUES (null::bigint, 4), (null::bigint, 3), (null::bigint, 1), (null::bigint, 1); +CREATE TABLE t AS VALUES (null::bigint, 4), (null::bigint, 3), (null::bigint, 1), (null::bigint, 2); -query I -SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM t; +query II +SELECT LAST_VALUE(column1) OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t; ---- -NULL -NULL -NULL -NULL +NULL 4 +NULL 3 +NULL 2 +NULL 1 -query I -SELECT LAST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM t; +query II +SELECT LAST_VALUE(column1) RESPECT NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t; ---- -NULL -NULL -NULL -NULL +NULL 4 +NULL 3 +NULL 2 +NULL 1 -query I -SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM t; +query II +SELECT LAST_VALUE(column1) IGNORE NULLS OVER(ORDER BY column2 DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), column2 FROM t; ---- -NULL -NULL -NULL -NULL +NULL 4 +NULL 3 +NULL 2 +NULL 1 statement ok DROP TABLE t;