From 4cc1dcb1753b1d283877c8c7edf315646977353d Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Mon, 25 Nov 2024 15:46:19 +0800 Subject: [PATCH 01/15] chore(planner): improve cardinality estimation --- .../planner/optimizer/property/histogram.rs | 4 +- .../planner/optimizer/property/selectivity.rs | 54 ++++++-- src/query/sql/src/planner/plans/join.rs | 121 +++++------------- 3 files changed, 77 insertions(+), 102 deletions(-) diff --git a/src/query/sql/src/planner/optimizer/property/histogram.rs b/src/query/sql/src/planner/optimizer/property/histogram.rs index 055be4c4f80c..8513f6ec1249 100644 --- a/src/query/sql/src/planner/optimizer/property/histogram.rs +++ b/src/query/sql/src/planner/optimizer/property/histogram.rs @@ -77,13 +77,13 @@ pub fn histogram_from_ndv( let mut buckets: Vec = Vec::with_capacity(num_buckets); let sample_set = UniformSampleSet { min, max }; - for idx in 0..num_buckets + 1 { - let upper_bound = sample_set.get_upper_bound(num_buckets, idx)?; + for idx in 0..num_buckets { let lower_bound = if idx == 0 { sample_set.min.clone() } else { buckets[idx - 1].upper_bound().clone() }; + let upper_bound = sample_set.get_upper_bound(num_buckets, idx + 1)?; let bucket = HistogramBucket::new( lower_bound, upper_bound, diff --git a/src/query/sql/src/planner/optimizer/property/selectivity.rs b/src/query/sql/src/planner/optimizer/property/selectivity.rs index 5986a0082c6c..4a1020aaaa40 100644 --- a/src/query/sql/src/planner/optimizer/property/selectivity.rs +++ b/src/query/sql/src/planner/optimizer/property/selectivity.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::max; use std::cmp::Ordering; use std::collections::HashSet; @@ -27,8 +28,10 @@ use databend_common_expression::FunctionContext; use databend_common_expression::Scalar; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_storage::Datum; +use databend_common_storage::DEFAULT_HISTOGRAM_BUCKETS; use databend_common_storage::F64; +use crate::optimizer::histogram_from_ndv; use crate::optimizer::ColumnStat; use crate::optimizer::Statistics; use crate::plans::BoundColumnRef; @@ -323,8 +326,10 @@ impl<'a> SelectivityEstimator<'a> { column_stat.ndv = new_ndv; if let Some(histogram) = &mut column_stat.histogram { if histogram.accuracy { + // If selectivity < 0.2, most buckets are invalid and + // the accuracy histogram can be discarded. // Todo: find a better way to update histogram. - if selectivity < 0.8 { + if selectivity < 0.2 { column_stat.histogram = None; } continue; @@ -349,13 +354,14 @@ impl<'a> SelectivityEstimator<'a> { column_stat: &mut ColumnStat, updated_column_indexes: &mut HashSet, ) -> Result { - let col_hist = column_stat.histogram.as_ref(); + let histogram = column_stat.histogram.as_ref(); - if col_hist.is_none() && const_datum.is_numeric() { + if histogram.is_none() && const_datum.is_numeric() { // If there is no histogram and the column isn't numeric, return default selectivity. - if !column_stat.min.is_numeric() { + if !column_stat.min.is_numeric() || !column_stat.max.is_numeric() { return Ok(DEFAULT_SELECTIVITY); } + let min = column_stat.min.to_double()?; let max = column_stat.max.to_double()?; let ndv = column_stat.ndv; @@ -427,10 +433,10 @@ impl<'a> SelectivityEstimator<'a> { return Ok(percent); } - if col_hist.is_none() { + let Some(histogram) = histogram else { return Ok(DEFAULT_SELECTIVITY); - } - let col_hist = col_hist.unwrap(); + }; + let (mut num_greater, new_min, new_max) = match comparison_op { ComparisonOp::GT | ComparisonOp::GTE => { let new_min = const_datum.clone(); @@ -445,7 +451,7 @@ impl<'a> SelectivityEstimator<'a> { _ => unreachable!(), }; - for bucket in col_hist.buckets_iter() { + for bucket in histogram.buckets_iter() { if let Ok(ord) = bucket.upper_bound().compare(const_datum) { match comparison_op { ComparisonOp::GT => { @@ -484,8 +490,8 @@ impl<'a> SelectivityEstimator<'a> { } let selectivity = match comparison_op { - ComparisonOp::GT | ComparisonOp::GTE => 1.0 - num_greater / col_hist.num_values(), - ComparisonOp::LT | ComparisonOp::LTE => num_greater / col_hist.num_values(), + ComparisonOp::GT | ComparisonOp::GTE => 1.0 - num_greater / histogram.num_values(), + ComparisonOp::LT | ComparisonOp::LTE => num_greater / histogram.num_values(), _ => unreachable!(), }; @@ -575,11 +581,31 @@ fn update_statistic( new_min = Datum::Float(F64::from(new_min.to_double()?)); new_max = Datum::Float(F64::from(new_max.to_double()?)); } - if selectivity < 0.8 { - // Todo: support unfixed buckets number for histogram and prune the histogram. - column_stat.histogram = None; - } column_stat.min = new_min.clone(); column_stat.max = new_max.clone(); + + if let Some(histogram) = &column_stat.histogram { + // If selectivity < 0.2, most buckets are invalid and + // the accuracy histogram can be discarded. + // Todo: support unfixed buckets number for histogram and prune the histogram. + column_stat.histogram = if histogram.accuracy && selectivity >= 0.2 { + Some(histogram.clone()) + } else { + let num_values = histogram.num_values(); + let new_num_values = (num_values * selectivity).ceil() as u64; + let new_ndv = new_ndv as u64; + if new_ndv <= 2 { + column_stat.histogram = None; + return Ok(()); + } + Some(histogram_from_ndv( + new_ndv, + max(new_num_values, new_ndv), + Some((new_min, new_max)), + DEFAULT_HISTOGRAM_BUCKETS, + )?) + } + } + Ok(()) } diff --git a/src/query/sql/src/planner/plans/join.rs b/src/query/sql/src/planner/plans/join.rs index 643d44058972..d9a73a8be65b 100644 --- a/src/query/sql/src/planner/plans/join.rs +++ b/src/query/sql/src/planner/plans/join.rs @@ -355,12 +355,8 @@ impl Join { if join_card_updated { for (idx, left) in left_statistics.column_stats.iter_mut() { if *idx == left_column_index { - if let Some(his) = &left.histogram { - if his.accuracy { - // Todo: find a better way to update accuracy histogram - left.histogram = None; - continue; - } + if left.histogram.is_some() { + // Todo: find a better way to update accuracy histogram left.histogram = if left.ndv as u64 <= 2 { None } else { @@ -384,12 +380,8 @@ impl Join { } for (idx, right) in right_statistics.column_stats.iter_mut() { if *idx == right_column_index { - if let Some(his) = &right.histogram { - if his.accuracy { - // Todo: find a better way to update accuracy histogram - right.histogram = None; - continue; - } + if right.histogram.is_some() { + // Todo: find a better way to update accuracy histogram right.histogram = if right.ndv as u64 <= 2 { None } else { @@ -729,26 +721,17 @@ fn evaluate_by_histogram( for right_bucket in right_hist.buckets.iter() { let right_bucket_min = right_bucket.lower_bound().to_double()?; let right_bucket_max = right_bucket.upper_bound().to_double()?; - if left_bucket_min <= right_bucket_max && left_bucket_max >= right_bucket_min { + if left_bucket_min < right_bucket_max && left_bucket_max > right_bucket_min { has_intersection = true; let right_num_rows = right_bucket.num_values(); let right_ndv = right_bucket.num_distinct(); // There are four cases for interleaving - // 1. left bucket contains right bucket - // ---left_min---right_min---right_max---left_max--- if right_bucket_min >= left_bucket_min && right_bucket_max <= left_bucket_max { - let numerator = if right_bucket_max == right_bucket_min { - 1.0 - } else { - right_bucket_max - right_bucket_min + 1.0 - }; - let denominator = if left_bucket_max == left_bucket_min { - 1.0 - } else { - left_bucket_max - left_bucket_min + 1.0 - }; - let percentage = numerator / denominator; + // 1. left bucket contains right bucket + // ---left_min---right_min---right_max---left_max--- + let percentage = + (right_bucket_max - right_bucket_min) / (left_bucket_max - left_bucket_min); let left_ndv = left_ndv * percentage; let left_num_rows = left_num_rows * percentage; @@ -758,22 +741,12 @@ fn evaluate_by_histogram( all_ndv += left_ndv.min(right_ndv); card += left_num_rows * right_num_rows / max_ndv; } - } - // 2. right bucket contains left bucket - // ---right_min---left_min---left_max---right_max--- - else if left_bucket_min >= right_bucket_min && left_bucket_max <= right_bucket_max + } else if left_bucket_min >= right_bucket_min && left_bucket_max <= right_bucket_max { - let numerator = if left_bucket_max == left_bucket_min { - 1.0 - } else { - left_bucket_max - left_bucket_min + 1.0 - }; - let denominator = if right_bucket_max == right_bucket_min { - 1.0 - } else { - right_bucket_max - right_bucket_min + 1.0 - }; - let percentage = numerator / denominator; + // 2. right bucket contains left bucket + // ---right_min---left_min---left_max---right_max--- + let percentage = + (left_bucket_max - left_bucket_min) / (right_bucket_max - right_bucket_min); let right_ndv = right_ndv * percentage; let right_num_rows = right_num_rows * percentage; @@ -783,29 +756,17 @@ fn evaluate_by_histogram( all_ndv += left_ndv.min(right_ndv); card += left_num_rows * right_num_rows / max_ndv; } - } - // 3. left bucket intersects with right bucket on the left - // ---left_min---right_min---left_max---right_max--- - else if left_bucket_min <= right_bucket_min && left_bucket_max <= right_bucket_max + } else if left_bucket_min <= right_bucket_min && left_bucket_max <= right_bucket_max { - let numerator = if left_bucket_max == right_bucket_min { - 1.0 - } else { - left_bucket_max - right_bucket_min + 1.0 - }; - let left_denominator = if left_bucket_max == left_bucket_min { - 1.0 - } else { - left_bucket_max - left_bucket_min + 1.0 - }; - let right_denominator = if right_bucket_max == right_bucket_min { - 1.0 - } else { - right_bucket_max - right_bucket_min + 1.0 - }; - - let left_percentage = numerator / left_denominator; - let right_percentage = numerator / right_denominator; + // 3. left bucket intersects with right bucket on the left + // ---left_min---right_min---left_max---right_max--- + if left_bucket_max == right_bucket_min { + continue; + } + let left_percentage = + (left_bucket_max - right_bucket_min) / (left_bucket_max - left_bucket_min); + let right_percentage = (left_bucket_max - right_bucket_min) + / (right_bucket_max - right_bucket_min); let left_ndv = left_ndv * left_percentage; let left_num_rows = left_num_rows * left_percentage; @@ -817,29 +778,17 @@ fn evaluate_by_histogram( all_ndv += left_ndv.min(right_ndv); card += left_num_rows * right_num_rows / max_ndv; } - } - // 4. left bucket intersects with right bucket on the right - // ---right_min---left_min---right_max---left_max--- - else if left_bucket_min >= right_bucket_min && left_bucket_max >= right_bucket_max + } else if left_bucket_min >= right_bucket_min && left_bucket_max >= right_bucket_max { - let numerator = if right_bucket_max == left_bucket_min { - 1.0 - } else { - right_bucket_max - left_bucket_min + 1.0 - }; - let left_denominator = if left_bucket_max == left_bucket_min { - 1.0 - } else { - left_bucket_max - left_bucket_min + 1.0 - }; - let right_denominator = if right_bucket_max == right_bucket_min { - 1.0 - } else { - right_bucket_max - right_bucket_min + 1.0 - }; - - let left_percentage = numerator / left_denominator; - let right_percentage = numerator / right_denominator; + // 4. left bucket intersects with right bucket on the right + // ---right_min---left_min---right_max---left_max--- + if right_bucket_max == left_bucket_min { + continue; + } + let left_percentage = + (right_bucket_max - left_bucket_min) / (left_bucket_max - left_bucket_min); + let right_percentage = (right_bucket_max - left_bucket_min) + / (right_bucket_max - right_bucket_min); let left_ndv = left_ndv * left_percentage; let left_num_rows = left_num_rows * left_percentage; @@ -913,7 +862,7 @@ fn trim_histogram_buckets( hist.buckets .iter() .filter(|bucket| { - (min.is_none() || bucket.upper_bound() >= min.as_ref().unwrap()) + (min.is_none() || bucket.upper_bound() > min.as_ref().unwrap()) && (max.is_none() || bucket.lower_bound() <= max.as_ref().unwrap()) }) .cloned() From 7ba5d9ceeddccea86f3998565c344ebcd2096500 Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Mon, 25 Nov 2024 20:32:26 +0800 Subject: [PATCH 02/15] chore(planner): improve histogram cardinality estimation --- .../planner/optimizer/property/selectivity.rs | 116 +++++++++++------- src/query/sql/src/planner/plans/scan.rs | 6 +- 2 files changed, 74 insertions(+), 48 deletions(-) diff --git a/src/query/sql/src/planner/optimizer/property/selectivity.rs b/src/query/sql/src/planner/optimizer/property/selectivity.rs index 4a1020aaaa40..5c5fa94d2896 100644 --- a/src/query/sql/src/planner/optimizer/property/selectivity.rs +++ b/src/query/sql/src/planner/optimizer/property/selectivity.rs @@ -437,65 +437,89 @@ impl<'a> SelectivityEstimator<'a> { return Ok(DEFAULT_SELECTIVITY); }; - let (mut num_greater, new_min, new_max) = match comparison_op { - ComparisonOp::GT | ComparisonOp::GTE => { - let new_min = const_datum.clone(); - let new_max = column_stat.max.clone(); - (0.0, new_min, new_max) - } - ComparisonOp::LT | ComparisonOp::LTE => { - let new_max = const_datum.clone(); - let new_min = column_stat.min.clone(); - (0.0, new_min, new_max) - } - _ => unreachable!(), - }; - + let mut num_selected = 0.0; for bucket in histogram.buckets_iter() { - if let Ok(ord) = bucket.upper_bound().compare(const_datum) { - match comparison_op { - ComparisonOp::GT => { - if ord == Ordering::Less || ord == Ordering::Equal { - num_greater += bucket.num_values(); - } else { - break; - } - } - ComparisonOp::GTE => { - if ord == Ordering::Less { - num_greater += bucket.num_values(); - } else { - break; - } - } - ComparisonOp::LT => { - if ord == Ordering::Less { - num_greater += bucket.num_values(); + let lower_bound = bucket.lower_bound(); + let upper_bound = bucket.upper_bound(); + + let const_gte_upper_bound = matches!( + const_datum.compare(upper_bound)?, + Ordering::Greater | Ordering::Equal + ); + let (no_overlap, complete_overlap) = match comparison_op { + ComparisonOp::LT => ( + matches!( + const_datum.compare(lower_bound)?, + Ordering::Less | Ordering::Equal + ), + const_gte_upper_bound, + ), + ComparisonOp::LTE => ( + matches!(const_datum.compare(lower_bound)?, Ordering::Less), + const_gte_upper_bound, + ), + ComparisonOp::GT => ( + const_gte_upper_bound, + matches!(const_datum.compare(lower_bound)?, Ordering::Less), + ), + ComparisonOp::GTE => ( + const_gte_upper_bound, + matches!( + const_datum.compare(lower_bound)?, + Ordering::Less | Ordering::Equal + ), + ), + _ => unreachable!(), + }; + + if complete_overlap { + num_selected += bucket.num_values(); + } else if !no_overlap && const_datum.is_numeric() { + let ndv = bucket.num_distinct(); + let lower_bound = lower_bound.to_double()?; + let upper_bound = upper_bound.to_double()?; + let const_value = const_datum.to_double()?; + + let bucket_range = upper_bound - lower_bound; + let bucket_selectivity = match comparison_op { + ComparisonOp::LT => (const_value - lower_bound) / bucket_range, + ComparisonOp::LTE => { + if const_value == lower_bound { + 1.0 / ndv } else { - break; + (const_value - lower_bound + 1.0) / bucket_range } } - ComparisonOp::LTE => { - if ord == Ordering::Less || ord == Ordering::Equal { - num_greater += bucket.num_values(); + ComparisonOp::GT => { + if const_value == lower_bound { + 1.0 - 1.0 / ndv } else { - break; + (upper_bound - const_value - 1.0).max(0.0) / bucket_range } } + ComparisonOp::GTE => (upper_bound - const_value) / bucket_range, _ => unreachable!(), - } - } else { - return Ok(DEFAULT_SELECTIVITY); + }; + num_selected += bucket.num_values() * bucket_selectivity; } } - let selectivity = match comparison_op { - ComparisonOp::GT | ComparisonOp::GTE => 1.0 - num_greater / histogram.num_values(), - ComparisonOp::LT | ComparisonOp::LTE => num_greater / histogram.num_values(), - _ => unreachable!(), - }; + let selectivity = num_selected / histogram.num_values(); if update { + let (new_min, new_max) = match comparison_op { + ComparisonOp::GT | ComparisonOp::GTE => { + let new_min = const_datum.clone(); + let new_max = column_stat.max.clone(); + (new_min, new_max) + } + ComparisonOp::LT | ComparisonOp::LTE => { + let new_max = const_datum.clone(); + let new_min = column_stat.min.clone(); + (new_min, new_max) + } + _ => unreachable!(), + }; update_statistic(column_stat, new_min, new_max, selectivity)?; updated_column_indexes.insert(column_ref.column.index); } diff --git a/src/query/sql/src/planner/plans/scan.rs b/src/query/sql/src/planner/plans/scan.rs index 7a7c8f2d18e3..56cd6835488c 100644 --- a/src/query/sql/src/planner/plans/scan.rs +++ b/src/query/sql/src/planner/plans/scan.rs @@ -238,12 +238,14 @@ impl Operator for Scan { let min = col_stat.min.unwrap(); let max = col_stat.max.unwrap(); let ndv = col_stat.ndv.unwrap(); - let histogram = if let Some(histogram) = self.statistics.histograms.get(k) { + let histogram = if let Some(histogram) = self.statistics.histograms.get(k) + && histogram.is_some() + { histogram.clone() } else { histogram_from_ndv( ndv, - num_rows, + num_rows - col_stat.null_count, Some((min.clone(), max.clone())), DEFAULT_HISTOGRAM_BUCKETS, ) From e3b9ab1c59d5312f3abeb7e151075e81a5ab3242 Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Mon, 25 Nov 2024 20:55:45 +0800 Subject: [PATCH 03/15] chore(planner): improve join cardinality --- src/query/sql/src/planner/plans/join.rs | 32 ------------------------- 1 file changed, 32 deletions(-) diff --git a/src/query/sql/src/planner/plans/join.rs b/src/query/sql/src/planner/plans/join.rs index d9a73a8be65b..d91dbb1194a6 100644 --- a/src/query/sql/src/planner/plans/join.rs +++ b/src/query/sql/src/planner/plans/join.rs @@ -24,7 +24,6 @@ use databend_common_exception::Result; use databend_common_expression::types::F64; use databend_common_storage::Datum; use databend_common_storage::Histogram; -use databend_common_storage::HistogramBucket; use databend_common_storage::DEFAULT_HISTOGRAM_BUCKETS; use crate::optimizer::histogram_from_ndv; @@ -319,8 +318,6 @@ impl Join { let card = match (&left_col_stat.histogram, &right_col_stat.histogram) { (Some(left_hist), Some(right_hist)) => { // Evaluate join cardinality by histogram. - let (left_hist, right_hist) = - trim_buckets(left_hist, right_hist, &new_min, &new_max)?; evaluate_by_histogram(&left_hist, &right_hist, &mut new_ndv)? } _ => evaluate_by_ndv( @@ -853,32 +850,3 @@ fn update_statistic( } (left_index, right_index) } - -fn trim_histogram_buckets( - hist: &Histogram, - min: &Option, - max: &Option, -) -> Vec { - hist.buckets - .iter() - .filter(|bucket| { - (min.is_none() || bucket.upper_bound() > min.as_ref().unwrap()) - && (max.is_none() || bucket.lower_bound() <= max.as_ref().unwrap()) - }) - .cloned() - .collect() -} - -fn trim_buckets( - left_hist: &Histogram, - right_hist: &Histogram, - min: &Option, - max: &Option, -) -> Result<(Histogram, Histogram)> { - let left_buckets = trim_histogram_buckets(left_hist, min, max); - let right_buckets = trim_histogram_buckets(right_hist, min, max); - Ok(( - Histogram::new(left_buckets, left_hist.accuracy), - Histogram::new(right_buckets, right_hist.accuracy), - )) -} From 757daafef0ae75eb98bb32963592567e3c723544 Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Mon, 25 Nov 2024 20:56:32 +0800 Subject: [PATCH 04/15] chore(test): update sqllogictest --- .../explain/eliminate_outer_join.test | 24 ++++++++-------- .../mode/standalone/explain/explain.test | 24 ++++++++-------- .../standalone/explain/explain_substr.test | 1 - .../suites/mode/standalone/explain/join.test | 28 +++++++++---------- .../mode/standalone/explain/merge_into.test | 4 +-- .../standalone/explain/multi_table_insert.sql | 2 +- .../push_down_filter_join_inner.test | 6 ++-- 7 files changed, 44 insertions(+), 45 deletions(-) diff --git a/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test b/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test index 0e7b54503a43..998addf21229 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test @@ -463,11 +463,11 @@ HashJoin ├── build keys: [t1.a (#1)] ├── probe keys: [t.a (#0)] ├── filters: [] -├── estimated rows: 8.00 +├── estimated rows: 9.00 ├── Filter(Build) │ ├── output columns: [t1.a (#1)] │ ├── filters: [is_true(t1.a (#1) >= 1)] -│ ├── estimated rows: 8.00 +│ ├── estimated rows: 9.00 │ └── TableScan │ ├── table: default.eliminate_outer_join.t │ ├── output columns: [a (#1)] @@ -481,7 +481,7 @@ HashJoin └── Filter(Probe) ├── output columns: [t.a (#0)] ├── filters: [is_true(t.a (#0) >= 1)] - ├── estimated rows: 8.00 + ├── estimated rows: 9.00 └── TableScan ├── table: default.eliminate_outer_join.t ├── output columns: [a (#0)] @@ -502,11 +502,11 @@ HashJoin ├── build keys: [t1.a (#1)] ├── probe keys: [t.a (#0)] ├── filters: [] -├── estimated rows: 1.00 +├── estimated rows: 2.00 ├── Filter(Build) │ ├── output columns: [t1.a (#1)] │ ├── filters: [is_true(t1.a (#1) <= 1)] -│ ├── estimated rows: 1.00 +│ ├── estimated rows: 2.00 │ └── TableScan │ ├── table: default.eliminate_outer_join.t │ ├── output columns: [a (#1)] @@ -520,7 +520,7 @@ HashJoin └── Filter(Probe) ├── output columns: [t.a (#0)] ├── filters: [is_true(t.a (#0) <= 1)] - ├── estimated rows: 1.00 + ├── estimated rows: 2.00 └── TableScan ├── table: default.eliminate_outer_join.t ├── output columns: [a (#0)] @@ -539,7 +539,7 @@ explain select * from t left join t t1 on t.a = t1.a where t1.a <= 1 or t.a < 1 Filter ├── output columns: [t.a (#0), t1.a (#1)] ├── filters: [is_true((t1.a (#1) <= 1 OR t.a (#0) < 1))] -├── estimated rows: 1.90 +├── estimated rows: 3.09 └── HashJoin ├── output columns: [t.a (#0), t1.a (#1)] ├── join type: LEFT OUTER @@ -574,18 +574,18 @@ explain select * from t left join t t1 on t.a = t1.a where t1.a <= 1 or (t.a > 1 Filter ├── output columns: [t.a (#0), t1.a (#1)] ├── filters: [is_true((t1.a (#1) <= 1 OR (t.a (#0) > 1 AND t1.a (#1) > 1)))] -├── estimated rows: 6.13 +├── estimated rows: 6.95 └── HashJoin ├── output columns: [t.a (#0), t1.a (#1)] ├── join type: INNER ├── build keys: [t1.a (#1)] ├── probe keys: [t.a (#0)] ├── filters: [] - ├── estimated rows: 7.47 + ├── estimated rows: 8.40 ├── Filter(Build) │ ├── output columns: [t1.a (#1)] │ ├── filters: [is_true((t1.a (#1) <= 1 OR t1.a (#1) > 1))] - │ ├── estimated rows: 8.20 + │ ├── estimated rows: 8.40 │ └── TableScan │ ├── table: default.eliminate_outer_join.t │ ├── output columns: [a (#1)] @@ -599,7 +599,7 @@ Filter └── Filter(Probe) ├── output columns: [t.a (#0)] ├── filters: [is_true((t.a (#0) <= 1 OR t.a (#0) > 1))] - ├── estimated rows: 8.20 + ├── estimated rows: 8.40 └── TableScan ├── table: default.eliminate_outer_join.t ├── output columns: [a (#0)] @@ -617,7 +617,7 @@ explain select * from t left join t t1 on t.a = t1.a where t1.a <= 1 or (t.a > 1 Filter ├── output columns: [t.a (#0), t1.a (#1)] ├── filters: [is_true((t1.a (#1) <= 1 OR (t.a (#0) > 1 AND t.a (#0) < 2)))] -├── estimated rows: 2.80 +├── estimated rows: 3.95 └── HashJoin ├── output columns: [t.a (#0), t1.a (#1)] ├── join type: LEFT OUTER diff --git a/tests/sqllogictests/suites/mode/standalone/explain/explain.test b/tests/sqllogictests/suites/mode/standalone/explain/explain.test index 0f7686c52c50..940f72d817bf 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/explain.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/explain.test @@ -709,11 +709,11 @@ Limit └── Sort ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] ├── sort keys: [a DESC NULLS LAST] - ├── estimated rows: 3.87 + ├── estimated rows: 3.85 └── Filter ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] ├── filters: [(((t1.a (#0) > 1 OR t1.b (#1) < 2) AND t2.a (#2) > 2) OR (t1.b (#1) < 3 AND t2.b (#3) < 4))] - ├── estimated rows: 3.87 + ├── estimated rows: 3.85 └── HashJoin ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] ├── join type: CROSS @@ -750,7 +750,7 @@ Limit ├── push downs: [filters: [(t2.a (#2) > 2 OR t2.b (#3) < 4)], limit: NONE] └── estimated rows: 5.00 -query +query explain select * from t1,t2 where (t1.a > 1 or t1.b < 2) and (t1.a > 1 or t1.b < 2) ---- HashJoin @@ -785,7 +785,7 @@ HashJoin ├── push downs: [filters: [], limit: NONE] └── estimated rows: 5.00 -query +query explain select count(distinct a) from t1; ---- AggregateFinal @@ -817,7 +817,7 @@ AggregateFinal ├── push downs: [filters: [], limit: NONE] └── estimated rows: 1.00 -query +query explain select count_distinct(a) from t1; ---- AggregateFinal @@ -849,7 +849,7 @@ AggregateFinal ├── push downs: [filters: [], limit: NONE] └── estimated rows: 1.00 -query +query explain select * from (values(1, 'a'),(2, 'b')) t(c1,c2) ---- ConstantTableScan @@ -863,28 +863,28 @@ drop table t1 statement ok drop table t2 -query +query explain syntax select * from read_parquet('p1', 'p2', 'p3'); ---- SELECT * FROM read_parquet('p1', 'p2', 'p3') -query +query explain syntax select * from read_parquet(prune_page=>true, refresh_meta_cache=>true); ---- SELECT * FROM read_parquet(prune_page=>TRUE, refresh_meta_cache=>TRUE) -query +query explain syntax select * from read_parquet('p1', 'p2', 'p3', prune_page=>true, refresh_meta_cache=>true); ---- SELECT * FROM read_parquet('p1', 'p2', 'p3', prune_page=>TRUE, refresh_meta_cache=>TRUE) -query +query explain syntax select * from read_parquet('p1', 'p2', 'p3', prune_page=>true, refresh_meta_cache=>true); ---- SELECT * @@ -897,7 +897,7 @@ drop table if exists t4 statement ok create OR REPLACE table t4(a int, b string); -query +query explain select * from t4 where a = 1 and try_cast(get(try_parse_json(b),'bb') as varchar) = 'xx'; ---- Filter @@ -920,7 +920,7 @@ drop view if exists v4 statement ok create view v4 as select a as a, try_cast(get(try_parse_json(b), 'bb') as varchar) as b from t4; -query +query explain select * from v4 where b = 'xx'; ---- EvalScalar diff --git a/tests/sqllogictests/suites/mode/standalone/explain/explain_substr.test b/tests/sqllogictests/suites/mode/standalone/explain/explain_substr.test index bde37c2700d3..103d21b767e5 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/explain_substr.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/explain_substr.test @@ -65,4 +65,3 @@ Filter statement ok drop table t1 all - diff --git a/tests/sqllogictests/suites/mode/standalone/explain/join.test b/tests/sqllogictests/suites/mode/standalone/explain/join.test index 6899fc5ce748..4843f454f3ad 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/join.test @@ -311,39 +311,39 @@ query T explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where b.x > 42 and b.x < 45 ---- HashJoin -├── output columns: [a.x (#0), b.y (#2), b.x (#1)] +├── output columns: [b.x (#1), b.y (#2), a.x (#0)] ├── join type: INNER -├── build keys: [b.x (#1)] -├── probe keys: [a.x (#0)] +├── build keys: [a.x (#0)] +├── probe keys: [b.x (#1)] ├── filters: [] ├── estimated rows: 3.56 ├── Filter(Build) -│ ├── output columns: [b.x (#1), b.y (#2)] -│ ├── filters: [is_true(b.x (#1) > 42), is_true(b.x (#1) < 45)] +│ ├── output columns: [a.x (#0)] +│ ├── filters: [is_true(a.x (#0) > 42), is_true(a.x (#0) < 45)] │ ├── estimated rows: 2.67 │ └── TableScan -│ ├── table: default.default.twocolumn -│ ├── output columns: [x (#1), y (#2)] +│ ├── table: default.default.onecolumn +│ ├── output columns: [x (#0)] │ ├── read rows: 4 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] -│ ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] +│ ├── push downs: [filters: [and_filters(onecolumn.x (#0) > 42, onecolumn.x (#0) < 45)], limit: NONE] │ └── estimated rows: 4.00 └── Filter(Probe) - ├── output columns: [a.x (#0)] - ├── filters: [is_true(a.x (#0) > 42), is_true(a.x (#0) < 45)] + ├── output columns: [b.x (#1), b.y (#2)] + ├── filters: [is_true(b.x (#1) > 42), is_true(b.x (#1) < 45)] ├── estimated rows: 2.67 └── TableScan - ├── table: default.default.onecolumn - ├── output columns: [x (#0)] + ├── table: default.default.twocolumn + ├── output columns: [x (#1), y (#2)] ├── read rows: 4 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [and_filters(onecolumn.x (#0) > 42, onecolumn.x (#0) < 45)], limit: NONE] + ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] └── estimated rows: 4.00 # the following cases won't be converted to inner join @@ -354,7 +354,7 @@ explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where Filter ├── output columns: [a.x (#0), b.x (#1), b.y (#2)] ├── filters: [is_true((b.x (#1) > 44 OR a.x (#0) < 43))] -├── estimated rows: 1.33 +├── estimated rows: 2.00 └── HashJoin ├── output columns: [a.x (#0), b.x (#1), b.y (#2)] ├── join type: LEFT OUTER diff --git a/tests/sqllogictests/suites/mode/standalone/explain/merge_into.test b/tests/sqllogictests/suites/mode/standalone/explain/merge_into.test index 871fe0053aa6..449c43f11da9 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/merge_into.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/merge_into.test @@ -122,7 +122,7 @@ CommitSink ├── build keys: [employees2.employee_id (#0)] ├── probe keys: [salaries2.employee_id (#3)] ├── filters: [] - ├── estimated rows: 6.00 + ├── estimated rows: 4.00 ├── TableScan(Build) │ ├── table: default.default.employees2 │ ├── output columns: [employee_id (#0), employee_name (#1), department (#2)] @@ -161,7 +161,7 @@ CommitSink ├── build keys: [employees2.employee_id (#0)] ├── probe keys: [salaries2.employee_id (#3)] ├── filters: [] - ├── estimated rows: 6.00 + ├── estimated rows: 4.00 ├── TableScan(Build) │ ├── table: default.default.employees2 │ ├── output columns: [employee_id (#0), employee_name (#1), department (#2)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/multi_table_insert.sql b/tests/sqllogictests/suites/mode/standalone/explain/multi_table_insert.sql index 1b0d34c62948..26607dd35024 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/multi_table_insert.sql +++ b/tests/sqllogictests/suites/mode/standalone/explain/multi_table_insert.sql @@ -45,4 +45,4 @@ Commit ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 5.00 \ No newline at end of file + └── estimated rows: 5.00 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test index 24631526617b..0c2e8eda9473 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test @@ -64,14 +64,14 @@ explain select * from t1 inner join t2 on t1.a = t2.a where t2.a <= 2 or (t1.a > Filter ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] ├── filters: [is_true((t2.a (#2) <= 2 OR (t1.a (#0) > 1 AND t2.a (#2) > 1)))] -├── estimated rows: 3.11 +├── estimated rows: 3.56 └── HashJoin ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] ├── join type: INNER ├── build keys: [t2.a (#2)] ├── probe keys: [t1.a (#0)] ├── filters: [] - ├── estimated rows: 3.11 + ├── estimated rows: 3.56 ├── Filter(Build) │ ├── output columns: [t2.a (#2), t2.b (#3)] │ ├── filters: [is_true((t2.a (#2) <= 2 OR t2.a (#2) > 1))] @@ -89,7 +89,7 @@ Filter └── Filter(Probe) ├── output columns: [t1.a (#0), t1.b (#1)] ├── filters: [is_true((t1.a (#0) <= 2 OR t1.a (#0) > 1))] - ├── estimated rows: 3.11 + ├── estimated rows: 3.56 └── TableScan ├── table: default.default.t1 ├── output columns: [a (#0), b (#1)] From 3982b70eadb915e75cc6da0d8f18fe86f4545f3f Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Mon, 25 Nov 2024 20:59:44 +0800 Subject: [PATCH 05/15] chore(test): update sqllogictest --- .../standalone/explain_native/explain.test | 4 ++-- .../mode/standalone/explain_native/join.test | 20 +++++++++---------- .../push_down_filter_join_inner.test | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test b/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test index 81aaf022c559..87762e3ab2da 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test @@ -661,11 +661,11 @@ Limit └── Sort ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] ├── sort keys: [a DESC NULLS LAST] - ├── estimated rows: 3.87 + ├── estimated rows: 3.85 └── Filter ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] ├── filters: [(((t1.a (#0) > 1 OR t1.b (#1) < 2) AND t2.a (#2) > 2) OR (t1.b (#1) < 3 AND t2.b (#3) < 4))] - ├── estimated rows: 3.87 + ├── estimated rows: 3.85 └── HashJoin ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] ├── join type: CROSS diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/join.test b/tests/sqllogictests/suites/mode/standalone/explain_native/join.test index 2aef8da12dd3..ce48d657fb7f 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/join.test @@ -279,31 +279,31 @@ query T explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where b.x > 42 and b.x < 45 ---- HashJoin -├── output columns: [a.x (#0), b.y (#2), b.x (#1)] +├── output columns: [b.x (#1), b.y (#2), a.x (#0)] ├── join type: INNER -├── build keys: [b.x (#1)] -├── probe keys: [a.x (#0)] +├── build keys: [a.x (#0)] +├── probe keys: [b.x (#1)] ├── filters: [] ├── estimated rows: 3.56 ├── TableScan(Build) -│ ├── table: default.default.twocolumn -│ ├── output columns: [x (#1), y (#2)] +│ ├── table: default.default.onecolumn +│ ├── output columns: [x (#0)] │ ├── read rows: 4 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] -│ ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] +│ ├── push downs: [filters: [and_filters(onecolumn.x (#0) > 42, onecolumn.x (#0) < 45)], limit: NONE] │ └── estimated rows: 2.67 └── TableScan(Probe) - ├── table: default.default.onecolumn - ├── output columns: [x (#0)] + ├── table: default.default.twocolumn + ├── output columns: [x (#1), y (#2)] ├── read rows: 4 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [and_filters(onecolumn.x (#0) > 42, onecolumn.x (#0) < 45)], limit: NONE] + ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] └── estimated rows: 2.67 # the following cases won't be converted to inner join @@ -314,7 +314,7 @@ explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where Filter ├── output columns: [a.x (#0), b.x (#1), b.y (#2)] ├── filters: [is_true((b.x (#1) > 44 OR a.x (#0) < 43))] -├── estimated rows: 1.33 +├── estimated rows: 2.00 └── HashJoin ├── output columns: [a.x (#0), b.x (#1), b.y (#2)] ├── join type: LEFT OUTER diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test index aaa4b6e0df54..697e2e87519c 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test @@ -56,14 +56,14 @@ explain select * from t1 inner join t2 on t1.a = t2.a where t2.a <= 2 or (t1.a > Filter ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] ├── filters: [is_true((t2.a (#2) <= 2 OR (t1.a (#0) > 1 AND t2.a (#2) > 1)))] -├── estimated rows: 3.11 +├── estimated rows: 3.56 └── HashJoin ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] ├── join type: INNER ├── build keys: [t2.a (#2)] ├── probe keys: [t1.a (#0)] ├── filters: [] - ├── estimated rows: 3.11 + ├── estimated rows: 3.56 ├── TableScan(Build) │ ├── table: default.default.t2 │ ├── output columns: [a (#2), b (#3)] @@ -83,7 +83,7 @@ Filter ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [is_true((t1.a (#0) <= 2 OR t1.a (#0) > 1))], limit: NONE] - └── estimated rows: 3.11 + └── estimated rows: 3.56 statement ok drop table if exists t1; From 01c0b2d2253e18212832bfd9168d2e659e8d88f8 Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Mon, 25 Nov 2024 21:12:30 +0800 Subject: [PATCH 06/15] chore(code): refine code --- src/query/sql/src/planner/plans/join.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/sql/src/planner/plans/join.rs b/src/query/sql/src/planner/plans/join.rs index d91dbb1194a6..bf57e4b33859 100644 --- a/src/query/sql/src/planner/plans/join.rs +++ b/src/query/sql/src/planner/plans/join.rs @@ -318,7 +318,7 @@ impl Join { let card = match (&left_col_stat.histogram, &right_col_stat.histogram) { (Some(left_hist), Some(right_hist)) => { // Evaluate join cardinality by histogram. - evaluate_by_histogram(&left_hist, &right_hist, &mut new_ndv)? + evaluate_by_histogram(left_hist, right_hist, &mut new_ndv)? } _ => evaluate_by_ndv( left_col_stat, From 3862db2888dc2432f0a25f4ba892ed419093bd23 Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Tue, 26 Nov 2024 00:23:36 +0800 Subject: [PATCH 07/15] chore(test): update sqllogictest --- .../tpch/join_order_with_accurate_his.test | 156 +++++++++--------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/tests/sqllogictests/suites/tpch/join_order_with_accurate_his.test b/tests/sqllogictests/suites/tpch/join_order_with_accurate_his.test index 2746ddfe66d4..ef2347e35019 100644 --- a/tests/sqllogictests/suites/tpch/join_order_with_accurate_his.test +++ b/tests/sqllogictests/suites/tpch/join_order_with_accurate_his.test @@ -85,17 +85,13 @@ HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpch_test.region (#4) (read rows: 5) +│ │ │ │ └── Scan: default.tpch_test.region (#8) (read rows: 5) │ │ │ └── Probe -│ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) +│ │ │ └── Scan: default.tpch_test.nation (#7) (read rows: 25) │ │ └── Probe -│ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) +│ │ └── Scan: default.tpch_test.supplier (#6) (read rows: 1000) │ └── Probe -│ └── HashJoin: INNER -│ ├── Build -│ │ └── Scan: default.tpch_test.part (#0) (read rows: 20000) -│ └── Probe -│ └── Scan: default.tpch_test.partsupp (#2) (read rows: 80000) +│ └── Scan: default.tpch_test.partsupp (#5) (read rows: 80000) └── Probe └── HashJoin: INNER ├── Build @@ -103,13 +99,17 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpch_test.region (#8) (read rows: 5) + │ │ │ └── HashJoin: INNER + │ │ │ ├── Build + │ │ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) + │ │ │ └── Probe + │ │ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) │ │ └── Probe - │ │ └── Scan: default.tpch_test.nation (#7) (read rows: 25) + │ │ └── Scan: default.tpch_test.partsupp (#2) (read rows: 80000) │ └── Probe - │ └── Scan: default.tpch_test.supplier (#6) (read rows: 1000) + │ └── Scan: default.tpch_test.region (#4) (read rows: 5) └── Probe - └── Scan: default.tpch_test.partsupp (#5) (read rows: 80000) + └── Scan: default.tpch_test.part (#0) (read rows: 20000) # Q3 query I @@ -206,25 +206,25 @@ order by ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpch_test.supplier (#3) (read rows: 1000) +│ └── HashJoin: INNER +│ ├── Build +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── HashJoin: INNER +│ │ │ ├── Build +│ │ │ │ └── HashJoin: INNER +│ │ │ │ ├── Build +│ │ │ │ │ └── Scan: default.tpch_test.region (#5) (read rows: 5) +│ │ │ │ └── Probe +│ │ │ │ └── Scan: default.tpch_test.nation (#4) (read rows: 25) +│ │ │ └── Probe +│ │ │ └── Scan: default.tpch_test.supplier (#3) (read rows: 1000) +│ │ └── Probe +│ │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) +│ └── Probe +│ └── Scan: default.tpch_test.orders (#1) (read rows: 150000) └── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── HashJoin: INNER - │ │ ├── Build - │ │ │ └── HashJoin: INNER - │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpch_test.region (#5) (read rows: 5) - │ │ │ └── Probe - │ │ │ └── Scan: default.tpch_test.nation (#4) (read rows: 25) - │ │ └── Probe - │ │ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) - │ └── Probe - │ └── Scan: default.tpch_test.orders (#1) (read rows: 150000) - └── Probe - └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) + └── Scan: default.tpch_test.customer (#0) (read rows: 15000) # Q6 query I @@ -346,33 +346,33 @@ order by ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpch_test.nation (#6) (read rows: 25) +│ └── HashJoin: INNER +│ ├── Build +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── HashJoin: INNER +│ │ │ ├── Build +│ │ │ │ └── HashJoin: INNER +│ │ │ │ ├── Build +│ │ │ │ │ └── HashJoin: INNER +│ │ │ │ │ ├── Build +│ │ │ │ │ │ └── Scan: default.tpch_test.region (#7) (read rows: 5) +│ │ │ │ │ └── Probe +│ │ │ │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) +│ │ │ │ └── Probe +│ │ │ │ └── Scan: default.tpch_test.customer (#4) (read rows: 15000) +│ │ │ └── Probe +│ │ │ └── Scan: default.tpch_test.orders (#3) (read rows: 150000) +│ │ └── Probe +│ │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) +│ └── Probe +│ └── HashJoin: INNER +│ ├── Build +│ │ └── Scan: default.tpch_test.nation (#6) (read rows: 25) +│ └── Probe +│ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) └── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── HashJoin: INNER - │ │ ├── Build - │ │ │ └── Scan: default.tpch_test.region (#7) (read rows: 5) - │ │ └── Probe - │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) - │ └── Probe - │ └── HashJoin: INNER - │ ├── Build - │ │ └── HashJoin: INNER - │ │ ├── Build - │ │ │ └── HashJoin: INNER - │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpch_test.part (#0) (read rows: 20000) - │ │ │ └── Probe - │ │ │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) - │ │ └── Probe - │ │ └── Scan: default.tpch_test.orders (#3) (read rows: 150000) - │ └── Probe - │ └── Scan: default.tpch_test.customer (#4) (read rows: 15000) - └── Probe - └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) + └── Scan: default.tpch_test.part (#0) (read rows: 20000) # Q9 query I @@ -415,21 +415,21 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) +│ │ │ └── HashJoin: INNER +│ │ │ ├── Build +│ │ │ │ └── HashJoin: INNER +│ │ │ │ ├── Build +│ │ │ │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) +│ │ │ │ └── Probe +│ │ │ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) +│ │ │ └── Probe +│ │ │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) │ │ └── Probe -│ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) +│ │ └── Scan: default.tpch_test.partsupp (#3) (read rows: 80000) │ └── Probe -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── Scan: default.tpch_test.part (#0) (read rows: 20000) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) -│ └── Probe -│ └── Scan: default.tpch_test.orders (#4) (read rows: 150000) +│ └── Scan: default.tpch_test.orders (#4) (read rows: 150000) └── Probe - └── Scan: default.tpch_test.partsupp (#3) (read rows: 80000) + └── Scan: default.tpch_test.part (#0) (read rows: 20000) # Q10 query I @@ -469,13 +469,13 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) +│ │ └── Probe +│ │ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) │ └── Probe -│ └── HashJoin: INNER -│ ├── Build -│ │ └── Scan: default.tpch_test.orders (#1) (read rows: 150000) -│ └── Probe -│ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) +│ └── Scan: default.tpch_test.orders (#1) (read rows: 150000) └── Probe └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) @@ -618,9 +618,9 @@ where ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpch_test.lineitem (#0) (read rows: 600572) +│ └── Scan: default.tpch_test.part (#1) (read rows: 20000) └── Probe - └── Scan: default.tpch_test.part (#1) (read rows: 20000) + └── Scan: default.tpch_test.lineitem (#0) (read rows: 600572) # Q15 query T @@ -706,13 +706,13 @@ MaterializedCte: 0 ├── Build │ └── CTEScan │ ├── CTE index: 0, sub index: 2 - │ └── estimated rows: 85.00 + │ └── estimated rows: 415.00 └── Probe └── HashJoin: INNER ├── Build │ └── CTEScan │ ├── CTE index: 0, sub index: 1 - │ └── estimated rows: 85.00 + │ └── estimated rows: 415.00 └── Probe └── Scan: default.tpch_test.supplier (#0) (read rows: 1000) From b904e144929d8c13ea2d0bf128f68e871496f5d3 Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Tue, 26 Nov 2024 09:37:19 +0800 Subject: [PATCH 08/15] chore(test): test ci tpch --- .../sqllogictests/suites/tpch/join_order.test | 174 +----------------- 1 file changed, 1 insertion(+), 173 deletions(-) diff --git a/tests/sqllogictests/suites/tpch/join_order.test b/tests/sqllogictests/suites/tpch/join_order.test index 44669fc4e3bb..a12bd7e01839 100644 --- a/tests/sqllogictests/suites/tpch/join_order.test +++ b/tests/sqllogictests/suites/tpch/join_order.test @@ -28,181 +28,9 @@ analyze table region statement ok analyze table supplier -# Q1 -query I -explain join select - l_returnflag, - l_linestatus, - sum(l_quantity) as sum_qty, - sum(l_extendedprice) as sum_base_price, - sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, - sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, - avg(l_quantity) as avg_qty, - avg(l_extendedprice) as avg_price, - avg(l_discount) as avg_disc, - count(*) as count_order -from - lineitem -where - l_shipdate <= add_days(to_date('1998-12-01'), -90) -group by - l_returnflag, - l_linestatus -order by - l_returnflag, - l_linestatus; ----- -Scan: default.tpch_test.lineitem (#0) (read rows: 600572) - -# Q2 -query I -explain join select - s_acctbal, - s_name, - n_name, - p_partkey, - p_mfgr, - s_address, - s_phone, - s_comment -from - part, - supplier, - partsupp, - nation, - region -where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and p_size = 15 - and p_type like '%BRASS' - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'EUROPE' - and ps_supplycost = ( - select - min(ps_supplycost) - from - partsupp, - supplier, - nation, - region - where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'EUROPE' -) -order by - s_acctbal desc, - n_name, - s_name, - p_partkey; ----- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── HashJoin: INNER -│ │ │ ├── Build -│ │ │ │ └── Scan: default.tpch_test.region (#4) (read rows: 5) -│ │ │ └── Probe -│ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) -│ └── Probe -│ └── HashJoin: INNER -│ ├── Build -│ │ └── Scan: default.tpch_test.part (#0) (read rows: 20000) -│ └── Probe -│ └── Scan: default.tpch_test.partsupp (#2) (read rows: 80000) -└── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── HashJoin: INNER - │ │ ├── Build - │ │ │ └── Scan: default.tpch_test.region (#8) (read rows: 5) - │ │ └── Probe - │ │ └── Scan: default.tpch_test.nation (#7) (read rows: 25) - │ └── Probe - │ └── Scan: default.tpch_test.supplier (#6) (read rows: 1000) - └── Probe - └── Scan: default.tpch_test.partsupp (#5) (read rows: 80000) - -# Q3 -query I -explain join select - l_orderkey, - sum(l_extendedprice * (1 - l_discount)) as revenue, - o_orderdate, - o_shippriority -from - customer, - orders, - lineitem -where - c_mktsegment = 'BUILDING' - and c_custkey = o_custkey - and l_orderkey = o_orderkey - and o_orderdate < to_date('1995-03-15') - and l_shipdate > to_date('1995-03-15') -group by - l_orderkey, - o_orderdate, - o_shippriority -order by - revenue desc, - o_orderdate -limit 5; ----- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) -│ └── Probe -│ └── Scan: default.tpch_test.orders (#1) (read rows: 150000) -└── Probe - └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) - -# Q4 -query I -explain join select - o_orderpriority, - count(*) as order_count -from - orders -where - o_orderdate >= to_date('1993-07-01') - and o_orderdate < add_months(to_date('1993-07-01'), 3) - and exists ( - select - * - from - lineitem - where - l_orderkey = o_orderkey - and l_commitdate < l_receiptdate - ) -group by - o_orderpriority -order by - o_orderpriority; ----- -HashJoin: RIGHT SEMI -├── Build -│ └── Scan: default.tpch_test.orders (#0) (read rows: 150000) -└── Probe - └── Scan: default.tpch_test.lineitem (#1) (read rows: 600572) - # Q5 query I -explain join select +explain select n_name, ceil(sum(l_extendedprice * (1 - l_discount)) / 10) as revenue from From 54a160e8bac965468782a642ab51d59645d24421 Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Tue, 26 Nov 2024 09:56:01 +0800 Subject: [PATCH 09/15] chore(code): fix typos --- src/query/expression/src/kernels/group_by_hash/utils.rs | 2 +- src/query/service/src/servers/admin/admin_service.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/query/expression/src/kernels/group_by_hash/utils.rs b/src/query/expression/src/kernels/group_by_hash/utils.rs index 280edbe6e850..d682067e373d 100644 --- a/src/query/expression/src/kernels/group_by_hash/utils.rs +++ b/src/query/expression/src/kernels/group_by_hash/utils.rs @@ -40,7 +40,7 @@ pub fn serialize_group_columns( } builder.commit_row(); } - // For nulllable column it will only serialize valid row data + // For nullable column it will only serialize valid row data debug_assert!(builder.data.len() <= serialize_size); builder.build() } diff --git a/src/query/service/src/servers/admin/admin_service.rs b/src/query/service/src/servers/admin/admin_service.rs index c6454ea0af21..b63a38724ae6 100644 --- a/src/query/service/src/servers/admin/admin_service.rs +++ b/src/query/service/src/servers/admin/admin_service.rs @@ -184,7 +184,7 @@ impl Server for AdminService { async fn shutdown(&mut self, _graceful: bool) { // intendfully do nothing: sometimes we hope to diagnose the backtraces or metrics after // the process got the sigterm signal, we can still leave the admin service port open until - // the process exited. it's not an user facing service, it's allowed to shutdown forcely. + // the process exited. it's not an user facing service, it's allowed to shutdown forcibly. } #[async_backtrace::framed] From 9366dd799048ac2c47891829b843c8943555c9a5 Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Tue, 26 Nov 2024 10:04:52 +0800 Subject: [PATCH 10/15] chore(test): remove accurate_his test --- .../sqllogictests/suites/tpch/join_order.test | 174 ++- .../tpch/join_order_with_accurate_his.test | 1093 ----------------- 2 files changed, 173 insertions(+), 1094 deletions(-) delete mode 100644 tests/sqllogictests/suites/tpch/join_order_with_accurate_his.test diff --git a/tests/sqllogictests/suites/tpch/join_order.test b/tests/sqllogictests/suites/tpch/join_order.test index a12bd7e01839..44669fc4e3bb 100644 --- a/tests/sqllogictests/suites/tpch/join_order.test +++ b/tests/sqllogictests/suites/tpch/join_order.test @@ -28,9 +28,181 @@ analyze table region statement ok analyze table supplier +# Q1 +query I +explain join select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from + lineitem +where + l_shipdate <= add_days(to_date('1998-12-01'), -90) +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus; +---- +Scan: default.tpch_test.lineitem (#0) (read rows: 600572) + +# Q2 +query I +explain join select + s_acctbal, + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment +from + part, + supplier, + partsupp, + nation, + region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 15 + and p_type like '%BRASS' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, + supplier, + nation, + region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' +) +order by + s_acctbal desc, + n_name, + s_name, + p_partkey; +---- +HashJoin: INNER +├── Build +│ └── HashJoin: INNER +│ ├── Build +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── HashJoin: INNER +│ │ │ ├── Build +│ │ │ │ └── Scan: default.tpch_test.region (#4) (read rows: 5) +│ │ │ └── Probe +│ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) +│ │ └── Probe +│ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) +│ └── Probe +│ └── HashJoin: INNER +│ ├── Build +│ │ └── Scan: default.tpch_test.part (#0) (read rows: 20000) +│ └── Probe +│ └── Scan: default.tpch_test.partsupp (#2) (read rows: 80000) +└── Probe + └── HashJoin: INNER + ├── Build + │ └── HashJoin: INNER + │ ├── Build + │ │ └── HashJoin: INNER + │ │ ├── Build + │ │ │ └── Scan: default.tpch_test.region (#8) (read rows: 5) + │ │ └── Probe + │ │ └── Scan: default.tpch_test.nation (#7) (read rows: 25) + │ └── Probe + │ └── Scan: default.tpch_test.supplier (#6) (read rows: 1000) + └── Probe + └── Scan: default.tpch_test.partsupp (#5) (read rows: 80000) + +# Q3 +query I +explain join select + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority +from + customer, + orders, + lineitem +where + c_mktsegment = 'BUILDING' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < to_date('1995-03-15') + and l_shipdate > to_date('1995-03-15') +group by + l_orderkey, + o_orderdate, + o_shippriority +order by + revenue desc, + o_orderdate +limit 5; +---- +HashJoin: INNER +├── Build +│ └── HashJoin: INNER +│ ├── Build +│ │ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) +│ └── Probe +│ └── Scan: default.tpch_test.orders (#1) (read rows: 150000) +└── Probe + └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) + +# Q4 +query I +explain join select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= to_date('1993-07-01') + and o_orderdate < add_months(to_date('1993-07-01'), 3) + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority; +---- +HashJoin: RIGHT SEMI +├── Build +│ └── Scan: default.tpch_test.orders (#0) (read rows: 150000) +└── Probe + └── Scan: default.tpch_test.lineitem (#1) (read rows: 600572) + # Q5 query I -explain select +explain join select n_name, ceil(sum(l_extendedprice * (1 - l_discount)) / 10) as revenue from diff --git a/tests/sqllogictests/suites/tpch/join_order_with_accurate_his.test b/tests/sqllogictests/suites/tpch/join_order_with_accurate_his.test deleted file mode 100644 index ef2347e35019..000000000000 --- a/tests/sqllogictests/suites/tpch/join_order_with_accurate_his.test +++ /dev/null @@ -1,1093 +0,0 @@ -statement ok -set sandbox_tenant = 'test_tenant'; - -statement ok -use tpch_test; - -statement ok -set enable_analyze_histogram = 1; - -statement ok -analyze table customer - -statement ok -analyze table lineitem - -statement ok -analyze table nation - -statement ok -analyze table orders - -statement ok -analyze table partsupp - -statement ok -analyze table part - -statement ok -analyze table region - -statement ok -analyze table supplier - -# Q2 -query I -explain join select - s_acctbal, - s_name, - n_name, - p_partkey, - p_mfgr, - s_address, - s_phone, - s_comment -from - part, - supplier, - partsupp, - nation, - region -where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and p_size = 15 - and p_type like '%BRASS' - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'EUROPE' - and ps_supplycost = ( - select - min(ps_supplycost) - from - partsupp, - supplier, - nation, - region - where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'EUROPE' -) -order by - s_acctbal desc, - n_name, - s_name, - p_partkey; ----- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── HashJoin: INNER -│ │ │ ├── Build -│ │ │ │ └── Scan: default.tpch_test.region (#8) (read rows: 5) -│ │ │ └── Probe -│ │ │ └── Scan: default.tpch_test.nation (#7) (read rows: 25) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.supplier (#6) (read rows: 1000) -│ └── Probe -│ └── Scan: default.tpch_test.partsupp (#5) (read rows: 80000) -└── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── HashJoin: INNER - │ │ ├── Build - │ │ │ └── HashJoin: INNER - │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) - │ │ │ └── Probe - │ │ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) - │ │ └── Probe - │ │ └── Scan: default.tpch_test.partsupp (#2) (read rows: 80000) - │ └── Probe - │ └── Scan: default.tpch_test.region (#4) (read rows: 5) - └── Probe - └── Scan: default.tpch_test.part (#0) (read rows: 20000) - -# Q3 -query I -explain join select - l_orderkey, - sum(l_extendedprice * (1 - l_discount)) as revenue, - o_orderdate, - o_shippriority -from - customer, - orders, - lineitem -where - c_mktsegment = 'BUILDING' - and c_custkey = o_custkey - and l_orderkey = o_orderkey - and o_orderdate < to_date('1995-03-15') - and l_shipdate > to_date('1995-03-15') -group by - l_orderkey, - o_orderdate, - o_shippriority -order by - revenue desc, - o_orderdate -limit 5; ----- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) -│ └── Probe -│ └── Scan: default.tpch_test.orders (#1) (read rows: 150000) -└── Probe - └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) - -# Q4 -query I -explain join select - o_orderpriority, - count(*) as order_count -from - orders -where - o_orderdate >= to_date('1993-07-01') - and o_orderdate < add_months(to_date('1993-07-01'), 3) - and exists ( - select - * - from - lineitem - where - l_orderkey = o_orderkey - and l_commitdate < l_receiptdate - ) -group by - o_orderpriority -order by - o_orderpriority; ----- -HashJoin: RIGHT SEMI -├── Build -│ └── Scan: default.tpch_test.orders (#0) (read rows: 150000) -└── Probe - └── Scan: default.tpch_test.lineitem (#1) (read rows: 600572) - -# Q5 -query I -explain join select - n_name, - ceil(sum(l_extendedprice * (1 - l_discount)) / 10) as revenue -from - customer, - orders, - lineitem, - supplier, - nation, - region -where - c_custkey = o_custkey - and l_orderkey = o_orderkey - and l_suppkey = s_suppkey - and c_nationkey = s_nationkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'ASIA' - and o_orderdate >= to_date('1994-01-01') - and o_orderdate < add_years(to_date('1994-01-01'), 1) -group by - n_name -order by - revenue desc; ----- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── HashJoin: INNER -│ │ │ ├── Build -│ │ │ │ └── HashJoin: INNER -│ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpch_test.region (#5) (read rows: 5) -│ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpch_test.nation (#4) (read rows: 25) -│ │ │ └── Probe -│ │ │ └── Scan: default.tpch_test.supplier (#3) (read rows: 1000) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) -│ └── Probe -│ └── Scan: default.tpch_test.orders (#1) (read rows: 150000) -└── Probe - └── Scan: default.tpch_test.customer (#0) (read rows: 15000) - -# Q6 -query I -explain join select - truncate(sum(l_extendedprice * l_discount),3) as revenue -from - lineitem -where - l_shipdate >= '1994-01-01' - and l_shipdate < date_add(year, 1, to_date('1994-01-01')) - and l_discount between 0.05 and 0.07 - and l_quantity < 24; ----- -Scan: default.tpch_test.lineitem (#0) (read rows: 600572) - -# Q7 -query I -explain join select - supp_nation, - cust_nation, - l_year, - truncate(sum(volume),3) as revenue -from - ( - select - n1.n_name as supp_nation, - n2.n_name as cust_nation, - extract(year from l_shipdate) as l_year, - l_extendedprice * (1 - l_discount) as volume - from - supplier, - lineitem, - orders, - customer, - nation n1, - nation n2 - where - s_suppkey = l_suppkey - and o_orderkey = l_orderkey - and c_custkey = o_custkey - and s_nationkey = n1.n_nationkey - and c_nationkey = n2.n_nationkey - and ( - (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') - or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE') - ) - and l_shipdate between to_date('1995-01-01') and to_date('1996-12-31') - ) as shipping -group by - supp_nation, - cust_nation, - l_year -order by - supp_nation, - cust_nation, - l_year; ----- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── Scan: default.tpch_test.nation (#4) (read rows: 25) -│ └── Probe -│ └── Scan: default.tpch_test.supplier (#0) (read rows: 1000) -└── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── HashJoin: INNER - │ │ ├── Build - │ │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) - │ │ └── Probe - │ │ └── Scan: default.tpch_test.customer (#3) (read rows: 15000) - │ └── Probe - │ └── Scan: default.tpch_test.orders (#2) (read rows: 150000) - └── Probe - └── Scan: default.tpch_test.lineitem (#1) (read rows: 600572) - -# Q8 -query I -explain join select - o_year, - truncate(sum(case - when nation = 'BRAZIL' then volume - else 0 - end) / sum(volume),8) as mkt_share -from - ( - select - extract(year from o_orderdate) as o_year, - l_extendedprice * (1 - l_discount) as volume, - n2.n_name as nation - from - part, - supplier, - lineitem, - orders, - customer, - nation n1, - nation n2, - region - where - p_partkey = l_partkey - and s_suppkey = l_suppkey - and l_orderkey = o_orderkey - and o_custkey = c_custkey - and c_nationkey = n1.n_nationkey - and n1.n_regionkey = r_regionkey - and r_name = 'AMERICA' - and s_nationkey = n2.n_nationkey - and o_orderdate between to_date('1995-01-01') and to_date('1996-12-31') - and p_type = 'ECONOMY ANODIZED STEEL' - ) as all_nations -group by - o_year -order by - o_year; ----- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── HashJoin: INNER -│ │ │ ├── Build -│ │ │ │ └── HashJoin: INNER -│ │ │ │ ├── Build -│ │ │ │ │ └── HashJoin: INNER -│ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpch_test.region (#7) (read rows: 5) -│ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) -│ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpch_test.customer (#4) (read rows: 15000) -│ │ │ └── Probe -│ │ │ └── Scan: default.tpch_test.orders (#3) (read rows: 150000) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) -│ └── Probe -│ └── HashJoin: INNER -│ ├── Build -│ │ └── Scan: default.tpch_test.nation (#6) (read rows: 25) -│ └── Probe -│ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) -└── Probe - └── Scan: default.tpch_test.part (#0) (read rows: 20000) - -# Q9 -query I -explain join select - nation, - o_year, - truncate(truncate(sum(amount),0)/10, 0) as sum_profit -from - ( - select - n_name as nation, - extract(year from o_orderdate) as o_year, - truncate(l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity, 100) as amount - from - part, - supplier, - lineitem, - partsupp, - orders, - nation - where - s_suppkey = l_suppkey - and ps_suppkey = l_suppkey - and ps_partkey = l_partkey - and p_partkey = l_partkey - and o_orderkey = l_orderkey - and s_nationkey = n_nationkey - and p_name like '%green%' - ) as profit -group by - nation, - o_year -order by - sum_profit -limit 5; ----- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── HashJoin: INNER -│ │ │ ├── Build -│ │ │ │ └── HashJoin: INNER -│ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) -│ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) -│ │ │ └── Probe -│ │ │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.partsupp (#3) (read rows: 80000) -│ └── Probe -│ └── Scan: default.tpch_test.orders (#4) (read rows: 150000) -└── Probe - └── Scan: default.tpch_test.part (#0) (read rows: 20000) - -# Q10 -query I -explain join select - c_custkey, - c_name, - TRUNCATE(sum(l_extendedprice * (1 - l_discount)), 3) as revenue, - c_acctbal, - n_name, - c_address, - c_phone, - c_comment -from - customer, - orders, - lineitem, - nation -where - c_custkey = o_custkey - and l_orderkey = o_orderkey - and o_orderdate >= to_date('1993-10-01') - and o_orderdate < add_months(to_date('1993-10-01'), 3) - and l_returnflag = 'R' - and c_nationkey = n_nationkey -group by - c_custkey, - c_name, - c_acctbal, - c_phone, - n_name, - c_address, - c_comment -order by - revenue desc limit 5; ----- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) -│ └── Probe -│ └── Scan: default.tpch_test.orders (#1) (read rows: 150000) -└── Probe - └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) - -# Q11 -query I -explain join select - ps_partkey, - sum(ps_supplycost * ps_availqty) as value -from - partsupp, - supplier, - nation -where - ps_suppkey = s_suppkey - and s_nationkey = n_nationkey - and n_name = 'GERMANY' -group by - ps_partkey having - sum(ps_supplycost * ps_availqty) > ( - select - sum(ps_supplycost * ps_availqty) * 0.000002 - from - partsupp, - supplier, - nation - where - ps_suppkey = s_suppkey - and s_nationkey = n_nationkey - and n_name = 'GERMANY' - ) -order by - value desc limit 100; ----- -RangeJoin: INNER -├── Left -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.supplier (#4) (read rows: 1000) -│ └── Probe -│ └── Scan: default.tpch_test.partsupp (#3) (read rows: 80000) -└── Right - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpch_test.nation (#2) (read rows: 25) - │ └── Probe - │ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) - └── Probe - └── Scan: default.tpch_test.partsupp (#0) (read rows: 80000) - -# Q12 -query I -explain join select - l_shipmode, - sum(case - when o_orderpriority = '1-URGENT' - or o_orderpriority = '2-HIGH' - then 1 - else 0 - end) as high_line_count, - sum(case - when o_orderpriority <> '1-URGENT' - and o_orderpriority <> '2-HIGH' - then 1 - else 0 - end) as low_line_count -from - orders, - lineitem -where - o_orderkey = l_orderkey - and l_shipmode in ('MAIL', 'SHIP') - and l_commitdate < l_receiptdate - and l_shipdate < l_commitdate - and l_receiptdate >= to_date('1994-01-01') - and l_receiptdate < date_add(year, 1, to_date('1994-01-01')) -group by - l_shipmode -order by - l_shipmode; ----- -HashJoin: INNER -├── Build -│ └── Scan: default.tpch_test.lineitem (#1) (read rows: 600572) -└── Probe - └── Scan: default.tpch_test.orders (#0) (read rows: 150000) - -# Q13 -query I -explain join select - c_count, - count(*) as custdist -from - ( - select - c_custkey, - count(o_orderkey) as c_count - from - customer - left outer join - orders - on c_custkey = o_custkey - and o_comment not like '%pending%deposits%' - group by - c_custkey - ) - c_orders -group by - c_count -order by - custdist desc, - c_count desc; ----- -HashJoin: RIGHT OUTER -├── Build -│ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) -└── Probe - └── Scan: default.tpch_test.orders (#1) (read rows: 150000) - -# Q14 -query I -explain join select - TRUNCATE(100.00 * sum(case - when p_type like 'PROMO%' - then l_extendedprice * (1 - l_discount) - else 0 - end) / sum(l_extendedprice * (1 - l_discount)), 5) as promo_revenue -from - lineitem, - part -where - l_partkey = p_partkey - and l_shipdate >= to_date('1995-09-01') - and l_shipdate < add_months(to_date('1995-09-01'), 1); ----- -HashJoin: INNER -├── Build -│ └── Scan: default.tpch_test.part (#1) (read rows: 20000) -└── Probe - └── Scan: default.tpch_test.lineitem (#0) (read rows: 600572) - -# Q15 -query T -explain join with revenue as ( - select - l_suppkey as supplier_no, - truncate(sum(l_extendedprice * (1 - l_discount)), 2) as total_revenue - from - lineitem - where - l_shipdate >= to_date ('1996-01-01') - and l_shipdate < to_date ('1996-04-01') - group by - l_suppkey) -select - s_suppkey, - s_name, - s_address, - s_phone, - total_revenue -from - supplier, - revenue -where - s_suppkey = supplier_no - and total_revenue = ( - select - max(total_revenue) - from - revenue -) -order by - s_suppkey; ----- -HashJoin: INNER -├── Build -│ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) -└── Probe - └── HashJoin: INNER - ├── Build - │ └── Scan: default.tpch_test.lineitem (#1) (read rows: 600572) - └── Probe - └── Scan: default.tpch_test.supplier (#0) (read rows: 1000) - -# Q15 -query T -explain join with revenue as materialized ( - select - l_suppkey as supplier_no, - truncate(sum(l_extendedprice * (1 - l_discount)), 2) as total_revenue - from - lineitem - where - l_shipdate >= to_date ('1996-01-01') - and l_shipdate < to_date ('1996-04-01') - group by - l_suppkey) -select - s_suppkey, - s_name, - s_address, - s_phone, - total_revenue -from - supplier, - revenue -where - s_suppkey = supplier_no - and total_revenue = ( - select - max(total_revenue) - from - revenue -) -order by - s_suppkey; ----- -MaterializedCte: 0 -├── Right -│ └── Scan: default.tpch_test.lineitem (#1) (read rows: 600572) -└── Left - └── HashJoin: INNER - ├── Build - │ └── CTEScan - │ ├── CTE index: 0, sub index: 2 - │ └── estimated rows: 415.00 - └── Probe - └── HashJoin: INNER - ├── Build - │ └── CTEScan - │ ├── CTE index: 0, sub index: 1 - │ └── estimated rows: 415.00 - └── Probe - └── Scan: default.tpch_test.supplier (#0) (read rows: 1000) - -# Q16 -query I -explain join select - p_brand, - p_type, - p_size, - count(distinct ps_suppkey) as supplier_cnt -from - partsupp, - part -where - p_partkey = ps_partkey - and p_brand <> 'Brand#45' - and p_type not like 'MEDIUM POLISHED%' - and p_size in (49, 14, 23, 45, 19, 3, 36, 9) - and ps_suppkey not in ( - select - s_suppkey - from - supplier - where - s_comment like '%Customer%Complaints%' -) -group by - p_brand, - p_type, - p_size -order by - supplier_cnt desc, - p_brand, - p_type, - p_size -limit 20; ----- -HashJoin: RIGHT MARK -├── Build -│ └── Scan: default.tpch_test.supplier (#2) (read rows: 1000) -└── Probe - └── HashJoin: INNER - ├── Build - │ └── Scan: default.tpch_test.part (#1) (read rows: 20000) - └── Probe - └── Scan: default.tpch_test.partsupp (#0) (read rows: 80000) - -#Q17 -query I -explain join select - truncate(sum(l_extendedprice) / 7.0,8) as avg_yearly -from - lineitem, - part -where - p_partkey = l_partkey - and p_brand = 'Brand#23' - and p_container = 'MED BOX' - and l_quantity < ( - select - 0.2 * avg(l_quantity) - from - lineitem - where - l_partkey = p_partkey -); ----- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── Scan: default.tpch_test.part (#1) (read rows: 20000) -│ └── Probe -│ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) -└── Probe - └── Scan: default.tpch_test.lineitem (#0) (read rows: 600572) - -#Q18 -query I -explain join select - c_name, - c_custkey, - o_orderkey, - o_orderdate, - o_totalprice, - sum(l_quantity) -from - customer, - orders, - lineitem -where - o_orderkey in ( - select - l_orderkey - from - lineitem - group by - l_orderkey having - sum(l_quantity) > 300 - ) - and c_custkey = o_custkey - and o_orderkey = l_orderkey -group by - c_name, - c_custkey, - o_orderkey, - o_orderdate, - o_totalprice -order by - o_totalprice desc, - o_orderdate; ----- -HashJoin: INNER -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── Scan: default.tpch_test.lineitem (#3) (read rows: 600572) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.orders (#1) (read rows: 150000) -│ └── Probe -│ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) -└── Probe - └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) - -# Q19 -query I -explain join select - truncate(sum(l_extendedprice* (1 - l_discount)),3) as revenue -from - lineitem, - part -where - ( - p_partkey = l_partkey - and p_brand = 'Brand#52' - and p_container in - ( - 'SM CASE', - 'SM BOX', - 'SM PACK', - 'SM PKG' - ) - and l_quantity >= 4 - and l_quantity <= 4 + 10 - and p_size between 1 and 5 - and l_shipmode in - ( - 'AIR', - 'AIR REG' - ) - and l_shipinstruct = 'DELIVER IN PERSON' - ) - or - ( - p_partkey = l_partkey - and p_brand = 'Brand#11' - and p_container in - ( - 'MED BAG', - 'MED BOX', - 'MED PKG', - 'MED PACK' - ) - and l_quantity >= 18 - and l_quantity <= 18 + 10 - and p_size between 1 and 10 - and l_shipmode in - ( - 'AIR', - 'AIR REG' - ) - and l_shipinstruct = 'DELIVER IN PERSON' - ) - or - ( - p_partkey = l_partkey - and p_brand = 'Brand#51' - and p_container in - ( - 'LG CASE', - 'LG BOX', - 'LG PACK', - 'LG PKG' - ) - and l_quantity >= 29 - and l_quantity <= 29 + 10 - and p_size between 1 and 15 - and l_shipmode in - ( - 'AIR', - 'AIR REG' - ) - and l_shipinstruct = 'DELIVER IN PERSON' - ) -; ----- -HashJoin: INNER -├── Build -│ └── Scan: default.tpch_test.part (#1) (read rows: 20000) -└── Probe - └── Scan: default.tpch_test.lineitem (#0) (read rows: 600572) - -# Q20 -query I -explain join select - s_name, - s_address -from - supplier, - nation -where - s_suppkey in ( - select - ps_suppkey - from - partsupp - where - ps_partkey in ( - select - p_partkey - from - part - where - p_name like 'forest%' - ) - and ps_availqty > ( - select - 0.5 * sum(l_quantity) - from - lineitem - where - l_partkey = ps_partkey - and l_suppkey = ps_suppkey - and l_shipdate >= to_date('1994-01-01') - and l_shipdate < add_years(to_date('1994-01-01'), 1) - ) - ) - and s_nationkey = n_nationkey - and n_name = 'CANADA' -order by - s_name; ----- -HashJoin: RIGHT SEMI -├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── Scan: default.tpch_test.nation (#1) (read rows: 25) -│ └── Probe -│ └── Scan: default.tpch_test.supplier (#0) (read rows: 1000) -└── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: LEFT SEMI - │ ├── Build - │ │ └── Scan: default.tpch_test.part (#3) (read rows: 20000) - │ └── Probe - │ └── Scan: default.tpch_test.partsupp (#2) (read rows: 80000) - └── Probe - └── Scan: default.tpch_test.lineitem (#4) (read rows: 600572) - -# Q21 -query I -explain join select - s_name, - truncate(count(*),4) as numwait -from - supplier, - lineitem l1, - orders, - nation -where - s_suppkey = l1.l_suppkey - and o_orderkey = l1.l_orderkey - and o_orderstatus = 'F' - and l1.l_receiptdate > l1.l_commitdate - and exists ( - select - * - from - lineitem l2 - where - l2.l_orderkey = l1.l_orderkey - and l2.l_suppkey <> l1.l_suppkey - ) - and not exists ( - select - * - from - lineitem l3 - where - l3.l_orderkey = l1.l_orderkey - and l3.l_suppkey <> l1.l_suppkey - and l3.l_receiptdate > l3.l_commitdate - ) - and s_nationkey = n_nationkey - and n_name = 'SAUDI ARABIA' -group by - s_name -order by - numwait desc, - s_name; ----- -HashJoin: RIGHT ANTI -├── Build -│ └── HashJoin: RIGHT SEMI -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── HashJoin: INNER -│ │ │ ├── Build -│ │ │ │ └── HashJoin: INNER -│ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) -│ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpch_test.supplier (#0) (read rows: 1000) -│ │ │ └── Probe -│ │ │ └── Scan: default.tpch_test.lineitem (#1) (read rows: 600572) -│ │ └── Probe -│ │ └── Scan: default.tpch_test.orders (#2) (read rows: 150000) -│ └── Probe -│ └── Scan: default.tpch_test.lineitem (#4) (read rows: 600572) -└── Probe - └── Scan: default.tpch_test.lineitem (#5) (read rows: 600572) - -# Q22 -query I -explain join select - cntrycode, - count(*) as numcust, - sum(c_acctbal) as totacctbal -from - ( - select - substring(c_phone from 1 for 2) as cntrycode, - c_acctbal - from - customer - where - substring(c_phone from 1 for 2) in - ('13', '31', '23', '29', '30', '18', '17') - and c_acctbal > ( - select - avg(c_acctbal) - from - customer - where - c_acctbal > 0.00 - and substring(c_phone from 1 for 2) in - ('13', '31', '23', '29', '30', '18', '17') - ) - and not exists ( - select - * - from - orders - where - o_custkey = c_custkey - ) - ) as custsale -group by - cntrycode -order by - cntrycode; ----- -HashJoin: RIGHT ANTI -├── Build -│ └── RangeJoin: INNER -│ ├── Left -│ │ └── Scan: default.tpch_test.customer (#1) (read rows: 15000) -│ └── Right -│ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) -└── Probe - └── Scan: default.tpch_test.orders (#2) (read rows: 150000) - -statement ok -set enable_analyze_histogram = 0; From 118296bee4d0762896f39804a1fda5497538aabf Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Tue, 26 Nov 2024 12:07:13 +0800 Subject: [PATCH 11/15] chore(test): fix sqllogictest --- .../suites/mode/cluster/explain_v2.test | 4 +- .../suites/mode/cluster/filter_nulls.test | 10 ++--- .../mode/cluster/memo/aggregate_property.test | 44 +++++++++---------- .../mode/cluster/memo/join_property.test | 8 ++-- .../mode/cluster/memo/mix_property.test | 38 ++++++++-------- .../suites/mode/cluster/window.test | 4 +- 6 files changed, 54 insertions(+), 54 deletions(-) diff --git a/tests/sqllogictests/suites/mode/cluster/explain_v2.test b/tests/sqllogictests/suites/mode/cluster/explain_v2.test index 0aef8d2184b8..85268eefef87 100644 --- a/tests/sqllogictests/suites/mode/cluster/explain_v2.test +++ b/tests/sqllogictests/suites/mode/cluster/explain_v2.test @@ -43,14 +43,14 @@ Exchange └── Filter ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] ├── filters: [(t1.a (#0) > 3 OR (t2.a (#2) > 5 AND t1.a (#0) > 1))] - ├── estimated rows: 99.60 + ├── estimated rows: 99.68 └── HashJoin ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] ├── join type: INNER ├── build keys: [t2.a (#2)] ├── probe keys: [t1.a (#0)] ├── filters: [] - ├── estimated rows: 99.84 + ├── estimated rows: 99.92 ├── Exchange(Build) │ ├── output columns: [t2.a (#2), t2.b (#3)] │ ├── exchange type: Broadcast diff --git a/tests/sqllogictests/suites/mode/cluster/filter_nulls.test b/tests/sqllogictests/suites/mode/cluster/filter_nulls.test index 96e20e8bf451..14e2d5d66bef 100644 --- a/tests/sqllogictests/suites/mode/cluster/filter_nulls.test +++ b/tests/sqllogictests/suites/mode/cluster/filter_nulls.test @@ -44,7 +44,7 @@ Exchange ├── build keys: [table2.value (#1)] ├── probe keys: [table1.value (#0)] ├── filters: [] - ├── estimated rows: 2000.00 + ├── estimated rows: 250.00 ├── Exchange(Build) │ ├── output columns: [table2.value (#1)] │ ├── exchange type: Broadcast @@ -89,8 +89,8 @@ Exchange ├── build keys: [table3.value (#2)] ├── probe keys: [table1.value (#0)] ├── filters: [] - ├── estimated rows: 4000.00 - ├── HashJoin(Build) + ├── estimated rows: 200.00 + ├── Exchange(Build) │ ├── output columns: [table3.value (#2), table2.value (#1)] │ ├── join type: INNER │ ├── build keys: [table2.value (#1)] @@ -162,7 +162,7 @@ Exchange ├── build keys: [table2.value (#1)] ├── probe keys: [table1.value (#0)] ├── filters: [] - ├── estimated rows: 2000.00 + ├── estimated rows: 250.00 ├── Exchange(Build) │ ├── output columns: [table2.value (#1)] │ ├── exchange type: Broadcast @@ -207,7 +207,7 @@ Exchange ├── build keys: [table2.value (#1)] ├── probe keys: [table1.value (#0)] ├── filters: [] - ├── estimated rows: 1000.00 + ├── estimated rows: 250.00 ├── Exchange(Build) │ ├── output columns: [table2.value (#1)] │ ├── exchange type: Hash(table2.value (#1)) diff --git a/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test b/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test index 205b7ebb4bf0..0814c7608ba1 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test @@ -63,22 +63,22 @@ Memo │ └── #0 Join [#0, #3] ├── Group #5 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 4420.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #1, cost: 7920.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 4419.000, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #1, cost: 7569.000, children: [{ dist: Any }] │ ├── #0 EvalScalar [#4] │ └── #1 Exchange: (Merge) [#5] ├── Group #6 │ ├── Best properties -│ │ └── { dist: Serial }: expr: #0, cost: 7970.000, children: [{ dist: Serial }] +│ │ └── { dist: Serial }: expr: #0, cost: 7614.000, children: [{ dist: Serial }] │ ├── #0 Aggregate [#5] │ └── #1 Exchange: (Merge) [#6] ├── Group #7 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 7975.000, children: [{ dist: Serial }] +│ │ └── { dist: Any }: expr: #0, cost: 7619.000, children: [{ dist: Serial }] │ └── #0 Aggregate [#6] └── Group #8 ├── Best properties - │ └── { dist: Serial }: expr: #0, cost: 7976.000, children: [{ dist: Any }] + │ └── { dist: Serial }: expr: #0, cost: 7620.000, children: [{ dist: Any }] └── #0 EvalScalar [#7] query T @@ -126,22 +126,22 @@ Memo │ └── #0 Join [#0, #3] ├── Group #5 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 4420.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 4930.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 4419.000, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 4878.000, children: [{ dist: Any }] │ ├── #0 EvalScalar [#4] │ └── #1 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#5] ├── Group #6 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 4980.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 4923.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] │ └── #0 Aggregate [#5] ├── Group #7 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 5030.000, children: [{ dist: Any }] +│ │ └── { dist: Any }: expr: #0, cost: 4968.000, children: [{ dist: Any }] │ └── #0 Aggregate [#6] ├── Group #8 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 5040.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #4, cost: 8540.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 4977.000, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #4, cost: 8127.000, children: [{ dist: Any }] │ ├── #0 EvalScalar [#7] │ ├── #1 EvalScalar [#14] │ ├── #2 EvalScalar [#20] @@ -166,16 +166,16 @@ Memo ├── Group #12 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 66410.000, children: [{ dist: Any }, { dist: Broadcast }] -│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 66920.000, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 66869.000, children: [{ dist: Any }] │ ├── #0 Join [#11, #3] │ └── #1 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#12] ├── Group #13 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 66970.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 66914.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] │ └── #0 Aggregate [#12] ├── Group #14 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 67020.000, children: [{ dist: Any }] +│ │ └── { dist: Any }: expr: #0, cost: 66959.000, children: [{ dist: Any }] │ └── #0 Aggregate [#13] ├── Group #15 │ ├── Best properties @@ -197,17 +197,17 @@ Memo │ └── #0 Join [#0, #16] ├── Group #18 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 5030.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 5540.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 5029.000, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 5488.000, children: [{ dist: Any }] │ ├── #0 EvalScalar [#17] │ └── #1 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#18] ├── Group #19 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 5590.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 5533.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] │ └── #0 Aggregate [#18] ├── Group #20 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 5640.000, children: [{ dist: Any }] +│ │ └── { dist: Any }: expr: #0, cost: 5578.000, children: [{ dist: Any }] │ └── #0 Aggregate [#19] ├── Group #21 │ ├── Best properties @@ -215,17 +215,17 @@ Memo │ └── #0 Join [#11, #16] ├── Group #22 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 67030.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 67540.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 67029.000, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 67488.000, children: [{ dist: Any }] │ ├── #0 EvalScalar [#21] │ └── #1 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#22] ├── Group #23 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 67590.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 67533.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] │ └── #0 Aggregate [#22] └── Group #24 ├── Best properties - │ └── { dist: Any }: expr: #0, cost: 67640.000, children: [{ dist: Any }] + │ └── { dist: Any }: expr: #0, cost: 67578.000, children: [{ dist: Any }] └── #0 Aggregate [#23] diff --git a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test index 2bab888b4a88..590cb049c067 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test @@ -62,8 +62,8 @@ Memo │ └── #0 Join [#0, #3] └── Group #5 ├── Best properties - │ ├── { dist: Any }: expr: #0, cost: 4420.000, children: [{ dist: Any }] - │ └── { dist: Serial }: expr: #1, cost: 7920.000, children: [{ dist: Any }] + │ ├── { dist: Any }: expr: #0, cost: 4419.000, children: [{ dist: Any }] + │ └── { dist: Serial }: expr: #1, cost: 7569.000, children: [{ dist: Any }] ├── #0 EvalScalar [#4] └── #1 Exchange: (Merge) [#5] @@ -192,8 +192,8 @@ Memo │ └── #0 Join [#2, #3] └── Group #5 ├── Best properties - │ ├── { dist: Any }: expr: #0, cost: 112820.000, children: [{ dist: Any }] - │ └── { dist: Serial }: expr: #1, cost: 462820.000, children: [{ dist: Any }] + │ ├── { dist: Any }: expr: #0, cost: 112911.000, children: [{ dist: Any }] + │ └── { dist: Serial }: expr: #1, cost: 494761.000, children: [{ dist: Any }] ├── #0 EvalScalar [#4] └── #1 Exchange: (Merge) [#5] diff --git a/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test b/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test index 5643c1172676..06f6f4bb93f0 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test @@ -66,22 +66,22 @@ Memo │ └── #0 Join [#0, #3] ├── Group #5 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 4420.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 4930.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 4419.000, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 4878.000, children: [{ dist: Any }] │ ├── #0 EvalScalar [#4] │ └── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#5] ├── Group #6 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 4980.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 4923.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] │ └── #0 Aggregate [#5] ├── Group #7 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 5030.000, children: [{ dist: Any }] +│ │ └── { dist: Any }: expr: #0, cost: 4968.000, children: [{ dist: Any }] │ └── #0 Aggregate [#6] ├── Group #8 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 5040.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #4, cost: 8540.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 4977.000, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #4, cost: 8127.000, children: [{ dist: Any }] │ ├── #0 EvalScalar [#7] │ ├── #1 EvalScalar [#16] │ ├── #2 EvalScalar [#22] @@ -89,11 +89,11 @@ Memo │ └── #4 Exchange: (Merge) [#8] ├── Group #9 │ ├── Best properties -│ │ └── { dist: Serial }: expr: #0, cost: 8550.000, children: [{ dist: Serial }] +│ │ └── { dist: Serial }: expr: #0, cost: 8136.000, children: [{ dist: Serial }] │ └── #0 Sort [#8] ├── Group #10 │ ├── Best properties -│ │ └── { dist: Serial }: expr: #0, cost: 8560.000, children: [{ dist: Serial }] +│ │ └── { dist: Serial }: expr: #0, cost: 8145.000, children: [{ dist: Serial }] │ └── #0 Limit [#9] ├── Group #11 │ ├── Best properties @@ -114,16 +114,16 @@ Memo ├── Group #14 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 66410.000, children: [{ dist: Any }, { dist: Broadcast }] -│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 66920.000, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 66869.000, children: [{ dist: Any }] │ ├── #0 Join [#13, #3] │ └── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#14] ├── Group #15 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 66970.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 66914.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] │ └── #0 Aggregate [#14] ├── Group #16 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 67020.000, children: [{ dist: Any }] +│ │ └── { dist: Any }: expr: #0, cost: 66959.000, children: [{ dist: Any }] │ └── #0 Aggregate [#15] ├── Group #17 │ ├── Best properties @@ -145,17 +145,17 @@ Memo │ └── #0 Join [#0, #18] ├── Group #20 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 9130.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 9640.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 9129.000, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 9588.000, children: [{ dist: Any }] │ ├── #0 EvalScalar [#19] │ └── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#20] ├── Group #21 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 9690.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 9633.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] │ └── #0 Aggregate [#20] ├── Group #22 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 9740.000, children: [{ dist: Any }] +│ │ └── { dist: Any }: expr: #0, cost: 9678.000, children: [{ dist: Any }] │ └── #0 Aggregate [#21] ├── Group #23 │ ├── Best properties @@ -163,17 +163,17 @@ Memo │ └── #0 Join [#13, #18] ├── Group #24 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 71130.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 71640.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 71129.000, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 71588.000, children: [{ dist: Any }] │ ├── #0 EvalScalar [#23] │ └── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#24] ├── Group #25 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 71690.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 71633.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] │ └── #0 Aggregate [#24] └── Group #26 ├── Best properties - │ └── { dist: Any }: expr: #0, cost: 71740.000, children: [{ dist: Any }] + │ └── { dist: Any }: expr: #0, cost: 71678.000, children: [{ dist: Any }] └── #0 Aggregate [#25] diff --git a/tests/sqllogictests/suites/mode/cluster/window.test b/tests/sqllogictests/suites/mode/cluster/window.test index fd8f6e106600..1781c2874851 100644 --- a/tests/sqllogictests/suites/mode/cluster/window.test +++ b/tests/sqllogictests/suites/mode/cluster/window.test @@ -87,7 +87,7 @@ Exchange └── WindowPartition ├── output columns: [e.name (#1), e.salary (#3), d.department_name (#5), d.department_id (#4)] ├── hash keys: [department_id] - ├── estimated rows: 10.00 + ├── estimated rows: 8.00 └── Exchange ├── output columns: [e.name (#1), e.salary (#3), d.department_name (#5), d.department_id (#4)] ├── exchange type: Hash(d.department_id (#4)) @@ -97,7 +97,7 @@ Exchange ├── build keys: [d.department_id (#4)] ├── probe keys: [e.department_id (#2)] ├── filters: [] - ├── estimated rows: 10.00 + ├── estimated rows: 8.00 ├── Exchange(Build) │ ├── output columns: [d.department_id (#4), d.department_name (#5)] │ ├── exchange type: Broadcast From e6c543cc3bd1f2a3bcedd54d7d30b64e32837830 Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Tue, 26 Nov 2024 12:07:31 +0800 Subject: [PATCH 12/15] chore(query): fix sub overflow --- src/query/sql/src/planner/plans/scan.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/sql/src/planner/plans/scan.rs b/src/query/sql/src/planner/plans/scan.rs index 56cd6835488c..c35a55e93124 100644 --- a/src/query/sql/src/planner/plans/scan.rs +++ b/src/query/sql/src/planner/plans/scan.rs @@ -245,7 +245,7 @@ impl Operator for Scan { } else { histogram_from_ndv( ndv, - num_rows - col_stat.null_count, + num_rows.saturating_sub(col_stat.null_count), Some((min.clone(), max.clone())), DEFAULT_HISTOGRAM_BUCKETS, ) From d1fba76bfa89200ce2f3f12500b215ea854f0004 Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Tue, 26 Nov 2024 14:30:19 +0800 Subject: [PATCH 13/15] chore(planner): refine scan histogram --- src/query/sql/src/planner/plans/scan.rs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/query/sql/src/planner/plans/scan.rs b/src/query/sql/src/planner/plans/scan.rs index c35a55e93124..67d250067d7a 100644 --- a/src/query/sql/src/planner/plans/scan.rs +++ b/src/query/sql/src/planner/plans/scan.rs @@ -243,13 +243,19 @@ impl Operator for Scan { { histogram.clone() } else { - histogram_from_ndv( - ndv, - num_rows.saturating_sub(col_stat.null_count), - Some((min.clone(), max.clone())), - DEFAULT_HISTOGRAM_BUCKETS, - ) - .ok() + let num_rows = num_rows.saturating_sub(col_stat.null_count); + let ndv = std::cmp::min(num_rows, ndv); + if num_rows != 0 { + histogram_from_ndv( + ndv, + num_rows, + Some((min.clone(), max.clone())), + DEFAULT_HISTOGRAM_BUCKETS, + ) + .ok() + } else { + None + } }; let column_stat = ColumnStat { min, From cf2ca2f4555f6489cfa3d72e84c79cb71c7af936 Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Tue, 26 Nov 2024 14:30:30 +0800 Subject: [PATCH 14/15] chore(test): update sqllogictest --- .../suites/mode/cluster/filter_nulls.test | 81 +++++++++---------- 1 file changed, 39 insertions(+), 42 deletions(-) diff --git a/tests/sqllogictests/suites/mode/cluster/filter_nulls.test b/tests/sqllogictests/suites/mode/cluster/filter_nulls.test index 14e2d5d66bef..3c90d1942418 100644 --- a/tests/sqllogictests/suites/mode/cluster/filter_nulls.test +++ b/tests/sqllogictests/suites/mode/cluster/filter_nulls.test @@ -92,32 +92,32 @@ Exchange ├── estimated rows: 200.00 ├── Exchange(Build) │ ├── output columns: [table3.value (#2), table2.value (#1)] - │ ├── join type: INNER - │ ├── build keys: [table2.value (#1)] - │ ├── probe keys: [table3.value (#2)] - │ ├── filters: [] - │ ├── estimated rows: 2000.00 - │ ├── Exchange(Build) - │ │ ├── output columns: [table2.value (#1)] - │ │ ├── exchange type: Hash(table2.value (#1)) - │ │ └── Filter - │ │ ├── output columns: [table2.value (#1)] - │ │ ├── filters: [is_not_null(table2.value (#1))] - │ │ ├── estimated rows: 1000.00 - │ │ └── TableScan - │ │ ├── table: default.default.table2 - │ │ ├── output columns: [value (#1)] - │ │ ├── read rows: 1000 - │ │ ├── read size: 2.30 KiB - │ │ ├── partitions total: 6 - │ │ ├── partitions scanned: 3 - │ │ ├── pruning stats: [segments: , blocks: ] - │ │ ├── push downs: [filters: [is_not_null(table2.value (#1))], limit: NONE] - │ │ └── estimated rows: 4000.00 - │ └── Exchange(Probe) - │ ├── output columns: [table3.value (#2)] - │ ├── exchange type: Hash(table3.value (#2)) - │ └── Filter + │ ├── exchange type: Broadcast + │ └── HashJoin + │ ├── output columns: [table3.value (#2), table2.value (#1)] + │ ├── join type: INNER + │ ├── build keys: [table2.value (#1)] + │ ├── probe keys: [table3.value (#2)] + │ ├── filters: [] + │ ├── estimated rows: 250.00 + │ ├── Exchange(Build) + │ │ ├── output columns: [table2.value (#1)] + │ │ ├── exchange type: Broadcast + │ │ └── Filter + │ │ ├── output columns: [table2.value (#1)] + │ │ ├── filters: [is_not_null(table2.value (#1))] + │ │ ├── estimated rows: 1000.00 + │ │ └── TableScan + │ │ ├── table: default.default.table2 + │ │ ├── output columns: [value (#1)] + │ │ ├── read rows: 1000 + │ │ ├── read size: 2.30 KiB + │ │ ├── partitions total: 6 + │ │ ├── partitions scanned: 3 + │ │ ├── pruning stats: [segments: , blocks: ] + │ │ ├── push downs: [filters: [is_not_null(table2.value (#1))], limit: NONE] + │ │ └── estimated rows: 4000.00 + │ └── Filter(Probe) │ ├── output columns: [table3.value (#2)] │ ├── filters: [is_not_null(table3.value (#2))] │ ├── estimated rows: 2000.00 @@ -131,23 +131,20 @@ Exchange │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [is_not_null(table3.value (#2))], limit: NONE] │ └── estimated rows: 4000.00 - └── Exchange(Probe) + └── Filter(Probe) ├── output columns: [table1.value (#0)] - ├── exchange type: Hash(table1.value (#0)) - └── Filter - ├── output columns: [table1.value (#0)] - ├── filters: [is_not_null(table1.value (#0))] - ├── estimated rows: 2000.00 - └── TableScan - ├── table: default.default.table1 - ├── output columns: [value (#0)] - ├── read rows: 2000 - ├── read size: 3.94 KiB - ├── partitions total: 6 - ├── partitions scanned: 3 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [is_not_null(table1.value (#0))], limit: NONE] - └── estimated rows: 4000.00 + ├── filters: [is_not_null(table1.value (#0))] + ├── estimated rows: 2000.00 + └── TableScan + ├── table: default.default.table1 + ├── output columns: [value (#0)] + ├── read rows: 2000 + ├── read size: 3.94 KiB + ├── partitions total: 6 + ├── partitions scanned: 3 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_not_null(table1.value (#0))], limit: NONE] + └── estimated rows: 4000.00 query T From 42586df2cdbe62278dce40a1ace31e89f1f317f4 Mon Sep 17 00:00:00 2001 From: Dousir9 <736191200@qq.com> Date: Tue, 26 Nov 2024 15:31:28 +0800 Subject: [PATCH 15/15] chore(test): update sqllogictest --- .../suites/mode/standalone/explain/join.test | 90 +++++++++---------- .../mode/standalone/explain_native/join.test | 66 +++++++------- 2 files changed, 78 insertions(+), 78 deletions(-) diff --git a/tests/sqllogictests/suites/mode/standalone/explain/join.test b/tests/sqllogictests/suites/mode/standalone/explain/join.test index 4843f454f3ad..924a5e633db7 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/join.test @@ -233,117 +233,117 @@ query T explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where b.x > 42 ---- HashJoin -├── output columns: [b.x (#1), b.y (#2), a.x (#0)] +├── output columns: [a.x (#0), b.y (#2), b.x (#1)] ├── join type: INNER -├── build keys: [a.x (#0)] -├── probe keys: [b.x (#1)] +├── build keys: [b.x (#1)] +├── probe keys: [a.x (#0)] ├── filters: [] -├── estimated rows: 2.67 +├── estimated rows: 2.37 ├── Filter(Build) -│ ├── output columns: [a.x (#0)] -│ ├── filters: [is_true(a.x (#0) > 42)] +│ ├── output columns: [b.x (#1), b.y (#2)] +│ ├── filters: [is_true(b.x (#1) > 42)] │ ├── estimated rows: 2.67 │ └── TableScan -│ ├── table: default.default.onecolumn -│ ├── output columns: [x (#0)] +│ ├── table: default.default.twocolumn +│ ├── output columns: [x (#1), y (#2)] │ ├── read rows: 4 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] -│ ├── push downs: [filters: [is_true(onecolumn.x (#0) > 42)], limit: NONE] +│ ├── push downs: [filters: [is_true(twocolumn.x (#1) > 42)], limit: NONE] │ └── estimated rows: 4.00 └── Filter(Probe) - ├── output columns: [b.x (#1), b.y (#2)] - ├── filters: [is_true(b.x (#1) > 42)] - ├── estimated rows: 3.00 + ├── output columns: [a.x (#0)] + ├── filters: [is_true(a.x (#0) > 42)] + ├── estimated rows: 2.67 └── TableScan - ├── table: default.default.twocolumn - ├── output columns: [x (#1), y (#2)] + ├── table: default.default.onecolumn + ├── output columns: [x (#0)] ├── read rows: 4 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [is_true(twocolumn.x (#1) > 42)], limit: NONE] + ├── push downs: [filters: [is_true(onecolumn.x (#0) > 42)], limit: NONE] └── estimated rows: 4.00 query T explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where b.x > 44 or b.x < 43 ---- HashJoin -├── output columns: [b.x (#1), b.y (#2), a.x (#0)] +├── output columns: [a.x (#0), b.y (#2), b.x (#1)] ├── join type: INNER -├── build keys: [a.x (#0)] -├── probe keys: [b.x (#1)] +├── build keys: [b.x (#1)] +├── probe keys: [a.x (#0)] ├── filters: [] -├── estimated rows: 1.17 +├── estimated rows: 0.89 ├── Filter(Build) -│ ├── output columns: [a.x (#0)] -│ ├── filters: [is_true((a.x (#0) > 44 OR a.x (#0) < 43))] +│ ├── output columns: [b.x (#1), b.y (#2)] +│ ├── filters: [is_true((b.x (#1) > 44 OR b.x (#1) < 43))] │ ├── estimated rows: 1.33 │ └── TableScan -│ ├── table: default.default.onecolumn -│ ├── output columns: [x (#0)] +│ ├── table: default.default.twocolumn +│ ├── output columns: [x (#1), y (#2)] │ ├── read rows: 4 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] -│ ├── push downs: [filters: [is_true((onecolumn.x (#0) > 44 OR onecolumn.x (#0) < 43))], limit: NONE] +│ ├── push downs: [filters: [is_true((twocolumn.x (#1) > 44 OR twocolumn.x (#1) < 43))], limit: NONE] │ └── estimated rows: 4.00 └── Filter(Probe) - ├── output columns: [b.x (#1), b.y (#2)] - ├── filters: [is_true((b.x (#1) > 44 OR b.x (#1) < 43))] - ├── estimated rows: 1.75 + ├── output columns: [a.x (#0)] + ├── filters: [is_true((a.x (#0) > 44 OR a.x (#0) < 43))] + ├── estimated rows: 1.33 └── TableScan - ├── table: default.default.twocolumn - ├── output columns: [x (#1), y (#2)] + ├── table: default.default.onecolumn + ├── output columns: [x (#0)] ├── read rows: 4 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [is_true((twocolumn.x (#1) > 44 OR twocolumn.x (#1) < 43))], limit: NONE] + ├── push downs: [filters: [is_true((onecolumn.x (#0) > 44 OR onecolumn.x (#0) < 43))], limit: NONE] └── estimated rows: 4.00 query T explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where b.x > 42 and b.x < 45 ---- HashJoin -├── output columns: [b.x (#1), b.y (#2), a.x (#0)] +├── output columns: [a.x (#0), b.y (#2), b.x (#1)] ├── join type: INNER -├── build keys: [a.x (#0)] -├── probe keys: [b.x (#1)] +├── build keys: [b.x (#1)] +├── probe keys: [a.x (#0)] ├── filters: [] -├── estimated rows: 3.56 +├── estimated rows: 2.37 ├── Filter(Build) -│ ├── output columns: [a.x (#0)] -│ ├── filters: [is_true(a.x (#0) > 42), is_true(a.x (#0) < 45)] +│ ├── output columns: [b.x (#1), b.y (#2)] +│ ├── filters: [is_true(b.x (#1) > 42), is_true(b.x (#1) < 45)] │ ├── estimated rows: 2.67 │ └── TableScan -│ ├── table: default.default.onecolumn -│ ├── output columns: [x (#0)] +│ ├── table: default.default.twocolumn +│ ├── output columns: [x (#1), y (#2)] │ ├── read rows: 4 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] -│ ├── push downs: [filters: [and_filters(onecolumn.x (#0) > 42, onecolumn.x (#0) < 45)], limit: NONE] +│ ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] │ └── estimated rows: 4.00 └── Filter(Probe) - ├── output columns: [b.x (#1), b.y (#2)] - ├── filters: [is_true(b.x (#1) > 42), is_true(b.x (#1) < 45)] + ├── output columns: [a.x (#0)] + ├── filters: [is_true(a.x (#0) > 42), is_true(a.x (#0) < 45)] ├── estimated rows: 2.67 └── TableScan - ├── table: default.default.twocolumn - ├── output columns: [x (#1), y (#2)] + ├── table: default.default.onecolumn + ├── output columns: [x (#0)] ├── read rows: 4 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] + ├── push downs: [filters: [and_filters(onecolumn.x (#0) > 42, onecolumn.x (#0) < 45)], limit: NONE] └── estimated rows: 4.00 # the following cases won't be converted to inner join @@ -392,7 +392,7 @@ HashJoin ├── build keys: [a.x (#0)] ├── probe keys: [b.x (#1)] ├── filters: [] -├── estimated rows: 3.56 +├── estimated rows: 2.67 ├── Filter(Build) │ ├── output columns: [a.x (#0)] │ ├── filters: [is_true(a.x (#0) > 42), is_true(a.x (#0) < 45)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/join.test b/tests/sqllogictests/suites/mode/standalone/explain_native/join.test index ce48d657fb7f..223721f153ff 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/join.test @@ -217,93 +217,93 @@ query T explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where b.x > 42 ---- HashJoin -├── output columns: [b.x (#1), b.y (#2), a.x (#0)] +├── output columns: [a.x (#0), b.y (#2), b.x (#1)] ├── join type: INNER -├── build keys: [a.x (#0)] -├── probe keys: [b.x (#1)] +├── build keys: [b.x (#1)] +├── probe keys: [a.x (#0)] ├── filters: [] -├── estimated rows: 2.67 +├── estimated rows: 2.37 ├── TableScan(Build) -│ ├── table: default.default.onecolumn -│ ├── output columns: [x (#0)] +│ ├── table: default.default.twocolumn +│ ├── output columns: [x (#1), y (#2)] │ ├── read rows: 4 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] -│ ├── push downs: [filters: [is_true(onecolumn.x (#0) > 42)], limit: NONE] +│ ├── push downs: [filters: [is_true(twocolumn.x (#1) > 42)], limit: NONE] │ └── estimated rows: 2.67 └── TableScan(Probe) - ├── table: default.default.twocolumn - ├── output columns: [x (#1), y (#2)] + ├── table: default.default.onecolumn + ├── output columns: [x (#0)] ├── read rows: 4 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [is_true(twocolumn.x (#1) > 42)], limit: NONE] - └── estimated rows: 3.00 + ├── push downs: [filters: [is_true(onecolumn.x (#0) > 42)], limit: NONE] + └── estimated rows: 2.67 query T explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where b.x > 44 or b.x < 43 ---- HashJoin -├── output columns: [b.x (#1), b.y (#2), a.x (#0)] +├── output columns: [a.x (#0), b.y (#2), b.x (#1)] ├── join type: INNER -├── build keys: [a.x (#0)] -├── probe keys: [b.x (#1)] +├── build keys: [b.x (#1)] +├── probe keys: [a.x (#0)] ├── filters: [] -├── estimated rows: 1.17 +├── estimated rows: 0.89 ├── TableScan(Build) -│ ├── table: default.default.onecolumn -│ ├── output columns: [x (#0)] +│ ├── table: default.default.twocolumn +│ ├── output columns: [x (#1), y (#2)] │ ├── read rows: 4 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] -│ ├── push downs: [filters: [is_true((onecolumn.x (#0) > 44 OR onecolumn.x (#0) < 43))], limit: NONE] +│ ├── push downs: [filters: [is_true((twocolumn.x (#1) > 44 OR twocolumn.x (#1) < 43))], limit: NONE] │ └── estimated rows: 1.33 └── TableScan(Probe) - ├── table: default.default.twocolumn - ├── output columns: [x (#1), y (#2)] + ├── table: default.default.onecolumn + ├── output columns: [x (#0)] ├── read rows: 4 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [is_true((twocolumn.x (#1) > 44 OR twocolumn.x (#1) < 43))], limit: NONE] - └── estimated rows: 1.75 + ├── push downs: [filters: [is_true((onecolumn.x (#0) > 44 OR onecolumn.x (#0) < 43))], limit: NONE] + └── estimated rows: 1.33 query T explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where b.x > 42 and b.x < 45 ---- HashJoin -├── output columns: [b.x (#1), b.y (#2), a.x (#0)] +├── output columns: [a.x (#0), b.y (#2), b.x (#1)] ├── join type: INNER -├── build keys: [a.x (#0)] -├── probe keys: [b.x (#1)] +├── build keys: [b.x (#1)] +├── probe keys: [a.x (#0)] ├── filters: [] -├── estimated rows: 3.56 +├── estimated rows: 2.37 ├── TableScan(Build) -│ ├── table: default.default.onecolumn -│ ├── output columns: [x (#0)] +│ ├── table: default.default.twocolumn +│ ├── output columns: [x (#1), y (#2)] │ ├── read rows: 4 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] -│ ├── push downs: [filters: [and_filters(onecolumn.x (#0) > 42, onecolumn.x (#0) < 45)], limit: NONE] +│ ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] │ └── estimated rows: 2.67 └── TableScan(Probe) - ├── table: default.default.twocolumn - ├── output columns: [x (#1), y (#2)] + ├── table: default.default.onecolumn + ├── output columns: [x (#0)] ├── read rows: 4 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] + ├── push downs: [filters: [and_filters(onecolumn.x (#0) > 42, onecolumn.x (#0) < 45)], limit: NONE] └── estimated rows: 2.67 # the following cases won't be converted to inner join @@ -352,7 +352,7 @@ HashJoin ├── build keys: [a.x (#0)] ├── probe keys: [b.x (#1)] ├── filters: [] -├── estimated rows: 3.56 +├── estimated rows: 2.67 ├── TableScan(Build) │ ├── table: default.default.onecolumn │ ├── output columns: [x (#0)]