Skip to content

Commit

Permalink
feat: eliminate the duplicated sort keys in Order By clause
Browse files Browse the repository at this point in the history
  • Loading branch information
jackwener committed Mar 3, 2023
1 parent ddd64e7 commit 9d537a2
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 3 deletions.
100 changes: 100 additions & 0 deletions datafusion/optimizer/src/eliminate_duplicated_expr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Optimizer rule that eliminates duplicated sort keys in the `ORDER BY`
//! clause of a [LogicalPlan], e.g. `ORDER BY a, a` becomes `ORDER BY a`.
//! A sort key that repeats an earlier one can never change the resulting
//! order, so removing it saves time in planning and executing the query.
use crate::optimizer::ApplyOrder;
use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::Result;
use datafusion_expr::logical_plan::LogicalPlan;
use datafusion_expr::Sort;
use hashbrown::HashSet;

/// Optimization rule that eliminates duplicated expressions.
///
/// The rule carries no state; it can be built with either
/// [`EliminateDuplicatedExpr::new`] or `Default::default()`.
#[derive(Default)]
pub struct EliminateDuplicatedExpr;

impl EliminateDuplicatedExpr {
    /// Creates a new instance of the rule.
    pub fn new() -> Self {
        Self
    }
}

impl OptimizerRule for EliminateDuplicatedExpr {
fn try_optimize(
&self,
plan: &LogicalPlan,
_config: &dyn OptimizerConfig,
) -> Result<Option<LogicalPlan>> {
match plan {
LogicalPlan::Sort(sort) => {
let new_expr = sort.expr.iter().collect::<HashSet<_>>();
if new_expr.len() == sort.expr.len() {
Ok(None)
} else {
Ok(Some(LogicalPlan::Sort(Sort {
expr: new_expr.into_iter().cloned().collect::<Vec<_>>(),
input: sort.input.clone(),
fetch: sort.fetch,
})))
}
}
_ => Ok(None),
}
}

fn name(&self) -> &str {
"eliminate_duplicated_expr"
}

fn apply_order(&self) -> Option<ApplyOrder> {
Some(ApplyOrder::TopDown)
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test::*;
    use datafusion_expr::{col, logical_plan::builder::LogicalPlanBuilder};
    use std::sync::Arc;

    /// Delegates to `crate::test::assert_optimized_plan_eq`, always using a
    /// fresh [`EliminateDuplicatedExpr`] as the rule under test.
    fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) -> Result<()> {
        crate::test::assert_optimized_plan_eq(
            Arc::new(EliminateDuplicatedExpr::new()),
            plan,
            expected,
        )
    }

    /// Sorting by `a, a` must be reduced to sorting by `a` once.
    #[test]
    fn eliminate_sort_expr() -> Result<()> {
        // Propagate a setup failure through the test's `Result` instead of
        // panicking with `unwrap`.
        let table_scan = test_table_scan()?;
        let plan = LogicalPlanBuilder::from(table_scan)
            .sort(vec![col("a"), col("a")])?
            .limit(5, Some(10))?
            .build()?;
        let expected = "Limit: skip=5, fetch=10\
        \n Sort: test.a\
        \n TableScan: test";
        assert_optimized_plan_eq(&plan, expected)
    }
}
7 changes: 4 additions & 3 deletions datafusion/optimizer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub mod common_subexpr_eliminate;
pub mod decorrelate_where_exists;
pub mod decorrelate_where_in;
pub mod eliminate_cross_join;
pub mod eliminate_duplicated_expr;
pub mod eliminate_filter;
pub mod eliminate_limit;
pub mod eliminate_outer_join;
Expand All @@ -33,17 +34,17 @@ pub mod propagate_empty_relation;
pub mod push_down_filter;
pub mod push_down_limit;
pub mod push_down_projection;
pub mod replace_distinct_aggregate;
pub mod rewrite_disjunctive_predicate;
pub mod scalar_subquery_to_join;
pub mod simplify_expressions;
pub mod single_distinct_to_groupby;
pub mod type_coercion;
pub mod unwrap_cast_in_comparison;
pub mod utils;

pub mod replace_distinct_aggregate;
pub mod rewrite_disjunctive_predicate;
#[cfg(test)]
pub mod test;
pub mod unwrap_cast_in_comparison;

pub use optimizer::{OptimizerConfig, OptimizerContext, OptimizerRule};
pub use utils::optimize_children;

0 comments on commit 9d537a2

Please sign in to comment.