From 8b848607be1c7f3324099a806e3249fcd6aa8937 Mon Sep 17 00:00:00 2001
From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com>
Date: Wed, 12 Oct 2022 14:14:17 -0400
Subject: [PATCH 1/4] Add `/opt/cargo/bin` to gpuCI `PATH` (#856)

* Check if rust compiler is picked up properly
* Permission denied to /root
* Check what user gpuCI runner is logged in as
* Install rustup as jenkins
* Export rustup path
* Attempt to install rustup/cargo in a shared location
* Attempt to access bins in opt
* Try to access /opt/ and /opt/cargo/
* Are rustup and cargo available?
* Try to access /opt/cargo/bin/cargo directly
* Check if rustup and cargo are on PATH
* Add cargo bin to PATH manually
---
 continuous_integration/gpuci/build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/continuous_integration/gpuci/build.sh b/continuous_integration/gpuci/build.sh
index cdcb4a6e9..f8492f0c0 100644
--- a/continuous_integration/gpuci/build.sh
+++ b/continuous_integration/gpuci/build.sh
@@ -11,7 +11,7 @@ function hasArg {
 }

 # Set path and build parallel level
-export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH
+export PATH=/opt/cargo/bin:/opt/conda/bin:/usr/local/cuda/bin:$PATH
 export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}

 # Set home to the job's workspace
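A quick way to see why this one-line change unblocks the build: with the shared toolchain prefix on `PATH`, `cargo` resolves without any per-user rustup setup. The sketch below mirrors the `PATH` exported in `build.sh`; the `/opt/cargo` prefix comes from the commit log above and is otherwise an assumption about how the gpuCI image installs the toolchain.

```bash
#!/bin/bash
# Sanity check mirroring the build.sh change; assumes the gpuCI image installs
# rustup/cargo under the shared prefix /opt/cargo (per the commit log above).
export PATH=/opt/cargo/bin:/opt/conda/bin:/usr/local/cuda/bin:$PATH
command -v cargo && cargo --version   # should resolve to /opt/cargo/bin/cargo
command -v rustup && rustup show      # shows the toolchain the build will use
```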
From 82960ca558616f36c830a7eedf9b4e371ce23b12 Mon Sep 17 00:00:00 2001
From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com>
Date: Wed, 12 Oct 2022 15:04:28 -0400
Subject: [PATCH 2/4] Enable crate sorting with `rustfmt` (#819)

* Shouldn't need to initialize stable Rust toolchain
* Remove rust from conda environments
* Modify documentation to add rustup installation step
* Add potential TODOs
* Remove `rust_compiler_version` from conda build config
* Enable crate sorting with fmt
* Install nightly toolchain for style CI
* Specifically install nightly rustfmt
* Revert "Enable crate sorting with fmt"

This reverts commit ed203e0eba97f4ede2e74e472fa060e86d7c5995.

* Re-enable crate sorting with fmt
* Add instructions for installing nightly toolchain
* Amend instructions to only install nightly rustfmt
* Amend nightly toolchain installation command
---
 .github/workflows/style.yml | 4 +
 .pre-commit-config.yaml | 11 ++-
 CONTRIBUTING.md | 7 +-
 dask_planner/src/dialect.rs | 4 +-
 dask_planner/src/error.rs | 6 +-
 dask_planner/src/expression.rs | 31 +++++---
 dask_planner/src/parser.rs | 13 ++--
 dask_planner/src/sql.rs | 74 +++++++++++--------
 dask_planner/src/sql/column.rs | 1 -
 dask_planner/src/sql/exceptions.rs | 3 +-
 dask_planner/src/sql/function.rs | 7 +-
 dask_planner/src/sql/logical.rs | 45 +++++------
 dask_planner/src/sql/logical/aggregate.rs | 15 ++--
 dask_planner/src/sql/logical/analyze_table.rs | 17 +++--
 .../src/sql/logical/create_catalog_schema.rs | 11 +--
 .../src/sql/logical/create_experiment.rs | 17 ++---
 .../src/sql/logical/create_memory_table.rs | 9 ++-
 dask_planner/src/sql/logical/create_model.rs | 17 ++---
 dask_planner/src/sql/logical/create_table.rs | 18 ++---
 dask_planner/src/sql/logical/create_view.rs | 13 ++--
 .../src/sql/logical/describe_model.rs | 13 ++--
 dask_planner/src/sql/logical/drop_model.rs | 13 ++--
 dask_planner/src/sql/logical/drop_schema.rs | 13 ++--
 dask_planner/src/sql/logical/drop_table.rs | 2 +-
 .../src/sql/logical/empty_relation.rs | 2 +-
 dask_planner/src/sql/logical/explain.rs | 3 +-
 dask_planner/src/sql/logical/export_model.rs | 17 ++---
 dask_planner/src/sql/logical/filter.rs | 6 +-
 dask_planner/src/sql/logical/join.rs | 10 +--
 dask_planner/src/sql/logical/limit.rs | 6 +-
 dask_planner/src/sql/logical/predict_model.rs | 12 +--
 dask_planner/src/sql/logical/projection.rs | 6 +-
 .../src/sql/logical/repartition_by.rs | 13 +++-
 dask_planner/src/sql/logical/show_columns.rs | 17 +++--
 dask_planner/src/sql/logical/show_models.rs | 6 +-
 dask_planner/src/sql/logical/show_schema.rs | 17 +++--
 dask_planner/src/sql/logical/show_tables.rs | 17 +++--
 dask_planner/src/sql/logical/sort.rs | 8 +-
 dask_planner/src/sql/logical/table_scan.rs | 4 +-
 dask_planner/src/sql/logical/use_schema.rs | 13 ++--
 dask_planner/src/sql/logical/window.rs | 7 +-
 dask_planner/src/sql/optimizer.rs | 41 +++++-----
 .../sql/optimizer/eliminate_agg_distinct.rs | 26 ++++---
 dask_planner/src/sql/parser_utils.rs | 3 +-
 dask_planner/src/sql/schema.rs | 8 +-
 dask_planner/src/sql/statement.rs | 4 +-
 dask_planner/src/sql/table.rs | 31 ++++----
 dask_planner/src/sql/types.rs | 11 +--
 dask_planner/src/sql/types/rel_data_type.rs | 5 +-
 .../src/sql/types/rel_data_type_field.rs | 12 +--
 rustfmt.toml | 3 +
 51 files changed, 354 insertions(+), 318 deletions(-)
 create mode 100644 rustfmt.toml

diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml
index fb2037154..a74a6f1eb 100644
--- a/.github/workflows/style.yml
+++ b/.github/workflows/style.yml
@@ -15,4 +15,8 @@ jobs:
     steps:
       - uses: actions/checkout@v2
       - uses: actions/setup-python@v2
+      - uses: actions-rs/toolchain@v1
+        with:
+          toolchain: nightly
+          components: rustfmt
       - uses: pre-commit/action@v2.0.0
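To reproduce the new style job locally, the rough `rustup` equivalent of the `actions-rs/toolchain` step above is the following (the exact invocation the action performs under the hood may differ):

```bash
# Install a minimal nightly toolchain carrying only rustfmt, matching the
# toolchain/components inputs of the workflow step above.
rustup toolchain install nightly --profile minimal --component rustfmt
rustup component list --toolchain nightly | grep rustfmt   # verify it landed
```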
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f1feccf47..0bda454e2 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -19,8 +19,6 @@ repos:
   - repo: https://github.com/doublify/pre-commit-rust
     rev: v1.0
     hooks:
-      - id: fmt
-        args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--']
       - id: cargo-check
         args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--']
       - id: clippy
@@ -33,3 +31,12 @@
       - id: check-yaml
         exclude: ^continuous_integration/recipe/
       - id: check-added-large-files
+  - repo: local
+    hooks:
+      - id: cargo-fmt
+        name: cargo fmt
+        description: Format files with cargo fmt.
+        entry: cargo +nightly fmt
+        language: system
+        types: [rust]
+        args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--']
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b23b29c3d..9ab31230f 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -4,12 +4,15 @@

 The environment used for development and CI consists of:

-- a system installation of [`rustup`](https://rustup.rs/)
+- a system installation of [`rustup`](https://rustup.rs/) with:
+  - the latest stable toolchain
+  - the latest nightly `rustfmt`
 - a [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html) environment containing all required Python packages

-Once `rustup` is installed, ensure that the latest stable toolchain is available by running
+Once `rustup` is installed, ensure that the latest stable toolchain and nightly `rustfmt` are available by running

 ```
+rustup toolchain install nightly -c rustfmt --profile minimal
 rustup update
 ```
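With that setup in place, the day-to-day formatting workflow looks like this; the first command is essentially what the new `cargo-fmt` hook invokes (minus the `--verbose` flag), and the second runs the full hook suite the way the style CI job does:

```bash
# Format the planner crate with nightly rustfmt (what the local hook runs):
cargo +nightly fmt --manifest-path ./dask_planner/Cargo.toml
# Or run every configured pre-commit hook across the repo:
pre-commit run --all-files
```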
diff --git a/dask_planner/src/dialect.rs b/dask_planner/src/dialect.rs
index 492f4aca3..973f76f4f 100644
--- a/dask_planner/src/dialect.rs
+++ b/dask_planner/src/dialect.rs
@@ -1,5 +1,5 @@
-use core::iter::Peekable;
-use core::str::Chars;
+use core::{iter::Peekable, str::Chars};
+
 use datafusion_sql::sqlparser::dialect::Dialect;

 #[derive(Debug)]
diff --git a/dask_planner/src/error.rs b/dask_planner/src/error.rs
index 73a474353..00a14cde4 100644
--- a/dask_planner/src/error.rs
+++ b/dask_planner/src/error.rs
@@ -1,8 +1,8 @@
+use std::fmt::{Display, Formatter};
+
 use datafusion_common::DataFusionError;
-use datafusion_sql::sqlparser::parser::ParserError;
-use datafusion_sql::sqlparser::tokenizer::TokenizerError;
+use datafusion_sql::sqlparser::{parser::ParserError, tokenizer::TokenizerError};
 use pyo3::PyErr;
-use std::fmt::{Display, Formatter};

 pub type Result<T> = std::result::Result<T, DaskPlannerError>;
diff --git a/dask_planner/src/expression.rs b/dask_planner/src/expression.rs
index 80eec7dd0..dc13059f6 100644
--- a/dask_planner/src/expression.rs
+++ b/dask_planner/src/expression.rs
@@ -1,14 +1,25 @@
-use crate::error::{DaskPlannerError, Result};
-use crate::sql::exceptions::{py_runtime_err, py_type_err};
-use crate::sql::logical;
-use crate::sql::types::RexType;
+use std::{convert::From, sync::Arc};
+
 use arrow::datatypes::DataType;
 use datafusion_common::{Column, DFField, DFSchema, ScalarValue};
-use datafusion_expr::Operator;
-use datafusion_expr::{lit, utils::exprlist_to_fields, BuiltinScalarFunction, Expr, LogicalPlan};
+use datafusion_expr::{
+    lit,
+    utils::exprlist_to_fields,
+    BuiltinScalarFunction,
+    Expr,
+    LogicalPlan,
+    Operator,
+};
 use pyo3::prelude::*;
-use std::convert::From;
-use std::sync::Arc;
+
+use crate::{
+    error::{DaskPlannerError, Result},
+    sql::{
+        exceptions::{py_runtime_err, py_type_err},
+        logical,
+        types::RexType,
+    },
+};

 /// An PyExpr that can be used on a DataFrame
 #[pyclass(name = "Expression", module = "datafusion", subclass)]
@@ -805,11 +816,11 @@ pub fn expr_to_field(expr: &Expr, input_plan: &LogicalPlan) -> Result<DFField> {

 #[cfg(test)]
 mod test {
-    use crate::error::Result;
-    use crate::expression::PyExpr;
     use datafusion_common::{Column, ScalarValue};
     use datafusion_expr::Expr;

+    use crate::{error::Result, expression::PyExpr};
+
     #[test]
     fn get_value_u32() -> Result<()> {
         test_get_value(ScalarValue::UInt32(None))?;
diff --git a/dask_planner/src/parser.rs b/dask_planner/src/parser.rs
index 9b85a8512..d743af901 100644
--- a/dask_planner/src/parser.rs
+++ b/dask_planner/src/parser.rs
@@ -2,19 +2,20 @@
 //!
 //! Declares a SQL parser based on sqlparser that handles custom formats that we need.

-use crate::sql::exceptions::py_type_err;
-use crate::sql::types::SqlTypeName;
-use pyo3::prelude::*;
+use std::collections::VecDeque;

-use crate::dialect::DaskDialect;
-use crate::sql::parser_utils::DaskParserUtils;
 use datafusion_sql::sqlparser::{
     ast::{Expr, Ident, SelectItem, Statement as SQLStatement, UnaryOperator, Value},
     dialect::{keywords::Keyword, Dialect},
     parser::{Parser, ParserError},
     tokenizer::{Token, Tokenizer},
 };
-use std::collections::VecDeque;
+use pyo3::prelude::*;
+
+use crate::{
+    dialect::DaskDialect,
+    sql::{exceptions::py_type_err, parser_utils::DaskParserUtils, types::SqlTypeName},
+};

 macro_rules! parser_err {
     ($MSG:expr) => {
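The import rewrites in the hunks above and below all follow one pattern: `std`/`core` imports first, then external crates, then `crate::`/`self::` imports, merged into a single `use` per crate. That grouping is produced by nightly-only `rustfmt` options, presumably what the new three-line `rustfmt.toml` (listed in the diffstat but not shown in this excerpt) turns on. A hypothetical reconstruction, clearly an assumption since the file's contents are truncated here:

```bash
# Hypothetical rustfmt.toml matching the observed import grouping; the option
# names are real nightly rustfmt settings, but their presence here is inferred.
cat > rustfmt.toml <<'EOF'
edition = "2021"
group_imports = "StdExternalCrate"
imports_granularity = "Crate"
EOF
# Check (without rewriting) that the tree already conforms:
cargo +nightly fmt --manifest-path ./dask_planner/Cargo.toml -- --check
```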
diff --git a/dask_planner/src/sql.rs b/dask_planner/src/sql.rs
index 089d818ae..ef34c66f9 100644
--- a/dask_planner/src/sql.rs
+++ b/dask_planner/src/sql.rs
@@ -9,47 +9,61 @@ pub mod statement;
 pub mod table;
 pub mod types;

-use crate::sql::exceptions::{py_optimization_exp, py_parsing_exp, py_runtime_err};
+use std::{collections::HashMap, sync::Arc};

 use arrow::datatypes::{DataType, Field, Schema};
 use datafusion_common::{DFSchema, DataFusionError};
-use datafusion_expr::logical_plan::Extension;
 use datafusion_expr::{
-    AccumulatorFunctionImplementation, AggregateUDF, LogicalPlan, PlanVisitor, ReturnTypeFunction,
-    ScalarFunctionImplementation, ScalarUDF, Signature, StateTypeFunction, TableSource,
-    TypeSignature, Volatility,
+    logical_plan::Extension,
+    AccumulatorFunctionImplementation,
+    AggregateUDF,
+    LogicalPlan,
+    PlanVisitor,
+    ReturnTypeFunction,
+    ScalarFunctionImplementation,
+    ScalarUDF,
+    Signature,
+    StateTypeFunction,
+    TableSource,
+    TypeSignature,
+    Volatility,
 };
 use datafusion_sql::{
     parser::Statement as DFStatement,
     planner::{ContextProvider, SqlToRel},
-    ResolvedTableReference, TableReference,
+    ResolvedTableReference,
+    TableReference,
 };
-
-use std::collections::HashMap;
-use std::sync::Arc;
-
-use crate::dialect::DaskDialect;
-use crate::parser::{DaskParser, DaskStatement};
-use crate::sql::logical::analyze_table::AnalyzeTablePlanNode;
-use crate::sql::logical::create_experiment::CreateExperimentPlanNode;
-use crate::sql::logical::create_model::CreateModelPlanNode;
-use crate::sql::logical::create_table::CreateTablePlanNode;
-use crate::sql::logical::create_view::CreateViewPlanNode;
-use crate::sql::logical::describe_model::DescribeModelPlanNode;
-use crate::sql::logical::drop_model::DropModelPlanNode;
-use crate::sql::logical::export_model::ExportModelPlanNode;
-use crate::sql::logical::predict_model::PredictModelPlanNode;
-use crate::sql::logical::show_columns::ShowColumnsPlanNode;
-use crate::sql::logical::show_models::ShowModelsPlanNode;
-use crate::sql::logical::show_schema::ShowSchemasPlanNode;
-use crate::sql::logical::show_tables::ShowTablesPlanNode;
-
-use crate::sql::logical::PyLogicalPlan;
 use pyo3::prelude::*;

-use self::logical::create_catalog_schema::CreateCatalogSchemaPlanNode;
-use self::logical::drop_schema::DropSchemaPlanNode;
-use self::logical::use_schema::UseSchemaPlanNode;
+use self::logical::{
+    create_catalog_schema::CreateCatalogSchemaPlanNode,
+    drop_schema::DropSchemaPlanNode,
+    use_schema::UseSchemaPlanNode,
+};
+use crate::{
+    dialect::DaskDialect,
+    parser::{DaskParser, DaskStatement},
+    sql::{
+        exceptions::{py_optimization_exp, py_parsing_exp, py_runtime_err},
+        logical::{
+            analyze_table::AnalyzeTablePlanNode,
+            create_experiment::CreateExperimentPlanNode,
+            create_model::CreateModelPlanNode,
+            create_table::CreateTablePlanNode,
+            create_view::CreateViewPlanNode,
+            describe_model::DescribeModelPlanNode,
+            drop_model::DropModelPlanNode,
+            export_model::ExportModelPlanNode,
+            predict_model::PredictModelPlanNode,
+            show_columns::ShowColumnsPlanNode,
+            show_models::ShowModelsPlanNode,
+            show_schema::ShowSchemasPlanNode,
+            show_tables::ShowTablesPlanNode,
+            PyLogicalPlan,
+        },
+    },
+};

 /// DaskSQLContext is main interface used for interacting with DataFusion to
 /// parse SQL queries, build logical plans, and optimize logical plans.
diff --git a/dask_planner/src/sql/column.rs b/dask_planner/src/sql/column.rs
index a3a9b8954..2266bb27b 100644
--- a/dask_planner/src/sql/column.rs
+++ b/dask_planner/src/sql/column.rs
@@ -1,5 +1,4 @@
 use datafusion_common::Column;
-
 use pyo3::prelude::*;

 #[pyclass(name = "Column", module = "dask_planner", subclass)]
diff --git a/dask_planner/src/sql/exceptions.rs b/dask_planner/src/sql/exceptions.rs
index 774a0d270..871402279 100644
--- a/dask_planner/src/sql/exceptions.rs
+++ b/dask_planner/src/sql/exceptions.rs
@@ -1,6 +1,7 @@
-use pyo3::{create_exception, PyErr};
 use std::fmt::Debug;

+use pyo3::{create_exception, PyErr};
+
 // Identifies exceptions that occur while attempting to generate a `LogicalPlan` from a SQL string
 create_exception!(rust, ParsingException, pyo3::exceptions::PyException);
diff --git a/dask_planner/src/sql/function.rs b/dask_planner/src/sql/function.rs
index ca41220aa..0da2ff322 100644
--- a/dask_planner/src/sql/function.rs
+++ b/dask_planner/src/sql/function.rs
@@ -1,8 +1,9 @@
-use super::types::PyDataType;
-use pyo3::prelude::*;
+use std::collections::HashMap;

 use arrow::datatypes::DataType;
-use std::collections::HashMap;
+use pyo3::prelude::*;
+
+use super::types::PyDataType;

 #[pyclass(name = "DaskFunction", module = "dask_planner", subclass)]
 #[derive(Debug, Clone)]
diff --git a/dask_planner/src/sql/logical.rs b/dask_planner/src/sql/logical.rs
index a3653b322..17bea5343 100644
--- a/dask_planner/src/sql/logical.rs
+++ b/dask_planner/src/sql/logical.rs
@@ -1,6 +1,7 @@
-use crate::sql::table;
-use crate::sql::types::rel_data_type::RelDataType;
-use crate::sql::types::rel_data_type_field::RelDataTypeField;
+use crate::sql::{
+    table,
+    types::{rel_data_type::RelDataType, rel_data_type_field::RelDataTypeField},
+};

 pub mod aggregate;
 pub mod analyze_table;
@@ -34,27 +35,27 @@ pub mod window;

 use datafusion_common::{DFSchemaRef, DataFusionError};
 use datafusion_expr::LogicalPlan;
-
-use crate::error::Result;
-use crate::sql::exceptions::py_type_err;
 use pyo3::prelude::*;

-use self::analyze_table::AnalyzeTablePlanNode;
-use self::create_catalog_schema::CreateCatalogSchemaPlanNode;
-use self::create_experiment::CreateExperimentPlanNode;
-use self::create_model::CreateModelPlanNode;
-use self::create_table::CreateTablePlanNode;
-use self::create_view::CreateViewPlanNode;
-use self::describe_model::DescribeModelPlanNode;
-use self::drop_model::DropModelPlanNode;
-use self::drop_schema::DropSchemaPlanNode;
-use self::export_model::ExportModelPlanNode;
-use self::predict_model::PredictModelPlanNode;
-use self::show_columns::ShowColumnsPlanNode;
-use self::show_models::ShowModelsPlanNode;
-use self::show_schema::ShowSchemasPlanNode;
-use self::show_tables::ShowTablesPlanNode;
-use self::use_schema::UseSchemaPlanNode;
+use self::{
+    analyze_table::AnalyzeTablePlanNode,
+    create_catalog_schema::CreateCatalogSchemaPlanNode,
+    create_experiment::CreateExperimentPlanNode,
+    create_model::CreateModelPlanNode,
+    create_table::CreateTablePlanNode,
+    create_view::CreateViewPlanNode,
+    describe_model::DescribeModelPlanNode,
+    drop_model::DropModelPlanNode,
+    drop_schema::DropSchemaPlanNode,
+    export_model::ExportModelPlanNode,
+    predict_model::PredictModelPlanNode,
+    show_columns::ShowColumnsPlanNode,
+    show_models::ShowModelsPlanNode,
+    show_schema::ShowSchemasPlanNode,
+    show_tables::ShowTablesPlanNode,
+    use_schema::UseSchemaPlanNode,
+};
+use crate::{error::Result, sql::exceptions::py_type_err};

 #[pyclass(name = "LogicalPlan", module = "dask_planner", subclass)]
 #[derive(Debug, Clone)]
diff --git a/dask_planner/src/sql/logical/aggregate.rs b/dask_planner/src/sql/logical/aggregate.rs
index b181c158e..95dd2dda4 100644
--- a/dask_planner/src/sql/logical/aggregate.rs
+++ b/dask_planner/src/sql/logical/aggregate.rs
@@ -1,10 +1,15 @@
-use crate::expression::{py_expr_list, PyExpr};
-
-use datafusion_expr::{logical_plan::Aggregate, logical_plan::Distinct, Expr, LogicalPlan};
-
-use crate::sql::exceptions::py_type_err;
+use datafusion_expr::{
+    logical_plan::{Aggregate, Distinct},
+    Expr,
+    LogicalPlan,
+};
 use pyo3::prelude::*;

+use crate::{
+    expression::{py_expr_list, PyExpr},
+    sql::exceptions::py_type_err,
+};
+
 #[pyclass(name = "Aggregate", module = "dask_planner", subclass)]
 #[derive(Clone)]
 pub struct PyAggregate {
diff --git a/dask_planner/src/sql/logical/analyze_table.rs b/dask_planner/src/sql/logical/analyze_table.rs
index b8a7a9806..02240e634 100644
--- a/dask_planner/src/sql/logical/analyze_table.rs
+++ b/dask_planner/src/sql/logical/analyze_table.rs
@@ -1,14 +1,15 @@
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
-use pyo3::prelude::*;
-
-use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNode};
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::{DFSchema, DFSchemaRef};
+use datafusion_expr::{
+    logical_plan::{Extension, UserDefinedLogicalNode},
+    Expr,
+    LogicalPlan,
+};
+use fmt::Debug;
+use pyo3::prelude::*;
+
+use crate::sql::{exceptions::py_type_err, logical};

 #[derive(Clone)]
 pub struct AnalyzeTablePlanNode {
diff --git a/dask_planner/src/sql/logical/create_catalog_schema.rs b/dask_planner/src/sql/logical/create_catalog_schema.rs
index 12e079e5d..3a7e7f61f 100644
--- a/dask_planner/src/sql/logical/create_catalog_schema.rs
+++ b/dask_planner/src/sql/logical/create_catalog_schema.rs
@@ -1,15 +1,12 @@
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
+use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::{DFSchema, DFSchemaRef};
-use datafusion_expr::logical_plan::UserDefinedLogicalNode;
-use datafusion_expr::{Expr, LogicalPlan};
-
+use datafusion_expr::{logical_plan::UserDefinedLogicalNode, Expr, LogicalPlan};
 use fmt::Debug;
-use std::{any::Any, fmt, sync::Arc};
-
 use pyo3::prelude::*;

+use crate::sql::{exceptions::py_type_err, logical};
+
 #[derive(Clone)]
 pub struct CreateCatalogSchemaPlanNode {
     pub schema: DFSchemaRef,
diff --git a/dask_planner/src/sql/logical/create_experiment.rs b/dask_planner/src/sql/logical/create_experiment.rs
index 14f2843da..a60feab03 100644
--- a/dask_planner/src/sql/logical/create_experiment.rs
+++ b/dask_planner/src/sql/logical/create_experiment.rs
@@ -1,15 +1,14 @@
-use crate::parser::PySqlArg;
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
-use pyo3::prelude::*;
-
-use datafusion_expr::logical_plan::UserDefinedLogicalNode;
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::DFSchemaRef;
+use datafusion_expr::{logical_plan::UserDefinedLogicalNode, Expr, LogicalPlan};
+use fmt::Debug;
+use pyo3::prelude::*;
+
+use crate::{
+    parser::PySqlArg,
+    sql::{exceptions::py_type_err, logical},
+};

 #[derive(Clone)]
 pub struct CreateExperimentPlanNode {
diff --git a/dask_planner/src/sql/logical/create_memory_table.rs b/dask_planner/src/sql/logical/create_memory_table.rs
index 3cf0b8547..fcb8838e2 100644
--- a/dask_planner/src/sql/logical/create_memory_table.rs
+++ b/dask_planner/src/sql/logical/create_memory_table.rs
@@ -1,8 +1,11 @@
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical::PyLogicalPlan;
-use datafusion_expr::{logical_plan::CreateMemoryTable, logical_plan::CreateView, LogicalPlan};
+use datafusion_expr::{
+    logical_plan::{CreateMemoryTable, CreateView},
+    LogicalPlan,
+};
 use pyo3::prelude::*;

+use crate::sql::{exceptions::py_type_err, logical::PyLogicalPlan};
+
 #[pyclass(name = "CreateMemoryTable", module = "dask_planner", subclass)]
 #[derive(Clone)]
 pub struct PyCreateMemoryTable {
diff --git a/dask_planner/src/sql/logical/create_model.rs b/dask_planner/src/sql/logical/create_model.rs
index 7a3f12bf2..5b2825d6b 100644
--- a/dask_planner/src/sql/logical/create_model.rs
+++ b/dask_planner/src/sql/logical/create_model.rs
@@ -1,15 +1,14 @@
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
-use pyo3::prelude::*;
-
-use datafusion_expr::logical_plan::UserDefinedLogicalNode;
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

-use crate::parser::PySqlArg;
 use datafusion_common::DFSchemaRef;
+use datafusion_expr::{logical_plan::UserDefinedLogicalNode, Expr, LogicalPlan};
+use fmt::Debug;
+use pyo3::prelude::*;
+
+use crate::{
+    parser::PySqlArg,
+    sql::{exceptions::py_type_err, logical},
+};

 #[derive(Clone)]
 pub struct CreateModelPlanNode {
diff --git a/dask_planner/src/sql/logical/create_table.rs b/dask_planner/src/sql/logical/create_table.rs
index 914792310..72922f8c9 100644
--- a/dask_planner/src/sql/logical/create_table.rs
+++ b/dask_planner/src/sql/logical/create_table.rs
@@ -1,16 +1,14 @@
-use crate::parser::PySqlArg;
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
-
-use pyo3::prelude::*;
-
-use datafusion_expr::logical_plan::UserDefinedLogicalNode;
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::{DFSchema, DFSchemaRef};
+use datafusion_expr::{logical_plan::UserDefinedLogicalNode, Expr, LogicalPlan};
+use fmt::Debug;
+use pyo3::prelude::*;
+
+use crate::{
+    parser::PySqlArg,
+    sql::{exceptions::py_type_err, logical},
+};

 #[derive(Clone)]
 pub struct CreateTablePlanNode {
diff --git a/dask_planner/src/sql/logical/create_view.rs b/dask_planner/src/sql/logical/create_view.rs
index 59429b845..ce9e14a97 100644
--- a/dask_planner/src/sql/logical/create_view.rs
+++ b/dask_planner/src/sql/logical/create_view.rs
@@ -1,14 +1,11 @@
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
-use pyo3::prelude::*;
-
-use datafusion_expr::logical_plan::UserDefinedLogicalNode;
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::{DFSchema, DFSchemaRef};
+use datafusion_expr::{logical_plan::UserDefinedLogicalNode, Expr, LogicalPlan};
+use fmt::Debug;
+use pyo3::prelude::*;
+
+use crate::sql::{exceptions::py_type_err, logical};

 #[derive(Clone)]
 pub struct CreateViewPlanNode {
diff --git a/dask_planner/src/sql/logical/describe_model.rs b/dask_planner/src/sql/logical/describe_model.rs
index 39d959a8d..ccc57dc7a 100644
--- a/dask_planner/src/sql/logical/describe_model.rs
+++ b/dask_planner/src/sql/logical/describe_model.rs
@@ -1,14 +1,11 @@
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
-use pyo3::prelude::*;
-
-use datafusion_expr::logical_plan::UserDefinedLogicalNode;
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::{DFSchema, DFSchemaRef};
+use datafusion_expr::{logical_plan::UserDefinedLogicalNode, Expr, LogicalPlan};
+use fmt::Debug;
+use pyo3::prelude::*;
+
+use crate::sql::{exceptions::py_type_err, logical};

 #[derive(Clone)]
 pub struct DescribeModelPlanNode {
diff --git a/dask_planner/src/sql/logical/drop_model.rs b/dask_planner/src/sql/logical/drop_model.rs
index 39760ec87..fdfadf25c 100644
--- a/dask_planner/src/sql/logical/drop_model.rs
+++ b/dask_planner/src/sql/logical/drop_model.rs
@@ -1,14 +1,11 @@
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
-use pyo3::prelude::*;
-
-use datafusion_expr::logical_plan::UserDefinedLogicalNode;
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::{DFSchema, DFSchemaRef};
+use datafusion_expr::{logical_plan::UserDefinedLogicalNode, Expr, LogicalPlan};
+use fmt::Debug;
+use pyo3::prelude::*;
+
+use crate::sql::{exceptions::py_type_err, logical};

 #[derive(Clone)]
 pub struct DropModelPlanNode {
diff --git a/dask_planner/src/sql/logical/drop_schema.rs b/dask_planner/src/sql/logical/drop_schema.rs
index 97ce06bb8..b67b47a69 100644
--- a/dask_planner/src/sql/logical/drop_schema.rs
+++ b/dask_planner/src/sql/logical/drop_schema.rs
@@ -1,14 +1,11 @@
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
-use pyo3::prelude::*;
-
-use datafusion_expr::logical_plan::UserDefinedLogicalNode;
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::{DFSchema, DFSchemaRef};
+use datafusion_expr::{logical_plan::UserDefinedLogicalNode, Expr, LogicalPlan};
+use fmt::Debug;
+use pyo3::prelude::*;
+
+use crate::sql::{exceptions::py_type_err, logical};

 #[derive(Clone)]
 pub struct DropSchemaPlanNode {
diff --git a/dask_planner/src/sql/logical/drop_table.rs b/dask_planner/src/sql/logical/drop_table.rs
index e5897da07..d5bd164b9 100644
--- a/dask_planner/src/sql/logical/drop_table.rs
+++ b/dask_planner/src/sql/logical/drop_table.rs
@@ -1,7 +1,7 @@
 use datafusion_expr::logical_plan::{DropTable, LogicalPlan};
+use pyo3::prelude::*;

 use crate::sql::exceptions::py_type_err;
-use pyo3::prelude::*;

 #[pyclass(name = "DropTable", module = "dask_planner", subclass)]
 #[derive(Clone)]
diff --git a/dask_planner/src/sql/logical/empty_relation.rs b/dask_planner/src/sql/logical/empty_relation.rs
index 61d6117d4..864cb18ea 100644
--- a/dask_planner/src/sql/logical/empty_relation.rs
+++ b/dask_planner/src/sql/logical/empty_relation.rs
@@ -1,7 +1,7 @@
 use datafusion_expr::logical_plan::{EmptyRelation, LogicalPlan};
+use pyo3::prelude::*;

 use crate::sql::exceptions::py_type_err;
-use pyo3::prelude::*;

 #[pyclass(name = "EmptyRelation", module = "dask_planner", subclass)]
 #[derive(Clone)]
diff --git a/dask_planner/src/sql/logical/explain.rs b/dask_planner/src/sql/logical/explain.rs
index de296785d..a23c4ea4f 100644
--- a/dask_planner/src/sql/logical/explain.rs
+++ b/dask_planner/src/sql/logical/explain.rs
@@ -1,7 +1,8 @@
-use crate::sql::exceptions::py_type_err;
 use datafusion_expr::{logical_plan::Explain, LogicalPlan};
 use pyo3::prelude::*;

+use crate::sql::exceptions::py_type_err;
+
 #[pyclass(name = "Explain", module = "dask_planner", subclass)]
 #[derive(Clone)]
 pub struct PyExplain {
diff --git a/dask_planner/src/sql/logical/export_model.rs b/dask_planner/src/sql/logical/export_model.rs
index e5e702db9..da35fe56d 100644
--- a/dask_planner/src/sql/logical/export_model.rs
+++ b/dask_planner/src/sql/logical/export_model.rs
@@ -1,15 +1,14 @@
-use crate::parser::PySqlArg;
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
-use pyo3::prelude::*;
-
-use datafusion_expr::logical_plan::UserDefinedLogicalNode;
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::{DFSchema, DFSchemaRef};
+use datafusion_expr::{logical_plan::UserDefinedLogicalNode, Expr, LogicalPlan};
+use fmt::Debug;
+use pyo3::prelude::*;
+
+use crate::{
+    parser::PySqlArg,
+    sql::{exceptions::py_type_err, logical},
+};

 #[derive(Clone)]
 pub struct ExportModelPlanNode {
diff --git a/dask_planner/src/sql/logical/filter.rs b/dask_planner/src/sql/logical/filter.rs
index bb797b763..532d98374 100644
--- a/dask_planner/src/sql/logical/filter.rs
+++ b/dask_planner/src/sql/logical/filter.rs
@@ -1,10 +1,8 @@
-use crate::expression::PyExpr;
-
 use datafusion_expr::{logical_plan::Filter, LogicalPlan};
-
-use crate::sql::exceptions::py_type_err;
 use pyo3::prelude::*;

+use crate::{expression::PyExpr, sql::exceptions::py_type_err};
+
 #[pyclass(name = "Filter", module = "dask_planner", subclass)]
 #[derive(Clone)]
 pub struct PyFilter {
diff --git a/dask_planner/src/sql/logical/join.rs b/dask_planner/src/sql/logical/join.rs
index c10f11a50..3ddcb757e 100644
--- a/dask_planner/src/sql/logical/join.rs
+++ b/dask_planner/src/sql/logical/join.rs
@@ -1,15 +1,15 @@
-use crate::expression::PyExpr;
-use crate::sql::column;
-
 use datafusion_expr::{
     and,
     logical_plan::{Join, JoinType, LogicalPlan},
     Expr,
 };
-
-use crate::sql::exceptions::py_type_err;
 use pyo3::prelude::*;

+use crate::{
+    expression::PyExpr,
+    sql::{column, exceptions::py_type_err},
+};
+
 #[pyclass(name = "Join", module = "dask_planner", subclass)]
 #[derive(Clone)]
 pub struct PyJoin {
diff --git a/dask_planner/src/sql/logical/limit.rs b/dask_planner/src/sql/logical/limit.rs
index 83f76d65c..750bbe834 100644
--- a/dask_planner/src/sql/logical/limit.rs
+++ b/dask_planner/src/sql/logical/limit.rs
@@ -1,10 +1,8 @@
-use crate::expression::PyExpr;
-use crate::sql::exceptions::py_type_err;
-
 use datafusion_common::ScalarValue;
+use datafusion_expr::{logical_plan::Limit, Expr, LogicalPlan};
 use pyo3::prelude::*;

-use datafusion_expr::{logical_plan::Limit, Expr, LogicalPlan};
+use crate::{expression::PyExpr, sql::exceptions::py_type_err};

 #[pyclass(name = "Limit", module = "dask_planner", subclass)]
 #[derive(Clone)]
diff --git a/dask_planner/src/sql/logical/predict_model.rs b/dask_planner/src/sql/logical/predict_model.rs
index 4a77b6892..7a69ddd55 100644
--- a/dask_planner/src/sql/logical/predict_model.rs
+++ b/dask_planner/src/sql/logical/predict_model.rs
@@ -1,16 +1,12 @@
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
-use pyo3::prelude::*;
-
-use datafusion_expr::logical_plan::UserDefinedLogicalNode;
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::DFSchemaRef;
+use datafusion_expr::{logical_plan::UserDefinedLogicalNode, Expr, LogicalPlan};
+use fmt::Debug;
+use pyo3::prelude::*;

 use super::PyLogicalPlan;
+use crate::sql::{exceptions::py_type_err, logical};

 #[derive(Clone)]
 pub struct PredictModelPlanNode {
diff --git a/dask_planner/src/sql/logical/projection.rs b/dask_planner/src/sql/logical/projection.rs
index eedb323d9..a017731b5 100644
--- a/dask_planner/src/sql/logical/projection.rs
+++ b/dask_planner/src/sql/logical/projection.rs
@@ -1,10 +1,8 @@
-use crate::expression::PyExpr;
-
 use datafusion_expr::{logical_plan::Projection, Expr, LogicalPlan};
-
-use crate::sql::exceptions::py_type_err;
 use pyo3::prelude::*;

+use crate::{expression::PyExpr, sql::exceptions::py_type_err};
+
 #[pyclass(name = "Projection", module = "dask_planner", subclass)]
 #[derive(Clone)]
 pub struct PyProjection {
diff --git a/dask_planner/src/sql/logical/repartition_by.rs b/dask_planner/src/sql/logical/repartition_by.rs
index dd63ed750..c5f77d53d 100644
--- a/dask_planner/src/sql/logical/repartition_by.rs
+++ b/dask_planner/src/sql/logical/repartition_by.rs
@@ -1,9 +1,14 @@
-use crate::sql::logical;
-use crate::{expression::PyExpr, sql::exceptions::py_type_err};
-use datafusion_expr::logical_plan::{Partitioning, Repartition};
+use datafusion_expr::{
+    logical_plan::{Partitioning, Repartition},
+    Expr,
+    LogicalPlan,
+};
 use pyo3::prelude::*;

-use datafusion_expr::{Expr, LogicalPlan};
+use crate::{
+    expression::PyExpr,
+    sql::{exceptions::py_type_err, logical},
+};

 #[pyclass(name = "RepartitionBy", module = "dask_planner", subclass)]
 pub struct PyRepartitionBy {
diff --git a/dask_planner/src/sql/logical/show_columns.rs b/dask_planner/src/sql/logical/show_columns.rs
index f9224affc..722c20f3d 100644
--- a/dask_planner/src/sql/logical/show_columns.rs
+++ b/dask_planner/src/sql/logical/show_columns.rs
@@ -1,14 +1,15 @@
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
-use pyo3::prelude::*;
-
-use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNode};
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::{DFSchema, DFSchemaRef};
+use datafusion_expr::{
+    logical_plan::{Extension, UserDefinedLogicalNode},
+    Expr,
+    LogicalPlan,
+};
+use fmt::Debug;
+use pyo3::prelude::*;
+
+use crate::sql::{exceptions::py_type_err, logical};

 #[derive(Clone)]
 pub struct ShowColumnsPlanNode {
diff --git a/dask_planner/src/sql/logical/show_models.rs b/dask_planner/src/sql/logical/show_models.rs
index df00b32cd..1fa83d045 100644
--- a/dask_planner/src/sql/logical/show_models.rs
+++ b/dask_planner/src/sql/logical/show_models.rs
@@ -1,10 +1,8 @@
-use datafusion_expr::logical_plan::UserDefinedLogicalNode;
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::{DFSchema, DFSchemaRef};
+use datafusion_expr::{logical_plan::UserDefinedLogicalNode, Expr, LogicalPlan};
+use fmt::Debug;

 #[derive(Clone)]
 pub struct ShowModelsPlanNode {
diff --git a/dask_planner/src/sql/logical/show_schema.rs b/dask_planner/src/sql/logical/show_schema.rs
index 5505fa2e3..1a1ca3215 100644
--- a/dask_planner/src/sql/logical/show_schema.rs
+++ b/dask_planner/src/sql/logical/show_schema.rs
@@ -1,14 +1,15 @@
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
-use pyo3::prelude::*;
-
-use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNode};
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::{DFSchema, DFSchemaRef};
+use datafusion_expr::{
+    logical_plan::{Extension, UserDefinedLogicalNode},
+    Expr,
+    LogicalPlan,
+};
+use fmt::Debug;
+use pyo3::prelude::*;
+
+use crate::sql::{exceptions::py_type_err, logical};

 #[derive(Clone)]
 pub struct ShowSchemasPlanNode {
diff --git a/dask_planner/src/sql/logical/show_tables.rs b/dask_planner/src/sql/logical/show_tables.rs
index b7519d8b5..50a6d676e 100644
--- a/dask_planner/src/sql/logical/show_tables.rs
+++ b/dask_planner/src/sql/logical/show_tables.rs
@@ -1,14 +1,15 @@
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
-use pyo3::prelude::*;
-
-use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNode};
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::{DFSchema, DFSchemaRef};
+use datafusion_expr::{
+    logical_plan::{Extension, UserDefinedLogicalNode},
+    Expr,
+    LogicalPlan,
+};
+use fmt::Debug;
+use pyo3::prelude::*;
+
+use crate::sql::{exceptions::py_type_err, logical};

 #[derive(Clone)]
 pub struct ShowTablesPlanNode {
diff --git a/dask_planner/src/sql/logical/sort.rs b/dask_planner/src/sql/logical/sort.rs
index 4c6ec1dd9..0bdd67e23 100644
--- a/dask_planner/src/sql/logical/sort.rs
+++ b/dask_planner/src/sql/logical/sort.rs
@@ -1,9 +1,11 @@
-use crate::expression::{py_expr_list, PyExpr};
-
-use crate::sql::exceptions::py_type_err;
 use datafusion_expr::{logical_plan::Sort, LogicalPlan};
 use pyo3::prelude::*;

+use crate::{
+    expression::{py_expr_list, PyExpr},
+    sql::exceptions::py_type_err,
+};
+
 #[pyclass(name = "Sort", module = "dask_planner", subclass)]
 #[derive(Clone)]
 pub struct PySort {
diff --git a/dask_planner/src/sql/logical/table_scan.rs b/dask_planner/src/sql/logical/table_scan.rs
index 51561f081..db0fbf599 100644
--- a/dask_planner/src/sql/logical/table_scan.rs
+++ b/dask_planner/src/sql/logical/table_scan.rs
@@ -1,8 +1,8 @@
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
 use datafusion_expr::logical_plan::TableScan;
 use pyo3::prelude::*;

+use crate::sql::{exceptions::py_type_err, logical};
+
 #[pyclass(name = "TableScan", module = "dask_planner", subclass)]
 #[derive(Clone)]
 pub struct PyTableScan {
diff --git a/dask_planner/src/sql/logical/use_schema.rs b/dask_planner/src/sql/logical/use_schema.rs
index 29fb935f5..111999b33 100644
--- a/dask_planner/src/sql/logical/use_schema.rs
+++ b/dask_planner/src/sql/logical/use_schema.rs
@@ -1,14 +1,11 @@
-use crate::sql::exceptions::py_type_err;
-use crate::sql::logical;
-use pyo3::prelude::*;
-
-use datafusion_expr::logical_plan::UserDefinedLogicalNode;
-use datafusion_expr::{Expr, LogicalPlan};
-
-use fmt::Debug;
 use std::{any::Any, fmt, sync::Arc};

 use datafusion_common::{DFSchema, DFSchemaRef};
+use datafusion_expr::{logical_plan::UserDefinedLogicalNode, Expr, LogicalPlan};
+use fmt::Debug;
+use pyo3::prelude::*;
+
+use crate::sql::{exceptions::py_type_err, logical};

 #[derive(Clone)]
 pub struct UseSchemaPlanNode {
diff --git a/dask_planner/src/sql/logical/window.rs b/dask_planner/src/sql/logical/window.rs
index a1b8695ce..91d7485ee 100644
--- a/dask_planner/src/sql/logical/window.rs
+++ b/dask_planner/src/sql/logical/window.rs
@@ -1,8 +1,11 @@
-use crate::expression::{py_expr_list, PyExpr};
-use crate::sql::exceptions::py_type_err;
 use datafusion_expr::{logical_plan::Window, Expr, LogicalPlan, WindowFrame, WindowFrameBound};
 use pyo3::prelude::*;

+use crate::{
+    expression::{py_expr_list, PyExpr},
+    sql::exceptions::py_type_err,
+};
+
 #[pyclass(name = "Window", module = "dask_planner", subclass)]
 #[derive(Clone)]
 pub struct PyWindow {
diff --git a/dask_planner/src/sql/optimizer.rs b/dask_planner/src/sql/optimizer.rs
index 24ddc9e6b..facf1fd09 100644
--- a/dask_planner/src/sql/optimizer.rs
+++ b/dask_planner/src/sql/optimizer.rs
@@ -1,20 +1,24 @@
 use datafusion_common::DataFusionError;
 use datafusion_expr::LogicalPlan;
-use datafusion_optimizer::decorrelate_where_exists::DecorrelateWhereExists;
-use datafusion_optimizer::decorrelate_where_in::DecorrelateWhereIn;
-use datafusion_optimizer::eliminate_filter::EliminateFilter;
-use datafusion_optimizer::reduce_cross_join::ReduceCrossJoin;
-use datafusion_optimizer::reduce_outer_join::ReduceOuterJoin;
-use datafusion_optimizer::rewrite_disjunctive_predicate::RewriteDisjunctivePredicate;
-use datafusion_optimizer::scalar_subquery_to_join::ScalarSubqueryToJoin;
-use datafusion_optimizer::simplify_expressions::SimplifyExpressions;
-use datafusion_optimizer::type_coercion::TypeCoercion;
-use datafusion_optimizer::unwrap_cast_in_comparison::UnwrapCastInComparison;
 use datafusion_optimizer::{
-    common_subexpr_eliminate::CommonSubexprEliminate, eliminate_limit::EliminateLimit,
-    filter_null_join_keys::FilterNullJoinKeys, filter_push_down::FilterPushDown,
-    limit_push_down::LimitPushDown, optimizer::OptimizerRule,
-    projection_push_down::ProjectionPushDown, subquery_filter_to_join::SubqueryFilterToJoin,
+    common_subexpr_eliminate::CommonSubexprEliminate,
+    decorrelate_where_exists::DecorrelateWhereExists,
+    decorrelate_where_in::DecorrelateWhereIn,
+    eliminate_filter::EliminateFilter,
+    eliminate_limit::EliminateLimit,
+    filter_null_join_keys::FilterNullJoinKeys,
+    filter_push_down::FilterPushDown,
+    limit_push_down::LimitPushDown,
+    optimizer::OptimizerRule,
+    projection_push_down::ProjectionPushDown,
+    reduce_cross_join::ReduceCrossJoin,
+    reduce_outer_join::ReduceOuterJoin,
+    rewrite_disjunctive_predicate::RewriteDisjunctivePredicate,
+    scalar_subquery_to_join::ScalarSubqueryToJoin,
+    simplify_expressions::SimplifyExpressions,
+    subquery_filter_to_join::SubqueryFilterToJoin,
+    type_coercion::TypeCoercion,
+    unwrap_cast_in_comparison::UnwrapCastInComparison,
     OptimizerConfig,
 };
 use log::trace;
@@ -101,8 +105,8 @@ impl DaskSqlOptimizer {

 #[cfg(test)]
 mod tests {
-    use crate::dialect::DaskDialect;
-    use crate::sql::optimizer::DaskSqlOptimizer;
+    use std::{any::Any, collections::HashMap, sync::Arc};
+
     use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
     use datafusion_common::{DataFusionError, Result};
     use datafusion_expr::{AggregateUDF, LogicalPlan, ScalarUDF, TableSource};
@@ -111,9 +115,8 @@ mod tests {
         sqlparser::{ast::Statement, parser::Parser},
         TableReference,
     };
-    use std::any::Any;
-    use std::collections::HashMap;
-    use std::sync::Arc;
+
+    use crate::{dialect::DaskDialect, sql::optimizer::DaskSqlOptimizer};

     #[test]
     fn subquery_filter_with_cast() -> Result<()> {
diff --git a/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs b/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs
index 5ad2e59f6..3964b8c8b 100644
--- a/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs
+++ b/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs
@@ -63,17 +63,19 @@
 //!     Aggregate: groupBy=[[a.d]], aggr=[[COUNT(UInt64(1)) AS __dask_sql_count__4]]\
 //!       TableScan: a

+use std::{collections::HashSet, sync::Arc};
+
 use datafusion_common::{Column, Result};
-use datafusion_expr::logical_plan::Projection;
 use datafusion_expr::{
-    col, count,
-    logical_plan::{Aggregate, LogicalPlan},
-    AggregateFunction, Expr, LogicalPlanBuilder,
+    col,
+    count,
+    logical_plan::{Aggregate, LogicalPlan, Projection},
+    AggregateFunction,
+    Expr,
+    LogicalPlanBuilder,
 };
 use datafusion_optimizer::{utils, OptimizerConfig, OptimizerRule};
 use log::trace;
-use std::collections::HashSet;
-use std::sync::Arc;

 /// Optimizer rule eliminating/moving Aggregate Expr(s) with a `DISTINCT` inner Expr.
 #[derive(Default)]
@@ -428,14 +430,18 @@ fn unique_set_without_aliases(unique_expressions: &HashSet<Expr>) -> HashSet<Expr>

Date: Thu, 13 Oct 2022 13:31:03 -0400
Subject: [PATCH 3/4] Update datafusion dependency during upstream testing (#814)

* Update datafusion dependency during upstream testing
* add upstream testing manifest file
* Move to planner dir before updating
* Replace upstream-toml with script to replace datafusion dependencies in the existing Cargo.toml

Co-authored-by: Andy Grove

* update cargo dependencies for commits containing test-upstream command
* test-upstream
* remove pr testing for [test-upstream] tests
* Make cargo update optional for only updating specific deps
* [test-upstream]
* Remove upstream cargo testing with upstream dask and move to rust only component
* Add another cron job with cargo update to upstream testing
* update upstream testing conditionals
* Update .github/workflows/test-upstream.yml

Co-authored-by: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com>

Co-authored-by: Andy Grove
Co-authored-by: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com>
---
 .github/workflows/rust.yml | 27 +++++++++++++++++++
 .github/workflows/test-upstream.yml | 41 ++++++++++++++++++++++++++---
 dask_planner/update-dependencies.sh | 9 +++++++
 3 files changed, 74 insertions(+), 3 deletions(-)
 create mode 100644 dask_planner/update-dependencies.sh

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 40b11dd9f..d61be9d5f 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -16,9 +16,25 @@ env:
   RUSTFLAGS: "-C debuginfo=1"

 jobs:
+  detect-ci-trigger:
+    name: Check for upstream trigger phrase
+    runs-on: ubuntu-latest
+    if: github.repository == 'dask-contrib/dask-sql'
+    outputs:
+      triggered: ${{ steps.detect-trigger.outputs.trigger-found }}
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 2
+      - uses: xarray-contrib/ci-trigger@v1.1
+        id: detect-trigger
+        with:
+          keyword: "[test-df-upstream]"
+
   # Check crate compiles
   linux-build-lib:
     name: cargo check
+    needs: [detect-ci-trigger]
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
@@ -27,6 +43,11 @@
         with:
           path: /home/runner/.cargo
           key: cargo-cache
+      - name: Optionally update upstream dependencies
+        if: needs.detect-ci-trigger.outputs.triggered == 'true'
+        run: |
+          cd dask_planner
+          bash update-dependencies.sh
       - name: Check workspace in debug mode
         run: |
           cd dask_planner
@@ -39,6 +60,7 @@
   # test the crate
   linux-test:
     name: cargo test
+    needs: [detect-ci-trigger]
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
@@ -49,6 +71,11 @@
         with:
           path: /home/runner/.cargo
           key: cargo-cache
+      - name: Optionally update upstream dependencies
+        if: needs.detect-ci-trigger.outputs.triggered == 'true'
+        run: |
+          cd dask_planner
+          bash update-dependencies.sh
       - name: Run tests
         run: |
           cd dask_planner
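Given the `detect-ci-trigger` job above, a contributor opts a branch into upstream DataFusion testing by placing the keyword in the most recent commit message (the `fetch-depth: 2` checkout is what lets the action inspect it). Illustrative usage, with the commit wording being arbitrary:

```bash
# Any commit whose message contains the literal keyword flips the
# detect-ci-trigger output to true for the jobs gated on it.
git commit --allow-empty -m "Kick off upstream DataFusion checks [test-df-upstream]"
git push
```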
diff --git a/.github/workflows/test-upstream.yml b/.github/workflows/test-upstream.yml
index 0e0a8e849..da4666545 100644
--- a/.github/workflows/test-upstream.yml
+++ b/.github/workflows/test-upstream.yml
@@ -1,14 +1,29 @@
 name: Nightly upstream testing
 on:
   schedule:
-    - cron: "0 0 * * *" # Daily “At 00:00” UTC
+    - cron: "0 0 * * *" # Daily “At 00:00” UTC for upstream dask testing
+    - cron: "0 3 * * *" # Daily "At 03:00" UTC for upstream datafusion testing
   workflow_dispatch: # allows you to trigger the workflow run manually
-
+    inputs:
+      upstreamLib:
+        type: choice
+        description: 'Library to update for upstream testing'
+        required: false
+        default: 'Dask'
+        options:
+          - Dask
+          - DataFusion

 # Required shell entrypoint to have properly activated conda environments
 defaults:
   run:
     shell: bash -l {0}

+env:
+  which_upstream: |
+    (github.event.schedule == '0 3 * * *' && 'DataFusion')
+    || (github.event.schedule == '0 0 * * *' && 'Dask')
+    || (github.event.inputs.upstreamLib)
+
 jobs:
   test-dev:
     name: "Test upstream dev (${{ matrix.os }}, python: ${{ matrix.python }})"
@@ -38,6 +53,11 @@
           channels: dask/label/dev,conda-forge,nodefaults
           activate-environment: dask-sql
           environment-file: ${{ env.CONDA_FILE }}
+      - name: Optionally update upstream cargo dependencies
+        if: env.which_upstream == 'DataFusion'
+        run: |
+          cd dask_planner
+          bash update-dependencies.sh
       - name: Build the Rust DataFusion bindings
         run: |
           python setup.py build install
@@ -48,6 +68,7 @@
           docker pull bde2020/hive:2.3.2-postgresql-metastore
           docker pull bde2020/hive-metastore-postgresql:2.3.0
       - name: Install upstream dev Dask / dask-ml
+        if: env.which_upstream == 'Dask'
        run: |
           mamba update dask
           python -m pip install --no-deps git+https://github.com/dask/dask-ml
@@ -70,6 +91,11 @@
           channels: dask/label/dev,conda-forge,nodefaults
           activate-environment: dask-sql
           environment-file: continuous_integration/environment-3.9-dev.yaml
+      - name: Optionally update upstream cargo dependencies
+        if: env.which_upstream == 'DataFusion'
+        run: |
+          cd dask_planner
+          bash update-dependencies.sh
       - name: Build the Rust DataFusion bindings
         run: |
           python setup.py build install
@@ -81,6 +107,7 @@
           pip list
           mamba list
       - name: Install upstream dev dask-ml
+        if: env.which_upstream == 'Dask'
         run: |
           mamba update dask
           python -m pip install --no-deps git+https://github.com/dask/dask-ml
@@ -109,6 +136,13 @@
           mamba-version: "*"
           channels: conda-forge,nodefaults
           channel-priority: strict
+      - name: Optionally update upstream cargo dependencies
+        if: env.which_upstream == 'DataFusion'
+        env:
+          UPDATE_ALL_CARGO_DEPS: false
+        run: |
+          cd dask_planner
+          bash update-dependencies.sh
       - name: Install dependencies and nothing else
         run: |
           mamba install setuptools-rust
@@ -118,6 +152,7 @@
           pip list
           mamba list
       - name: Install upstream dev Dask / dask-ml
+        if: env.which_upstream == 'Dask'
         run: |
           python -m pip install --no-deps git+https://github.com/dask/dask
           python -m pip install --no-deps git+https://github.com/dask/distributed
@@ -142,7 +177,7 @@
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           script: |
-            const title = "⚠️ Upstream CI failed ⚠️"
+            const title = "⚠️ Upstream CI ${{ env.which_upstream }} failed ⚠️"
             const workflow_url = `https://github.com/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`
             const issue_body = `[Workflow Run URL](${workflow_url})`
             // Run GraphQL query against GitHub API to find the most recent open issue used for reporting failures
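All three jobs above call the same helper, `update-dependencies.sh`, added in the next hunk. Run by hand it behaves identically; with `UPDATE_ALL_CARGO_DEPS=false` (as the import/test job sets it) the script only rewrites the `datafusion-*` entries in `Cargo.toml` and skips the blanket `cargo update`:

```bash
# Point the planner's datafusion-* dependencies at the upstream default branch,
# mirroring the workflow steps above; skip the full lockfile refresh.
cd dask_planner
UPDATE_ALL_CARGO_DEPS=false bash update-dependencies.sh
```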
diff --git a/dask_planner/update-dependencies.sh b/dask_planner/update-dependencies.sh
new file mode 100644
index 000000000..a94c53dad
--- /dev/null
+++ b/dask_planner/update-dependencies.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+UPDATE_ALL_CARGO_DEPS="${UPDATE_ALL_CARGO_DEPS:-true}"
+# Update datafusion dependencies in the dask-planner to the latest revision from the default branch
+sed -i -r 's/^datafusion-([a-z]+).*/datafusion-\1 = { git = "https:\/\/github.com\/apache\/arrow-datafusion\/" }/g' Cargo.toml
+
+if [ "$UPDATE_ALL_CARGO_DEPS" = true ] ; then
+    cargo update
+fi

From 89450cc368c0ffa31fd09e0e6be16eafc5f7f75a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 14 Oct 2022 09:26:31 -0400
Subject: [PATCH 4/4] Bump mimalloc from 0.1.29 to 0.1.30 in /dask_planner (#862)

Bumps [mimalloc](https://github.com/purpleprotocol/mimalloc_rust) from 0.1.29 to 0.1.30.
- [Release notes](https://github.com/purpleprotocol/mimalloc_rust/releases)
- [Commits](https://github.com/purpleprotocol/mimalloc_rust/compare/v0.1.29...v0.1.30)

---
updated-dependencies:
- dependency-name: mimalloc
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 dask_planner/Cargo.lock | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dask_planner/Cargo.lock b/dask_planner/Cargo.lock
index b011f6434..4111666dd 100644
--- a/dask_planner/Cargo.lock
+++ b/dask_planner/Cargo.lock
@@ -665,9 +665,9 @@ checksum = "292a948cd991e376cf75541fe5b97a1081d713c618b4f1b9500f8844e49eb565"

 [[package]]
 name = "libmimalloc-sys"
-version = "0.1.25"
+version = "0.1.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11ca136052550448f55df7898c6dbe651c6b574fe38a0d9ea687a9f8088a2e2c"
+checksum = "8fc093ab289b0bfda3aa1bdfab9c9542be29c7ef385cfcbe77f8c9813588eb48"
 dependencies = [
  "cc",
 ]
@@ -717,9 +717,9 @@ dependencies = [

 [[package]]
 name = "mimalloc"
-version = "0.1.29"
+version = "0.1.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f64ad83c969af2e732e907564deb0d0ed393cec4af80776f77dd77a1a427698"
+checksum = "76ce6a4b40d3bff9eb3ce9881ca0737a85072f9f975886082640cd46a75cdb35"
 dependencies = [
  "libmimalloc-sys",
 ]
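For reference, a lockfile-only bump like this last patch can be reproduced by hand by scoping `cargo update` to the one package (Dependabot does the equivalent); `libmimalloc-sys` follows along as mimalloc's sys dependency:

```bash
# Recreate the Dependabot change: bump mimalloc in Cargo.lock only, leaving
# Cargo.toml version requirements untouched.
cd dask_planner
cargo update --package mimalloc --precise 0.1.30
```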