From d896000ad3111e46d1b3e53b03c9a10092b51e65 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 30 Mar 2024 12:54:26 -0400 Subject: [PATCH] Add CI compile checks for feature flags in datafusion-functions (#9772) * Add CI checks for feature flags * Fix builds * Move function benchmark to datafusion-functions crate * Less aggressive ci checks * Improve doc * Fix compilation of datafusion-array * toml format * Update datafusion/functions-array/benches/array_expression.rs --- .github/workflows/rust.yml | 59 ++++++++++++++----- datafusion/core/Cargo.toml | 7 +-- datafusion/functions-array/Cargo.toml | 7 +++ .../benches/array_expression.rs | 4 +- datafusion/functions/Cargo.toml | 4 ++ 5 files changed, 60 insertions(+), 21 deletions(-) rename datafusion/{core => functions-array}/benches/array_expression.rs (93%) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 07c46351e9ac..ffd45b9777ef 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -65,42 +65,73 @@ jobs: # this key equals the ones on `linux-build-lib` for re-use key: cargo-cache-benchmark-${{ hashFiles('datafusion/**/Cargo.toml', 'benchmarks/Cargo.toml', 'datafusion-cli/Cargo.toml') }} - - name: Check workspace without default features + - name: Check datafusion without default features + # Some of the test binaries require the parquet feature still + #run: cargo check --all-targets --no-default-features -p datafusion run: cargo check --no-default-features -p datafusion - name: Check datafusion-common without default features - run: cargo check --tests --no-default-features -p datafusion-common + run: cargo check --all-targets --no-default-features -p datafusion-common + + - name: Check datafusion-functions + run: cargo check --all-targets --no-default-features -p datafusion-functions - name: Check workspace in debug mode run: cargo check - - name: Check workspace with all features + - name: Check workspace with avro,json features run: cargo check --workspace --benches --features avro,json + - name: Check Cargo.lock for datafusion-cli + run: | + # If this test fails, try running `cargo update` in the `datafusion-cli` directory + # and check in the updated Cargo.lock file. + cargo check --manifest-path datafusion-cli/Cargo.toml --locked + # Ensure that the datafusion crate can be built with only a subset of the function # packages enabled. - - name: Check function packages (array_expressions) + - name: Check datafusion (array_expressions) run: cargo check --no-default-features --features=array_expressions -p datafusion - - name: Check function packages (datetime_expressions) + - name: Check datafusion (crypto) + run: cargo check --no-default-features --features=crypto_expressions -p datafusion + + - name: Check datafusion (datetime_expressions) run: cargo check --no-default-features --features=datetime_expressions -p datafusion - - name: Check function packages (encoding_expressions) + - name: Check datafusion (encoding_expressions) run: cargo check --no-default-features --features=encoding_expressions -p datafusion - - name: Check function packages (math_expressions) + - name: Check datafusion (math_expressions) run: cargo check --no-default-features --features=math_expressions -p datafusion - - name: Check function packages (regex_expressions) + - name: Check datafusion (regex_expressions) run: cargo check --no-default-features --features=regex_expressions -p datafusion - - name: Check Cargo.lock for datafusion-cli - run: | - # If this test fails, try running `cargo update` in the `datafusion-cli` directory - # and check in the updated Cargo.lock file. - cargo check --manifest-path datafusion-cli/Cargo.toml --locked + - name: Check datafusion (string_expressions) + run: cargo check --no-default-features --features=string_expressions -p datafusion + + # Ensure that the datafusion-functions crate can be built with only a subset of the function + # packages enabled. + - name: Check datafusion-functions (crypto) + run: cargo check --all-targets --no-default-features --features=crypto_expressions -p datafusion-functions + + - name: Check datafusion-functions (datetime_expressions) + run: cargo check --all-targets --no-default-features --features=datetime_expressions -p datafusion-functions + + - name: Check datafusion-functions (encoding_expressions) + run: cargo check --all-targets --no-default-features --features=encoding_expressions -p datafusion-functions + + - name: Check datafusion-functions (math_expressions) + run: cargo check --all-targets --no-default-features --features=math_expressions -p datafusion-functions + + - name: Check datafusion-functions (regex_expressions) + run: cargo check --all-targets --no-default-features --features=regex_expressions -p datafusion-functions + + - name: Check datafusion-functions (string_expressions) + run: cargo check --all-targets --no-default-features --features=string_expressions -p datafusion-functions - # test the crate + # Run tests linux-test: name: cargo test (amd64) needs: [ linux-build-lib ] diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index fbbe047880b1..18946334dbf5 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -50,6 +50,7 @@ default = [ "datetime_expressions", "encoding_expressions", "regex_expressions", + "string_expressions", "unicode_expressions", "compression", "parquet", @@ -66,6 +67,7 @@ regex_expressions = [ "datafusion-functions/regex_expressions", ] serde = ["arrow-schema/serde"] +string_expressions = ["datafusion-functions/string_expressions"] unicode_expressions = [ "datafusion-physical-expr/unicode_expressions", "datafusion-optimizer/unicode_expressions", @@ -188,6 +190,7 @@ name = "physical_plan" [[bench]] harness = false name = "parquet_query_sql" +required-features = ["parquet"] [[bench]] harness = false @@ -204,7 +207,3 @@ name = "sort" [[bench]] harness = false name = "topk_aggregate" - -[[bench]] -harness = false -name = "array_expression" diff --git a/datafusion/functions-array/Cargo.toml b/datafusion/functions-array/Cargo.toml index 80c0e5e18768..6ef9c6b055af 100644 --- a/datafusion/functions-array/Cargo.toml +++ b/datafusion/functions-array/Cargo.toml @@ -49,3 +49,10 @@ datafusion-functions = { workspace = true } itertools = { version = "0.12", features = ["use_std"] } log = { workspace = true } paste = "1.0.14" + +[dev-dependencies] +criterion = { version = "0.5", features = ["async_tokio"] } + +[[bench]] +harness = false +name = "array_expression" diff --git a/datafusion/core/benches/array_expression.rs b/datafusion/functions-array/benches/array_expression.rs similarity index 93% rename from datafusion/core/benches/array_expression.rs rename to datafusion/functions-array/benches/array_expression.rs index c980329620aa..48b829793cef 100644 --- a/datafusion/core/benches/array_expression.rs +++ b/datafusion/functions-array/benches/array_expression.rs @@ -18,12 +18,10 @@ #[macro_use] extern crate criterion; extern crate arrow; -extern crate datafusion; -mod data_utils; use crate::criterion::Criterion; -use datafusion::functions_array::expr_fn::{array_replace_all, make_array}; use datafusion_expr::lit; +use datafusion_functions_array::expr_fn::{array_replace_all, make_array}; fn criterion_benchmark(c: &mut Criterion) { // Construct large arrays for benchmarking diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 3ae3061012e0..51452b9d4ca1 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -90,15 +90,19 @@ tokio = { workspace = true, features = ["macros", "rt", "sync"] } [[bench]] harness = false name = "to_timestamp" +required-features = ["datetime_expressions"] [[bench]] harness = false name = "regx" +required-features = ["regex_expressions"] [[bench]] harness = false name = "make_date" +required-features = ["datetime_expressions"] [[bench]] harness = false name = "to_char" +required-features = ["datetime_expressions"]