Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename functions-array to functions-nested #11602

Merged
merged 10 commits into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ jobs:

# Ensure that the datafusion crate can be built with only a subset of the function
# packages enabled.
- name: Check datafusion (array_expressions)
run: cargo check --no-default-features --features=array_expressions -p datafusion
- name: Check datafusion (nested_expressions)
run: cargo check --no-default-features --features=nested_expressions -p datafusion

- name: Check datafusion (crypto)
run: cargo check --no-default-features --features=crypto_expressions -p datafusion
Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ members = [
"datafusion/execution",
"datafusion/functions-aggregate",
"datafusion/functions",
"datafusion/functions-array",
"datafusion/functions-nested",
"datafusion/optimizer",
"datafusion/physical-expr-common",
"datafusion/physical-expr",
Expand Down Expand Up @@ -94,7 +94,7 @@ datafusion-execution = { path = "datafusion/execution", version = "40.0.0" }
datafusion-expr = { path = "datafusion/expr", version = "40.0.0" }
datafusion-functions = { path = "datafusion/functions", version = "40.0.0" }
datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "40.0.0" }
datafusion-functions-array = { path = "datafusion/functions-array", version = "40.0.0" }
datafusion-functions-nested = { path = "datafusion/functions-nested", version = "40.0.0" }
datafusion-optimizer = { path = "datafusion/optimizer", version = "40.0.0", default-features = false }
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "40.0.0", default-features = false }
datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "40.0.0", default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ This crate has several [features] which can be specified in your `Cargo.toml`.

Default features:

- `array_expressions`: functions for working with arrays such as `array_to_string`
- `nested_expressions`: functions for working with nested types such as `array_to_string`
- `compression`: reading files compressed with `xz2`, `bzip2`, `flate2`, and `zstd`
- `crypto_expressions`: cryptographic functions such as `md5` and `sha256`
- `datetime_expressions`: date and time functions such as `to_timestamp`
Expand Down
4 changes: 2 additions & 2 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ path = "src/lib.rs"

[features]
# Used to enable the avro format
array_expressions = ["datafusion-functions-array"]
nested_expressions = ["datafusion-functions-nested"]
avro = ["apache-avro", "num-traits", "datafusion-common/avro"]
backtrace = ["datafusion-common/backtrace"]
compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression", "tokio-util"]
crypto_expressions = ["datafusion-functions/crypto_expressions"]
datetime_expressions = ["datafusion-functions/datetime_expressions"]
default = [
"array_expressions",
"nested_expressions",
"crypto_expressions",
"datetime_expressions",
"encoding_expressions",
Expand Down Expand Up @@ -102,7 +102,7 @@ datafusion-execution = { workspace = true }
datafusion-expr = { workspace = true }
datafusion-functions = { workspace = true }
datafusion-functions-aggregate = { workspace = true }
datafusion-functions-array = { workspace = true, optional = true }
datafusion-functions-nested = { workspace = true, optional = true }
datafusion-optimizer = { workspace = true }
datafusion-physical-expr = { workspace = true }
datafusion-physical-expr-common = { workspace = true }
Expand Down Expand Up @@ -221,4 +221,4 @@ name = "parquet_statistic"
[[bench]]
harness = false
name = "map_query_sql"
required-features = ["array_expressions"]
required-features = ["nested_expressions"]
2 changes: 1 addition & 1 deletion datafusion/core/benches/map_query_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use tokio::runtime::Runtime;
use datafusion::prelude::SessionContext;
use datafusion_common::ScalarValue;
use datafusion_expr::Expr;
use datafusion_functions_array::map::map;
use datafusion_functions_nested::map::map;

mod data_utils;

Expand Down
23 changes: 12 additions & 11 deletions datafusion/core/src/execution/session_state_defaults.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ use crate::datasource::file_format::parquet::ParquetFormatFactory;
use crate::datasource::file_format::FileFormatFactory;
use crate::datasource::provider::{DefaultTableFactory, TableProviderFactory};
use crate::execution::context::SessionState;
#[cfg(feature = "array_expressions")]
use crate::functions_array;
#[cfg(feature = "nested_expressions")]
use crate::functions_nested;
use crate::{functions, functions_aggregate};
use datafusion_execution::config::SessionConfig;
use datafusion_execution::object_store::ObjectStoreUrl;
Expand Down Expand Up @@ -82,11 +82,11 @@ impl SessionStateDefaults {
pub fn default_expr_planners() -> Vec<Arc<dyn ExprPlanner>> {
let expr_planners: Vec<Arc<dyn ExprPlanner>> = vec![
Arc::new(functions::core::planner::CoreFunctionPlanner::default()),
// register crate of array expressions (if enabled)
#[cfg(feature = "array_expressions")]
Arc::new(functions_array::planner::ArrayFunctionPlanner),
#[cfg(feature = "array_expressions")]
Arc::new(functions_array::planner::FieldAccessPlanner),
// register crate of nested expressions (if enabled)
#[cfg(feature = "nested_expressions")]
Arc::new(functions_nested::planner::NestedFunctionPlanner),
#[cfg(feature = "nested_expressions")]
Arc::new(functions_nested::planner::FieldAccessPlanner),
#[cfg(any(
feature = "datetime_expressions",
feature = "unicode_expressions"
Expand All @@ -100,8 +100,8 @@ impl SessionStateDefaults {
/// returns the list of default [`ScalarUDF`]'s
pub fn default_scalar_functions() -> Vec<Arc<ScalarUDF>> {
let mut functions: Vec<Arc<ScalarUDF>> = functions::all_default_functions();
#[cfg(feature = "array_expressions")]
functions.append(&mut functions_array::all_default_array_functions());
#[cfg(feature = "nested_expressions")]
functions.append(&mut functions_nested::all_default_nested_functions());

functions
}
Expand Down Expand Up @@ -140,8 +140,9 @@ impl SessionStateDefaults {
/// registers all the builtin array functions
pub fn register_array_functions(state: &mut SessionState) {
// register crate of nested expressions (if enabled)
#[cfg(feature = "array_expressions")]
functions_array::register_all(state).expect("can not register array expressions");
#[cfg(feature = "nested_expressions")]
functions_nested::register_all(state)
.expect("can not register nested expressions");
}

/// registers all the builtin aggregate functions
Expand Down
10 changes: 5 additions & 5 deletions datafusion/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@
//! * [datafusion_execution]: State and structures needed for execution
//! * [datafusion_expr]: [`LogicalPlan`], [`Expr`] and related logical planning structure
//! * [datafusion_functions]: Scalar function packages
//! * [datafusion_functions_array]: Scalar function packages for `ARRAY`s
//! * [datafusion_functions_nested]: Scalar function packages for `ARRAY`s, `MAP`s and `STRUCT`s
//! * [datafusion_optimizer]: [`OptimizerRule`]s and [`AnalyzerRule`]s
//! * [datafusion_physical_expr]: [`PhysicalExpr`] and related expressions
//! * [datafusion_physical_plan]: [`ExecutionPlan`] and related expressions
Expand Down Expand Up @@ -569,10 +569,10 @@ pub mod functions {
pub use datafusion_functions::*;
}

/// re-export of [`datafusion_functions_array`] crate, if "array_expressions" feature is enabled
pub mod functions_array {
#[cfg(feature = "array_expressions")]
pub use datafusion_functions_array::*;
/// re-export of [`datafusion_functions_nested`] crate, if "nested_expressions" feature is enabled
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can leave a pub use functions_array here to help migration

Something like

    #[deprecated(since = "41.0.0", note = "use datafusion-functions-nested instead")]
pub mod functions_array {
    #[cfg(feature = "nested_expressions")]
    pub use datafusion_functions_nested::*;
}

We could do something similar with the feature flags, but maybe that is too complicated to be worth it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea. Thanks. I'll try to do it for the crate and feature.

pub mod functions_nested {
#[cfg(feature = "nested_expressions")]
pub use datafusion_functions_nested::*;
}

/// re-export of [`datafusion_functions_aggregate`] crate
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/src/prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ pub use datafusion_expr::{
Expr,
};
pub use datafusion_functions::expr_fn::*;
#[cfg(feature = "array_expressions")]
pub use datafusion_functions_array::expr_fn::*;
#[cfg(feature = "nested_expressions")]
pub use datafusion_functions_nested::expr_fn::*;

pub use std::ops::Not;
pub use std::ops::{Add, Div, Mul, Neg, Rem, Sub};
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/dataframe/dataframe_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use datafusion_common::{DFSchema, ScalarValue};
use datafusion_expr::expr::Alias;
use datafusion_expr::ExprSchemable;
use datafusion_functions_aggregate::expr_fn::{approx_median, approx_percentile_cont};
use datafusion_functions_array::map::map;
use datafusion_functions_nested::map::map;

fn test_schema() -> SchemaRef {
Arc::new(Schema::new(vec![
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/expr_api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use datafusion_expr::AggregateExt;
use datafusion_functions::core::expr_ext::FieldAccessor;
use datafusion_functions_aggregate::first_last::first_value_udaf;
use datafusion_functions_aggregate::sum::sum_udaf;
use datafusion_functions_array::expr_ext::{IndexAccessor, SliceAccessor};
use datafusion_functions_nested::expr_ext::{IndexAccessor, SliceAccessor};
use sqlparser::ast::NullTreatment;
/// Tests of using and evaluating `Expr`s outside the context of a LogicalPlan
use std::sync::{Arc, OnceLock};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use datafusion_expr::{
LogicalPlanBuilder, OperateFunctionArg, ScalarUDF, ScalarUDFImpl, Signature,
Volatility,
};
use datafusion_functions_array::range::range_udf;
use datafusion_functions_nested::range::range_udf;

/// test that casting happens on udfs.
/// c11 is f32, but `custom_sqrt` requires f64. Casting happens but the logical plan and
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/src/expr_rewriter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pub use order_by::rewrite_sort_cols_by_aggs;
///
/// For example, concatenating arrays `a || b` is represented as
/// `Operator::ArrowAt`, but can be implemented by calling a function
/// `array_concat` from the `functions-array` crate.
/// `array_concat` from the `functions-nested` crate.
// This is not used in datafusion internally, but it is still helpful for downstream project so don't remove it.
pub trait FunctionRewrite {
/// Return a human readable name for this rewrite
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
# under the License.

[package]
name = "datafusion-functions-array"
description = "Array Function packages for the DataFusion query engine"
name = "datafusion-functions-nested"
description = "Nested Type Function packages for the DataFusion query engine"
keywords = ["datafusion", "logical", "plan", "expressions"]
readme = "README.md"
version = { workspace = true }
Expand All @@ -34,7 +34,7 @@ workspace = true
[features]

[lib]
name = "datafusion_functions_array"
name = "datafusion_functions_nested"
path = "src/lib.rs"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
under the License.
-->

# DataFusion Array Function Library
# DataFusion Nested Type Function Library

[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.

This crate contains functions for working with arrays, such as `array_append` that work with
This crate contains functions for working with arrays, maps and structs, such as `array_append`, which works with
`ListArray`, `LargeListArray` and `FixedSizeListArray` types from the `arrow` crate.

[df]: https://crates.io/crates/datafusion
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ extern crate arrow;

use crate::criterion::Criterion;
use datafusion_expr::lit;
use datafusion_functions_array::expr_fn::{array_replace_all, make_array};
use datafusion_functions_nested::expr_fn::{array_replace_all, make_array};

fn criterion_benchmark(c: &mut Criterion) {
// Construct large arrays for benchmarking
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ use std::sync::Arc;
use datafusion_common::ScalarValue;
use datafusion_expr::planner::ExprPlanner;
use datafusion_expr::{ColumnarValue, Expr};
use datafusion_functions_array::map::map_udf;
use datafusion_functions_array::planner::ArrayFunctionPlanner;
use datafusion_functions_nested::map::map_udf;
use datafusion_functions_nested::planner::NestedFunctionPlanner;

fn keys(rng: &mut ThreadRng) -> Vec<String> {
let mut keys = vec![];
Expand Down Expand Up @@ -58,7 +58,7 @@ fn criterion_benchmark(c: &mut Criterion) {
buffer.push(Expr::Literal(ScalarValue::Int32(Some(values[i]))));
}

let planner = ArrayFunctionPlanner {};
let planner = NestedFunctionPlanner {};

b.iter(|| {
black_box(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use crate::extract::{array_element, array_slice};
///
/// ```
/// # use datafusion_expr::{lit, col, Expr};
/// # use datafusion_functions_array::expr_ext::IndexAccessor;
/// # use datafusion_functions_nested::expr_ext::IndexAccessor;
/// let expr = col("c1")
/// .index(lit(3));
/// assert_eq!(expr.display_name().unwrap(), "c1[Int32(3)]");
Expand Down Expand Up @@ -65,7 +65,7 @@ impl IndexAccessor for Expr {
///
/// ```
/// # use datafusion_expr::{lit, col};
/// # use datafusion_functions_array::expr_ext::SliceAccessor;
/// # use datafusion_functions_nested::expr_ext::SliceAccessor;
/// let expr = col("c1")
/// .range(lit(2), lit(4));
/// assert_eq!(expr.display_name().unwrap(), "c1[Int32(2):Int32(4)]");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143
#![deny(clippy::clone_on_ref_ptr)]

//! Array Functions for [DataFusion].
//! Nested type Functions for [DataFusion].
//!
//! This crate contains a collection of array functions implemented using the
//! This crate contains a collection of nested type functions implemented using the
//! extension API.
//!
//! [DataFusion]: https://crates.io/crates/datafusion
Expand Down Expand Up @@ -102,8 +102,8 @@ pub mod expr_fn {
pub use super::string::string_to_array;
}

/// Return all default array functions
pub fn all_default_array_functions() -> Vec<Arc<ScalarUDF>> {
/// Return all default nested type functions
pub fn all_default_nested_functions() -> Vec<Arc<ScalarUDF>> {
vec![
string::array_to_string_udf(),
string::string_to_array_udf(),
Expand Down Expand Up @@ -148,7 +148,7 @@ pub fn all_default_array_functions() -> Vec<Arc<ScalarUDF>> {

/// Registers all enabled packages with a [`FunctionRegistry`]
pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
let functions: Vec<Arc<ScalarUDF>> = all_default_array_functions();
let functions: Vec<Arc<ScalarUDF>> = all_default_nested_functions();
functions.into_iter().try_for_each(|udf| {
let existing_udf = registry.register_udf(udf)?;
if let Some(existing_udf) = existing_udf {
Expand All @@ -162,14 +162,14 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {

#[cfg(test)]
mod tests {
use crate::all_default_array_functions;
use crate::all_default_nested_functions;
use datafusion_common::Result;
use std::collections::HashSet;

#[test]
fn test_no_duplicate_name() -> Result<()> {
let mut names = HashSet::new();
for func in all_default_array_functions() {
for func in all_default_nested_functions() {
assert!(
names.insert(func.name().to_string().to_lowercase()),
"duplicate function name: {}",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

//! SQL planning extensions like [`ArrayFunctionPlanner`] and [`FieldAccessPlanner`]
//! SQL planning extensions like [`NestedFunctionPlanner`] and [`FieldAccessPlanner`]

use datafusion_common::{exec_err, utils::list_ndims, DFSchema, Result};
use datafusion_expr::expr::ScalarFunction;
Expand All @@ -35,9 +35,9 @@ use crate::{
make_array::make_array,
};

pub struct ArrayFunctionPlanner;
pub struct NestedFunctionPlanner;

impl ExprPlanner for ArrayFunctionPlanner {
impl ExprPlanner for NestedFunctionPlanner {
fn plan_binary_op(
&self,
expr: RawBinaryExpr,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/proto/tests/cases/roundtrip_logical_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ use datafusion::functions_aggregate::expr_fn::{
count_distinct, covar_pop, covar_samp, first_value, grouping, median, stddev,
stddev_pop, sum, var_pop, var_sample,
};
use datafusion::functions_array::map::map;
use datafusion::functions_nested::map::map;
use datafusion::prelude::*;
use datafusion::test_util::{TestTableFactory, TestTableProvider};
use datafusion_common::config::TableOptions;
Expand Down
2 changes: 1 addition & 1 deletion dev/release/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ dot -Tsvg dev/release/crate-deps.dot > dev/release/crate-deps.svg
(cd datafusion/functions-aggregate && cargo publish)
(cd datafusion/physical-expr && cargo publish)
(cd datafusion/functions && cargo publish)
(cd datafusion/functions-array && cargo publish)
(cd datafusion/functions-nested && cargo publish)
(cd datafusion/sql && cargo publish)
(cd datafusion/optimizer && cargo publish)
(cd datafusion/common-runtime && cargo publish)
Expand Down
Loading