From 09054263df1d8de06b6e77ab0cbd99027bb7ceb6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 29 May 2024 03:26:16 -0400 Subject: [PATCH] Minor: improve Expr documentation (#10685) * Minor: improve Expr documentation * Update datafusion/expr/src/expr.rs Co-authored-by: Oleks V * Refine words --------- Co-authored-by: Oleks V --- datafusion/expr/src/expr.rs | 96 +++++++++++++++++++++++------ datafusion/sql/src/unparser/expr.rs | 8 ++- 2 files changed, 82 insertions(+), 22 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 0c05355cde1b..3542e2d985ca 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -39,17 +39,39 @@ use datafusion_common::{ }; use sqlparser::ast::NullTreatment; -/// `Expr` is a central struct of DataFusion's query API, and -/// represent logical expressions such as `A + 1`, or `CAST(c1 AS -/// int)`. +/// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`. /// -/// An `Expr` can compute its [DataType] -/// and nullability, and has functions for building up complex -/// expressions. +/// For example the expression `A + 1` will be represented as +/// +///```text +/// BinaryExpr { +/// left: Expr::Column("A"), +/// op: Operator::Plus, +/// right: Expr::Literal(ScalarValue::Int32(Some(1))) +/// } +/// ``` +/// +/// # Creating Expressions +/// +/// `Expr`s can be created directly, but it is often easier and less verbose to +/// use the fluent APIs in [`crate::expr_fn`] such as [`col`] and [`lit`], or +/// methods such as [`Expr::alias`], [`Expr::cast_to`], and [`Expr::Like`]. +/// +/// # Schema Access +/// +/// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability +/// of an `Expr`. /// /// # Examples /// -/// ## Create an expression `c1` referring to column named "c1" +/// ## Column references and literals +/// +/// [`Expr::Column`] refer to the values of columns and are often created with +/// the [`col`] function. 
For example to create an expression `c1` referring to +/// column named "c1": +/// +/// [`col`]: crate::expr_fn::col +/// /// ``` /// # use datafusion_common::Column; /// # use datafusion_expr::{lit, col, Expr}; @@ -57,11 +79,33 @@ use sqlparser::ast::NullTreatment; /// assert_eq!(expr, Expr::Column(Column::from_name("c1"))); /// ``` /// -/// ## Create the expression `c1 + c2` to add columns "c1" and "c2" together +/// [`Expr::Literal`] refer to literal, or constant, values. These are created +/// with the [`lit`] function. For example to create an expression `42`: +/// +/// [`lit`]: crate::lit +/// +/// ``` +/// # use datafusion_common::{Column, ScalarValue}; +/// # use datafusion_expr::{lit, col, Expr}; +/// // All literals are strongly typed in DataFusion. To make an `i64` 42: +/// let expr = lit(42i64); +/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42)))); +/// // To make a (typed) NULL: +/// let expr = Expr::Literal(ScalarValue::Int64(None)); +/// // to make an (untyped) NULL (the optimizer will coerce this to the correct type): +/// let expr = lit(ScalarValue::Null); +/// ``` +/// +/// ## Binary Expressions +/// +/// Exprs implement traits that allow easy to understand construction of more +/// complex expressions. 
For example, to create `c1 + c2` to add columns "c1" and +/// "c2" together +/// /// ``` /// # use datafusion_expr::{lit, col, Operator, Expr}; +/// // Use the `+` operator to add two columns together /// let expr = col("c1") + col("c2"); -/// /// assert!(matches!(expr, Expr::BinaryExpr { ..} )); /// if let Expr::BinaryExpr(binary_expr) = expr { /// assert_eq!(*binary_expr.left, col("c1")); @@ -70,12 +114,13 @@ use sqlparser::ast::NullTreatment; /// } /// ``` /// -/// ## Create expression `c1 = 42` to compare the value in column "c1" to the literal value `42` +/// The expression `c1 = 42` compares the value in column "c1" to the +/// literal value `42`: +/// /// ``` /// # use datafusion_common::ScalarValue; /// # use datafusion_expr::{lit, col, Operator, Expr}; /// let expr = col("c1").eq(lit(42_i32)); -/// /// assert!(matches!(expr, Expr::BinaryExpr { .. } )); /// if let Expr::BinaryExpr(binary_expr) = expr { /// assert_eq!(*binary_expr.left, col("c1")); @@ -85,19 +130,23 @@ use sqlparser::ast::NullTreatment; /// } /// ``` /// -/// ## Return a list of [`Expr::Column`] from a schema's columns +/// Here is how to implement the equivalent of `SELECT *` to select all +/// [`Expr::Column`] from a [`DFSchema`]'s columns: +/// /// ``` /// # use arrow::datatypes::{DataType, Field, Schema}; /// # use datafusion_common::{DFSchema, Column}; /// # use datafusion_expr::Expr; -/// +/// // Create a schema c1(int, c2 float) /// let arrow_schema = Schema::new(vec![ /// Field::new("c1", DataType::Int32, false), /// Field::new("c2", DataType::Float64, false), /// ]); -/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema).unwrap(); +/// // DFSchema is an Arrow schema with optional relation name +/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema) +/// .unwrap(); /// -/// // Form a list of expressions for each item in the schema +/// // Form Vec with an expression for each column in the schema /// let exprs: Vec<_> = 
df_schema.iter() /// .map(Expr::from) /// .collect(); @@ -227,6 +276,7 @@ impl<'a> From<(Option<&'a TableReference>, &'a FieldRef)> for Expr { } } +/// UNNEST expression. #[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct Unnest { pub expr: Box, @@ -434,9 +484,13 @@ pub enum GetFieldAccess { }, } -/// Returns the field of a [`arrow::array::ListArray`] or -/// [`arrow::array::StructArray`] by `key`. See [`GetFieldAccess`] for -/// details. +/// Returns the field of a [`ListArray`] or +/// [`StructArray`] by `key`. +/// +/// See [`GetFieldAccess`] for details. +/// +/// [`ListArray`]: arrow::array::ListArray +/// [`StructArray`]: arrow::array::StructArray #[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct GetIndexedField { /// The expression to take the field from @@ -712,7 +766,7 @@ pub fn find_df_window_func(name: &str) -> Option { } } -// Exists expression. +/// EXISTS expression #[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct Exists { /// subquery that will produce a single column of data @@ -728,6 +782,9 @@ impl Exists { } } +/// User Defined Aggregate Function +/// +/// See [`udaf::AggregateUDF`] for more information. #[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct AggregateUDF { /// The function @@ -821,6 +878,7 @@ impl Placeholder { } /// Grouping sets +/// /// See /// for Postgres definition. 
/// See diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index ea991102df36..df390ce6eaf8 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -20,6 +20,10 @@ use std::{fmt::Display, vec}; use arrow_array::{Date32Array, Date64Array}; use arrow_schema::DataType; +use sqlparser::ast::{ + self, Expr as AstExpr, Function, FunctionArg, Ident, UnaryOperator, +}; + use datafusion_common::{ internal_datafusion_err, internal_err, not_impl_err, plan_err, Column, Result, ScalarValue, @@ -28,9 +32,6 @@ use datafusion_expr::{ expr::{Alias, Exists, InList, ScalarFunction, Sort, WindowFunction}, Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, TryCast, }; -use sqlparser::ast::{ - self, Expr as AstExpr, Function, FunctionArg, Ident, UnaryOperator, -}; use super::Unparser; @@ -931,6 +932,7 @@ mod tests { use arrow::datatypes::{Field, Schema}; use arrow_schema::DataType::Int8; + use datafusion_common::TableReference; use datafusion_expr::{ case, col, cube, exists,