Skip to content

Commit

Permalink
Minor: improve Expr documentation (#10685)
Browse files Browse the repository at this point in the history
* Minor: improve Expr documentation

* Update datafusion/expr/src/expr.rs

Co-authored-by: Oleks V <[email protected]>

* Refine words

---------

Co-authored-by: Oleks V <[email protected]>
  • Loading branch information
alamb and comphead authored May 29, 2024
1 parent 08e19f4 commit 0905426
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 22 deletions.
96 changes: 77 additions & 19 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,29 +39,73 @@ use datafusion_common::{
};
use sqlparser::ast::NullTreatment;

/// `Expr` is a central struct of DataFusion's query API, and
/// represents logical expressions such as `A + 1`, or `CAST(c1 AS
/// int)`.
/// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
///
/// An `Expr` can compute its [DataType]
/// and nullability, and has functions for building up complex
/// expressions.
/// For example the expression `A + 1` will be represented as
///
///```text
/// BinaryExpr {
/// left: Expr::Column("A"),
/// op: Operator::Plus,
/// right: Expr::Literal(ScalarValue::Int32(Some(1)))
/// }
/// ```
///
/// # Creating Expressions
///
/// `Expr`s can be created directly, but it is often easier and less verbose to
/// use the fluent APIs in [`crate::expr_fn`] such as [`col`] and [`lit`], or
/// methods such as [`Expr::alias`], [`Expr::cast_to`], and [`Expr::Like`].
///
/// # Schema Access
///
/// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability
/// of an `Expr`.
///
/// # Examples
///
/// ## Create an expression `c1` referring to column named "c1"
/// ## Column references and literals
///
/// [`Expr::Column`] refers to the values of columns and is often created with
/// the [`col`] function. For example, to create an expression `c1` referring to
/// a column named "c1":
///
/// [`col`]: crate::expr_fn::col
///
/// ```
/// # use datafusion_common::Column;
/// # use datafusion_expr::{lit, col, Expr};
/// let expr = col("c1");
/// assert_eq!(expr, Expr::Column(Column::from_name("c1")));
/// ```
///
/// ## Create the expression `c1 + c2` to add columns "c1" and "c2" together
/// [`Expr::Literal`] refers to literal, or constant, values. These are created
/// with the [`lit`] function. For example, to create an expression `42`:
///
/// [`lit`]: crate::lit
///
/// ```
/// # use datafusion_common::{Column, ScalarValue};
/// # use datafusion_expr::{lit, col, Expr};
/// // All literals are strongly typed in DataFusion. To make an `i64` 42:
/// let expr = lit(42i64);
/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42))));
/// // To make a (typed) NULL:
/// let expr = Expr::Literal(ScalarValue::Int64(None));
/// // to make an (untyped) NULL (the optimizer will coerce this to the correct type):
/// let expr = lit(ScalarValue::Null);
/// ```
///
/// ## Binary Expressions
///
/// Exprs implement traits that allow easy-to-understand construction of more
/// complex expressions. For example, to create `c1 + c2` to add columns "c1" and
/// "c2" together:
///
/// ```
/// # use datafusion_expr::{lit, col, Operator, Expr};
/// // Use the `+` operator to add two columns together
/// let expr = col("c1") + col("c2");
///
/// assert!(matches!(expr, Expr::BinaryExpr { ..} ));
/// if let Expr::BinaryExpr(binary_expr) = expr {
/// assert_eq!(*binary_expr.left, col("c1"));
Expand All @@ -70,12 +114,13 @@ use sqlparser::ast::NullTreatment;
/// }
/// ```
///
/// ## Create expression `c1 = 42` to compare the value in column "c1" to the literal value `42`
/// The expression `c1 = 42` compares the value in column "c1" to the
/// literal value `42`:
///
/// ```
/// # use datafusion_common::ScalarValue;
/// # use datafusion_expr::{lit, col, Operator, Expr};
/// let expr = col("c1").eq(lit(42_i32));
///
/// assert!(matches!(expr, Expr::BinaryExpr { .. } ));
/// if let Expr::BinaryExpr(binary_expr) = expr {
/// assert_eq!(*binary_expr.left, col("c1"));
Expand All @@ -85,19 +130,23 @@ use sqlparser::ast::NullTreatment;
/// }
/// ```
///
/// ## Return a list of [`Expr::Column`] from a schema's columns
/// Here is how to implement the equivalent of `SELECT *` to select all
/// [`Expr::Column`] from a [`DFSchema`]'s columns:
///
/// ```
/// # use arrow::datatypes::{DataType, Field, Schema};
/// # use datafusion_common::{DFSchema, Column};
/// # use datafusion_expr::Expr;
///
/// // Create a schema (c1 int, c2 float)
/// let arrow_schema = Schema::new(vec![
/// Field::new("c1", DataType::Int32, false),
/// Field::new("c2", DataType::Float64, false),
/// ]);
/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema).unwrap();
/// // DFSchema is an Arrow schema with an optional relation name
/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema)
/// .unwrap();
///
/// // Form a list of expressions for each item in the schema
/// // Form Vec<Expr> with an expression for each column in the schema
/// let exprs: Vec<_> = df_schema.iter()
/// .map(Expr::from)
/// .collect();
Expand Down Expand Up @@ -227,6 +276,7 @@ impl<'a> From<(Option<&'a TableReference>, &'a FieldRef)> for Expr {
}
}

/// UNNEST expression.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct Unnest {
pub expr: Box<Expr>,
Expand Down Expand Up @@ -434,9 +484,13 @@ pub enum GetFieldAccess {
},
}

/// Returns the field of a [`arrow::array::ListArray`] or
/// [`arrow::array::StructArray`] by `key`. See [`GetFieldAccess`] for
/// details.
/// Returns the field of a [`ListArray`] or
/// [`StructArray`] by `key`.
///
/// See [`GetFieldAccess`] for details.
///
/// [`ListArray`]: arrow::array::ListArray
/// [`StructArray`]: arrow::array::StructArray
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct GetIndexedField {
/// The expression to take the field from
Expand Down Expand Up @@ -712,7 +766,7 @@ pub fn find_df_window_func(name: &str) -> Option<WindowFunctionDefinition> {
}
}

// Exists expression.
/// EXISTS expression
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct Exists {
/// subquery that will produce a single column of data
Expand All @@ -728,6 +782,9 @@ impl Exists {
}
}

/// User Defined Aggregate Function
///
/// See [`udaf::AggregateUDF`] for more information.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct AggregateUDF {
/// The function
Expand Down Expand Up @@ -821,6 +878,7 @@ impl Placeholder {
}

/// Grouping sets
///
/// See <https://www.postgresql.org/docs/current/queries-table-expressions.html#QUERIES-GROUPING-SETS>
/// for Postgres definition.
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
Expand Down
8 changes: 5 additions & 3 deletions datafusion/sql/src/unparser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ use std::{fmt::Display, vec};

use arrow_array::{Date32Array, Date64Array};
use arrow_schema::DataType;
use sqlparser::ast::{
self, Expr as AstExpr, Function, FunctionArg, Ident, UnaryOperator,
};

use datafusion_common::{
internal_datafusion_err, internal_err, not_impl_err, plan_err, Column, Result,
ScalarValue,
Expand All @@ -28,9 +32,6 @@ use datafusion_expr::{
expr::{Alias, Exists, InList, ScalarFunction, Sort, WindowFunction},
Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, TryCast,
};
use sqlparser::ast::{
self, Expr as AstExpr, Function, FunctionArg, Ident, UnaryOperator,
};

use super::Unparser;

Expand Down Expand Up @@ -931,6 +932,7 @@ mod tests {

use arrow::datatypes::{Field, Schema};
use arrow_schema::DataType::Int8;

use datafusion_common::TableReference;
use datafusion_expr::{
case, col, cube, exists,
Expand Down

0 comments on commit 0905426

Please sign in to comment.