Skip to content

Commit

Permalink
Improve docs + macros
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Jan 15, 2024
1 parent 304ce2d commit 259f122
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 35 deletions.
4 changes: 2 additions & 2 deletions datafusion/core/tests/dataframe/dataframe_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ async fn test_fn_upper() -> Result<()> {

#[tokio::test]
async fn test_fn_encode() -> Result<()> {
let expr = encode(vec![col("a"), lit("hex")]);
let expr = encode(col("a"), lit("hex"));

let expected = [
"+----------------------------+",
Expand All @@ -782,7 +782,7 @@ async fn test_fn_decode() -> Result<()> {
// binary is "hexadecimal" and therefore the output looks like decode did
// nothing. So compare to a constant.
let df_schema = DFSchema::try_from(test_schema().as_ref().clone())?;
let expr = decode(vec![encode(vec![col("a"), lit("hex")]), lit("hex")])
let expr = decode(encode(col("a"), lit("hex")), lit("hex"))
// need to cast to utf8 otherwise the default display of binary array is hex
// so it looks like nothing is done
.cast_to(&DataType::Utf8, &df_schema)?;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ pub fn is_not_unknown(expr: Expr) -> Expr {

macro_rules! scalar_expr {
($ENUM:ident, $FUNC:ident, $($arg:ident)*, $DOC:expr) => {
#[doc = $DOC ]
#[doc = $DOC]
pub fn $FUNC($($arg: Expr),*) -> Expr {
Expr::ScalarFunction(ScalarFunction::new(
built_in_function::BuiltinScalarFunction::$ENUM,
Expand Down
7 changes: 4 additions & 3 deletions datafusion/functions/src/encoding/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@

mod inner;

// create `encode` and `decode` functions

// create `encode` and `decode` UDFs
make_udf_function!(inner::EncodeFunc, ENCODE, encode);
make_udf_function!(inner::DecodeFunc, DECODE, decode);

// Export the functions out of this package, both as an `expr_fn` module and as a list of functions
export_functions!(
(encode, "encode the `input`, using the `encoding`. encoding can be base64 or hex"),
(decode, "decode the`input`, using the `encoding`. encoding can be base64 or hex")
(encode, input encoding, "encode the `input`, using the `encoding`. encoding can be base64 or hex"),
(decode, input encoding, "decode the `input`, using the `encoding`. encoding can be base64 or hex")
);

41 changes: 25 additions & 16 deletions datafusion/functions/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,25 @@
// specific language governing permissions and limitations
// under the License.

//! Function packages for DataFusion
//! Function packages for DataFusion.
//!
//! Each package is implemented as a separate
//! module, which can be activated by a feature flag.
//! This crate contains a collection of various function packages for DataFusion,
//! implemented using the extension API. Users may wish to choose which functions
//! are available, both to limit the binary size of their application and to
//! use dialect-specific implementations of functions (e.g. Spark vs Postgres).
//!
//! Each package is implemented as a separate
//! module, activated by a feature flag.
//!
//! # Available Packages
//! See the list of modules in this crate for available packages.
//! See the list of [modules](#modules) in this crate for available packages.
//!
//! # Using Package
//! # Using A Package
//! You can register all functions in all packages using the [`register_all`] function.
//!
//! To register only the functions in a certain package, you can do:
//! To access and use only the functions in a certain package, use the
//! `functions()` method in each module.
//!
//! ```
//! # fn main() -> datafusion_common::Result<()> {
//! # let mut registry = datafusion_execution::registry::MemoryFunctionRegistry::new();
Expand All @@ -41,37 +47,40 @@
//! # }
//! ```
//!
//! You can also use the "expr_fn" module to create [`Expr`]s that invoke
//! functions in a fluent style:
//! Each package also exports an `expr_fn` submodule to help create [`Expr`]s that invoke
//! functions using a fluent style. For example:
//!
//! ```
//! // create an Expr that will invoke the encode function
//! use datafusion_expr::{col, lit};
//! use datafusion_functions::expr_fn;
//! // encode(my_data, 'hex')
//! // Equivalent to "encode(my_data, 'hex')" in SQL:
//! let expr = expr_fn::encode(col("my_data"), lit("hex"));
//! ```
//!
//! [`Expr`]: datafusion_expr::Expr
//!
//! # Implementing A New Package
//!
//! To add a new package to this crate::
//! To add a new package to this crate, you should follow the model of existing
//! packages. The high level steps are:
//!
//! 1. Create a new module with the appropriate [`ScalarUDF`] implementations.
//!
//! 1. Create a new module with the appropriate `ScalarUDF` implementations.
//! 2. Use the macros in [`macros`] to create standard entry points.
//!
//! 2. Use the `make_udf_function!` and `export_functions!` macros to create
//! standard entry points
//! 3. Add a new feature to `Cargo.toml`, with any optional dependencies
//!
//! 3. Add a new feature flag to `Cargo.toml`, with any optional dependencies
//! 4. Use the `make_package!` macro to expose the module when the
//! feature is enabled.
//!
//! 4. Use the `make_package!` macro to export the module if the specified feature is enabled
//! [`ScalarUDF`]: datafusion_expr::ScalarUDF
use datafusion_common::Result;
use datafusion_execution::FunctionRegistry;
use log::debug;

#[macro_use]
mod macros;
pub mod macros;

make_package!(
encoding,
Expand Down
10 changes: 5 additions & 5 deletions datafusion/functions/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@
/// }
/// ```
macro_rules! export_functions {
($($name:ident),*) => {
($(($FUNC:ident, $($arg:ident)*, $DOC:expr)),*) => {
pub mod expr_fn {
$(
#[doc = concat!("Call the `", $name, "`function")]
#[doc = $DOC]
/// Returns an `Expr` that calls this function with the given arguments.
pub fn $name(args: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
super::$name().call(args)
pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
super::$FUNC().call(vec![$($arg),*],)
}
)*
}
Expand All @@ -52,7 +52,7 @@ macro_rules! export_functions {
pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
vec![
$(
$name(),
$FUNC(),
)*
]
}
Expand Down
13 changes: 5 additions & 8 deletions datafusion/proto/tests/cases/roundtrip_logical_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNodeCore};
use datafusion_expr::{
col, create_udaf, lit, Accumulator, AggregateFunction,
BuiltinScalarFunction::{Sqrt, Substr},
Expr, LogicalPlan, Operator, PartitionEvaluator, Signature, TryCast, Volatility,
WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, WindowUDF,
WindowUDFImpl,
Expr, ExprSchemable, LogicalPlan, Operator, PartitionEvaluator, Signature, TryCast,
Volatility, WindowFrame, WindowFrameBound, WindowFrameUnits,
WindowFunctionDefinition, WindowUDF, WindowUDFImpl,
};
use datafusion_proto::bytes::{
logical_plan_from_bytes, logical_plan_from_bytes_with_extension_codec,
Expand Down Expand Up @@ -539,11 +539,8 @@ async fn roundtrip_expr_api() -> Result<()> {
// ensure expressions created with the expr api can be round tripped
let plan = table
.select(vec![
encode(vec![
col("a").cast_to(&DataType::Utf8, &schema)?,
lit("hex"),
]),
decode(vec![lit("1234"), lit("hex")]),
encode(col("a").cast_to(&DataType::Utf8, &schema)?, lit("hex")),
decode(lit("1234"), lit("hex")),
])?
.into_optimized_plan()?;
let bytes = logical_plan_to_bytes(&plan)?;
Expand Down

0 comments on commit 259f122

Please sign in to comment.