Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add examples of how to convert logical plan to/from sql strings #10558

Merged
merged 4 commits into from
May 20, 2024
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 134 additions & 1 deletion datafusion-examples/examples/plan_to_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use datafusion::error::Result;
use datafusion::prelude::*;
use datafusion::sql::unparser::expr_to_sql;
use datafusion_sql::unparser::dialect::CustomDialect;
use datafusion_sql::unparser::Unparser;
use datafusion_sql::unparser::{plan_to_sql, Unparser};

/// This example demonstrates the programmatic construction of
/// SQL using the DataFusion Expr [`Expr`] and LogicalPlan [`LogicalPlan`] API.
Expand All @@ -41,6 +41,10 @@ async fn main() -> Result<()> {
simple_expr_to_sql_demo()?;
simple_expr_to_sql_demo_no_escape()?;
simple_expr_to_sql_demo_escape_mysql_style()?;
simple_plan_to_sql_parquest_dataframe_demo().await?;
simple_plan_to_sql_csv_dataframe_demo().await?;
round_trip_plan_to_sql_parquest_dataframe_demo().await?;
round_trip_plan_to_sql_csv_dataframe_demo().await?;
Ok(())
}

Expand Down Expand Up @@ -77,3 +81,132 @@ fn simple_expr_to_sql_demo_escape_mysql_style() -> Result<()> {
assert_eq!(sql, r#"((`a` < 5) OR (`a` = 8))"#);
Ok(())
}

/// DataFusion can convert a logical plan created using the DataFrame API
/// (reading from a parquet file) back into a SQL string.
///
/// The source table was never registered under a name, so the unparser emits
/// the `"?table?"` placeholder for the relation, with identifiers quoted in
/// the default (PostgreSQL-style) double-quote manner.
async fn simple_plan_to_sql_parquest_dataframe_demo() -> Result<()> {
    // create local execution context
    let ctx = SessionContext::new(); // entry point for the DataFrame API

    // build a DataFrame that scans the bundled parquet test file and
    // projects a handful of columns
    let testdata = datafusion::test_util::parquet_test_data();
    let df = ctx
        .read_parquet(
            &format!("{testdata}/alltypes_plain.parquet"),
            ParquetReadOptions::default(),
        )
        .await?
        .select_columns(&["id", "int_col", "double_col", "date_string_col"])?;

    // unparse the logical plan into a SQL AST ...
    let ast = plan_to_sql(df.logical_plan())?;

    // ... which renders to a SQL string via Display
    let sql = format!("{}", ast);

    assert_eq!(
        sql,
        r#"SELECT "?table?"."id", "?table?"."int_col", "?table?"."double_col", "?table?"."date_string_col" FROM "?table?""#
    );

    Ok(())
}

/// DataFusion can convert a logic plan created using the DataFrames API to read from a csv file
xinlifoobar marked this conversation as resolved.
Show resolved Hide resolved
/// to SQL, using column name escaping PostgreSQL style.
async fn simple_plan_to_sql_csv_dataframe_demo() -> Result<()> {
// create local execution context
let ctx = SessionContext::new(); // define the query using the DataFrame trait

let testdata = datafusion::test_util::arrow_test_data();
let df = ctx
.read_csv(
&format!("{testdata}/csv/aggregate_test_100.csv"),
CsvReadOptions::default(),
)
.await?
.select(vec![col("c1"), min(col("c12")), max(col("c12"))])?;

let ast = plan_to_sql(df.logical_plan())?;

let sql = format!("{}", ast);

assert_eq!(
sql,
r#"SELECT "?table?"."c1", MIN("?table?"."c12"), MAX("?table?"."c12") FROM "?table?""#
);

Ok(())
}

async fn round_trip_plan_to_sql_parquest_dataframe_demo() -> Result<()> {
// create local execution context
let ctx = SessionContext::new(); // define the query using the DataFrame trait

let testdata = datafusion::test_util::parquet_test_data();

// register parquet file with the execution context
ctx.register_parquet(
"alltypes_plain",
&format!("{testdata}/alltypes_plain.parquet"),
ParquetReadOptions::default(),
)
.await?;

// execute the query
xinlifoobar marked this conversation as resolved.
Show resolved Hide resolved
let df = ctx
.sql(
"SELECT int_col, double_col, CAST(date_string_col as VARCHAR) \
FROM alltypes_plain",
)
.await?
.filter(
col("id")
.gt(lit(1))
.and(col("tinyint_col").lt(col("double_col"))),
)?;

let ast = plan_to_sql(df.logical_plan())?;

let sql = format!("{}", ast);

assert_eq!(
sql,
r#"SELECT "alltypes_plain"."int_col", "alltypes_plain"."double_col", CAST("alltypes_plain"."date_string_col" AS VARCHAR) FROM "alltypes_plain" WHERE (("alltypes_plain"."id" > 1) AND ("alltypes_plain"."tinyint_col" < "alltypes_plain"."double_col"))"#
);

Ok(())
}

/// DataFusion can round-trip SQL against a registered CSV table: parse a
/// GROUP BY query into a logical plan, add a predicate via the DataFrame
/// API, and unparse the result back into a SQL string.
///
/// NOTE(review): the `filter` applied after the aggregation is unparsed as a
/// HAVING clause (see the asserted SQL below), not a WHERE clause, because
/// it sits above the aggregate in the logical plan.
async fn round_trip_plan_to_sql_csv_dataframe_demo() -> Result<()> {
    // create local execution context
    let ctx = SessionContext::new(); // entry point for both SQL and DataFrame APIs

    let testdata = datafusion::test_util::arrow_test_data();

    // register csv file with the execution context
    ctx.register_csv(
        "aggregate_test_100",
        &format!("{testdata}/csv/aggregate_test_100.csv"),
        CsvReadOptions::default(),
    )
    .await?;

    // start from SQL, then add a predicate via the DataFrame API
    let df = ctx
        .sql(
            "SELECT c1, MIN(c12), MAX(c12) \
        FROM aggregate_test_100
        GROUP BY c1",
        )
        .await?
        .filter(col("c1").gt(lit(0.1)).and(col("c1").lt(lit(0.9))))?;

    // unparse the combined logical plan back into a SQL AST ...
    let ast = plan_to_sql(df.logical_plan())?;

    // ... which renders to a SQL string via Display
    let sql = format!("{}", ast);

    assert_eq!(
        sql,
        r#"SELECT "aggregate_test_100"."c1", MIN("aggregate_test_100"."c12"), MAX("aggregate_test_100"."c12") FROM "aggregate_test_100" GROUP BY "aggregate_test_100"."c1" HAVING (("aggregate_test_100"."c1" > 0.1) AND ("aggregate_test_100"."c1" < 0.9))"#
    );

    Ok(())
}