Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better Equivalence (ordering and exact equivalence) Propagation through ProjectionExec #8484

Merged
merged 50 commits into from
Dec 11, 2023
Merged
Changes from 1 commit
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
f990947
Discover ordering of complex expressions in group by and window parti…
mustafasrepo Nov 6, 2023
ce982f0
Remove unnecessary tests
mustafasrepo Nov 6, 2023
8dc081f
Update comments
mustafasrepo Nov 6, 2023
8560c87
Minor changes
mustafasrepo Nov 6, 2023
c31ec8c
Better projection support for complex expressions
mustafasrepo Nov 6, 2023
49b1504
Fix failing test
mustafasrepo Nov 10, 2023
4ce3b90
Simplifications
mustafasrepo Nov 10, 2023
78206ba
Simplifications
mustafasrepo Nov 10, 2023
429096a
Add is end flag
mustafasrepo Nov 10, 2023
a4a3d7e
Simplifications
mustafasrepo Nov 14, 2023
f6bc4d9
Simplifications
mustafasrepo Nov 14, 2023
20ade77
Simplifications
mustafasrepo Nov 14, 2023
2b7a43c
Merge branch 'apache_main' into complex_exprs_ordering_support
mustafasrepo Nov 15, 2023
fffde03
Merge branch 'apache_main' into complex_exprs_ordering_support
mustafasrepo Nov 16, 2023
66d05d9
Minor changes
mustafasrepo Nov 16, 2023
b78b9d7
Minor changes
mustafasrepo Nov 16, 2023
20b5f08
Minor changes
mustafasrepo Nov 16, 2023
3e1a2b9
All tests pass
mustafasrepo Nov 16, 2023
9be89e2
Change implementation of find_longest_permutation
mustafasrepo Nov 16, 2023
20e96d0
Minor changes
mustafasrepo Nov 16, 2023
2400a9b
Minor changes
mustafasrepo Nov 16, 2023
0cd8427
Merge branch 'apache_main' into complex_exprs_ordering_support
mustafasrepo Nov 17, 2023
7a1e030
Complex exprs requirement support (#215)
mustafasrepo Nov 17, 2023
63c1ee5
Merge branch 'complex_exprs_requirement' into complex_exprs_projection
mustafasrepo Nov 17, 2023
d39f5b2
Minor changes
mustafasrepo Nov 17, 2023
1120488
Remove unused methods
mustafasrepo Nov 17, 2023
d2a2667
Minor changes
mustafasrepo Nov 17, 2023
7604c13
Add random projection test
mustafasrepo Nov 17, 2023
792c8e9
Minor changes
mustafasrepo Nov 17, 2023
b35d343
Minor changes
mustafasrepo Nov 17, 2023
ad3b2be
Merge branch 'complex_exprs_requirement' into complex_exprs_projection
mustafasrepo Nov 17, 2023
464c0a0
Simplifications
mustafasrepo Nov 17, 2023
cee9018
Add comments
mustafasrepo Nov 17, 2023
4659904
Simplifications
mustafasrepo Nov 17, 2023
9f32fb7
Minor changes
mustafasrepo Nov 17, 2023
0f5e7ec
Simplifications
mustafasrepo Nov 17, 2023
6e2f7ba
Add new orderings
mustafasrepo Nov 17, 2023
3a4bb76
Minor changes
mustafasrepo Nov 17, 2023
39ff152
Add comments
mustafasrepo Nov 17, 2023
2b8dffc
Add new tests
mustafasrepo Nov 20, 2023
9afd4b5
Change project ordering implementation
mustafasrepo Nov 20, 2023
e429e89
Change ordering projection implementation
mustafasrepo Nov 21, 2023
a48a953
Merge branch 'apache_main' into complex_exprs_projection
mustafasrepo Nov 23, 2023
f38a9c4
Remove leftover code
mustafasrepo Nov 23, 2023
5e86fe6
Add new test
mustafasrepo Nov 23, 2023
66856ae
Minor changes
mustafasrepo Nov 23, 2023
c2a617e
Review Part 1
metesynnada Nov 24, 2023
68586e8
simplifications
mustafasrepo Nov 24, 2023
1508964
Review
ozankabak Dec 8, 2023
96f7d3a
Update join suffix implementation
mustafasrepo Dec 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Remove unnecessary tests
mustafasrepo committed Nov 6, 2023
commit ce982f0ae3cfa4e18f63d74fabcab9ff42bce757
313 changes: 0 additions & 313 deletions datafusion/core/src/physical_optimizer/enforce_sorting.rs
Original file line number Diff line number Diff line change
@@ -2278,316 +2278,3 @@ mod tests {
Ok(())
}
}

// #[cfg(test)]
// mod tmp_tests {
// use crate::assert_batches_eq;
// use crate::physical_optimizer::utils::get_plan_string;
// use crate::physical_plan::{collect, displayable, ExecutionPlan};
// use crate::prelude::SessionContext;
// use arrow::util::pretty::print_batches;
// use datafusion_common::Result;
// use datafusion_execution::config::SessionConfig;
// use std::sync::Arc;
//
// fn print_plan(plan: &Arc<dyn ExecutionPlan>) -> Result<()> {
// let formatted = displayable(plan.as_ref()).indent(true).to_string();
// let actual: Vec<&str> = formatted.trim().lines().collect();
// println!("{:#?}", actual);
// Ok(())
// }
//
// #[tokio::test]
// async fn test_query() -> Result<()> {
// let config = SessionConfig::new().with_target_partitions(2);
// let ctx = SessionContext::new_with_config(config);
//
// ctx.sql(
// "CREATE unbounded EXTERNAL TABLE csv_with_timestamps (
// name VARCHAR,
// ts TIMESTAMP
// )
// STORED AS CSV
// WITH ORDER (ts DESC)
// LOCATION '../core/tests/data/timestamps.csv'",
// )
// .await?;
//
// let sql = "SELECT date_bin('15 minutes', ts)
// FROM csv_with_timestamps
// GROUP BY (date_bin('15 minutes', ts))
// LIMIT 5";
//
// let msg = format!("Creating logical plan for '{sql}'");
// let dataframe = ctx.sql(sql).await.expect(&msg);
// let physical_plan = dataframe.create_physical_plan().await?;
// print_plan(&physical_plan)?;
// let batches = collect(physical_plan.clone(), ctx.task_ctx()).await?;
// print_batches(&batches)?;
//
// let expected = vec![
// "GlobalLimitExec: skip=0, fetch=5",
// " CoalescePartitionsExec",
// " AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8(\"15 minutes\"),csv_with_timestamps.ts)@0 as date_bin(Utf8(\"15 minutes\"),csv_with_timestamps.ts)], aggr=[], ordering_mode=Sorted",
// " CoalesceBatchesExec: target_batch_size=8192",
// " SortPreservingRepartitionExec: partitioning=Hash([date_bin(Utf8(\"15 minutes\"),csv_with_timestamps.ts)@0], 2), input_partitions=2, sort_exprs=date_bin(Utf8(\"15 minutes\"),csv_with_timestamps.ts)@0 DESC",
// " AggregateExec: mode=Partial, gby=[date_bin(900000000000, ts@0) as date_bin(Utf8(\"15 minutes\"),csv_with_timestamps.ts)], aggr=[], ordering_mode=Sorted",
// " RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1",
// " CsvExec: file_groups={1 group: [[Users/akurmustafa/projects/synnada/arrow-datafusion-synnada/datafusion/core/tests/data/timestamps.csv]]}, projection=[ts], infinite_source=true, output_ordering=[ts@0 DESC], has_header=false",
// ];
// // Get string representation of the plan
// let actual = get_plan_string(&physical_plan);
// assert_eq!(
// expected, actual,
// "\n**Optimized Plan Mismatch\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
// );
//
// let expected = [
// "+------------+",
// "| nth_value1 |",
// "+------------+",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "+------------+",
// ];
// assert_batches_eq!(expected, &batches);
// Ok(())
// }
//
// #[tokio::test]
// async fn test_query3() -> Result<()> {
// let config = SessionConfig::new().with_target_partitions(1);
// let ctx = SessionContext::new_with_config(config);
//
// ctx.sql(
// "CREATE EXTERNAL TABLE csv_with_timestamps (
// name VARCHAR,
// ts TIMESTAMP
// )
// STORED AS CSV
// WITH ORDER (ts DESC)
// LOCATION '../core/tests/data/timestamps.csv'",
// )
// .await?;
//
// let sql = " SELECT datebin FROM(
// SELECT date_bin('15 minutes', ts) as datebin
// FROM csv_with_timestamps)
// GROUP BY datebin;
// ";
//
// let msg = format!("Creating logical plan for '{sql}'");
// let dataframe = ctx.sql(sql).await.expect(&msg);
// let physical_plan = dataframe.create_physical_plan().await?;
// print_plan(&physical_plan)?;
// let batches = collect(physical_plan.clone(), ctx.task_ctx()).await?;
// print_batches(&batches)?;
//
// let expected = vec![
// "ProjectionExec: expr=[NTH_VALUE(annotated_data_finite2.d,Int64(2)) ORDER BY [annotated_data_finite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as nth_value1]",
// " GlobalLimitExec: skip=0, fetch=5",
// " BoundedWindowAggExec: wdw=[NTH_VALUE(annotated_data_finite2.d,Int64(2)) ORDER BY [annotated_data_finite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: \"NTH_VALUE(annotated_data_finite2.d,Int64(2)) ORDER BY [annotated_data_finite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow }], mode=[Sorted]",
// " CsvExec: file_groups={1 group: [[Users/akurmustafa/projects/synnada/arrow-datafusion-synnada/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
// ];
// // Get string representation of the plan
// let actual = get_plan_string(&physical_plan);
// assert_eq!(
// expected, actual,
// "\n**Optimized Plan Mismatch\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
// );
//
// let expected = [
// "+------------+",
// "| nth_value1 |",
// "+------------+",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "+------------+",
// ];
// assert_batches_eq!(expected, &batches);
// Ok(())
// }
//
// #[tokio::test]
// async fn test_query2() -> Result<()> {
// let config = SessionConfig::new().with_target_partitions(1);
// let ctx = SessionContext::new_with_config(config);
//
// ctx.sql(
// "CREATE EXTERNAL TABLE csv_with_timestamps (
// name VARCHAR,
// ts TIMESTAMP
// )
// STORED AS CSV
// WITH ORDER (ts DESC)
// LOCATION '../core/tests/data/timestamps.csv'",
// )
// .await?;
//
// let sql = "SELECT ts + INTERVAL '15 minutes'
// FROM csv_with_timestamps
// GROUP BY (ts + INTERVAL '15 minutes')
// LIMIT 5";
//
// let msg = format!("Creating logical plan for '{sql}'");
// let dataframe = ctx.sql(sql).await.expect(&msg);
// let physical_plan = dataframe.create_physical_plan().await?;
// print_plan(&physical_plan)?;
// let batches = collect(physical_plan.clone(), ctx.task_ctx()).await?;
// print_batches(&batches)?;
//
// let expected = vec![
// "ProjectionExec: expr=[NTH_VALUE(annotated_data_finite2.d,Int64(2)) ORDER BY [annotated_data_finite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as nth_value1]",
// " GlobalLimitExec: skip=0, fetch=5",
// " BoundedWindowAggExec: wdw=[NTH_VALUE(annotated_data_finite2.d,Int64(2)) ORDER BY [annotated_data_finite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: \"NTH_VALUE(annotated_data_finite2.d,Int64(2)) ORDER BY [annotated_data_finite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow }], mode=[Sorted]",
// " CsvExec: file_groups={1 group: [[Users/akurmustafa/projects/synnada/arrow-datafusion-synnada/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
// ];
// // Get string representation of the plan
// let actual = get_plan_string(&physical_plan);
// assert_eq!(
// expected, actual,
// "\n**Optimized Plan Mismatch\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
// );
//
// let expected = [
// "+------------+",
// "| nth_value1 |",
// "+------------+",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "+------------+",
// ];
// assert_batches_eq!(expected, &batches);
// Ok(())
// }
//
// #[tokio::test]
// async fn test_query4() -> Result<()> {
// let config = SessionConfig::new().with_target_partitions(1);
// let ctx = SessionContext::new_with_config(config);
//
// ctx.sql("CREATE TABLE tab0(col0 INTEGER, col1 INTEGER, col2 INTEGER)")
// .await?;
//
// let sql = "SELECT l.col0, LAST_VALUE(r.col1 ORDER BY r.col0) as last_col1
// FROM tab0 as l
// JOIN tab0 as r
// ON l.col0 = r.col0
// GROUP BY l.col0, l.col1, l.col2
// ORDER BY l.col0";
//
// let msg = format!("Creating logical plan for '{sql}'");
// let dataframe = ctx.sql(sql).await.expect(&msg);
// let physical_plan = dataframe.create_physical_plan().await?;
// print_plan(&physical_plan)?;
// let batches = collect(physical_plan.clone(), ctx.task_ctx()).await?;
// print_batches(&batches)?;
//
// let expected = vec![
// "ProjectionExec: expr=[NTH_VALUE(annotated_data_finite2.d,Int64(2)) ORDER BY [annotated_data_finite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as nth_value1]",
// " GlobalLimitExec: skip=0, fetch=5",
// " BoundedWindowAggExec: wdw=[NTH_VALUE(annotated_data_finite2.d,Int64(2)) ORDER BY [annotated_data_finite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: \"NTH_VALUE(annotated_data_finite2.d,Int64(2)) ORDER BY [annotated_data_finite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow }], mode=[Sorted]",
// " CsvExec: file_groups={1 group: [[Users/akurmustafa/projects/synnada/arrow-datafusion-synnada/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
// ];
// // Get string representation of the plan
// let actual = get_plan_string(&physical_plan);
// assert_eq!(
// expected, actual,
// "\n**Optimized Plan Mismatch\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
// );
//
// let expected = [
// "+------------+",
// "| nth_value1 |",
// "+------------+",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "+------------+",
// ];
// assert_batches_eq!(expected, &batches);
// Ok(())
// }
//
// #[tokio::test]
// async fn test_query5() -> Result<()> {
// let config = SessionConfig::new().with_target_partitions(1);
// let ctx = SessionContext::new_with_config(config);
//
// ctx.sql(
// "CREATE EXTERNAL TABLE aggregate_test_100 (
// c1 VARCHAR NOT NULL,
// c2 TINYINT NOT NULL,
// c3 SMALLINT NOT NULL,
// c4 SMALLINT,
// c5 INT,
// c6 BIGINT NOT NULL,
// c7 SMALLINT NOT NULL,
// c8 INT NOT NULL,
// c9 BIGINT UNSIGNED NOT NULL,
// c10 VARCHAR NOT NULL,
// c11 FLOAT NOT NULL,
// c12 DOUBLE NOT NULL,
// c13 VARCHAR NOT NULL
// )
// STORED AS CSV
// WITH HEADER ROW
// LOCATION '../../testing/data/csv/aggregate_test_100.csv'",
// )
// .await?;
//
// let sql = "SELECT c3,
// SUM(c9) OVER(ORDER BY c3+c4 DESC, c9 DESC, c2 ASC) as sum1,
// SUM(c9) OVER(ORDER BY c3+c4 ASC, c9 ASC ) as sum2
// FROM aggregate_test_100
// LIMIT 5";
//
// let msg = format!("Creating logical plan for '{sql}'");
// let dataframe = ctx.sql(sql).await.expect(&msg);
// let physical_plan = dataframe.create_physical_plan().await?;
// print_plan(&physical_plan)?;
// let batches = collect(physical_plan.clone(), ctx.task_ctx()).await?;
// print_batches(&batches)?;
//
// let expected = vec![
// "ProjectionExec: expr=[c3@0 as c3, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum1, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as sum2]",
// " GlobalLimitExec: skip=0, fetch=5",
// " WindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: \"SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int16(NULL)) }]",
// " ProjectionExec: expr=[c3@1 as c3, c4@2 as c4, c9@3 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]",
// " BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: \"SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow }], mode=[Sorted]",
// " SortExec: expr=[c3@1 + c4@2 DESC,c9@3 DESC,c2@0 ASC NULLS LAST]",
// " CsvExec: file_groups={1 group: [[Users/akurmustafa/projects/synnada/arrow-datafusion-synnada/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3, c4, c9], has_header=true",
// ];
// // Get string representation of the plan
// let actual = get_plan_string(&physical_plan);
// assert_eq!(
// expected, actual,
// "\n**Optimized Plan Mismatch\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
// );
//
// let expected = [
// "+------------+",
// "| nth_value1 |",
// "+------------+",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "| 2 |",
// "+------------+",
// ];
// assert_batches_eq!(expected, &batches);
// Ok(())
// }
// }