diff --git a/.github/actions/setup-spark-builder/action.yaml b/.github/actions/setup-spark-builder/action.yaml index ebe8e0dc3..1bd37d6b1 100644 --- a/.github/actions/setup-spark-builder/action.yaml +++ b/.github/actions/setup-spark-builder/action.yaml @@ -29,7 +29,7 @@ inputs: comet-version: description: 'The Comet version to use for Spark' required: true - default: '0.4.0-SNAPSHOT' + default: '0.5.0-SNAPSHOT' runs: using: "composite" steps: diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml index 05c095b2f..477e3a1ab 100644 --- a/.github/workflows/spark_sql_test.yml +++ b/.github/workflows/spark_sql_test.yml @@ -71,7 +71,7 @@ jobs: with: spark-version: ${{ matrix.spark-version.full }} spark-short-version: ${{ matrix.spark-version.short }} - comet-version: '0.4.0-SNAPSHOT' # TODO: get this from pom.xml + comet-version: '0.5.0-SNAPSHOT' # TODO: get this from pom.xml - name: Run Spark tests run: | cd apache-spark diff --git a/.github/workflows/spark_sql_test_ansi.yml b/.github/workflows/spark_sql_test_ansi.yml index 06a5b2c8e..e1d8388fb 100644 --- a/.github/workflows/spark_sql_test_ansi.yml +++ b/.github/workflows/spark_sql_test_ansi.yml @@ -69,7 +69,7 @@ jobs: with: spark-version: ${{ matrix.spark-version.full }} spark-short-version: ${{ matrix.spark-version.short }} - comet-version: '0.4.0-SNAPSHOT' # TODO: get this from pom.xml + comet-version: '0.5.0-SNAPSHOT' # TODO: get this from pom.xml - name: Run Spark tests run: | cd apache-spark diff --git a/README.md b/README.md index 1a6281a99..1f77aa376 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ The following chart shows the time it takes to run the 22 TPC-H queries against using a single executor with 8 cores. See the [Comet Benchmarking Guide](https://datafusion.apache.org/comet/contributor-guide/benchmarking.html) for details of the environment used for these benchmarks. -When using Comet, the overall run time is reduced from 616 seconds to 374 seconds, a 1.6x speedup, with query 1 +When using Comet, the overall run time is reduced from 615 seconds to 364 seconds, a 1.7x speedup, with query 1 running 9x faster than Spark. Running the same queries with DataFusion standalone (without Spark) using the same number of cores results in a 3.6x @@ -55,21 +55,21 @@ speedup compared to Spark. Comet is not yet achieving full DataFusion speeds in all cases, but with future work we aim to provide a 2x-4x speedup for a broader set of queries. -![](docs/source/_static/images/benchmark-results/0.3.0/tpch_allqueries.png) +![](docs/source/_static/images/benchmark-results/0.4.0/tpch_allqueries.png) Here is a breakdown showing relative performance of Spark, Comet, and DataFusion for each TPC-H query. -![](docs/source/_static/images/benchmark-results/0.3.0/tpch_queries_compare.png) +![](docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_compare.png) The following charts shows how much Comet currently accelerates each query from the benchmark. 
### Relative speedup -![](docs/source/_static/images/benchmark-results/0.3.0/tpch_queries_speedup_rel.png) +![](docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_speedup_rel.png) ### Absolute speedup -![](docs/source/_static/images/benchmark-results/0.3.0/tpch_queries_speedup_abs.png) +![](docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_speedup_abs.png) These benchmarks can be reproduced in any environment using the documentation in the [Comet Benchmarking Guide](https://datafusion.apache.org/comet/contributor-guide/benchmarking.html). We encourage @@ -80,7 +80,7 @@ Results for our benchmark derived from TPC-DS are available in the [benchmarking ## Use Commodity Hardware Comet leverages commodity hardware, eliminating the need for costly hardware upgrades or -specialized hardware accelerators, such as GPUs or FGPA. By maximizing the utilization of commodity hardware, Comet +specialized hardware accelerators, such as GPUs or FPGA. By maximizing the utilization of commodity hardware, Comet ensures cost-effectiveness and scalability for your Spark deployments. ## Spark Compatibility diff --git a/benchmarks/README.md b/benchmarks/README.md index 1042762d9..97877a344 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -62,7 +62,7 @@ docker push localhost:32000/apache/datafusion-comet-tpcbench:latest export SPARK_MASTER=k8s://https://127.0.0.1:16443 export COMET_DOCKER_IMAGE=localhost:32000/apache/datafusion-comet-tpcbench:latest # Location of Comet JAR within the Docker image -export COMET_JAR=/opt/spark/jars/comet-spark-spark3.4_2.12-0.2.0-SNAPSHOT.jar +export COMET_JAR=/opt/spark/jars/comet-spark-spark3.4_2.12-0.5.0-SNAPSHOT.jar $SPARK_HOME/bin/spark-submit \ --master $SPARK_MASTER \ diff --git a/common/pom.xml b/common/pom.xml index cda873abb..91109edf5 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -26,7 +26,7 @@ under the License. org.apache.datafusion comet-parent-spark${spark.version.short}_${scala.binary.version} - 0.4.0-SNAPSHOT + 0.5.0-SNAPSHOT ../pom.xml diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala index fabdd30c4..3a56f64a1 100644 --- a/common/src/main/scala/org/apache/comet/CometConf.scala +++ b/common/src/main/scala/org/apache/comet/CometConf.scala @@ -342,8 +342,10 @@ object CometConf extends ShimCometConf { val COMET_COLUMNAR_SHUFFLE_MEMORY_SIZE: OptionalConfigEntry[Long] = conf("spark.comet.columnar.shuffle.memorySize") + .internal() .doc( - "The optional maximum size of the memory used for Comet columnar shuffle, in MiB. " + + "Test-only config. This is only used to test Comet shuffle with Spark tests. " + + "The optional maximum size of the memory used for Comet columnar shuffle, in MiB. " + "Note that this config is only used when `spark.comet.exec.shuffle.mode` is " + "`jvm`. Once allocated memory size reaches this config, the current batch will be " + "flushed to disk immediately. If this is not configured, Comet will use " + @@ -355,8 +357,10 @@ object CometConf extends ShimCometConf { val COMET_COLUMNAR_SHUFFLE_MEMORY_FACTOR: ConfigEntry[Double] = conf("spark.comet.columnar.shuffle.memory.factor") + .internal() .doc( - "Fraction of Comet memory to be allocated per executor process for Comet shuffle. " + + "Test-only config. This is only used to test Comet shuffle with Spark tests. " + + "Fraction of Comet memory to be allocated per executor process for Comet shuffle. 
" + "Comet memory size is specified by `spark.comet.memoryOverhead` or " + "calculated by `spark.comet.memory.overhead.factor` * `spark.executor.memory`.") .doubleConf @@ -365,6 +369,17 @@ object CometConf extends ShimCometConf { "Ensure that Comet shuffle memory overhead factor is a double greater than 0") .createWithDefault(1.0) + val COMET_COLUMNAR_SHUFFLE_UNIFIED_MEMORY_ALLOCATOR_IN_TEST: ConfigEntry[Boolean] = + conf("spark.comet.columnar.shuffle.unifiedMemoryAllocatorTest") + .doc("Whether to use Spark unified memory allocator for Comet columnar shuffle in tests." + + "If not configured, Comet will use a test-only memory allocator for Comet columnar " + + "shuffle when Spark test env detected. The test-ony allocator is proposed to run with " + + "Spark tests as these tests require on-heap memory configuration. " + + "By default, this config is false.") + .internal() + .booleanConf + .createWithDefault(false) + val COMET_COLUMNAR_SHUFFLE_BATCH_SIZE: ConfigEntry[Int] = conf("spark.comet.columnar.shuffle.batch.size") .internal() diff --git a/dev/changelog/0.4.0.md b/dev/changelog/0.4.0.md new file mode 100644 index 000000000..69aa5c30b --- /dev/null +++ b/dev/changelog/0.4.0.md @@ -0,0 +1,108 @@ + + +# DataFusion Comet 0.4.0 Changelog + +This release consists of 51 commits from 10 contributors. See credits at the end of this changelog for more information. + +**Fixed bugs:** + +- fix: Use the number of rows from underlying arrays instead of logical row count from RecordBatch [#972](https://github.com/apache/datafusion-comet/pull/972) (viirya) +- fix: The spilled_bytes metric of CometSortExec should be size instead of time [#984](https://github.com/apache/datafusion-comet/pull/984) (Kontinuation) +- fix: Properly handle Java exceptions without error messages; fix loading of comet native library from java.library.path [#982](https://github.com/apache/datafusion-comet/pull/982) (Kontinuation) +- fix: Fallback to Spark if scan has meta columns [#997](https://github.com/apache/datafusion-comet/pull/997) (viirya) +- fix: Fallback to Spark if named_struct contains duplicate field names [#1016](https://github.com/apache/datafusion-comet/pull/1016) (viirya) +- fix: Make comet-git-info.properties optional [#1027](https://github.com/apache/datafusion-comet/pull/1027) (andygrove) +- fix: TopK operator should return correct results on dictionary column with nulls [#1033](https://github.com/apache/datafusion-comet/pull/1033) (viirya) +- fix: need default value for getSizeAsMb(EXECUTOR_MEMORY.key) [#1046](https://github.com/apache/datafusion-comet/pull/1046) (neyama) + +**Performance related:** + +- perf: Remove one redundant CopyExec for SMJ [#962](https://github.com/apache/datafusion-comet/pull/962) (andygrove) +- perf: Add experimental feature to replace SortMergeJoin with ShuffledHashJoin [#1007](https://github.com/apache/datafusion-comet/pull/1007) (andygrove) +- perf: Cache jstrings during metrics collection [#1029](https://github.com/apache/datafusion-comet/pull/1029) (mbutrovich) + +**Implemented enhancements:** + +- feat: Support `GetArrayStructFields` expression [#993](https://github.com/apache/datafusion-comet/pull/993) (Kimahriman) +- feat: Implement bloom_filter_agg [#987](https://github.com/apache/datafusion-comet/pull/987) (mbutrovich) +- feat: Support more types with BloomFilterAgg [#1039](https://github.com/apache/datafusion-comet/pull/1039) (mbutrovich) +- feat: Implement 
CAST from struct to string [#1066](https://github.com/apache/datafusion-comet/pull/1066) (andygrove) +- feat: Use official DataFusion 43 release [#1070](https://github.com/apache/datafusion-comet/pull/1070) (andygrove) +- feat: Implement CAST between struct types [#1074](https://github.com/apache/datafusion-comet/pull/1074) (andygrove) +- feat: support array_append [#1072](https://github.com/apache/datafusion-comet/pull/1072) (NoeB) +- feat: Require offHeap memory to be enabled (always use unified memory) [#1062](https://github.com/apache/datafusion-comet/pull/1062) (andygrove) + +**Documentation updates:** + +- doc: add documentation interlinks [#975](https://github.com/apache/datafusion-comet/pull/975) (comphead) +- docs: Add IntelliJ documentation for generated source code [#985](https://github.com/apache/datafusion-comet/pull/985) (mbutrovich) +- docs: Update tuning guide [#995](https://github.com/apache/datafusion-comet/pull/995) (andygrove) +- docs: Various documentation improvements [#1005](https://github.com/apache/datafusion-comet/pull/1005) (andygrove) +- docs: clarify that Maven central only has jars for Linux [#1009](https://github.com/apache/datafusion-comet/pull/1009) (andygrove) +- doc: fix K8s links and doc [#1058](https://github.com/apache/datafusion-comet/pull/1058) (comphead) +- docs: Update benchmarking.md [#1085](https://github.com/apache/datafusion-comet/pull/1085) (rluvaton-flarion) + +**Other:** + +- chore: Generate changelog for 0.3.0 release [#964](https://github.com/apache/datafusion-comet/pull/964) (andygrove) +- chore: fix publish-to-maven script [#966](https://github.com/apache/datafusion-comet/pull/966) (andygrove) +- chore: Update benchmarks results based on 0.3.0-rc1 [#969](https://github.com/apache/datafusion-comet/pull/969) (andygrove) +- chore: update rem expression guide [#976](https://github.com/apache/datafusion-comet/pull/976) (kazuyukitanimura) +- chore: Enable additional CreateArray tests [#928](https://github.com/apache/datafusion-comet/pull/928) (Kimahriman) +- chore: fix compatibility guide [#978](https://github.com/apache/datafusion-comet/pull/978) (kazuyukitanimura) +- chore: Update for 0.3.0 release, prepare for 0.4.0 development [#970](https://github.com/apache/datafusion-comet/pull/970) (andygrove) +- chore: Don't transform the HashAggregate to CometHashAggregate if Comet shuffle is disabled [#991](https://github.com/apache/datafusion-comet/pull/991) (viirya) +- chore: Make parquet reader options Comet options instead of Hadoop options [#968](https://github.com/apache/datafusion-comet/pull/968) (parthchandra) +- chore: remove legacy comet-spark-shell [#1013](https://github.com/apache/datafusion-comet/pull/1013) (andygrove) +- chore: Reserve memory for native shuffle writer per partition [#988](https://github.com/apache/datafusion-comet/pull/988) (viirya) +- chore: Bump arrow-rs to 53.1.0 and datafusion [#1001](https://github.com/apache/datafusion-comet/pull/1001) (kazuyukitanimura) +- chore: Revert "chore: Reserve memory for native shuffle writer per partition (#988)" [#1020](https://github.com/apache/datafusion-comet/pull/1020) (viirya) +- minor: Remove hard-coded version number from Dockerfile [#1025](https://github.com/apache/datafusion-comet/pull/1025) (andygrove) +- chore: Reserve memory for native shuffle writer per partition 
[#1022](https://github.com/apache/datafusion-comet/pull/1022) (viirya) +- chore: Improve error handling when native lib fails to load [#1000](https://github.com/apache/datafusion-comet/pull/1000) (andygrove) +- chore: Use twox-hash 2.0 xxhash64 oneshot api instead of custom implementation [#1041](https://github.com/apache/datafusion-comet/pull/1041) (NoeB) +- chore: Refactor Arrow Array and Schema allocation in ColumnReader and MetadataColumnReader [#1047](https://github.com/apache/datafusion-comet/pull/1047) (viirya) +- minor: Refactor binary expr serde to reduce code duplication [#1053](https://github.com/apache/datafusion-comet/pull/1053) (andygrove) +- chore: Upgrade to DataFusion 43.0.0-rc1 [#1057](https://github.com/apache/datafusion-comet/pull/1057) (andygrove) +- chore: Refactor UnaryExpr and MathExpr in protobuf [#1056](https://github.com/apache/datafusion-comet/pull/1056) (andygrove) +- minor: use defaults instead of hard-coding values [#1060](https://github.com/apache/datafusion-comet/pull/1060) (andygrove) +- minor: refactor UnaryExpr handling to make code more concise [#1065](https://github.com/apache/datafusion-comet/pull/1065) (andygrove) +- chore: Refactor binary and math expression serde code [#1069](https://github.com/apache/datafusion-comet/pull/1069) (andygrove) +- chore: Simplify CometShuffleMemoryAllocator to use Spark unified memory allocator [#1063](https://github.com/apache/datafusion-comet/pull/1063) (viirya) +- test: Restore one test in CometExecSuite by adding COMET_SHUFFLE_MODE config [#1087](https://github.com/apache/datafusion-comet/pull/1087) (viirya) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 19 Andy Grove + 13 Matt Butrovich + 8 Liang-Chi Hsieh + 3 KAZUYUKI TANIMURA + 2 Adam Binford + 2 Kristin Cowalcijk + 1 NoeB + 1 Oleks V + 1 Parth Chandra + 1 neyama +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. diff --git a/dev/diffs/3.4.3.diff b/dev/diffs/3.4.3.diff index 6ac55f95c..12f739848 100644 --- a/dev/diffs/3.4.3.diff +++ b/dev/diffs/3.4.3.diff @@ -7,7 +7,7 @@ index d3544881af1..bf0e2b53c70 100644 2.5.1 2.0.8 + 3.4 -+ 0.4.0-SNAPSHOT ++ 0.5.0-SNAPSHOT - -
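The CometConf.scala changes earlier in this patch mark the columnar-shuffle memory settings (`spark.comet.columnar.shuffle.memorySize`, `spark.comet.columnar.shuffle.memory.factor`) as internal, test-only options and add `spark.comet.columnar.shuffle.unifiedMemoryAllocatorTest`, while the 0.4.0 changelog notes that Comet now requires Spark off-heap (unified) memory (#1062). As a rough illustration only, here is a minimal Scala sketch of a Spark session wired up for Comet using config keys that appear elsewhere in this diff; the jar path, the `org.apache.spark.CometPlugin` plugin class, and the `CometShuffleManager` shuffle manager are assumptions for the example, not taken from this patch.

```scala
// Minimal sketch, not part of this PR: start a local SparkSession with Comet enabled.
// The jar path and the plugin/shuffle-manager class names below are assumptions.
import org.apache.spark.sql.SparkSession

object CometQuickStart {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[8]")
      .appName("comet-quickstart")
      // Assumed location of the Comet jar built from this branch (0.5.0-SNAPSHOT).
      .config("spark.jars", "spark/target/comet-spark-spark3.4_2.12-0.5.0-SNAPSHOT.jar")
      .config("spark.plugins", "org.apache.spark.CometPlugin")
      .config(
        "spark.shuffle.manager",
        "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
      // Per changelog entry #1062, Comet now requires Spark off-heap (unified) memory.
      .config("spark.memory.offHeap.enabled", "true")
      .config("spark.memory.offHeap.size", "8g")
      // Shuffle keys that appear in this diff; "jvm" selects the columnar (JVM) shuffle path.
      .config("spark.comet.exec.shuffle.enabled", "true")
      .config("spark.comet.exec.shuffle.mode", "jvm")
      .getOrCreate()

    // Any Parquet-backed query will do for a smoke test; the path is a placeholder.
    spark.read.parquet("/tmp/tpch/lineitem").createOrReplaceTempView("lineitem")
    spark.sql("SELECT COUNT(*) FROM lineitem").show()
    spark.stop()
  }
}
```

Because `spark.comet.columnar.shuffle.memorySize` and `spark.comet.columnar.shuffle.memory.factor` become internal, test-only settings in this patch, a session like the one above would normally size Comet memory through `spark.comet.memoryOverhead` or the off-heap settings instead.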
(draw.io SVG diagram text: Spark Executor, JVM Code, Comet Parquet Reader, IO and Decompression, Native Code, Native Execution Plan, Parquet Decoding, Shuffle Files, executePlan(), CometExecIterator, next(), Spark Execution Logic, decode(), next())
\ No newline at end of file diff --git a/docs/source/_static/images/CometOverviewDetailed.drawio b/docs/source/_static/images/CometOverviewDetailed.drawio deleted file mode 100644 index ff7f4c591..000000000 --- a/docs/source/_static/images/CometOverviewDetailed.drawio +++ /dev/null @@ -1,94 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/source/_static/images/CometOverviewDetailed.drawio.svg b/docs/source/_static/images/CometOverviewDetailed.drawio.svg deleted file mode 100644 index 0f29083b1..000000000 --- a/docs/source/_static/images/CometOverviewDetailed.drawio.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - -
(CometOverviewDetailed.drawio.svg diagram text: Spark Executor, Spark Driver, Spark Logical Plan, Spark Physical Plan, Comet Physical Plan, protobuf intermediate representation, Native Execution Plan, Comet Physical Plan, protobuf intermediate representation, Shuffle Files)
\ No newline at end of file diff --git a/docs/source/_static/images/benchmark-results/0.4.0/tpcds_allqueries.png b/docs/source/_static/images/benchmark-results/0.4.0/tpcds_allqueries.png new file mode 100644 index 000000000..b1c9e3c90 Binary files /dev/null and b/docs/source/_static/images/benchmark-results/0.4.0/tpcds_allqueries.png differ diff --git a/docs/source/_static/images/benchmark-results/0.4.0/tpcds_queries_compare.png b/docs/source/_static/images/benchmark-results/0.4.0/tpcds_queries_compare.png new file mode 100644 index 000000000..e367fe4cc Binary files /dev/null and b/docs/source/_static/images/benchmark-results/0.4.0/tpcds_queries_compare.png differ diff --git a/docs/source/_static/images/benchmark-results/0.4.0/tpcds_queries_speedup_abs.png b/docs/source/_static/images/benchmark-results/0.4.0/tpcds_queries_speedup_abs.png new file mode 100644 index 000000000..d719bdbb1 Binary files /dev/null and b/docs/source/_static/images/benchmark-results/0.4.0/tpcds_queries_speedup_abs.png differ diff --git a/docs/source/_static/images/benchmark-results/0.4.0/tpcds_queries_speedup_rel.png b/docs/source/_static/images/benchmark-results/0.4.0/tpcds_queries_speedup_rel.png new file mode 100644 index 000000000..afbea75ac Binary files /dev/null and b/docs/source/_static/images/benchmark-results/0.4.0/tpcds_queries_speedup_rel.png differ diff --git a/docs/source/_static/images/benchmark-results/0.4.0/tpch_allqueries.png b/docs/source/_static/images/benchmark-results/0.4.0/tpch_allqueries.png new file mode 100644 index 000000000..24a3698bb Binary files /dev/null and b/docs/source/_static/images/benchmark-results/0.4.0/tpch_allqueries.png differ diff --git a/docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_compare.png b/docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_compare.png new file mode 100644 index 000000000..8d3223a9f Binary files /dev/null and b/docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_compare.png differ diff --git a/docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_speedup_abs.png b/docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_speedup_abs.png new file mode 100644 index 000000000..f8b3eeaea Binary files /dev/null and b/docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_speedup_abs.png differ diff --git a/docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_speedup_rel.png b/docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_speedup_rel.png new file mode 100644 index 000000000..d95b41503 Binary files /dev/null and b/docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_speedup_rel.png differ diff --git a/docs/source/_static/images/comet-dataflow.excalidraw b/docs/source/_static/images/comet-dataflow.excalidraw new file mode 100644 index 000000000..dd1209983 --- /dev/null +++ b/docs/source/_static/images/comet-dataflow.excalidraw @@ -0,0 +1,2134 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "id": "dDrwaYB6MkVSDP_FHWS-F", + "type": "rectangle", + "x": 825.6666870117188, + "y": 116.83334350585938, + "width": 321.9999999999999, + "height": 324, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffd8a8", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "Zz", + "roundness": { + "type": 3 + }, + "seed": 1163356465, + "version": 243, + "versionNonce": 743550265, + "isDeleted": false, + 
"boundElements": [ + { + "id": "u84B3vp5oTVNXI5uXsZ-r", + "type": "arrow" + }, + { + "id": "dlyj3Gno71fx16oqbbjXF", + "type": "arrow" + } + ], + "updated": 1733167126280, + "link": null, + "locked": false + }, + { + "id": "8pVcXTnP3tefe_O3kTE0b", + "type": "text", + "x": 467.66668701171875, + "y": 48.833343505859375, + "width": 61, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aS", + "roundness": null, + "seed": 306458015, + "version": 181, + "versionNonce": 110788633, + "isDeleted": false, + "boundElements": [], + "updated": 1733167570417, + "link": null, + "locked": false, + "text": "JVM", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "JVM", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "Ax7J0LoYh5TwQoRBM47cz", + "type": "text", + "x": 941.6666870117188, + "y": 56.833343505859375, + "width": 97, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aT", + "roundness": null, + "seed": 1762016049, + "version": 173, + "versionNonce": 1117284823, + "isDeleted": false, + "boundElements": [], + "updated": 1733167564367, + "link": null, + "locked": false, + "text": "Native", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Native", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "lSUrwgLq2W49ULouPfm0h", + "type": "rectangle", + "x": 868.1666870117188, + "y": 168.83334350585938, + "width": 245.00000000000006, + "height": 83.99999999999997, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aU", + "roundness": { + "type": 3 + }, + "seed": 1188956881, + "version": 337, + "versionNonce": 502265527, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "M6A-P7jOlvlDx-Kc0nsrQ" + }, + { + "id": "MqWIMNh5n51EVvWedfTIA", + "type": "arrow" + }, + { + "id": "GPIY241P4rRnRn48VdbYe", + "type": "arrow" + }, + { + "id": "6KmKXuc4aon2_yKt2fdZE", + "type": "arrow" + }, + { + "id": "ou2srC_Up4kjWcmgzdEH4", + "type": "arrow" + } + ], + "updated": 1733167585167, + "link": null, + "locked": false + }, + { + "id": "M6A-P7jOlvlDx-Kc0nsrQ", + "type": "text", + "x": 903.4791870117188, + "y": 198.33334350585938, + "width": 174.375, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aV", + "roundness": null, + "seed": 1968605361, + "version": 333, + "versionNonce": 1113091385, + "isDeleted": false, + "boundElements": [], + "updated": 1733166946960, + "link": null, + "locked": false, + "text": "ShuffleWriterExec", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "lSUrwgLq2W49ULouPfm0h", + "originalText": "ShuffleWriterExec", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "GHKyE6o_at1-J0KO1mWpt", + 
"type": "rectangle", + "x": 363.85928382109046, + "y": 505.8341459769945, + "width": 262.9999999999998, + "height": 93.99611799705886, + "angle": 0.003703686768755432, + "strokeColor": "#1e1e1e", + "backgroundColor": "#a5d8ff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aa", + "roundness": { + "type": 3 + }, + "seed": 952999857, + "version": 632, + "versionNonce": 906119703, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "yHFb7s7QYOWZst8xXlFG2" + }, + { + "id": "Jd5Fqfx6eFl_OJ6x0TUki", + "type": "arrow" + }, + { + "id": "7KEns52XY_jok50o5G5op", + "type": "arrow" + }, + { + "id": "quv5xELoqOR6W5SJipUrY", + "type": "arrow" + }, + { + "id": "kQzva6A57whXeUyhhNxOl", + "type": "arrow" + }, + { + "id": "Pjo3gnqBVibIixMHpFvkK", + "type": "arrow" + } + ], + "updated": 1733167372550, + "link": null, + "locked": false + }, + { + "id": "yHFb7s7QYOWZst8xXlFG2", + "type": "text", + "x": 399.93428382109033, + "y": 540.3322049755238, + "width": 190.85000000000002, + "height": 25, + "angle": 0.003703686768755432, + "strokeColor": "#1e1e1e", + "backgroundColor": "#a5d8ff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ab", + "roundness": null, + "seed": 1354040959, + "version": 598, + "versionNonce": 226422583, + "isDeleted": false, + "boundElements": [], + "updated": 1733167372550, + "link": null, + "locked": false, + "text": "CometExecIterator", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "GHKyE6o_at1-J0KO1mWpt", + "originalText": "CometExecIterator", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Iv4y4lEpq-EOkw5bBAWNA", + "type": "text", + "x": 930.6666870117188, + "y": 130.83334350585938, + "width": 109.9000015258789, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffd8a8", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b06", + "roundness": null, + "seed": 952057055, + "version": 92, + "versionNonce": 52977177, + "isDeleted": false, + "boundElements": [], + "updated": 1733166946960, + "link": null, + "locked": false, + "text": "Native Plan", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Native Plan", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "Ro2R78aPw-luRF_bB2EKU", + "type": "rectangle", + "x": 366.34678047371074, + "y": 307.83595662631933, + "width": 262.99999999999983, + "height": 92.00353907094141, + "angle": 0.003703686768755432, + "strokeColor": "#1e1e1e", + "backgroundColor": "#a5d8ff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0g", + "roundness": { + "type": 3 + }, + "seed": 959895479, + "version": 644, + "versionNonce": 1083149527, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "twg3z-vK6jWmVl4xySGde" + }, + { + "id": "u84B3vp5oTVNXI5uXsZ-r", + "type": "arrow" + }, + { + "id": "IISSP3sEmCbjsvI4SFgaX", + "type": "arrow" + }, + { + "id": "7KEns52XY_jok50o5G5op", + "type": "arrow" + }, + { + "id": "quv5xELoqOR6W5SJipUrY", + "type": "arrow" + } + ], + "updated": 1733167372551, + "link": null, + "locked": 
false + }, + { + "id": "twg3z-vK6jWmVl4xySGde", + "type": "text", + "x": 396.18428047371066, + "y": 341.33772616179004, + "width": 203.32500000000002, + "height": 25, + "angle": 0.003703686768755432, + "strokeColor": "#1e1e1e", + "backgroundColor": "#a5d8ff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0h", + "roundness": null, + "seed": 34654423, + "version": 631, + "versionNonce": 1121311223, + "isDeleted": false, + "boundElements": [], + "updated": 1733167372551, + "link": null, + "locked": false, + "text": "CometBatchIterator", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "Ro2R78aPw-luRF_bB2EKU", + "originalText": "CometBatchIterator", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "macb6DKtgx8DhcqjKk6no", + "type": "rectangle", + "x": 366.1634633724364, + "y": 157.33528450732996, + "width": 262.9999999999998, + "height": 93.99611799705886, + "angle": 0.003703686768755432, + "strokeColor": "#1e1e1e", + "backgroundColor": "#a5d8ff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0i", + "roundness": { + "type": 3 + }, + "seed": 1827361271, + "version": 674, + "versionNonce": 1149488599, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "HzfSeR8C3p6yYRHlGGIdM" + }, + { + "id": "dlyj3Gno71fx16oqbbjXF", + "type": "arrow" + }, + { + "id": "MqWIMNh5n51EVvWedfTIA", + "type": "arrow" + } + ], + "updated": 1733167385065, + "link": null, + "locked": false + }, + { + "id": "HzfSeR8C3p6yYRHlGGIdM", + "type": "text", + "x": 402.2384633724363, + "y": 191.83334350585938, + "width": 190.85000000000002, + "height": 25, + "angle": 0.003703686768755432, + "strokeColor": "#1e1e1e", + "backgroundColor": "#a5d8ff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0j", + "roundness": null, + "seed": 901511959, + "version": 643, + "versionNonce": 1747825847, + "isDeleted": false, + "boundElements": [], + "updated": 1733167372551, + "link": null, + "locked": false, + "text": "CometExecIterator", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "macb6DKtgx8DhcqjKk6no", + "originalText": "CometExecIterator", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "7VTYHzsqQvUuKMy0ShKZn", + "type": "rectangle", + "x": 871.1634633724364, + "y": 304.3333435058594, + "width": 245.00000000000006, + "height": 83.99999999999997, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0k", + "roundness": { + "type": 3 + }, + "seed": 1785572407, + "version": 379, + "versionNonce": 1216788985, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "YKETugAZBRtG7oeas8CPz" + }, + { + "id": "IISSP3sEmCbjsvI4SFgaX", + "type": "arrow" + }, + { + "id": "u84B3vp5oTVNXI5uXsZ-r", + "type": "arrow" + }, + { + "id": "GPIY241P4rRnRn48VdbYe", + "type": "arrow" + }, + { + "id": "6KmKXuc4aon2_yKt2fdZE", + "type": "arrow" + } + ], + "updated": 1733167417649, + "link": null, + "locked": false + }, + { + "id": "YKETugAZBRtG7oeas8CPz", + "type": "text", + "x": 
947.8009641353758, + "y": 333.8333435058594, + "width": 91.7249984741211, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0l", + "roundness": null, + "seed": 2121862487, + "version": 357, + "versionNonce": 1828219865, + "isDeleted": false, + "boundElements": [], + "updated": 1733166946960, + "link": null, + "locked": false, + "text": "ScanExec", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "7VTYHzsqQvUuKMy0ShKZn", + "originalText": "ScanExec", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "_a8_bfztXdYiD4AXJxPee", + "type": "rectangle", + "x": 820.6634633724364, + "y": 473.3333435058594, + "width": 334.9999999999999, + "height": 329, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffd8a8", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0m", + "roundness": { + "type": 3 + }, + "seed": 1577231703, + "version": 409, + "versionNonce": 1832634263, + "isDeleted": false, + "boundElements": [ + { + "id": "kQzva6A57whXeUyhhNxOl", + "type": "arrow" + }, + { + "id": "Pjo3gnqBVibIixMHpFvkK", + "type": "arrow" + } + ], + "updated": 1733167465343, + "link": null, + "locked": false + }, + { + "id": "aiAipugp154jY5IgHqjTm", + "type": "rectangle", + "x": 862.1634633724364, + "y": 535.3333435058594, + "width": 245.00000000000006, + "height": 83.99999999999997, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0n", + "roundness": { + "type": 3 + }, + "seed": 1666310775, + "version": 392, + "versionNonce": 1164820153, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "XPpjuVi7ZYpwo3X03G9P1" + }, + { + "id": "Pjo3gnqBVibIixMHpFvkK", + "type": "arrow" + } + ], + "updated": 1733167345582, + "link": null, + "locked": false + }, + { + "id": "XPpjuVi7ZYpwo3X03G9P1", + "type": "text", + "x": 924.6759633724364, + "y": 564.8333435058594, + "width": 119.97500000000001, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0o", + "roundness": null, + "seed": 838630295, + "version": 403, + "versionNonce": 1508263831, + "isDeleted": false, + "boundElements": [], + "updated": 1733166982872, + "link": null, + "locked": false, + "text": "ProjectExec", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "aiAipugp154jY5IgHqjTm", + "originalText": "ProjectExec", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "aE2QBjfmrpzBTUB_t6QRY", + "type": "text", + "x": 924.6634633724364, + "y": 497.3333435058594, + "width": 109.9000015258789, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffd8a8", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0p", + "roundness": null, + "seed": 1043787959, + "version": 150, + "versionNonce": 1187544183, + "isDeleted": 
false, + "boundElements": [], + "updated": 1733166977658, + "link": null, + "locked": false, + "text": "Native Plan", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Native Plan", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "vG9tTZvROj2ybA4oAS_bb", + "type": "rectangle", + "x": 864.160239733154, + "y": 671.8333435058594, + "width": 245.00000000000006, + "height": 83.99999999999997, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0q", + "roundness": { + "type": 3 + }, + "seed": 27640279, + "version": 529, + "versionNonce": 1105701913, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "oWkZozTacCvv40wRG7g3s" + } + ], + "updated": 1733167462816, + "link": null, + "locked": false + }, + { + "id": "oWkZozTacCvv40wRG7g3s", + "type": "text", + "x": 978.160239733154, + "y": 701.3333435058594, + "width": 17, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0r", + "roundness": null, + "seed": 1297968887, + "version": 526, + "versionNonce": 271368441, + "isDeleted": false, + "boundElements": [], + "updated": 1733167462816, + "link": null, + "locked": false, + "text": "...", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "vG9tTZvROj2ybA4oAS_bb", + "originalText": "...", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "kQzva6A57whXeUyhhNxOl", + "type": "arrow", + "x": 627.9453883765393, + "y": 529.8293445331748, + "width": 192.71807499589704, + "height": 0.234522891430629, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0w", + "roundness": { + "type": 2 + }, + "seed": 562773463, + "version": 217, + "versionNonce": 611157943, + "isDeleted": false, + "boundElements": [], + "updated": 1733167475920, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 192.71807499589704, + 0.234522891430629 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "GHKyE6o_at1-J0KO1mWpt", + "focus": -0.48947127224675757, + "gap": 1, + "fixedPoint": null + }, + "endBinding": { + "elementId": "C3-eUJazhRorbXp9Um-Mo", + "focus": -1.9941089907787155, + "gap": 12.73052391874603, + "fixedPoint": null + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "Pjo3gnqBVibIixMHpFvkK", + "type": "arrow", + "x": 861.6634633724364, + "y": 571.3333435058594, + "width": 233.87028528948713, + "height": 0.4072197033743805, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0y", + "roundness": { + "type": 2 + }, + "seed": 189975865, + "version": 190, + "versionNonce": 1899895735, + "isDeleted": false, + "boundElements": [], + "updated": 1733167372551, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -233.87028528948713, + 
-0.4072197033743805 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "aiAipugp154jY5IgHqjTm", + "focus": 0.1262072643242283, + "gap": 1, + "fixedPoint": null + }, + "endBinding": { + "elementId": "GHKyE6o_at1-J0KO1mWpt", + "focus": 0.37801089214584216, + "gap": 1, + "fixedPoint": null + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "u84B3vp5oTVNXI5uXsZ-r", + "type": "arrow", + "x": 867.6634633724364, + "y": 335.3333435058594, + "width": 235.9983810536769, + "height": 0.5628844927418868, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0z", + "roundness": { + "type": 2 + }, + "seed": 849585047, + "version": 139, + "versionNonce": 2098561815, + "isDeleted": false, + "boundElements": null, + "updated": 1733167372551, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -235.9983810536769, + 0.5628844927418868 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "7VTYHzsqQvUuKMy0ShKZn", + "focus": 0.261904761904762, + "gap": 3.5, + "fixedPoint": null + }, + "endBinding": { + "elementId": "Ro2R78aPw-luRF_bB2EKU", + "focus": -0.3765315568105985, + "gap": 2.2509344960505473, + "fixedPoint": null + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "IISSP3sEmCbjsvI4SFgaX", + "type": "arrow", + "x": 630.6644917368169, + "y": 368.0556851230956, + "width": 238.99897163561945, + "height": 0.7223416172362249, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b12", + "roundness": { + "type": 2 + }, + "seed": 1980422201, + "version": 131, + "versionNonce": 1606617143, + "isDeleted": false, + "boundElements": null, + "updated": 1733167372551, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 238.99897163561945, + -0.7223416172362249 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "Ro2R78aPw-luRF_bB2EKU", + "focus": 0.31181090317651905, + "gap": 1.3694590603334404, + "fixedPoint": null + }, + "endBinding": { + "elementId": "7VTYHzsqQvUuKMy0ShKZn", + "focus": -0.5000000000000002, + "gap": 1.5, + "fixedPoint": null + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "7KEns52XY_jok50o5G5op", + "type": "arrow", + "x": 437.66346337243635, + "y": 399.3333435058594, + "width": 4, + "height": 104, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b13", + "roundness": { + "type": 2 + }, + "seed": 1651841465, + "version": 252, + "versionNonce": 1005623161, + "isDeleted": false, + "boundElements": null, + "updated": 1733167373032, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 4, + 104 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "Ro2R78aPw-luRF_bB2EKU", + "focus": 0.46419678699387723, + "gap": 1, + "fixedPoint": null + }, + "endBinding": { + "elementId": "GHKyE6o_at1-J0KO1mWpt", + "focus": -0.3880655447790852, + "gap": 2.3015909311958467, + "fixedPoint": null + }, + "startArrowhead": null, + 
"endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "quv5xELoqOR6W5SJipUrY", + "type": "arrow", + "x": 555.6634633724364, + "y": 503.3333435058594, + "width": 0, + "height": 103, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b14", + "roundness": { + "type": 2 + }, + "seed": 1677615735, + "version": 236, + "versionNonce": 55168313, + "isDeleted": false, + "boundElements": null, + "updated": 1733167373032, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + -103 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "GHKyE6o_at1-J0KO1mWpt", + "focus": 0.4579838276843583, + "gap": 2.723810257547825, + "fixedPoint": null + }, + "endBinding": { + "elementId": "Ro2R78aPw-luRF_bB2EKU", + "focus": -0.43910468547182224, + "gap": 1, + "fixedPoint": null + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "dlyj3Gno71fx16oqbbjXF", + "type": "arrow", + "x": 632.6636522386541, + "y": 181.2823496270506, + "width": 193.99981113378226, + "height": 0.9490061211912177, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b15", + "roundness": { + "type": 2 + }, + "seed": 1072669367, + "version": 132, + "versionNonce": 1271110743, + "isDeleted": false, + "boundElements": null, + "updated": 1733167393399, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 193.99981113378226, + -0.9490061211912177 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "macb6DKtgx8DhcqjKk6no", + "focus": -0.4652381310069499, + "gap": 3.4138894826694752, + "fixedPoint": null + }, + "endBinding": { + "elementId": "OFwuou30qsm3aMZ96ASUO", + "focus": -1.5667144994299218, + "gap": 7.5, + "fixedPoint": null + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "C3-eUJazhRorbXp9Um-Mo", + "type": "text", + "x": 664.6634633724364, + "y": 492.3333435058594, + "width": 189, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b16", + "roundness": null, + "seed": 755500537, + "version": 76, + "versionNonce": 1747049559, + "isDeleted": false, + "boundElements": [ + { + "id": "kQzva6A57whXeUyhhNxOl", + "type": "arrow" + } + ], + "updated": 1733167475516, + "link": null, + "locked": false, + "text": "executePlan()", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "executePlan()", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "OFwuou30qsm3aMZ96ASUO", + "type": "text", + "x": 669.1634633724364, + "y": 147.83334350585938, + "width": 189, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b17", + "roundness": null, + "seed": 1806263479, + "version": 124, + "versionNonce": 1646888249, + "isDeleted": false, + "boundElements": [ + { + "id": 
"dlyj3Gno71fx16oqbbjXF", + "type": "arrow" + } + ], + "updated": 1733167389568, + "link": null, + "locked": false, + "text": "executePlan()", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "executePlan()", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "2ZoBSXI-amAjEfzxoQ17b", + "type": "text", + "x": 749.1634633724364, + "y": 308.8333435058594, + "width": 111, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b18", + "roundness": null, + "seed": 1238305721, + "version": 97, + "versionNonce": 1368434199, + "isDeleted": false, + "boundElements": [], + "updated": 1733167188224, + "link": null, + "locked": false, + "text": "next()", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "next()", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "1-lAVH11BDSwJVoMYl80T", + "type": "text", + "x": 371.16346337243635, + "y": 439.8333435058594, + "width": 111, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b19", + "roundness": null, + "seed": 756108375, + "version": 211, + "versionNonce": 161358135, + "isDeleted": false, + "boundElements": [], + "updated": 1733167372551, + "link": null, + "locked": false, + "text": "next()", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "next()", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "VFm7kotI1oNa1rIxLMh6W", + "type": "text", + "x": 676.6634633724364, + "y": 376.3333435058594, + "width": 147, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1A", + "roundness": null, + "seed": 1623222905, + "version": 76, + "versionNonce": 1030050969, + "isDeleted": false, + "boundElements": null, + "updated": 1733167271120, + "link": null, + "locked": false, + "text": "exportBatch()", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "exportBatch()", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "LZtRekUnAEPkjsECzd7zb", + "type": "text", + "x": 663.6634633724364, + "y": 575.3333435058594, + "width": 217, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1B", + "roundness": null, + "seed": 187512855, + "version": 127, + "versionNonce": 1917573399, + "isDeleted": false, + "boundElements": null, + "updated": 1733167377483, + "link": null, + "locked": false, + "text": "importVectors()", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "importVectors()", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "MqWIMNh5n51EVvWedfTIA", + "type": "arrow", + "x": 
868.6634633724364, + "y": 220.33334350585938, + "width": 239, + "height": 1, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1C", + "roundness": { + "type": 2 + }, + "seed": 685490007, + "version": 35, + "versionNonce": 652639415, + "isDeleted": false, + "boundElements": null, + "updated": 1733167385065, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -239, + 1 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "lSUrwgLq2W49ULouPfm0h", + "focus": -0.2114558118557865, + "gap": 1, + "fixedPoint": null + }, + "endBinding": { + "elementId": "macb6DKtgx8DhcqjKk6no", + "focus": 0.3654122251883526, + "gap": 1, + "fixedPoint": null + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "RYdCN0xyvNHqlA1WDARNx", + "type": "text", + "x": 670.1634633724364, + "y": 225.83334350585938, + "width": 217, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1D", + "roundness": null, + "seed": 546285145, + "version": 165, + "versionNonce": 1950128183, + "isDeleted": false, + "boundElements": [], + "updated": 1733167400420, + "link": null, + "locked": false, + "text": "importVectors()", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "importVectors()", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "GPIY241P4rRnRn48VdbYe", + "type": "arrow", + "x": 941.6634633724364, + "y": 264.3333435058594, + "width": 2, + "height": 32, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1E", + "roundness": { + "type": 2 + }, + "seed": 1958688375, + "version": 17, + "versionNonce": 1070980535, + "isDeleted": false, + "boundElements": null, + "updated": 1733167413149, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 2, + 32 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "lSUrwgLq2W49ULouPfm0h", + "focus": 0.4183574316825765, + "gap": 11.500000000000014, + "fixedPoint": null + }, + "endBinding": { + "elementId": "7VTYHzsqQvUuKMy0ShKZn", + "focus": -0.37462537462537454, + "gap": 8, + "fixedPoint": null + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "6KmKXuc4aon2_yKt2fdZE", + "type": "arrow", + "x": 1052.6634633724364, + "y": 290.3333435058594, + "width": 1, + "height": 30, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1F", + "roundness": { + "type": 2 + }, + "seed": 1971585785, + "version": 15, + "versionNonce": 1869550297, + "isDeleted": false, + "boundElements": null, + "updated": 1733167417649, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -1, + -30 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "7VTYHzsqQvUuKMy0ShKZn", + "focus": 0.4912563895614742, + "gap": 14, + 
"fixedPoint": null + }, + "endBinding": { + "elementId": "lSUrwgLq2W49ULouPfm0h", + "focus": -0.4789893168742339, + "gap": 7.500000000000014, + "fixedPoint": null + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "0HMrFdchM3CqZB7BeDX-8", + "type": "text", + "x": 866.1634633724364, + "y": 269.8333435058594, + "width": 111, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1G", + "roundness": null, + "seed": 752246361, + "version": 154, + "versionNonce": 350456215, + "isDeleted": false, + "boundElements": [], + "updated": 1733167430100, + "link": null, + "locked": false, + "text": "next()", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "next()", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "laDFG84hR_vbCVHWMp16w", + "type": "arrow", + "x": 942.7269220990613, + "y": 628.5394176529115, + "width": 2, + "height": 32, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1H", + "roundness": { + "type": 2 + }, + "seed": 1602854839, + "version": 100, + "versionNonce": 1479430905, + "isDeleted": false, + "boundElements": [], + "updated": 1733167459736, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 2, + 32 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "PKnVxmYbX4yxvUMYxiED6", + "type": "arrow", + "x": 1053.7269220990613, + "y": 654.5394176529115, + "width": 1, + "height": 30, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1I", + "roundness": { + "type": 2 + }, + "seed": 1657442519, + "version": 98, + "versionNonce": 951815129, + "isDeleted": false, + "boundElements": [], + "updated": 1733167459736, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -1, + -30 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "3s_jHVea7P3zpwyhsYqNO", + "type": "text", + "x": 867.2269220990613, + "y": 634.0394176529115, + "width": 111, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1J", + "roundness": null, + "seed": 1863343607, + "version": 237, + "versionNonce": 149858489, + "isDeleted": false, + "boundElements": [], + "updated": 1733167459736, + "link": null, + "locked": false, + "text": "next()", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "next()", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "9t3RnkE-nqccuTgRaqj2w", + "type": "text", + "x": 492.66346337243635, + "y": 444.3333435058594, + "width": 60.000000000000014, + "height": 25, + "angle": 
0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1K", + "roundness": null, + "seed": 1235248153, + "version": 108, + "versionNonce": 91341817, + "isDeleted": false, + "boundElements": null, + "updated": 1733167501667, + "link": null, + "locked": false, + "text": "batch", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "batch", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "3xNt8fnnjY9QtNoev_FMj", + "type": "text", + "x": 1070.6634633724364, + "y": 265.8333435058594, + "width": 60.000000000000014, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1L", + "roundness": null, + "seed": 1471691417, + "version": 154, + "versionNonce": 195092727, + "isDeleted": false, + "boundElements": [], + "updated": 1733167506301, + "link": null, + "locked": false, + "text": "batch", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "batch", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "p9iXL4J6GojVzXDDx1NVM", + "type": "text", + "x": 1071.6634633724364, + "y": 629.8333435058594, + "width": 60.000000000000014, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1M", + "roundness": null, + "seed": 951602999, + "version": 150, + "versionNonce": 595097273, + "isDeleted": false, + "boundElements": [], + "updated": 1733167509787, + "link": null, + "locked": false, + "text": "batch", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "batch", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "xdQ0w3-b5BGEpSvQ2Uc8A", + "type": "rectangle", + "x": 1212.6634633724364, + "y": 131.83334350585938, + "width": 260, + "height": 128.99999999999997, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#eaddd7", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1N", + "roundness": { + "type": 3 + }, + "seed": 407784057, + "version": 373, + "versionNonce": 1796011255, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "h6I3dPlbn7bb50l-R-ZrT" + }, + { + "id": "ou2srC_Up4kjWcmgzdEH4", + "type": "arrow" + } + ], + "updated": 1733167585167, + "link": null, + "locked": false + }, + { + "id": "h6I3dPlbn7bb50l-R-ZrT", + "type": "text", + "x": 1282.2634633724363, + "y": 136.83334350585938, + "width": 120.80000000000001, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#eaddd7", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1O", + "roundness": null, + "seed": 1894814553, + "version": 326, + "versionNonce": 1273706233, + "isDeleted": false, + "boundElements": [], + "updated": 1733167557133, + "link": null, + "locked": false, + "text": "Shuffle 
Files", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "top", + "containerId": "xdQ0w3-b5BGEpSvQ2Uc8A", + "originalText": "Shuffle Files", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "rpX5p5xVzE-agyW9ssfpT", + "type": "rectangle", + "x": 1253.6634633724364, + "y": 182.33334350585938, + "width": 190, + "height": 45, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffec99", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1P", + "roundness": { + "type": 3 + }, + "seed": 337035321, + "version": 422, + "versionNonce": 1799090137, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "ZHswgvVioPRH-MY0pzRZO" + } + ], + "updated": 1733167557133, + "link": null, + "locked": false + }, + { + "id": "ZHswgvVioPRH-MY0pzRZO", + "type": "text", + "x": 1264.0634648983153, + "y": 192.33334350585938, + "width": 169.1999969482422, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffec99", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1Q", + "roundness": null, + "seed": 1336168729, + "version": 390, + "versionNonce": 1438813369, + "isDeleted": false, + "boundElements": [], + "updated": 1733167557133, + "link": null, + "locked": false, + "text": "Arrow IPC Batch", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "rpX5p5xVzE-agyW9ssfpT", + "originalText": "Arrow IPC Batch", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "ou2srC_Up4kjWcmgzdEH4", + "type": "arrow", + "x": 1114.6634633724364, + "y": 210.33334350585938, + "width": 97, + "height": 2, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#b2f2bb", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b1R", + "roundness": { + "type": 2 + }, + "seed": 1875512793, + "version": 40, + "versionNonce": 1347291095, + "isDeleted": false, + "boundElements": null, + "updated": 1733167585167, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 97, + -2 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "lSUrwgLq2W49ULouPfm0h", + "focus": 0.04618975520292551, + "gap": 1.496776360717604, + "fixedPoint": null + }, + "endBinding": { + "elementId": "xdQ0w3-b5BGEpSvQ2Uc8A", + "focus": -0.13841786234942072, + "gap": 1, + "fixedPoint": null + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + } + ], + "appState": { + "gridSize": 20, + "gridStep": 5, + "gridModeEnabled": false, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file diff --git a/docs/source/_static/images/comet-dataflow.svg b/docs/source/_static/images/comet-dataflow.svg new file mode 100644 index 000000000..20a573c1f --- /dev/null +++ b/docs/source/_static/images/comet-dataflow.svg @@ -0,0 +1,10 @@ + + + + + + + + JVMNativeShuffleWriterExecCometExecIteratorNative PlanCometBatchIteratorCometExecIteratorScanExecProjectExecNative Plan...executePlan()executePlan()next()next()exportBatch()importVectors()importVectors()next()next()batchbatchbatchShuffle FilesArrow IPC Batch \ No newline at end of file diff --git a/docs/source/contributor-guide/benchmark-results/0.3.0/datafusion-python-tpch.json 
b/docs/source/contributor-guide/benchmark-results/0.3.0/datafusion-python-tpch.json deleted file mode 100644 index 8f6ca23a2..000000000 --- a/docs/source/contributor-guide/benchmark-results/0.3.0/datafusion-python-tpch.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "engine": "datafusion-python", - "datafusion-version": "40.1.0", - "benchmark": "tpch", - "data_path": "/mnt/bigdata/tpch/sf100", - "query_path": "../../tpch/queries/", - "1": [ - 5.120208740234375 - ], - "2": [ - 3.7675418853759766 - ], - "3": [ - 5.07387113571167 - ], - "4": [ - 1.9587907791137695 - ], - "5": [ - 8.816755771636963 - ], - "6": [ - 1.0937256813049316 - ], - "7": [ - 17.575931549072266 - ], - "8": [ - 8.337157964706421 - ], - "9": [ - 17.257150411605835 - ], - "10": [ - 7.945651292800903 - ], - "11": [ - 2.7080790996551514 - ], - "12": [ - 1.7468397617340088 - ], - "13": [ - 5.695569276809692 - ], - "14": [ - 2.0220673084259033 - ], - "15": [ - 4.112257719039917 - ], - "16": [ - 1.9162819385528564 - ], - "17": [ - 21.812944650650024 - ], - "18": [ - 23.510076999664307 - ], - "19": [ - 2.842766761779785 - ], - "20": [ - 7.717615842819214 - ], - "21": [ - 19.96519947052002 - ], - "22": [ - 1.0273211002349854 - ] -} \ No newline at end of file diff --git a/docs/source/contributor-guide/benchmark-results/0.4.0/comet-tpcds.json b/docs/source/contributor-guide/benchmark-results/0.4.0/comet-tpcds.json new file mode 100644 index 000000000..ecd632e84 --- /dev/null +++ b/docs/source/contributor-guide/benchmark-results/0.4.0/comet-tpcds.json @@ -0,0 +1,755 @@ +{ + "engine": "datafusion-comet", + "benchmark": "tpcds", + "data_path": "/mnt/bigdata/tpcds/sf100/", + "query_path": "/tpcds", + "spark_conf": { + "spark.executor.instances": "4", + "spark.comet.cast.allowIncompatible": "true", + "spark.kubernetes.memoryOverheadFactor": "0.4", + "spark.app.initial.jar.urls": "file:/opt/spark/jars/comet-spark-spark3.4_2.12-0.4.0-SNAPSHOT.jar", + "spark.kubernetes.authenticate.driver.serviceAccountName": "spark", + "spark.kubernetes.executor.container.image": "woody:5000/spark-tpcbench:comet", + "spark.kubernetes.driver.volumes.hostPath.data.mount.path": "/mnt/bigdata", + "spark.kubernetes.driver.volumes.persistentVolumeClaim.benchmark-results-pv.options.claimName": "benchmark-results-pvc", + "spark.comet.exec.shuffle.enabled": "true", + "spark.memory.offHeap.enabled": "true", + "spark.memory.offHeap.size": "32g", + "spark.executor.memoryOverhead": "2457M", + "spark.master": "k8s://https://kube1:6443", + "spark.app.startTime": "1731962698535", + "spark.app.id": "spark-41930409e7b1448a837ed5c0d9130620", + "spark.kubernetes.executor.podNamePrefix": "comet-benchmark-derived-from-tpcds-31aa739341057c91", + "spark.serializer.objectStreamReset": "100", + "spark.executor.memory": "8g", + "spark.submit.deployMode": "client", + "spark.kubernetes.driver.volumes.hostPath.data.options.path": "/mnt/bigdata", + "spark.driver.host": "tpcbench-py-26286b934105723b-driver-svc.default.svc", + "spark.eventLog.enabled": "false", + "spark.kubernetes.container.image.pullPolicy": "Always", + "spark.kubernetes.submitInDriver": "true", + "spark.comet.exec.replaceSortMergeJoin": "false", + "spark.driver.bindAddress": "10.42.1.9", + "spark.sql.warehouse.dir": "file:/opt/spark/work-dir/spark-warehouse", + "spark.driver.extraJavaOptions": "-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED 
--add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false", + "spark.jars": "local:/opt/spark/jars/comet-spark-spark3.4_2.12-0.4.0-SNAPSHOT.jar", + "spark.shuffle.manager": "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager", + "spark.driver.blockManager.port": "7079", + "spark.repl.local.jars": "local:///opt/spark/jars/comet-spark-spark3.4_2.12-0.4.0-SNAPSHOT.jar", + "spark.app.submitTime": "1731962698320", + "spark.driver.extraClassPath": "local:///opt/spark/jars/comet-spark-spark3.4_2.12-0.4.0-SNAPSHOT.jar", + "spark.executor.id": "driver", + "spark.kubernetes.driver.volumes.persistentVolumeClaim.benchmark-results-pv.mount.readOnly": "false", + "spark.comet.exec.shuffle.mode": "auto", + "spark.driver.memory": "8G", + "spark.plugins": "org.apache.spark.CometPlugin", + "spark.driver.port": "7078", + "spark.sql.extensions": "org.apache.comet.CometSparkSessionExtensions", + "spark.kubernetes.driver.pod.name": "tpcbench-py-26286b934105723b-driver", + "spark.kubernetes.driver.volumes.hostPath.data.options.readOnly": "false", + "spark.kubernetes.resource.type": "python", + "spark.kubernetes.executor.volumes.hostPath.data.mount.path": "/mnt/bigdata", + "spark.kubernetes.driver.container.image": "woody:5000/spark-tpcbench:comet", + "spark.executor.cores": "6", + "spark.kubernetes.driver.volumes.persistentVolumeClaim.benchmark-results-pv.mount.path": "/mnt/benchmark-results", + "spark.rdd.compress": "True", + "spark.kubernetes.executor.volumes.hostPath.data.options.path": "/mnt/bigdata", + "spark.executor.extraJavaOptions": "-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false", + "spark.submit.pyFiles": "", + "spark.executor.extraClassPath": "local:///opt/spark/jars/comet-spark-spark3.4_2.12-0.4.0-SNAPSHOT.jar", + "spark.app.name": "comet benchmark derived from tpcds" + }, + "1": [ + 3.613295316696167, + 0.9363176822662354, + 0.8858673572540283, + 0.9213356971740723, + 0.9260072708129883 + ], + "2": [ + 2.1988301277160645, + 1.2930514812469482, + 1.2836058139801025, + 1.3122997283935547, + 1.310011863708496 + ], + "3": [ + 1.1296944618225098, + 0.8390710353851318, + 0.863898754119873, + 0.8277552127838135, + 
0.8591916561126709 + ], + "4": [ + 23.46227192878723, + 23.30874729156494, + 22.632073640823364, + 23.021177530288696, + 23.210322380065918 + ], + "5": [ + 4.248774290084839, + 3.8154327869415283, + 3.7951250076293945, + 3.841792583465576, + 3.830977201461792 + ], + "6": [ + 1.2654953002929688, + 1.0582756996154785, + 1.0062625408172607, + 0.9889535903930664, + 1.0066215991973877 + ], + "7": [ + 1.9927458763122559, + 1.7910470962524414, + 1.7864744663238525, + 1.8166968822479248, + 1.8165059089660645 + ], + "8": [ + 1.39858078956604, + 1.1697239875793457, + 1.181609869003296, + 1.1592917442321777, + 1.2115702629089355 + ], + "9": [ + 3.8118808269500732, + 3.5978047847747803, + 3.6009204387664795, + 3.6475167274475098, + 3.720515489578247 + ], + "10": [ + 1.6375336647033691, + 1.253730058670044, + 1.2790555953979492, + 1.2641334533691406, + 1.2304799556732178 + ], + "11": [ + 8.888672590255737, + 8.726006031036377, + 8.779715061187744, + 8.728724002838135, + 8.738395929336548 + ], + "12": [ + 1.209524393081665, + 1.1902368068695068, + 1.2197084426879883, + 1.214184045791626, + 1.0896027088165283 + ], + "13": [ + 2.334285259246826, + 2.3459174633026123, + 2.376317262649536, + 2.3597984313964844, + 2.399404287338257 + ], + "14": [ + 22.216265201568604, + 21.80620813369751, + 22.18099617958069, + 22.004033088684082, + 22.437575578689575 + ], + "15": [ + 2.6664204597473145, + 2.5816941261291504, + 2.6421005725860596, + 2.655304193496704, + 2.6851134300231934 + ], + "16": [ + 7.677209854125977, + 7.403982400894165, + 7.679336309432983, + 7.390750169754028, + 7.398985385894775 + ], + "17": [ + 2.4100611209869385, + 2.4173429012298584, + 2.3482720851898193, + 2.3698456287384033, + 2.4305622577667236 + ], + "18": [ + 2.439337730407715, + 2.5373470783233643, + 2.4983959197998047, + 2.529120922088623, + 2.538098096847534 + ], + "19": [ + 2.5696511268615723, + 2.5729575157165527, + 2.5877132415771484, + 2.5895493030548096, + 2.584230899810791 + ], + "20": [ + 1.7106993198394775, + 1.6393051147460938, + 1.5836944580078125, + 1.624565839767456, + 1.6353082656860352 + ], + "21": [ + 1.096069097518921, + 1.0144309997558594, + 1.0689029693603516, + 1.1604490280151367, + 1.0859742164611816 + ], + "22": [ + 8.084425210952759, + 8.307284593582153, + 8.248545169830322, + 8.41027283668518, + 8.130585432052612 + ], + "23": [ + 31.006169080734253, + 31.809021949768066, + 31.491374731063843, + 31.673828125, + 32.41964364051819 + ], + "24": [ + 13.210773944854736, + 12.064785242080688, + 12.284985542297363, + 12.34001111984253, + 13.093876361846924 + ], + "25": [ + 2.7884602546691895, + 2.611072301864624, + 2.6468446254730225, + 2.5781397819519043, + 2.732714891433716 + ], + "26": [ + 1.1039531230926514, + 1.038722276687622, + 1.0986864566802979, + 0.9996967315673828, + 1.0384690761566162 + ], + "27": [ + 1.8319566249847412, + 1.8125741481781006, + 1.7781953811645508, + 1.7830994129180908, + 1.8019931316375732 + ], + "28": [ + 4.2303125858306885, + 4.181443214416504, + 4.171067953109741, + 4.197100639343262, + 4.169423341751099 + ], + "29": [ + 3.66855525970459, + 3.7532052993774414, + 3.566790819168091, + 3.7179572582244873, + 3.686014175415039 + ], + "30": [ + 1.117868185043335, + 1.2058172225952148, + 1.0498886108398438, + 1.1140456199645996, + 1.1328985691070557 + ], + "31": [ + 4.85772180557251, + 4.797250270843506, + 4.839718818664551, + 4.840039014816284, + 4.837655782699585 + ], + "32": [ + 1.3921968936920166, + 1.3323743343353271, + 1.4425835609436035, + 1.348841667175293, + 1.3850789070129395 + ], + "33": 
[ + 2.293806314468384, + 2.253751754760742, + 2.240044593811035, + 2.2648448944091797, + 2.2620596885681152 + ], + "34": [ + 1.3108677864074707, + 1.2720186710357666, + 1.3723440170288086, + 1.2785835266113281, + 1.26865816116333 + ], + "35": [ + 2.447936773300171, + 2.445019483566284, + 2.438453197479248, + 2.401923179626465, + 2.5346932411193848 + ], + "36": [ + 2.4888269901275635, + 2.191847085952759, + 2.1506924629211426, + 2.1331875324249268, + 2.2247748374938965 + ], + "37": [ + 1.82137131690979, + 1.7718479633331299, + 1.762279748916626, + 1.805389165878296, + 1.8042621612548828 + ], + "38": [ + 2.4564521312713623, + 2.7351372241973877, + 2.2422144412994385, + 2.271132707595825, + 2.6268062591552734 + ], + "39": [ + 15.272886514663696, + 16.56870722770691, + 15.184351682662964, + 17.65111756324768, + 16.851532697677612 + ], + "40": [ + 4.618505954742432, + 4.582245826721191, + 4.805966854095459, + 4.843486547470093, + 4.6164655685424805 + ], + "41": [ + 0.15246796607971191, + 0.12736821174621582, + 0.123199462890625, + 0.13370347023010254, + 0.13098549842834473 + ], + "42": [ + 0.8077573776245117, + 0.7788431644439697, + 0.755042314529419, + 0.7611534595489502, + 0.7864503860473633 + ], + "43": [ + 1.0223720073699951, + 1.0028953552246094, + 1.0250427722930908, + 1.0194573402404785, + 1.076014518737793 + ], + "44": [ + 1.1571755409240723, + 1.1396656036376953, + 1.1511790752410889, + 1.1235918998718262, + 1.1504936218261719 + ], + "45": [ + 1.8097100257873535, + 1.8120465278625488, + 1.8139166831970215, + 1.7709436416625977, + 1.7835633754730225 + ], + "46": [ + 2.4002232551574707, + 2.2962591648101807, + 2.38244366645813, + 2.4457669258117676, + 2.4249236583709717 + ], + "47": [ + 5.617861032485962, + 5.408482551574707, + 5.431608438491821, + 5.449252128601074, + 5.623114109039307 + ], + "48": [ + 1.638838768005371, + 1.68455171585083, + 1.6805500984191895, + 1.6506848335266113, + 1.642219066619873 + ], + "49": [ + 4.706164598464966, + 4.6550188064575195, + 4.703380346298218, + 4.4827210903167725, + 4.653671503067017 + ], + "50": [ + 6.328258275985718, + 6.51447057723999, + 6.318875789642334, + 6.525310277938843, + 6.306564569473267 + ], + "51": [ + 7.000143051147461, + 6.798591613769531, + 6.729523658752441, + 6.572738170623779, + 6.712196111679077 + ], + "52": [ + 0.919975757598877, + 0.8573052883148193, + 0.8529157638549805, + 0.8662893772125244, + 0.8644464015960693 + ], + "53": [ + 1.268928050994873, + 1.1189360618591309, + 1.155031442642212, + 1.151642084121704, + 1.1593868732452393 + ], + "54": [ + 2.8306691646575928, + 2.8038384914398193, + 2.8667569160461426, + 2.8659167289733887, + 2.8607115745544434 + ], + "55": [ + 0.9192891120910645, + 0.8829891681671143, + 0.9207160472869873, + 0.9063892364501953, + 0.9118208885192871 + ], + "56": [ + 2.098660469055176, + 2.0563149452209473, + 2.0371997356414795, + 2.0526468753814697, + 2.052966594696045 + ], + "57": [ + 3.1710643768310547, + 3.102032423019409, + 3.1820738315582275, + 3.2220702171325684, + 3.1981401443481445 + ], + "58": [ + 4.2822654247283936, + 4.219950199127197, + 4.260200023651123, + 4.203465700149536, + 4.235920667648315 + ], + "59": [ + 2.8434674739837646, + 2.8698465824127197, + 2.958869218826294, + 2.9485886096954346, + 2.944758176803589 + ], + "60": [ + 2.0825767517089844, + 2.0261218547821045, + 2.093472957611084, + 2.028120756149292, + 2.0296173095703125 + ], + "61": [ + 4.164551496505737, + 4.247161149978638, + 4.208996057510376, + 4.154303073883057, + 4.216529369354248 + ], + "62": [ + 0.6185667514801025, 
+ 0.6253015995025635, + 0.6186847686767578, + 0.6257288455963135, + 0.6202499866485596 + ], + "63": [ + 1.088174819946289, + 1.0861599445343018, + 1.045111894607544, + 1.0676589012145996, + 1.0662899017333984 + ], + "64": [ + 11.732340097427368, + 11.538040399551392, + 11.864402532577515, + 11.667150020599365, + 11.87758994102478 + ], + "65": [ + 6.239193439483643, + 6.3904407024383545, + 6.047687768936157, + 6.067760467529297, + 6.426549196243286 + ], + "66": [ + 2.0485336780548096, + 2.0909292697906494, + 2.0843634605407715, + 2.114530324935913, + 2.2097740173339844 + ], + "67": [ + 23.676459312438965, + 24.08505654335022, + 24.311410903930664, + 23.65312933921814, + 23.644501447677612 + ], + "68": [ + 2.6239256858825684, + 2.68916654586792, + 2.713677406311035, + 2.735755443572998, + 2.6569156646728516 + ], + "69": [ + 1.3641211986541748, + 1.3893208503723145, + 1.4258897304534912, + 1.406707763671875, + 1.4287145137786865 + ], + "70": [ + 2.3978335857391357, + 2.388171911239624, + 2.311910629272461, + 2.323887825012207, + 2.357830286026001 + ], + "71": [ + 2.148318290710449, + 2.0978829860687256, + 2.1367037296295166, + 2.1240034103393555, + 2.096367359161377 + ], + "72": [ + 55.71474242210388, + 55.65497851371765, + 55.09920024871826, + 54.11650276184082, + 56.20997095108032 + ], + "73": [ + 0.981971025466919, + 1.0062155723571777, + 0.9884769916534424, + 1.0174438953399658, + 0.9753994941711426 + ], + "74": [ + 6.0974955558776855, + 6.090858697891235, + 6.104249477386475, + 6.180070161819458, + 6.250415325164795 + ], + "75": [ + 7.970996618270874, + 7.890105485916138, + 7.769358396530151, + 7.92317795753479, + 7.910406589508057 + ], + "76": [ + 1.833014726638794, + 1.8315975666046143, + 1.8497719764709473, + 1.8833909034729004, + 1.8657946586608887 + ], + "77": [ + 2.430835008621216, + 2.378892660140991, + 2.253649950027466, + 2.3566322326660156, + 2.381143093109131 + ], + "78": [ + 13.936902523040771, + 14.407816886901855, + 14.292723894119263, + 14.425165891647339, + 14.25799822807312 + ], + "79": [ + 1.7828254699707031, + 1.8427963256835938, + 1.8661816120147705, + 1.9232194423675537, + 1.905480146408081 + ], + "80": [ + 10.53323483467102, + 10.418278694152832, + 10.549378871917725, + 10.663471698760986, + 10.687374830245972 + ], + "81": [ + 1.3656373023986816, + 1.3399004936218262, + 1.3288192749023438, + 1.4544517993927002, + 1.3790254592895508 + ], + "82": [ + 3.3641715049743652, + 3.279186964035034, + 3.213341236114502, + 3.3441648483276367, + 3.389477014541626 + ], + "83": [ + 0.7808544635772705, + 0.7731945514678955, + 0.8409247398376465, + 0.9210186004638672, + 0.8299705982208252 + ], + "84": [ + 0.7040410041809082, + 0.7207241058349609, + 0.7018799781799316, + 0.8818099498748779, + 0.6937227249145508 + ], + "85": [ + 1.421285629272461, + 1.4219837188720703, + 1.5009398460388184, + 1.5087964534759521, + 1.4539406299591064 + ], + "86": [ + 0.7153642177581787, + 0.653031587600708, + 0.6546626091003418, + 0.7315728664398193, + 0.6839661598205566 + ], + "87": [ + 2.6152150630950928, + 2.4368674755096436, + 2.447220802307129, + 2.4948906898498535, + 2.4271810054779053 + ], + "88": [ + 3.7563509941101074, + 3.7107393741607666, + 3.740159511566162, + 3.9487733840942383, + 3.7908895015716553 + ], + "89": [ + 1.3733341693878174, + 1.3950858116149902, + 1.3895792961120605, + 1.434366226196289, + 1.4087369441986084 + ], + "90": [ + 0.32382941246032715, + 0.3856828212738037, + 0.31131482124328613, + 0.3739144802093506, + 0.3414499759674072 + ], + "91": [ + 0.4922313690185547, + 
0.4702589511871338, + 0.4913055896759033, + 0.547508955001831, + 0.47322750091552734 + ], + "92": [ + 0.5397188663482666, + 0.5313537120819092, + 0.5446431636810303, + 0.5584907531738281, + 0.5503730773925781 + ], + "93": [ + 7.6846864223480225, + 7.392237186431885, + 7.6069016456604, + 7.612182378768921, + 7.472257614135742 + ], + "94": [ + 4.253883123397827, + 4.180011034011841, + 4.123626470565796, + 4.342567205429077, + 4.465695381164551 + ], + "95": [ + 14.060906887054443, + 14.277164697647095, + 14.19387698173523, + 14.002771854400635, + 15.193023920059204 + ], + "96": [ + 0.526421070098877, + 0.5006999969482422, + 0.5214729309082031, + 0.5473310947418213, + 0.517216682434082 + ], + "97": [ + 3.6567018032073975, + 3.5691311359405518, + 3.5199546813964844, + 3.6811041831970215, + 3.667187213897705 + ], + "98": [ + 3.188143253326416, + 3.1263539791107178, + 3.1795992851257324, + 3.23490834236145, + 3.170579671859741 + ], + "99": [ + 1.0296411514282227, + 1.0101523399353027, + 1.0162627696990967, + 1.0697321891784668, + 1.0347280502319336 + ] +} \ No newline at end of file diff --git a/docs/source/contributor-guide/benchmark-results/0.4.0/comet-tpch.json b/docs/source/contributor-guide/benchmark-results/0.4.0/comet-tpch.json new file mode 100644 index 000000000..25404f872 --- /dev/null +++ b/docs/source/contributor-guide/benchmark-results/0.4.0/comet-tpch.json @@ -0,0 +1,219 @@ +{ + "engine": "datafusion-comet", + "benchmark": "tpch", + "data_path": "/mnt/bigdata/tpch/sf100/", + "query_path": "/tpch", + "spark_conf": { + "spark.comet.cast.allowIncompatible": "true", + "spark.kubernetes.memoryOverheadFactor": "0.4", + "spark.kubernetes.driver.volumes.hostPath.data.mount.path": "/mnt/bigdata", + "spark.driver.host": "tpcbench-py-a08b54934067a8b3-driver-svc.default.svc", + "spark.memory.offHeap.size": "32g", + "spark.master": "k8s://https://kube1:6443", + "spark.kubernetes.driver.volumes.hostPath.data.options.path": "/mnt/bigdata", + "spark.comet.exec.all.enabled": "true", + "spark.driver.bindAddress": "10.42.0.10", + "spark.eventLog.enabled": "false", + "spark.kubernetes.container.image.pullPolicy": "Always", + "spark.kubernetes.submitInDriver": "true", + "spark.app.submitTime": "1731952357631", + "spark.sql.warehouse.dir": "file:/opt/spark/work-dir/spark-warehouse", + "spark.driver.extraJavaOptions": "-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false", + "spark.jars": "local:/opt/spark/jars/comet-spark-spark3.4_2.12-0.4.0-SNAPSHOT.jar", + "spark.shuffle.manager": "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager", + "spark.comet.exec.enabled": "true", + "spark.executor.id": "driver", + "spark.kubernetes.driver.volumes.persistentVolumeClaim.benchmark-results-pv.mount.readOnly": 
"false", + "spark.comet.exec.shuffle.mode": "auto", + "spark.driver.memory": "8G", + "spark.driver.port": "7078", + "spark.sql.extensions": "org.apache.comet.CometSparkSessionExtensions", + "spark.kubernetes.driver.volumes.hostPath.data.options.readOnly": "false", + "spark.kubernetes.executor.podNamePrefix": "comet-benchmark-derived-from-tpch-42d900934067b351", + "spark.kubernetes.executor.volumes.hostPath.data.mount.path": "/mnt/bigdata", + "spark.kubernetes.driver.container.image": "woody:5000/spark-tpcbench:comet", + "spark.executor.instances": "1", + "spark.kubernetes.driver.pod.name": "tpcbench-py-a08b54934067a8b3-driver", + "spark.executor.extraClassPath": "local:///opt/spark/jars/comet-spark-spark3.4_2.12-0.4.0-SNAPSHOT.jar", + "spark.comet.explain.native.enabled": "false", + "spark.executor.memoryOverhead": "4914M", + "spark.app.initial.jar.urls": "file:/opt/spark/jars/comet-spark-spark3.4_2.12-0.4.0-SNAPSHOT.jar", + "spark.kubernetes.authenticate.driver.serviceAccountName": "spark", + "spark.kubernetes.executor.container.image": "woody:5000/spark-tpcbench:comet", + "spark.kubernetes.driver.volumes.persistentVolumeClaim.benchmark-results-pv.options.claimName": "benchmark-results-pvc", + "spark.comet.exec.replaceSortMergeJoin": "true", + "spark.comet.exec.shuffle.enabled": "true", + "spark.memory.offHeap.enabled": "true", + "spark.app.name": "comet benchmark derived from tpch", + "spark.executor.memory": "16G", + "spark.serializer.objectStreamReset": "100", + "spark.submit.deployMode": "client", + "spark.app.id": "spark-71d7e005c63b4f7a9a54d11eaa5d99d4", + "spark.executor.cores": "8", + "spark.driver.blockManager.port": "7079", + "spark.repl.local.jars": "local:///opt/spark/jars/comet-spark-spark3.4_2.12-0.4.0-SNAPSHOT.jar", + "spark.driver.extraClassPath": "local:///opt/spark/jars/comet-spark-spark3.4_2.12-0.4.0-SNAPSHOT.jar", + "spark.plugins": "org.apache.spark.CometPlugin", + "spark.app.startTime": "1731952357848", + "spark.kubernetes.resource.type": "python", + "spark.kubernetes.driver.volumes.persistentVolumeClaim.benchmark-results-pv.mount.path": "/mnt/benchmark-results", + "spark.rdd.compress": "True", + "spark.kubernetes.executor.volumes.hostPath.data.options.path": "/mnt/bigdata", + "spark.executor.extraJavaOptions": "-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false", + "spark.submit.pyFiles": "" + }, + "1": [ + 12.675217866897583, + 10.69823956489563, + 10.959712743759155, + 11.049235582351685, + 11.25637674331665 + ], + "2": [ + 7.2883970737457275, + 6.955080509185791, + 6.996195316314697, + 7.001482725143433, + 6.981860399246216 + ], + "3": [ + 17.389552116394043, + 17.815346479415894, + 18.098077297210693, + 18.250796794891357, + 18.289159297943115 + ], + "4": [ + 
9.9054114818573, + 9.76110053062439, + 9.778793573379517, + 9.659870386123657, + 9.890227794647217 + ], + "5": [ + 29.422667026519775, + 29.41081738471985, + 29.542785167694092, + 29.379701375961304, + 30.072590589523315 + ], + "6": [ + 2.591843843460083, + 2.5433740615844727, + 2.486654758453369, + 2.5132291316986084, + 2.507908821105957 + ], + "7": [ + 17.24040937423706, + 17.355087280273438, + 17.514805555343628, + 17.43350887298584, + 17.35370373725891 + ], + "8": [ + 29.68608021736145, + 29.521278858184814, + 29.879571437835693, + 29.99302840232849, + 30.18179965019226 + ], + "9": [ + 44.744760274887085, + 45.07009744644165, + 45.67892384529114, + 45.49783205986023, + 46.2200129032135 + ], + "10": [ + 16.815892696380615, + 16.816420078277588, + 16.754045486450195, + 16.916579008102417, + 16.823299169540405 + ], + "11": [ + 5.107349395751953, + 4.9230499267578125, + 4.9613683223724365, + 4.954375743865967, + 4.949444770812988 + ], + "12": [ + 5.476846694946289, + 5.373950719833374, + 5.399810791015625, + 5.373565196990967, + 5.389242172241211 + ], + "13": [ + 11.168701410293579, + 11.108731031417847, + 11.159045219421387, + 10.97795581817627, + 11.064870119094849 + ], + "14": [ + 5.262895584106445, + 5.161571502685547, + 5.15362286567688, + 5.113251209259033, + 5.200662136077881 + ], + "15": [ + 9.296111345291138, + 9.049000263214111, + 8.952059745788574, + 9.111369371414185, + 9.09269094467163 + ], + "16": [ + 6.102790832519531, + 5.706904411315918, + 5.573681831359863, + 5.436795473098755, + 5.416778802871704 + ], + "17": [ + 34.57182478904724, + 34.73775792121887, + 34.8440580368042, + 35.2716920375824, + 35.55662488937378 + ], + "18": [ + 32.07862377166748, + 31.731548070907593, + 31.748567819595337, + 31.76587414741516, + 31.907649993896484 + ], + "19": [ + 6.052667140960693, + 5.940260887145996, + 5.960237264633179, + 6.020690441131592, + 6.060962677001953 + ], + "20": [ + 10.794468641281128, + 10.485475778579712, + 10.518470525741577, + 10.661662101745605, + 10.678892850875854 + ], + "21": [ + 47.43690896034241, + 46.92784881591797, + 46.689401626586914, + 52.07501554489136, + 53.362786054611206 + ], + "22": [ + 4.79883337020874, + 4.179429054260254, + 4.191235303878784, + 4.875058889389038, + 5.055111885070801 + ] +} \ No newline at end of file diff --git a/docs/source/contributor-guide/benchmark-results/0.4.0/spark-tpcds.json b/docs/source/contributor-guide/benchmark-results/0.4.0/spark-tpcds.json new file mode 100644 index 000000000..69da37627 --- /dev/null +++ b/docs/source/contributor-guide/benchmark-results/0.4.0/spark-tpcds.json @@ -0,0 +1,742 @@ +{ + "engine": "datafusion-comet", + "benchmark": "tpcds", + "data_path": "/mnt/bigdata/tpcds/sf100/", + "query_path": "/tpcds", + "spark_conf": { + "spark.executor.instances": "4", + "spark.kubernetes.memoryOverheadFactor": "0.4", + "spark.kubernetes.driver.pod.name": "tpcbench-py-630faa934132c6a0-driver", + "spark.kubernetes.authenticate.driver.serviceAccountName": "spark", + "spark.kubernetes.driver.volumes.hostPath.data.mount.path": "/mnt/bigdata", + "spark.kubernetes.driver.volumes.persistentVolumeClaim.benchmark-results-pv.options.claimName": "benchmark-results-pvc", + "spark.kubernetes.executor.podNamePrefix": "spark-benchmark-derived-from-tpcds-e2f7c6934132d1e7", + "spark.memory.offHeap.enabled": "true", + "spark.memory.offHeap.size": "32g", + "spark.master": "k8s://https://kube1:6443", + "spark.driver.bindAddress": "10.42.1.12", + "spark.app.id": "spark-70c8317ee1414dd18a7f648dd9213dc3", + 
"spark.kubernetes.executor.container.image": "apache/spark:3.4.2", + "spark.kubernetes.driver.container.image": "woody:5000/spark-tpcbench:spark", + "spark.app.startTime": "1731965669512", + "spark.serializer.objectStreamReset": "100", + "spark.executor.memory": "8g", + "spark.submit.deployMode": "client", + "spark.kubernetes.driver.volumes.hostPath.data.options.path": "/mnt/bigdata", + "spark.eventLog.enabled": "false", + "spark.kubernetes.container.image.pullPolicy": "Always", + "spark.kubernetes.submitInDriver": "true", + "spark.sql.warehouse.dir": "file:/opt/spark/work-dir/spark-warehouse", + "spark.driver.extraJavaOptions": "-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false", + "spark.app.submitTime": "1731965669258", + "spark.driver.blockManager.port": "7079", + "spark.driver.host": "tpcbench-py-630faa934132c6a0-driver-svc.default.svc", + "spark.executor.id": "driver", + "spark.kubernetes.driver.volumes.persistentVolumeClaim.benchmark-results-pv.mount.readOnly": "false", + "spark.driver.memory": "8G", + "spark.driver.port": "7078", + "spark.kubernetes.driver.volumes.hostPath.data.options.readOnly": "false", + "spark.kubernetes.resource.type": "python", + "spark.kubernetes.executor.volumes.hostPath.data.mount.path": "/mnt/bigdata", + "spark.executor.cores": "6", + "spark.kubernetes.driver.volumes.persistentVolumeClaim.benchmark-results-pv.mount.path": "/mnt/benchmark-results", + "spark.rdd.compress": "True", + "spark.kubernetes.executor.volumes.hostPath.data.options.path": "/mnt/bigdata", + "spark.executor.extraJavaOptions": "-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false", + "spark.submit.pyFiles": "", + "spark.app.name": "spark benchmark derived from tpcds" + }, + "1": [ + 4.101798057556152, + 1.6628072261810303, + 1.6944208145141602, + 1.5850894451141357, + 1.6819756031036377 + ], + "2": [ + 2.505007743835449, + 1.4451699256896973, + 1.4508037567138672, + 1.4211256504058838, + 1.4528944492340088 + 
], + "3": [ + 1.0310730934143066, + 0.78733229637146, + 0.8511149883270264, + 0.7833230495452881, + 0.8547565937042236 + ], + "4": [ + 30.011910915374756, + 29.12224292755127, + 28.913774251937866, + 28.686842441558838, + 29.61912488937378 + ], + "5": [ + 6.010044097900391, + 5.845148086547852, + 6.015597820281982, + 5.935771703720093, + 5.921597957611084 + ], + "6": [ + 1.7483971118927002, + 1.4204390048980713, + 1.3826384544372559, + 1.4277729988098145, + 1.426151990890503 + ], + "7": [ + 2.1159181594848633, + 1.8394184112548828, + 1.7851722240447998, + 1.9111988544464111, + 1.8159196376800537 + ], + "8": [ + 1.8837699890136719, + 1.8838860988616943, + 1.7019884586334229, + 1.7169978618621826, + 1.7927045822143555 + ], + "9": [ + 3.0422322750091553, + 2.8414883613586426, + 2.9845988750457764, + 2.8628768920898438, + 2.875807523727417 + ], + "10": [ + 1.9554328918457031, + 1.746701955795288, + 1.7458853721618652, + 1.6303400993347168, + 1.6505646705627441 + ], + "11": [ + 10.3078453540802, + 9.928950548171997, + 10.273672103881836, + 9.962933778762817, + 9.891152143478394 + ], + "12": [ + 1.1306350231170654, + 0.966578483581543, + 1.0629677772521973, + 0.9184162616729736, + 0.8431236743927002 + ], + "13": [ + 2.5771689414978027, + 2.2377328872680664, + 2.2578840255737305, + 2.2780606746673584, + 2.208707332611084 + ], + "14": [ + 31.266183137893677, + 29.093640327453613, + 29.287079572677612, + 29.792733430862427, + 29.16390872001648 + ], + "15": [ + 2.7301526069641113, + 2.42869234085083, + 2.4819107055664062, + 2.494722604751587, + 2.5909152030944824 + ], + "16": [ + 8.37593412399292, + 8.275684833526611, + 8.2781822681427, + 8.58998966217041, + 8.340266942977905 + ], + "17": [ + 3.3585970401763916, + 3.002685546875, + 2.973276138305664, + 2.8865857124328613, + 2.9933559894561768 + ], + "18": [ + 2.930973529815674, + 2.7753474712371826, + 2.721234083175659, + 2.608856678009033, + 2.630030870437622 + ], + "19": [ + 2.173407793045044, + 1.8570218086242676, + 1.9925012588500977, + 1.915762186050415, + 1.8977746963500977 + ], + "20": [ + 1.1954548358917236, + 0.9028189182281494, + 0.9437844753265381, + 0.8968183994293213, + 0.8379025459289551 + ], + "21": [ + 1.4207208156585693, + 1.4522266387939453, + 1.318542718887329, + 1.3398542404174805, + 1.2218236923217773 + ], + "22": [ + 14.03326940536499, + 14.454235315322876, + 13.709498643875122, + 14.177965641021729, + 13.597661256790161 + ], + "23": [ + 57.87645149230957, + 56.65002155303955, + 57.25854849815369, + 56.565542221069336, + 56.94704866409302 + ], + "24": [ + 12.221173524856567, + 10.913657665252686, + 10.763681888580322, + 12.460498809814453, + 10.905694246292114 + ], + "25": [ + 3.1492040157318115, + 2.946575880050659, + 2.888327121734619, + 3.002459764480591, + 3.161397933959961 + ], + "26": [ + 1.3824872970581055, + 1.3174099922180176, + 1.4262938499450684, + 1.411545991897583, + 1.3012707233428955 + ], + "27": [ + 1.6621944904327393, + 1.7461235523223877, + 1.6490883827209473, + 1.6821620464324951, + 1.668259859085083 + ], + "28": [ + 4.79725980758667, + 4.714123010635376, + 4.665875196456909, + 5.1792051792144775, + 4.8896565437316895 + ], + "29": [ + 4.536823749542236, + 4.077563047409058, + 4.396845817565918, + 4.073688268661499, + 4.367259502410889 + ], + "30": [ + 1.7702891826629639, + 1.8158881664276123, + 1.6796913146972656, + 1.9046649932861328, + 1.8696742057800293 + ], + "31": [ + 4.611050367355347, + 4.022041320800781, + 4.182346343994141, + 4.287677764892578, + 4.402425050735474 + ], + "32": [ + 1.1875083446502686, 
+ 1.101001501083374, + 1.261136531829834, + 1.2711787223815918, + 1.1264948844909668 + ], + "33": [ + 1.9605779647827148, + 2.101670980453491, + 1.995006799697876, + 2.0540874004364014, + 1.9772546291351318 + ], + "34": [ + 1.1797206401824951, + 1.037219762802124, + 1.116685152053833, + 1.3116002082824707, + 1.1500279903411865 + ], + "35": [ + 2.6695785522460938, + 2.653355121612549, + 2.738192081451416, + 2.6304309368133545, + 2.7922165393829346 + ], + "36": [ + 1.4320690631866455, + 1.3371284008026123, + 1.399195909500122, + 1.3704102039337158, + 1.3641293048858643 + ], + "37": [ + 2.7905285358428955, + 2.6404833793640137, + 2.481349229812622, + 2.54872989654541, + 2.605477809906006 + ], + "38": [ + 3.9898948669433594, + 4.339789867401123, + 4.0293896198272705, + 3.894653081893921, + 4.017303943634033 + ], + "39": [ + 6.200651168823242, + 5.982529163360596, + 6.708714723587036, + 6.462383508682251, + 6.6200456619262695 + ], + "40": [ + 5.8705267906188965, + 6.351257085800171, + 6.25710129737854, + 6.0561816692352295, + 6.249178171157837 + ], + "41": [ + 0.24708271026611328, + 0.2163989543914795, + 0.21043944358825684, + 0.19862890243530273, + 0.20019888877868652 + ], + "42": [ + 0.878232479095459, + 0.8881511688232422, + 0.9662492275238037, + 0.925433874130249, + 0.9298341274261475 + ], + "43": [ + 0.9265544414520264, + 0.8833436965942383, + 0.9058196544647217, + 0.9031116962432861, + 0.8841662406921387 + ], + "44": [ + 0.5191357135772705, + 0.48758602142333984, + 0.5332913398742676, + 0.5114614963531494, + 0.4971604347229004 + ], + "45": [ + 1.7107188701629639, + 1.6529803276062012, + 1.6080644130706787, + 1.671886920928955, + 1.6790966987609863 + ], + "46": [ + 2.0748560428619385, + 1.9469430446624756, + 2.0227723121643066, + 2.011340379714966, + 1.9434280395507812 + ], + "47": [ + 3.5550601482391357, + 3.700406312942505, + 3.5939836502075195, + 3.6490423679351807, + 3.4499003887176514 + ], + "48": [ + 3.7200522422790527, + 3.7572500705718994, + 3.7702255249023438, + 3.7124366760253906, + 3.730222463607788 + ], + "49": [ + 5.228905200958252, + 4.625717878341675, + 4.951889276504517, + 4.957994222640991, + 5.071847438812256 + ], + "50": [ + 9.543980360031128, + 9.599350452423096, + 9.469555377960205, + 9.601779699325562, + 9.497092962265015 + ], + "51": [ + 11.083372354507446, + 9.904197931289673, + 10.511804342269897, + 10.427511930465698, + 11.04982876777649 + ], + "52": [ + 1.1101248264312744, + 1.0503735542297363, + 1.034118413925171, + 0.858811616897583, + 1.0684659481048584 + ], + "53": [ + 1.2383568286895752, + 1.2415111064910889, + 1.2688398361206055, + 1.2551381587982178, + 1.2679004669189453 + ], + "54": [ + 2.482556104660034, + 2.4029476642608643, + 2.4188714027404785, + 2.504854440689087, + 2.386593818664551 + ], + "55": [ + 1.0882360935211182, + 1.1614463329315186, + 1.0238258838653564, + 1.2537846565246582, + 1.060547113418579 + ], + "56": [ + 1.7780375480651855, + 1.8225603103637695, + 1.851440668106079, + 1.7312126159667969, + 1.8368158340454102 + ], + "57": [ + 2.49904203414917, + 2.4126837253570557, + 2.4750289916992188, + 2.433041572570801, + 2.3518331050872803 + ], + "58": [ + 2.17509388923645, + 2.3151907920837402, + 2.259735345840454, + 2.428464651107788, + 2.1495883464813232 + ], + "59": [ + 2.7380449771881104, + 2.754039764404297, + 2.6889383792877197, + 2.604966878890991, + 2.636704921722412 + ], + "60": [ + 1.7363862991333008, + 1.6587703227996826, + 1.5419552326202393, + 1.4795036315917969, + 1.6812565326690674 + ], + "61": [ + 2.4846465587615967, + 
2.387451410293579, + 2.4108707904815674, + 2.388557195663452, + 2.3161416053771973 + ], + "62": [ + 0.8136637210845947, + 0.7371957302093506, + 0.7617106437683105, + 0.7683372497558594, + 0.7747564315795898 + ], + "63": [ + 1.1109309196472168, + 1.1343533992767334, + 1.2317695617675781, + 1.133389949798584, + 1.1092209815979004 + ], + "64": [ + 14.743201732635498, + 15.078812599182129, + 14.671916007995605, + 15.006304025650024, + 15.00887680053711 + ], + "65": [ + 7.251803636550903, + 7.2695631980896, + 7.238022804260254, + 7.301697254180908, + 7.161722421646118 + ], + "66": [ + 1.9731743335723877, + 1.9496684074401855, + 1.9994518756866455, + 1.8531701564788818, + 1.930753231048584 + ], + "67": [ + 49.6675705909729, + 48.19621968269348, + 50.70038366317749, + 47.88284492492676, + 48.761271715164185 + ], + "68": [ + 2.0306437015533447, + 1.9414896965026855, + 1.9469645023345947, + 1.9012255668640137, + 1.9914145469665527 + ], + "69": [ + 1.7471072673797607, + 1.536611795425415, + 1.6588964462280273, + 1.6081433296203613, + 1.6223390102386475 + ], + "70": [ + 1.945021629333496, + 1.8145639896392822, + 1.812164545059204, + 1.849292278289795, + 1.8018312454223633 + ], + "71": [ + 1.9524719715118408, + 1.9845960140228271, + 1.9457154273986816, + 1.923093557357788, + 1.951244592666626 + ], + "72": [ + 12.546458959579468, + 11.85893964767456, + 12.32059645652771, + 11.475525379180908, + 11.872873306274414 + ], + "73": [ + 1.0386018753051758, + 1.0372083187103271, + 1.0216479301452637, + 0.9761593341827393, + 0.933098316192627 + ], + "74": [ + 9.800452470779419, + 10.76702070236206, + 11.209776163101196, + 9.91211724281311, + 10.263824701309204 + ], + "75": [ + 8.530277013778687, + 8.607342958450317, + 8.445864200592041, + 8.522737741470337, + 8.499200582504272 + ], + "76": [ + 1.8576176166534424, + 2.0158095359802246, + 1.891855239868164, + 1.8044593334197998, + 1.8446557521820068 + ], + "77": [ + 2.266920566558838, + 2.314894676208496, + 2.4184823036193848, + 2.4789552688598633, + 2.355562925338745 + ], + "78": [ + 22.44752526283264, + 23.02877902984619, + 22.419431447982788, + 22.97319006919861, + 22.363235473632812 + ], + "79": [ + 1.6027934551239014, + 1.4661269187927246, + 1.4280049800872803, + 1.4568893909454346, + 1.4736254215240479 + ], + "80": [ + 16.729133129119873, + 16.709152698516846, + 16.817840099334717, + 17.630494117736816, + 17.207946300506592 + ], + "81": [ + 2.1778812408447266, + 2.3530967235565186, + 2.1917331218719482, + 2.0935323238372803, + 2.1222338676452637 + ], + "82": [ + 6.663294792175293, + 7.8879075050354, + 7.217211961746216, + 6.868515968322754, + 6.5749123096466064 + ], + "83": [ + 1.5960721969604492, + 1.5132818222045898, + 1.4365019798278809, + 1.44840407371521, + 1.477241039276123 + ], + "84": [ + 1.163783073425293, + 1.1479411125183105, + 1.1228821277618408, + 1.2401597499847412, + 1.1172442436218262 + ], + "85": [ + 3.2570650577545166, + 2.7596423625946045, + 3.081651449203491, + 3.0925028324127197, + 3.0603017807006836 + ], + "86": [ + 0.7725980281829834, + 0.7427737712860107, + 0.7912232875823975, + 0.7065935134887695, + 0.7383038997650146 + ], + "87": [ + 4.294668912887573, + 4.660226345062256, + 4.278197765350342, + 4.472849130630493, + 3.904066562652588 + ], + "88": [ + 3.554309606552124, + 3.6045660972595215, + 3.5492982864379883, + 3.3328254222869873, + 3.5090348720550537 + ], + "89": [ + 1.3484902381896973, + 1.276850700378418, + 1.3466012477874756, + 1.2572429180145264, + 1.4886093139648438 + ], + "90": [ + 0.46999287605285645, + 
0.529099702835083, + 0.578873872756958, + 0.47667670249938965, + 0.48769092559814453 + ], + "91": [ + 0.7944860458374023, + 0.8916025161743164, + 0.832402229309082, + 0.7235825061798096, + 0.65761399269104 + ], + "92": [ + 0.9741818904876709, + 0.9964475631713867, + 0.992856502532959, + 0.9798798561096191, + 0.9271073341369629 + ], + "93": [ + 14.810303688049316, + 14.715499639511108, + 14.565584897994995, + 14.738952398300171, + 14.814255237579346 + ], + "94": [ + 5.665013790130615, + 5.276642084121704, + 5.49097204208374, + 5.194403886795044, + 5.2361180782318115 + ], + "95": [ + 13.44327449798584, + 13.04519772529602, + 13.047563791275024, + 13.274654865264893, + 13.301786184310913 + ], + "96": [ + 0.5319077968597412, + 0.5078849792480469, + 0.5532786846160889, + 0.49875950813293457, + 0.529167890548706 + ], + "97": [ + 7.3789963722229, + 7.497219800949097, + 7.451964855194092, + 7.59662938117981, + 7.404102802276611 + ], + "98": [ + 1.5613372325897217, + 1.698850154876709, + 1.6474971771240234, + 1.6323800086975098, + 1.5777685642242432 + ], + "99": [ + 1.1006407737731934, + 1.109886884689331, + 1.0147747993469238, + 2.1191627979278564, + 1.0859904289245605 + ] +} \ No newline at end of file diff --git a/docs/source/contributor-guide/benchmark-results/0.4.0/spark-tpch.json b/docs/source/contributor-guide/benchmark-results/0.4.0/spark-tpch.json new file mode 100644 index 000000000..f0a080aa9 --- /dev/null +++ b/docs/source/contributor-guide/benchmark-results/0.4.0/spark-tpch.json @@ -0,0 +1,203 @@ +{ + "engine": "datafusion-comet", + "benchmark": "tpch", + "data_path": "/mnt/bigdata/tpch/sf100/", + "query_path": "/tpch", + "spark_conf": { + "spark.kubernetes.executor.podNamePrefix": "spark-benchmark-derived-from-tpch-91eee2934092023f", + "spark.kubernetes.memoryOverheadFactor": "0.4", + "spark.kubernetes.authenticate.driver.serviceAccountName": "spark", + "spark.kubernetes.driver.volumes.hostPath.data.mount.path": "/mnt/bigdata", + "spark.kubernetes.driver.volumes.persistentVolumeClaim.benchmark-results-pv.options.claimName": "benchmark-results-pvc", + "spark.memory.offHeap.enabled": "true", + "spark.app.startTime": "1731955130576", + "spark.app.id": "spark-35db63a6dc9b42c79428c048244d508f", + "spark.master": "k8s://https://kube1:6443", + "spark.memory.offHeap.size": "48g", + "spark.kubernetes.driver.pod.name": "tpcbench-py-bca54a934091f6cd-driver", + "spark.kubernetes.executor.container.image": "apache/spark:3.4.2", + "spark.kubernetes.driver.container.image": "woody:5000/spark-tpcbench:spark", + "spark.executor.memory": "16G", + "spark.serializer.objectStreamReset": "100", + "spark.submit.deployMode": "client", + "spark.driver.host": "tpcbench-py-bca54a934091f6cd-driver-svc.default.svc", + "spark.kubernetes.driver.volumes.hostPath.data.options.path": "/mnt/bigdata", + "spark.eventLog.enabled": "false", + "spark.kubernetes.container.image.pullPolicy": "Always", + "spark.executor.cores": "8", + "spark.kubernetes.submitInDriver": "true", + "spark.sql.warehouse.dir": "file:/opt/spark/work-dir/spark-warehouse", + "spark.driver.extraJavaOptions": "-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED 
--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false", + "spark.driver.blockManager.port": "7079", + "spark.driver.bindAddress": "10.42.0.12", + "spark.executor.id": "driver", + "spark.kubernetes.driver.volumes.persistentVolumeClaim.benchmark-results-pv.mount.readOnly": "false", + "spark.driver.memory": "8G", + "spark.driver.port": "7078", + "spark.kubernetes.driver.volumes.hostPath.data.options.readOnly": "false", + "spark.kubernetes.resource.type": "python", + "spark.kubernetes.executor.volumes.hostPath.data.mount.path": "/mnt/bigdata", + "spark.kubernetes.driver.volumes.persistentVolumeClaim.benchmark-results-pv.mount.path": "/mnt/benchmark-results", + "spark.rdd.compress": "True", + "spark.app.name": "spark benchmark derived from tpch", + "spark.kubernetes.executor.volumes.hostPath.data.options.path": "/mnt/bigdata", + "spark.executor.extraJavaOptions": "-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false", + "spark.executor.instances": "1", + "spark.app.submitTime": "1731955130325", + "spark.submit.pyFiles": "" + }, + "1": [ + 76.3459792137146, + 77.43206644058228, + 77.43688368797302, + 77.57843589782715, + 77.36109328269958 + ], + "2": [ + 11.60047721862793, + 12.094002485275269, + 11.385731935501099, + 11.742280006408691, + 10.831515073776245 + ], + "3": [ + 23.393443822860718, + 23.679790496826172, + 23.2339928150177, + 23.296000957489014, + 23.57963228225708 + ], + "4": [ + 17.686111450195312, + 17.663344860076904, + 17.97491693496704, + 17.99352526664734, + 17.13844132423401 + ], + "5": [ + 47.576931953430176, + 47.90366792678833, + 47.885528802871704, + 47.82782196998596, + 47.35511922836304 + ], + "6": [ + 3.1415493488311768, + 3.0869905948638916, + 3.0410678386688232, + 3.0086350440979004, + 3.006296157836914 + ], + "7": [ + 20.144665241241455, + 20.645880699157715, + 20.30326247215271, + 20.52312994003296, + 20.44648313522339 + ], + "8": [ + 31.149712800979614, + 31.346466541290283, + 31.87547278404236, + 31.968525409698486, + 31.624362230300903 + ], + "9": [ + 70.20270848274231, + 73.088698387146, + 71.15446996688843, + 70.36094760894775, + 71.0561454296112 + ], + "10": [ + 19.035035133361816, + 19.235825538635254, + 19.032540559768677, + 18.992990732192993, + 18.960989236831665 + ], + "11": [ + 12.353803873062134, + 12.383912324905396, + 12.390283346176147, + 12.384623289108276, + 12.608469009399414 + ], + "12": [ + 
12.46756649017334, + 12.692992210388184, + 13.037216424942017, + 14.199844360351562, + 12.41795563697815 + ], + "13": [ + 21.14127492904663, + 21.24750590324402, + 21.330607652664185, + 21.19257688522339, + 21.117241144180298 + ], + "14": [ + 5.365051031112671, + 5.244381904602051, + 5.224266290664673, + 5.279320478439331, + 5.351390600204468 + ], + "15": [ + 14.187044382095337, + 14.210954904556274, + 14.301335096359253, + 14.133733034133911, + 14.062178373336792 + ], + "16": [ + 6.709480047225952, + 6.949545383453369, + 6.819269180297852, + 6.797849178314209, + 7.115732192993164 + ], + "17": [ + 59.576388120651245, + 60.451228857040405, + 60.65900158882141, + 60.63210201263428, + 60.68089413642883 + ], + "18": [ + 69.60278511047363, + 70.1350531578064, + 69.89858436584473, + 70.29715323448181, + 69.83147168159485 + ], + "19": [ + 6.37906551361084, + 6.309690952301025, + 6.3234429359436035, + 6.329152345657349, + 6.348209381103516 + ], + "20": [ + 9.532709121704102, + 9.26326322555542, + 9.232454776763916, + 9.317341089248657, + 9.209791898727417 + ], + "21": [ + 65.31082701683044, + 65.26856255531311, + 64.04693102836609, + 64.84621214866638, + 64.52755498886108 + ], + "22": [ + 8.907754182815552, + 8.766010046005249, + 8.830661535263062, + 8.698703527450562, + 8.652528524398804 + ] +} \ No newline at end of file diff --git a/docs/source/contributor-guide/benchmark-results/0.2.0/datafusion-python-tpch.json b/docs/source/contributor-guide/benchmark-results/datafusion-40.1.0/datafusion-python-tpch.json similarity index 100% rename from docs/source/contributor-guide/benchmark-results/0.2.0/datafusion-python-tpch.json rename to docs/source/contributor-guide/benchmark-results/datafusion-40.1.0/datafusion-python-tpch.json diff --git a/docs/source/contributor-guide/benchmark-results/tpc-ds.md b/docs/source/contributor-guide/benchmark-results/tpc-ds.md index 5575fef02..3fa70eb86 100644 --- a/docs/source/contributor-guide/benchmark-results/tpc-ds.md +++ b/docs/source/contributor-guide/benchmark-results/tpc-ds.md @@ -19,27 +19,27 @@ under the License. # Apache DataFusion Comet: Benchmarks Derived From TPC-DS -The following benchmarks were performed on a Linux workstation with PCIe 5, AMD 7950X CPU (16 cores), 128 GB RAM, and +The following benchmarks were performed on a two node Kubernetes cluster with data stored locally in Parquet format on NVMe storage. Performance characteristics will vary in different environments and we encourage you to run these benchmarks in your own environments. The tracking issue for improving TPC-DS performance is [#858](https://github.com/apache/datafusion-comet/issues/858). -![](../../_static/images/benchmark-results/0.3.0/tpcds_allqueries.png) +![](../../_static/images/benchmark-results/0.4.0/tpcds_allqueries.png) Here is a breakdown showing relative performance of Spark and Comet for each query. -![](../../_static/images/benchmark-results/0.3.0/tpcds_queries_compare.png) +![](../../_static/images/benchmark-results/0.4.0/tpcds_queries_compare.png) The following chart shows how much Comet currently accelerates each query from the benchmark in relative terms. -![](../../_static/images/benchmark-results/0.3.0/tpcds_queries_speedup_rel.png) +![](../../_static/images/benchmark-results/0.4.0/tpcds_queries_speedup_rel.png) The following chart shows how much Comet currently accelerates each query from the benchmark in absolute terms. 
-![](../../_static/images/benchmark-results/0.3.0/tpcds_queries_speedup_abs.png) +![](../../_static/images/benchmark-results/0.4.0/tpcds_queries_speedup_abs.png) The raw results of these benchmarks in JSON format is available here: -- [Spark](0.3.0/spark-tpcds.json) -- [Comet](0.3.0/comet-tpcds.json) +- [Spark](0.4.0/spark-tpcds.json) +- [Comet](0.4.0/comet-tpcds.json) diff --git a/docs/source/contributor-guide/benchmark-results/tpc-h.md b/docs/source/contributor-guide/benchmark-results/tpc-h.md index 22cab0b45..228548935 100644 --- a/docs/source/contributor-guide/benchmark-results/tpc-h.md +++ b/docs/source/contributor-guide/benchmark-results/tpc-h.md @@ -25,22 +25,21 @@ and we encourage you to run these benchmarks in your own environments. The tracking issue for improving TPC-H performance is [#391](https://github.com/apache/datafusion-comet/issues/391). -![](../../_static/images/benchmark-results/0.3.0/tpch_allqueries.png) +![](../../_static/images/benchmark-results/0.4.0/tpch_allqueries.png) Here is a breakdown showing relative performance of Spark, Comet, and DataFusion for each query. -![](../../_static/images/benchmark-results/0.3.0/tpch_queries_compare.png) +![](../../_static/images/benchmark-results/0.4.0/tpch_queries_compare.png) The following chart shows how much Comet currently accelerates each query from the benchmark in relative terms. -![](../../_static/images/benchmark-results/0.3.0/tpch_queries_speedup_rel.png) +![](../../_static/images/benchmark-results/0.4.0/tpch_queries_speedup_rel.png) The following chart shows how much Comet currently accelerates each query from the benchmark in absolute terms. -![](../../_static/images/benchmark-results/0.3.0/tpch_queries_speedup_abs.png) +![](../../_static/images/benchmark-results/0.4.0/tpch_queries_speedup_abs.png) The raw results of these benchmarks in JSON format is available here: -- [Spark](0.3.0/spark-tpch.json) -- [Comet](0.3.0/comet-tpch.json) -- [DataFusion](0.3.0/datafusion-python-tpch.json) \ No newline at end of file +- [Spark](0.4.0/spark-tpch.json) +- [Comet](0.4.0/comet-tpch.json) diff --git a/docs/source/contributor-guide/benchmarking.md b/docs/source/contributor-guide/benchmarking.md index 8c8d53e67..bd280f47f 100644 --- a/docs/source/contributor-guide/benchmarking.md +++ b/docs/source/contributor-guide/benchmarking.md @@ -22,7 +22,7 @@ under the License. To track progress on performance, we regularly run benchmarks derived from TPC-H and TPC-DS. Data generation and benchmarking documentation and scripts are available in the [DataFusion Benchmarks](https://github.com/apache/datafusion-benchmarks) GitHub repository. -We also have many micro benchmarks that can be run from an IDE located [here]()https://github.com/apache/datafusion-comet/tree/main/spark/src/test/scala/org/apache/spark/sql/benchmark). +We also have many micro benchmarks that can be run from an IDE located [here](https://github.com/apache/datafusion-comet/tree/main/spark/src/test/scala/org/apache/spark/sql/benchmark). Here are example commands for running the benchmarks against a Spark cluster. This command will need to be adapted based on the Spark environment and location of data files. 
@@ -54,11 +54,11 @@ $SPARK_HOME/bin/spark-submit \ --master $SPARK_MASTER \ --conf spark.driver.memory=8G \ --conf spark.executor.instances=1 \ - --conf spark.executor.memory=32G \ + --conf spark.executor.memory=16G \ --conf spark.executor.cores=8 \ --conf spark.cores.max=8 \ --conf spark.memory.offHeap.enabled=true \ - --conf spark.memory.offHeap.size=10g \ + --conf spark.memory.offHeap.size=32g \ --jars $COMET_JAR \ --conf spark.driver.extraClassPath=$COMET_JAR \ --conf spark.executor.extraClassPath=$COMET_JAR \ diff --git a/docs/source/contributor-guide/debugging.md b/docs/source/contributor-guide/debugging.md index 2694802d8..47d1f04c8 100644 --- a/docs/source/contributor-guide/debugging.md +++ b/docs/source/contributor-guide/debugging.md @@ -130,7 +130,7 @@ Then build the Comet as [described](https://github.com/apache/arrow-datafusion-c Start Comet with `RUST_BACKTRACE=1` ```console -RUST_BACKTRACE=1 $SPARK_HOME/spark-shell --jars spark/target/comet-spark-spark3.4_2.12-0.3.0-SNAPSHOT.jar --conf spark.plugins=org.apache.spark.CometPlugin --conf spark.comet.enabled=true --conf spark.comet.exec.enabled=true +RUST_BACKTRACE=1 $SPARK_HOME/spark-shell --jars spark/target/comet-spark-spark3.4_2.12-0.5.0-SNAPSHOT.jar --conf spark.plugins=org.apache.spark.CometPlugin --conf spark.comet.enabled=true --conf spark.comet.exec.enabled=true ``` Get the expanded exception details diff --git a/docs/source/contributor-guide/plugin_overview.md b/docs/source/contributor-guide/plugin_overview.md index a211ca6b5..add4172d5 100644 --- a/docs/source/contributor-guide/plugin_overview.md +++ b/docs/source/contributor-guide/plugin_overview.md @@ -19,20 +19,39 @@ under the License. # Comet Plugin Architecture +## Overview + +The Comet plugin enhances Spark SQL by introducing optimized query execution and shuffle mechanisms leveraging +native code. It integrates with Spark's plugin framework and extension API to replace or extend Spark's +default behavior. + +--- + +# Plugin Components + ## Comet SQL Plugin -The entry point to Comet is the `org.apache.spark.CometPlugin` class, which can be registered with Spark by adding the -following setting to the Spark configuration when launching `spark-shell` or `spark-submit`: +The entry point to Comet is the org.apache.spark.CometPlugin class, which is registered in Spark using the following +configuration: ``` --conf spark.plugins=org.apache.spark.CometPlugin ``` +The plugin is loaded on the Spark driver and does not provide executor-side plugins. + +The plugin will update the current `SparkConf` with the extra configuration provided by Comet, such as executor memory +configuration. + +The plugin also registers `CometSparkSessionExtensions` with Spark's extension API. + +## CometSparkSessionExtensions + On initialization, this class registers two physical plan optimization rules with Spark: `CometScanRule` and `CometExecRule`. These rules run whenever a query stage is being planned during Adaptive Query Execution, and run once for the entire plan when Adaptive Query Execution is disabled. -## CometScanRule +### CometScanRule `CometScanRule` replaces any Parquet scans with Comet operators. There are different paths for Spark v1 and v2 data sources. @@ -43,13 +62,13 @@ Parquet data source but leverages native code for decoding Parquet row groups di Comet only supports a subset of data types and will fall back to Spark's scan if unsupported types exist. 
Comet can still accelerate the rest of the query execution in this case because `CometSparkToColumnarExec` will -convert the output from Spark's can to Arrow arrays. Note that both `spark.comet.exec.enabled=true` and +convert the output from Spark's scan to Arrow arrays. Note that both `spark.comet.exec.enabled=true` and `spark.comet.convert.parquet.enabled=true` must be set to enable this conversion. Refer to the [Supported Spark Data Types](https://datafusion.apache.org/comet/user-guide/datatypes.html) section in the contributor guide to see a list of currently supported data types. -## CometExecRule +### CometExecRule This rule traverses bottom-up from the original Spark plan and attempts to replace each operator with a Comet equivalent. For example, a `ProjectExec` will be replaced by `CometProjectExec`. @@ -64,25 +83,68 @@ of this could outweigh the benefits of running parts of the query stage natively ## Query Execution -Once the plan has been transformed, any consecutive Comet operators are combined into a `CometNativeExec` which contains -a serialized version of the plan (the serialization code can be found in `QueryPlanSerde`). When this operator is -executed, the serialized plan is passed to the native code when calling `Native.createPlan`. +Once the plan has been transformed, any consecutive native Comet operators are combined into a `CometNativeExec` which contains +a protocol buffer serialized version of the plan (the serialization code can be found in `QueryPlanSerde`). + +Spark serializes the physical plan and sends it to the executors when executing tasks. The executors deserialize the +plan and invoke it. + +When `CometNativeExec` is invoked, it will pass the serialized protobuf plan into +`Native.createPlan`, which invokes the native code via JNI, where the plan is then deserialized. -In the native code there is a `PhysicalPlanner` struct (in `planner.rs`) which converts the serialized plan into an +In the native code there is a `PhysicalPlanner` struct (in `planner.rs`) which converts the deserialized plan into an Apache DataFusion `ExecutionPlan`. In some cases, Comet provides specialized physical operators and expressions to override the DataFusion versions to ensure compatibility with Apache Spark. -`CometExecIterator` will invoke `Native.executePlan` to pull the next batch from the native plan. This is repeated -until no more batches are available (meaning that all data has been processed by the native plan). +The leaf nodes in the physical plan are always `ScanExec` and each of these operators will make a JNI call to +`CometBatchIterator.next()` to fetch the next input batch. The input could be a Comet native Parquet scan, +a Spark exchange, or another native plan. + +`CometNativeExec` creates a `CometExecIterator` and applies this iterator to the input RDD +partitions. Each call to `CometExecIterator.next()` will invoke `Native.executePlan`. Once the plan finishes +executing, the resulting Arrow batches are imported into the JVM using Arrow FFI. + +## Shuffle + +Comet integrates with Spark's shuffle mechanism, optimizing both shuffle writes and reads. Comet's shuffle manager +must be registered with Spark using the following configuration: + +``` +--conf spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager +``` + +### Shuffle Writes + +For shuffle writes, a `ShuffleMapTask` runs in the executors. This task contains a `ShuffleDependency` that is +broadcast to all of the executors. 
It then passes the input RDD to `ShuffleWriteProcessor.write()` which +requests a `ShuffleWriter` from the shuffle manager, and this is where it gets a Comet shuffle writer. + +`ShuffleWriteProcessor` then invokes the dependency RDD and fetches rows/batches and passes them to Comet's +shuffle writer, which writes batches to disk in Arrow IPC format. + +As a result, we cannot avoid having one native plan to produce the shuffle input and another native plan for +writing the batches to the shuffle file. + +### Shuffle Reads + +For shuffle reads a `ShuffledRDD` requests a `ShuffleReader` from the shuffle manager. Comet provides a +`CometBlockStoreShuffleReader` which is implemented in JVM and fetches blocks from Spark and then creates an +`ArrowReaderIterator` to process the blocks using Arrow's `StreamReader` for decoding IPC batches. + +## Arrow FFI + +Due to the hybrid execution model, it is necessary to pass batches of data between the JVM and native code. + +The foundation for Arrow FFI is the [Arrow C Data Interface], which provides a stable ABI-compatible interface for +accessing Arrow data structures from multiple languages. -The leaf nodes in the physical plan are always `ScanExec` and these operators consume batches of Arrow data that were -prepared before the plan is executed. When `CometExecIterator` invokes `Native.executePlan` it passes the memory -addresses of these Arrow arrays to the native code. +[Arrow C Data Interface]: https://arrow.apache.org/docs/format/CDataInterface.html -![Diagram of Comet Native Execution](../../_static/images/CometOverviewDetailed.drawio.svg) +- `CometExecIterator` invokes native plans and uses Arrow FFI to read the output batches +- Native `ScanExec` operators call `CometBatchIterator` via JNI to fetch input batches from the JVM ## End to End Flow -The following diagram shows the end-to-end flow. +The following diagram shows an example of the end-to-end flow for a query stage. -![Diagram of Comet Native Parquet Scan](../../_static/images/CometNativeParquetReader.drawio.svg) +![Diagram of Comet Data Flow](../../_static/images/comet-dataflow.svg) diff --git a/docs/source/index.rst b/docs/source/index.rst index 39ad27a57..21ec36ca9 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -51,6 +51,7 @@ as a native runtime to achieve improvement in terms of query efficiency and quer Configuration Settings Compatibility Guide Tuning Guide + Metrics Guide .. _toc.contributor-guide-links: .. toctree:: diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index dc97e3c59..8ee5dad5d 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -29,7 +29,6 @@ Comet provides the following configuration settings. | spark.comet.columnar.shuffle.async.enabled | Whether to enable asynchronous shuffle for Arrow-based shuffle. | false | | spark.comet.columnar.shuffle.async.max.thread.num | Maximum number of threads on an executor used for Comet async columnar shuffle. This is the upper bound of total number of shuffle threads per executor. In other words, if the number of cores * the number of shuffle threads per task `spark.comet.columnar.shuffle.async.thread.num` is larger than this config. Comet will use this config as the number of shuffle threads per executor instead. | 100 | | spark.comet.columnar.shuffle.async.thread.num | Number of threads used for Comet async columnar shuffle per shuffle task. 
Note that more threads means more memory requirement to buffer shuffle data before flushing to disk. Also, more threads may not always improve performance, and should be set based on the number of cores available. | 3 | -| spark.comet.columnar.shuffle.memory.factor | Fraction of Comet memory to be allocated per executor process for Comet shuffle. Comet memory size is specified by `spark.comet.memoryOverhead` or calculated by `spark.comet.memory.overhead.factor` * `spark.executor.memory`. | 1.0 | | spark.comet.convert.csv.enabled | When enabled, data from Spark (non-native) CSV v1 and v2 scans will be converted to Arrow format. Note that to enable native vectorized execution, both this config and 'spark.comet.exec.enabled' need to be enabled. | false | | spark.comet.convert.json.enabled | When enabled, data from Spark (non-native) JSON v1 and v2 scans will be converted to Arrow format. Note that to enable native vectorized execution, both this config and 'spark.comet.exec.enabled' need to be enabled. | false | | spark.comet.convert.parquet.enabled | When enabled, data from Spark (non-native) Parquet v1 and v2 scans will be converted to Arrow format. Note that to enable native vectorized execution, both this config and 'spark.comet.exec.enabled' need to be enabled. | false | @@ -73,7 +72,7 @@ Comet provides the following configuration settings. | spark.comet.parquet.read.parallel.io.enabled | Whether to enable Comet's parallel reader for Parquet files. The parallel reader reads ranges of consecutive data in a file in parallel. It is faster for large files and row groups but uses more resources. | true | | spark.comet.parquet.read.parallel.io.thread-pool.size | The maximum number of parallel threads the parallel reader will use in a single executor. For executors configured with a smaller number of cores, use a smaller number. | 16 | | spark.comet.regexp.allowIncompatible | Comet is not currently fully compatible with Spark for all regular expressions. Set this config to true to allow them anyway using Rust's regular expression engine. See compatibility guide for more information. | false | -| spark.comet.scan.enabled | Whether to enable native scans. When this is turned on, Spark will use Comet to read supported data sources (currently only Parquet is supported natively). Note that to enable native vectorized execution, both this config and 'spark.comet.exec.enabled' need to be enabled. | true | +| spark.comet.scan.enabled | Whether to enable native scans. When this is turned on, Spark will use Comet to read supported data sources (currently only Parquet is supported natively). Note that to enable native vectorized execution, both this config and 'spark.comet.exec.enabled' need to be enabled. | false | | spark.comet.scan.preFetch.enabled | Whether to enable pre-fetching feature of CometScan. | false | | spark.comet.scan.preFetch.threadNum | The number of threads running pre-fetching for CometScan. Effective if spark.comet.scan.preFetch.enabled is enabled. Note that more pre-fetching threads means more memory requirement to store pre-fetched row groups. | 2 | | spark.comet.shuffle.preferDictionary.ratio | The ratio of total values to distinct values in a string column to decide whether to prefer dictionary encoding when shuffling the column. If the ratio is higher than this config, dictionary encoding will be used on shuffling string column. This config is effective if it is higher than 1.0. Note that this config is only used when `spark.comet.exec.shuffle.mode` is `jvm`. 
| 10.0 | diff --git a/docs/source/user-guide/installation.md b/docs/source/user-guide/installation.md index d58363133..22d482e47 100644 --- a/docs/source/user-guide/installation.md +++ b/docs/source/user-guide/installation.md @@ -51,12 +51,12 @@ is currently necessary to build from source. Here are the direct links for downloading the Comet jar file. -- [Comet plugin for Spark 3.3 / Scala 2.12](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.3_2.12/0.3.0/comet-spark-spark3.3_2.12-0.3.0.jar) -- [Comet plugin for Spark 3.3 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.3_2.13/0.3.0/comet-spark-spark3.3_2.13-0.3.0.jar) -- [Comet plugin for Spark 3.4 / Scala 2.12](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.4_2.12/0.3.0/comet-spark-spark3.4_2.12-0.3.0.jar) -- [Comet plugin for Spark 3.4 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.4_2.13/0.3.0/comet-spark-spark3.4_2.13-0.3.0.jar) -- [Comet plugin for Spark 3.5 / Scala 2.12](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.5_2.12/0.3.0/comet-spark-spark3.5_2.12-0.3.0.jar) -- [Comet plugin for Spark 3.5 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.5_2.13/0.3.0/comet-spark-spark3.5_2.13-0.3.0.jar) +- [Comet plugin for Spark 3.3 / Scala 2.12](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.3_2.12/0.4.0/comet-spark-spark3.3_2.12-0.4.0.jar) +- [Comet plugin for Spark 3.3 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.3_2.13/0.4.0/comet-spark-spark3.3_2.13-0.4.0.jar) +- [Comet plugin for Spark 3.4 / Scala 2.12](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.4_2.12/0.4.0/comet-spark-spark3.4_2.12-0.4.0.jar) +- [Comet plugin for Spark 3.4 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.4_2.13/0.4.0/comet-spark-spark3.4_2.13-0.4.0.jar) +- [Comet plugin for Spark 3.5 / Scala 2.12](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.5_2.12/0.4.0/comet-spark-spark3.5_2.12-0.4.0.jar) +- [Comet plugin for Spark 3.5 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.5_2.13/0.4.0/comet-spark-spark3.5_2.13-0.4.0.jar) ## Building from source @@ -74,7 +74,7 @@ See the [Comet Kubernetes Guide](kubernetes.md) guide. Make sure `SPARK_HOME` points to the same Spark version as Comet was built for. ```console -export COMET_JAR=spark/target/comet-spark-spark3.4_2.12-0.3.0-SNAPSHOT.jar +export COMET_JAR=spark/target/comet-spark-spark3.4_2.12-0.5.0-SNAPSHOT.jar $SPARK_HOME/bin/spark-shell \ --jars $COMET_JAR \ @@ -130,7 +130,7 @@ explicitly contain Comet otherwise Spark may use a different class-loader for th components which will then fail at runtime. For example: ``` ---driver-class-path spark/target/comet-spark-spark3.4_2.12-0.3.0-SNAPSHOT.jar +--driver-class-path spark/target/comet-spark-spark3.4_2.12-0.5.0-SNAPSHOT.jar ``` Some cluster managers may require additional configuration, see diff --git a/docs/source/user-guide/metrics.md b/docs/source/user-guide/metrics.md new file mode 100644 index 000000000..509d0ae8c --- /dev/null +++ b/docs/source/user-guide/metrics.md @@ -0,0 +1,66 @@ + + +# Comet Metrics + +## Spark SQL Metrics + +Set `spark.comet.metrics.detailed=true` to see all available Comet metrics. 
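+
+One way to enable this is when launching a session. The invocation below is purely illustrative; `$COMET_JAR` is
+assumed to point at the Comet jar described in the installation guide, and the plugin flags are the same ones used
+elsewhere in this documentation rather than additional requirements of the metrics feature:
+
+```console
+$SPARK_HOME/bin/spark-shell \
+  --jars $COMET_JAR \
+  --conf spark.plugins=org.apache.spark.CometPlugin \
+  --conf spark.comet.enabled=true \
+  --conf spark.comet.metrics.detailed=true
+```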
+ +### CometScanExec + +| Metric | Description | +| ----------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `scan time` | Total time to scan a Parquet file. This is not comparable to the same metric in Spark because Comet's scan metric is more accurate. Although both Comet and Spark measure the time in nanoseconds, Spark rounds this time to the nearest millisecond per batch and Comet does not. | + +### Exchange + +Comet adds some additional metrics: + +| Metric | Description | +| ------------------------------- | ------------------------------------------------------------- | +| `native shuffle time` | Total time in native code excluding any child operators. | +| `repartition time` | Time to repartition batches. | +| `memory pool time` | Time interacting with memory pool. | +| `encoding and compression time` | Time to encode batches in IPC format and compress using ZSTD. | + +## Native Metrics + +Setting `spark.comet.explain.native.enabled=true` will cause native plans to be logged in each executor. Metrics are +logged for each native plan (and there is one plan per task, so this is very verbose). + +Here is a guide to some of the native metrics. + +### ScanExec + +| Metric | Description | +| ----------------- | --------------------------------------------------------------------------------------------------- | +| `elapsed_compute` | Total time spent in this operator, fetching batches from a JVM iterator. | +| `jvm_fetch_time` | Time spent in the JVM fetching input batches to be read by this `ScanExec` instance. | +| `arrow_ffi_time` | Time spent using Arrow FFI to create Arrow batches from the memory addresses returned from the JVM. | + +### ShuffleWriterExec + +| Metric | Description | +| ----------------- | ------------------------------------------------------------- | +| `elapsed_compute` | Total time excluding any child operators. | +| `repart_time` | Time to repartition batches. | +| `ipc_time` | Time to encode batches in IPC format and compress using ZSTD. | +| `mempool_time` | Time interacting with memory pool. | +| `write_time` | Time spent writing bytes to disk. | diff --git a/docs/source/user-guide/source.md b/docs/source/user-guide/source.md index 71c9060cb..5f5d03d90 100644 --- a/docs/source/user-guide/source.md +++ b/docs/source/user-guide/source.md @@ -27,7 +27,7 @@ Official source releases can be downloaded from https://dist.apache.org/repos/di ```console # Pick the latest version -export COMET_VERSION=0.3.0 +export COMET_VERSION=0.4.0 # Download the tarball curl -O "https://dist.apache.org/repos/dist/release/datafusion/datafusion-comet-$COMET_VERSION/apache-datafusion-comet-$COMET_VERSION.tar.gz" # Unpack diff --git a/docs/source/user-guide/tuning.md b/docs/source/user-guide/tuning.md index 30ada4c9b..d68481d17 100644 --- a/docs/source/user-guide/tuning.md +++ b/docs/source/user-guide/tuning.md @@ -23,40 +23,12 @@ Comet provides some tuning options to help you get the best performance from you ## Memory Tuning -Comet provides two options for memory management: - -- **Unified Memory Management** shares an off-heap memory pool between Spark and Comet. This is the recommended option. -- **Native Memory Management** leverages DataFusion's memory management for the native plans and allocates memory independently of Spark. 
- -### Unified Memory Management - -This option is automatically enabled when `spark.memory.offHeap.enabled=true`. +Comet shares an off-heap memory pool between Spark and Comet. This requires setting `spark.memory.offHeap.enabled=true`. +If this setting is not enabled, Comet will not accelerate queries and will fall back to Spark. Each executor will have a single memory pool which will be shared by all native plans being executed within that process, and by Spark itself. The size of the pool is specified by `spark.memory.offHeap.size`. -### Native Memory Management - -This option is automatically enabled when `spark.memory.offHeap.enabled=false`. - -Each native plan has a dedicated memory pool. - -By default, the size of each pool is `spark.comet.memory.overhead.factor * spark.executor.memory`. The default value -for `spark.comet.memory.overhead.factor` is `0.2`. - -It is important to take executor concurrency into account. The maximum number of concurrent plans in an executor can -be calculated with `spark.executor.cores / spark.task.cpus`. - -For example, if the executor can execute 4 plans concurrently, then the total amount of memory allocated will be -`4 * spark.comet.memory.overhead.factor * spark.executor.memory`. - -It is also possible to set `spark.comet.memoryOverhead` to the desired size for each pool, rather than calculating -it based on `spark.comet.memory.overhead.factor`. - -If both `spark.comet.memoryOverhead` and `spark.comet.memory.overhead.factor` are set, the former will be used. - -Comet will allocate at least `spark.comet.memory.overhead.min` memory per pool. - ### Determining How Much Memory to Allocate Generally, increasing memory overhead will improve query performance, especially for queries containing joins and @@ -131,10 +103,12 @@ native shuffle currently only supports `HashPartitioning` and `SinglePartitionin To enable native shuffle, set `spark.comet.exec.shuffle.mode` to `native`. If this mode is explicitly set, then any shuffle operations that cannot be supported in this mode will fall back to Spark. -## Metrics - -Comet metrics are not directly comparable to Spark metrics in some cases. - -`CometScanExec` uses nanoseconds for total scan time. Spark also measures scan time in nanoseconds but converts to -milliseconds _per batch_ which can result in a large loss of precision. In one case we saw total scan time -of 41 seconds reported as 23 seconds for example. +## Explain Plan +### Extended Explain +With Spark 4.0.0 and newer, Comet can provide extended explain plan information in the Spark UI. Currently this lists +reasons why Comet may not have been enabled for specific operations. +To enable this, in the Spark configuration, set the following: +```shell +-c spark.sql.extendedExplainProviders=org.apache.comet.ExtendedExplainInfo +``` +This will add a section to the detailed plan displayed in the Spark SQL UI page. 
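+
+The same setting can also be passed with `--conf` when launching a session together with the usual Comet
+configuration. The command below is illustrative only; apart from `spark.sql.extendedExplainProviders`, the flags
+shown are simply the ones used elsewhere in this guide:
+
+```console
+$SPARK_HOME/bin/spark-shell \
+  --jars $COMET_JAR \
+  --conf spark.plugins=org.apache.spark.CometPlugin \
+  --conf spark.comet.enabled=true \
+  --conf spark.sql.extendedExplainProviders=org.apache.comet.ExtendedExplainInfo
+```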
\ No newline at end of file diff --git a/fuzz-testing/README.md b/fuzz-testing/README.md index aa312d0b7..f958d8d95 100644 --- a/fuzz-testing/README.md +++ b/fuzz-testing/README.md @@ -59,7 +59,7 @@ Set appropriate values for `SPARK_HOME`, `SPARK_MASTER`, and `COMET_JAR` environ $SPARK_HOME/bin/spark-submit \ --master $SPARK_MASTER \ --class org.apache.comet.fuzz.Main \ - target/comet-fuzz-spark3.4_2.12-0.1.0-SNAPSHOT-jar-with-dependencies.jar \ + target/comet-fuzz-spark3.4_2.12-0.5.0-SNAPSHOT-jar-with-dependencies.jar \ data --num-files=2 --num-rows=200 --num-columns=100 ``` @@ -75,7 +75,7 @@ Generate random queries that are based on the available test files. $SPARK_HOME/bin/spark-submit \ --master $SPARK_MASTER \ --class org.apache.comet.fuzz.Main \ - target/comet-fuzz-spark3.4_2.12-0.1.0-SNAPSHOT-jar-with-dependencies.jar \ + target/comet-fuzz-spark3.4_2.12-0.5.0-SNAPSHOT-jar-with-dependencies.jar \ queries --num-files=2 --num-queries=500 ``` @@ -97,7 +97,7 @@ $SPARK_HOME/bin/spark-submit \ --conf spark.driver.extraClassPath=$COMET_JAR \ --conf spark.executor.extraClassPath=$COMET_JAR \ --class org.apache.comet.fuzz.Main \ - target/comet-fuzz-spark3.4_2.12-0.1.0-SNAPSHOT-jar-with-dependencies.jar \ + target/comet-fuzz-spark3.4_2.12-0.5.0-SNAPSHOT-jar-with-dependencies.jar \ run --num-files=2 --filename=queries.sql ``` diff --git a/fuzz-testing/pom.xml b/fuzz-testing/pom.xml index 11f57700d..2184e54ee 100644 --- a/fuzz-testing/pom.xml +++ b/fuzz-testing/pom.xml @@ -25,7 +25,7 @@ under the License. org.apache.datafusion comet-parent-spark${spark.version.short}_${scala.binary.version} - 0.4.0-SNAPSHOT + 0.5.0-SNAPSHOT ../pom.xml @@ -100,6 +100,13 @@ under the License. + + org.apache.maven.plugins + maven-install-plugin + + true + + diff --git a/native/Cargo.lock b/native/Cargo.lock index 27e972683..f7bf44075 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -502,9 +502,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.1.37" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40545c26d092346d8a8dab71ee48e7685a7a9cba76e634790c215b41a4a7b4cf" +checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" dependencies = [ "jobserver", "libc", @@ -607,18 +607,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.20" +version = "4.5.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" +checksum = "fb3b4b9e5a7c7514dfa52869339ee98b3156b0bfb4e8a77c4ff4babb64b1604f" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.20" +version = "4.5.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" +checksum = "b17a95aa67cc7b5ebd32aa5370189aa0d79069ef1c64ce893bd30fb24bff20ec" dependencies = [ "anstyle", "clap_lex", @@ -626,9 +626,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7" [[package]] name = "combine" @@ -642,9 +642,9 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.1.1" +version = "7.1.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" dependencies = [ "strum", "strum_macros", @@ -891,9 +891,8 @@ dependencies = [ [[package]] name = "datafusion-comet" -version = "0.4.0" +version = "0.5.0" dependencies = [ - "ahash", "arrow", "arrow-array", "arrow-buffer", @@ -917,7 +916,6 @@ dependencies = [ "flatbuffers", "flate2", "futures", - "half", "hex", "itertools 0.11.0", "jni", @@ -947,7 +945,7 @@ dependencies = [ [[package]] name = "datafusion-comet-proto" -version = "0.4.0" +version = "0.5.0" dependencies = [ "prost 0.12.6", "prost-build", @@ -955,10 +953,11 @@ dependencies = [ [[package]] name = "datafusion-comet-spark-expr" -version = "0.4.0" +version = "0.5.0" dependencies = [ "arrow", "arrow-array", + "arrow-buffer", "arrow-data", "arrow-schema", "chrono", @@ -968,10 +967,13 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", + "futures", "num", + "parquet", "rand", "regex", "thiserror", + "tokio", "twox-hash 2.0.1", ] @@ -1414,9 +1416,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.34" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" +checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" dependencies = [ "crc32fast", "miniz_oxide", @@ -2819,9 +2821,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] @@ -2838,9 +2840,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", @@ -3251,9 +3253,9 @@ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.14" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" [[package]] name = "unsafe-any-ors" diff --git a/native/Cargo.toml b/native/Cargo.toml index b78c1d68f..cc04977de 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -20,7 +20,7 @@ members = ["core", "spark-expr", "proto"] resolver = "2" [workspace.package] -version = "0.4.0" +version = "0.5.0" homepage = "https://datafusion.apache.org/comet" repository = "https://github.com/apache/datafusion-comet" authors = ["Apache DataFusion "] @@ -41,18 +41,19 @@ arrow-ipc = { version = "53.2.0" } arrow-schema = { version = "53.2.0" } flatbuffers = { version = "24.3.25" } parquet = { version = "53.2.0", default-features = false, features = ["experimental"] } -datafusion-common = { version = "43.0.0" } datafusion = { version = "43.0.0", default-features = false, features = 
["unicode_expressions", "crypto_expressions", "parquet"] } +datafusion-common = { version = "43.0.0" } datafusion-functions = { version = "43.0.0", features = ["crypto_expressions"] } datafusion-functions-nested = { version = "43.0.0", default-features = false } datafusion-expr = { version = "43.0.0", default-features = false } datafusion-execution = { version = "43.0.0", default-features = false } datafusion-physical-plan = { version = "43.0.0", default-features = false } datafusion-physical-expr = { version = "43.0.0", default-features = false } -datafusion-comet-spark-expr = { path = "spark-expr", version = "0.4.0" } -datafusion-comet-proto = { path = "proto", version = "0.4.0" } +datafusion-comet-spark-expr = { path = "spark-expr", version = "0.5.0" } +datafusion-comet-proto = { path = "proto", version = "0.5.0" } chrono = { version = "0.4", default-features = false, features = ["clock"] } chrono-tz = { version = "0.8" } +futures = "0.3.28" num = "0.4" rand = "0.8" regex = "1.9.6" diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml index 35035ff35..62b51c531 100644 --- a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -43,8 +43,7 @@ arrow-schema = { workspace = true } arrow-ipc = { workspace = true } flatbuffers = { workspace = true } parquet = { workspace = true, default-features = false, features = ["experimental"] } -half = { version = "2.4.1", default-features = false } -futures = "0.3.28" +futures = { workspace = true } mimalloc = { version = "*", default-features = false, optional = true } tokio = { version = "1", features = ["rt-multi-thread"] } async-trait = "0.1" @@ -64,7 +63,6 @@ rand = { workspace = true} num = { workspace = true } bytes = "1.5.0" tempfile = "3.8.0" -ahash = { version = "0.8", default-features = false } itertools = "0.11.0" paste = "1.0.14" datafusion-common = { workspace = true, features= ["object_store"] } @@ -92,7 +90,6 @@ hex = "0.4.3" [features] default = [] -nightly = [] [lib] name = "comet" @@ -107,10 +104,6 @@ harness = false name = "bit_util" harness = false -[[bench]] -name = "hash" -harness = false - [[bench]] name = "row_columnar" harness = false @@ -127,10 +120,6 @@ harness = false name = "filter" harness = false -[[bench]] -name = "aggregate" -harness = false - [[bench]] name = "bloom_filter_agg" harness = false diff --git a/native/core/benches/bloom_filter_agg.rs b/native/core/benches/bloom_filter_agg.rs index af3eb919e..b83ff3fad 100644 --- a/native/core/benches/bloom_filter_agg.rs +++ b/native/core/benches/bloom_filter_agg.rs @@ -19,7 +19,7 @@ use arrow::datatypes::{DataType, Field, Schema}; use arrow_array::builder::Int64Builder; use arrow_array::{ArrayRef, RecordBatch}; use arrow_schema::SchemaRef; -use comet::execution::datafusion::expressions::bloom_filter_agg::BloomFilterAgg; +use comet::execution::expressions::bloom_filter_agg::BloomFilterAgg; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion::physical_expr::PhysicalExpr; use datafusion::physical_plan::aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}; @@ -61,10 +61,8 @@ fn criterion_benchmark(c: &mut Criterion) { group.bench_function(agg_mode.0, |b| { let comet_bloom_filter_agg = Arc::new(AggregateUDF::new_from_impl(BloomFilterAgg::new( - Arc::clone(&c0), Arc::clone(&num_items), Arc::clone(&num_bits), - "bloom_filter_agg", DataType::Binary, ))); b.to_async(&rt).iter(|| { diff --git a/native/core/benches/hash.rs b/native/core/benches/hash.rs deleted file mode 100644 index 039a6d5d8..000000000 --- 
a/native/core/benches/hash.rs +++ /dev/null @@ -1,137 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#[path = "common.rs"] -mod common; - -use arrow_array::ArrayRef; -use comet::execution::kernels::hash; -use common::*; -use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; -use datafusion_comet_spark_expr::scalar_funcs::{spark_murmur3_hash, spark_xxhash64}; -use datafusion_common::ScalarValue; -use datafusion_expr::ColumnarValue; -use std::sync::Arc; - -const BATCH_SIZE: usize = 1024 * 8; -const NUM_ITER: usize = 10; -const NULL_FRACTION: f32 = 0.1; - -fn criterion_benchmark(c: &mut Criterion) { - let mut group = c.benchmark_group("hash"); - - let a1: ArrayRef = Arc::new(create_int64_array(BATCH_SIZE, 0.0, 0, BATCH_SIZE as i64)); - let a2: ArrayRef = Arc::new(create_int64_array(BATCH_SIZE, 0.0, 0, BATCH_SIZE as i64)); - let a3: ArrayRef = Arc::new(create_int64_array( - BATCH_SIZE, - NULL_FRACTION, - 0, - BATCH_SIZE as i64, - )); - let a4: ArrayRef = Arc::new(create_int64_array( - BATCH_SIZE, - NULL_FRACTION, - 0, - BATCH_SIZE as i64, - )); - - group.bench_function( - BenchmarkId::new("hash_i64_single_nonnull", BATCH_SIZE), - |b| { - let input = vec![a1.clone()]; - let mut dst = vec![0; BATCH_SIZE]; - - b.iter(|| { - for _ in 0..NUM_ITER { - hash(&input, &mut dst); - } - }); - }, - ); - group.bench_function(BenchmarkId::new("hash_i64_single_null", BATCH_SIZE), |b| { - let input = vec![a3.clone()]; - let mut dst = vec![0; BATCH_SIZE]; - - b.iter(|| { - for _ in 0..NUM_ITER { - hash(&input, &mut dst); - } - }); - }); - group.bench_function( - BenchmarkId::new("hash_i64_multiple_nonnull", BATCH_SIZE), - |b| { - let input = vec![a1.clone(), a2.clone()]; - let mut dst = vec![0; BATCH_SIZE]; - - b.iter(|| { - for _ in 0..NUM_ITER { - hash(&input, &mut dst); - } - }); - }, - ); - group.bench_function( - BenchmarkId::new("hash_i64_multiple_null", BATCH_SIZE), - |b| { - let input = vec![a3.clone(), a4.clone()]; - let mut dst = vec![0; BATCH_SIZE]; - - b.iter(|| { - for _ in 0..NUM_ITER { - hash(&input, &mut dst); - } - }); - }, - ); - group.bench_function(BenchmarkId::new("xxhash64", BATCH_SIZE), |b| { - let inputs = &[ - ColumnarValue::Array(a3.clone()), - ColumnarValue::Array(a4.clone()), - ColumnarValue::Scalar(ScalarValue::Int64(Some(42i64))), - ]; - - b.iter(|| { - for _ in 0..NUM_ITER { - spark_xxhash64(inputs).unwrap(); - } - }); - }); - group.bench_function(BenchmarkId::new("murmur3", BATCH_SIZE), |b| { - let inputs = &[ - ColumnarValue::Array(a3.clone()), - ColumnarValue::Array(a4.clone()), - ColumnarValue::Scalar(ScalarValue::Int32(Some(42))), - ]; - b.iter(|| { - for _ in 0..NUM_ITER { - spark_murmur3_hash(inputs).unwrap(); - } - }); - }); -} - -fn config() -> Criterion { - Criterion::default() -} - 
-criterion_group! { - name = benches; - config = config(); - targets = criterion_benchmark -} -criterion_main!(benches); diff --git a/native/core/benches/parquet_read.rs b/native/core/benches/parquet_read.rs index 1f8178cd2..ae511ade5 100644 --- a/native/core/benches/parquet_read.rs +++ b/native/core/benches/parquet_read.rs @@ -44,7 +44,7 @@ fn bench(c: &mut Criterion) { let mut group = c.benchmark_group("comet_parquet_read"); let schema = build_test_schema(); - let pages = build_plain_int32_pages(schema.clone(), schema.column(0), 0.0); + let pages = build_plain_int32_pages(schema.column(0), 0.0); group.bench_function("INT/PLAIN/NOT_NULL", |b| { let t = TypePtr::new( PrimitiveTypeBuilder::new("f", PhysicalType::INT32) @@ -107,7 +107,6 @@ const VALUES_PER_PAGE: usize = 10_000; const BATCH_SIZE: usize = 4096; fn build_plain_int32_pages( - schema: SchemaDescPtr, column_desc: ColumnDescPtr, null_density: f32, ) -> impl PageIterator + Clone { @@ -143,7 +142,7 @@ fn build_plain_int32_pages( // Since `InMemoryPageReader` is not exposed from parquet crate, here we use // `InMemoryPageIterator` instead which is a Iter>. - InMemoryPageIterator::new(schema, column_desc, vec![pages]) + InMemoryPageIterator::new(vec![pages]) } struct TestColumnReader { diff --git a/native/core/benches/shuffle_writer.rs b/native/core/benches/shuffle_writer.rs index 6f2871861..272887238 100644 --- a/native/core/benches/shuffle_writer.rs +++ b/native/core/benches/shuffle_writer.rs @@ -17,7 +17,7 @@ use arrow_array::{builder::StringBuilder, RecordBatch}; use arrow_schema::{DataType, Field, Schema}; -use comet::execution::datafusion::shuffle_writer::ShuffleWriterExec; +use comet::execution::shuffle::ShuffleWriterExec; use criterion::{criterion_group, criterion_main, Criterion}; use datafusion::{ physical_plan::{common::collect, memory::MemoryExec, ExecutionPlan}, diff --git a/native/core/src/common/bit.rs b/native/core/src/common/bit.rs index 871786bb1..72d7729d9 100644 --- a/native/core/src/common/bit.rs +++ b/native/core/src/common/bit.rs @@ -17,14 +17,12 @@ use std::{cmp::min, mem::size_of}; -use arrow::buffer::Buffer; - use crate::{ errors::CometResult as Result, - likely, parquet::{data_type::AsBytes, util::bit_packing::unpack32}, - unlikely, }; +use arrow::buffer::Buffer; +use datafusion_comet_spark_expr::utils::{likely, unlikely}; #[inline] pub fn from_ne_slice(bs: &[u8]) -> T { diff --git a/native/core/src/common/mod.rs b/native/core/src/common/mod.rs index 1b7dfad28..dc539879f 100644 --- a/native/core/src/common/mod.rs +++ b/native/core/src/common/mod.rs @@ -17,23 +17,5 @@ #[macro_use] pub mod bit; - -use crate::TypeTrait; - -/// Getter APIs for Comet vectors. -trait ValueGetter { - /// Gets the non-null value at `idx`. - /// - /// Note that null check needs to be done before the call, to ensure the value at `idx` is - /// not null. - fn value(&self, idx: usize) -> T::Native; -} - -/// Setter APIs for Comet mutable vectors. -trait ValueSetter { - /// Appends a non-null value `v` to the end of this vector. - fn append_value(&mut self, v: &T::Native); -} - mod buffer; pub use buffer::*; diff --git a/native/core/src/data_type.rs b/native/core/src/data_type.rs deleted file mode 100644 index b275de1c6..000000000 --- a/native/core/src/data_type.rs +++ /dev/null @@ -1,241 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use arrow::datatypes::DataType as ArrowDataType; -use arrow_schema::TimeUnit; -use std::{cmp, fmt::Debug}; - -#[derive(Debug, PartialEq)] -pub enum DataType { - Boolean, - Byte, - Short, - Integer, - Long, - Float, - Double, - Decimal(u8, i8), - String, - Binary, - Timestamp, - Date, -} - -impl From<&ArrowDataType> for DataType { - fn from(dt: &ArrowDataType) -> Self { - match dt { - ArrowDataType::Boolean => DataType::Boolean, - ArrowDataType::Int8 => DataType::Byte, - ArrowDataType::Int16 => DataType::Short, - ArrowDataType::Int32 => DataType::Integer, - ArrowDataType::Int64 => DataType::Long, - ArrowDataType::Float32 => DataType::Float, - ArrowDataType::Float64 => DataType::Double, - ArrowDataType::Decimal128(precision, scale) => DataType::Decimal(*precision, *scale), - ArrowDataType::Utf8 => DataType::String, - ArrowDataType::Binary => DataType::Binary, - // Spark always store timestamp in micro seconds - ArrowDataType::Timestamp(TimeUnit::Microsecond, _) => DataType::Timestamp, - ArrowDataType::Date32 => DataType::Date, - ArrowDataType::Dictionary(key_dt, value_dt) if is_valid_key_type(key_dt) => { - Self::from(value_dt.as_ref()) - } - dt => panic!("unsupported Arrow data type: {:?}", dt), - } - } -} - -impl DataType { - pub fn kind(&self) -> TypeKind { - match self { - DataType::Boolean => TypeKind::Boolean, - DataType::Byte => TypeKind::Byte, - DataType::Short => TypeKind::Short, - DataType::Integer => TypeKind::Integer, - DataType::Long => TypeKind::Long, - DataType::Float => TypeKind::Float, - DataType::Double => TypeKind::Double, - DataType::Decimal(_, _) => TypeKind::Decimal, - DataType::String => TypeKind::String, - DataType::Binary => TypeKind::Binary, - DataType::Timestamp => TypeKind::Timestamp, - DataType::Date => TypeKind::Date, - } - } -} - -/// Comet only use i32 as dictionary key -fn is_valid_key_type(dt: &ArrowDataType) -> bool { - matches!(dt, ArrowDataType::Int32) -} - -/// Unlike [`DataType`], [`TypeKind`] doesn't carry extra information about the type itself, such as -/// decimal precision & scale. Instead, it is merely a token that is used to do runtime case -/// analysis depending on the actual type. It can be obtained from a `TypeTrait` generic parameter. -#[derive(Debug, PartialEq)] -pub enum TypeKind { - Boolean, - Byte, - Short, - Integer, - Long, - Float, - Double, - Decimal, - String, - Binary, - Timestamp, - Date, -} - -pub const BITS_PER_BYTE: usize = 8; - -impl TypeKind { - /// Returns the size of this type, in number of bits. 
- pub fn type_size(&self) -> usize { - match self { - TypeKind::Boolean => 1, - TypeKind::Byte => BITS_PER_BYTE, - TypeKind::Short => BITS_PER_BYTE * 2, - TypeKind::Integer | TypeKind::Float => BITS_PER_BYTE * 4, - TypeKind::Long | TypeKind::Double => BITS_PER_BYTE * 8, - TypeKind::Decimal => BITS_PER_BYTE * 16, - TypeKind::String | TypeKind::Binary => BITS_PER_BYTE * 16, - TypeKind::Timestamp => BITS_PER_BYTE * 8, - TypeKind::Date => BITS_PER_BYTE * 4, - } - } -} - -pub const STRING_VIEW_LEN: usize = 16; // StringView is stored using 16 bytes -pub const STRING_VIEW_PREFIX_LEN: usize = 4; // String prefix in StringView is stored using 4 bytes - -#[repr(C, align(16))] -#[derive(Clone, Copy, Debug)] -pub struct StringView { - pub len: u32, - pub prefix: [u8; STRING_VIEW_PREFIX_LEN], - pub ptr: usize, -} - -impl StringView { - pub fn as_utf8_str(&self) -> &str { - unsafe { - let slice = std::slice::from_raw_parts(self.ptr as *const u8, self.len as usize); - std::str::from_utf8_unchecked(slice) - } - } -} - -impl Default for StringView { - fn default() -> Self { - Self { - len: 0, - prefix: [0; STRING_VIEW_PREFIX_LEN], - ptr: 0, - } - } -} - -impl PartialEq for StringView { - fn eq(&self, other: &Self) -> bool { - if self.len != other.len { - return false; - } - if self.prefix != other.prefix { - return false; - } - self.as_utf8_str() == other.as_utf8_str() - } -} - -pub trait NativeEqual { - fn is_equal(&self, other: &Self) -> bool; -} - -macro_rules! make_native_equal { - ($native_ty:ty) => { - impl NativeEqual for $native_ty { - fn is_equal(&self, other: &Self) -> bool { - self == other - } - } - }; -} - -make_native_equal!(bool); -make_native_equal!(i8); -make_native_equal!(i16); -make_native_equal!(i32); -make_native_equal!(i64); -make_native_equal!(i128); -make_native_equal!(StringView); - -impl NativeEqual for f32 { - fn is_equal(&self, other: &Self) -> bool { - self.total_cmp(other) == cmp::Ordering::Equal - } -} - -impl NativeEqual for f64 { - fn is_equal(&self, other: &Self) -> bool { - self.total_cmp(other) == cmp::Ordering::Equal - } -} -pub trait NativeType: Debug + Default + Copy + NativeEqual {} - -impl NativeType for bool {} -impl NativeType for i8 {} -impl NativeType for i16 {} -impl NativeType for i32 {} -impl NativeType for i64 {} -impl NativeType for i128 {} -impl NativeType for f32 {} -impl NativeType for f64 {} -impl NativeType for StringView {} - -/// A trait for Comet data type. This should only be used as generic parameter during method -/// invocations. -pub trait TypeTrait: 'static { - type Native: NativeType; - fn type_kind() -> TypeKind; -} - -macro_rules! 
make_type_trait { - ($name:ident, $native_ty:ty, $kind:path) => { - pub struct $name {} - impl TypeTrait for $name { - type Native = $native_ty; - fn type_kind() -> TypeKind { - $kind - } - } - }; -} - -make_type_trait!(BoolType, bool, TypeKind::Boolean); -make_type_trait!(ByteType, i8, TypeKind::Byte); -make_type_trait!(ShortType, i16, TypeKind::Short); -make_type_trait!(IntegerType, i32, TypeKind::Integer); -make_type_trait!(LongType, i64, TypeKind::Long); -make_type_trait!(FloatType, f32, TypeKind::Float); -make_type_trait!(DoubleType, f64, TypeKind::Double); -make_type_trait!(DecimalType, i128, TypeKind::Decimal); -make_type_trait!(StringType, StringView, TypeKind::String); -make_type_trait!(BinaryType, StringView, TypeKind::Binary); -make_type_trait!(TimestampType, i64, TypeKind::Timestamp); -make_type_trait!(DateType, i32, TypeKind::Date); diff --git a/native/core/src/errors.rs b/native/core/src/errors.rs index 92799bcf6..4d623d976 100644 --- a/native/core/src/errors.rs +++ b/native/core/src/errors.rs @@ -485,23 +485,6 @@ where || f(t) } -// This is a duplicate of `try_unwrap_or_throw`, which is used to work around Arrow's lack of -// `UnwindSafe` handling. -pub fn try_assert_unwind_safe_or_throw(env: &JNIEnv, f: F) -> T -where - T: JNIDefault, - F: FnOnce(JNIEnv) -> Result, -{ - let mut env1 = unsafe { JNIEnv::from_raw(env.get_raw()).unwrap() }; - let env2 = unsafe { JNIEnv::from_raw(env.get_raw()).unwrap() }; - unwrap_or_throw_default( - &mut env1, - flatten( - catch_unwind(std::panic::AssertUnwindSafe(curry(f, env2))).map_err(CometError::from), - ), - ) -} - // It is currently undefined behavior to unwind from Rust code into foreign code, so we can wrap // our JNI functions and turn these panics into a `RuntimeException`. pub fn try_unwrap_or_throw(env: &JNIEnv, f: F) -> T @@ -534,10 +517,7 @@ mod tests { AttachGuard, InitArgsBuilder, JNIEnv, JNIVersion, JavaVM, }; - use assertables::{ - assert_contains, assert_contains_as_result, assert_starts_with, - assert_starts_with_as_result, - }; + use assertables::{assert_starts_with, assert_starts_with_as_result}; pub fn jvm() -> &'static Arc { static mut JVM: Option> = None; @@ -890,26 +870,4 @@ mod tests { // first line. assert_starts_with!(msg_rust, expected_message); } - - // Asserts that exception's message matches `expected_message`. - fn assert_exception_message_with_stacktrace( - env: &mut JNIEnv, - exception: JThrowable, - expected_message: &str, - stacktrace_contains: &str, - ) { - let message = env - .call_method(exception, "getMessage", "()Ljava/lang/String;", &[]) - .unwrap() - .l() - .unwrap(); - let message_string = message.into(); - let msg_rust: String = env.get_string(&message_string).unwrap().into(); - // Since panics result in multi-line messages which include the backtrace, just use the - // first line. - assert_starts_with!(msg_rust, expected_message); - - // Check that the stacktrace is included by checking for a specific element - assert_contains!(msg_rust, stacktrace_contains); - } } diff --git a/native/core/src/execution/datafusion/expressions/mod.rs b/native/core/src/execution/datafusion/expressions/mod.rs deleted file mode 100644 index 48b80384b..000000000 --- a/native/core/src/execution/datafusion/expressions/mod.rs +++ /dev/null @@ -1,47 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Native DataFusion expressions - -pub mod bitwise_not; -pub mod checkoverflow; -mod normalize_nan; -pub use normalize_nan::NormalizeNaNAndZero; - -use crate::errors::CometError; -pub mod avg; -pub mod avg_decimal; -pub mod bloom_filter_agg; -pub mod bloom_filter_might_contain; -pub mod comet_scalar_funcs; -pub mod correlation; -pub mod covariance; -pub mod negative; -pub mod stddev; -pub mod strings; -pub mod subquery; -pub mod sum_decimal; -pub mod unbound; -pub mod variance; - -pub use datafusion_comet_spark_expr::{EvalMode, SparkError}; - -fn arithmetic_overflow_error(from_type: &str) -> CometError { - CometError::Spark(SparkError::ArithmeticOverflow { - from_type: from_type.to_string(), - }) -} diff --git a/native/core/src/execution/datafusion/mod.rs b/native/core/src/execution/datafusion/mod.rs deleted file mode 100644 index af32b4be1..000000000 --- a/native/core/src/execution/datafusion/mod.rs +++ /dev/null @@ -1,25 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Native execution through DataFusion - -pub mod expressions; -mod operators; -pub mod planner; -pub(crate) mod schema_adapter; -pub mod shuffle_writer; -mod util; diff --git a/native/core/src/execution/datafusion/expressions/bloom_filter_agg.rs b/native/core/src/execution/expressions/bloom_filter_agg.rs similarity index 94% rename from native/core/src/execution/datafusion/expressions/bloom_filter_agg.rs rename to native/core/src/execution/expressions/bloom_filter_agg.rs index e6528a563..ea8bb3647 100644 --- a/native/core/src/execution/datafusion/expressions/bloom_filter_agg.rs +++ b/native/core/src/execution/expressions/bloom_filter_agg.rs @@ -19,8 +19,8 @@ use arrow_schema::Field; use datafusion::{arrow::datatypes::DataType, logical_expr::Volatility}; use std::{any::Any, sync::Arc}; -use crate::execution::datafusion::util::spark_bloom_filter; -use crate::execution::datafusion::util::spark_bloom_filter::SparkBloomFilter; +use crate::execution::util::spark_bloom_filter; +use crate::execution::util::spark_bloom_filter::SparkBloomFilter; use arrow::array::ArrayRef; use arrow_array::BinaryArray; use datafusion::error::Result; @@ -34,9 +34,7 @@ use datafusion_physical_expr::expressions::Literal; #[derive(Debug, Clone)] pub struct BloomFilterAgg { - name: String, signature: Signature, - expr: Arc, num_items: i32, num_bits: i32, } @@ -53,15 +51,12 @@ fn extract_i32_from_literal(expr: Arc) -> i32 { impl BloomFilterAgg { pub fn new( - expr: Arc, num_items: Arc, num_bits: Arc, - name: impl Into, data_type: DataType, ) -> Self { assert!(matches!(data_type, DataType::Binary)); Self { - name: name.into(), signature: Signature::uniform( 1, vec![ @@ -73,7 +68,6 @@ impl BloomFilterAgg { ], Volatility::Immutable, ), - expr, num_items: extract_i32_from_literal(num_items), num_bits: extract_i32_from_literal(num_bits), } diff --git a/native/core/src/execution/datafusion/expressions/bloom_filter_might_contain.rs b/native/core/src/execution/expressions/bloom_filter_might_contain.rs similarity index 97% rename from native/core/src/execution/datafusion/expressions/bloom_filter_might_contain.rs rename to native/core/src/execution/expressions/bloom_filter_might_contain.rs index de922d831..af6a5a47a 100644 --- a/native/core/src/execution/datafusion/expressions/bloom_filter_might_contain.rs +++ b/native/core/src/execution/expressions/bloom_filter_might_contain.rs @@ -15,9 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::{ - execution::datafusion::util::spark_bloom_filter::SparkBloomFilter, parquet::data_type::AsBytes, -}; +use crate::{execution::util::spark_bloom_filter::SparkBloomFilter, parquet::data_type::AsBytes}; use arrow::record_batch::RecordBatch; use arrow_array::cast::as_primitive_array; use arrow_schema::{DataType, Schema}; diff --git a/native/core/src/execution/kernels/mod.rs b/native/core/src/execution/expressions/mod.rs similarity index 83% rename from native/core/src/execution/kernels/mod.rs rename to native/core/src/execution/expressions/mod.rs index 675dcd489..e2f811fa2 100644 --- a/native/core/src/execution/kernels/mod.rs +++ b/native/core/src/execution/expressions/mod.rs @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. -//! Kernels +//! 
Native DataFusion expressions -mod hash; -pub use hash::hash; +pub mod bloom_filter_agg; +pub mod bloom_filter_might_contain; +pub mod subquery; -pub(crate) mod strings; +pub use datafusion_comet_spark_expr::EvalMode; diff --git a/native/core/src/execution/datafusion/expressions/subquery.rs b/native/core/src/execution/expressions/subquery.rs similarity index 100% rename from native/core/src/execution/datafusion/expressions/subquery.rs rename to native/core/src/execution/expressions/subquery.rs diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs index 1dec5173b..074dad65a 100644 --- a/native/core/src/execution/jni_api.rs +++ b/native/core/src/execution/jni_api.rs @@ -25,26 +25,27 @@ use datafusion::{ disk_manager::DiskManagerConfig, runtime_env::{RuntimeConfig, RuntimeEnv}, }, - physical_plan::{display::DisplayableExecutionPlan, ExecutionPlan, SendableRecordBatchStream}, + physical_plan::{display::DisplayableExecutionPlan, SendableRecordBatchStream}, prelude::{SessionConfig, SessionContext}, }; use futures::poll; use jni::{ errors::Result as JNIResult, objects::{ - JByteArray, JClass, JIntArray, JLongArray, JMap, JObject, JObjectArray, JPrimitiveArray, - JString, ReleaseMode, + JByteArray, JClass, JIntArray, JLongArray, JObject, JObjectArray, JPrimitiveArray, JString, + ReleaseMode, }, sys::{jbyteArray, jint, jlong, jlongArray}, JNIEnv, }; +use std::time::{Duration, Instant}; use std::{collections::HashMap, sync::Arc, task::Poll}; use crate::{ errors::{try_unwrap_or_throw, CometError, CometResult}, execution::{ - datafusion::planner::PhysicalPlanner, metrics::utils::update_comet_metric, - serde::to_arrow_datatype, shuffle::row::process_sorted_row_partition, sort::RdxSort, + metrics::utils::update_comet_metric, planner::PhysicalPlanner, serde::to_arrow_datatype, + shuffle::row::process_sorted_row_partition, sort::RdxSort, }, jvm_bridge::{jni_new_global_ref, JVMClasses}, }; @@ -58,6 +59,7 @@ use jni::{ use tokio::runtime::Runtime; use crate::execution::operators::ScanExec; +use crate::execution::spark_plan::SparkPlan; use log::info; /// Comet native execution context. Kept alive across JNI calls. @@ -69,19 +71,19 @@ struct ExecutionContext { /// The number of partitions pub partition_count: usize, /// The DataFusion root operator converted from the `spark_plan` - pub root_op: Option>, + pub root_op: Option>, /// The input sources for the DataFusion plan pub scans: Vec, /// The global reference of input sources for the DataFusion plan pub input_sources: Vec>, /// The record batch stream to pull results from pub stream: Option, - /// Configurations for DF execution - pub conf: HashMap, /// The Tokio runtime used for async. 
pub runtime: Runtime, /// Native metrics pub metrics: Arc, + /// The time it took to create the native plan and configure the context + pub plan_creation_time: Duration, /// DataFusion SessionContext pub session_ctx: Arc, /// Whether to enable additional debugging checks & messages @@ -100,53 +102,36 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan( e: JNIEnv, _class: JClass, id: jlong, - config_object: JObject, iterators: jobjectArray, serialized_query: jbyteArray, partition_count: jint, metrics_node: JObject, comet_task_memory_manager_obj: JObject, + batch_size: jint, + use_unified_memory_manager: jboolean, + memory_limit: jlong, + memory_fraction: jdouble, + debug_native: jboolean, + explain_native: jboolean, + worker_threads: jint, + blocking_threads: jint, ) -> jlong { try_unwrap_or_throw(&e, |mut env| { // Init JVM classes JVMClasses::init(&mut env); + let start = Instant::now(); + let array = unsafe { JPrimitiveArray::from_raw(serialized_query) }; let bytes = env.convert_byte_array(array)?; // Deserialize query plan let spark_plan = serde::deserialize_op(bytes.as_slice())?; - // Sets up context - let mut configs = HashMap::new(); - - let config_map = JMap::from_env(&mut env, &config_object)?; - let mut map_iter = config_map.iter(&mut env)?; - while let Some((key, value)) = map_iter.next(&mut env)? { - let key: String = env.get_string(&JString::from(key)).unwrap().into(); - let value: String = env.get_string(&JString::from(value)).unwrap().into(); - configs.insert(key, value); - } - - // Whether we've enabled additional debugging on the native side - let debug_native = parse_bool(&configs, "debug_native")?; - let explain_native = parse_bool(&configs, "explain_native")?; - - let worker_threads = configs - .get("worker_threads") - .map(String::as_str) - .unwrap_or("4") - .parse::()?; - let blocking_threads = configs - .get("blocking_threads") - .map(String::as_str) - .unwrap_or("10") - .parse::()?; - // Use multi-threaded tokio runtime to prevent blocking spawned tasks if any let runtime = tokio::runtime::Builder::new_multi_thread() - .worker_threads(worker_threads) - .max_blocking_threads(blocking_threads) + .worker_threads(worker_threads as usize) + .max_blocking_threads(blocking_threads as usize) .enable_all() .build()?; @@ -167,7 +152,15 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan( // We need to keep the session context alive. Some session state like temporary // dictionaries are stored in session context. If it is dropped, the temporary // dictionaries will be dropped as well. - let session = prepare_datafusion_session_context(&configs, task_memory_manager)?; + let session = prepare_datafusion_session_context( + batch_size as usize, + use_unified_memory_manager == 1, + memory_limit as usize, + memory_fraction, + task_memory_manager, + )?; + + let plan_creation_time = start.elapsed(); let exec_context = Box::new(ExecutionContext { id, @@ -177,12 +170,12 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan( scans: vec![], input_sources, stream: None, - conf: configs, runtime, metrics, + plan_creation_time, session_ctx: Arc::new(session), - debug_native, - explain_native, + debug_native: debug_native == 1, + explain_native: explain_native == 1, metrics_jstrings: HashMap::new(), }); @@ -190,47 +183,30 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan( }) } -/// Parse Comet configs and configure DataFusion session context. +/// Configure DataFusion session context. 
fn prepare_datafusion_session_context( - conf: &HashMap, + batch_size: usize, + use_unified_memory_manager: bool, + memory_limit: usize, + memory_fraction: f64, comet_task_memory_manager: Arc, ) -> CometResult { - // Get the batch size from Comet JVM side - let batch_size = conf - .get("batch_size") - .ok_or(CometError::Internal( - "Config 'batch_size' is not specified from Comet JVM side".to_string(), - ))? - .parse::()?; - let mut rt_config = RuntimeConfig::new().with_disk_manager(DiskManagerConfig::NewOs); - // Check if we are using unified memory manager integrated with Spark. Default to false if not - // set. - let use_unified_memory_manager = parse_bool(conf, "use_unified_memory_manager")?; - + // Check if we are using unified memory manager integrated with Spark. if use_unified_memory_manager { // Set Comet memory pool for native let memory_pool = CometMemoryPool::new(comet_task_memory_manager); rt_config = rt_config.with_memory_pool(Arc::new(memory_pool)); } else { // Use the memory pool from DF - if conf.contains_key("memory_limit") { - let memory_limit = conf.get("memory_limit").unwrap().parse::()?; - let memory_fraction = conf - .get("memory_fraction") - .ok_or(CometError::Internal( - "Config 'memory_fraction' is not specified from Comet JVM side".to_string(), - ))? - .parse::()?; - rt_config = rt_config.with_memory_limit(memory_limit, memory_fraction) - } + rt_config = rt_config.with_memory_limit(memory_limit, memory_fraction) } // Get Datafusion configuration from Spark Execution context // can be configured in Comet Spark JVM using Spark --conf parameters // e.g: spark-shell --conf spark.datafusion.sql_parser.parse_float_as_decimal=true - let mut session_config = SessionConfig::new() + let session_config = SessionConfig::new() .with_batch_size(batch_size) // DataFusion partial aggregates can emit duplicate rows so we disable the // skip partial aggregation feature because this is not compatible with Spark's @@ -243,11 +219,7 @@ fn prepare_datafusion_session_context( &ScalarValue::Float64(Some(1.1)), ); - for (key, value) in conf.iter().filter(|(k, _)| k.starts_with("datafusion.")) { - session_config = session_config.set_str(key, value); - } - - let runtime = RuntimeEnv::try_new(rt_config).unwrap(); + let runtime = RuntimeEnv::try_new(rt_config)?; let mut session_ctx = SessionContext::new_with_config_rt(session_config, Arc::new(runtime)); @@ -256,14 +228,6 @@ fn prepare_datafusion_session_context( Ok(session_ctx) } -fn parse_bool(conf: &HashMap, name: &str) -> CometResult { - conf.get(name) - .map(String::as_str) - .unwrap_or("false") - .parse::() - .map_err(|e| CometError::Config(format!("Failed to parse boolean config {name}: {e}"))) -} - /// Prepares arrow arrays for output. fn prepare_output( env: &mut JNIEnv, @@ -342,8 +306,9 @@ fn pull_input_batches(exec_context: &mut ExecutionContext) -> Result<(), CometEr pub unsafe extern "system" fn Java_org_apache_comet_Native_executePlan( e: JNIEnv, _class: JClass, + stage_id: jint, + partition: jint, exec_context: jlong, - partition_id: jint, array_addrs: jlongArray, schema_addrs: jlongArray, ) -> jlong { @@ -357,6 +322,7 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_executePlan( // Because we don't know if input arrays are dictionary-encoded when we create // query plan, we need to defer stream initialization to first time execution. 
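// ---------------------------------------------------------------------------
// Editor's sketch (not part of this patch): the configuration path implemented
// by prepare_datafusion_session_context above, condensed into a standalone
// function. The helper name `build_session` and its parameter list are
// hypothetical; the DataFusion calls are the same ones used in the hunk above.
use std::sync::Arc;
use datafusion::execution::{
    disk_manager::DiskManagerConfig,
    memory_pool::MemoryPool,
    runtime_env::{RuntimeConfig, RuntimeEnv},
};
use datafusion::prelude::{SessionConfig, SessionContext};

fn build_session(
    batch_size: usize,
    memory_limit: usize,
    memory_fraction: f64,
    unified_pool: Option<Arc<dyn MemoryPool>>,
) -> datafusion::error::Result<SessionContext> {
    let mut rt_config = RuntimeConfig::new().with_disk_manager(DiskManagerConfig::NewOs);
    rt_config = match unified_pool {
        // Unified mode: Spark's task memory manager backs a custom memory pool.
        Some(pool) => rt_config.with_memory_pool(pool),
        // Native mode: DataFusion's own pool with an explicit limit and fraction.
        None => rt_config.with_memory_limit(memory_limit, memory_fraction),
    };
    let session_config = SessionConfig::new().with_batch_size(batch_size);
    let runtime = RuntimeEnv::try_new(rt_config)?;
    Ok(SessionContext::new_with_config_rt(session_config, Arc::new(runtime)))
}
// ---------------------------------------------------------------------------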
if exec_context.root_op.is_none() { + let start = Instant::now(); let planner = PhysicalPlanner::new(Arc::clone(&exec_context.session_ctx)) .with_exec_id(exec_context_id); let (scans, root_op) = planner.create_plan( @@ -364,21 +330,23 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_executePlan( &mut exec_context.input_sources.clone(), exec_context.partition_count, )?; + let physical_plan_time = start.elapsed(); + exec_context.plan_creation_time += physical_plan_time; exec_context.root_op = Some(Arc::clone(&root_op)); exec_context.scans = scans; if exec_context.explain_native { let formatted_plan_str = - DisplayableExecutionPlan::new(root_op.as_ref()).indent(true); - info!("Comet native query plan:\n {formatted_plan_str:}"); + DisplayableExecutionPlan::new(root_op.native_plan.as_ref()).indent(true); + info!("Comet native query plan:\n{formatted_plan_str:}"); } let task_ctx = exec_context.session_ctx.task_ctx(); let plan = exec_context.root_op.as_ref().unwrap(); - let stream = plan.execute(partition_id as usize, task_ctx)?; + let stream = plan.native_plan.execute(partition as usize, task_ctx)?; exec_context.stream = Some(stream); } else { // Pull input batches @@ -409,8 +377,14 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_executePlan( if exec_context.explain_native { if let Some(plan) = &exec_context.root_op { let formatted_plan_str = - DisplayableExecutionPlan::with_metrics(plan.as_ref()).indent(true); - info!("Comet native query plan with metrics:\n{formatted_plan_str:}"); + DisplayableExecutionPlan::with_metrics(plan.native_plan.as_ref()) + .indent(true); + info!( + "Comet native query plan with metrics (Plan #{} Stage {} Partition {}):\ + \n plan creation (including CometScans fetching first batches) took {:?}:\ + \n{formatted_plan_str:}", + plan.plan_id, stage_id, partition, exec_context.plan_creation_time + ); } } diff --git a/native/core/src/execution/kernels/hash.rs b/native/core/src/execution/kernels/hash.rs deleted file mode 100644 index b39fd6224..000000000 --- a/native/core/src/execution/kernels/hash.rs +++ /dev/null @@ -1,187 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use crate::common::bit; -use ahash::RandomState; -use arrow::datatypes::{i256, ArrowNativeType}; -use arrow_array::{ - downcast_dictionary_array, downcast_primitive_array, Array, ArrayAccessor, ArrayRef, - ArrowPrimitiveType, PrimitiveArray, -}; -use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; -use std::fmt::Debug; - -pub fn hash(src: &[ArrayRef], dst: &mut [u64]) { - let state = RandomState::with_seed(42); - src.iter().enumerate().for_each(|(idx, v)| { - downcast_dictionary_array!( - v => { - let keys = v.keys(); - let values = v.values(); - downcast_primitive_array!( - values => hash_dict_typed(&state, idx > 0, keys, values, dst), - dt => panic!("Expected only primitive type but found {}", dt) - ) - }, - dt => { - downcast_primitive_array!( - v => hash_typed(&state, idx > 0, v, dst), - _ => panic!("Expected only primitive type but found {}", dt) - ) - } - ) - }); -} - -fn hash_typed(state: &RandomState, mix: bool, array: T, dst: &mut [u64]) -where - T: ArrayAccessor, - T::Item: Hashable + Debug, -{ - let nullable = array.null_count() > 0; - let num_values = array.len(); - if nullable { - for i in 0..num_values { - if !array.is_null(i) { - unsafe { - let value = array.value_unchecked(i); - hash1(state, mix, i, value, dst); - } - } - } - } else { - for i in 0..num_values { - unsafe { - let value = array.value_unchecked(i); - hash1(state, mix, i, value, dst); - } - } - } -} - -fn hash_dict_typed( - state: &RandomState, - mix: bool, - keys: &PrimitiveArray, - values: V, - dst: &mut [u64], -) where - K: ArrowPrimitiveType, - V: ArrayAccessor, - V::Item: Hashable + Debug, -{ - let nullable = keys.null_count() > 0; - let num_keys = keys.len(); - let mut value_hashes = vec![0; values.len()]; - - for (i, value_hash) in value_hashes.iter_mut().enumerate() { - unsafe { - *value_hash = values.value_unchecked(i).create_hash(state); - } - } - - if nullable { - for i in 0..num_keys { - if !keys.is_null(i) { - unsafe { - let idx = keys.value_unchecked(i); - let hash = value_hashes[idx.as_usize()]; - hash1_helper(mix, i, hash, dst); - } - } - } - } else { - for i in 0..num_keys { - unsafe { - let idx = keys.value_unchecked(i); - let hash = value_hashes[idx.as_usize()]; - hash1_helper(mix, i, hash, dst); - } - } - } -} - -#[inline(always)] -fn hash1(state: &RandomState, mix: bool, i: usize, value: T, dst: &mut [u64]) { - let hash = value.create_hash(state); - hash1_helper(mix, i, hash, dst); -} - -#[inline(always)] -fn hash1_helper(mix: bool, i: usize, hash: u64, dst: &mut [u64]) { - dst[i] = if mix { - bit::mix_hash(dst[i], hash) - } else { - hash - } -} - -pub(crate) trait Hashable { - fn create_hash(&self, state: &RandomState) -> u64; -} - -macro_rules! 
impl_hashable { - ($($t:ty),+) => { - $(impl Hashable for $t { - #[inline] - fn create_hash(&self, state: &RandomState) -> u64 { - state.hash_one(self) - } - })+ - }; -} - -impl_hashable!(i8, i16, i32, u8, u16, u32, u64, i128, i256); - -impl Hashable for i64 { - fn create_hash(&self, state: &RandomState) -> u64 { - state.hash_one(self) - } -} - -impl Hashable for half::f16 { - fn create_hash(&self, _: &RandomState) -> u64 { - unimplemented!("hashing on f16 is not supported") - } -} - -impl Hashable for f32 { - fn create_hash(&self, state: &RandomState) -> u64 { - state.hash_one(u32::from_ne_bytes(self.to_ne_bytes())) - } -} - -impl Hashable for f64 { - fn create_hash(&self, state: &RandomState) -> u64 { - state.hash_one(u64::from_ne_bytes(self.to_ne_bytes())) - } -} - -impl Hashable for IntervalDayTime { - fn create_hash(&self, state: &RandomState) -> u64 { - state.hash_one(self.days); - state.hash_one(self.milliseconds) - } -} - -impl Hashable for IntervalMonthDayNano { - fn create_hash(&self, state: &RandomState) -> u64 { - state.hash_one(self.months); - state.hash_one(self.days); - state.hash_one(self.nanoseconds) - } -} diff --git a/native/core/src/execution/metrics/utils.rs b/native/core/src/execution/metrics/utils.rs index 9291f32c7..0eb4b631d 100644 --- a/native/core/src/execution/metrics/utils.rs +++ b/native/core/src/execution/metrics/utils.rs @@ -15,12 +15,13 @@ // specific language governing permissions and limitations // under the License. +use crate::execution::spark_plan::SparkPlan; use crate::jvm_bridge::jni_new_global_ref; use crate::{ errors::CometError, jvm_bridge::{jni_call, jni_new_string}, }; -use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_plan::metrics::MetricValue; use jni::objects::{GlobalRef, JString}; use jni::{objects::JObject, JNIEnv}; use std::collections::HashMap; @@ -28,18 +29,36 @@ use std::sync::Arc; /// Updates the metrics of a CometMetricNode. This function is called recursively to /// update the metrics of all the children nodes. The metrics are pulled from the -/// DataFusion execution plan and pushed to the Java side through JNI. +/// native execution plan and pushed to the Java side through JNI. 
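// ---------------------------------------------------------------------------
// Editor's sketch (not part of this patch): the new execution::spark_plan
// module itself is not included in this excerpt. Its shape, inferred from the
// call sites in this diff (`SparkPlan::new`, `SparkPlan::new_with_additional`,
// `native_plan`, `children()`, `additional_native_plans`, `plan_id`), is
// roughly the following; exact field types and visibility are assumptions.
use std::sync::Arc;
use datafusion::physical_plan::ExecutionPlan;

/// Wraps one native root plan per Spark operator, plus any helper plans that
/// DataFusion inserted (e.g. a projection added by a swapped hash join), so
/// that all of their metrics can be attributed to the same Spark SQL node.
#[derive(Debug)]
pub(crate) struct SparkPlan {
    pub(crate) plan_id: u32,
    pub(crate) native_plan: Arc<dyn ExecutionPlan>,
    pub(crate) children: Vec<Arc<SparkPlan>>,
    pub(crate) additional_native_plans: Vec<Arc<dyn ExecutionPlan>>,
}

impl SparkPlan {
    pub(crate) fn new(
        plan_id: u32,
        native_plan: Arc<dyn ExecutionPlan>,
        children: Vec<Arc<SparkPlan>>,
    ) -> Self {
        Self::new_with_additional(plan_id, native_plan, children, vec![])
    }

    pub(crate) fn new_with_additional(
        plan_id: u32,
        native_plan: Arc<dyn ExecutionPlan>,
        children: Vec<Arc<SparkPlan>>,
        additional_native_plans: Vec<Arc<dyn ExecutionPlan>>,
    ) -> Self {
        Self { plan_id, native_plan, children, additional_native_plans }
    }

    pub(crate) fn children(&self) -> &[Arc<SparkPlan>] {
        &self.children
    }
}
// ---------------------------------------------------------------------------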
pub fn update_comet_metric( env: &mut JNIEnv, metric_node: &JObject, - execution_plan: &Arc, + spark_plan: &Arc, metrics_jstrings: &mut HashMap>, ) -> Result<(), CometError> { + // combine all metrics from all native plans for this SparkPlan + let metrics = if spark_plan.additional_native_plans.is_empty() { + spark_plan.native_plan.metrics() + } else { + let mut metrics = spark_plan.native_plan.metrics().unwrap_or_default(); + for plan in &spark_plan.additional_native_plans { + let additional_metrics = plan.metrics().unwrap_or_default(); + for c in additional_metrics.iter() { + match c.value() { + MetricValue::OutputRows(_) => { + // we do not want to double count output rows + } + _ => metrics.push(c.to_owned()), + } + } + } + Some(metrics.aggregate_by_name()) + }; + update_metrics( env, metric_node, - &execution_plan - .metrics() + &metrics .unwrap_or_default() .iter() .map(|m| m.value()) @@ -49,7 +68,7 @@ pub fn update_comet_metric( )?; unsafe { - for (i, child_plan) in execution_plan.children().iter().enumerate() { + for (i, child_plan) in spark_plan.children().iter().enumerate() { let child_metric_node: JObject = jni_call!(env, comet_metric_node(metric_node).get_child_node(i as i32) -> JObject )?; diff --git a/native/core/src/execution/mod.rs b/native/core/src/execution/mod.rs index f17935702..a74ec3017 100644 --- a/native/core/src/execution/mod.rs +++ b/native/core/src/execution/mod.rs @@ -16,16 +16,16 @@ // under the License. //! PoC of vectorization execution through JNI to Rust. -pub mod datafusion; +pub mod expressions; pub mod jni_api; - -pub mod kernels; // for benchmarking - mod metrics; pub mod operators; +pub(crate) mod planner; pub mod serde; pub mod shuffle; pub(crate) mod sort; +pub(crate) mod spark_plan; +pub(crate) mod util; pub use datafusion_comet_spark_expr::timezone; pub(crate) mod utils; diff --git a/native/core/src/execution/datafusion/operators/expand.rs b/native/core/src/execution/operators/expand.rs similarity index 97% rename from native/core/src/execution/datafusion/operators/expand.rs rename to native/core/src/execution/operators/expand.rs index a3dd06507..fb43a6e49 100644 --- a/native/core/src/execution/datafusion/operators/expand.rs +++ b/native/core/src/execution/operators/expand.rs @@ -37,14 +37,14 @@ use std::{ /// A Comet native operator that expands a single row into multiple rows. This behaves as same as /// Spark Expand operator. 
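// ---------------------------------------------------------------------------
// Editor's sketch (not part of this patch): the metric-merging step above,
// isolated as a standalone helper. It keeps everything except `output_rows`
// from the helper plans (to avoid double counting) and then folds metrics with
// the same name together. The helper name `combined_metrics` is hypothetical.
use std::sync::Arc;
use datafusion::physical_plan::metrics::{MetricValue, MetricsSet};
use datafusion::physical_plan::ExecutionPlan;

fn combined_metrics(
    primary: &Arc<dyn ExecutionPlan>,
    helpers: &[Arc<dyn ExecutionPlan>],
) -> Option<MetricsSet> {
    let mut metrics = primary.metrics().unwrap_or_default();
    for plan in helpers {
        for m in plan.metrics().unwrap_or_default().iter() {
            if !matches!(m.value(), MetricValue::OutputRows(_)) {
                metrics.push(Arc::clone(m));
            }
        }
    }
    Some(metrics.aggregate_by_name())
}
// ---------------------------------------------------------------------------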
#[derive(Debug)] -pub struct CometExpandExec { +pub struct ExpandExec { projections: Vec>>, child: Arc, schema: SchemaRef, cache: PlanProperties, } -impl CometExpandExec { +impl ExpandExec { /// Create a new ExpandExec pub fn new( projections: Vec>>, @@ -66,7 +66,7 @@ impl CometExpandExec { } } -impl DisplayAs for CometExpandExec { +impl DisplayAs for ExpandExec { fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter) -> std::fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { @@ -87,7 +87,7 @@ impl DisplayAs for CometExpandExec { } } -impl ExecutionPlan for CometExpandExec { +impl ExecutionPlan for ExpandExec { fn as_any(&self) -> &dyn Any { self } @@ -104,7 +104,7 @@ impl ExecutionPlan for CometExpandExec { self: Arc, children: Vec>, ) -> datafusion_common::Result> { - let new_expand = CometExpandExec::new( + let new_expand = ExpandExec::new( self.projections.clone(), Arc::clone(&children[0]), Arc::clone(&self.schema), diff --git a/native/core/src/execution/operators/mod.rs b/native/core/src/execution/operators/mod.rs index bdc233e94..4e15e4341 100644 --- a/native/core/src/execution/operators/mod.rs +++ b/native/core/src/execution/operators/mod.rs @@ -27,6 +27,8 @@ pub use filter::FilterExec; pub use scan::*; mod copy; +mod expand; +pub use expand::ExpandExec; mod filter; mod scan; diff --git a/native/core/src/execution/operators/scan.rs b/native/core/src/execution/operators/scan.rs index 7d75f7f1c..a297f87c1 100644 --- a/native/core/src/execution/operators/scan.rs +++ b/native/core/src/execution/operators/scan.rs @@ -15,21 +15,10 @@ // specific language governing permissions and limitations // under the License. -use futures::Stream; -use itertools::Itertools; -use std::rc::Rc; -use std::{ - any::Any, - pin::Pin, - sync::{Arc, Mutex}, - task::{Context, Poll}, -}; - use crate::{ errors::CometError, execution::{ - datafusion::planner::TEST_EXEC_CONTEXT_ID, operators::ExecutionError, - utils::SparkArrowConvert, + operators::ExecutionError, planner::TEST_EXEC_CONTEXT_ID, utils::SparkArrowConvert, }, jvm_bridge::{jni_call, JVMClasses}, }; @@ -48,12 +37,23 @@ use datafusion::{ physical_plan::{ExecutionPlan, *}, }; use datafusion_common::{arrow_datafusion_err, DataFusionError, Result as DataFusionResult}; +use futures::Stream; +use itertools::Itertools; use jni::objects::JValueGen; use jni::objects::{GlobalRef, JObject}; use jni::sys::jsize; +use std::rc::Rc; +use std::{ + any::Any, + pin::Pin, + sync::{Arc, Mutex}, + task::{Context, Poll}, +}; /// ScanExec reads batches of data from Spark via JNI. The source of the scan could be a file -/// scan or the result of reading a broadcast or shuffle exchange. +/// scan or the result of reading a broadcast or shuffle exchange. ScanExec isn't invoked +/// until the data is already available in the JVM. When CometExecIterator invokes +/// Native.executePlan, it passes in the memory addresses of the input batches. #[derive(Debug, Clone)] pub struct ScanExec { /// The ID of the execution context that owns this subquery. We use this ID to retrieve the JVM @@ -65,6 +65,8 @@ pub struct ScanExec { pub input_source_description: String, /// The data types of columns of the input batch. Converted from Spark schema. pub data_types: Vec, + /// Schema of first batch + pub schema: SchemaRef, /// The input batch of input data. Used to determine the schema of the input data. /// It is also used in unit test to mock the input data from JVM. 
pub batch: Arc>>, @@ -72,6 +74,12 @@ pub struct ScanExec { cache: PlanProperties, /// Metrics collector metrics: ExecutionPlanMetricsSet, + /// Baseline metrics + baseline_metrics: BaselineMetrics, + /// Time waiting for JVM input plan to execute and return batches + jvm_fetch_time: Time, + /// Time spent in FFI + arrow_ffi_time: Time, } impl ScanExec { @@ -81,6 +89,11 @@ impl ScanExec { input_source_description: &str, data_types: Vec, ) -> Result { + let metrics_set = ExecutionPlanMetricsSet::default(); + let baseline_metrics = BaselineMetrics::new(&metrics_set, 0); + let arrow_ffi_time = MetricBuilder::new(&metrics_set).subset_time("arrow_ffi_time", 0); + let jvm_fetch_time = MetricBuilder::new(&metrics_set).subset_time("jvm_fetch_time", 0); + // Scan's schema is determined by the input batch, so we need to set it before execution. // Note that we determine if arrays are dictionary-encoded based on the // first batch. The array may be dictionary-encoded in some batches and not others, and @@ -88,7 +101,16 @@ impl ScanExec { // may end up either unpacking dictionary arrays or dictionary-encoding arrays. // Dictionary-encoded primitive arrays are always unpacked. let first_batch = if let Some(input_source) = input_source.as_ref() { - ScanExec::get_next(exec_context_id, input_source.as_obj(), data_types.len())? + let mut timer = baseline_metrics.elapsed_compute().timer(); + let batch = ScanExec::get_next( + exec_context_id, + input_source.as_obj(), + data_types.len(), + &jvm_fetch_time, + &arrow_ffi_time, + )?; + timer.stop(); + batch } else { InputBatch::EOF }; @@ -96,7 +118,7 @@ impl ScanExec { let schema = scan_schema(&first_batch, &data_types); let cache = PlanProperties::new( - EquivalenceProperties::new(schema), + EquivalenceProperties::new(Arc::clone(&schema)), // The partitioning is not important because we are not using DataFusion's // query planner or optimizer Partitioning::UnknownPartitioning(1), @@ -110,7 +132,11 @@ impl ScanExec { data_types, batch: Arc::new(Mutex::new(Some(first_batch))), cache, - metrics: ExecutionPlanMetricsSet::default(), + metrics: metrics_set, + baseline_metrics, + jvm_fetch_time, + arrow_ffi_time, + schema, }) } @@ -149,6 +175,7 @@ impl ScanExec { // This is a unit test. We don't need to call JNI. return Ok(()); } + let mut timer = self.baseline_metrics.elapsed_compute().timer(); let mut current_batch = self.batch.try_lock().unwrap(); if current_batch.is_none() { @@ -156,10 +183,14 @@ impl ScanExec { self.exec_context_id, self.input_source.as_ref().unwrap().as_obj(), self.data_types.len(), + &self.jvm_fetch_time, + &self.arrow_ffi_time, )?; *current_batch = Some(next_batch); } + timer.stop(); + Ok(()) } @@ -168,6 +199,8 @@ impl ScanExec { exec_context_id: i64, iter: &JObject, num_cols: usize, + jvm_fetch_time: &Time, + arrow_ffi_time: &Time, ) -> Result { if exec_context_id == TEST_EXEC_CONTEXT_ID { // This is a unit test. We don't need to call JNI. @@ -183,6 +216,21 @@ impl ScanExec { let mut env = JVMClasses::get_env()?; + let mut timer = jvm_fetch_time.timer(); + + let num_rows: i32 = unsafe { + jni_call!(&mut env, + comet_batch_iterator(iter).has_next() -> i32)? + }; + + timer.stop(); + + if num_rows == -1 { + return Ok(InputBatch::EOF); + } + + let mut timer = arrow_ffi_time.timer(); + let mut array_addrs = Vec::with_capacity(num_cols); let mut schema_addrs = Vec::with_capacity(num_cols); @@ -216,9 +264,9 @@ impl ScanExec { comet_batch_iterator(iter).next(array_obj, schema_obj) -> i32)? 
}; - if num_rows == -1 { - return Ok(InputBatch::EOF); - } + // we already checked for end of results on call to has_next() so should always + // have a valid row count when calling next() + assert!(num_rows != -1); let mut inputs: Vec = Vec::with_capacity(num_cols); @@ -238,6 +286,8 @@ impl ScanExec { } } + timer.stop(); + Ok(InputBatch::new(inputs, Some(num_rows as usize))) } } @@ -276,11 +326,15 @@ impl ExecutionPlan for ScanExec { } fn schema(&self) -> SchemaRef { - // `unwrap` is safe because `schema` is only called during converting - // Spark plan to DataFusion plan. At the moment, `batch` is not EOF. - let binding = self.batch.try_lock().unwrap(); - let input_batch = binding.as_ref().unwrap(); - scan_schema(input_batch, &self.data_types) + if self.exec_context_id == TEST_EXEC_CONTEXT_ID { + // `unwrap` is safe because `schema` is only called during converting + // Spark plan to DataFusion plan. At the moment, `batch` is not EOF. + let binding = self.batch.try_lock().unwrap(); + let input_batch = binding.as_ref().unwrap(); + scan_schema(input_batch, &self.data_types) + } else { + Arc::clone(&self.schema) + } } fn children(&self) -> Vec<&Arc> { @@ -303,6 +357,7 @@ impl ExecutionPlan for ScanExec { self.clone(), self.schema(), partition, + self.baseline_metrics.clone(), ))) } @@ -352,8 +407,12 @@ struct ScanStream<'a> { } impl<'a> ScanStream<'a> { - pub fn new(scan: ScanExec, schema: SchemaRef, partition: usize) -> Self { - let baseline_metrics = BaselineMetrics::new(&scan.metrics, partition); + pub fn new( + scan: ScanExec, + schema: SchemaRef, + partition: usize, + baseline_metrics: BaselineMetrics, + ) -> Self { let cast_time = MetricBuilder::new(&scan.metrics).subset_time("cast_time", partition); Self { scan, diff --git a/native/core/src/execution/datafusion/planner.rs b/native/core/src/execution/planner.rs similarity index 88% rename from native/core/src/execution/datafusion/planner.rs rename to native/core/src/execution/planner.rs index d892a50b8..5ebcb891c 100644 --- a/native/core/src/execution/datafusion/planner.rs +++ b/native/core/src/execution/planner.rs @@ -18,35 +18,17 @@ //! 
Converts Spark physical plan to DataFusion physical plan use super::expressions::EvalMode; -use crate::execution::datafusion::expressions::comet_scalar_funcs::create_comet_physical_fun; use crate::execution::operators::{CopyMode, FilterExec as CometFilterExec}; use crate::{ errors::ExpressionError, execution::{ - datafusion::{ - expressions::{ - avg::Avg, - avg_decimal::AvgDecimal, - bitwise_not::BitwiseNotExpr, - bloom_filter_agg::BloomFilterAgg, - bloom_filter_might_contain::BloomFilterMightContain, - checkoverflow::CheckOverflow, - correlation::Correlation, - covariance::Covariance, - negative, - stddev::Stddev, - strings::{Contains, EndsWith, Like, StartsWith, StringSpaceExpr, SubstringExpr}, - subquery::Subquery, - sum_decimal::SumDecimal, - unbound::UnboundColumn, - variance::Variance, - NormalizeNaNAndZero, - }, - operators::expand::CometExpandExec, - shuffle_writer::ShuffleWriterExec, + expressions::{ + bloom_filter_agg::BloomFilterAgg, bloom_filter_might_contain::BloomFilterMightContain, + subquery::Subquery, }, - operators::{CopyExec, ExecutionError, ScanExec}, + operators::{CopyExec, ExecutionError, ExpandExec, ScanExec}, serde::to_arrow_datatype, + shuffle::ShuffleWriterExec, }, }; use arrow::compute::CastOptions; @@ -83,9 +65,13 @@ use datafusion::{ }, prelude::SessionContext, }; +use datafusion_comet_spark_expr::{ + create_comet_physical_fun, create_negate_expr, SparkSchemaAdapterFactory, +}; +use datafusion_functions_nested::concat::ArrayAppend; use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; -use crate::execution::datafusion::schema_adapter::CometSchemaAdapterFactory; +use crate::execution::spark_plan::SparkPlan; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::physical_plan::parquet::ParquetExecBuilder; use datafusion::datasource::physical_plan::FileScanConfig; @@ -102,8 +88,11 @@ use datafusion_comet_proto::{ spark_partitioning::{partitioning::PartitioningStruct, Partitioning as SparkPartitioning}, }; use datafusion_comet_spark_expr::{ - Cast, CreateNamedStruct, DateTruncExpr, GetArrayStructFields, GetStructField, HourExpr, IfExpr, - ListExtract, MinuteExpr, RLike, SecondExpr, SparkCastOptions, TimestampTruncExpr, ToJson, + ArrayInsert, Avg, AvgDecimal, BitwiseNotExpr, Cast, CheckOverflow, Contains, Correlation, + Covariance, CreateNamedStruct, DateTruncExpr, EndsWith, GetArrayStructFields, GetStructField, + HourExpr, IfExpr, Like, ListExtract, MinuteExpr, NormalizeNaNAndZero, RLike, SecondExpr, + SparkCastOptions, StartsWith, Stddev, StringSpaceExpr, SubstringExpr, SumDecimal, + TimestampTruncExpr, ToJson, UnboundColumn, Variance, }; use datafusion_common::config::TableParquetOptions; use datafusion_common::scalar::ScalarStructBuilder; @@ -114,7 +103,8 @@ use datafusion_common::{ use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_expr::expr::find_df_window_func; use datafusion_expr::{ - AggregateUDF, WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, + AggregateUDF, ScalarUDF, WindowFrame, WindowFrameBound, WindowFrameUnits, + WindowFunctionDefinition, }; use datafusion_physical_expr::expressions::{Literal, StatsType}; use datafusion_physical_expr::window::WindowExpr; @@ -128,14 +118,13 @@ use std::{collections::HashMap, sync::Arc}; use url::Url; // For clippy error on type_complexity. 
-type ExecResult = Result; type PhyAggResult = Result, ExecutionError>; type PhyExprResult = Result, String)>, ExecutionError>; type PartitionPhyExprResult = Result>, ExecutionError>; struct JoinParameters { - pub left: Arc, - pub right: Arc, + pub left: Arc, + pub right: Arc, pub join_on: Vec<(Arc, Arc)>, pub join_filter: Option, pub join_type: DFJoinType, @@ -628,7 +617,7 @@ impl PhysicalPlanner { ExprStruct::UnaryMinus(expr) => { let child: Arc = self.create_expr(expr.child.as_ref().unwrap(), Arc::clone(&input_schema))?; - let result = negative::create_negate_expr(child, expr.fail_on_error); + let result = create_negate_expr(child, expr.fail_on_error); result.map_err(|e| ExecutionError::GeneralError(e.to_string())) } ExprStruct::NormalizeNanAndZero(expr) => { @@ -697,6 +686,49 @@ impl PhysicalPlanner { expr.ordinal as usize, ))) } + ExprStruct::ArrayAppend(expr) => { + let left = + self.create_expr(expr.left.as_ref().unwrap(), Arc::clone(&input_schema))?; + let right = + self.create_expr(expr.right.as_ref().unwrap(), Arc::clone(&input_schema))?; + let return_type = left.data_type(&input_schema)?; + let args = vec![Arc::clone(&left), right]; + let datafusion_array_append = + Arc::new(ScalarUDF::new_from_impl(ArrayAppend::new())); + let array_append_expr: Arc = Arc::new(ScalarFunctionExpr::new( + "array_append", + datafusion_array_append, + args, + return_type, + )); + + let is_null_expr: Arc = Arc::new(IsNullExpr::new(left)); + let null_literal_expr: Arc = + Arc::new(Literal::new(ScalarValue::Null)); + + let case_expr = CaseExpr::try_new( + None, + vec![(is_null_expr, null_literal_expr)], + Some(array_append_expr), + )?; + Ok(Arc::new(case_expr)) + } + ExprStruct::ArrayInsert(expr) => { + let src_array_expr = self.create_expr( + expr.src_array_expr.as_ref().unwrap(), + Arc::clone(&input_schema), + )?; + let pos_expr = + self.create_expr(expr.pos_expr.as_ref().unwrap(), Arc::clone(&input_schema))?; + let item_expr = + self.create_expr(expr.item_expr.as_ref().unwrap(), Arc::clone(&input_schema))?; + Ok(Arc::new(ArrayInsert::new( + src_array_expr, + pos_expr, + item_expr, + expr.legacy_negative_index, + ))) + } expr => Err(ExecutionError::GeneralError(format!( "Not implemented: {:?}", expr @@ -805,7 +837,13 @@ impl PhysicalPlanner { } } - /// Create a DataFusion physical plan from Spark physical plan. + /// Create a DataFusion physical plan from Spark physical plan. There is a level of + /// abstraction where a tree of SparkPlan nodes is returned. There is a 1:1 mapping from a + /// protobuf Operator (that represents a Spark operator) to a native SparkPlan struct. We + /// need this 1:1 mapping so that we can report metrics back to Spark. The native execution + /// plan that is generated for each Operator is sometimes a single ExecutionPlan, but in some + /// cases we generate a tree of ExecutionPlans and we need to collect metrics for all of these + /// plans so we store references to them in the SparkPlan struct. /// /// `inputs` is a vector of input source IDs. It is used to create `ScanExec`s. Each `ScanExec` /// will be assigned a unique ID from `inputs` and the ID will be used to identify the input @@ -819,12 +857,12 @@ impl PhysicalPlanner { /// /// Note that we return created `Scan`s which will be kept at JNI API. JNI calls will use it to /// feed in new input batch from Spark JVM side. 
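// ---------------------------------------------------------------------------
// Editor's sketch (not part of this patch): the mapping described in the doc
// comment above. When one Spark operator lowers to more than one native plan
// (for example a final HashAgg becoming AggregateExec + ProjectionExec, as in
// the hunks below), only one SparkPlan node is created; the extra native plan
// is recorded in `additional_native_plans` so its metrics still roll up to the
// single Spark operator. `SparkPlan` here refers to the shape sketched earlier
// in this excerpt; the helper name is illustrative only.
use std::sync::Arc;
use datafusion::physical_plan::ExecutionPlan;

fn wrap_two_stage_operator(
    plan_id: u32,
    root: Arc<dyn ExecutionPlan>,    // e.g. the ProjectionExec handed to the parent
    helper: Arc<dyn ExecutionPlan>,  // e.g. the AggregateExec underneath it
    child: Arc<SparkPlan>,
) -> Arc<SparkPlan> {
    Arc::new(SparkPlan::new_with_additional(
        plan_id,
        root,
        vec![child],
        vec![helper],
    ))
}
// ---------------------------------------------------------------------------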
- pub fn create_plan<'a>( + pub(crate) fn create_plan<'a>( &'a self, spark_plan: &'a Operator, inputs: &mut Vec>, partition_count: usize, - ) -> Result<(Vec, Arc), ExecutionError> { + ) -> Result<(Vec, Arc), ExecutionError> { let children = &spark_plan.children; match spark_plan.op_struct.as_ref().unwrap() { OpStruct::Projection(project) => { @@ -839,7 +877,14 @@ impl PhysicalPlanner { .map(|r| (r, format!("col_{}", idx))) }) .collect(); - Ok((scans, Arc::new(ProjectionExec::try_new(exprs?, child)?))) + let projection = Arc::new(ProjectionExec::try_new( + exprs?, + Arc::clone(&child.native_plan), + )?); + Ok(( + scans, + Arc::new(SparkPlan::new(spark_plan.plan_id, projection, vec![child])), + )) } OpStruct::Filter(filter) => { assert!(children.len() == 1); @@ -847,11 +892,26 @@ impl PhysicalPlanner { let predicate = self.create_expr(filter.predicate.as_ref().unwrap(), child.schema())?; - if can_reuse_input_batch(&child) { - Ok((scans, Arc::new(CometFilterExec::try_new(predicate, child)?))) - } else { - Ok((scans, Arc::new(FilterExec::try_new(predicate, child)?))) + if can_reuse_input_batch(&child.native_plan) { + let filter = Arc::new(CometFilterExec::try_new( + predicate, + Arc::clone(&child.native_plan), + )?); + + return Ok(( + scans, + Arc::new(SparkPlan::new(spark_plan.plan_id, filter, vec![child])), + )); } + let filter = Arc::new(FilterExec::try_new( + predicate, + Arc::clone(&child.native_plan), + )?); + + Ok(( + scans, + Arc::new(SparkPlan::new(spark_plan.plan_id, filter, vec![child])), + )) } OpStruct::HashAgg(agg) => { assert!(children.len() == 1); @@ -883,13 +943,13 @@ impl PhysicalPlanner { let num_agg = agg.agg_exprs.len(); let aggr_expr = agg_exprs?.into_iter().map(Arc::new).collect(); - let aggregate = Arc::new( + let aggregate: Arc = Arc::new( datafusion::physical_plan::aggregates::AggregateExec::try_new( mode, group_by, aggr_expr, vec![None; num_agg], // no filter expressions - Arc::clone(&child), + Arc::clone(&child.native_plan), Arc::clone(&schema), )?, ); @@ -903,8 +963,11 @@ impl PhysicalPlanner { }) .collect(); - let exec: Arc = if agg.result_exprs.is_empty() { - aggregate + if agg.result_exprs.is_empty() { + Ok(( + scans, + Arc::new(SparkPlan::new(spark_plan.plan_id, aggregate, vec![child])), + )) } else { // For final aggregation, DF's hash aggregate exec doesn't support Spark's // aggregate result expressions like `COUNT(col) + 1`, but instead relying @@ -913,17 +976,34 @@ impl PhysicalPlanner { // // Note that `result_exprs` should only be set for final aggregation on the // Spark side. - Arc::new(ProjectionExec::try_new(result_exprs?, aggregate)?) - }; - - Ok((scans, exec)) + let projection = Arc::new(ProjectionExec::try_new( + result_exprs?, + Arc::clone(&aggregate), + )?); + Ok(( + scans, + Arc::new(SparkPlan::new_with_additional( + spark_plan.plan_id, + projection, + vec![child], + vec![aggregate], + )), + )) + } } OpStruct::Limit(limit) => { assert!(children.len() == 1); let num = limit.limit; let (scans, child) = self.create_plan(&children[0], inputs, partition_count)?; - Ok((scans, Arc::new(LocalLimitExec::new(child, num as usize)))) + let limit = Arc::new(LocalLimitExec::new( + Arc::clone(&child.native_plan), + num as usize, + )); + Ok(( + scans, + Arc::new(SparkPlan::new(spark_plan.plan_id, limit, vec![child])), + )) } OpStruct::Sort(sort) => { assert!(children.len() == 1); @@ -941,11 +1021,20 @@ impl PhysicalPlanner { // SortExec fails in some cases if we do not unpack dictionary-encoded arrays, and // it would be more efficient if we could avoid that. 
// https://github.com/apache/datafusion-comet/issues/963 - let child = Self::wrap_in_copy_exec(child); + let child_copied = Self::wrap_in_copy_exec(Arc::clone(&child.native_plan)); + + let sort = Arc::new( + SortExec::new(LexOrdering::new(exprs?), Arc::clone(&child_copied)) + .with_fetch(fetch), + ); Ok(( scans, - Arc::new(SortExec::new(LexOrdering::new(exprs?), child).with_fetch(fetch)), + Arc::new(SparkPlan::new( + spark_plan.plan_id, + sort, + vec![Arc::clone(&child)], + )), )) } OpStruct::NativeScan(scan) => { @@ -1067,16 +1156,24 @@ impl PhysicalPlanner { table_parquet_options.global.pushdown_filters = true; table_parquet_options.global.reorder_filters = true; + let mut spark_cast_options = SparkCastOptions::new(EvalMode::Legacy, "UTC", false); + spark_cast_options.allow_cast_unsigned_ints = true; + let mut builder = ParquetExecBuilder::new(file_scan_config) .with_table_parquet_options(table_parquet_options) - .with_schema_adapter_factory(Arc::new(CometSchemaAdapterFactory::default())); + .with_schema_adapter_factory(Arc::new(SparkSchemaAdapterFactory::new( + spark_cast_options, + ))); if let Some(filter) = cnf_data_filters { builder = builder.with_predicate(filter); } let scan = builder.build(); - Ok((vec![], Arc::new(scan))) + Ok(( + vec![], + Arc::new(SparkPlan::new(spark_plan.plan_id, Arc::new(scan), vec![])), + )) } OpStruct::Scan(scan) => { let data_types = scan.fields.iter().map(to_arrow_datatype).collect_vec(); @@ -1101,7 +1198,10 @@ impl PhysicalPlanner { // The `ScanExec` operator will take actual arrays from Spark during execution let scan = ScanExec::new(self.exec_context_id, input_source, &scan.source, data_types)?; - Ok((vec![scan.clone()], Arc::new(scan))) + Ok(( + vec![scan.clone()], + Arc::new(SparkPlan::new(spark_plan.plan_id, Arc::new(scan), vec![])), + )) } OpStruct::ShuffleWriter(writer) => { assert!(children.len() == 1); @@ -1110,14 +1210,20 @@ impl PhysicalPlanner { let partitioning = self .create_partitioning(writer.partitioning.as_ref().unwrap(), child.schema())?; + let shuffle_writer = Arc::new(ShuffleWriterExec::try_new( + Arc::clone(&child.native_plan), + partitioning, + writer.output_data_file.clone(), + writer.output_index_file.clone(), + )?); + Ok(( scans, - Arc::new(ShuffleWriterExec::try_new( - child, - partitioning, - writer.output_data_file.clone(), - writer.output_index_file.clone(), - )?), + Arc::new(SparkPlan::new( + spark_plan.plan_id, + shuffle_writer, + vec![Arc::clone(&child)], + )), )) } OpStruct::Expand(expand) => { @@ -1161,15 +1267,18 @@ impl PhysicalPlanner { // the data corruption. Note that we only need to copy the input batch // if the child operator is `ScanExec`, because other operators after `ScanExec` // will create new arrays for the output batch. 
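// ---------------------------------------------------------------------------
// Editor's sketch (not part of this patch): the copy rule described in the
// comment above, written out as a standalone helper. `CopyExec` and
// `CopyMode::UnpackOrDeepCopy` are the existing Comet operators used in this
// file; the boolean parameter stands in for `can_reuse_input_batch(&plan)`.
use std::sync::Arc;
use datafusion::physical_plan::ExecutionPlan;
use crate::execution::operators::{CopyExec, CopyMode};

fn copy_if_input_is_reused(
    plan: Arc<dyn ExecutionPlan>,
    reuses_input_batch: bool,
) -> Arc<dyn ExecutionPlan> {
    if reuses_input_batch {
        // ScanExec hands out buffers that the next batch will overwrite, so
        // unpack dictionaries / deep-copy before any operator that caches rows.
        Arc::new(CopyExec::new(plan, CopyMode::UnpackOrDeepCopy))
    } else {
        // Operators downstream of ScanExec already produce fresh arrays.
        plan
    }
}
// ---------------------------------------------------------------------------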
- let child = if can_reuse_input_batch(&child) { - Arc::new(CopyExec::new(child, CopyMode::UnpackOrDeepCopy)) + let input = if can_reuse_input_batch(&child.native_plan) { + Arc::new(CopyExec::new( + Arc::clone(&child.native_plan), + CopyMode::UnpackOrDeepCopy, + )) } else { - child + Arc::clone(&child.native_plan) }; - + let expand = Arc::new(ExpandExec::new(projections, input, schema)); Ok(( scans, - Arc::new(CometExpandExec::new(projections, child, schema)), + Arc::new(SparkPlan::new(spark_plan.plan_id, expand, vec![child])), )) } OpStruct::SortMergeJoin(join) => { @@ -1198,8 +1307,8 @@ impl PhysicalPlanner { .collect(); let join = Arc::new(SortMergeJoinExec::try_new( - join_params.left, - join_params.right, + Arc::clone(&join_params.left.native_plan), + Arc::clone(&join_params.right.native_plan), join_params.join_on, join_params.join_filter, join_params.join_type, @@ -1209,7 +1318,17 @@ impl PhysicalPlanner { false, )?); - Ok((scans, join)) + Ok(( + scans, + Arc::new(SparkPlan::new( + spark_plan.plan_id, + join, + vec![ + Arc::clone(&join_params.left), + Arc::clone(&join_params.right), + ], + )), + )) } OpStruct::HashJoin(join) => { let (join_params, scans) = self.parse_join_parameters( @@ -1226,8 +1345,8 @@ impl PhysicalPlanner { // to copy the input batch to avoid the data corruption from reusing the input // batch. We also need to unpack dictionary arrays, because the join operators // do not support them. - let left = Self::wrap_in_copy_exec(join_params.left); - let right = Self::wrap_in_copy_exec(join_params.right); + let left = Self::wrap_in_copy_exec(Arc::clone(&join_params.left.native_plan)); + let right = Self::wrap_in_copy_exec(Arc::clone(&join_params.right.native_plan)); let hash_join = Arc::new(HashJoinExec::try_new( left, @@ -1243,13 +1362,35 @@ impl PhysicalPlanner { )?); // If the hash join is build right, we need to swap the left and right - let hash_join = if join.build_side == BuildSide::BuildLeft as i32 { - hash_join + if join.build_side == BuildSide::BuildLeft as i32 { + Ok(( + scans, + Arc::new(SparkPlan::new( + spark_plan.plan_id, + hash_join, + vec![join_params.left, join_params.right], + )), + )) } else { - swap_hash_join(hash_join.as_ref(), PartitionMode::Partitioned)? 
- }; + let swapped_hash_join = + swap_hash_join(hash_join.as_ref(), PartitionMode::Partitioned)?; + + let mut additional_native_plans = vec![]; + if swapped_hash_join.as_any().is::() { + // a projection was added to the hash join + additional_native_plans.push(Arc::clone(swapped_hash_join.children()[0])); + } - Ok((scans, hash_join)) + Ok(( + scans, + Arc::new(SparkPlan::new_with_additional( + spark_plan.plan_id, + swapped_hash_join, + vec![join_params.left, join_params.right], + additional_native_plans, + )), + )) + } } OpStruct::Window(wnd) => { let (scans, child) = self.create_plan(&children[0], inputs, partition_count)?; @@ -1282,14 +1423,15 @@ impl PhysicalPlanner { }) .collect(); + let window_agg = Arc::new(BoundedWindowAggExec::try_new( + window_expr?, + Arc::clone(&child.native_plan), + partition_exprs.to_vec(), + InputOrderMode::Sorted, + )?); Ok(( scans, - Arc::new(BoundedWindowAggExec::try_new( - window_expr?, - child, - partition_exprs.to_vec(), - InputOrderMode::Sorted, - )?), + Arc::new(SparkPlan::new(spark_plan.plan_id, window_agg, vec![child])), )) } } @@ -1427,8 +1569,8 @@ impl PhysicalPlanner { Ok(( JoinParameters { - left, - right, + left: Arc::clone(&left), + right: Arc::clone(&right), join_on, join_type, join_filter, @@ -1772,10 +1914,8 @@ impl PhysicalPlanner { self.create_expr(expr.num_bits.as_ref().unwrap(), Arc::clone(&schema))?; let datatype = to_arrow_datatype(expr.datatype.as_ref().unwrap()); let func = AggregateUDF::new_from_impl(BloomFilterAgg::new( - Arc::clone(&child), Arc::clone(&num_items), Arc::clone(&num_bits), - "bloom_filter_agg", datatype, )); Self::create_aggr_func_expr("bloom_filter_agg", schema, vec![child], func) @@ -2307,11 +2447,12 @@ mod tests { use datafusion::{physical_plan::common::collect, prelude::SessionContext}; use tokio::sync::mpsc; - use crate::execution::{datafusion::planner::PhysicalPlanner, operators::InputBatch}; + use crate::execution::{operators::InputBatch, planner::PhysicalPlanner}; use crate::execution::operators::ExecutionError; use datafusion_comet_proto::{ spark_expression::expr::ExprStruct::*, + spark_expression::Expr, spark_expression::{self, literal}, spark_operator, spark_operator::{operator::OpStruct, Operator}, @@ -2320,6 +2461,7 @@ mod tests { #[test] fn test_unpack_dictionary_primitive() { let op_scan = Operator { + plan_id: 0, children: vec![], op_struct: Some(OpStruct::Scan(spark_operator::Scan { fields: vec![spark_expression::DataType { @@ -2345,7 +2487,7 @@ mod tests { let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); - let mut stream = datafusion_plan.execute(0, task_ctx).unwrap(); + let mut stream = datafusion_plan.native_plan.execute(0, task_ctx).unwrap(); let runtime = tokio::runtime::Runtime::new().unwrap(); let (tx, mut rx) = mpsc::channel(1); @@ -2392,6 +2534,7 @@ mod tests { #[test] fn test_unpack_dictionary_string() { let op_scan = Operator { + plan_id: 0, children: vec![], op_struct: Some(OpStruct::Scan(spark_operator::Scan { fields: vec![spark_expression::DataType { @@ -2428,7 +2571,7 @@ mod tests { let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); - let mut stream = datafusion_plan.execute(0, task_ctx).unwrap(); + let mut stream = datafusion_plan.native_plan.execute(0, task_ctx).unwrap(); let runtime = tokio::runtime::Runtime::new().unwrap(); let (tx, mut rx) = mpsc::channel(1); @@ -2477,19 +2620,7 @@ mod tests { #[tokio::test()] #[allow(clippy::field_reassign_with_default)] async fn to_datafusion_filter() { - let op_scan = 
spark_operator::Operator { - children: vec![], - op_struct: Some(spark_operator::operator::OpStruct::Scan( - spark_operator::Scan { - fields: vec![spark_expression::DataType { - type_id: 3, - type_info: None, - }], - source: "".to_string(), - }, - )), - }; - + let op_scan = create_scan(); let op = create_filter(op_scan, 0); let planner = PhysicalPlanner::default(); @@ -2501,7 +2632,10 @@ mod tests { let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); - let stream = datafusion_plan.execute(0, Arc::clone(&task_ctx)).unwrap(); + let stream = datafusion_plan + .native_plan + .execute(0, Arc::clone(&task_ctx)) + .unwrap(); let output = collect(stream).await.unwrap(); assert!(output.is_empty()); } @@ -2555,10 +2689,76 @@ mod tests { }; Operator { + plan_id: 0, children: vec![child_op], op_struct: Some(OpStruct::Filter(spark_operator::Filter { predicate: Some(expr), })), } } + + #[test] + fn spark_plan_metrics_filter() { + let op_scan = create_scan(); + let op = create_filter(op_scan, 0); + let planner = PhysicalPlanner::default(); + + let (mut _scans, filter_exec) = planner.create_plan(&op, &mut vec![], 1).unwrap(); + + assert_eq!("CometFilterExec", filter_exec.native_plan.name()); + assert_eq!(1, filter_exec.children.len()); + assert_eq!(0, filter_exec.additional_native_plans.len()); + } + + #[test] + fn spark_plan_metrics_hash_join() { + let op_scan = create_scan(); + let op_join = Operator { + plan_id: 0, + children: vec![op_scan.clone(), op_scan.clone()], + op_struct: Some(OpStruct::HashJoin(spark_operator::HashJoin { + left_join_keys: vec![create_bound_reference(0)], + right_join_keys: vec![create_bound_reference(0)], + join_type: 0, + condition: None, + build_side: 0, + })), + }; + + let planner = PhysicalPlanner::default(); + + let (_scans, hash_join_exec) = planner.create_plan(&op_join, &mut vec![], 1).unwrap(); + + assert_eq!("HashJoinExec", hash_join_exec.native_plan.name()); + assert_eq!(2, hash_join_exec.children.len()); + assert_eq!("ScanExec", hash_join_exec.children[0].native_plan.name()); + assert_eq!("ScanExec", hash_join_exec.children[1].native_plan.name()); + } + + fn create_bound_reference(index: i32) -> Expr { + Expr { + expr_struct: Some(Bound(spark_expression::BoundReference { + index, + datatype: Some(create_proto_datatype()), + })), + } + } + + fn create_scan() -> Operator { + Operator { + plan_id: 0, + children: vec![], + op_struct: Some(OpStruct::Scan(spark_operator::Scan { + fields: vec![create_proto_datatype()], + source: "".to_string(), + })), + } + } + + fn create_proto_datatype() -> spark_expression::DataType { + spark_expression::DataType { + type_id: 3, + type_info: None, + } + } } diff --git a/native/core/src/execution/shuffle/list.rs b/native/core/src/execution/shuffle/list.rs index d8bdcb197..0f7f3e8cb 100644 --- a/native/core/src/execution/shuffle/list.rs +++ b/native/core/src/execution/shuffle/list.rs @@ -28,7 +28,6 @@ use arrow_schema::{DataType, TimeUnit}; pub struct SparkUnsafeArray { row_addr: i64, - row_size: i32, num_elements: usize, element_offset: i64, } @@ -45,7 +44,7 @@ impl SparkUnsafeObject for SparkUnsafeArray { impl SparkUnsafeArray { /// Creates a `SparkUnsafeArray` which points to the given address and size in bytes. - pub fn new(addr: i64, size: i32) -> Self { + pub fn new(addr: i64) -> Self { // Read the number of elements from the first 8 bytes. 
let slice: &[u8] = unsafe { std::slice::from_raw_parts(addr as *const u8, 8) }; let num_elements = i64::from_le_bytes(slice.try_into().unwrap()); @@ -60,7 +59,6 @@ impl SparkUnsafeArray { Self { row_addr: addr, - row_size: size, num_elements: num_elements as usize, element_offset: addr + Self::get_header_portion_in_bytes(num_elements), } diff --git a/native/core/src/execution/shuffle/map.rs b/native/core/src/execution/shuffle/map.rs index 014695293..0969168f8 100644 --- a/native/core/src/execution/shuffle/map.rs +++ b/native/core/src/execution/shuffle/map.rs @@ -30,8 +30,6 @@ use arrow_array::builder::{ use arrow_schema::{DataType, FieldRef, Fields, TimeUnit}; pub struct SparkUnsafeMap { - row_addr: i64, - row_size: i32, pub(crate) keys: SparkUnsafeArray, pub(crate) values: SparkUnsafeArray, } @@ -59,8 +57,8 @@ impl SparkUnsafeMap { panic!("Negative value size in bytes of map: {}", value_array_size); } - let keys = SparkUnsafeArray::new(addr + 8, key_array_size as i32); - let values = SparkUnsafeArray::new(addr + 8 + key_array_size, value_array_size); + let keys = SparkUnsafeArray::new(addr + 8); + let values = SparkUnsafeArray::new(addr + 8 + key_array_size); if keys.get_num_elements() != values.get_num_elements() { panic!( @@ -70,16 +68,7 @@ impl SparkUnsafeMap { ); } - Self { - row_addr: addr, - row_size: size, - keys, - values, - } - } - - pub(crate) fn get_num_elements(&self) -> usize { - self.keys.get_num_elements() + Self { keys, values } } } diff --git a/native/core/src/execution/shuffle/mod.rs b/native/core/src/execution/shuffle/mod.rs index b052df29b..8721ead74 100644 --- a/native/core/src/execution/shuffle/mod.rs +++ b/native/core/src/execution/shuffle/mod.rs @@ -18,3 +18,5 @@ mod list; mod map; pub mod row; +mod shuffle_writer; +pub use shuffle_writer::ShuffleWriterExec; diff --git a/native/core/src/execution/shuffle/row.rs b/native/core/src/execution/shuffle/row.rs index 2aeb48815..ecab77d96 100644 --- a/native/core/src/execution/shuffle/row.rs +++ b/native/core/src/execution/shuffle/row.rs @@ -20,10 +20,10 @@ use crate::{ errors::CometError, execution::{ - datafusion::shuffle_writer::{write_ipc_compressed, Checksum}, shuffle::{ list::{append_list_element, SparkUnsafeArray}, map::{append_map_elements, get_map_key_value_dt, SparkUnsafeMap}, + shuffle_writer::{write_ipc_compressed, Checksum}, }, utils::bytes_to_i128, }, @@ -40,6 +40,7 @@ use arrow_array::{ Array, ArrayRef, RecordBatch, RecordBatchOptions, }; use arrow_schema::{ArrowError, DataType, Field, Schema, TimeUnit}; +use datafusion::physical_plan::metrics::Time; use jni::sys::{jint, jlong}; use std::{ fs::OpenOptions, @@ -48,7 +49,6 @@ use std::{ sync::Arc, }; -const WORD_SIZE: i64 = 8; const MAX_LONG_DIGITS: u8 = 18; const NESTED_TYPE_BUILDER_CAPACITY: usize = 100; @@ -170,8 +170,8 @@ pub trait SparkUnsafeObject { /// Returns array value at the given index of the object. 
fn get_array(&self, index: usize) -> SparkUnsafeArray { - let (offset, len) = self.get_offset_and_len(index); - SparkUnsafeArray::new(self.get_row_addr() + offset as i64, len) + let (offset, _) = self.get_offset_and_len(index); + SparkUnsafeArray::new(self.get_row_addr() + offset as i64) } fn get_map(&self, index: usize) -> SparkUnsafeMap { @@ -3355,7 +3355,10 @@ pub fn process_sorted_row_partition( let mut frozen: Vec = vec![]; let mut cursor = Cursor::new(&mut frozen); cursor.seek(SeekFrom::End(0))?; - written += write_ipc_compressed(&batch, &mut cursor)?; + + // we do not collect metrics in Native_writeSortedFileNative + let ipc_time = Time::default(); + written += write_ipc_compressed(&batch, &mut cursor, &ipc_time)?; if let Some(checksum) = &mut current_checksum { checksum.update(&mut cursor)?; diff --git a/native/core/src/execution/datafusion/shuffle_writer.rs b/native/core/src/execution/shuffle/shuffle_writer.rs similarity index 89% rename from native/core/src/execution/datafusion/shuffle_writer.rs rename to native/core/src/execution/shuffle/shuffle_writer.rs index 7587ff06d..fcc8c51f6 100644 --- a/native/core/src/execution/datafusion/shuffle_writer.rs +++ b/native/core/src/execution/shuffle/shuffle_writer.rs @@ -17,22 +17,14 @@ //! Defines the External shuffle repartition plan. -use std::{ - any::Any, - fmt, - fmt::{Debug, Formatter}, - fs::{File, OpenOptions}, - io::{BufReader, BufWriter, Cursor, Read, Seek, SeekFrom, Write}, - path::Path, - sync::Arc, - task::{Context, Poll}, +use crate::{ + common::bit::ceil, + errors::{CometError, CometResult}, }; - use arrow::{datatypes::*, ipc::writer::StreamWriter}; use async_trait::async_trait; use bytes::Buf; use crc32fast::Hasher; -use datafusion::physical_plan::metrics::Time; use datafusion::{ arrow::{ array::*, @@ -48,23 +40,32 @@ use datafusion::{ runtime_env::RuntimeEnv, }, physical_plan::{ - metrics::{BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}, + metrics::{ + BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet, Time, + }, stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }, }; +use datafusion_comet_spark_expr::spark_hash::create_murmur3_hashes; use datafusion_physical_expr::EquivalenceProperties; use futures::executor::block_on; use futures::{lock::Mutex, Stream, StreamExt, TryFutureExt, TryStreamExt}; use itertools::Itertools; use simd_adler32::Adler32; - -use crate::{ - common::bit::ceil, - errors::{CometError, CometResult}, +use std::io::Error; +use std::{ + any::Any, + fmt, + fmt::{Debug, Formatter}, + fs::{File, OpenOptions}, + io::{BufReader, BufWriter, Cursor, Read, Seek, SeekFrom, Write}, + path::Path, + sync::Arc, + task::{Context, Poll}, }; -use datafusion_comet_spark_expr::spark_hash::create_murmur3_hashes; +use tokio::time::Instant; /// The status of appending rows to a partition buffer. enum AppendRowStatus { @@ -242,7 +243,7 @@ impl PartitionBuffer { /// Initializes active builders if necessary. /// Returns error if memory reservation fails. 
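// ---------------------------------------------------------------------------
// Editor's sketch (not part of this patch): the metric/timer pattern that the
// changes below add around memory-pool and repartitioning work. A named
// sub-timer is registered once on the operator's metrics set and then wrapped
// around each region of interest. Function names are illustrative only.
use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricBuilder, Time};

fn register_mempool_timer(metrics: &ExecutionPlanMetricsSet, partition: usize) -> Time {
    // shows up under the plan's metrics as "mempool_time"
    MetricBuilder::new(metrics).subset_time("mempool_time", partition)
}

fn timed_reservation_work(mempool_time: &Time) {
    let mut timer = mempool_time.timer();
    // ... grow/shrink the memory reservation here ...
    timer.stop();
}
// ---------------------------------------------------------------------------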
- fn init_active_if_necessary(&mut self) -> Result { + fn init_active_if_necessary(&mut self, metrics: &ShuffleRepartitionerMetrics) -> Result { let mut mem_diff = 0; if self.active.is_empty() { @@ -256,9 +257,13 @@ impl PartitionBuffer { .sum::(); } + let mut mempool_timer = metrics.mempool_time.timer(); self.reservation.try_grow(self.active_slots_mem_size)?; + mempool_timer.stop(); + let mut repart_timer = metrics.repart_time.timer(); self.active = new_array_builders(&self.schema, self.batch_size); + repart_timer.stop(); mem_diff += self.active_slots_mem_size as isize; } @@ -271,13 +276,13 @@ impl PartitionBuffer { columns: &[ArrayRef], indices: &[usize], start_index: usize, - time_metric: &Time, + metrics: &ShuffleRepartitionerMetrics, ) -> AppendRowStatus { let mut mem_diff = 0; let mut start = start_index; // lazy init because some partition may be empty - let init = self.init_active_if_necessary(); + let init = self.init_active_if_necessary(metrics); if init.is_err() { return AppendRowStatus::StartIndex(start); } @@ -285,6 +290,8 @@ impl PartitionBuffer { while start < indices.len() { let end = (start + self.batch_size).min(indices.len()); + + let mut repart_timer = metrics.repart_time.timer(); self.active .iter_mut() .zip(columns) @@ -292,16 +299,16 @@ impl PartitionBuffer { append_columns(builder, column, &indices[start..end], column.data_type()); }); self.num_active_rows += end - start; + repart_timer.stop(); + if self.num_active_rows >= self.batch_size { - let mut timer = time_metric.timer(); - let flush = self.flush(); + let flush = self.flush(&metrics.ipc_time); if let Err(e) = flush { return AppendRowStatus::MemDiff(Err(e)); } mem_diff += flush.unwrap(); - timer.stop(); - let init = self.init_active_if_necessary(); + let init = self.init_active_if_necessary(metrics); if init.is_err() { return AppendRowStatus::StartIndex(end); } @@ -313,7 +320,7 @@ impl PartitionBuffer { } /// flush active data into frozen bytes - fn flush(&mut self) -> Result { + fn flush(&mut self, ipc_time: &Time) -> Result { if self.num_active_rows == 0 { return Ok(0); } @@ -330,7 +337,7 @@ impl PartitionBuffer { let frozen_capacity_old = self.frozen.capacity(); let mut cursor = Cursor::new(&mut self.frozen); cursor.seek(SeekFrom::End(0))?; - write_ipc_compressed(&frozen_batch, &mut cursor)?; + write_ipc_compressed(&frozen_batch, &mut cursor, ipc_time)?; mem_diff += (self.frozen.capacity() - frozen_capacity_old) as isize; Ok(mem_diff) @@ -628,6 +635,21 @@ struct ShuffleRepartitionerMetrics { /// metrics baseline: BaselineMetrics, + /// Time to perform repartitioning + repart_time: Time, + + /// Time interacting with memory pool + mempool_time: Time, + + /// Time encoding batches to IPC format + ipc_time: Time, + + /// Time spent writing to disk. Maps to "shuffleWriteTime" in Spark SQL Metrics. 
+ write_time: Time, + + /// Number of input batches + input_batches: Count, + /// count of spills during the execution of the operator spill_count: Count, @@ -642,6 +664,11 @@ impl ShuffleRepartitionerMetrics { fn new(metrics: &ExecutionPlanMetricsSet, partition: usize) -> Self { Self { baseline: BaselineMetrics::new(metrics, partition), + repart_time: MetricBuilder::new(metrics).subset_time("repart_time", partition), + mempool_time: MetricBuilder::new(metrics).subset_time("mempool_time", partition), + ipc_time: MetricBuilder::new(metrics).subset_time("ipc_time", partition), + write_time: MetricBuilder::new(metrics).subset_time("write_time", partition), + input_batches: MetricBuilder::new(metrics).counter("input_batches", partition), spill_count: MetricBuilder::new(metrics).spill_count(partition), spilled_bytes: MetricBuilder::new(metrics).spilled_bytes(partition), data_size: MetricBuilder::new(metrics).counter("data_size", partition), @@ -701,6 +728,7 @@ impl ShuffleRepartitioner { /// This function will slice input batch according to configured batch size and then /// shuffle rows into corresponding partition buffer. async fn insert_batch(&mut self, batch: RecordBatch) -> Result<()> { + let start_time = Instant::now(); let mut start = 0; while start < batch.num_rows() { let end = (start + self.batch_size).min(batch.num_rows()); @@ -708,6 +736,11 @@ impl ShuffleRepartitioner { self.partitioning_batch(batch).await?; start = end; } + self.metrics.input_batches.add(1); + self.metrics + .baseline + .elapsed_compute() + .add_duration(start_time.elapsed()); Ok(()) } @@ -738,53 +771,61 @@ impl ShuffleRepartitioner { let num_output_partitions = self.num_output_partitions; match &self.partitioning { Partitioning::Hash(exprs, _) => { - let arrays = exprs - .iter() - .map(|expr| expr.evaluate(&input)?.into_array(input.num_rows())) - .collect::>>()?; - - // use identical seed as spark hash partition - let hashes_buf = &mut self.hashes_buf[..arrays[0].len()]; - hashes_buf.fill(42_u32); - - // Hash arrays and compute buckets based on number of partitions - let partition_ids = &mut self.partition_ids[..arrays[0].len()]; - create_murmur3_hashes(&arrays, hashes_buf)? - .iter() - .enumerate() - .for_each(|(idx, hash)| { - partition_ids[idx] = pmod(*hash, num_output_partitions) as u64 + let (partition_starts, shuffled_partition_ids): (Vec, Vec) = { + let mut timer = self.metrics.repart_time.timer(); + let arrays = exprs + .iter() + .map(|expr| expr.evaluate(&input)?.into_array(input.num_rows())) + .collect::>>()?; + + // use identical seed as spark hash partition + let hashes_buf = &mut self.hashes_buf[..arrays[0].len()]; + hashes_buf.fill(42_u32); + + // Hash arrays and compute buckets based on number of partitions + let partition_ids = &mut self.partition_ids[..arrays[0].len()]; + create_murmur3_hashes(&arrays, hashes_buf)? + .iter() + .enumerate() + .for_each(|(idx, hash)| { + partition_ids[idx] = pmod(*hash, num_output_partitions) as u64 + }); + + // count each partition size + let mut partition_counters = vec![0usize; num_output_partitions]; + partition_ids + .iter() + .for_each(|partition_id| partition_counters[*partition_id as usize] += 1); + + // accumulate partition counters into partition ends + // e.g. 
partition counter: [1, 3, 2, 1] => [1, 4, 6, 7] + let mut partition_ends = partition_counters; + let mut accum = 0; + partition_ends.iter_mut().for_each(|v| { + *v += accum; + accum = *v; }); - // count each partition size - let mut partition_counters = vec![0usize; num_output_partitions]; - partition_ids - .iter() - .for_each(|partition_id| partition_counters[*partition_id as usize] += 1); - - // accumulate partition counters into partition ends - // e.g. partition counter: [1, 3, 2, 1] => [1, 4, 6, 7] - let mut partition_ends = partition_counters; - let mut accum = 0; - partition_ends.iter_mut().for_each(|v| { - *v += accum; - accum = *v; - }); - - // calculate shuffled partition ids - // e.g. partition ids: [3, 1, 1, 1, 2, 2, 0] => [6, 1, 2, 3, 4, 5, 0] which is the - // row indices for rows ordered by their partition id. For example, first partition - // 0 has one row index [6], partition 1 has row indices [1, 2, 3], etc. - let mut shuffled_partition_ids = vec![0usize; input.num_rows()]; - for (index, partition_id) in partition_ids.iter().enumerate().rev() { - partition_ends[*partition_id as usize] -= 1; - let end = partition_ends[*partition_id as usize]; - shuffled_partition_ids[end] = index; - } + // calculate shuffled partition ids + // e.g. partition ids: [3, 1, 1, 1, 2, 2, 0] => [6, 1, 2, 3, 4, 5, 0] which is the + // row indices for rows ordered by their partition id. For example, first partition + // 0 has one row index [6], partition 1 has row indices [1, 2, 3], etc. + let mut shuffled_partition_ids = vec![0usize; input.num_rows()]; + for (index, partition_id) in partition_ids.iter().enumerate().rev() { + partition_ends[*partition_id as usize] -= 1; + let end = partition_ends[*partition_id as usize]; + shuffled_partition_ids[end] = index; + } - // after calculating, partition ends become partition starts - let mut partition_starts = partition_ends; - partition_starts.push(input.num_rows()); + // after calculating, partition ends become partition starts + let mut partition_starts = partition_ends; + partition_starts.push(input.num_rows()); + timer.stop(); + Ok::<(Vec, Vec), DataFusionError>(( + partition_starts, + shuffled_partition_ids, + )) + }?; // For each interval of row indices of partition, taking rows from input batch and // appending into output buffer. @@ -804,11 +845,20 @@ impl ShuffleRepartitioner { if mem_diff > 0 { let mem_increase = mem_diff as usize; - if self.reservation.try_grow(mem_increase).is_err() { + + let try_grow = { + let mut mempool_timer = self.metrics.mempool_time.timer(); + let result = self.reservation.try_grow(mem_increase); + mempool_timer.stop(); + result + }; + + if try_grow.is_err() { self.spill().await?; + let mut mempool_timer = self.metrics.mempool_time.timer(); self.reservation.free(); self.reservation.try_grow(mem_increase)?; - + mempool_timer.stop(); mem_diff = 0; } } @@ -816,7 +866,9 @@ impl ShuffleRepartitioner { if mem_diff < 0 { let mem_used = self.reservation.size(); let mem_decrease = mem_used.min(-mem_diff as usize); + let mut mempool_timer = self.metrics.mempool_time.timer(); self.reservation.shrink(mem_decrease); + mempool_timer.stop(); } } } @@ -848,12 +900,13 @@ impl ShuffleRepartitioner { /// Writes buffered shuffled record batches into Arrow IPC bytes. 
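The partition-id bookkeeping above is a counting sort: count rows per partition, prefix-sum the counts into end offsets, then walk the rows in reverse to drop each row index into its partition's slot. A standalone sketch using the same example values as the comments, with plain `usize` ids instead of the repartitioner's reusable buffers:

```rust
// Counting-sort grouping of row indices by partition id; input values mirror the
// example in the comments above ([3, 1, 1, 1, 2, 2, 0] with 4 output partitions).
fn group_rows_by_partition(
    partition_ids: &[usize],
    num_partitions: usize,
) -> (Vec<usize>, Vec<usize>) {
    // count rows per partition: [1, 3, 2, 1]
    let mut counts = vec![0usize; num_partitions];
    for &p in partition_ids {
        counts[p] += 1;
    }

    // prefix-sum the counts into partition end offsets: [1, 4, 6, 7]
    let mut ends = counts;
    let mut accum = 0;
    for v in ends.iter_mut() {
        *v += accum;
        accum = *v;
    }

    // walk rows in reverse, placing each row index just before its partition's end
    let mut shuffled = vec![0usize; partition_ids.len()];
    for (row, &p) in partition_ids.iter().enumerate().rev() {
        ends[p] -= 1;
        shuffled[ends[p]] = row;
    }

    // after the reverse pass the "ends" have become partition start offsets
    let mut starts = ends;
    starts.push(partition_ids.len());
    (starts, shuffled)
}

fn main() {
    let (starts, shuffled) = group_rows_by_partition(&[3, 1, 1, 1, 2, 2, 0], 4);
    assert_eq!(shuffled, vec![6, 1, 2, 3, 4, 5, 0]);
    assert_eq!(starts, vec![0, 1, 4, 6, 7]);
}
```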
async fn shuffle_write(&mut self) -> Result { + let mut elapsed_compute = self.metrics.baseline.elapsed_compute().timer(); let num_output_partitions = self.num_output_partitions; let buffered_partitions = &mut self.buffered_partitions; let mut output_batches: Vec> = vec![vec![]; num_output_partitions]; - + let mut offsets = vec![0; num_output_partitions + 1]; for i in 0..num_output_partitions { - buffered_partitions[i].flush()?; + buffered_partitions[i].flush(&self.metrics.ipc_time)?; output_batches[i] = std::mem::take(&mut buffered_partitions[i].frozen); } @@ -863,53 +916,38 @@ impl ShuffleRepartitioner { let data_file = self.output_data_file.clone(); let index_file = self.output_index_file.clone(); - let mut offsets = vec![0; num_output_partitions + 1]; - let mut output_data = OpenOptions::new() + let mut write_time = self.metrics.write_time.timer(); + + let output_data = OpenOptions::new() .write(true) .create(true) .truncate(true) .open(data_file) .map_err(|e| DataFusionError::Execution(format!("shuffle write error: {:?}", e)))?; - for i in 0..num_output_partitions { - let mut timer = self.metrics.baseline.elapsed_compute().timer(); + let mut output_data = BufWriter::new(output_data); + for i in 0..num_output_partitions { offsets[i] = output_data.stream_position()?; output_data.write_all(&output_batches[i])?; - - timer.stop(); - output_batches[i].clear(); // append partition in each spills for spill in &output_spills { let length = spill.offsets[i + 1] - spill.offsets[i]; if length > 0 { - let mut timer = self.metrics.baseline.elapsed_compute().timer(); - let mut spill_file = - BufReader::new(File::open(spill.file.path()).map_err(|e| { - DataFusionError::Execution(format!("shuffle write error: {:?}", e)) - })?); + BufReader::new(File::open(spill.file.path()).map_err(Self::to_df_err)?); spill_file.seek(SeekFrom::Start(spill.offsets[i]))?; - std::io::copy(&mut spill_file.take(length), &mut output_data).map_err(|e| { - DataFusionError::Execution(format!("shuffle write error: {:?}", e)) - })?; - - timer.stop(); + std::io::copy(&mut spill_file.take(length), &mut output_data) + .map_err(Self::to_df_err)?; } } } - let mut timer = self.metrics.baseline.elapsed_compute().timer(); output_data.flush()?; - timer.stop(); // add one extra offset at last to ease partition length computation - offsets[num_output_partitions] = output_data - .stream_position() - .map_err(|e| DataFusionError::Execution(format!("shuffle write error: {:?}", e)))?; - - let mut timer = self.metrics.baseline.elapsed_compute().timer(); + offsets[num_output_partitions] = output_data.stream_position().map_err(Self::to_df_err)?; let mut output_index = BufWriter::new(File::create(index_file).map_err(|e| { @@ -918,19 +956,27 @@ impl ShuffleRepartitioner { for offset in offsets { output_index .write_all(&(offset as i64).to_le_bytes()[..]) - .map_err(|e| DataFusionError::Execution(format!("shuffle write error: {:?}", e)))?; + .map_err(Self::to_df_err)?; } output_index.flush()?; - timer.stop(); + write_time.stop(); + let mut mempool_timer = self.metrics.mempool_time.timer(); let used = self.reservation.size(); self.reservation.shrink(used); + mempool_timer.stop(); + + elapsed_compute.stop(); // shuffle writer always has empty output Ok(Box::pin(EmptyStream::try_new(Arc::clone(&self.schema))?)) } + fn to_df_err(e: Error) -> DataFusionError { + DataFusionError::Execution(format!("shuffle write error: {:?}", e)) + } + fn used(&self) -> usize { self.reservation.size() } @@ -959,7 +1005,7 @@ impl ShuffleRepartitioner { return Ok(0); } - 
let mut timer = self.metrics.baseline.elapsed_compute().timer(); + let mut timer = self.metrics.write_time.timer(); let spillfile = self .runtime @@ -969,6 +1015,7 @@ impl ShuffleRepartitioner { &mut self.buffered_partitions, spillfile.path(), self.num_output_partitions, + &self.metrics.ipc_time, )?; timer.stop(); @@ -995,12 +1042,10 @@ impl ShuffleRepartitioner { let output = &mut self.buffered_partitions[partition_id]; - let time_metric = self.metrics.baseline.elapsed_compute(); - // If the range of indices is not big enough, just appending the rows into // active array builders instead of directly adding them as a record batch. let mut start_index: usize = 0; - let mut output_ret = output.append_rows(columns, indices, start_index, time_metric); + let mut output_ret = output.append_rows(columns, indices, start_index, &self.metrics); loop { match output_ret { @@ -1012,15 +1057,15 @@ impl ShuffleRepartitioner { // Cannot allocate enough memory for the array builders in the partition, // spill partitions and retry. self.spill().await?; - self.reservation.free(); + let mut mempool_timer = self.metrics.mempool_time.timer(); + self.reservation.free(); let output = &mut self.buffered_partitions[partition_id]; output.reservation.free(); - - let time_metric = self.metrics.baseline.elapsed_compute(); + mempool_timer.stop(); start_index = new_start; - output_ret = output.append_rows(columns, indices, start_index, time_metric); + output_ret = output.append_rows(columns, indices, start_index, &self.metrics); if let AppendRowStatus::StartIndex(new_start) = output_ret { if new_start == start_index { @@ -1045,11 +1090,12 @@ fn spill_into( buffered_partitions: &mut [PartitionBuffer], path: &Path, num_output_partitions: usize, + ipc_time: &Time, ) -> Result> { let mut output_batches: Vec> = vec![vec![]; num_output_partitions]; for i in 0..num_output_partitions { - buffered_partitions[i].flush()?; + buffered_partitions[i].flush(ipc_time)?; output_batches[i] = std::mem::take(&mut buffered_partitions[i].frozen); } let path = path.to_owned(); @@ -1485,10 +1531,13 @@ impl Checksum { pub(crate) fn write_ipc_compressed( batch: &RecordBatch, output: &mut W, + ipc_time: &Time, ) -> Result { if batch.num_rows() == 0 { return Ok(0); } + + let mut timer = ipc_time.timer(); let start_pos = output.stream_position()?; // write ipc_length placeholder @@ -1508,8 +1557,10 @@ pub(crate) fn write_ipc_compressed( // fill ipc length output.seek(SeekFrom::Start(start_pos))?; output.write_all(&ipc_length.to_le_bytes()[..])?; - output.seek(SeekFrom::Start(end_pos))?; + + timer.stop(); + Ok((end_pos - start_pos) as usize) } diff --git a/native/core/src/execution/spark_plan.rs b/native/core/src/execution/spark_plan.rs new file mode 100644 index 000000000..4e26ff888 --- /dev/null +++ b/native/core/src/execution/spark_plan.rs @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::execution::operators::CopyExec; +use arrow_schema::SchemaRef; +use datafusion::physical_plan::ExecutionPlan; +use std::sync::Arc; + +/// Wrapper around a native plan that maps to a Spark plan and can optionally contain +/// references to other native plans that should contribute to the Spark SQL metrics +/// for the root plan (such as CopyExec and ScanExec nodes) +#[derive(Debug, Clone)] +pub(crate) struct SparkPlan { + /// Spark plan ID (used for informational purposes only) + pub(crate) plan_id: u32, + /// The root of the native plan that was generated for this Spark plan + pub(crate) native_plan: Arc, + /// Child Spark plans + pub(crate) children: Vec>, + /// Additional native plans that were generated for this Spark plan that we need + /// to collect metrics for + pub(crate) additional_native_plans: Vec>, +} + +impl SparkPlan { + /// Create a SparkPlan that consists of a single native plan + pub(crate) fn new( + plan_id: u32, + native_plan: Arc, + children: Vec>, + ) -> Self { + let mut additional_native_plans: Vec> = vec![]; + for child in &children { + collect_additional_plans(Arc::clone(&child.native_plan), &mut additional_native_plans); + } + Self { + plan_id, + native_plan, + children, + additional_native_plans, + } + } + + /// Create a SparkPlan that consists of more than one native plan + pub(crate) fn new_with_additional( + plan_id: u32, + native_plan: Arc, + children: Vec>, + additional_native_plans: Vec>, + ) -> Self { + let mut accum: Vec> = vec![]; + for plan in &additional_native_plans { + accum.push(Arc::clone(plan)); + } + for child in &children { + collect_additional_plans(Arc::clone(&child.native_plan), &mut accum); + } + Self { + plan_id, + native_plan, + children, + additional_native_plans: accum, + } + } + + /// Get the schema of the native plan + pub(crate) fn schema(&self) -> SchemaRef { + self.native_plan.schema() + } + + /// Get the child SparkPlan instances + pub(crate) fn children(&self) -> &Vec> { + &self.children + } +} + +fn collect_additional_plans( + child: Arc, + additional_native_plans: &mut Vec>, +) { + if child.as_any().is::() { + additional_native_plans.push(Arc::clone(&child)); + } +} diff --git a/native/core/src/execution/datafusion/util/mod.rs b/native/core/src/execution/util/mod.rs similarity index 100% rename from native/core/src/execution/datafusion/util/mod.rs rename to native/core/src/execution/util/mod.rs diff --git a/native/core/src/execution/datafusion/util/spark_bit_array.rs b/native/core/src/execution/util/spark_bit_array.rs similarity index 99% rename from native/core/src/execution/datafusion/util/spark_bit_array.rs rename to native/core/src/execution/util/spark_bit_array.rs index 68b97d660..6cfecc1bf 100644 --- a/native/core/src/execution/datafusion/util/spark_bit_array.rs +++ b/native/core/src/execution/util/spark_bit_array.rs @@ -70,6 +70,7 @@ impl SparkBitArray { self.data.len() } + #[allow(dead_code)] // this is only called from tests pub fn cardinality(&self) -> usize { self.bit_count } diff --git a/native/core/src/execution/datafusion/util/spark_bloom_filter.rs b/native/core/src/execution/util/spark_bloom_filter.rs similarity index 98% rename from native/core/src/execution/datafusion/util/spark_bloom_filter.rs rename to native/core/src/execution/util/spark_bloom_filter.rs index 35fa23b46..2c3af1691 100644 --- a/native/core/src/execution/datafusion/util/spark_bloom_filter.rs +++ 
b/native/core/src/execution/util/spark_bloom_filter.rs @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -use crate::execution::datafusion::util::spark_bit_array; -use crate::execution::datafusion::util::spark_bit_array::SparkBitArray; +use crate::execution::util::spark_bit_array; +use crate::execution::util::spark_bit_array::SparkBitArray; use arrow_array::{ArrowNativeTypeOp, BooleanArray, Int64Array}; use arrow_buffer::ToByteSlice; use datafusion_comet_spark_expr::spark_hash::spark_compatible_murmur3_hash; diff --git a/native/core/src/execution/utils.rs b/native/core/src/execution/utils.rs index 553d42606..4992b7ba9 100644 --- a/native/core/src/execution/utils.rs +++ b/native/core/src/execution/utils.rs @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::sync::Arc; - use arrow::{ array::ArrayData, error::ArrowError, @@ -52,10 +50,6 @@ pub trait SparkArrowConvert { where Self: Sized; - /// Convert Arrow Arrays to C data interface. - /// It returns a tuple (ArrowArray address, ArrowSchema address). - fn to_spark(&self) -> Result<(i64, i64), ExecutionError>; - /// Move Arrow Arrays to C data interface. fn move_to_spark(&self, array: i64, schema: i64) -> Result<(), ExecutionError>; } @@ -88,18 +82,6 @@ impl SparkArrowConvert for ArrayData { Ok(ffi_array) } - /// Converts this ArrowData to pointers of Arrow C data interface. - /// Returned pointers are Arc-ed and should be freed manually. - #[allow(clippy::arc_with_non_send_sync)] - fn to_spark(&self) -> Result<(i64, i64), ExecutionError> { - let arrow_array = Arc::new(FFI_ArrowArray::new(self)); - let arrow_schema = Arc::new(FFI_ArrowSchema::try_from(self.data_type())?); - - let (array, schema) = (Arc::into_raw(arrow_array), Arc::into_raw(arrow_schema)); - - Ok((array as i64, schema as i64)) - } - /// Move this ArrowData to pointers of Arrow C data interface. fn move_to_spark(&self, array: i64, schema: i64) -> Result<(), ExecutionError> { let array_ptr = array as *mut FFI_ArrowArray; diff --git a/native/core/src/jvm_bridge/batch_iterator.rs b/native/core/src/jvm_bridge/batch_iterator.rs index 4870624d2..998e540c7 100644 --- a/native/core/src/jvm_bridge/batch_iterator.rs +++ b/native/core/src/jvm_bridge/batch_iterator.rs @@ -24,8 +24,11 @@ use jni::{ }; /// A struct that holds all the JNI methods and fields for JVM `CometBatchIterator` class. +#[allow(dead_code)] // we need to keep references to Java items to prevent GC pub struct CometBatchIterator<'a> { pub class: JClass<'a>, + pub method_has_next: JMethodID, + pub method_has_next_ret: ReturnType, pub method_next: JMethodID, pub method_next_ret: ReturnType, } @@ -38,6 +41,8 @@ impl<'a> CometBatchIterator<'a> { Ok(CometBatchIterator { class, + method_has_next: env.get_method_id(Self::JVM_CLASS, "hasNext", "()I")?, + method_has_next_ret: ReturnType::Primitive(Primitive::Int), method_next: env.get_method_id(Self::JVM_CLASS, "next", "([J[J)I")?, method_next_ret: ReturnType::Primitive(Primitive::Int), }) diff --git a/native/core/src/jvm_bridge/comet_metric_node.rs b/native/core/src/jvm_bridge/comet_metric_node.rs index 8647e071a..85386d9b0 100644 --- a/native/core/src/jvm_bridge/comet_metric_node.rs +++ b/native/core/src/jvm_bridge/comet_metric_node.rs @@ -23,6 +23,7 @@ use jni::{ }; /// A struct that holds all the JNI methods and fields for JVM CometMetricNode class. 
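The `#[allow(dead_code)]` structs above cache JNI class and method references so they are resolved once and stay reachable for the lifetime of the bridge. A hedged sketch of that caching pattern with the `jni` crate; the class name `org/example/MyIterator` is made up, while the method name and signature follow the lookups visible in the diff:

```rust
use jni::errors::Result as JniResult;
use jni::objects::JMethodID;
use jni::signature::{Primitive, ReturnType};
use jni::JNIEnv;

#[allow(dead_code)] // fields may never be read from Rust but must stay alive
struct CachedMethods {
    method_has_next: JMethodID,
    method_has_next_ret: ReturnType,
}

impl CachedMethods {
    fn new(env: &mut JNIEnv) -> JniResult<Self> {
        Ok(Self {
            // "()I" is the JNI signature for a no-argument method returning a Java int
            method_has_next: env.get_method_id("org/example/MyIterator", "hasNext", "()I")?,
            method_has_next_ret: ReturnType::Primitive(Primitive::Int),
        })
    }
}
```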
+#[allow(dead_code)] // we need to keep references to Java items to prevent GC pub struct CometMetricNode<'a> { pub class: JClass<'a>, pub method_get_child_node: JMethodID, diff --git a/native/core/src/jvm_bridge/comet_task_memory_manager.rs b/native/core/src/jvm_bridge/comet_task_memory_manager.rs index 97d1bf3a7..22c3332c6 100644 --- a/native/core/src/jvm_bridge/comet_task_memory_manager.rs +++ b/native/core/src/jvm_bridge/comet_task_memory_manager.rs @@ -25,6 +25,7 @@ use jni::{ /// A wrapper which delegate acquire/release memory calls to the /// JVM side `CometTaskMemoryManager`. #[derive(Debug)] +#[allow(dead_code)] // we need to keep references to Java items to prevent GC pub struct CometTaskMemoryManager<'a> { pub class: JClass<'a>, pub method_acquire_memory: JMethodID, diff --git a/native/core/src/jvm_bridge/mod.rs b/native/core/src/jvm_bridge/mod.rs index 4936b1c5b..5fc0a55e3 100644 --- a/native/core/src/jvm_bridge/mod.rs +++ b/native/core/src/jvm_bridge/mod.rs @@ -189,6 +189,7 @@ pub use comet_metric_node::*; pub use comet_task_memory_manager::*; /// The JVM classes that are used in the JNI calls. +#[allow(dead_code)] // we need to keep references to Java items to prevent GC pub struct JVMClasses<'a> { /// Cached JClass for "java.lang.Object" java_lang_object: JClass<'a>, diff --git a/native/core/src/lib.rs b/native/core/src/lib.rs index 36e63e39c..cab511faf 100644 --- a/native/core/src/lib.rs +++ b/native/core/src/lib.rs @@ -17,7 +17,6 @@ #![allow(incomplete_features)] #![allow(non_camel_case_types)] -#![allow(dead_code)] #![allow(clippy::upper_case_acronyms)] // For prost generated struct #![allow(clippy::derive_partial_eq_without_eq)] @@ -40,15 +39,12 @@ use log4rs::{ use mimalloc::MiMalloc; use once_cell::sync::OnceCell; -pub use data_type::*; - use errors::{try_unwrap_or_throw, CometError, CometResult}; #[macro_use] mod errors; #[macro_use] pub mod common; -mod data_type; pub mod execution; mod jvm_bridge; pub mod parquet; @@ -107,30 +103,3 @@ fn default_logger_config() -> CometResult { .build(root) .map_err(|err| CometError::Config(err.to_string())) } - -// These are borrowed from hashbrown crate: -// https://github.com/rust-lang/hashbrown/blob/master/src/raw/mod.rs - -// On stable we can use #[cold] to get a equivalent effect: this attributes -// suggests that the function is unlikely to be called -#[cfg(not(feature = "nightly"))] -#[inline] -#[cold] -fn cold() {} - -#[cfg(not(feature = "nightly"))] -#[inline] -fn likely(b: bool) -> bool { - if !b { - cold(); - } - b -} -#[cfg(not(feature = "nightly"))] -#[inline] -fn unlikely(b: bool) -> bool { - if b { - cold(); - } - b -} diff --git a/native/core/src/parquet/mod.rs b/native/core/src/parquet/mod.rs index c234b6f7b..a26978a62 100644 --- a/native/core/src/parquet/mod.rs +++ b/native/core/src/parquet/mod.rs @@ -49,6 +49,7 @@ use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::physical_plan::parquet::ParquetExecBuilder; use datafusion::datasource::physical_plan::FileScanConfig; use datafusion::physical_plan::ExecutionPlan; +use datafusion_comet_spark_expr::{EvalMode, SparkCastOptions, SparkSchemaAdapterFactory}; use datafusion_common::config::TableParquetOptions; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use futures::{poll, StreamExt}; @@ -60,8 +61,6 @@ use util::jni::{convert_column_descriptor, convert_encoding, deserialize_schema, use self::util::jni::TypePromotionInfo; -const STR_CLASS_NAME: &str = "java/lang/String"; - /// Parquet read context maintained 
across multiple JNI calls. struct Context { pub column_reader: ColumnReader, @@ -680,11 +679,14 @@ pub unsafe extern "system" fn Java_org_apache_comet_parquet_Native_initRecordBat table_parquet_options.global.pushdown_filters = true; table_parquet_options.global.reorder_filters = true; + let mut spark_cast_options = SparkCastOptions::new(EvalMode::Legacy, "UTC", false); + spark_cast_options.allow_cast_unsigned_ints = true; + let builder2 = ParquetExecBuilder::new(file_scan_config) .with_table_parquet_options(table_parquet_options) - .with_schema_adapter_factory(Arc::new( - crate::execution::datafusion::schema_adapter::CometSchemaAdapterFactory::default(), - )); + .with_schema_adapter_factory(Arc::new(SparkSchemaAdapterFactory::new( + spark_cast_options, + ))); //TODO: (ARROW NATIVE) - predicate pushdown?? // builder = builder.with_predicate(filter); diff --git a/native/core/src/parquet/mutable_vector.rs b/native/core/src/parquet/mutable_vector.rs index 7f30d7d87..d19ea32fa 100644 --- a/native/core/src/parquet/mutable_vector.rs +++ b/native/core/src/parquet/mutable_vector.rs @@ -40,12 +40,6 @@ pub struct ParquetMutableVector { /// The number of null elements in this vector, must <= `num_values`. pub(crate) num_nulls: usize, - /// The capacity of the vector - pub(crate) capacity: usize, - - /// How many bits are required to store a single value - pub(crate) bit_width: usize, - /// The validity buffer of this Arrow vector. A bit set at position `i` indicates the `i`th /// element is not null. Otherwise, an unset bit at position `i` indicates the `i`th element is /// null. @@ -109,8 +103,6 @@ impl ParquetMutableVector { arrow_type, num_values: 0, num_nulls: 0, - capacity, - bit_width, validity_buffer, value_buffer, children, diff --git a/native/core/src/parquet/read/column.rs b/native/core/src/parquet/read/column.rs index 73f8df956..05a0bf7b5 100644 --- a/native/core/src/parquet/read/column.rs +++ b/native/core/src/parquet/read/column.rs @@ -770,7 +770,7 @@ impl TypedColumnReader { // Create a new vector for dictionary values let mut value_vector = ParquetMutableVector::new(page_value_count, &self.arrow_type); - let mut dictionary = self.get_decoder(page_data, page_value_count, encoding); + let mut dictionary = self.get_decoder(page_data, encoding); dictionary.read_batch(&mut value_vector, page_value_count); value_vector.num_values = page_value_count; @@ -812,7 +812,7 @@ impl TypedColumnReader { self.def_level_decoder = Some(dl_decoder); page_buffer = page_buffer.slice(offset); - let value_decoder = self.get_decoder(page_buffer, page_value_count, encoding); + let value_decoder = self.get_decoder(page_buffer, encoding); self.value_decoder = Some(value_decoder); } @@ -838,7 +838,7 @@ impl TypedColumnReader { dl_decoder.set_data(page_value_count, &def_level_data); self.def_level_decoder = Some(dl_decoder); - let value_decoder = self.get_decoder(value_data, page_value_count, encoding); + let value_decoder = self.get_decoder(value_data, encoding); self.value_decoder = Some(value_decoder); } @@ -977,15 +977,9 @@ impl TypedColumnReader { } } - fn get_decoder( - &self, - value_data: Buffer, - page_value_count: usize, - encoding: Encoding, - ) -> Box { + fn get_decoder(&self, value_data: Buffer, encoding: Encoding) -> Box { get_decoder::( value_data, - page_value_count, encoding, Arc::clone(&self.desc), self.read_options, diff --git a/native/core/src/parquet/read/levels.rs b/native/core/src/parquet/read/levels.rs index 3d74b277c..9077c0e48 100644 --- a/native/core/src/parquet/read/levels.rs +++ 
b/native/core/src/parquet/read/levels.rs @@ -17,15 +17,14 @@ use std::mem; -use arrow::buffer::Buffer; -use parquet::schema::types::ColumnDescPtr; - use super::values::Decoder; use crate::{ common::bit::{self, read_u32, BitReader}, parquet::ParquetMutableVector, - unlikely, }; +use arrow::buffer::Buffer; +use datafusion_comet_spark_expr::utils::unlikely; +use parquet::schema::types::ColumnDescPtr; const INITIAL_BUF_LEN: usize = 16; diff --git a/native/core/src/parquet/read/mod.rs b/native/core/src/parquet/read/mod.rs index 4d057a06c..5a55f2117 100644 --- a/native/core/src/parquet/read/mod.rs +++ b/native/core/src/parquet/read/mod.rs @@ -44,9 +44,6 @@ pub struct PlainDecoderInner { /// The current offset in `data`, in bytes. offset: usize, - /// The number of total values in `data` - value_count: usize, - /// Reads `data` bit by bit, used if `T` is [`BoolType`]. bit_reader: BitReader, diff --git a/native/core/src/parquet/read/values.rs b/native/core/src/parquet/read/values.rs index 324f6106d..e28d695ec 100644 --- a/native/core/src/parquet/read/values.rs +++ b/native/core/src/parquet/read/values.rs @@ -28,26 +28,22 @@ use crate::write_val_or_null; use crate::{ common::bit::{self, BitReader}, parquet::{data_type::*, ParquetMutableVector}, - unlikely, }; use arrow::datatypes::DataType as ArrowDataType; +use datafusion_comet_spark_expr::utils::unlikely; pub fn get_decoder( value_data: Buffer, - num_values: usize, encoding: Encoding, desc: ColumnDescPtr, read_options: ReadOptions, ) -> Box { let decoder: Box = match encoding { - Encoding::PLAIN | Encoding::PLAIN_DICTIONARY => Box::new(PlainDecoder::::new( - value_data, - num_values, - desc, - read_options, - )), + Encoding::PLAIN | Encoding::PLAIN_DICTIONARY => { + Box::new(PlainDecoder::::new(value_data, desc, read_options)) + } // This is for dictionary indices - Encoding::RLE_DICTIONARY => Box::new(DictDecoder::new(value_data, num_values)), + Encoding::RLE_DICTIONARY => Box::new(DictDecoder::new(value_data)), _ => panic!("Unsupported encoding: {}", encoding), }; decoder @@ -108,17 +104,11 @@ pub struct PlainDecoder { } impl PlainDecoder { - pub fn new( - value_data: Buffer, - num_values: usize, - desc: ColumnDescPtr, - read_options: ReadOptions, - ) -> Self { + pub fn new(value_data: Buffer, desc: ColumnDescPtr, read_options: ReadOptions) -> Self { let len = value_data.len(); let inner = PlainDecoderInner { data: value_data.clone(), offset: 0, - value_count: num_values, bit_reader: BitReader::new(value_data, len), read_options, desc, @@ -476,7 +466,7 @@ make_int_variant_impl!(Int32ToDoubleType, copy_i32_to_f64, 8); make_int_variant_impl!(FloatToDoubleType, copy_f32_to_f64, 8); // unsigned type require double the width and zeroes are written for the second half -// perhaps because they are implemented as the next size up signed type? +// because they are implemented as the next size up signed type make_int_variant_impl!(UInt8Type, copy_i32_to_u8, 2); make_int_variant_impl!(UInt16Type, copy_i32_to_u16, 4); make_int_variant_impl!(UInt32Type, copy_i32_to_u32, 8); @@ -586,8 +576,6 @@ macro_rules! generate_cast_to_unsigned { }; } -generate_cast_to_unsigned!(copy_i32_to_u8, i32, u8, 0_u8); -generate_cast_to_unsigned!(copy_i32_to_u16, i32, u16, 0_u16); generate_cast_to_unsigned!(copy_i32_to_u32, i32, u32, 0_u32); macro_rules! 
generate_cast_to_signed { @@ -624,6 +612,9 @@ generate_cast_to_signed!(copy_i64_to_i64, i64, i64); generate_cast_to_signed!(copy_i64_to_i128, i64, i128); generate_cast_to_signed!(copy_u64_to_u128, u64, u128); generate_cast_to_signed!(copy_f32_to_f64, f32, f64); +// even for u8/u16, need to copy full i16/i32 width for Spark compatibility +generate_cast_to_signed!(copy_i32_to_u8, i32, i16); +generate_cast_to_signed!(copy_i32_to_u16, i32, i32); // Shared implementation for variants of Binary type macro_rules! make_plain_binary_impl { @@ -937,9 +928,6 @@ pub struct DictDecoder { /// Number of bits used to represent dictionary indices. Must be between `[0, 64]`. bit_width: usize, - /// The number of total values in `data` - value_count: usize, - /// Bit reader bit_reader: BitReader, @@ -954,12 +942,11 @@ pub struct DictDecoder { } impl DictDecoder { - pub fn new(buf: Buffer, num_values: usize) -> Self { + pub fn new(buf: Buffer) -> Self { let bit_width = buf.as_bytes()[0] as usize; Self { bit_width, - value_count: num_values, bit_reader: BitReader::new_all(buf.slice(1)), rle_left: 0, bit_packed_left: 0, @@ -1096,7 +1083,7 @@ mod test { let source = hex::decode("8a000000dbffffff1800000034ffffff300000001d000000abffffff37fffffff1000000") .unwrap(); - let expected = hex::decode("8a00db001800340030001d00ab003700f100").unwrap(); + let expected = hex::decode("8a00dbff180034ff30001d00abff37fff100").unwrap(); let num = source.len() / 4; let mut dest: Vec = vec![b' '; num * 2]; copy_i32_to_u8(source.as_bytes(), dest.as_mut_slice(), num); diff --git a/native/core/src/parquet/util/test_common/mod.rs b/native/core/src/parquet/util/test_common/mod.rs index e46d73223..d92544608 100644 --- a/native/core/src/parquet/util/test_common/mod.rs +++ b/native/core/src/parquet/util/test_common/mod.rs @@ -15,10 +15,9 @@ // specific language governing permissions and limitations // under the License. -pub mod file_util; pub mod page_util; pub mod rand_gen; pub use self::rand_gen::{random_bools, random_bytes, random_numbers, random_numbers_range}; -pub use self::file_util::{get_temp_file, get_temp_filename}; +pub use datafusion_comet_spark_expr::test_common::file_util::{get_temp_file, get_temp_filename}; diff --git a/native/core/src/parquet/util/test_common/page_util.rs b/native/core/src/parquet/util/test_common/page_util.rs index e20cc30cf..333298bc3 100644 --- a/native/core/src/parquet/util/test_common/page_util.rs +++ b/native/core/src/parquet/util/test_common/page_util.rs @@ -28,7 +28,7 @@ use parquet::{ levels::{max_buffer_size, LevelEncoder}, }, errors::Result, - schema::types::{ColumnDescPtr, SchemaDescPtr}, + schema::types::ColumnDescPtr, }; use super::random_numbers_range; @@ -201,20 +201,12 @@ impl + Send> Iterator for InMemoryPageReader

{ /// A utility page iterator which stores page readers in memory, used for tests. #[derive(Clone)] pub struct InMemoryPageIterator>> { - schema: SchemaDescPtr, - column_desc: ColumnDescPtr, page_reader_iter: I, } impl>> InMemoryPageIterator { - pub fn new( - schema: SchemaDescPtr, - column_desc: ColumnDescPtr, - pages: impl IntoIterator, IntoIter = I>, - ) -> Self { + pub fn new(pages: impl IntoIterator, IntoIter = I>) -> Self { Self { - schema, - column_desc, page_reader_iter: pages.into_iter(), } } diff --git a/native/proto/src/proto/expr.proto b/native/proto/src/proto/expr.proto index 3a8193f4a..7a8ea78d5 100644 --- a/native/proto/src/proto/expr.proto +++ b/native/proto/src/proto/expr.proto @@ -82,6 +82,8 @@ message Expr { ToJson to_json = 55; ListExtract list_extract = 56; GetArrayStructFields get_array_struct_fields = 57; + BinaryExpr array_append = 58; + ArrayInsert array_insert = 59; } } @@ -402,6 +404,14 @@ enum NullOrdering { NullsLast = 1; } +// Array functions +message ArrayInsert { + Expr src_array_expr = 1; + Expr pos_expr = 2; + Expr item_expr = 3; + bool legacy_negative_index = 4; +} + message DataType { enum DataTypeId { BOOL = 0; diff --git a/native/proto/src/proto/operator.proto b/native/proto/src/proto/operator.proto index b4e12d123..3f04d42c8 100644 --- a/native/proto/src/proto/operator.proto +++ b/native/proto/src/proto/operator.proto @@ -31,6 +31,9 @@ message Operator { // The child operators of this repeated Operator children = 1; + // Spark plan ID + uint32 plan_id = 2; + oneof op_struct { Scan scan = 100; Projection projection = 101; diff --git a/native/spark-expr/Cargo.toml b/native/spark-expr/Cargo.toml index 532bf7437..27367d83e 100644 --- a/native/spark-expr/Cargo.toml +++ b/native/spark-expr/Cargo.toml @@ -29,9 +29,11 @@ edition = { workspace = true } [dependencies] arrow = { workspace = true } arrow-array = { workspace = true } +arrow-buffer = { workspace = true } +arrow-data = { workspace = true } arrow-schema = { workspace = true } chrono = { workspace = true } -datafusion = { workspace = true } +datafusion = { workspace = true, features = ["parquet"] } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } datafusion-physical-expr = { workspace = true } @@ -39,12 +41,16 @@ chrono-tz = { workspace = true } num = { workspace = true } regex = { workspace = true } thiserror = { workspace = true } +futures = { workspace = true } twox-hash = "2.0.0" +rand = { workspace = true } [dev-dependencies] arrow-data = {workspace = true} +parquet = { workspace = true, features = ["arrow"] } criterion = "0.5.1" rand = { workspace = true} +tokio = { version = "1", features = ["rt-multi-thread"] } [lib] @@ -66,3 +72,8 @@ harness = false [[bench]] name = "decimal_div" harness = false + +[[bench]] +name = "aggregate" +harness = false + diff --git a/native/core/benches/aggregate.rs b/native/spark-expr/benches/aggregate.rs similarity index 97% rename from native/core/benches/aggregate.rs rename to native/spark-expr/benches/aggregate.rs index c6209406f..43194fdda 100644 --- a/native/core/benches/aggregate.rs +++ b/native/spark-expr/benches/aggregate.rs @@ -19,16 +19,16 @@ use arrow::datatypes::{DataType, Field, Schema}; use arrow_array::builder::{Decimal128Builder, StringBuilder}; use arrow_array::{ArrayRef, RecordBatch}; use arrow_schema::SchemaRef; -use comet::execution::datafusion::expressions::avg_decimal::AvgDecimal; -use comet::execution::datafusion::expressions::sum_decimal::SumDecimal; use criterion::{black_box, criterion_group, criterion_main, 
Criterion}; +use datafusion::execution::TaskContext; use datafusion::functions_aggregate::average::avg_udaf; use datafusion::functions_aggregate::sum::sum_udaf; use datafusion::physical_expr::PhysicalExpr; use datafusion::physical_plan::aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}; use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::ExecutionPlan; -use datafusion_execution::TaskContext; +use datafusion_comet_spark_expr::AvgDecimal; +use datafusion_comet_spark_expr::SumDecimal; use datafusion_expr::AggregateUDF; use datafusion_physical_expr::aggregate::AggregateExprBuilder; use datafusion_physical_expr::expressions::Column; diff --git a/native/spark-expr/benches/cast_from_string.rs b/native/spark-expr/benches/cast_from_string.rs index 056ada2eb..c6b0bcf39 100644 --- a/native/spark-expr/benches/cast_from_string.rs +++ b/native/spark-expr/benches/cast_from_string.rs @@ -18,36 +18,18 @@ use arrow_array::{builder::StringBuilder, RecordBatch}; use arrow_schema::{DataType, Field, Schema}; use criterion::{criterion_group, criterion_main, Criterion}; -use datafusion_comet_spark_expr::{Cast, EvalMode}; +use datafusion_comet_spark_expr::{Cast, EvalMode, SparkCastOptions}; use datafusion_physical_expr::{expressions::Column, PhysicalExpr}; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let batch = create_utf8_batch(); let expr = Arc::new(Column::new("a", 0)); - let timezone = "".to_string(); - let cast_string_to_i8 = Cast::new( - expr.clone(), - DataType::Int8, - EvalMode::Legacy, - timezone.clone(), - false, - ); - let cast_string_to_i16 = Cast::new( - expr.clone(), - DataType::Int16, - EvalMode::Legacy, - timezone.clone(), - false, - ); - let cast_string_to_i32 = Cast::new( - expr.clone(), - DataType::Int32, - EvalMode::Legacy, - timezone.clone(), - false, - ); - let cast_string_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone, false); + let spark_cast_options = SparkCastOptions::new(EvalMode::Legacy, "", false); + let cast_string_to_i8 = Cast::new(expr.clone(), DataType::Int8, spark_cast_options.clone()); + let cast_string_to_i16 = Cast::new(expr.clone(), DataType::Int16, spark_cast_options.clone()); + let cast_string_to_i32 = Cast::new(expr.clone(), DataType::Int32, spark_cast_options.clone()); + let cast_string_to_i64 = Cast::new(expr, DataType::Int64, spark_cast_options); let mut group = c.benchmark_group("cast_string_to_int"); group.bench_function("cast_string_to_i8", |b| { diff --git a/native/spark-expr/benches/cast_numeric.rs b/native/spark-expr/benches/cast_numeric.rs index 15ef1a5a2..8ec8b2f89 100644 --- a/native/spark-expr/benches/cast_numeric.rs +++ b/native/spark-expr/benches/cast_numeric.rs @@ -18,29 +18,17 @@ use arrow_array::{builder::Int32Builder, RecordBatch}; use arrow_schema::{DataType, Field, Schema}; use criterion::{criterion_group, criterion_main, Criterion}; -use datafusion_comet_spark_expr::{Cast, EvalMode}; +use datafusion_comet_spark_expr::{Cast, EvalMode, SparkCastOptions}; use datafusion_physical_expr::{expressions::Column, PhysicalExpr}; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let batch = create_int32_batch(); let expr = Arc::new(Column::new("a", 0)); - let timezone = "".to_string(); - let cast_i32_to_i8 = Cast::new( - expr.clone(), - DataType::Int8, - EvalMode::Legacy, - timezone.clone(), - false, - ); - let cast_i32_to_i16 = Cast::new( - expr.clone(), - DataType::Int16, - EvalMode::Legacy, - timezone.clone(), - false, - ); - let cast_i32_to_i64 = Cast::new(expr, 
DataType::Int64, EvalMode::Legacy, timezone, false); + let spark_cast_options = SparkCastOptions::new_without_timezone(EvalMode::Legacy, false); + let cast_i32_to_i8 = Cast::new(expr.clone(), DataType::Int8, spark_cast_options.clone()); + let cast_i32_to_i16 = Cast::new(expr.clone(), DataType::Int16, spark_cast_options.clone()); + let cast_i32_to_i64 = Cast::new(expr, DataType::Int64, spark_cast_options); let mut group = c.benchmark_group("cast_int_to_int"); group.bench_function("cast_i32_to_i8", |b| { diff --git a/native/core/src/execution/datafusion/expressions/avg.rs b/native/spark-expr/src/avg.rs similarity index 100% rename from native/core/src/execution/datafusion/expressions/avg.rs rename to native/spark-expr/src/avg.rs diff --git a/native/core/src/execution/datafusion/expressions/avg_decimal.rs b/native/spark-expr/src/avg_decimal.rs similarity index 99% rename from native/core/src/execution/datafusion/expressions/avg_decimal.rs rename to native/spark-expr/src/avg_decimal.rs index a265fdc29..163e1560b 100644 --- a/native/core/src/execution/datafusion/expressions/avg_decimal.rs +++ b/native/spark-expr/src/avg_decimal.rs @@ -28,7 +28,7 @@ use datafusion_common::{not_impl_err, Result, ScalarValue}; use datafusion_physical_expr::{expressions::format_state_name, PhysicalExpr}; use std::{any::Any, sync::Arc}; -use crate::execution::datafusion::expressions::checkoverflow::is_valid_decimal_precision; +use crate::utils::is_valid_decimal_precision; use arrow_array::ArrowNativeTypeOp; use arrow_data::decimal::{MAX_DECIMAL_FOR_EACH_PRECISION, MIN_DECIMAL_FOR_EACH_PRECISION}; use datafusion::logical_expr::Volatility::Immutable; diff --git a/native/core/src/execution/datafusion/expressions/bitwise_not.rs b/native/spark-expr/src/bitwise_not.rs similarity index 88% rename from native/core/src/execution/datafusion/expressions/bitwise_not.rs rename to native/spark-expr/src/bitwise_not.rs index a2b9ebe5b..36234935e 100644 --- a/native/core/src/execution/datafusion/expressions/bitwise_not.rs +++ b/native/spark-expr/src/bitwise_not.rs @@ -28,7 +28,7 @@ use arrow::{ }; use datafusion::physical_expr_common::physical_expr::down_cast_any_ref; use datafusion::{error::DataFusionError, logical_expr::ColumnarValue}; -use datafusion_common::{Result, ScalarValue}; +use datafusion_common::Result; use datafusion_physical_expr::PhysicalExpr; macro_rules! compute_op { @@ -135,22 +135,6 @@ pub fn bitwise_not(arg: Arc) -> Result> Ok(Arc::new(BitwiseNotExpr::new(arg))) } -fn scalar_bitwise_not(scalar: ScalarValue) -> Result { - match scalar { - ScalarValue::Int8(None) - | ScalarValue::Int16(None) - | ScalarValue::Int32(None) - | ScalarValue::Int64(None) => Ok(scalar), - ScalarValue::Int8(Some(v)) => Ok(ScalarValue::Int8(Some(!v))), - ScalarValue::Int16(Some(v)) => Ok(ScalarValue::Int16(Some(!v))), - ScalarValue::Int32(Some(v)) => Ok(ScalarValue::Int32(Some(!v))), - ScalarValue::Int64(Some(v)) => Ok(ScalarValue::Int64(Some(!v))), - value => Err(DataFusionError::Internal(format!( - "Can not run ! on scalar value {value:?}" - ))), - } -} - #[cfg(test)] mod tests { use arrow::datatypes::*; diff --git a/native/spark-expr/src/cast.rs b/native/spark-expr/src/cast.rs index 95fde3735..09530c3c7 100644 --- a/native/spark-expr/src/cast.rs +++ b/native/spark-expr/src/cast.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. 
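The benchmark rewrites above switch `Cast::new` from positional eval-mode and timezone arguments to a single `SparkCastOptions` value. A short usage sketch of that constructor as it appears in the updated benchmarks; it only builds the casts, evaluation against a `RecordBatch` is unchanged:

```rust
use std::sync::Arc;
use arrow_schema::DataType;
use datafusion_comet_spark_expr::{Cast, EvalMode, SparkCastOptions};
use datafusion_physical_expr::{expressions::Column, PhysicalExpr};

fn build_casts() -> (Cast, Cast) {
    let expr: Arc<dyn PhysicalExpr> = Arc::new(Column::new("a", 0));
    // one options value carries eval mode, timezone and compatibility flags
    let options = SparkCastOptions::new(EvalMode::Legacy, "UTC", false);
    let to_i32 = Cast::new(Arc::clone(&expr), DataType::Int32, options.clone());
    let to_i64 = Cast::new(expr, DataType::Int64, options);
    (to_i32, to_i64)
}
```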
+use crate::timezone; +use crate::utils::array_with_timezone; +use crate::{EvalMode, SparkError, SparkResult}; use arrow::{ array::{ cast::AsArray, @@ -56,11 +59,6 @@ use std::{ sync::Arc, }; -use crate::timezone; -use crate::utils::array_with_timezone; - -use crate::{EvalMode, SparkError, SparkResult}; - static TIMESTAMP_FORMAT: Option<&str> = Some("%Y-%m-%d %H:%M:%S%.f"); const MICROS_PER_SECOND: i64 = 1000000; @@ -140,6 +138,240 @@ pub struct Cast { pub cast_options: SparkCastOptions, } +/// Determine if Comet supports a cast, taking options such as EvalMode and Timezone into account. +pub fn cast_supported( + from_type: &DataType, + to_type: &DataType, + options: &SparkCastOptions, +) -> bool { + use DataType::*; + + let from_type = if let Dictionary(_, dt) = from_type { + dt + } else { + from_type + }; + + let to_type = if let Dictionary(_, dt) = to_type { + dt + } else { + to_type + }; + + if from_type == to_type { + return true; + } + + match (from_type, to_type) { + (Boolean, _) => can_cast_from_boolean(to_type, options), + (UInt8 | UInt16 | UInt32 | UInt64, Int8 | Int16 | Int32 | Int64) + if options.allow_cast_unsigned_ints => + { + true + } + (Int8, _) => can_cast_from_byte(to_type, options), + (Int16, _) => can_cast_from_short(to_type, options), + (Int32, _) => can_cast_from_int(to_type, options), + (Int64, _) => can_cast_from_long(to_type, options), + (Float32, _) => can_cast_from_float(to_type, options), + (Float64, _) => can_cast_from_double(to_type, options), + (Decimal128(p, s), _) => can_cast_from_decimal(p, s, to_type, options), + (Timestamp(_, None), _) => can_cast_from_timestamp_ntz(to_type, options), + (Timestamp(_, Some(_)), _) => can_cast_from_timestamp(to_type, options), + (Utf8 | LargeUtf8, _) => can_cast_from_string(to_type, options), + (_, Utf8 | LargeUtf8) => can_cast_to_string(from_type, options), + (Struct(from_fields), Struct(to_fields)) => from_fields + .iter() + .zip(to_fields.iter()) + .all(|(a, b)| cast_supported(a.data_type(), b.data_type(), options)), + _ => false, + } +} + +fn can_cast_from_string(to_type: &DataType, options: &SparkCastOptions) -> bool { + use DataType::*; + match to_type { + Boolean | Int8 | Int16 | Int32 | Int64 | Binary => true, + Float32 | Float64 => { + // https://github.com/apache/datafusion-comet/issues/326 + // Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. + // Does not support ANSI mode. + options.allow_incompat + } + Decimal128(_, _) => { + // https://github.com/apache/datafusion-comet/issues/325 + // Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. + // Does not support ANSI mode. 
Returns 0.0 instead of null if input contains no digits + + options.allow_incompat + } + Date32 | Date64 => { + // https://github.com/apache/datafusion-comet/issues/327 + // Only supports years between 262143 BC and 262142 AD + options.allow_incompat + } + Timestamp(_, _) if options.eval_mode == EvalMode::Ansi => { + // ANSI mode not supported + false + } + Timestamp(_, Some(tz)) if tz.as_ref() != "UTC" => { + // Cast will use UTC instead of $timeZoneId + options.allow_incompat + } + Timestamp(_, _) => { + // https://github.com/apache/datafusion-comet/issues/328 + // Not all valid formats are supported + options.allow_incompat + } + _ => false, + } +} + +fn can_cast_to_string(from_type: &DataType, options: &SparkCastOptions) -> bool { + use DataType::*; + match from_type { + Boolean | Int8 | Int16 | Int32 | Int64 | Date32 | Date64 | Timestamp(_, _) => true, + Float32 | Float64 => { + // There can be differences in precision. + // For example, the input \"1.4E-45\" will produce 1.0E-45 " + + // instead of 1.4E-45")) + true + } + Decimal128(_, _) => { + // https://github.com/apache/datafusion-comet/issues/1068 + // There can be formatting differences in some case due to Spark using + // scientific notation where Comet does not + true + } + Binary => { + // https://github.com/apache/datafusion-comet/issues/377 + // Only works for binary data representing valid UTF-8 strings + options.allow_incompat + } + Struct(fields) => fields + .iter() + .all(|f| can_cast_to_string(f.data_type(), options)), + _ => false, + } +} + +fn can_cast_from_timestamp_ntz(to_type: &DataType, options: &SparkCastOptions) -> bool { + use DataType::*; + match to_type { + Timestamp(_, _) | Date32 | Date64 | Utf8 => { + // incompatible + options.allow_incompat + } + _ => { + // unsupported + false + } + } +} + +fn can_cast_from_timestamp(to_type: &DataType, _options: &SparkCastOptions) -> bool { + use DataType::*; + match to_type { + Boolean | Int8 | Int16 => { + // https://github.com/apache/datafusion-comet/issues/352 + // this seems like an edge case that isn't important for us to support + false + } + Int64 => { + // https://github.com/apache/datafusion-comet/issues/352 + true + } + Date32 | Date64 | Utf8 | Decimal128(_, _) => true, + _ => { + // unsupported + false + } + } +} + +fn can_cast_from_boolean(to_type: &DataType, _: &SparkCastOptions) -> bool { + use DataType::*; + matches!(to_type, Int8 | Int16 | Int32 | Int64 | Float32 | Float64) +} + +fn can_cast_from_byte(to_type: &DataType, _: &SparkCastOptions) -> bool { + use DataType::*; + matches!( + to_type, + Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Decimal128(_, _) + ) +} + +fn can_cast_from_short(to_type: &DataType, _: &SparkCastOptions) -> bool { + use DataType::*; + matches!( + to_type, + Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Decimal128(_, _) + ) +} + +fn can_cast_from_int(to_type: &DataType, options: &SparkCastOptions) -> bool { + use DataType::*; + match to_type { + Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Utf8 => true, + Decimal128(_, _) => { + // incompatible: no overflow check + options.allow_incompat + } + _ => false, + } +} + +fn can_cast_from_long(to_type: &DataType, options: &SparkCastOptions) -> bool { + use DataType::*; + match to_type { + Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 => true, + Decimal128(_, _) => { + // incompatible: no overflow check + options.allow_incompat + } + _ => false, + } +} + +fn 
can_cast_from_float(to_type: &DataType, _: &SparkCastOptions) -> bool { + use DataType::*; + matches!( + to_type, + Boolean | Int8 | Int16 | Int32 | Int64 | Float64 | Decimal128(_, _) + ) +} + +fn can_cast_from_double(to_type: &DataType, _: &SparkCastOptions) -> bool { + use DataType::*; + matches!( + to_type, + Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Decimal128(_, _) + ) +} + +fn can_cast_from_decimal( + p1: &u8, + _s1: &i8, + to_type: &DataType, + options: &SparkCastOptions, +) -> bool { + use DataType::*; + match to_type { + Int8 | Int16 | Int32 | Int64 | Float32 | Float64 => true, + Decimal128(p2, _) => { + if p2 < p1 { + // https://github.com/apache/datafusion/issues/13492 + // Incompatible(Some("Casting to smaller precision is not supported")) + options.allow_incompat + } else { + true + } + } + _ => false, + } +} + macro_rules! cast_utf8_to_int { ($array:expr, $eval_mode:expr, $array_type:ty, $cast_method:ident) => {{ let len = $array.len(); @@ -560,6 +792,8 @@ pub struct SparkCastOptions { pub timezone: String, /// Allow casts that are supported but not guaranteed to be 100% compatible pub allow_incompat: bool, + /// Support casting unsigned ints to signed ints (used by Parquet SchemaAdapter) + pub allow_cast_unsigned_ints: bool, /// We also use the cast logic for adapting Parquet schemas, so this flag is used /// for that use case pub is_adapting_schema: bool, @@ -571,6 +805,7 @@ impl SparkCastOptions { eval_mode, timezone: timezone.to_string(), allow_incompat, + allow_cast_unsigned_ints: false, is_adapting_schema: false, } } @@ -580,6 +815,7 @@ impl SparkCastOptions { eval_mode, timezone: "".to_string(), allow_incompat, + allow_cast_unsigned_ints: false, is_adapting_schema: false, } } @@ -616,14 +852,14 @@ fn cast_array( to_type: &DataType, cast_options: &SparkCastOptions, ) -> DataFusionResult { + use DataType::*; let array = array_with_timezone(array, cast_options.timezone.clone(), Some(to_type))?; let from_type = array.data_type().clone(); let array = match &from_type { - DataType::Dictionary(key_type, value_type) - if key_type.as_ref() == &DataType::Int32 - && (value_type.as_ref() == &DataType::Utf8 - || value_type.as_ref() == &DataType::LargeUtf8) => + Dictionary(key_type, value_type) + if key_type.as_ref() == &Int32 + && (value_type.as_ref() == &Utf8 || value_type.as_ref() == &LargeUtf8) => { let dict_array = array .as_any() @@ -636,7 +872,7 @@ fn cast_array( ); let casted_result = match to_type { - DataType::Dictionary(_, _) => Arc::new(casted_dictionary.clone()), + Dictionary(_, _) => Arc::new(casted_dictionary.clone()), _ => take(casted_dictionary.values().as_ref(), dict_array.keys(), None)?, }; return Ok(spark_cast_postprocess(casted_result, &from_type, to_type)); @@ -647,70 +883,66 @@ fn cast_array( let eval_mode = cast_options.eval_mode; let cast_result = match (from_type, to_type) { - (DataType::Utf8, DataType::Boolean) => spark_cast_utf8_to_boolean::(&array, eval_mode), - (DataType::LargeUtf8, DataType::Boolean) => { - spark_cast_utf8_to_boolean::(&array, eval_mode) - } - (DataType::Utf8, DataType::Timestamp(_, _)) => { + (Utf8, Boolean) => spark_cast_utf8_to_boolean::(&array, eval_mode), + (LargeUtf8, Boolean) => spark_cast_utf8_to_boolean::(&array, eval_mode), + (Utf8, Timestamp(_, _)) => { cast_string_to_timestamp(&array, to_type, eval_mode, &cast_options.timezone) } - (DataType::Utf8, DataType::Date32) => cast_string_to_date(&array, to_type, eval_mode), - (DataType::Int64, DataType::Int32) - | (DataType::Int64, DataType::Int16) - | 
(DataType::Int64, DataType::Int8) - | (DataType::Int32, DataType::Int16) - | (DataType::Int32, DataType::Int8) - | (DataType::Int16, DataType::Int8) + (Utf8, Date32) => cast_string_to_date(&array, to_type, eval_mode), + (Int64, Int32) + | (Int64, Int16) + | (Int64, Int8) + | (Int32, Int16) + | (Int32, Int8) + | (Int16, Int8) if eval_mode != EvalMode::Try => { spark_cast_int_to_int(&array, eval_mode, from_type, to_type) } - (DataType::Utf8, DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64) => { + (Utf8, Int8 | Int16 | Int32 | Int64) => { cast_string_to_int::(to_type, &array, eval_mode) } - ( - DataType::LargeUtf8, - DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64, - ) => cast_string_to_int::(to_type, &array, eval_mode), - (DataType::Float64, DataType::Utf8) => spark_cast_float64_to_utf8::(&array, eval_mode), - (DataType::Float64, DataType::LargeUtf8) => { - spark_cast_float64_to_utf8::(&array, eval_mode) - } - (DataType::Float32, DataType::Utf8) => spark_cast_float32_to_utf8::(&array, eval_mode), - (DataType::Float32, DataType::LargeUtf8) => { - spark_cast_float32_to_utf8::(&array, eval_mode) - } - (DataType::Float32, DataType::Decimal128(precision, scale)) => { + (LargeUtf8, Int8 | Int16 | Int32 | Int64) => { + cast_string_to_int::(to_type, &array, eval_mode) + } + (Float64, Utf8) => spark_cast_float64_to_utf8::(&array, eval_mode), + (Float64, LargeUtf8) => spark_cast_float64_to_utf8::(&array, eval_mode), + (Float32, Utf8) => spark_cast_float32_to_utf8::(&array, eval_mode), + (Float32, LargeUtf8) => spark_cast_float32_to_utf8::(&array, eval_mode), + (Float32, Decimal128(precision, scale)) => { cast_float32_to_decimal128(&array, *precision, *scale, eval_mode) } - (DataType::Float64, DataType::Decimal128(precision, scale)) => { + (Float64, Decimal128(precision, scale)) => { cast_float64_to_decimal128(&array, *precision, *scale, eval_mode) } - (DataType::Float32, DataType::Int8) - | (DataType::Float32, DataType::Int16) - | (DataType::Float32, DataType::Int32) - | (DataType::Float32, DataType::Int64) - | (DataType::Float64, DataType::Int8) - | (DataType::Float64, DataType::Int16) - | (DataType::Float64, DataType::Int32) - | (DataType::Float64, DataType::Int64) - | (DataType::Decimal128(_, _), DataType::Int8) - | (DataType::Decimal128(_, _), DataType::Int16) - | (DataType::Decimal128(_, _), DataType::Int32) - | (DataType::Decimal128(_, _), DataType::Int64) + (Float32, Int8) + | (Float32, Int16) + | (Float32, Int32) + | (Float32, Int64) + | (Float64, Int8) + | (Float64, Int16) + | (Float64, Int32) + | (Float64, Int64) + | (Decimal128(_, _), Int8) + | (Decimal128(_, _), Int16) + | (Decimal128(_, _), Int32) + | (Decimal128(_, _), Int64) if eval_mode != EvalMode::Try => { spark_cast_nonintegral_numeric_to_integral(&array, eval_mode, from_type, to_type) } - (DataType::Struct(_), DataType::Utf8) => { - Ok(casts_struct_to_string(array.as_struct(), cast_options)?) - } - (DataType::Struct(_), DataType::Struct(_)) => Ok(cast_struct_to_struct( + (Struct(_), Utf8) => Ok(casts_struct_to_string(array.as_struct(), cast_options)?), + (Struct(_), Struct(_)) => Ok(cast_struct_to_struct( array.as_struct(), from_type, to_type, cast_options, )?), + (UInt8 | UInt16 | UInt32 | UInt64, Int8 | Int16 | Int32 | Int64) + if cast_options.allow_cast_unsigned_ints => + { + Ok(cast_with_options(&array, to_type, &CAST_OPTIONS)?) 
+ } _ if cast_options.is_adapting_schema || is_datafusion_spark_compatible(from_type, to_type, cast_options.allow_incompat) => { diff --git a/native/core/src/execution/datafusion/expressions/checkoverflow.rs b/native/spark-expr/src/checkoverflow.rs similarity index 88% rename from native/core/src/execution/datafusion/expressions/checkoverflow.rs rename to native/spark-expr/src/checkoverflow.rs index ed03ab667..e922171bd 100644 --- a/native/core/src/execution/datafusion/expressions/checkoverflow.rs +++ b/native/spark-expr/src/checkoverflow.rs @@ -27,8 +27,7 @@ use arrow::{ datatypes::{Decimal128Type, DecimalType}, record_batch::RecordBatch, }; -use arrow_data::decimal::{MAX_DECIMAL_FOR_EACH_PRECISION, MIN_DECIMAL_FOR_EACH_PRECISION}; -use arrow_schema::{DataType, Schema, DECIMAL128_MAX_PRECISION}; +use arrow_schema::{DataType, Schema}; use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr_common::physical_expr::down_cast_any_ref; use datafusion_common::{DataFusionError, ScalarValue}; @@ -172,15 +171,3 @@ impl PhysicalExpr for CheckOverflow { self.hash(&mut s); } } - -/// Adapted from arrow-rs `validate_decimal_precision` but returns bool -/// instead of Err to avoid the cost of formatting the error strings and is -/// optimized to remove a memcpy that exists in the original function -/// we can remove this code once we upgrade to a version of arrow-rs that -/// includes https://github.com/apache/arrow-rs/pull/6419 -#[inline] -pub fn is_valid_decimal_precision(value: i128, precision: u8) -> bool { - precision <= DECIMAL128_MAX_PRECISION - && value >= MIN_DECIMAL_FOR_EACH_PRECISION[precision as usize - 1] - && value <= MAX_DECIMAL_FOR_EACH_PRECISION[precision as usize - 1] -} diff --git a/native/core/src/execution/datafusion/expressions/comet_scalar_funcs.rs b/native/spark-expr/src/comet_scalar_funcs.rs similarity index 98% rename from native/core/src/execution/datafusion/expressions/comet_scalar_funcs.rs rename to native/spark-expr/src/comet_scalar_funcs.rs index 06717aabe..71ff0e9dc 100644 --- a/native/core/src/execution/datafusion/expressions/comet_scalar_funcs.rs +++ b/native/spark-expr/src/comet_scalar_funcs.rs @@ -15,15 +15,15 @@ // specific language governing permissions and limitations // under the License. 
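The cast-compatibility helpers above gate one subtle case: a decimal-to-decimal cast that narrows the precision is only reported as supported when `allow_incompat` is enabled (see the linked DataFusion issue). A dependency-free sketch of that rule follows; the function name is illustrative and not part of the crate.

fn decimal_narrowing_supported(p1: u8, p2: u8, allow_incompat: bool) -> bool {
    // Mirrors can_cast_from_decimal above: widening (or keeping) the precision is
    // always fine, while narrowing is gated behind allow_incompat until
    // https://github.com/apache/datafusion/issues/13492 is resolved.
    if p2 < p1 {
        allow_incompat
    } else {
        true
    }
}

fn main() {
    assert!(decimal_narrowing_supported(10, 12, false)); // widening is always supported
    assert!(!decimal_narrowing_supported(12, 10, false)); // narrowing rejected by default
    assert!(decimal_narrowing_supported(12, 10, true)); // narrowing allowed when opted in
}
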
-use arrow_schema::DataType; -use datafusion_comet_spark_expr::scalar_funcs::hash_expressions::{ +use crate::scalar_funcs::hash_expressions::{ spark_sha224, spark_sha256, spark_sha384, spark_sha512, }; -use datafusion_comet_spark_expr::scalar_funcs::{ +use crate::scalar_funcs::{ spark_ceil, spark_date_add, spark_date_sub, spark_decimal_div, spark_floor, spark_hex, spark_isnan, spark_make_decimal, spark_murmur3_hash, spark_read_side_padding, spark_round, spark_unhex, spark_unscaled_value, spark_xxhash64, SparkChrFunc, }; +use arrow_schema::DataType; use datafusion_common::{DataFusionError, Result as DataFusionResult}; use datafusion_expr::registry::FunctionRegistry; use datafusion_expr::{ diff --git a/native/core/src/execution/datafusion/expressions/correlation.rs b/native/spark-expr/src/correlation.rs similarity index 98% rename from native/core/src/execution/datafusion/expressions/correlation.rs rename to native/spark-expr/src/correlation.rs index 6bf35e711..e5f36c6f9 100644 --- a/native/core/src/execution/datafusion/expressions/correlation.rs +++ b/native/spark-expr/src/correlation.rs @@ -19,9 +19,8 @@ use arrow::compute::{and, filter, is_not_null}; use std::{any::Any, sync::Arc}; -use crate::execution::datafusion::expressions::{ - covariance::CovarianceAccumulator, stddev::StddevAccumulator, -}; +use crate::covariance::CovarianceAccumulator; +use crate::stddev::StddevAccumulator; use arrow::{ array::ArrayRef, datatypes::{DataType, Field}, diff --git a/native/core/src/execution/datafusion/expressions/covariance.rs b/native/spark-expr/src/covariance.rs similarity index 100% rename from native/core/src/execution/datafusion/expressions/covariance.rs rename to native/spark-expr/src/covariance.rs diff --git a/native/spark-expr/src/kernels/mod.rs b/native/spark-expr/src/kernels/mod.rs index 88aa34b1a..3669ff13a 100644 --- a/native/spark-expr/src/kernels/mod.rs +++ b/native/spark-expr/src/kernels/mod.rs @@ -17,4 +17,5 @@ //! Kernels +pub mod strings; pub(crate) mod temporal; diff --git a/native/core/src/execution/kernels/strings.rs b/native/spark-expr/src/kernels/strings.rs similarity index 52% rename from native/core/src/execution/kernels/strings.rs rename to native/spark-expr/src/kernels/strings.rs index 2e5e67b67..bb275fbb9 100644 --- a/native/core/src/execution/kernels/strings.rs +++ b/native/spark-expr/src/kernels/strings.rs @@ -21,19 +21,18 @@ use std::sync::Arc; use arrow::{ array::*, - buffer::{Buffer, MutableBuffer}, + buffer::MutableBuffer, compute::kernels::substring::{substring as arrow_substring, substring_by_char}, datatypes::{DataType, Int32Type}, }; - -use crate::errors::ExpressionError; +use datafusion_common::DataFusionError; /// Returns an ArrayRef with a string consisting of `length` spaces. 
/// /// # Preconditions /// /// - elements in `length` must not be negative -pub fn string_space(length: &dyn Array) -> Result { +pub fn string_space(length: &dyn Array) -> Result { match length.data_type() { DataType::Int32 => { let array = length.as_any().downcast_ref::().unwrap(); @@ -52,7 +51,7 @@ pub fn string_space(length: &dyn Array) -> Result { } } -pub fn substring(array: &dyn Array, start: i64, length: u64) -> Result { +pub fn substring(array: &dyn Array, start: i64, length: u64) -> Result { match array.data_type() { DataType::LargeUtf8 => substring_by_char( array @@ -87,43 +86,6 @@ pub fn substring(array: &dyn Array, start: i64, length: u64) -> Result ArrayRef { - match array.data_type() { - DataType::LargeUtf8 => generic_substring( - array - .as_any() - .downcast_ref::() - .expect("A large string is expected"), - start, - length, - |i| i as i64, - ), - DataType::Utf8 => generic_substring( - array - .as_any() - .downcast_ref::() - .expect("A string is expected"), - start, - length, - |i| i, - ), - _ => panic!("substring does not support type {:?}", array.data_type()), - } -} - fn generic_string_space(length: &Int32Array) -> ArrayRef { let array_len = length.len(); let mut offsets = MutableBuffer::new((array_len + 1) * std::mem::size_of::()); @@ -163,81 +125,3 @@ fn generic_string_space(length: &Int32Array) -> Arr }; make_array(data) } - -fn generic_substring( - array: &GenericStringArray, - start: &Int32Array, - length: &Int32Array, - f: F, -) -> ArrayRef -where - F: Fn(i32) -> OffsetSize, -{ - assert_eq!(array.len(), start.len()); - assert_eq!(array.len(), length.len()); - - // compute current offsets - let offsets = array.to_data().buffers()[0].clone(); - let offsets: &[OffsetSize] = offsets.typed_data::(); - - // compute null bitmap (copy) - let null_bit_buffer = array.to_data().nulls().map(|b| b.buffer().clone()); - - // Gets slices of start and length arrays to access them directly for performance. - let start_data = start.to_data(); - let length_data = length.to_data(); - let starts = start_data.buffers()[0].typed_data::(); - let lengths = length_data.buffers()[0].typed_data::(); - - // compute values - let array_data = array.to_data(); - let values = &array_data.buffers()[1]; - let data = values.as_slice(); - - // we have no way to estimate how much this will be. 
- let mut new_values = MutableBuffer::new(0); - let mut new_offsets: Vec = Vec::with_capacity(array.len() + 1); - - let mut length_so_far = OffsetSize::zero(); - new_offsets.push(length_so_far); - (0..array.len()).for_each(|i| { - // the length of this entry - let length_i: OffsetSize = offsets[i + 1] - offsets[i]; - // compute where we should start slicing this entry - let start_pos: OffsetSize = f(starts[i]); - - let start = offsets[i] - + if start_pos >= OffsetSize::zero() { - start_pos - } else { - length_i + start_pos - }; - - let start = start.clamp(offsets[i], offsets[i + 1]); - // compute the length of the slice - let slice_length: OffsetSize = f(lengths[i].max(0)).min(offsets[i + 1] - start); - - length_so_far += slice_length; - - new_offsets.push(length_so_far); - - // we need usize for ranges - let start = start.to_usize().unwrap(); - let slice_length = slice_length.to_usize().unwrap(); - - new_values.extend_from_slice(&data[start..start + slice_length]); - }); - - let data = unsafe { - ArrayData::new_unchecked( - GenericStringArray::::DATA_TYPE, - array.len(), - None, - null_bit_buffer, - 0, - vec![Buffer::from_slice_ref(&new_offsets), new_values.into()], - vec![], - ) - }; - make_array(data) -} diff --git a/native/spark-expr/src/lib.rs b/native/spark-expr/src/lib.rs index eb02cef84..f35873100 100644 --- a/native/spark-expr/src/lib.rs +++ b/native/spark-expr/src/lib.rs @@ -23,21 +23,54 @@ mod cast; mod error; mod if_expr; +mod avg; +pub use avg::Avg; +mod bitwise_not; +pub use bitwise_not::{bitwise_not, BitwiseNotExpr}; +mod avg_decimal; +pub use avg_decimal::AvgDecimal; +mod checkoverflow; +pub use checkoverflow::CheckOverflow; +mod correlation; +pub use correlation::Correlation; +mod covariance; +pub use covariance::Covariance; +mod strings; +pub use strings::{Contains, EndsWith, Like, StartsWith, StringSpaceExpr, SubstringExpr}; mod kernels; mod list; mod regexp; pub mod scalar_funcs; +mod schema_adapter; +pub use schema_adapter::SparkSchemaAdapterFactory; + pub mod spark_hash; +mod stddev; +pub use stddev::Stddev; mod structs; +mod sum_decimal; +pub use sum_decimal::SumDecimal; +mod negative; +pub use negative::{create_negate_expr, NegativeExpr}; +mod normalize_nan; mod temporal; + +pub mod test_common; pub mod timezone; mod to_json; +mod unbound; +pub use unbound::UnboundColumn; pub mod utils; +pub use normalize_nan::NormalizeNaNAndZero; +mod variance; +pub use variance::Variance; +mod comet_scalar_funcs; pub use cast::{spark_cast, Cast, SparkCastOptions}; +pub use comet_scalar_funcs::create_comet_physical_fun; pub use error::{SparkError, SparkResult}; pub use if_expr::IfExpr; -pub use list::{GetArrayStructFields, ListExtract}; +pub use list::{ArrayInsert, GetArrayStructFields, ListExtract}; pub use regexp::RLike; pub use structs::{CreateNamedStruct, GetStructField}; pub use temporal::{DateTruncExpr, HourExpr, MinuteExpr, SecondExpr, TimestampTruncExpr}; @@ -61,3 +94,9 @@ pub enum EvalMode { /// failing the entire query. Try, } + +pub(crate) fn arithmetic_overflow_error(from_type: &str) -> SparkError { + SparkError::ArithmeticOverflow { + from_type: from_type.to_string(), + } +} diff --git a/native/spark-expr/src/list.rs b/native/spark-expr/src/list.rs index a376198db..7dc17b568 100644 --- a/native/spark-expr/src/list.rs +++ b/native/spark-expr/src/list.rs @@ -15,9 +15,16 @@ // specific language governing permissions and limitations // under the License. 
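Before the list.rs changes below, it is worth spelling out what the removed `generic_substring` kernel above computed per row, since the crate now leans on the arrow `substring`/`substring_by_char` kernels for the same arithmetic. A simplified, dependency-free sketch of that per-value offset logic (names are illustrative; the real kernel operates on Arrow offset buffers rather than `&str`):

fn substring_range(value_len: i64, start: i64, length: i64) -> (usize, usize) {
    // A negative start counts back from the end of the value, the start is clamped
    // into the value's bounds, and the slice length is capped at the remaining bytes,
    // matching the arithmetic in the removed generic_substring.
    let start_pos = if start >= 0 { start } else { value_len + start };
    let begin = start_pos.clamp(0, value_len);
    let slice_len = length.max(0).min(value_len - begin);
    (begin as usize, (begin + slice_len) as usize)
}

fn main() {
    let value = "backend";
    let (b, e) = substring_range(value.len() as i64, -3, 3);
    assert_eq!(&value[b..e], "end");
}
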
-use arrow::{array::MutableArrayData, datatypes::ArrowNativeType, record_batch::RecordBatch}; -use arrow_array::{Array, GenericListArray, Int32Array, OffsetSizeTrait, StructArray}; -use arrow_schema::{DataType, FieldRef, Schema}; +use arrow::{ + array::{as_primitive_array, Capacities, MutableArrayData}, + buffer::{NullBuffer, OffsetBuffer}, + datatypes::ArrowNativeType, + record_batch::RecordBatch, +}; +use arrow_array::{ + make_array, Array, ArrayRef, GenericListArray, Int32Array, OffsetSizeTrait, StructArray, +}; +use arrow_schema::{DataType, Field, FieldRef, Schema}; use datafusion::logical_expr::ColumnarValue; use datafusion::physical_expr_common::physical_expr::down_cast_any_ref; use datafusion_common::{ @@ -27,10 +34,16 @@ use datafusion_common::{ use datafusion_physical_expr::PhysicalExpr; use std::{ any::Any, - fmt::{Display, Formatter}, + fmt::{Debug, Display, Formatter}, hash::{Hash, Hasher}, sync::Arc, }; + +// 2147483632 == java.lang.Integer.MAX_VALUE - 15 +// It is a value of ByteArrayUtils.MAX_ROUNDED_ARRAY_LENGTH +// https://github.com/apache/spark/blob/master/common/utils/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java +const MAX_ROUNDED_ARRAY_LENGTH: usize = 2147483632; + #[derive(Debug, Hash)] pub struct ListExtract { child: Arc, @@ -413,14 +426,297 @@ impl PartialEq for GetArrayStructFields { } } +#[derive(Debug, Hash)] +pub struct ArrayInsert { + src_array_expr: Arc, + pos_expr: Arc, + item_expr: Arc, + legacy_negative_index: bool, +} + +impl ArrayInsert { + pub fn new( + src_array_expr: Arc, + pos_expr: Arc, + item_expr: Arc, + legacy_negative_index: bool, + ) -> Self { + Self { + src_array_expr, + pos_expr, + item_expr, + legacy_negative_index, + } + } + + pub fn array_type(&self, data_type: &DataType) -> DataFusionResult { + match data_type { + DataType::List(field) => Ok(DataType::List(Arc::clone(field))), + DataType::LargeList(field) => Ok(DataType::LargeList(Arc::clone(field))), + data_type => Err(DataFusionError::Internal(format!( + "Unexpected src array type in ArrayInsert: {:?}", + data_type + ))), + } + } +} + +impl PhysicalExpr for ArrayInsert { + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, input_schema: &Schema) -> DataFusionResult { + self.array_type(&self.src_array_expr.data_type(input_schema)?) + } + + fn nullable(&self, input_schema: &Schema) -> DataFusionResult { + self.src_array_expr.nullable(input_schema) + } + + fn evaluate(&self, batch: &RecordBatch) -> DataFusionResult { + let pos_value = self + .pos_expr + .evaluate(batch)? + .into_array(batch.num_rows())?; + + // Spark supports only IntegerType (Int32): + // https://github.com/apache/spark/blob/branch-3.5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala#L4737 + if !matches!(pos_value.data_type(), DataType::Int32) { + return Err(DataFusionError::Internal(format!( + "Unexpected index data type in ArrayInsert: {:?}, expected type is Int32", + pos_value.data_type() + ))); + } + + // Check that src array is actually an array and get it's value type + let src_value = self + .src_array_expr + .evaluate(batch)? + .into_array(batch.num_rows())?; + + let src_element_type = match self.array_type(src_value.data_type())? { + DataType::List(field) => &field.data_type().clone(), + DataType::LargeList(field) => &field.data_type().clone(), + _ => unreachable!(), + }; + + // Check that inserted value has the same type as an array + let item_value = self + .item_expr + .evaluate(batch)? 
+ .into_array(batch.num_rows())?; + if item_value.data_type() != src_element_type { + return Err(DataFusionError::Internal(format!( + "Type mismatch in ArrayInsert: array type is {:?} but item type is {:?}", + src_element_type, + item_value.data_type() + ))); + } + + match src_value.data_type() { + DataType::List(_) => { + let list_array = as_list_array(&src_value)?; + array_insert( + list_array, + &item_value, + &pos_value, + self.legacy_negative_index, + ) + } + DataType::LargeList(_) => { + let list_array = as_large_list_array(&src_value)?; + array_insert( + list_array, + &item_value, + &pos_value, + self.legacy_negative_index, + ) + } + _ => unreachable!(), // This case is checked already + } + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.src_array_expr, &self.pos_expr, &self.item_expr] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> DataFusionResult> { + match children.len() { + 3 => Ok(Arc::new(ArrayInsert::new( + Arc::clone(&children[0]), + Arc::clone(&children[1]), + Arc::clone(&children[2]), + self.legacy_negative_index, + ))), + _ => internal_err!("ArrayInsert should have exactly three childrens"), + } + } + + fn dyn_hash(&self, _state: &mut dyn Hasher) { + let mut s = _state; + self.src_array_expr.hash(&mut s); + self.pos_expr.hash(&mut s); + self.item_expr.hash(&mut s); + self.legacy_negative_index.hash(&mut s); + self.hash(&mut s); + } +} + +fn array_insert( + list_array: &GenericListArray, + items_array: &ArrayRef, + pos_array: &ArrayRef, + legacy_mode: bool, +) -> DataFusionResult { + // The code is based on the implementation of the array_append from the Apache DataFusion + // https://github.com/apache/datafusion/blob/main/datafusion/functions-nested/src/concat.rs#L513 + // + // This code is also based on the implementation of the array_insert from the Apache Spark + // https://github.com/apache/spark/blob/branch-3.5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala#L4713 + + let values = list_array.values(); + let offsets = list_array.offsets(); + let values_data = values.to_data(); + let item_data = items_array.to_data(); + let new_capacity = Capacities::Array(values_data.len() + item_data.len()); + + let mut mutable_values = + MutableArrayData::with_capacities(vec![&values_data, &item_data], true, new_capacity); + + let mut new_offsets = vec![O::usize_as(0)]; + let mut new_nulls = Vec::::with_capacity(list_array.len()); + + let pos_data: &Int32Array = as_primitive_array(&pos_array); // Spark supports only i32 for positions + + for (row_index, offset_window) in offsets.windows(2).enumerate() { + let pos = pos_data.values()[row_index]; + let start = offset_window[0].as_usize(); + let end = offset_window[1].as_usize(); + let is_item_null = items_array.is_null(row_index); + + if list_array.is_null(row_index) { + // In Spark if value of the array is NULL than nothing happens + mutable_values.extend_nulls(1); + new_offsets.push(new_offsets[row_index] + O::one()); + new_nulls.push(false); + continue; + } + + if pos == 0 { + return Err(DataFusionError::Internal( + "Position for array_insert should be greter or less than zero".to_string(), + )); + } + + if (pos > 0) || ((-pos).as_usize() < (end - start + 1)) { + let corrected_pos = if pos > 0 { + (pos - 1).as_usize() + } else { + end - start - (-pos).as_usize() + if legacy_mode { 0 } else { 1 } + }; + let new_array_len = std::cmp::max(end - start + 1, corrected_pos); + if new_array_len > MAX_ROUNDED_ARRAY_LENGTH { + return 
Err(DataFusionError::Internal(format!( + "Max array length in Spark is {:?}, but got {:?}", + MAX_ROUNDED_ARRAY_LENGTH, new_array_len + ))); + } + + if (start + corrected_pos) <= end { + mutable_values.extend(0, start, start + corrected_pos); + mutable_values.extend(1, row_index, row_index + 1); + mutable_values.extend(0, start + corrected_pos, end); + new_offsets.push(new_offsets[row_index] + O::usize_as(new_array_len)); + } else { + mutable_values.extend(0, start, end); + mutable_values.extend_nulls(new_array_len - (end - start)); + mutable_values.extend(1, row_index, row_index + 1); + // In that case spark actualy makes array longer than expected; + // For example, if pos is equal to 5, len is eq to 3, than resulted len will be 5 + new_offsets.push(new_offsets[row_index] + O::usize_as(new_array_len) + O::one()); + } + } else { + // This comment is takes from the Apache Spark source code as is: + // special case- if the new position is negative but larger than the current array size + // place the new item at start of array, place the current array contents at the end + // and fill the newly created array elements inbetween with a null + let base_offset = if legacy_mode { 1 } else { 0 }; + let new_array_len = (-pos + base_offset).as_usize(); + if new_array_len > MAX_ROUNDED_ARRAY_LENGTH { + return Err(DataFusionError::Internal(format!( + "Max array length in Spark is {:?}, but got {:?}", + MAX_ROUNDED_ARRAY_LENGTH, new_array_len + ))); + } + mutable_values.extend(1, row_index, row_index + 1); + mutable_values.extend_nulls(new_array_len - (end - start + 1)); + mutable_values.extend(0, start, end); + new_offsets.push(new_offsets[row_index] + O::usize_as(new_array_len)); + } + if is_item_null { + if (start == end) || (values.is_null(row_index)) { + new_nulls.push(false) + } else { + new_nulls.push(true) + } + } else { + new_nulls.push(true) + } + } + + let data = make_array(mutable_values.freeze()); + let data_type = match list_array.data_type() { + DataType::List(field) => field.data_type(), + DataType::LargeList(field) => field.data_type(), + _ => unreachable!(), + }; + let new_array = GenericListArray::::try_new( + Arc::new(Field::new("item", data_type.clone(), true)), + OffsetBuffer::new(new_offsets.into()), + data, + Some(NullBuffer::new(new_nulls.into())), + )?; + + Ok(ColumnarValue::Array(Arc::new(new_array))) +} + +impl Display for ArrayInsert { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "ArrayInsert [array: {:?}, pos: {:?}, item: {:?}]", + self.src_array_expr, self.pos_expr, self.item_expr + ) + } +} + +impl PartialEq for ArrayInsert { + fn eq(&self, other: &dyn Any) -> bool { + down_cast_any_ref(other) + .downcast_ref::() + .map(|x| { + self.src_array_expr.eq(&x.src_array_expr) + && self.pos_expr.eq(&x.pos_expr) + && self.item_expr.eq(&x.item_expr) + && self.legacy_negative_index.eq(&x.legacy_negative_index) + }) + .unwrap_or(false) + } +} + #[cfg(test)] mod test { - use crate::list::{list_extract, zero_based_index}; + use crate::list::{array_insert, list_extract, zero_based_index}; use arrow::datatypes::Int32Type; - use arrow_array::{Array, Int32Array, ListArray}; + use arrow_array::{Array, ArrayRef, Int32Array, ListArray}; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::ColumnarValue; + use std::sync::Arc; #[test] fn test_list_extract_default_value() -> Result<()> { @@ -458,4 +754,120 @@ mod test { ); Ok(()) } + + #[test] + fn test_array_insert() -> Result<()> { + // Test inserting an item into a list array + // Inputs and 
expected values are taken from the Spark results + let list = ListArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + Some(vec![Some(4), Some(5)]), + Some(vec![None]), + Some(vec![Some(1), Some(2), Some(3)]), + Some(vec![Some(1), Some(2), Some(3)]), + None, + ]); + + let positions = Int32Array::from(vec![2, 1, 1, 5, 6, 1]); + let items = Int32Array::from(vec![ + Some(10), + Some(20), + Some(30), + Some(100), + Some(100), + Some(40), + ]); + + let ColumnarValue::Array(result) = array_insert( + &list, + &(Arc::new(items) as ArrayRef), + &(Arc::new(positions) as ArrayRef), + false, + )? + else { + unreachable!() + }; + + let expected = ListArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(10), Some(2), Some(3)]), + Some(vec![Some(20), Some(4), Some(5)]), + Some(vec![Some(30), None]), + Some(vec![Some(1), Some(2), Some(3), None, Some(100)]), + Some(vec![Some(1), Some(2), Some(3), None, None, Some(100)]), + None, + ]); + + assert_eq!(&result.to_data(), &expected.to_data()); + + Ok(()) + } + + #[test] + fn test_array_insert_negative_index() -> Result<()> { + // Test insert with negative index + // Inputs and expected values are taken from the Spark results + let list = ListArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + Some(vec![Some(4), Some(5)]), + Some(vec![Some(1)]), + None, + ]); + + let positions = Int32Array::from(vec![-2, -1, -3, -1]); + let items = Int32Array::from(vec![Some(10), Some(20), Some(100), Some(30)]); + + let ColumnarValue::Array(result) = array_insert( + &list, + &(Arc::new(items) as ArrayRef), + &(Arc::new(positions) as ArrayRef), + false, + )? + else { + unreachable!() + }; + + let expected = ListArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(10), Some(3)]), + Some(vec![Some(4), Some(5), Some(20)]), + Some(vec![Some(100), None, Some(1)]), + None, + ]); + + assert_eq!(&result.to_data(), &expected.to_data()); + + Ok(()) + } + + #[test] + fn test_array_insert_legacy_mode() -> Result<()> { + // Test the so-called "legacy" mode exisiting in the Spark + let list = ListArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + Some(vec![Some(4), Some(5)]), + None, + ]); + + let positions = Int32Array::from(vec![-1, -1, -1]); + let items = Int32Array::from(vec![Some(10), Some(20), Some(30)]); + + let ColumnarValue::Array(result) = array_insert( + &list, + &(Arc::new(items) as ArrayRef), + &(Arc::new(positions) as ArrayRef), + true, + )? + else { + unreachable!() + }; + + let expected = ListArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(10), Some(3)]), + Some(vec![Some(4), Some(20), Some(5)]), + None, + ]); + + assert_eq!(&result.to_data(), &expected.to_data()); + + Ok(()) + } } diff --git a/native/core/src/execution/datafusion/expressions/negative.rs b/native/spark-expr/src/negative.rs similarity index 98% rename from native/core/src/execution/datafusion/expressions/negative.rs rename to native/spark-expr/src/negative.rs index 8dfe71742..3d9063e78 100644 --- a/native/core/src/execution/datafusion/expressions/negative.rs +++ b/native/spark-expr/src/negative.rs @@ -16,7 +16,7 @@ // under the License. 
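The three tests above pin down the Spark-observed behaviour; the index handling they exercise boils down to the normalization performed inside `array_insert`. A simplified, dependency-free sketch over a plain `Vec` (it ignores Arrow validity buffers and the per-row null bookkeeping of the real kernel; names are illustrative):

fn insert_sketch(mut arr: Vec<Option<i32>>, pos: i32, item: Option<i32>, legacy: bool) -> Vec<Option<i32>> {
    // Position 0 is rejected, exactly as in the kernel above.
    assert!(pos != 0, "position for array_insert must be greater or less than zero");
    let len = arr.len() as i32;
    let base_offset: i32 = if legacy { 1 } else { 0 };
    if pos > 0 || -pos <= len {
        // Positive positions are 1-based; in-bounds negative positions count from the end.
        let corrected = if pos > 0 {
            (pos - 1) as usize
        } else {
            (len + pos + 1 - base_offset) as usize
        };
        if corrected <= arr.len() {
            arr.insert(corrected, item);
        } else {
            // Inserting past the end pads the gap with nulls, as in the tests above.
            arr.resize(corrected, None);
            arr.push(item);
        }
    } else {
        // Negative position beyond the array: the item goes first, nulls fill the gap,
        // then the original contents follow.
        let pad = (-pos + base_offset - len - 1) as usize;
        let mut out = vec![item];
        out.extend(std::iter::repeat(None).take(pad));
        out.append(&mut arr);
        arr = out;
    }
    arr
}

fn main() {
    // First row of test_array_insert: insert 10 at position 2 into [1, 2, 3].
    assert_eq!(
        insert_sketch(vec![Some(1), Some(2), Some(3)], 2, Some(10), false),
        vec![Some(1), Some(10), Some(2), Some(3)]
    );
    // Third row of test_array_insert_negative_index: position -3 into [1].
    assert_eq!(
        insert_sketch(vec![Some(1)], -3, Some(100), false),
        vec![Some(100), None, Some(1)]
    );
}
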
use super::arithmetic_overflow_error; -use crate::errors::CometError; +use crate::SparkError; use arrow::{compute::kernels::numeric::neg_wrapping, datatypes::IntervalDayTimeType}; use arrow_array::RecordBatch; use arrow_buffer::IntervalDayTime; @@ -26,8 +26,7 @@ use datafusion::{ logical_expr::{interval_arithmetic::Interval, ColumnarValue}, physical_expr::PhysicalExpr, }; -use datafusion_comet_spark_expr::SparkError; -use datafusion_common::{Result, ScalarValue}; +use datafusion_common::{DataFusionError, Result, ScalarValue}; use datafusion_expr::sort_properties::ExprProperties; use std::{ any::Any, @@ -38,7 +37,7 @@ use std::{ pub fn create_negate_expr( expr: Arc, fail_on_error: bool, -) -> Result, CometError> { +) -> Result, DataFusionError> { Ok(Arc::new(NegativeExpr::new(expr, fail_on_error))) } diff --git a/native/core/src/execution/datafusion/expressions/normalize_nan.rs b/native/spark-expr/src/normalize_nan.rs similarity index 100% rename from native/core/src/execution/datafusion/expressions/normalize_nan.rs rename to native/spark-expr/src/normalize_nan.rs diff --git a/native/spark-expr/src/scalar_funcs.rs b/native/spark-expr/src/scalar_funcs.rs index 5cc3f3dd7..2961f038d 100644 --- a/native/spark-expr/src/scalar_funcs.rs +++ b/native/spark-expr/src/scalar_funcs.rs @@ -354,7 +354,7 @@ pub fn spark_round( DataType::Int32 if *point < 0 => round_integer_array!(array, point, Int32Array, i32), DataType::Int16 if *point < 0 => round_integer_array!(array, point, Int16Array, i16), DataType::Int8 if *point < 0 => round_integer_array!(array, point, Int8Array, i8), - DataType::Decimal128(_, scale) if *scale > 0 => { + DataType::Decimal128(_, scale) if *scale >= 0 => { let f = decimal_round_f(scale, point); let (precision, scale) = get_precision_scale(data_type); make_decimal_array(array, precision, scale, &f) diff --git a/native/core/src/execution/datafusion/schema_adapter.rs b/native/spark-expr/src/schema_adapter.rs similarity index 71% rename from native/core/src/execution/datafusion/schema_adapter.rs rename to native/spark-expr/src/schema_adapter.rs index 2c6032a0a..161ad6f16 100644 --- a/native/core/src/execution/datafusion/schema_adapter.rs +++ b/native/spark-expr/src/schema_adapter.rs @@ -17,19 +17,32 @@ //! Custom schema adapter that uses Spark-compatible casts -use arrow::compute::can_cast_types; +use crate::cast::cast_supported; +use crate::{spark_cast, SparkCastOptions}; use arrow_array::{new_null_array, Array, RecordBatch, RecordBatchOptions}; -use arrow_schema::{DataType, Schema, SchemaRef, TimeUnit}; +use arrow_schema::{Schema, SchemaRef}; use datafusion::datasource::schema_adapter::{SchemaAdapter, SchemaAdapterFactory, SchemaMapper}; -use datafusion_comet_spark_expr::{spark_cast, EvalMode, SparkCastOptions}; use datafusion_common::plan_err; use datafusion_expr::ColumnarValue; use std::sync::Arc; -#[derive(Clone, Debug, Default)] -pub struct CometSchemaAdapterFactory {} +/// An implementation of DataFusion's `SchemaAdapterFactory` that uses a Spark-compatible +/// `cast` implementation. +#[derive(Clone, Debug)] +pub struct SparkSchemaAdapterFactory { + /// Spark cast options + cast_options: SparkCastOptions, +} + +impl SparkSchemaAdapterFactory { + pub fn new(options: SparkCastOptions) -> Self { + Self { + cast_options: options, + } + } +} -impl SchemaAdapterFactory for CometSchemaAdapterFactory { +impl SchemaAdapterFactory for SparkSchemaAdapterFactory { /// Create a new factory for mapping batches from a file schema to a table /// schema. 
/// @@ -41,9 +54,10 @@ impl SchemaAdapterFactory for CometSchemaAdapterFactory { required_schema: SchemaRef, table_schema: SchemaRef, ) -> Box { - Box::new(CometSchemaAdapter { + Box::new(SparkSchemaAdapter { required_schema, table_schema, + cast_options: self.cast_options.clone(), }) } } @@ -51,7 +65,7 @@ impl SchemaAdapterFactory for CometSchemaAdapterFactory { /// This SchemaAdapter requires both the table schema and the projected table /// schema. See [`SchemaMapping`] for more details #[derive(Clone, Debug)] -pub struct CometSchemaAdapter { +pub struct SparkSchemaAdapter { /// The schema for the table, projected to include only the fields being output (projected) by the /// associated ParquetExec required_schema: SchemaRef, @@ -61,9 +75,11 @@ pub struct CometSchemaAdapter { /// which may refer to columns that are not referred to anywhere /// else in the plan. table_schema: SchemaRef, + /// Spark cast options + cast_options: SparkCastOptions, } -impl SchemaAdapter for CometSchemaAdapter { +impl SchemaAdapter for SparkSchemaAdapter { /// Map a column index in the table schema to a column index in a particular /// file schema /// @@ -93,7 +109,11 @@ impl SchemaAdapter for CometSchemaAdapter { if let Some((table_idx, table_field)) = self.required_schema.fields().find(file_field.name()) { - if comet_can_cast_types(file_field.data_type(), table_field.data_type()) { + if cast_supported( + file_field.data_type(), + table_field.data_type(), + &self.cast_options, + ) { field_mappings[table_idx] = Some(projection.len()); projection.push(file_idx); } else { @@ -107,14 +127,12 @@ impl SchemaAdapter for CometSchemaAdapter { } } - let mut cast_options = SparkCastOptions::new(EvalMode::Legacy, "UTC", false); - cast_options.is_adapting_schema = true; Ok(( Arc::new(SchemaMapping { required_schema: Arc::::clone(&self.required_schema), field_mappings, table_schema: Arc::::clone(&self.table_schema), - cast_options, + cast_options: self.cast_options.clone(), }), projection, )) @@ -165,7 +183,7 @@ pub struct SchemaMapping { /// This contains all fields in the table, regardless of if they will be /// projected out or not. 
table_schema: SchemaRef, - + /// Spark cast options cast_options: SparkCastOptions, } @@ -266,15 +284,93 @@ impl SchemaMapper for SchemaMapping { } } -fn comet_can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { - // TODO this is just a quick hack to get tests passing - match (from_type, to_type) { - (DataType::Struct(_), DataType::Struct(_)) => { - // workaround for struct casting - true - } - // TODO this is maybe no longer needed - (_, DataType::Timestamp(TimeUnit::Nanosecond, _)) => false, - _ => can_cast_types(from_type, to_type), +#[cfg(test)] +mod test { + use crate::test_common::file_util::get_temp_filename; + use crate::{EvalMode, SparkCastOptions, SparkSchemaAdapterFactory}; + use arrow::array::{Int32Array, StringArray}; + use arrow::datatypes::{DataType, Field, Schema}; + use arrow::record_batch::RecordBatch; + use arrow_array::UInt32Array; + use arrow_schema::SchemaRef; + use datafusion::datasource::listing::PartitionedFile; + use datafusion::datasource::physical_plan::{FileScanConfig, ParquetExec}; + use datafusion::execution::object_store::ObjectStoreUrl; + use datafusion::execution::TaskContext; + use datafusion::physical_plan::ExecutionPlan; + use datafusion_common::DataFusionError; + use futures::StreamExt; + use parquet::arrow::ArrowWriter; + use std::fs::File; + use std::sync::Arc; + + #[tokio::test] + async fn parquet_roundtrip_int_as_string() -> Result<(), DataFusionError> { + let file_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + ])); + + let ids = Arc::new(Int32Array::from(vec![1, 2, 3])) as Arc; + let names = Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])) + as Arc; + let batch = RecordBatch::try_new(Arc::clone(&file_schema), vec![ids, names])?; + + let required_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Utf8, false), + Field::new("name", DataType::Utf8, false), + ])); + + let _ = roundtrip(&batch, required_schema).await?; + + Ok(()) + } + + #[tokio::test] + async fn parquet_roundtrip_unsigned_int() -> Result<(), DataFusionError> { + let file_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::UInt32, false)])); + + let ids = Arc::new(UInt32Array::from(vec![1, 2, 3])) as Arc; + let batch = RecordBatch::try_new(Arc::clone(&file_schema), vec![ids])?; + + let required_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); + + let _ = roundtrip(&batch, required_schema).await?; + + Ok(()) + } + + /// Create a Parquet file containing a single batch and then read the batch back using + /// the specified required_schema. This will cause the SchemaAdapter code to be used. 
+ async fn roundtrip( + batch: &RecordBatch, + required_schema: SchemaRef, + ) -> Result { + let filename = get_temp_filename(); + let filename = filename.as_path().as_os_str().to_str().unwrap().to_string(); + let file = File::create(&filename)?; + let mut writer = ArrowWriter::try_new(file, Arc::clone(&batch.schema()), None)?; + writer.write(batch)?; + writer.close()?; + + let object_store_url = ObjectStoreUrl::local_filesystem(); + let file_scan_config = FileScanConfig::new(object_store_url, required_schema) + .with_file_groups(vec![vec![PartitionedFile::from_path( + filename.to_string(), + )?]]); + + let mut spark_cast_options = SparkCastOptions::new(EvalMode::Legacy, "UTC", false); + spark_cast_options.allow_cast_unsigned_ints = true; + + let parquet_exec = ParquetExec::builder(file_scan_config) + .with_schema_adapter_factory(Arc::new(SparkSchemaAdapterFactory::new( + spark_cast_options, + ))) + .build(); + + let mut stream = parquet_exec + .execute(0, Arc::new(TaskContext::default())) + .unwrap(); + stream.next().await.unwrap() } } diff --git a/native/core/src/execution/datafusion/expressions/stddev.rs b/native/spark-expr/src/stddev.rs similarity index 98% rename from native/core/src/execution/datafusion/expressions/stddev.rs rename to native/spark-expr/src/stddev.rs index 1ba495e21..3cf604da0 100644 --- a/native/core/src/execution/datafusion/expressions/stddev.rs +++ b/native/spark-expr/src/stddev.rs @@ -17,7 +17,7 @@ use std::{any::Any, sync::Arc}; -use crate::execution::datafusion::expressions::variance::VarianceAccumulator; +use crate::variance::VarianceAccumulator; use arrow::{ array::ArrayRef, datatypes::{DataType, Field}, diff --git a/native/core/src/execution/datafusion/expressions/strings.rs b/native/spark-expr/src/strings.rs similarity index 99% rename from native/core/src/execution/datafusion/expressions/strings.rs rename to native/spark-expr/src/strings.rs index 200b4ec5a..a8aab6aee 100644 --- a/native/core/src/execution/datafusion/expressions/strings.rs +++ b/native/spark-expr/src/strings.rs @@ -17,7 +17,7 @@ #![allow(deprecated)] -use crate::execution::kernels::strings::{string_space, substring}; +use crate::kernels::strings::{string_space, substring}; use arrow::{ compute::{ contains_dyn, contains_utf8_scalar_dyn, ends_with_dyn, ends_with_utf8_scalar_dyn, like_dyn, diff --git a/native/core/src/execution/datafusion/expressions/sum_decimal.rs b/native/spark-expr/src/sum_decimal.rs similarity index 98% rename from native/core/src/execution/datafusion/expressions/sum_decimal.rs rename to native/spark-expr/src/sum_decimal.rs index d885ff90b..ab142aee6 100644 --- a/native/core/src/execution/datafusion/expressions/sum_decimal.rs +++ b/native/spark-expr/src/sum_decimal.rs @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. 
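The Parquet round-trip tests above are the end-to-end usage example for the schema adapter; the configuration step they rely on can be isolated into a few lines. A minimal sketch, using only the types and fields introduced in this change and assuming the crate is consumed under its `datafusion_comet_spark_expr` name (as in the core crate imports above); the helper name is illustrative:

use datafusion_comet_spark_expr::{EvalMode, SparkCastOptions, SparkSchemaAdapterFactory};

// Build a schema adapter factory that applies Spark-compatible casts when the Parquet
// file schema differs from the required schema, opting in to unsigned-to-signed int casts.
fn make_spark_schema_adapter_factory() -> SparkSchemaAdapterFactory {
    let mut opts = SparkCastOptions::new(EvalMode::Legacy, "UTC", false);
    opts.allow_cast_unsigned_ints = true;
    SparkSchemaAdapterFactory::new(opts)
}

The resulting factory is then handed to `ParquetExec::builder(...).with_schema_adapter_factory(...)`, as the `roundtrip` helper above does.
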
-use crate::execution::datafusion::expressions::checkoverflow::is_valid_decimal_precision; -use crate::unlikely; +use crate::utils::{is_valid_decimal_precision, unlikely}; use arrow::{ array::BooleanBufferBuilder, buffer::{BooleanBuffer, NullBuffer}, @@ -113,7 +112,6 @@ impl AggregateUDFImpl for SumDecimal { Ok(Box::new(SumDecimalGroupsAccumulator::new( self.result_type.clone(), self.precision, - self.scale, ))) } @@ -286,18 +284,16 @@ struct SumDecimalGroupsAccumulator { sum: Vec, result_type: DataType, precision: u8, - scale: i8, } impl SumDecimalGroupsAccumulator { - fn new(result_type: DataType, precision: u8, scale: i8) -> Self { + fn new(result_type: DataType, precision: u8) -> Self { Self { is_not_null: BooleanBufferBuilder::new(0), is_empty: BooleanBufferBuilder::new(0), sum: Vec::new(), result_type, precision, - scale, } } @@ -488,11 +484,11 @@ mod tests { use arrow::datatypes::*; use arrow_array::builder::{Decimal128Builder, StringBuilder}; use arrow_array::RecordBatch; + use datafusion::execution::TaskContext; use datafusion::physical_plan::aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}; use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::ExecutionPlan; use datafusion_common::Result; - use datafusion_execution::TaskContext; use datafusion_expr::AggregateUDF; use datafusion_physical_expr::aggregate::AggregateExprBuilder; use datafusion_physical_expr::expressions::{Column, Literal}; diff --git a/native/core/src/parquet/util/test_common/file_util.rs b/native/spark-expr/src/test_common/file_util.rs similarity index 100% rename from native/core/src/parquet/util/test_common/file_util.rs rename to native/spark-expr/src/test_common/file_util.rs diff --git a/native/core/src/execution/datafusion/operators/mod.rs b/native/spark-expr/src/test_common/mod.rs similarity index 97% rename from native/core/src/execution/datafusion/operators/mod.rs rename to native/spark-expr/src/test_common/mod.rs index 3d28a266a..efd25a4a2 100644 --- a/native/core/src/execution/datafusion/operators/mod.rs +++ b/native/spark-expr/src/test_common/mod.rs @@ -14,5 +14,4 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
- -pub mod expand; +pub mod file_util; diff --git a/native/core/src/execution/datafusion/expressions/unbound.rs b/native/spark-expr/src/unbound.rs similarity index 100% rename from native/core/src/execution/datafusion/expressions/unbound.rs rename to native/spark-expr/src/unbound.rs diff --git a/native/spark-expr/src/utils.rs b/native/spark-expr/src/utils.rs index 2fc8de974..ed04941bd 100644 --- a/native/spark-expr/src/utils.rs +++ b/native/spark-expr/src/utils.rs @@ -19,7 +19,7 @@ use arrow_array::{ cast::as_primitive_array, types::{Int32Type, TimestampMicrosecondType}, }; -use arrow_schema::{ArrowError, DataType, TimeUnit}; +use arrow_schema::{ArrowError, DataType, TimeUnit, DECIMAL128_MAX_PRECISION}; use std::sync::Arc; use crate::timezone::Tz; @@ -28,6 +28,7 @@ use arrow::{ temporal_conversions::as_datetime, }; use arrow_array::types::TimestampMillisecondType; +use arrow_data::decimal::{MAX_DECIMAL_FOR_EACH_PRECISION, MIN_DECIMAL_FOR_EACH_PRECISION}; use chrono::{DateTime, Offset, TimeZone}; /// Preprocesses input arrays to add timezone information from Spark to Arrow array datatype or @@ -211,3 +212,39 @@ fn pre_timestamp_cast(array: ArrayRef, timezone: String) -> Result Ok(array), } } + +/// Adapted from arrow-rs `validate_decimal_precision` but returns bool +/// instead of Err to avoid the cost of formatting the error strings and is +/// optimized to remove a memcpy that exists in the original function +/// we can remove this code once we upgrade to a version of arrow-rs that +/// includes https://github.com/apache/arrow-rs/pull/6419 +#[inline] +pub fn is_valid_decimal_precision(value: i128, precision: u8) -> bool { + precision <= DECIMAL128_MAX_PRECISION + && value >= MIN_DECIMAL_FOR_EACH_PRECISION[precision as usize - 1] + && value <= MAX_DECIMAL_FOR_EACH_PRECISION[precision as usize - 1] +} + +// These are borrowed from hashbrown crate: +// https://github.com/rust-lang/hashbrown/blob/master/src/raw/mod.rs + +// On stable we can use #[cold] to get a equivalent effect: this attributes +// suggests that the function is unlikely to be called +#[inline] +#[cold] +pub fn cold() {} + +#[inline] +pub fn likely(b: bool) -> bool { + if !b { + cold(); + } + b +} +#[inline] +pub fn unlikely(b: bool) -> bool { + if b { + cold(); + } + b +} diff --git a/native/core/src/execution/datafusion/expressions/variance.rs b/native/spark-expr/src/variance.rs similarity index 100% rename from native/core/src/execution/datafusion/expressions/variance.rs rename to native/spark-expr/src/variance.rs diff --git a/pom.xml b/pom.xml index ed0a8afb2..cdc44a5ca 100644 --- a/pom.xml +++ b/pom.xml @@ -30,7 +30,7 @@ under the License. org.apache.datafusion comet-parent-spark${spark.version.short}_${scala.binary.version} - 0.4.0-SNAPSHOT + 0.5.0-SNAPSHOT pom Comet Project Parent POM diff --git a/spark-integration/pom.xml b/spark-integration/pom.xml index a9e1619a1..84c09c1c9 100644 --- a/spark-integration/pom.xml +++ b/spark-integration/pom.xml @@ -26,7 +26,7 @@ under the License. org.apache.datafusion comet-parent-spark${spark.version.short}_${scala.binary.version} - 0.4.0-SNAPSHOT + 0.5.0-SNAPSHOT ../pom.xml @@ -100,6 +100,13 @@ under the License. + + org.apache.maven.plugins + maven-install-plugin + + true + + diff --git a/spark/pom.xml b/spark/pom.xml index d1bf0fa2f..ad7590dbc 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -26,7 +26,7 @@ under the License. 
org.apache.datafusion comet-parent-spark${spark.version.short}_${scala.binary.version} - 0.4.0-SNAPSHOT + 0.5.0-SNAPSHOT ../pom.xml diff --git a/spark/src/main/java/org/apache/comet/CometBatchIterator.java b/spark/src/main/java/org/apache/comet/CometBatchIterator.java index accd57c20..e05bea1df 100644 --- a/spark/src/main/java/org/apache/comet/CometBatchIterator.java +++ b/spark/src/main/java/org/apache/comet/CometBatchIterator.java @@ -33,12 +33,31 @@ public class CometBatchIterator { final Iterator input; final NativeUtil nativeUtil; + private ColumnarBatch currentBatch = null; CometBatchIterator(Iterator input, NativeUtil nativeUtil) { this.input = input; this.nativeUtil = nativeUtil; } + /** + * Fetch the next input batch. + * + * @return Number of rows in next batch or -1 if no batches left. + */ + public int hasNext() { + if (currentBatch == null) { + if (input.hasNext()) { + currentBatch = input.next(); + } + } + if (currentBatch == null) { + return -1; + } else { + return currentBatch.numRows(); + } + } + /** * Get the next batches of Arrow arrays. * @@ -47,12 +66,11 @@ public class CometBatchIterator { * @return the number of rows of the current batch. -1 if there is no more batch. */ public int next(long[] arrayAddrs, long[] schemaAddrs) { - boolean hasBatch = input.hasNext(); - - if (!hasBatch) { + if (currentBatch == null) { return -1; } - - return nativeUtil.exportBatch(arrayAddrs, schemaAddrs, input.next()); + int numRows = nativeUtil.exportBatch(arrayAddrs, schemaAddrs, currentBatch); + currentBatch = null; + return numRows; } } diff --git a/spark/src/main/java/org/apache/spark/shuffle/comet/CometShuffleMemoryAllocator.java b/spark/src/main/java/org/apache/spark/shuffle/comet/CometShuffleMemoryAllocator.java index 2837fa369..54e349c13 100644 --- a/spark/src/main/java/org/apache/spark/shuffle/comet/CometShuffleMemoryAllocator.java +++ b/spark/src/main/java/org/apache/spark/shuffle/comet/CometShuffleMemoryAllocator.java @@ -20,157 +20,75 @@ package org.apache.spark.shuffle.comet; import java.io.IOException; -import java.util.BitSet; import org.apache.spark.SparkConf; import org.apache.spark.memory.MemoryConsumer; import org.apache.spark.memory.MemoryMode; -import org.apache.spark.memory.SparkOutOfMemoryError; import org.apache.spark.memory.TaskMemoryManager; -import org.apache.spark.sql.internal.SQLConf; -import org.apache.spark.unsafe.array.LongArray; import org.apache.spark.unsafe.memory.MemoryBlock; -import org.apache.spark.unsafe.memory.UnsafeMemoryAllocator; +import org.apache.spark.util.Utils; -import org.apache.comet.CometSparkSessionExtensions$; +import org.apache.comet.CometConf$; /** * A simple memory allocator used by `CometShuffleExternalSorter` to allocate memory blocks which - * store serialized rows. We don't rely on Spark memory allocator because we need to allocate - * off-heap memory no matter memory mode is on-heap or off-heap. This allocator is configured with - * fixed size of memory, and it will throw `SparkOutOfMemoryError` if the memory is not enough. - * - *

Some methods are copied from `org.apache.spark.unsafe.memory.TaskMemoryManager` with - * modifications. Most modifications are to remove the dependency on the configured memory mode. + * store serialized rows. This class is simply an implementation of `MemoryConsumer` that delegates + * memory allocation to the `TaskMemoryManager`. This requires that the `TaskMemoryManager` is + * configured with `MemoryMode.OFF_HEAP`, i.e. it is using off-heap memory. */ -public final class CometShuffleMemoryAllocator extends MemoryConsumer { - private final UnsafeMemoryAllocator allocator = new UnsafeMemoryAllocator(); - - private final long pageSize; - private final long totalMemory; - private long allocatedMemory = 0L; - - /** The number of bits used to address the page table. */ - private static final int PAGE_NUMBER_BITS = 13; - - /** The number of entries in the page table. */ - private static final int PAGE_TABLE_SIZE = 1 << PAGE_NUMBER_BITS; - - private final MemoryBlock[] pageTable = new MemoryBlock[PAGE_TABLE_SIZE]; - private final BitSet allocatedPages = new BitSet(PAGE_TABLE_SIZE); +public final class CometShuffleMemoryAllocator extends CometShuffleMemoryAllocatorTrait { + private static CometShuffleMemoryAllocatorTrait INSTANCE; - private static final int OFFSET_BITS = 51; - private static final long MASK_LONG_LOWER_51_BITS = 0x7FFFFFFFFFFFFL; - - private static CometShuffleMemoryAllocator INSTANCE; - - public static synchronized CometShuffleMemoryAllocator getInstance( + /** + * Returns the singleton instance of `CometShuffleMemoryAllocator`. This method should be used + * instead of the constructor to ensure that only one instance of `CometShuffleMemoryAllocator` is + * created. For Spark tests, this returns `CometTestShuffleMemoryAllocator` which is a test-only + * allocator that should not be used in production. + */ + public static CometShuffleMemoryAllocatorTrait getInstance( SparkConf conf, TaskMemoryManager taskMemoryManager, long pageSize) { - if (INSTANCE == null) { - INSTANCE = new CometShuffleMemoryAllocator(conf, taskMemoryManager, pageSize); + boolean isSparkTesting = Utils.isTesting(); + boolean useUnifiedMemAllocator = + (boolean) + CometConf$.MODULE$.COMET_COLUMNAR_SHUFFLE_UNIFIED_MEMORY_ALLOCATOR_IN_TEST().get(); + + if (isSparkTesting && !useUnifiedMemAllocator) { + synchronized (CometShuffleMemoryAllocator.class) { + if (INSTANCE == null) { + // CometTestShuffleMemoryAllocator handles pages by itself so it can be a singleton. + INSTANCE = new CometTestShuffleMemoryAllocator(conf, taskMemoryManager, pageSize); + } + } + return INSTANCE; + } else { + if (taskMemoryManager.getTungstenMemoryMode() != MemoryMode.OFF_HEAP) { + throw new IllegalArgumentException( + "CometShuffleMemoryAllocator should be used with off-heap " + + "memory mode, but got " + + taskMemoryManager.getTungstenMemoryMode()); + } + + // CometShuffleMemoryAllocator stores pages in TaskMemoryManager which is not singleton, + // but one instance per task. So we need to create a new instance for each task. 
+ return new CometShuffleMemoryAllocator(taskMemoryManager, pageSize); } - - return INSTANCE; } - CometShuffleMemoryAllocator(SparkConf conf, TaskMemoryManager taskMemoryManager, long pageSize) { + CometShuffleMemoryAllocator(TaskMemoryManager taskMemoryManager, long pageSize) { super(taskMemoryManager, pageSize, MemoryMode.OFF_HEAP); - this.pageSize = pageSize; - this.totalMemory = - CometSparkSessionExtensions$.MODULE$.getCometShuffleMemorySize(conf, SQLConf.get()); - } - - public synchronized long acquireMemory(long size) { - if (allocatedMemory >= totalMemory) { - throw new SparkOutOfMemoryError( - "Unable to acquire " - + size - + " bytes of memory, current usage " - + "is " - + allocatedMemory - + " bytes and max memory is " - + totalMemory - + " bytes"); - } - long allocationSize = Math.min(size, totalMemory - allocatedMemory); - allocatedMemory += allocationSize; - return allocationSize; } public long spill(long l, MemoryConsumer memoryConsumer) throws IOException { + // JVM shuffle writer does not support spilling for other memory consumers return 0; } - public synchronized LongArray allocateArray(long size) { - long required = size * 8L; - MemoryBlock page = allocate(required); - return new LongArray(page); - } - - public synchronized void freeArray(LongArray array) { - if (array == null) { - return; - } - free(array.memoryBlock()); - } - - public synchronized MemoryBlock allocatePage(long required) { - long size = Math.max(pageSize, required); - return allocate(size); - } - - private synchronized MemoryBlock allocate(long required) { - if (required > TaskMemoryManager.MAXIMUM_PAGE_SIZE_BYTES) { - throw new TooLargePageException(required); - } - - long got = acquireMemory(required); - - if (got < required) { - allocatedMemory -= got; - - throw new SparkOutOfMemoryError( - "Unable to acquire " - + required - + " bytes of memory, got " - + got - + " bytes. Available: " - + (totalMemory - allocatedMemory)); - } - - int pageNumber = allocatedPages.nextClearBit(0); - if (pageNumber >= PAGE_TABLE_SIZE) { - allocatedMemory -= got; - - throw new IllegalStateException( - "Have already allocated a maximum of " + PAGE_TABLE_SIZE + " pages"); - } - - MemoryBlock block = allocator.allocate(got); - - block.pageNumber = pageNumber; - pageTable[pageNumber] = block; - allocatedPages.set(pageNumber); - - return block; + public synchronized MemoryBlock allocate(long required) { + return this.allocatePage(required); } public synchronized void free(MemoryBlock block) { - if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) { - // Already freed block - return; - } - allocatedMemory -= block.size(); - - pageTable[block.pageNumber] = null; - allocatedPages.clear(block.pageNumber); - block.pageNumber = MemoryBlock.FREED_IN_TMM_PAGE_NUMBER; - - allocator.free(block); - } - - public synchronized long getAvailableMemory() { - return totalMemory - allocatedMemory; + this.freePage(block); } /** @@ -178,21 +96,11 @@ public synchronized long getAvailableMemory() { * method assumes that the page number is valid. 
*/ public long getOffsetInPage(long pagePlusOffsetAddress) { - long offsetInPage = decodeOffset(pagePlusOffsetAddress); - int pageNumber = TaskMemoryManager.decodePageNumber(pagePlusOffsetAddress); - assert (pageNumber >= 0 && pageNumber < PAGE_TABLE_SIZE); - MemoryBlock page = pageTable[pageNumber]; - assert (page != null); - return page.getBaseOffset() + offsetInPage; - } - - public long decodeOffset(long pagePlusOffsetAddress) { - return pagePlusOffsetAddress & MASK_LONG_LOWER_51_BITS; + return taskMemoryManager.getOffsetInPage(pagePlusOffsetAddress); } public long encodePageNumberAndOffset(int pageNumber, long offsetInPage) { - assert (pageNumber >= 0); - return ((long) pageNumber) << OFFSET_BITS | offsetInPage & MASK_LONG_LOWER_51_BITS; + return TaskMemoryManager.encodePageNumberAndOffset(pageNumber, offsetInPage); } public long encodePageNumberAndOffset(MemoryBlock page, long offsetInPage) { diff --git a/spark/src/main/java/org/apache/spark/shuffle/comet/CometShuffleMemoryAllocatorTrait.java b/spark/src/main/java/org/apache/spark/shuffle/comet/CometShuffleMemoryAllocatorTrait.java new file mode 100644 index 000000000..6831396b3 --- /dev/null +++ b/spark/src/main/java/org/apache/spark/shuffle/comet/CometShuffleMemoryAllocatorTrait.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.shuffle.comet; + +import org.apache.spark.memory.MemoryConsumer; +import org.apache.spark.memory.MemoryMode; +import org.apache.spark.memory.TaskMemoryManager; +import org.apache.spark.unsafe.memory.MemoryBlock; + +/** The base class for Comet JVM shuffle memory allocators. */ +public abstract class CometShuffleMemoryAllocatorTrait extends MemoryConsumer { + protected CometShuffleMemoryAllocatorTrait( + TaskMemoryManager taskMemoryManager, long pageSize, MemoryMode mode) { + super(taskMemoryManager, pageSize, mode); + } + + public abstract MemoryBlock allocate(long required); + + public abstract void free(MemoryBlock block); + + public abstract long getOffsetInPage(long pagePlusOffsetAddress); + + public abstract long encodePageNumberAndOffset(MemoryBlock page, long offsetInPage); +} diff --git a/spark/src/main/java/org/apache/spark/shuffle/comet/CometTestShuffleMemoryAllocator.java b/spark/src/main/java/org/apache/spark/shuffle/comet/CometTestShuffleMemoryAllocator.java new file mode 100644 index 000000000..084e82b2b --- /dev/null +++ b/spark/src/main/java/org/apache/spark/shuffle/comet/CometTestShuffleMemoryAllocator.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.shuffle.comet; + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.spark.SparkConf; +import org.apache.spark.memory.MemoryConsumer; +import org.apache.spark.memory.MemoryMode; +import org.apache.spark.memory.SparkOutOfMemoryError; +import org.apache.spark.memory.TaskMemoryManager; +import org.apache.spark.sql.internal.SQLConf; +import org.apache.spark.unsafe.array.LongArray; +import org.apache.spark.unsafe.memory.MemoryBlock; +import org.apache.spark.unsafe.memory.UnsafeMemoryAllocator; + +import org.apache.comet.CometSparkSessionExtensions$; + +/** + * A simple memory allocator used by `CometShuffleExternalSorter` to allocate memory blocks which + * store serialized rows. We don't rely on Spark memory allocator because we need to allocate + * off-heap memory no matter memory mode is on-heap or off-heap. This allocator is configured with + * fixed size of memory, and it will throw `SparkOutOfMemoryError` if the memory is not enough. + * + *

Some methods are copied from `org.apache.spark.unsafe.memory.TaskMemoryManager` with + * modifications. Most modifications are to remove the dependency on the configured memory mode. + * + *

This allocator is test-only and should not be used in production. It is used to test Comet JVM + * shuffle and execution with Spark tests which basically require on-heap memory configuration. + * Thus, this allocator is used to allocate separate off-heap memory allocation for Comet JVM + * shuffle and execution apart from Spark's on-heap memory configuration. + */ +public final class CometTestShuffleMemoryAllocator extends CometShuffleMemoryAllocatorTrait { + private final UnsafeMemoryAllocator allocator = new UnsafeMemoryAllocator(); + + private final long pageSize; + private final long totalMemory; + private long allocatedMemory = 0L; + + /** The number of bits used to address the page table. */ + private static final int PAGE_NUMBER_BITS = 13; + + /** The number of entries in the page table. */ + private static final int PAGE_TABLE_SIZE = 1 << PAGE_NUMBER_BITS; + + private final MemoryBlock[] pageTable = new MemoryBlock[PAGE_TABLE_SIZE]; + private final BitSet allocatedPages = new BitSet(PAGE_TABLE_SIZE); + + private static final int OFFSET_BITS = 51; + private static final long MASK_LONG_LOWER_51_BITS = 0x7FFFFFFFFFFFFL; + + private static CometTestShuffleMemoryAllocator INSTANCE; + + CometTestShuffleMemoryAllocator( + SparkConf conf, TaskMemoryManager taskMemoryManager, long pageSize) { + super(taskMemoryManager, pageSize, MemoryMode.OFF_HEAP); + this.pageSize = pageSize; + this.totalMemory = + CometSparkSessionExtensions$.MODULE$.getCometShuffleMemorySize(conf, SQLConf.get()); + } + + private synchronized long _acquireMemory(long size) { + if (allocatedMemory >= totalMemory) { + throw new SparkOutOfMemoryError( + "Unable to acquire " + + size + + " bytes of memory, current usage " + + "is " + + allocatedMemory + + " bytes and max memory is " + + totalMemory + + " bytes"); + } + long allocationSize = Math.min(size, totalMemory - allocatedMemory); + allocatedMemory += allocationSize; + return allocationSize; + } + + public long spill(long l, MemoryConsumer memoryConsumer) throws IOException { + return 0; + } + + public synchronized LongArray allocateArray(long size) { + long required = size * 8L; + MemoryBlock page = allocateMemoryBlock(required); + return new LongArray(page); + } + + public synchronized void freeArray(LongArray array) { + if (array == null) { + return; + } + free(array.memoryBlock()); + } + + public synchronized MemoryBlock allocate(long required) { + long size = Math.max(pageSize, required); + return allocateMemoryBlock(size); + } + + private synchronized MemoryBlock allocateMemoryBlock(long required) { + if (required > TaskMemoryManager.MAXIMUM_PAGE_SIZE_BYTES) { + throw new TooLargePageException(required); + } + + long got = _acquireMemory(required); + + if (got < required) { + allocatedMemory -= got; + + throw new SparkOutOfMemoryError( + "Unable to acquire " + + required + + " bytes of memory, got " + + got + + " bytes. 
Available: " + + (totalMemory - allocatedMemory)); + } + + int pageNumber = allocatedPages.nextClearBit(0); + if (pageNumber >= PAGE_TABLE_SIZE) { + allocatedMemory -= got; + + throw new IllegalStateException( + "Have already allocated a maximum of " + PAGE_TABLE_SIZE + " pages"); + } + + MemoryBlock block = allocator.allocate(got); + + block.pageNumber = pageNumber; + pageTable[pageNumber] = block; + allocatedPages.set(pageNumber); + + return block; + } + + public synchronized void free(MemoryBlock block) { + if (block.pageNumber == MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) { + // Already freed block + return; + } + allocatedMemory -= block.size(); + + pageTable[block.pageNumber] = null; + allocatedPages.clear(block.pageNumber); + block.pageNumber = MemoryBlock.FREED_IN_TMM_PAGE_NUMBER; + + allocator.free(block); + } + + /** + * Returns the offset in the page for the given page plus base offset address. Note that this + * method assumes that the page number is valid. + */ + public long getOffsetInPage(long pagePlusOffsetAddress) { + long offsetInPage = decodeOffset(pagePlusOffsetAddress); + int pageNumber = TaskMemoryManager.decodePageNumber(pagePlusOffsetAddress); + assert (pageNumber >= 0 && pageNumber < PAGE_TABLE_SIZE); + MemoryBlock page = pageTable[pageNumber]; + assert (page != null); + return page.getBaseOffset() + offsetInPage; + } + + public long decodeOffset(long pagePlusOffsetAddress) { + return pagePlusOffsetAddress & MASK_LONG_LOWER_51_BITS; + } + + public long encodePageNumberAndOffset(int pageNumber, long offsetInPage) { + assert (pageNumber >= 0); + return ((long) pageNumber) << OFFSET_BITS | offsetInPage & MASK_LONG_LOWER_51_BITS; + } + + public long encodePageNumberAndOffset(MemoryBlock page, long offsetInPage) { + return encodePageNumberAndOffset(page.pageNumber, offsetInPage - page.getBaseOffset()); + } +} diff --git a/spark/src/main/java/org/apache/spark/shuffle/sort/CometShuffleExternalSorter.java b/spark/src/main/java/org/apache/spark/shuffle/sort/CometShuffleExternalSorter.java index ed3e2be66..cc4495570 100644 --- a/spark/src/main/java/org/apache/spark/shuffle/sort/CometShuffleExternalSorter.java +++ b/spark/src/main/java/org/apache/spark/shuffle/sort/CometShuffleExternalSorter.java @@ -38,6 +38,7 @@ import org.apache.spark.shuffle.ShuffleWriteMetricsReporter; import org.apache.spark.shuffle.comet.CometShuffleChecksumSupport; import org.apache.spark.shuffle.comet.CometShuffleMemoryAllocator; +import org.apache.spark.shuffle.comet.CometShuffleMemoryAllocatorTrait; import org.apache.spark.shuffle.comet.TooLargePageException; import org.apache.spark.sql.comet.execution.shuffle.CometUnsafeShuffleWriter; import org.apache.spark.sql.comet.execution.shuffle.ShuffleThreadPool; @@ -110,7 +111,7 @@ public final class CometShuffleExternalSorter implements CometShuffleChecksumSup // The memory allocator for this sorter. It is used to allocate/free memory pages for this sorter. // Because we need to allocate off-heap memory regardless of configured Spark memory mode // (on-heap/off-heap), we need a separate memory allocator. 
- private final CometShuffleMemoryAllocator allocator; + private final CometShuffleMemoryAllocatorTrait allocator; /** Whether to write shuffle spilling file in async mode */ private final boolean isAsync; diff --git a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/CometDiskBlockWriter.java b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/CometDiskBlockWriter.java index f793874d7..dcb9d99d3 100644 --- a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/CometDiskBlockWriter.java +++ b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/CometDiskBlockWriter.java @@ -41,6 +41,7 @@ import org.apache.spark.serializer.SerializerInstance; import org.apache.spark.shuffle.ShuffleWriteMetricsReporter; import org.apache.spark.shuffle.comet.CometShuffleMemoryAllocator; +import org.apache.spark.shuffle.comet.CometShuffleMemoryAllocatorTrait; import org.apache.spark.shuffle.sort.RowPartition; import org.apache.spark.sql.catalyst.expressions.UnsafeRow; import org.apache.spark.sql.types.StructType; @@ -87,7 +88,7 @@ public final class CometDiskBlockWriter { static final int MAXIMUM_PAGE_SIZE_BYTES = 1 << 27; /** The Comet allocator used to allocate pages. */ - private final CometShuffleMemoryAllocator allocator; + private final CometShuffleMemoryAllocatorTrait allocator; /** The serializer used to write rows to memory page. */ private final SerializerInstance serializer; @@ -435,12 +436,17 @@ public int compare(CometDiskBlockWriter lhs, CometDiskBlockWriter rhs) { } }); + long totalFreed = 0; for (CometDiskBlockWriter writer : currentWriters) { // Force to spill the writer in a synchronous way, otherwise, we may not be able to // acquire enough memory. + long used = writer.getActiveMemoryUsage(); + writer.doSpill(true); - if (allocator.getAvailableMemory() >= required) { + totalFreed += used; + + if (totalFreed >= required) { break; } } diff --git a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java index cc8c04fdd..3dc86b05b 100644 --- a/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java +++ b/spark/src/main/java/org/apache/spark/sql/comet/execution/shuffle/SpillWriter.java @@ -31,7 +31,7 @@ import org.apache.spark.memory.SparkOutOfMemoryError; import org.apache.spark.shuffle.ShuffleWriteMetricsReporter; -import org.apache.spark.shuffle.comet.CometShuffleMemoryAllocator; +import org.apache.spark.shuffle.comet.CometShuffleMemoryAllocatorTrait; import org.apache.spark.shuffle.sort.RowPartition; import org.apache.spark.sql.types.StructType; import org.apache.spark.unsafe.memory.MemoryBlock; @@ -62,7 +62,7 @@ public abstract class SpillWriter { // The memory allocator for this sorter. It is used to allocate/free memory pages for this sorter. // Because we need to allocate off-heap memory regardless of configured Spark memory mode // (on-heap/off-heap), we need a separate memory allocator. 
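The force-spill loop added to CometDiskBlockWriter above sums the active memory held by each writer it spills and stops once enough has been released, rather than re-checking the allocator's available memory after every spill. A simplified Scala sketch of that pattern follows; the trait and method names are illustrative stand-ins, not Comet's API.

// Spill writers one by one until at least `required` bytes have been released.
object SpillUntilFreed {
  trait SpillableWriter {
    def activeMemoryUsage: Long // bytes currently buffered by this writer
    def forceSpill(): Unit      // synchronously flush buffered rows to disk
  }

  def spillUntil(writers: Seq[SpillableWriter], required: Long): Long = {
    var totalFreed = 0L
    val it = writers.iterator
    while (it.hasNext && totalFreed < required) {
      val writer = it.next()
      val used = writer.activeMemoryUsage
      writer.forceSpill()
      totalFreed += used
    }
    totalFreed
  }
}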
- protected CometShuffleMemoryAllocator allocator; + protected CometShuffleMemoryAllocatorTrait allocator; protected Native nativeLib; @@ -134,7 +134,7 @@ public boolean acquireNewPageIfNecessary(int required) { || pageCursor + required > currentPage.getBaseOffset() + currentPage.size()) { // TODO: try to find space in previous pages try { - currentPage = allocator.allocatePage(required); + currentPage = allocator.allocate(required); } catch (SparkOutOfMemoryError error) { try { // Cannot allocate enough memory, spill @@ -155,7 +155,7 @@ public boolean acquireNewPageIfNecessary(int required) { public void initialCurrentPage(int required) { assert (currentPage == null); try { - currentPage = allocator.allocatePage(required); + currentPage = allocator.allocate(required); } catch (SparkOutOfMemoryError e) { logger.error("Unable to acquire {} bytes of memory", required); throw e; diff --git a/spark/src/main/scala/org/apache/comet/CometExecIterator.scala b/spark/src/main/scala/org/apache/comet/CometExecIterator.scala index b2eef5d09..08b24e029 100644 --- a/spark/src/main/scala/org/apache/comet/CometExecIterator.scala +++ b/spark/src/main/scala/org/apache/comet/CometExecIterator.scala @@ -60,60 +60,39 @@ class CometExecIterator( new CometBatchIterator(iterator, nativeUtil) }.toArray private val plan = { - val configs = createNativeConf + val conf = SparkEnv.get.conf + // Only enable unified memory manager when off-heap mode is enabled. Otherwise, + // we'll use the built-in memory pool from DF, and initializes with `memory_limit` + // and `memory_fraction` below. nativeLib.createPlan( id, - configs, cometBatchIterators, protobufQueryPlan, numParts, nativeMetrics, - new CometTaskMemoryManager(id)) + new CometTaskMemoryManager(id), + batchSize = COMET_BATCH_SIZE.get(), + use_unified_memory_manager = conf.getBoolean("spark.memory.offHeap.enabled", false), + memory_limit = CometSparkSessionExtensions.getCometMemoryOverhead(conf), + memory_fraction = COMET_EXEC_MEMORY_FRACTION.get(), + debug = COMET_DEBUG_ENABLED.get(), + explain = COMET_EXPLAIN_NATIVE_ENABLED.get(), + workerThreads = COMET_WORKER_THREADS.get(), + blockingThreads = COMET_BLOCKING_THREADS.get()) } private var nextBatch: Option[ColumnarBatch] = None private var currentBatch: ColumnarBatch = null private var closed: Boolean = false - /** - * Creates a new configuration map to be passed to the native side. - */ - private def createNativeConf: java.util.HashMap[String, String] = { - val result = new java.util.HashMap[String, String]() - val conf = SparkEnv.get.conf - - val maxMemory = CometSparkSessionExtensions.getCometMemoryOverhead(conf) - // Only enable unified memory manager when off-heap mode is enabled. Otherwise, - // we'll use the built-in memory pool from DF, and initializes with `memory_limit` - // and `memory_fraction` below. - result.put( - "use_unified_memory_manager", - String.valueOf(conf.get("spark.memory.offHeap.enabled", "false"))) - result.put("memory_limit", String.valueOf(maxMemory)) - result.put("memory_fraction", String.valueOf(COMET_EXEC_MEMORY_FRACTION.get())) - result.put("batch_size", String.valueOf(COMET_BATCH_SIZE.get())) - result.put("debug_native", String.valueOf(COMET_DEBUG_ENABLED.get())) - result.put("explain_native", String.valueOf(COMET_EXPLAIN_NATIVE_ENABLED.get())) - result.put("worker_threads", String.valueOf(COMET_WORKER_THREADS.get())) - result.put("blocking_threads", String.valueOf(COMET_BLOCKING_THREADS.get())) - - // Strip mandatory prefix spark. 
which is not required for DataFusion session params - conf.getAll.foreach { - case (k, v) if k.startsWith("spark.datafusion") => - result.put(k.replaceFirst("spark\\.", ""), v) - case _ => - } - - result - } - def getNextBatch(): Option[ColumnarBatch] = { assert(partitionIndex >= 0 && partitionIndex < numParts) nativeUtil.getNextBatch( numOutputCols, (arrayAddrs, schemaAddrs) => { - nativeLib.executePlan(plan, partitionIndex, arrayAddrs, schemaAddrs) + val ctx = TaskContext.get() + nativeLib.executePlan(ctx.stageId(), partitionIndex, plan, arrayAddrs, schemaAddrs) }) } diff --git a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala index 32668f0dd..522da0f58 100644 --- a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala +++ b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala @@ -53,7 +53,7 @@ import org.apache.spark.sql.types.{DoubleType, FloatType} import org.apache.comet.CometConf._ import org.apache.comet.CometExplainInfo.getActualPlan -import org.apache.comet.CometSparkSessionExtensions.{createMessage, getCometBroadcastNotEnabledReason, getCometShuffleNotEnabledReason, isANSIEnabled, isCometBroadCastForceEnabled, isCometEnabled, isCometExecEnabled, isCometJVMShuffleMode, isCometNativeShuffleMode, isCometScan, isCometScanEnabled, isCometShuffleEnabled, isSpark34Plus, isSpark40Plus, shouldApplySparkToColumnar, withInfo, withInfos} +import org.apache.comet.CometSparkSessionExtensions.{createMessage, getCometBroadcastNotEnabledReason, getCometShuffleNotEnabledReason, isANSIEnabled, isCometBroadCastForceEnabled, isCometEnabled, isCometExecEnabled, isCometJVMShuffleMode, isCometNativeShuffleMode, isCometScan, isCometScanEnabled, isCometShuffleEnabled, isOffHeapEnabled, isSpark34Plus, isSpark40Plus, isTesting, shouldApplySparkToColumnar, withInfo, withInfos} import org.apache.comet.parquet.{CometParquetScan, SupportsComet} import org.apache.comet.rules.RewriteJoin import org.apache.comet.serde.OperatorOuterClass.Operator @@ -207,7 +207,7 @@ class CometSparkSessionExtensions // data source V1 case scanExec @ FileSourceScanExec( - HadoopFsRelation(_, partitionSchema, _, _, _: ParquetFileFormat, _), + HadoopFsRelation(_, partitionSchema, _, _, fileFormat, _), _: Seq[_], requiredSchema, _, @@ -216,14 +216,15 @@ class CometSparkSessionExtensions _, _, _) - if CometScanExec.isSchemaSupported(requiredSchema) + if CometScanExec.isFileFormatSupported(fileFormat) + && CometScanExec.isSchemaSupported(requiredSchema) && CometScanExec.isSchemaSupported(partitionSchema) => logInfo("Comet extension enabled for v1 Scan") CometScanExec(scanExec, session) // data source v1 not supported case case scanExec @ FileSourceScanExec( - HadoopFsRelation(_, partitionSchema, _, _, _: ParquetFileFormat, _), + HadoopFsRelation(_, partitionSchema, _, _, fileFormat, _), _: Seq[_], requiredSchema, _, @@ -233,12 +234,15 @@ class CometSparkSessionExtensions _, _) => val info1 = createMessage( + !CometScanExec.isFileFormatSupported(fileFormat), + s"File format $fileFormat is not supported") + val info2 = createMessage( !CometScanExec.isSchemaSupported(requiredSchema), s"Schema $requiredSchema is not supported") - val info2 = createMessage( + val info3 = createMessage( !CometScanExec.isSchemaSupported(partitionSchema), s"Partition schema $partitionSchema is not supported") - withInfo(scanExec, Seq(info1, info2).flatten.mkString(",")) + withInfo(scanExec, Seq(info1, info2, 
info3).flatten.mkString(",")) scanExec } } @@ -938,6 +942,14 @@ class CometSparkSessionExtensions } override def apply(plan: SparkPlan): SparkPlan = { + + // Comet requires off-heap memory to be enabled + if (!isOffHeapEnabled(conf) && !isTesting) { + logWarning("Comet native exec disabled because spark.memory.offHeap.enabled=false") + withInfo(plan, "Comet native exec disabled because spark.memory.offHeap.enabled=false") + return plan + } + // DataFusion doesn't have ANSI mode. For now we just disable CometExec if ANSI mode is // enabled. if (isANSIEnabled(conf)) { @@ -1194,8 +1206,21 @@ object CometSparkSessionExtensions extends Logging { } } + private[comet] def isOffHeapEnabled(conf: SQLConf): Boolean = + conf.getConfString("spark.memory.offHeap.enabled", "false").toBoolean + + // Copied from org.apache.spark.util.Utils which is private to Spark. + private[comet] def isTesting: Boolean = { + System.getenv("SPARK_TESTING") != null || System.getProperty("spark.testing") != null + } + + // Check whether Comet shuffle is enabled: + // 1. `COMET_EXEC_SHUFFLE_ENABLED` is true + // 2. `spark.shuffle.manager` is set to `CometShuffleManager` + // 3. Off-heap memory is enabled, or this is a Spark/Comet unit test private[comet] def isCometShuffleEnabled(conf: SQLConf): Boolean = - COMET_EXEC_SHUFFLE_ENABLED.get(conf) && isCometShuffleManagerEnabled(conf) + COMET_EXEC_SHUFFLE_ENABLED.get(conf) && isCometShuffleManagerEnabled(conf) && + (isOffHeapEnabled(conf) || isTesting) private[comet] def getCometShuffleNotEnabledReason(conf: SQLConf): Option[String] = { if (!COMET_EXEC_SHUFFLE_ENABLED.get(conf)) { diff --git a/spark/src/main/scala/org/apache/comet/Native.scala b/spark/src/main/scala/org/apache/comet/Native.scala index ce0e26129..82c0373f4 100644 --- a/spark/src/main/scala/org/apache/comet/Native.scala +++ b/spark/src/main/scala/org/apache/comet/Native.scala @@ -19,13 +19,12 @@ package org.apache.comet -import java.util.Map - import org.apache.spark.CometTaskMemoryManager import org.apache.spark.sql.comet.CometMetricNode class Native extends NativeBase { + // scalastyle:off /** * Create a native query plan from execution SparkPlan serialized in bytes. * @param id @@ -45,18 +44,31 @@ class Native extends NativeBase { * @return * the address to native query plan. */ + // scalastyle:off @native def createPlan( id: Long, - configMap: Map[String, String], iterators: Array[CometBatchIterator], plan: Array[Byte], partitionCount: Int, metrics: CometMetricNode, - taskMemoryManager: CometTaskMemoryManager): Long + taskMemoryManager: CometTaskMemoryManager, + batchSize: Int, + use_unified_memory_manager: Boolean, + memory_limit: Long, + memory_fraction: Double, + debug: Boolean, + explain: Boolean, + workerThreads: Int, + blockingThreads: Int): Long + // scalastyle:on /** * Execute a native query plan based on given input Arrow arrays. * + * @param stage * the stage ID, for informational purposes + * @param partition * the partition ID, for informational purposes * @param plan * the address to native query plan. * @param arrayAddrs * @return * the number of rows, if -1, it means end of the output.
*/ @native def executePlan( + stage: Int, + partition: Int, plan: Long, - partitionId: Int, arrayAddrs: Array[Long], schemaAddrs: Array[Long]): Long diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala index 11d6d049f..859cb13be 100644 --- a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala +++ b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala @@ -70,9 +70,13 @@ object CometCast { case _ => Unsupported } - case (_: DecimalType, _: DecimalType) => - // https://github.com/apache/datafusion-comet/issues/375 - Incompatible() + case (from: DecimalType, to: DecimalType) => + if (to.precision < from.precision) { + // https://github.com/apache/datafusion/issues/13492 + Incompatible(Some("Casting to smaller precision is not supported")) + } else { + Compatible() + } case (DataTypes.StringType, _) => canCastFromString(toType, timeZoneId, evalMode) case (_, DataTypes.StringType) => diff --git a/spark/src/main/scala/org/apache/comet/parquet/ParquetFilters.scala b/spark/src/main/scala/org/apache/comet/parquet/ParquetFilters.scala index 17844aba8..bcb23986f 100644 --- a/spark/src/main/scala/org/apache/comet/parquet/ParquetFilters.scala +++ b/spark/src/main/scala/org/apache/comet/parquet/ParquetFilters.scala @@ -723,20 +723,22 @@ class ParquetFilters( .lift(nameToParquetField(name).fieldType) .map(_(nameToParquetField(name).fieldNames, value)) - case sources.LessThan(name, value) if canMakeFilterOn(name, value) => + case sources.LessThan(name, value) if (value != null) && canMakeFilterOn(name, value) => makeLt .lift(nameToParquetField(name).fieldType) .map(_(nameToParquetField(name).fieldNames, value)) - case sources.LessThanOrEqual(name, value) if canMakeFilterOn(name, value) => + case sources.LessThanOrEqual(name, value) + if (value != null) && canMakeFilterOn(name, value) => makeLtEq .lift(nameToParquetField(name).fieldType) .map(_(nameToParquetField(name).fieldNames, value)) - case sources.GreaterThan(name, value) if canMakeFilterOn(name, value) => + case sources.GreaterThan(name, value) if (value != null) && canMakeFilterOn(name, value) => makeGt .lift(nameToParquetField(name).fieldType) .map(_(nameToParquetField(name).fieldNames, value)) - case sources.GreaterThanOrEqual(name, value) if canMakeFilterOn(name, value) => + case sources.GreaterThanOrEqual(name, value) + if (value != null) && canMakeFilterOn(name, value) => makeGtEq .lift(nameToParquetField(name).fieldType) .map(_(nameToParquetField(name).fieldNames, value)) diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 5ee16bd7b..a92ffa668 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -2194,6 +2194,35 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim None } + case expr if expr.prettyName == "array_insert" => + val srcExprProto = exprToProto(expr.children(0), inputs, binding) + val posExprProto = exprToProto(expr.children(1), inputs, binding) + val itemExprProto = exprToProto(expr.children(2), inputs, binding) + val legacyNegativeIndex = + SQLConf.get.getConfString("spark.sql.legacy.negativeIndexInArrayInsert").toBoolean + if (srcExprProto.isDefined && posExprProto.isDefined && itemExprProto.isDefined) { + val arrayInsertBuilder = ExprOuterClass.ArrayInsert + 
.newBuilder() + .setSrcArrayExpr(srcExprProto.get) + .setPosExpr(posExprProto.get) + .setItemExpr(itemExprProto.get) + .setLegacyNegativeIndex(legacyNegativeIndex) + + Some( + ExprOuterClass.Expr + .newBuilder() + .setArrayInsert(arrayInsertBuilder) + .build()) + } else { + withInfo( + expr, + "unsupported arguments for ArrayInsert", + expr.children(0), + expr.children(1), + expr.children(2)) + None + } + case ElementAt(child, ordinal, defaultValue, failOnError) if child.dataType.isInstanceOf[ArrayType] => val childExpr = exprToProto(child, inputs, binding) @@ -2239,7 +2268,12 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim withInfo(expr, "unsupported arguments for GetArrayStructFields", child) None } - + case _ if expr.prettyName == "array_append" => + createBinaryExpr( + expr.children(0), + expr.children(1), + inputs, + (builder, binaryExpr) => builder.setArrayAppend(binaryExpr)) case _ => withInfo(expr, s"${expr.prettyName} is not supported", expr.children: _*) None @@ -2476,7 +2510,7 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim */ def operator2Proto(op: SparkPlan, childOp: Operator*): Option[Operator] = { val conf = op.conf - val result = OperatorOuterClass.Operator.newBuilder() + val result = OperatorOuterClass.Operator.newBuilder().setPlanId(op.id) childOp.foreach(result.addChildren) op match { @@ -2952,7 +2986,12 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim case op if isCometSink(op) && op.output.forall(a => supportedDataType(a.dataType, true)) => // These operators are source of Comet native execution chain val scanBuilder = OperatorOuterClass.Scan.newBuilder() - scanBuilder.setSource(op.simpleStringWithNodeId()) + val source = op.simpleStringWithNodeId() + if (source.isEmpty) { + scanBuilder.setSource(op.getClass.getSimpleName) + } else { + scanBuilder.setSource(source) + } val scanTypes = op.output.flatten { attr => serializeDataType(attr.dataType) diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometCollectLimitExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometCollectLimitExec.scala index 8ea0b1765..f75af5076 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometCollectLimitExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometCollectLimitExec.scala @@ -57,7 +57,8 @@ case class CometCollectLimitExec( "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size"), "numPartitions" -> SQLMetrics.createMetric( sparkContext, - "number of partitions")) ++ readMetrics ++ writeMetrics + "number of partitions")) ++ readMetrics ++ writeMetrics ++ CometMetricNode.shuffleMetrics( + sparkContext) private lazy val serializer: Serializer = new UnsafeRowSerializer(child.output.size, longMetric("dataSize")) diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometExecUtils.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometExecUtils.scala index 9698dc98b..2fc73bb7c 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometExecUtils.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometExecUtils.scala @@ -88,7 +88,7 @@ object CometExecUtils { * child partition */ def getLimitNativePlan(outputAttributes: Seq[Attribute], limit: Int): Option[Operator] = { - val scanBuilder = OperatorOuterClass.Scan.newBuilder() + val scanBuilder = OperatorOuterClass.Scan.newBuilder().setSource("LimitInput") val scanOpBuilder = OperatorOuterClass.Operator.newBuilder() val scanTypes = 
outputAttributes.flatten { attr => @@ -118,7 +118,7 @@ object CometExecUtils { sortOrder: Seq[SortOrder], child: SparkPlan, limit: Int): Option[Operator] = { - val scanBuilder = OperatorOuterClass.Scan.newBuilder() + val scanBuilder = OperatorOuterClass.Scan.newBuilder().setSource("TopKInput") val scanOpBuilder = OperatorOuterClass.Operator.newBuilder() val scanTypes = outputAttributes.flatten { attr => diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometMetricNode.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometMetricNode.scala index 47c89d943..a26fa28c8 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometMetricNode.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometMetricNode.scala @@ -130,6 +130,17 @@ object CometMetricNode { "spilled_rows" -> SQLMetrics.createMetric(sc, "Total spilled rows")) } + def shuffleMetrics(sc: SparkContext): Map[String, SQLMetric] = { + Map( + "elapsed_compute" -> SQLMetrics.createNanoTimingMetric(sc, "native shuffle time"), + "mempool_time" -> SQLMetrics.createNanoTimingMetric(sc, "memory pool time"), + "repart_time" -> SQLMetrics.createNanoTimingMetric(sc, "repartition time"), + "ipc_time" -> SQLMetrics.createNanoTimingMetric(sc, "encoding and compression time"), + "spill_count" -> SQLMetrics.createMetric(sc, "number of spills"), + "spilled_bytes" -> SQLMetrics.createMetric(sc, "spilled bytes"), + "input_batches" -> SQLMetrics.createMetric(sc, "number of input batches")) + } + /** * Creates a [[CometMetricNode]] from a [[CometPlan]]. */ diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala index 5d28b4b72..352d4a656 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.comet.shims.ShimCometScanExec import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.datasources.parquet.ParquetOptions +import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, ParquetOptions} import org.apache.spark.sql.execution.datasources.v2.DataSourceRDD import org.apache.spark.sql.execution.metric._ import org.apache.spark.sql.types._ @@ -510,4 +510,10 @@ object CometScanExec extends DataTypeSupport { scanExec.logicalLink.foreach(batchScanExec.setLogicalLink) batchScanExec } + + def isFileFormatSupported(fileFormat: FileFormat): Boolean = { + // Only support Spark's built-in Parquet scans, not others such as Delta which use a subclass + // of ParquetFileFormat. 
+ fileFormat.getClass().equals(classOf[ParquetFileFormat]) + } } diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometTakeOrderedAndProjectExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometTakeOrderedAndProjectExec.scala index 5582f4d68..19586628a 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometTakeOrderedAndProjectExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometTakeOrderedAndProjectExec.scala @@ -57,7 +57,8 @@ case class CometTakeOrderedAndProjectExec( "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size"), "numPartitions" -> SQLMetrics.createMetric( sparkContext, - "number of partitions")) ++ readMetrics ++ writeMetrics + "number of partitions")) ++ readMetrics ++ writeMetrics ++ CometMetricNode.shuffleMetrics( + sparkContext) private lazy val serializer: Serializer = new UnsafeRowSerializer(child.output.size, longMetric("dataSize")) diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/execution/shuffle/CometShuffleExchangeExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/execution/shuffle/CometShuffleExchangeExec.scala index 388c07a27..0cd8a9ce6 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/execution/shuffle/CometShuffleExchangeExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/execution/shuffle/CometShuffleExchangeExec.scala @@ -79,7 +79,8 @@ case class CometShuffleExchangeExec( "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size"), "numPartitions" -> SQLMetrics.createMetric( sparkContext, - "number of partitions")) ++ readMetrics ++ writeMetrics + "number of partitions")) ++ readMetrics ++ writeMetrics ++ CometMetricNode.shuffleMetrics( + sparkContext) override def nodeName: String = if (shuffleType == CometNativeShuffle) { "CometExchange" @@ -477,11 +478,21 @@ class CometShuffleWriteProcessor( // Call native shuffle write val nativePlan = getNativePlan(tempDataFilename, tempIndexFilename) + val detailedMetrics = Seq( + "elapsed_compute", + "ipc_time", + "repart_time", + "mempool_time", + "input_batches", + "spill_count", + "spilled_bytes") + // Maps native metrics to SQL metrics val nativeSQLMetrics = Map( "output_rows" -> metrics(SQLShuffleWriteMetricsReporter.SHUFFLE_RECORDS_WRITTEN), "data_size" -> metrics("dataSize"), - "elapsed_compute" -> metrics(SQLShuffleWriteMetricsReporter.SHUFFLE_WRITE_TIME)) + "write_time" -> metrics(SQLShuffleWriteMetricsReporter.SHUFFLE_WRITE_TIME)) ++ + metrics.filterKeys(detailedMetrics.contains) val nativeMetrics = CometMetricNode(nativeSQLMetrics) // Getting rid of the fake partitionId @@ -528,7 +539,7 @@ class CometShuffleWriteProcessor( } def getNativePlan(dataFile: String, indexFile: String): Operator = { - val scanBuilder = OperatorOuterClass.Scan.newBuilder() + val scanBuilder = OperatorOuterClass.Scan.newBuilder().setSource("ShuffleWriterInput") val opBuilder = OperatorOuterClass.Operator.newBuilder() val scanTypes = outputAttributes.flatten { attr => diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala index db9a870dc..f8c1a8b09 100644 --- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala @@ -861,10 +861,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { // primitives checkSparkAnswerAndOperator( "SELECT CAST(struct(_1, _2, _3, _4, _5, _6, _7, _8) as string) FROM tbl") - // TODO: enable tests for unsigned ints (_9, 
_10, _11, _12) once - // https://github.com/apache/datafusion-comet/issues/1067 is resolved - // checkSparkAnswerAndOperator( - // "SELECT CAST(struct(_9, _10, _11, _12) as string) FROM tbl") + checkSparkAnswerAndOperator("SELECT CAST(struct(_9, _10, _11, _12) as string) FROM tbl") // decimals // TODO add _16 when https://github.com/apache/datafusion-comet/issues/1068 is resolved checkSparkAnswerAndOperator("SELECT CAST(struct(_15, _17) as string) FROM tbl") @@ -895,6 +892,34 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } + test("cast between decimals with different precision and scale") { + // cast between default Decimal(38, 18) to Decimal(6,2) + val values = Seq(BigDecimal("12345.6789"), BigDecimal("9876.5432"), BigDecimal("123.4567")) + val df = withNulls(values) + .toDF("b") + .withColumn("a", col("b").cast(DecimalType(6, 2))) + checkSparkAnswer(df) + } + + test("cast between decimals with higher precision than source") { + // cast between Decimal(10, 2) to Decimal(10,4) + castTest(generateDecimalsPrecision10Scale2(), DataTypes.createDecimalType(10, 4)) + } + + test("cast between decimals with negative precision") { + // cast to negative scale + checkSparkMaybeThrows( + spark.sql("select a, cast(a as DECIMAL(10,-4)) from t order by a")) match { + case (expected, actual) => + assert(expected.contains("PARSE_SYNTAX_ERROR") === actual.contains("PARSE_SYNTAX_ERROR")) + } + } + + test("cast between decimals with zero precision") { + // cast between Decimal(10, 2) to Decimal(10,0) + castTest(generateDecimalsPrecision10Scale2(), DataTypes.createDecimalType(10, 0)) + } + private def generateFloats(): DataFrame = { withNulls(gen.generateFloats(dataSize)).toDF("a") } diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index e65feb6b2..f9e2c44c6 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -36,7 +36,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE import org.apache.spark.sql.types.{Decimal, DecimalType} -import org.apache.comet.CometSparkSessionExtensions.{isSpark33Plus, isSpark34Plus, isSpark40Plus} +import org.apache.comet.CometSparkSessionExtensions.{isSpark33Plus, isSpark34Plus, isSpark35Plus, isSpark40Plus} class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { import testImplicits._ @@ -119,10 +119,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { val path = new Path(dir.toURI.toString, "test.parquet") makeParquetFileAllTypes(path, dictionaryEnabled = dictionaryEnabled, 10000) withParquetTable(path.toString, "tbl") { - // TODO: enable test for unsigned ints - checkSparkAnswerAndOperator( - "select _1, _2, _3, _4, _5, _6, _7, _8, _13, _14, _15, _16, _17, " + - "_18, _19, _20 FROM tbl WHERE _2 > 100") + checkSparkAnswerAndOperator("select * FROM tbl WHERE _2 > 100") } } } @@ -1115,7 +1112,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { val path = new Path(dir.toURI.toString, "test.parquet") makeParquetFileAllTypes(path, dictionaryEnabled = dictionaryEnabled, 100) withParquetTable(path.toString, "tbl") { - Seq(2, 3, 4, 5, 6, 7, 15, 16, 17).foreach { col => + Seq(2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 15, 16, 17).foreach { col => checkSparkAnswerAndOperator(s"SELECT abs(_${col}) FROM tbl") } 
} @@ -1239,9 +1236,8 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { withParquetTable(path.toString, "tbl") { for (s <- Seq(-5, -1, 0, 1, 5, -1000, 1000, -323, -308, 308, -15, 15, -16, 16, null)) { // array tests - // TODO: enable test for unsigned ints (_9, _10, _11, _12) // TODO: enable test for floats (_6, _7, _8, _13) - for (c <- Seq(2, 3, 4, 5, 15, 16, 17)) { + for (c <- Seq(2, 3, 4, 5, 9, 10, 11, 12, 15, 16, 17)) { checkSparkAnswerAndOperator(s"select _${c}, round(_${c}, ${s}) FROM tbl") } // scalar tests @@ -1452,9 +1448,8 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { makeParquetFileAllTypes(path, dictionaryEnabled = dictionaryEnabled, 10000) withParquetTable(path.toString, "tbl") { - // _9 and _10 (uint8 and uint16) not supported checkSparkAnswerAndOperator( - "SELECT hex(_1), hex(_2), hex(_3), hex(_4), hex(_5), hex(_6), hex(_7), hex(_8), hex(_11), hex(_12), hex(_13), hex(_14), hex(_15), hex(_16), hex(_17), hex(_18), hex(_19), hex(_20) FROM tbl") + "SELECT hex(_1), hex(_2), hex(_3), hex(_4), hex(_5), hex(_6), hex(_7), hex(_8), hex(_9), hex(_10), hex(_11), hex(_12), hex(_13), hex(_14), hex(_15), hex(_16), hex(_17), hex(_18), hex(_19), hex(_20) FROM tbl") } } } @@ -2200,6 +2195,133 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } + ignore("get_struct_field - select primitive fields") { + withTempPath { dir => + // create input file with Comet disabled + withSQLConf(CometConf.COMET_ENABLED.key -> "false") { + val df = spark + .range(5) + // Add both a null struct and null inner value + .select(when(col("id") > 1, struct(when(col("id") > 2, col("id")).alias("id"))) + .alias("nested1")) + + df.write.parquet(dir.toString()) + } + + Seq("", "parquet").foreach { v1List => + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) { + val df = spark.read.parquet(dir.toString()) + checkSparkAnswerAndOperator(df.select("nested1.id")) + } + } + } + } + + ignore("get_struct_field - select subset of struct") { + withTempPath { dir => + // create input file with Comet disabled + withSQLConf(CometConf.COMET_ENABLED.key -> "false") { + val df = spark + .range(5) + // Add both a null struct and null inner value + .select( + when( + col("id") > 1, + struct( + when(col("id") > 2, col("id")).alias("id"), + when(col("id") > 2, struct(when(col("id") > 3, col("id")).alias("id"))) + .as("nested2"))) + .alias("nested1")) + + df.write.parquet(dir.toString()) + } + + Seq("", "parquet").foreach { v1List => + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) { + val df = spark.read.parquet(dir.toString()) + checkSparkAnswerAndOperator(df.select("nested1.id")) + checkSparkAnswerAndOperator(df.select("nested1.nested2")) + checkSparkAnswerAndOperator(df.select("nested1.nested2.id")) + checkSparkAnswerAndOperator(df.select("nested1.id", "nested1.nested2.id")) + } + } + } + } + + ignore("get_struct_field - read entire struct") { + withTempPath { dir => + // create input file with Comet disabled + withSQLConf(CometConf.COMET_ENABLED.key -> "false") { + val df = spark + .range(5) + // Add both a null struct and null inner value + .select( + when( + col("id") > 1, + struct( + when(col("id") > 2, col("id")).alias("id"), + when(col("id") > 2, struct(when(col("id") > 3, col("id")).alias("id"))) + .as("nested2"))) + .alias("nested1")) + + df.write.parquet(dir.toString()) + } + + Seq("", "parquet").foreach { v1List => + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) { + val df = 
spark.read.parquet(dir.toString()) + checkSparkAnswerAndOperator(df.select("nested1")) + } + } + } + } + + ignore("read map[int, int] from parquet") { + withTempPath { dir => + // create input file with Comet disabled + withSQLConf(CometConf.COMET_ENABLED.key -> "false") { + val df = spark + .range(5) + // Spark does not allow null as a key but does allow null as a + // value, and the entire map be null + .select( + when(col("id") > 1, map(col("id"), when(col("id") > 2, col("id")))).alias("map1")) + df.write.parquet(dir.toString()) + } + + Seq("", "parquet").foreach { v1List => + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) { + val df = spark.read.parquet(dir.toString()) + checkSparkAnswerAndOperator(df.select("map1")) + checkSparkAnswerAndOperator(df.select(map_keys(col("map1")))) + checkSparkAnswerAndOperator(df.select(map_values(col("map1")))) + } + } + } + } + + ignore("read array[int] from parquet") { + withTempPath { dir => + // create input file with Comet disabled + withSQLConf(CometConf.COMET_ENABLED.key -> "false") { + val df = spark + .range(5) + // Spark does not allow null as a key but does allow null as a + // value, and the entire map be null + .select(when(col("id") > 1, sequence(lit(0), col("id") * 2)).alias("array1")) + df.write.parquet(dir.toString()) + } + + Seq("", "parquet").foreach { v1List => + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) { + val df = spark.read.parquet(dir.toString()) + checkSparkAnswerAndOperator(df.select("array1")) + checkSparkAnswerAndOperator(df.select(element_at(col("array1"), lit(1)))) + } + } + } + } + test("get_struct_field with DataFusion ParquetExec - simple case") { withTempPath { dir => // create input file with Comet disabled @@ -2412,4 +2534,86 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } } + + test("array_append") { + assume(isSpark34Plus) + Seq(true, false).foreach { dictionaryEnabled => + withTempDir { dir => + val path = new Path(dir.toURI.toString, "test.parquet") + makeParquetFileAllTypes(path, dictionaryEnabled = dictionaryEnabled, 10000) + spark.read.parquet(path.toString).createOrReplaceTempView("t1"); + checkSparkAnswerAndOperator(spark.sql("Select array_append(array(_1),false) from t1")) + checkSparkAnswerAndOperator( + spark.sql("SELECT array_append(array(_2, _3, _4), 4) FROM t1")) + checkSparkAnswerAndOperator( + spark.sql("SELECT array_append(array(_2, _3, _4), null) FROM t1")); + checkSparkAnswerAndOperator( + spark.sql("SELECT array_append(array(_6, _7), CAST(6.5 AS DOUBLE)) FROM t1")); + checkSparkAnswerAndOperator(spark.sql("SELECT array_append(array(_8), 'test') FROM t1")); + checkSparkAnswerAndOperator(spark.sql("SELECT array_append(array(_19), _19) FROM t1")); + checkSparkAnswerAndOperator( + spark.sql("SELECT array_append((CASE WHEN _2 =_3 THEN array(_4) END), _4) FROM t1")); + } + } + } + + test("array_prepend") { + assume(isSpark35Plus) // in Spark 3.5 array_prepend is implemented via array_insert + Seq(true, false).foreach { dictionaryEnabled => + withTempDir { dir => + val path = new Path(dir.toURI.toString, "test.parquet") + makeParquetFileAllTypes(path, dictionaryEnabled = dictionaryEnabled, 10000) + spark.read.parquet(path.toString).createOrReplaceTempView("t1"); + checkSparkAnswerAndOperator(spark.sql("Select array_prepend(array(_1),false) from t1")) + checkSparkAnswerAndOperator( + spark.sql("SELECT array_prepend(array(_2, _3, _4), 4) FROM t1")) + checkSparkAnswerAndOperator( + spark.sql("SELECT array_prepend(array(_2, _3, _4), null) FROM 
t1")); + checkSparkAnswerAndOperator( + spark.sql("SELECT array_prepend(array(_6, _7), CAST(6.5 AS DOUBLE)) FROM t1")); + checkSparkAnswerAndOperator(spark.sql("SELECT array_prepend(array(_8), 'test') FROM t1")); + checkSparkAnswerAndOperator(spark.sql("SELECT array_prepend(array(_19), _19) FROM t1")); + checkSparkAnswerAndOperator( + spark.sql("SELECT array_prepend((CASE WHEN _2 =_3 THEN array(_4) END), _4) FROM t1")); + } + } + } + + test("ArrayInsert") { + assume(isSpark34Plus) + Seq(true, false).foreach(dictionaryEnabled => + withTempDir { dir => + val path = new Path(dir.toURI.toString, "test.parquet") + makeParquetFileAllTypes(path, dictionaryEnabled, 10000) + val df = spark.read + .parquet(path.toString) + .withColumn("arr", array(col("_4"), lit(null), col("_4"))) + .withColumn("arrInsertResult", expr("array_insert(arr, 1, 1)")) + .withColumn("arrInsertNegativeIndexResult", expr("array_insert(arr, -1, 1)")) + .withColumn("arrPosGreaterThanSize", expr("array_insert(arr, 8, 1)")) + .withColumn("arrNegPosGreaterThanSize", expr("array_insert(arr, -8, 1)")) + .withColumn("arrInsertNone", expr("array_insert(arr, 1, null)")) + checkSparkAnswerAndOperator(df.select("arrInsertResult")) + checkSparkAnswerAndOperator(df.select("arrInsertNegativeIndexResult")) + checkSparkAnswerAndOperator(df.select("arrPosGreaterThanSize")) + checkSparkAnswerAndOperator(df.select("arrNegPosGreaterThanSize")) + checkSparkAnswerAndOperator(df.select("arrInsertNone")) + }) + } + + test("ArrayInsertUnsupportedArgs") { + // This test checks that the else branch in ArrayInsert + // mapping to the comet is valid and fallback to spark is working fine. + assume(isSpark34Plus) + withTempDir { dir => + val path = new Path(dir.toURI.toString, "test.parquet") + makeParquetFileAllTypes(path, dictionaryEnabled = false, 10000) + val df = spark.read + .parquet(path.toString) + .withColumn("arr", array(col("_4"), lit(null), col("_4"))) + .withColumn("idx", udf((_: Int) => 1).apply(col("_4"))) + .withColumn("arrUnsupportedArgs", expr("array_insert(arr, idx, 1)")) + checkSparkAnswer(df.select("arrUnsupportedArgs")) + } + } } diff --git a/spark/src/test/scala/org/apache/comet/exec/CometColumnarShuffleSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometColumnarShuffleSuite.scala index ecc056ddd..6130e4cd5 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometColumnarShuffleSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometColumnarShuffleSuite.scala @@ -40,6 +40,7 @@ import org.apache.comet.CometConf abstract class CometColumnarShuffleSuite extends CometTestBase with AdaptiveSparkPlanHelper { protected val adaptiveExecutionEnabled: Boolean protected val numElementsForceSpillThreshold: Int = 10 + protected val useUnifiedMemoryAllocator: Boolean = true override protected def sparkConf: SparkConf = { val conf = super.sparkConf @@ -57,6 +58,8 @@ abstract class CometColumnarShuffleSuite extends CometTestBase with AdaptiveSpar CometConf.COMET_COLUMNAR_SHUFFLE_SPILL_THRESHOLD.key -> numElementsForceSpillThreshold.toString, CometConf.COMET_EXEC_ENABLED.key -> "false", CometConf.COMET_SHUFFLE_MODE.key -> "jvm", + CometConf.COMET_COLUMNAR_SHUFFLE_UNIFIED_MEMORY_ALLOCATOR_IN_TEST.key -> + useUnifiedMemoryAllocator.toString, CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true", CometConf.COMET_COLUMNAR_SHUFFLE_MEMORY_SIZE.key -> "1536m") { testFun @@ -747,6 +750,10 @@ abstract class CometColumnarShuffleSuite extends CometTestBase with AdaptiveSpar $"_6", $"_7", $"_8", + $"_9", + $"_10", + $"_11", + $"_12", $"_13", 
$"_14", $"_15", @@ -968,6 +975,13 @@ abstract class CometColumnarShuffleSuite extends CometTestBase with AdaptiveSpar } } +class CometTestMemoryAllocatorShuffleSuite extends CometColumnarShuffleSuite { + override protected val asyncShuffleEnable: Boolean = false + override protected val adaptiveExecutionEnabled: Boolean = true + // Explicitly test with `CometTestShuffleMemoryAllocator` + override protected val useUnifiedMemoryAllocator: Boolean = false +} + class CometAsyncShuffleSuite extends CometColumnarShuffleSuite { override protected val asyncShuffleEnable: Boolean = true diff --git a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala index a54b70ea4..1d1af7b3e 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala @@ -39,6 +39,7 @@ import org.apache.spark.sql.comet.{CometBroadcastExchangeExec, CometBroadcastHas import org.apache.spark.sql.comet.execution.shuffle.{CometColumnarShuffle, CometShuffleExchangeExec} import org.apache.spark.sql.execution.{CollectLimitExec, ProjectExec, SQLExecution, UnionExec} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ReusedExchangeExec, ShuffleExchangeExec} import org.apache.spark.sql.execution.joins.{BroadcastNestedLoopJoinExec, CartesianProductExec, SortMergeJoinExec} import org.apache.spark.sql.execution.reuse.ReuseExchangeAndSubquery @@ -129,7 +130,7 @@ class CometExecSuite extends CometTestBase { sql( "CREATE VIEW lv_noalias AS SELECT myTab.* FROM src " + "LATERAL VIEW explode(map('key1', 100, 'key2', 200)) myTab LIMIT 2") - val df = sql("SELECT * FROM lv_noalias a JOIN lv_noalias b ON a.key=b.key") + val df = sql("SELECT * FROM lv_noalias a JOIN lv_noalias b ON a.key=b.key"); checkSparkAnswer(df) } } @@ -1889,6 +1890,14 @@ class CometExecSuite extends CometTestBase { } } } + + test("Supported file formats for CometScanExec") { + assert(CometScanExec.isFileFormatSupported(new ParquetFileFormat())) + + class CustomParquetFileFormat extends ParquetFileFormat {} + + assert(!CometScanExec.isFileFormatSupported(new CustomParquetFileFormat())) + } } case class BucketedTableTestSpec( diff --git a/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala b/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala index 65fe94591..b97865a1f 100644 --- a/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala +++ b/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala @@ -433,8 +433,8 @@ abstract class ParquetReadSuite extends CometTestBase { i.toFloat, i.toDouble, i.toString * 48, - java.lang.Byte.toUnsignedInt((-i).toByte), - java.lang.Short.toUnsignedInt((-i).toShort), + (-i).toByte, + (-i).toShort, java.lang.Integer.toUnsignedLong(-i), new BigDecimal(UnsignedLong.fromLongBits((-i).toLong).bigIntegerValue()), i.toString, diff --git a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala index e997c5bfd..39af52e90 100644 --- a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala @@ -234,11 +234,9 @@ abstract class CometTestBase df: => DataFrame): (Option[Throwable], Option[Throwable]) = { var expected: Option[Throwable] = 
None withSQLConf(CometConf.COMET_ENABLED.key -> "false") { - val dfSpark = Dataset.ofRows(spark, df.logicalPlan) - expected = Try(dfSpark.collect()).failed.toOption + expected = Try(Dataset.ofRows(spark, df.logicalPlan).collect()).failed.toOption } - val dfComet = Dataset.ofRows(spark, df.logicalPlan) - val actual = Try(dfComet.collect()).failed.toOption + val actual = Try(Dataset.ofRows(spark, df.logicalPlan).collect()).failed.toOption (expected, actual) } diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala index 3dd930f67..3ee37bd66 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala @@ -274,23 +274,23 @@ object CometExecBenchmark extends CometBenchmarkBase { } override def runCometBenchmark(mainArgs: Array[String]): Unit = { -// runBenchmarkWithTable("Subquery", 1024 * 1024 * 10) { v => -// subqueryExecBenchmark(v) -// } -// -// runBenchmarkWithTable("Expand", 1024 * 1024 * 10) { v => -// expandExecBenchmark(v) -// } -// -// runBenchmarkWithTable("Project + Filter", 1024 * 1024 * 10) { v => -// for (fractionOfZeros <- List(0.0, 0.50, 0.95)) { -// numericFilterExecBenchmark(v, fractionOfZeros) -// } -// } -// -// runBenchmarkWithTable("Sort", 1024 * 1024 * 10) { v => -// sortExecBenchmark(v) -// } + runBenchmarkWithTable("Subquery", 1024 * 1024 * 10) { v => + subqueryExecBenchmark(v) + } + + runBenchmarkWithTable("Expand", 1024 * 1024 * 10) { v => + expandExecBenchmark(v) + } + + runBenchmarkWithTable("Project + Filter", 1024 * 1024 * 10) { v => + for (fractionOfZeros <- List(0.0, 0.50, 0.95)) { + numericFilterExecBenchmark(v, fractionOfZeros) + } + } + + runBenchmarkWithTable("Sort", 1024 * 1024 * 10) { v => + sortExecBenchmark(v) + } runBenchmarkWithTable("BloomFilterAggregate", 1024 * 1024 * 10) { v => for (card <- List(100, 1024, 1024 * 1024)) { diff --git a/spark/src/test/scala/org/apache/spark/sql/comet/CometPlanStabilitySuite.scala b/spark/src/test/scala/org/apache/spark/sql/comet/CometPlanStabilitySuite.scala index 080655fe2..c3513e59e 100644 --- a/spark/src/test/scala/org/apache/spark/sql/comet/CometPlanStabilitySuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/comet/CometPlanStabilitySuite.scala @@ -26,6 +26,7 @@ import scala.collection.mutable import org.apache.commons.io.FileUtils import org.apache.spark.SparkContext +import org.apache.spark.internal.config.{MEMORY_OFFHEAP_ENABLED, MEMORY_OFFHEAP_SIZE} import org.apache.spark.sql.TPCDSBase import org.apache.spark.sql.catalyst.expressions.AttributeSet import org.apache.spark.sql.catalyst.util.resourceToString @@ -293,6 +294,8 @@ trait CometPlanStabilitySuite extends DisableAdaptiveExecutionSuite with TPCDSBa conf.set( "spark.shuffle.manager", "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager") + conf.set(MEMORY_OFFHEAP_ENABLED.key, "true") + conf.set(MEMORY_OFFHEAP_SIZE.key, "2g") conf.set(CometConf.COMET_ENABLED.key, "true") conf.set(CometConf.COMET_EXEC_ENABLED.key, "true") conf.set(CometConf.COMET_NATIVE_SCAN_ENABLED.key, "true")
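Since Comet native execution and shuffle are now gated on off-heap memory being enabled (outside of the Spark/Comet test harness), a session has to be configured much like CometPlanStabilitySuite does above. The following is a rough sketch with illustrative sizes; the spark.comet.* keys and the extensions class name are assumed from CometConf and CometSparkSessionExtensions rather than taken verbatim from this change.

// Illustrative SparkSession setup for Comet with off-heap memory enabled.
import org.apache.spark.sql.SparkSession

object CometOffHeapSessionSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[*]")
      .config("spark.sql.extensions", "org.apache.comet.CometSparkSessionExtensions")
      // Off-heap memory is required for Comet native exec outside of unit tests.
      .config("spark.memory.offHeap.enabled", "true")
      .config("spark.memory.offHeap.size", "2g")
      // Route shuffle through Comet's shuffle manager.
      .config(
        "spark.shuffle.manager",
        "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
      .config("spark.comet.enabled", "true")
      .config("spark.comet.exec.enabled", "true")
      .config("spark.comet.exec.shuffle.enabled", "true")
      .getOrCreate()

    spark.range(1000).selectExpr("sum(id)").show()
    spark.stop()
  }
}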