From dab4674f41f01c3e723aeaee556d4670b8ab4b96 Mon Sep 17 00:00:00 2001 From: arnavb Date: Tue, 7 Jan 2025 19:43:09 +0000 Subject: [PATCH] update --- .../gluten/utils/ParquetMetadataUtils.scala | 7 +- dev/buildbundle-veloxbe.sh | 2 +- dev/builddeps-veloxbe.sh | 6 +- package/pom.xml | 165 ------------------ 4 files changed, 9 insertions(+), 171 deletions(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala b/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala index 0792fb0f2878..3c8570c0f851 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala @@ -72,7 +72,7 @@ object ParquetMetadataUtils { } /** - * Checks if any Parquet file under the given path is encrypted using a recursive + * Checks whether any Parquet file under the given path is encrypted using a recursive * iterator. Only the first `fileLimit` files are processed for efficiency. * * @param fs @@ -110,7 +110,10 @@ object ParquetMetadataUtils { } /** - * Recursively checks if the exception or its causes match the specified type. + * Utility to check whether the exception or any of its causes matches the specified type. + * Parquet 1.12 does not provide a direct utility to check for encryption. + * Newer versions provide a utility to check encryption from the read footer, + * which can be used in the future once Spark brings it in. 
* * @param throwable * Exception to check diff --git a/dev/buildbundle-veloxbe.sh b/dev/buildbundle-veloxbe.sh index e8c361b0713e..8515f7b12db7 100755 --- a/dev/buildbundle-veloxbe.sh +++ b/dev/buildbundle-veloxbe.sh @@ -25,7 +25,7 @@ check_supported # SPARK_VERSION is defined in builddeps-veloxbe.sh if [ "$SPARK_VERSION" = "ALL" ]; then - for spark_version in 3.3 #3.2 3.3 3.4 3.5 + for spark_version in 3.2 3.3 3.4 3.5 do build_for_spark $spark_version done diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index 6a433d3f0572..8eb4cf4edf2c 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -207,9 +207,9 @@ function build_gluten_cpp { } function build_velox_backend { - # if [ $BUILD_ARROW == "ON" ]; then - # build_arrow - # fi + if [ $BUILD_ARROW == "ON" ]; then + build_arrow + fi build_velox build_gluten_cpp } diff --git a/package/pom.xml b/package/pom.xml index 6731ff00ae60..b9c114181bcd 100644 --- a/package/pom.xml +++ b/package/pom.xml @@ -132,167 +132,6 @@ true - - org.apache.spark.TaskContextUtils - org.apache.gluten.shaded.org.apache.spark.TaskContextUtils - - - org.apache.spark.ShuffleUtils - org.apache.gluten.shaded.org.apache.spark.ShuffleUtils - - - org.apache.spark.sql.hive.execution.HiveFileFormat - org.apache.gluten.shaded.org.apache.spark.sql.hive.execution.HiveFileFormat - - - org.apache.spark.sql.hive.execution.AbstractHiveTableScanExec - org.apache.gluten.shaded.org.apache.spark.sql.hive.execution.AbstractHiveTableScanExec - - - org.apache.spark.sql.catalyst.types.DataTypeUtils - org.apache.gluten.shaded.org.apache.spark.sql.catalyst.types.DataTypeUtils - - - org.apache.spark.sql.catalyst.expressions.EvalMode - org.apache.gluten.shaded.org.apache.spark.sql.catalyst.expressions.EvalMode - - - org.apache.spark.sql.catalyst.optimizer.CollapseProjectShim - org.apache.gluten.shaded.org.apache.spark.sql.catalyst.optimizer.CollapseProjectShim - - - org.apache.spark.sql.execution.FileSourceScanExecShim - 
org.apache.gluten.shaded.org.apache.spark.sql.execution.FileSourceScanExecShim - - - org.apache.spark.sql.execution.AbstractFileSourceScanExec - org.apache.gluten.shaded.org.apache.spark.sql.execution.AbstractFileSourceScanExec - - - org.apache.spark.sql.execution.stat.StatFunctions - org.apache.gluten.shaded.org.apache.spark.sql.execution.stat.StatFunctions - - - org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand - - - org.apache.spark.sql.execution.datasources.BasicWriteStatsTracker - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.BasicWriteStatsTracker - - - org.apache.spark.sql.execution.datasources.orc.OrcFileFormat - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.orc.OrcFileFormat - - - org.apache.spark.sql.execution.datasources.v2.utils.CatalogUtil - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.v2.utils.CatalogUtil - - - org.apache.spark.sql.execution.datasources.v2.Spark33Scan - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.v2.Spark33Scan - - - org.apache.spark.sql.execution.datasources.v2.BatchScanExecShim - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.v2.BatchScanExecShim - - - org.apache.spark.sql.execution.datasources.v2.AbstractBatchScanExec - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.v2.AbstractBatchScanExec - - - org.apache.spark.sql.execution.datasources.FileFormatWriter - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.FileFormatWriter - - - org.apache.spark.sql.execution.datasources.FileFormatDataWriter - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.FileFormatDataWriter - - - org.apache.spark.sql.execution.datasources.WriteFiles - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.WriteFiles - - - 
org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat - - - org.apache.spark.sql.execution.ExpandOutputPartitioningShim - org.apache.gluten.shaded.org.apache.spark.sql.execution.ExpandOutputPartitioningShim - - - org.apache.spark.sql.execution.python.BasePythonRunnerShim - org.apache.gluten.shaded.org.apache.spark.sql.execution.python.BasePythonRunnerShim - - - org.apache.spark.sql.execution.window.WindowGroupLimitExecShim - org.apache.gluten.shaded.org.apache.spark.sql.execution.window.WindowGroupLimitExecShim - - - org.apache.spark.sql.execution.datasources.EmptyDirectoryDataWriter - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.EmptyDirectoryDataWriter - - - org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter - - - org.apache.spark.sql.execution.datasources.BaseDynamicPartitionDataWriter - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.BaseDynamicPartitionDataWriter - - - org.apache.spark.sql.execution.datasources.DynamicPartitionDataSingleWriter - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.DynamicPartitionDataSingleWriter - - - org.apache.spark.sql.execution.datasources.DynamicPartitionDataConcurrentWriter - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.DynamicPartitionDataConcurrentWriter - - - org.apache.spark.sql.execution.datasources.WriterBucketSpec - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.WriterBucketSpec - - - org.apache.spark.sql.execution.datasources.WriteJobDescription - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.WriteJobDescription - - - org.apache.spark.sql.execution.datasources.WriteTaskResult - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.WriteTaskResult - - - 
org.apache.spark.sql.execution.datasources.ExecutedWriteSummary - org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.ExecutedWriteSummary - - - org.apache.spark.sql.execution.adaptive.GlutenCostEvaluator - org.apache.gluten.shaded.org.apache.spark.sql.execution.adaptive.GlutenCostEvaluator - - - org.apache.spark.sql.execution.PartitioningAndOrderingPreservingNodeShim - org.apache.gluten.shaded.org.apache.spark.sql.execution.PartitioningAndOrderingPreservingNodeShim - - - - - com.google.protobuf ${gluten.shade.packageName}.com.google.protobuf @@ -351,10 +190,6 @@ ${gluten.shade.packageName}.com.google.flatbuffers - - - - *:*