
Commit dab4674: update
ArnavBalyan committed Jan 7, 2025
1 parent 064f5e7 commit dab4674
Showing 4 changed files with 9 additions and 171 deletions.
@@ -72,7 +72,7 @@ object ParquetMetadataUtils {
   }
 
   /**
-   * Checks if any Parquet file under the given path is encrypted using a recursive
+   * Check if any Parquet file under the given path is encrypted using a recursive
    * iterator. Only the first `fileLimit` files are processed for efficiency.
    *
    * @param fs
@@ -110,7 +110,10 @@ object ParquetMetadataUtils {
   }
 
   /**
-   * Recursively checks if the exception or its causes match the specified type.
+   * Utility to check an exception and its causes for the specified type.
+   * Parquet 1.12 does not provide a direct utility to check for encryption.
+   * Newer versions provide a utility to check encryption from the read
+   * footer, which can be used once Spark brings it in.
    *
    * @param throwable
    *   Exception to check
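For reference, a minimal sketch of the exception-based detection the updated Javadoc describes, assuming Parquet 1.12's ParquetFileReader/HadoopInputFile APIs and its ParquetCryptoRuntimeException. The helper names (isCausedBy, isEncrypted, anyEncrypted) and the .parquet suffix filter are illustrative, not Gluten's actual ParquetMetadataUtils code:

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.{FileSystem, Path}
    import org.apache.parquet.crypto.ParquetCryptoRuntimeException
    import org.apache.parquet.hadoop.ParquetFileReader
    import org.apache.parquet.hadoop.util.HadoopInputFile

    import scala.annotation.tailrec
    import scala.util.control.NonFatal

    object EncryptionCheckSketch {

      // Walk the cause chain recursively, as the changed Javadoc describes;
      // stops at the first cause matching the requested type.
      @tailrec
      def isCausedBy(t: Throwable, clazz: Class[_ <: Throwable]): Boolean =
        if (t == null) false
        else if (clazz.isInstance(t)) true
        else isCausedBy(t.getCause, clazz)

      // Parquet 1.12 has no direct "is encrypted" utility, so attempt a
      // footer read without keys and treat a crypto exception as the signal.
      def isEncrypted(file: Path, conf: Configuration): Boolean =
        try {
          val reader = ParquetFileReader.open(HadoopInputFile.fromPath(file, conf))
          try reader.getFooter finally reader.close()
          false
        } catch {
          case NonFatal(e) => isCausedBy(e, classOf[ParquetCryptoRuntimeException])
        }

      // Recursive listing capped at `fileLimit` files, mirroring the first hunk.
      def anyEncrypted(fs: FileSystem, dir: Path, conf: Configuration, fileLimit: Int): Boolean = {
        val files = fs.listFiles(dir, true) // recursive iterator
        var checked = 0
        var found = false
        while (!found && files.hasNext && checked < fileLimit) {
          val status = files.next()
          if (status.getPath.getName.endsWith(".parquet")) {
            found = isEncrypted(status.getPath, conf)
            checked += 1
          }
        }
        found
      }
    }

A footer read can also fail for non-crypto reasons (corrupt file, permissions), which is why the sketch walks the cause chain for the specific crypto exception type rather than treating any failure as encryption.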
dev/buildbundle-veloxbe.sh (2 changes: 1 addition & 1 deletion)
@@ -25,7 +25,7 @@ check_supported
 
 # SPARK_VERSION is defined in builddeps-veloxbe.sh
 if [ "$SPARK_VERSION" = "ALL" ]; then
-  for spark_version in 3.3 #3.2 3.3 3.4 3.5
+  for spark_version in 3.2 3.3 3.4 3.5
   do
     build_for_spark $spark_version
   done
dev/builddeps-veloxbe.sh (6 changes: 3 additions & 3 deletions)
@@ -207,9 +207,9 @@ function build_gluten_cpp {
 }
 
 function build_velox_backend {
-  # if [ $BUILD_ARROW == "ON" ]; then
-  #   build_arrow
-  # fi
+  if [ $BUILD_ARROW == "ON" ]; then
+    build_arrow
+  fi
   build_velox
   build_gluten_cpp
 }
package/pom.xml (165 changes: 0 additions & 165 deletions)
@@ -132,167 +132,6 @@
 <configuration>
   <createSourcesJar>true</createSourcesJar>
   <relocations>
-    <relocation>
-      <pattern>org.apache.spark.TaskContextUtils</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.TaskContextUtils</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.ShuffleUtils</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.ShuffleUtils</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.hive.execution.HiveFileFormat</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.hive.execution.HiveFileFormat</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.hive.execution.AbstractHiveTableScanExec</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.hive.execution.AbstractHiveTableScanExec</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.catalyst.types.DataTypeUtils</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.catalyst.types.DataTypeUtils</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.catalyst.expressions.EvalMode</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.catalyst.expressions.EvalMode</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.catalyst.optimizer.CollapseProjectShim</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.catalyst.optimizer.CollapseProjectShim</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.FileSourceScanExecShim</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.FileSourceScanExecShim</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.AbstractFileSourceScanExec</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.AbstractFileSourceScanExec</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.stat.StatFunctions</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.stat.StatFunctions</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.BasicWriteStatsTracker</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.BasicWriteStatsTracker</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.orc.OrcFileFormat</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.orc.OrcFileFormat</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.v2.utils.CatalogUtil</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.v2.utils.CatalogUtil</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.v2.Spark33Scan</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.v2.Spark33Scan</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.v2.BatchScanExecShim</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.v2.BatchScanExecShim</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.v2.AbstractBatchScanExec</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.v2.AbstractBatchScanExec</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.FileFormatWriter</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.FileFormatWriter</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.FileFormatDataWriter</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.FileFormatDataWriter</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.WriteFiles</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.WriteFiles</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.ExpandOutputPartitioningShim</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.ExpandOutputPartitioningShim</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.python.BasePythonRunnerShim</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.python.BasePythonRunnerShim</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.window.WindowGroupLimitExecShim</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.window.WindowGroupLimitExecShim</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.EmptyDirectoryDataWriter</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.EmptyDirectoryDataWriter</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.BaseDynamicPartitionDataWriter</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.BaseDynamicPartitionDataWriter</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.DynamicPartitionDataSingleWriter</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.DynamicPartitionDataSingleWriter</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.DynamicPartitionDataConcurrentWriter</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.DynamicPartitionDataConcurrentWriter</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.WriterBucketSpec</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.WriterBucketSpec</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.WriteJobDescription</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.WriteJobDescription</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.WriteTaskResult</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.WriteTaskResult</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.datasources.ExecutedWriteSummary</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.datasources.ExecutedWriteSummary</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.adaptive.GlutenCostEvaluator</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.adaptive.GlutenCostEvaluator</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.PartitioningAndOrderingPreservingNodeShim</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.PartitioningAndOrderingPreservingNodeShim</shadedPattern>
-    </relocation>
-    <!-- <relocation>
-      <pattern>org.apache.gluten.utils.InternalRowUtl</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.gluten.utils.InternalRowUtl</shadedPattern>
-    </relocation> -->
-    <!-- <relocation>
-      <pattern>org.apache.gluten.sql.shims.spark33.SparkShimProvider</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.gluten.sql.shims.spark33.SparkShimProvider</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.gluten.sql.shims.spark33.Spark33Shims</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.gluten.sql.shims.spark33.Spark33Shims</shadedPattern>
-    </relocation> -->
-    <!-- <relocation>
-      <pattern>org.apache.gluten.execution.GenerateTreeStringShim</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.gluten.execution.GenerateTreeStringShim</shadedPattern>
-    </relocation>
-    <relocation>
-      <pattern>org.apache.spark.sql.execution.vectorized.WritableColumnVectorShim</pattern>
-      <shadedPattern>org.apache.gluten.shaded.org.apache.spark.sql.execution.vectorized.WritableColumnVectorShim</shadedPattern>
-    </relocation> -->
-
     <relocation>
       <pattern>com.google.protobuf</pattern>
       <shadedPattern>${gluten.shade.packageName}.com.google.protobuf</shadedPattern>
@@ -351,10 +190,6 @@
       <shadedPattern>${gluten.shade.packageName}.com.google.flatbuffers</shadedPattern>
     </relocation>
   </relocations>
-  <transformers>
-    <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
-    <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"/>
-  </transformers>
   <filters>
     <filter>
       <artifact>*:*</artifact>
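The removed <relocation> entries had been rewriting Spark-facing shim classes into an org.apache.gluten.shaded prefix at package time. As a rough, hypothetical illustration of what maven-shade relocation means for consumers (the class names below are assumptions for illustration, not a claim about the published Gluten jar), a relocated class is only loadable under its rewritten binary name:

    // Hedged sketch: probe which binary name a relocated class answers to.
    object RelocationProbe {
      def loadable(name: String): Boolean =
        try { Class.forName(name); true }
        catch { case _: ClassNotFoundException => false }

      def main(args: Array[String]): Unit = {
        // In a jar shaded with pattern com.google.protobuf ->
        // ${gluten.shade.packageName}.com.google.protobuf, one would expect
        // the shaded name to load and the original to be absent (unless
        // protobuf is also on the classpath unshaded).
        println(loadable("org.apache.gluten.shaded.com.google.protobuf.Message"))
        println(loadable("com.google.protobuf.Message"))
      }
    }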
