Merge branch 'branch-24.04' into jtb-rtoc-new
jbrennan333 committed Mar 26, 2024
2 parents 5be7594 + e833d39 commit a74adbe
Showing 11 changed files with 40 additions and 23 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/auto-merge.yml
@@ -18,7 +18,7 @@ name: auto-merge HEAD to BASE
on:
pull_request_target:
branches:
- branch-24.02
- branch-24.04
types: [closed]

jobs:
@@ -29,13 +29,13 @@ jobs:
steps:
- uses: actions/checkout@v3
with:
ref: branch-24.02 # force to fetch from latest upstream instead of PR ref
ref: branch-24.04 # force to fetch from latest upstream instead of PR ref

- name: auto-merge job
uses: ./.github/workflows/auto-merge
env:
OWNER: NVIDIA
REPO_NAME: spark-rapids
HEAD: branch-24.02
BASE: branch-24.04
HEAD: branch-24.04
BASE: branch-24.06
AUTOMERGE_TOKEN: ${{ secrets.AUTOMERGE_TOKEN }} # use to merge PR
8 changes: 4 additions & 4 deletions jenkins/Jenkinsfile-blossom.premerge
@@ -1,6 +1,6 @@
#!/usr/local/env groovy
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -24,7 +24,7 @@ import hudson.model.Result
import hudson.model.Run
import jenkins.model.CauseOfInterruption.UserInterruption

@Library(['shared-libs', 'blossom-lib']) _
@Library('blossom-lib')
@Library('blossom-github-lib@master')
import ipp.blossom.*

@@ -68,10 +68,10 @@ pipeline {
PREMERGE_SCRIPT = '$JENKINS_ROOT/spark-premerge-build.sh'
MVN_URM_MIRROR = '-s jenkins/settings.xml -P mirror-apache-to-urm'
LIBCUDF_KERNEL_CACHE_PATH = '/tmp/.cudf'
ARTIFACTORY_NAME = "${ArtifactoryConstants.ARTIFACTORY_NAME}"
ARTIFACTORY_NAME = "${common.ARTIFACTORY_NAME}"
GITHUB_TOKEN = credentials("github-token")
URM_CREDS = credentials("urm_creds")
URM_URL = "https://${ArtifactoryConstants.ARTIFACTORY_NAME}/artifactory/sw-spark-maven"
URM_URL = "https://${common.ARTIFACTORY_NAME}/artifactory/sw-spark-maven"
PVC = credentials("pvc")
CUSTOM_WORKSPACE = "/home/jenkins/agent/workspace/${BUILD_TAG}"
CLASSIFIER = 'cuda11'
2 changes: 1 addition & 1 deletion jenkins/Jenkinsfile-blossom.premerge-databricks
@@ -21,7 +21,7 @@
*
*/

@Library(['shared-libs', 'blossom-lib']) _
@Library('blossom-lib')
@Library('blossom-github-lib@master')

import ipp.blossom.*
@@ -30,7 +30,7 @@ import org.apache.spark.sql.connector.read.Scan
import org.apache.spark.sql.execution.{ColumnarToRowTransition, SparkPlan}
import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, BroadcastQueryStageExec, ShuffleQueryStageExec}
import org.apache.spark.sql.execution.command.{DataWritingCommand, RunnableCommand}
import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile, PartitioningAwareFileIndex}
import org.apache.spark.sql.execution.datasources.{FileFormat, FilePartition, PartitionedFile, PartitioningAwareFileIndex}
import org.apache.spark.sql.execution.datasources.parquet.ParquetFilters
import org.apache.spark.sql.execution.exchange.{ReusedExchangeExec, ShuffleExchangeLike}
import org.apache.spark.sql.internal.SQLConf
@@ -78,7 +78,8 @@ trait SparkShims {
readFunction: (PartitionedFile) => Iterator[InternalRow],
filePartitions: Seq[FilePartition],
readDataSchema: StructType,
metadataColumns: Seq[AttributeReference] = Seq.empty): RDD[InternalRow]
metadataColumns: Seq[AttributeReference] = Seq.empty,
fileFormat: Option[FileFormat] = None): RDD[InternalRow]

def shouldFailDivOverflow: Boolean

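Note: pieced together from the two hunks above, the shim entry point now reads as follows. This is a reconstruction from the diff fragments, not the file's full contents:

  // New in this commit: an optional FileFormat, defaulted to None so
  // existing callers keep compiling; only shims that can make use of it
  // need to look at it.
  def getFileScanRDD(
      sparkSession: SparkSession,
      readFunction: (PartitionedFile) => Iterator[InternalRow],
      filePartitions: Seq[FilePartition],
      readDataSchema: StructType,
      metadataColumns: Seq[AttributeReference] = Seq.empty,
      fileFormat: Option[FileFormat] = None): RDD[InternalRow]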
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -603,7 +603,7 @@ case class GpuFileSourceScanExec(
if (isPerFileReadEnabled) {
logInfo("Using the original per file reader")
SparkShimImpl.getFileScanRDD(relation.sparkSession, readFile.get, locatedPartitions,
requiredSchema)
requiredSchema, fileFormat = Some(relation.fileFormat))
} else {
logDebug(s"Using Datasource RDD, files are: " +
s"${prunedPartitions.flatMap(_.files).mkString(",")}")
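Note: this is the only call site changed by the commit; it threads the relation's FileFormat through the shim layer. A minimal sketch of the resulting call, using only values already in scope in the hunk above:

  // Per-file reader path: pass the FileFormat by name so metadataColumns
  // keeps its Seq.empty default from the trait.
  SparkShimImpl.getFileScanRDD(relation.sparkSession, readFile.get,
    locatedPartitions, requiredSchema, fileFormat = Some(relation.fileFormat))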
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression}
import org.apache.spark.sql.execution._
import org.apache.spark.sql.execution.command.{CreateDataSourceTableAsSelectCommand, DataWritingCommand, RunnableCommand}
import org.apache.spark.sql.execution.datasources.{FilePartition, FileScanRDD, PartitionedFile}
import org.apache.spark.sql.execution.datasources.{FileFormat, FilePartition, FileScanRDD, PartitionedFile}
import org.apache.spark.sql.execution.datasources.v2._
import org.apache.spark.sql.types.StructType

@@ -50,7 +50,8 @@ trait Spark31Xuntil33XShims extends SparkShims {
readFunction: PartitionedFile => Iterator[InternalRow],
filePartitions: Seq[FilePartition],
readDataSchema: StructType,
metadataColumns: Seq[AttributeReference]): RDD[InternalRow] = {
metadataColumns: Seq[AttributeReference],
fileFormat: Option[FileFormat]): RDD[InternalRow] = {
new FileScanRDD(sparkSession, readFunction, filePartitions)
}

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -37,7 +37,7 @@ import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.datasources.{FilePartition, FileScanRDD, PartitionedFile}
import org.apache.spark.sql.execution.datasources.{FileFormat, FilePartition, FileScanRDD, PartitionedFile}
import org.apache.spark.sql.rapids.shims.{GpuDivideYMInterval, GpuMultiplyYMInterval}
import org.apache.spark.sql.types.StructType

@@ -50,7 +50,8 @@ trait Spark330PlusShims extends Spark321PlusShims with Spark320PlusNonDBShims {
readFunction: PartitionedFile => Iterator[InternalRow],
filePartitions: Seq[FilePartition],
readDataSchema: StructType,
metadataColumns: Seq[AttributeReference]): RDD[InternalRow] = {
metadataColumns: Seq[AttributeReference],
fileFormat: Option[FileFormat]): RDD[InternalRow] = {
new FileScanRDD(sparkSession, readFunction, filePartitions, readDataSchema, metadataColumns)
}

@@ -56,7 +56,8 @@ trait Spark321PlusDBShims extends SparkShims
readFunction: PartitionedFile => Iterator[InternalRow],
filePartitions: Seq[FilePartition],
readDataSchema: StructType,
metadataColumns: Seq[AttributeReference]): RDD[InternalRow] = {
metadataColumns: Seq[AttributeReference],
fileFormat: Option[FileFormat]): RDD[InternalRow] = {
new GpuFileScanRDD(sparkSession, readFunction, filePartitions)
}

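Note: the three shims above (Spark 3.1.x-until-3.3, Spark 3.3.0+, and Databricks Spark 3.2.1+) accept the new parameter only to satisfy the shared trait; the FileScanRDD/GpuFileScanRDD constructors on those Spark versions predate constant-metadata extractors, so the value is deliberately dropped. The shared pattern, condensed (class context as in the hunks above):

  override def getFileScanRDD(
      sparkSession: SparkSession,
      readFunction: PartitionedFile => Iterator[InternalRow],
      filePartitions: Seq[FilePartition],
      readDataSchema: StructType,
      metadataColumns: Seq[AttributeReference],
      fileFormat: Option[FileFormat]): RDD[InternalRow] = {
    // fileFormat is intentionally unused: this Spark version's FileScanRDD
    // has nothing to wire it into.
    new FileScanRDD(sparkSession, readFunction, filePartitions)
  }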
@@ -22,15 +22,29 @@ package com.nvidia.spark.rapids.shims

import com.nvidia.spark.rapids._

import org.apache.spark.sql.catalyst.expressions.{Expression, PythonUDAF, ToPrettyString}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, PythonUDAF, ToPrettyString}
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.adaptive.TableCacheQueryStageExec
import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec
import org.apache.spark.sql.execution.datasources.{FileFormat, FilePartition, FileScanRDD, PartitionedFile}
import org.apache.spark.sql.execution.window.WindowGroupLimitExec
import org.apache.spark.sql.rapids.execution.python.GpuPythonUDAF
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.types.{StringType, StructType}

object SparkShimImpl extends Spark340PlusNonDBShims {
override def getFileScanRDD(
sparkSession: SparkSession,
readFunction: PartitionedFile => Iterator[InternalRow],
filePartitions: Seq[FilePartition],
readDataSchema: StructType,
metadataColumns: Seq[AttributeReference] = Seq.empty,
fileFormat: Option[FileFormat]): RDD[InternalRow] = {
new FileScanRDD(sparkSession, readFunction, filePartitions, readDataSchema, metadataColumns,
metadataExtractors = fileFormat.map(_.fileConstantMetadataExtractors).getOrElse(Map.empty))
}

override def getExprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = {
val shimExprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = Seq(
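Note: this shim is the one consumer of the new parameter: it derives per-column constant-metadata extractor functions from the FileFormat and hands them to FileScanRDD, so GPU per-file reads populate constant metadata (file path, size, modification time, and similar) the same way the CPU path does. A hedged sketch of just that mapping; the Map[String, PartitionedFile => Any] shape, keyed by metadata column name, is an assumption about the Spark 3.5-era FileFormat API:

  // Absent a FileFormat, fall back to no extractors.
  val metadataExtractors: Map[String, PartitionedFile => Any] =
    fileFormat.map(_.fileConstantMetadataExtractors).getOrElse(Map.empty)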
@@ -36,7 +36,6 @@ import org.apache.spark.sql.rapids.execution.TrampolineUtil
class HostAllocSuite extends AnyFunSuite with BeforeAndAfterEach with
BeforeAndAfterAll with TimeLimits {
private val sqlConf = new SQLConf()
sqlConf.setConfString("spark.rapids.memory.gpu.state.debug", "stderr")
private val rc = new RapidsConf(sqlConf)
private val timeoutMs = 10000
