Skip to content

Commit

Permalink
Unarchive Spark test jar for spark.read(ability) (NVIDIA#10946)
Browse files Browse the repository at this point in the history
Closes NVIDIA#10875
Contributes to NVIDIA#10773
    
Unjar the Spark test jar once, cache the expanded directory, and share its content among all test suites that come from the same jar.

Test:
```bash
mvn package -Dbuildver=330 -pl tests -am -Dsuffixes='.*\.RapidsJsonSuite'
```

Signed-off-by: Gera Shegalov <[email protected]>
  • Loading branch information
gerashegalov authored and SurajAralihalli committed Jul 12, 2024
1 parent cd12aa8 commit 272c633
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap

/** Runs JsonSuite with the Rapids SQL test base trait and the Rapids JSON conf trait mixed in. */
class RapidsJsonSuite
  extends JsonSuite with RapidsSQLTestsBaseTrait with RapidsJsonConfTrait {

  /**
   * Builds the full path of `fileName` under the directory returned by
   * `getWorkspaceFilePath("sql", "core", "src", "test", "resources")`.
   */
  override protected def testFile(fileName: String): String = {
    val resourcesDir = getWorkspaceFilePath("sql", "core", "src", "test", "resources")
    s"${resourcesDir.toString}/$fileName"
  }
}

class RapidsJsonV1Suite extends RapidsJsonSuite with RapidsSQLTestsBaseTrait {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,36 @@ package org.apache.spark.sql.rapids.utils

import java.util.{Locale, TimeZone}

import org.apache.hadoop.fs.FileUtil
import org.scalactic.source.Position
import org.scalatest.Tag

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config.Tests.IS_TESTING
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, ShuffleQueryStageExec}
import org.apache.spark.sql.rapids.execution.TrampolineUtil
import org.apache.spark.sql.rapids.utils.RapidsTestConstants.RAPIDS_TEST
import org.apache.spark.sql.test.SharedSparkSession


/** Basic trait for Rapids SQL test cases. */
trait RapidsSQLTestsBaseTrait extends SharedSparkSession with RapidsTestsBaseTrait {

/** Removes the IS_TESTING system property and then runs the inherited teardown. */
protected override def afterAll(): Unit = {
  // SparkFunSuite switches this property on and never switches it back off,
  // so drop it here before delegating to the parent.
  val testingFlagKey = IS_TESTING.key
  System.clearProperty(testingFlagKey)
  super.afterAll()
}

/**
 * Returns the full path of `fileName` inside the directory reported by
 * `RapidsSQLTestsBaseTrait.sparkTestResourcesDir` for this suite's runtime class.
 */
override protected def testFile(fileName: String): String = {
  val resourcesRoot = RapidsSQLTestsBaseTrait.sparkTestResourcesDir(getClass)
  val resolved = java.nio.file.Paths.get(resourcesRoot.toString, fileName)
  resolved.toString
}

protected def testRapids(testName: String, testTag: Tag*)(testFun: => Any)(implicit
pos: Position): Unit = {
test(RAPIDS_TEST + testName, testTag: _*)(testFun)
Expand Down Expand Up @@ -107,7 +116,40 @@ trait RapidsSQLTestsBaseTrait extends SharedSparkSession with RapidsTestsBaseTra
}
}

object RapidsSQLTestsBaseTrait {
object RapidsSQLTestsBaseTrait extends Logging {
// Maps the path of a tests jar to the temporary directory it was expanded into.
// sparkTestResourcesDir reads and writes this map while holding this object's monitor,
// so every other access must take the same lock.
private val resourceMap = scala.collection.mutable.Map.empty[String, java.nio.file.Path]
// Captures the jar file path from a class resource URL of the form
// jar:file:/<path>-tests.jar!/<entry>.
private val testJarUrlRegex = raw"jar:file:(/.*-tests.jar)!.*".r
// At shutdown, delete every directory that was created for an expanded tests jar.
// NOTE(review): 10000 is passed as the first argument of addShutdownHook, presumably the
// hook priority -- confirm against TrampolineUtil.
TrampolineUtil.addShutdownHook(10000, () => {
  // Take a snapshot under the same monitor that guards updates, so the mutable map is
  // never iterated while another thread may still be inserting into it. The deletion
  // itself runs outside the lock.
  val expandedDirs = RapidsSQLTestsBaseTrait.synchronized {
    resourceMap.values.toList
  }
  expandedDirs.foreach { dirPath =>
    logWarning(s"Deleting expanded test jar dir $dirPath")
    FileUtil.fullyDelete(dirPath.toFile)
  }
})

/**
 * Unpacks the jar at `jarPath` into a newly created temporary directory.
 *
 * The directory name is prefixed with the jar's file name followed by ".expanded".
 * If unpacking fails, the partially populated directory is deleted before the
 * failure is rethrown.
 *
 * @param jarPath file system path of the jar to unpack
 * @return the temporary directory holding the unpacked jar content
 */
private def expandJar(jarPath: String): java.nio.file.Path = {
  val jarFile = new java.io.File(jarPath)
  val destDir = java.nio.file.Files.createTempDirectory(jarFile.getName + ".expanded")
  logWarning(s"Registering $destDir for deletion on exit")
  try {
    FileUtil.unZip(jarFile, destDir.toFile)
  } catch {
    case scala.util.control.NonFatal(e) =>
      // The caller stores the returned path only when this method succeeds, so the
      // shutdown cleanup would never see this directory; remove it here instead.
      FileUtil.fullyDelete(destDir.toFile)
      throw e
  }
  destDir
}

/**
 * Returns the directory holding the expanded content of the tests jar that provides
 * the Spark test class underlying `testClass`.
 *
 * Starting at `testClass`, the superclass chain is walked until a class whose fully
 * qualified name does not contain "rapids" is found. The URL of that class's `.class`
 * resource must have the form `jar:file:/<path>-tests.jar!/<entry>`. The jar is
 * expanded on first use and the resulting directory is cached per jar path, so later
 * calls that resolve to the same jar return the same directory.
 *
 * @param testClass runtime class of the test suite
 * @return directory containing the expanded tests jar
 * @throws RuntimeException if no suitable class is found, the class resource cannot be
 *                          located, or its URL does not point into a `-tests.jar`
 */
def sparkTestResourcesDir(testClass: Class[_]): java.nio.file.Path = {
  // Null-safe walk up the hierarchy: stop at the first class outside the "rapids" names
  // instead of dereferencing a null superclass.
  val sparkTestClass = Iterator
    .iterate[Class[_]](testClass)(_.getSuperclass)
    .takeWhile(_ != null)
    .find(cls => !cls.getName.contains("rapids"))
    .getOrElse(sys.error(s"Could not find a non-rapids superclass of $testClass"))
  val sparkTestClassResource = "/" + sparkTestClass.getName.replace(".", "/") + ".class"
  // getResource returns null when the resource cannot be found; report that explicitly
  // rather than failing with a bare NullPointerException.
  val resourceURL = Option(sparkTestClass.getResource(sparkTestClassResource))
    .map(_.toString)
    .getOrElse(sys.error(
      s"Could not locate resource $sparkTestClassResource for ${sparkTestClass.getName}"))
  val resourceJar = resourceURL match {
    case testJarUrlRegex(testJarPath) => testJarPath
    case _ => sys.error(s"Could not extract tests jar path from $resourceURL")
  }
  // Guard the shared cache so each jar is expanded at most once.
  this.synchronized {
    resourceMap.getOrElseUpdate(resourceJar, expandJar(resourceJar))
  }
}

def nativeSparkConf(origin: SparkConf, warehouse: String): SparkConf = {
// Timezone is fixed to UTC to allow timestamps to work by default
TimeZone.setDefault(TimeZone.getTimeZone("UTC"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,6 @@ class RapidsTestSettings extends BackendTestSettings {
// Enable RapidsJsonSuite with the test cases listed below excluded. Every exclusion is
// marked as a KNOWN_ISSUE and carries the URL of its tracking issue.
enableSuite[RapidsJsonSuite]
.exclude("Casting long as timestamp", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773"))
.exclude("Write timestamps correctly with timestampFormat option and timeZone option", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773"))
.exclude("SPARK-23723: json in UTF-16 with BOM", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773"))
.exclude("SPARK-23723: multi-line json in UTF-32BE with BOM", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773"))
.exclude("SPARK-23723: Use user's encoding in reading of multi-line json in UTF-16LE", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773"))
.exclude("SPARK-23723: Unsupported encoding name", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773"))
.exclude("SPARK-23723: checking that the encoding option is case agnostic", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773"))
.exclude("SPARK-23723: specified encoding is not matched to actual encoding", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773"))
.exclude("SPARK-23724: lineSep should be set if encoding if different from UTF-8", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773"))
.exclude("SPARK-31716: inferring should handle malformed input", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773"))
.exclude("SPARK-24190: restrictions for JSONOptions in read", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773"))
.exclude("exception mode for parsing date/timestamp string", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773"))
// The following suites are enabled with no exclusions.
enableSuite[RapidsMathFunctionsSuite]
enableSuite[RapidsRegexpExpressionsSuite]
Expand Down

0 comments on commit 272c633

Please sign in to comment.