Updated to scala 2.13 and spark 3.2.0-SNAPSHOT
Issues with:
 - StructType string changes: StructType now renders as "Seq(...)" in messages under Scala 2.13
 - Fragile schema tests: scala's .groupBy returns an unordered Map whose iteration order differs between 2.12 and 2.13 (see the sketch below), and deep schemas don't resolve
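A minimal sketch of the groupBy issue (illustrative column names, not from this repo): groupBy returns an unordered Map, and the iteration order of that Map changed with the new HashMap implementation in Scala 2.13, so any schema assembled by iterating it can come out in a different column order.

    // groupBy returns a Map with no ordering guarantee; iterating it builds
    // the grouped schema in whatever order the Map happens to yield.
    val columns = Seq("foo_bar", "foo_baz", "z")
    val byPrefix = columns.groupBy(_.takeWhile(_ != '_'))
    // byPrefix.keys may iterate as (foo, z) on one Scala version and (z, foo)
    // on another, reordering the columns of any schema built from it.
    println(byPrefix.keys.toList)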
skestle committed May 13, 2021
1 parent 4d24f44 commit 11fcd2a
Showing 5 changed files with 24 additions and 9 deletions.
build.sbt (10 changes: 6 additions & 4 deletions)

@@ -4,14 +4,16 @@ organization := "com.github.mrpowers"
 name := "spark-daria"
 
 version := "1.0.0"
-crossScalaVersions := Seq("2.12.12")
-scalaVersion := "2.12.12"
-val sparkVersion = "3.0.1"
+crossScalaVersions := Seq("2.13.5")
+scalaVersion := crossScalaVersions.value.head
+val sparkVersion = "3.2.0-SNAPSHOT"
 
+resolvers += "Apache Snapshots" at "https://repository.apache.org/snapshots"
+
 libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion % "provided"
 libraryDependencies += "org.apache.spark" %% "spark-mllib" % sparkVersion % "provided"
 libraryDependencies += "com.github.mrpowers" %% "spark-fast-tests" % "1.0.0" % "test"
-libraryDependencies += "com.lihaoyi" %% "utest" % "0.6.3" % "test"
+libraryDependencies += "com.lihaoyi" %% "utest" % "0.7.9" % "test"
 libraryDependencies += "com.lihaoyi" %% "os-lib" % "0.7.1" % "test"
 testFrameworks += new TestFramework("com.github.mrpowers.spark.daria.CustomFramework")
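Two notes on the build changes (my reading, not stated in the commit): 3.2.0-SNAPSHOT artifacts are published only to Apache's snapshot repository, hence the new resolver; and deriving scalaVersion from crossScalaVersions keeps the default build in sync with the cross-build list, as this sketch of the derivation shows:

    // Hypothetical two-version cross list: the default Scala version always
    // follows the head of the list, so there is a single place to bump it.
    val crossScalaVersions = Seq("2.13.5", "2.12.12")
    val scalaVersion = crossScalaVersions.head // "2.13.5"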

@@ -121,9 +121,9 @@ object functions {
     val d = Option(delimiters).getOrElse(return None)
     val c = Option(colName).getOrElse(return None)
     // initialize the previousLetter to be the null character - the closest representation of the empty character: https://stackoverflow.com/questions/8306060/how-do-i-represent-an-empty-char-in-scala
-    var previousLetter: Char = '\0'
+    var previousLetter: Char = '\u0000'
     Some(c.map { letter: Char =>
-      if (d.contains(previousLetter) || previousLetter.equals('\0')) {
+      if (d.contains(previousLetter) || previousLetter.equals('\u0000')) {
         previousLetter = letter
         letter.toUpper
       } else {
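For context (my reading of the Scala 2.13 migration notes, not stated in the commit): octal escape literals such as '\0' were dropped in Scala 2.13, so the null character must now be spelled with its Unicode escape.

    // Scala 2.13 rejects the octal escape '\0'; '\u0000' is the supported
    // spelling of the null character (code point 0).
    val nul: Char = '\u0000'
    assert(nul == Character.MIN_VALUE)
    // An escape-free alternative:
    val alsoNul: Char = 0.toChar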
@@ -423,6 +423,7 @@ object functions {
       arr.view
         .map(f(_))
         .filter(_ != null)
+        .toSeq
     )
   }
 }
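The added .toSeq reflects the Scala 2.13 collections redesign: a SeqView no longer extends Seq, so the lazily mapped and filtered view has to be forced back into a Seq before it can be returned as one. A minimal sketch of the difference (assumed behaviour, not code from this repo):

    val arr = Seq("a", null, "b")
    val view = arr.view.map(identity).filter(_ != null)
    // Scala 2.12: SeqView extends Seq, so `view` already satisfied Seq[String].
    // Scala 2.13: SeqView is not a Seq; the explicit conversion is required.
    val result: Seq[String] = view.toSeq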
@@ -1014,18 +1014,26 @@ object DataFrameExtTest extends TestSuite with DataFrameComparer with SparkSessionTestWrapper {
       StructType(schema)
     )
 
+    println("DF Schema:")
+    df.printSchema()
     val delimiter = "_"
+    println("FlattenedDF Schema:")
+    df.flattenSchema(delimiter).printSchema()
     val expectedDF = df
       .flattenSchema(delimiter)
       .structureSchema(delimiter)
       .setNullableForAllColumns(true) // for some reason spark changes nullability of struct columns
+    println("ExpectedDF Schema:")
+    expectedDF.printSchema()
 
+    // Work around test fragility that breaks when scala 2.13 applies groupBy in a different order
     DariaValidator.validateSchema(
-      expectedDF,
-      schema
+      expectedDF.select("z"),
+      StructType(Seq(schema.apply("z")))
     )
+    DariaValidator.validateSchema(
+      expectedDF.select("foo.baz", "foo.bar"),
+      schema.apply("foo").dataType.asInstanceOf[StructType]
+    )
   }
 }
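An alternative workaround (a sketch of another option, not what this commit does) would be to make the assertion itself order-insensitive, comparing fields as sets so groupBy's unspecified Map ordering cannot break the test:

    import org.apache.spark.sql.types.StructType

    // Compare schemas while ignoring top-level column order; nested structs
    // would need the same treatment applied recursively.
    def sameFieldsIgnoringOrder(actual: StructType, expected: StructType): Boolean =
      actual.fields.toSet == expected.fields.toSet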
@@ -220,7 +220,7 @@ object DataFrameSchemaCheckerTest extends TestSuite with SparkSessionTestWrapper
     )
 
     val expected =
-      "The [StructField(name,StringType,true)] StructFields are not included in the DataFrame with the following StructFields [StructType(StructField(num1,IntegerType,true), StructField(num2,IntegerType,true))]"
+      "The [StructField(name,StringType,true)] StructFields are not included in the DataFrame with the following StructFields [Seq(StructField(num1,IntegerType,true), StructField(num2,IntegerType,true))]"
 
     assert(c.missingStructFieldsMessage() == expected)
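The expected message changes because collection rendering changed: in Scala 2.12 a custom Seq subclass such as Spark's StructType printed under its runtime class name, while in 2.13 stringPrefix defaults to the trait name "Seq". A standalone illustration (hypothetical Wrapped class, not from this codebase):

    // Prints "Seq(1, 2)" on Scala 2.13; on 2.12 it printed "Wrapped(1, 2)",
    // the same shift that turned "StructType(...)" into "Seq(...)" above.
    class Wrapped(xs: List[Int]) extends Seq[Int] {
      def apply(i: Int): Int = xs(i)
      def length: Int = xs.length
      def iterator: Iterator[Int] = xs.iterator
    }
    println(new Wrapped(List(1, 2)))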

@@ -13,6 +13,10 @@ trait SparkSessionTestWrapper {
         "spark.sql.shuffle.partitions",
         "1"
       )
+      // Time zone tests
+      .config("spark.sql.session.timeZone", "GMT")
+      .config("spark.executor.extraJavaOptions", "-Duser.timezone=GMT")
+      .config("spark.driver.extraJavaOptions", "-Duser.timezone=GMT")
       .getOrCreate()
   }
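Pinning both the Spark session time zone and the driver/executor JVM time zones makes timestamp rendering deterministic across machines; without it, the same instant prints differently depending on the JVM default zone. A small illustration of the failure mode these settings prevent:

    import java.sql.Timestamp

    // Epoch 0 renders as "1970-01-01 00:00:00.0" under GMT but as
    // "1969-12-31 16:00:00.0" under America/Los_Angeles, so tests that
    // compare rendered timestamps fail when the JVM zone is unpinned.
    val t = new Timestamp(0L)
    println(t)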

