Skip to content

Commit

Permalink
[SPARK-45386][SQL]: Fix correctness issue with persist using StorageLevel.NONE on Dataset (#43188)
Browse files Browse the repository at this point in the history

* SPARK-45386: Fix correctness issue with StorageLevel.NONE

* Move to CacheManager

* Add comment
  • Loading branch information
Emil Ejbyfeldt authored Oct 2, 2023
1 parent 8f1b028 commit a0c9ab6
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,9 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
planToCache: LogicalPlan,
tableName: Option[String],
storageLevel: StorageLevel): Unit = {
if (lookupCachedData(planToCache).nonEmpty) {
if (storageLevel == StorageLevel.NONE) {
// Do nothing for StorageLevel.NONE since it will not actually cache any data.
} else if (lookupCachedData(planToCache).nonEmpty) {
logWarning("Asked to cache already cached data.")
} else {
val sessionWithConfigsOff = getOrCloneSessionWithConfigsOff(spark)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types._
import org.apache.spark.storage.StorageLevel

/**
 * Record with an `Int`, a `Double`, a `String` and a nested [[TestDataPoint2]] field.
 *
 * @param x integer field
 * @param y double-precision field
 * @param s string field
 * @param t nested [[TestDataPoint2]] value
 */
case class TestDataPoint(
    x: Int,
    y: Double,
    s: String,
    t: TestDataPoint2)

/**
 * Two-field record; also the type of the nested `t` field of [[TestDataPoint]].
 *
 * @param x integer field
 * @param s string field
 */
case class TestDataPoint2(
    x: Int,
    s: String)
Expand Down Expand Up @@ -2604,6 +2605,11 @@ class DatasetSuite extends QueryTest
parameters = Map("cls" -> classOf[Array[Int]].getName))
}
}

test("SPARK-45386: persist with StorageLevel.NONE should give correct count") {
  // Regression check: persisting with StorageLevel.NONE must not change the
  // result of a subsequent action on the Dataset.
  val input = Seq(1, 2)
  val persisted = input.toDS().persist(StorageLevel.NONE)
  val rowCount = persisted.count()
  assert(rowCount == 2)
}
}

class DatasetLargeResultCollectingSuite extends QueryTest
Expand Down

0 comments on commit a0c9ab6

Please sign in to comment.