Skip to content

Commit

Permalink
Record row counts in benchmark runs that call collect (#1790)
Browse files Browse the repository at this point in the history
Signed-off-by: Andy Grove <[email protected]>
  • Loading branch information
andygrove authored Feb 23, 2021
1 parent 95d949a commit f45a3b7
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ object BenchUtils {
var df: DataFrame = null
val queryStatus = new ListBuffer[String]()
val queryTimes = new ListBuffer[Long]()
val rowCounts = new ListBuffer[Long]()
for (i <- 0 until iterations) {
spark.sparkContext.setJobDescription(s"Benchmark Run: query=$queryDescription; iteration=$i")

Expand All @@ -198,7 +199,9 @@ object BenchUtils {
df = createDataFrame(spark)

resultsAction match {
case Collect() => df.collect()
case Collect() =>
val rows = df.collect()
rowCounts.append(rows.length)
case WriteCsv(path, mode, options) =>
ensureValidColumnNames(df).write.mode(mode).options(options).csv(path)
case WriteOrc(path, mode, options) =>
Expand Down Expand Up @@ -296,6 +299,7 @@ object BenchUtils {
queryDescription,
queryPlan,
queryPlansWithMetrics,
rowCounts,
queryTimes,
queryStatus,
exceptions)
Expand Down Expand Up @@ -796,6 +800,7 @@ case class BenchmarkReport(
query: String,
queryPlan: QueryPlan,
queryPlans: Seq[SparkPlanNode],
rowCounts: Seq[Long],
queryTimes: Seq[Long],
queryStatus: Seq[String],
exceptions: Seq[String])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class BenchUtilsSuite extends FunSuite with BeforeAndAfterEach {
query = "q1",
queryPlan = QueryPlan("logical", "physical"),
Seq.empty,
rowCounts = Seq(10, 10, 10),
queryTimes = Seq(99, 88, 77),
queryStatus = Seq("Completed", "Completed", "Completed"),
exceptions = Seq.empty)
Expand Down

0 comments on commit f45a3b7

Please sign in to comment.