Skip to content

Commit

Permalink
Profiling tool - fix reporting contains dataset when sql time 0 (#2651)
Browse files Browse the repository at this point in the history
* Fix matching on the dataset

Signed-off-by: Thomas Graves <[email protected]>

* Add test for contains dataset
  • Loading branch information
tgravescs authored Jun 9, 2021
1 parent 8db90e2 commit 7d91ae2
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ case class SQLExecutionCase(
duration: Option[Long],
durationStr: String,
sqlQualDuration: Option[Long],
hasDataset: Boolean,
problematic: String = "")

case class SQLPlanMetricsCase(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,10 +347,10 @@ class ApplicationInfo(
case Some(i) => UIUtils.formatDuration(i)
case None => ""
}
val sqlQDuration = if (datasetSQL.exists(_.sqlID == res.sqlID)) {
Some(0L)
val (containsDataset, sqlQDuration) = if (datasetSQL.exists(_.sqlID == res.sqlID)) {
(true, Some(0L))
} else {
durationResult
(false, durationResult)
}
val potProbs = problematicSQL.filter { p =>
p.sqlID == res.sqlID && p.reason.nonEmpty
Expand All @@ -364,6 +364,7 @@ class ApplicationInfo(
duration = durationResult,
durationStr = durationString,
sqlQualDuration = sqlQDuration,
hasDataset = containsDataset,
problematic = finalPotProbs
)
sqlStartNew += sqlExecutionNew
Expand Down Expand Up @@ -851,7 +852,7 @@ class ApplicationInfo(
|'$appId' as `App ID`,
|sq.sqlID,
|sq.duration as `SQL Duration`,
|case when sq.sqlQualDuration > 0 then false else true end as `Contains Dataset Op`,
|sq.hasDataset as `Contains Dataset Op`,
|app.duration as `App Duration`,
|problematic as `Potential Problems`,
|round(executorCPUTime/executorRunTime*100,2) as `Executor CPU Time Percent`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ class EventsProcessor(forQualification: Boolean = false) extends Logging {
None,
"",
None,
false,
""
)
app.sqlStart += sqlExecution
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,4 +149,22 @@ class AnalysisSuite extends FunSuite {
val actualDf = analysis.shuffleSkewCheckSingleApp(apps.head)
assert(actualDf.count() == 0)
}

test("test contains dataset false") {
val qualLogDir = ToolTestUtils.getTestResourcePath("spark-events-qualification")
val logs = Array(s"$qualLogDir/nds_q86_test")

val apps = ToolTestUtils.processProfileApps(logs, sparkSession)
val analysis = new Analysis(apps, None)
val sqlAggMetricsDF = analysis.sqlMetricsAggregation()
sqlAggMetricsDF.createOrReplaceTempView("sqlAggMetricsDF")
val actualDf = analysis.sqlMetricsAggregationDurationAndCpuTime()

val rows = actualDf.collect()
assert(rows.length === 25)
def fieldIndex(name: String) = actualDf.schema.fieldIndex(name)
rows.foreach { row =>
assert(row.getBoolean(fieldIndex("Contains Dataset Op")) == false)
}
}
}

0 comments on commit 7d91ae2

Please sign in to comment.