Commit

stage metric distributions
squito committed Mar 31, 2015
1 parent e48ba32 commit a4b1397
Showing 2 changed files with 123 additions and 0 deletions.
79 changes: 79 additions & 0 deletions core/src/main/scala/org/apache/spark/status/api/v1/AllStagesResource.scala
@@ -24,6 +24,7 @@
import org.apache.spark.executor.{InputMetrics => InternalInputMetrics,
  OutputMetrics => InternalOutputMetrics, ShuffleReadMetrics => InternalShuffleReadMetrics,
  ShuffleWriteMetrics => InternalShuffleWriteMetrics, TaskMetrics => InternalTaskMetrics}
import org.apache.spark.scheduler.{AccumulableInfo => InternalAccumulableInfo, StageInfo}
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.UIData.{StageUIData, TaskUIData}
import org.apache.spark.util.Distribution

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class AllStagesResource(uiRoot: UIRoot) {
@@ -145,6 +146,84 @@ private[v1] object AllStagesResource {
)
}

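  /**
   * Summarize each task metric for the stage as a distribution: every field of the result
   * holds that metric's value at each requested quantile, computed across all tasks of the
   * stage that reported metrics.
   */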
  def taskMetricDistributions(
      allTaskData: Seq[TaskUIData],
      quantiles: Array[Double]): TaskMetricDistributions = {

    val rawMetrics = allTaskData.flatMap { _.taskMetrics }

    // Distribution() returns None for an empty sample, so the .get below assumes at least
    // one task has reported metrics for anything we take quantiles of.
    def getMetric[T](data: Seq[T], f: T => Double): IndexedSeq[Double] =
      Distribution(data.map { d => f(d) }).get.getQuantiles(quantiles)

    /**
     * A helper for one group of optional metrics: `f` picks the sub-metric (e.g. shuffle
     * read metrics) out of a task's metrics, and `metricOption` returns None when no task
     * recorded that sub-metric, so `build` only ever runs over non-empty data.
     */
    abstract class MetricHelper[I, O](f: InternalTaskMetrics => Option[I]) {
      val data: Seq[I] = rawMetrics.flatMap { x => f(x) }
      def build: O
      def m(f: I => Double): IndexedSeq[Double] = getMetric(data, f)
      def metricOption: Option[O] = {
        if (data.isEmpty) {
          None
        } else {
          Some(build)
        }
      }
    }

    def m(f: InternalTaskMetrics => Double): IndexedSeq[Double] =
      getMetric(rawMetrics, f)

    val inputMetrics =
      new MetricHelper[InternalInputMetrics, InputMetricDistributions](_.inputMetrics) {
        def build: InputMetricDistributions = new InputMetricDistributions(
          bytesRead = m(_.bytesRead),
          recordsRead = m(_.recordsRead)
        )
      }.metricOption

    val outputMetrics =
      new MetricHelper[InternalOutputMetrics, OutputMetricDistributions](_.outputMetrics) {
        def build: OutputMetricDistributions = new OutputMetricDistributions(
          bytesWritten = m(_.bytesWritten),
          recordsWritten = m(_.recordsWritten)
        )
      }.metricOption

    val shuffleReadMetrics =
      new MetricHelper[InternalShuffleReadMetrics, ShuffleReadMetricDistributions](_.shuffleReadMetrics) {
        def build: ShuffleReadMetricDistributions = new ShuffleReadMetricDistributions(
          readBytes = m(_.totalBytesRead),
          readRecords = m(_.recordsRead),
          remoteBytesRead = m(_.remoteBytesRead),
          remoteBlocksFetched = m(_.remoteBlocksFetched),
          localBlocksFetched = m(_.localBlocksFetched),
          totalBlocksFetched = m(_.totalBlocksFetched),
          fetchWaitTime = m(_.fetchWaitTime)
        )
      }.metricOption

    val shuffleWriteMetrics =
      new MetricHelper[InternalShuffleWriteMetrics, ShuffleWriteMetricDistributions](_.shuffleWriteMetrics) {
        def build: ShuffleWriteMetricDistributions = new ShuffleWriteMetricDistributions(
          writeBytes = m(_.shuffleBytesWritten),
          writeRecords = m(_.shuffleRecordsWritten),
          writeTime = m(_.shuffleWriteTime)
        )
      }.metricOption

    new TaskMetricDistributions(
      quantiles = quantiles,
      executorDeserializeTime = m(_.executorDeserializeTime),
      executorRunTime = m(_.executorRunTime),
      resultSize = m(_.resultSize),
      jvmGcTime = m(_.jvmGCTime),
      resultSerializationTime = m(_.resultSerializationTime),
      memoryBytesSpilled = m(_.memoryBytesSpilled),
      diskBytesSpilled = m(_.diskBytesSpilled),
      inputMetrics = inputMetrics,
      outputMetrics = outputMetrics,
      shuffleReadMetrics = shuffleReadMetrics,
      shuffleWriteMetrics = shuffleWriteMetrics
    )
  }

  def convertAccumulableInfo(acc: InternalAccumulableInfo): AccumulableInfo = {
    new AccumulableInfo(acc.id, acc.name, acc.update, acc.value)
  }
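Aside: the getMetric helper above delegates the actual quantile math to
org.apache.spark.util.Distribution. As a rough, self-contained sketch of the nearest-rank
scheme involved (illustrative only, with invented names; not the Spark implementation):

object QuantileSketch {
  // Nearest-rank empirical quantiles over a non-empty sample, approximating what
  // Distribution(data).get.getQuantiles(quantiles) computes in taskMetricDistributions.
  def quantiles(data: Seq[Double], probabilities: Seq[Double]): IndexedSeq[Double] = {
    require(data.nonEmpty, "an empty sample has no quantiles (Distribution() would be None)")
    val sorted = data.sorted.toIndexedSeq
    probabilities.map { p =>
      sorted(math.min((p * sorted.length).toInt, sorted.length - 1))
    }.toIndexedSeq
  }
}

// QuantileSketch.quantiles(Seq(40.0, 10.0, 30.0, 20.0), Seq(0.25, 0.5, 0.75))
//   => Vector(20.0, 30.0, 40.0)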
44 changes: 44 additions & 0 deletions core/src/main/scala/org/apache/spark/status/api/v1/api.scala
@@ -194,6 +194,50 @@ class ShuffleWriteMetrics(
    val recordsWritten: Long
)

class TaskMetricDistributions(
    val quantiles: IndexedSeq[Double],

    val executorDeserializeTime: IndexedSeq[Double],
    val executorRunTime: IndexedSeq[Double],
    val resultSize: IndexedSeq[Double],
    val jvmGcTime: IndexedSeq[Double],
    val resultSerializationTime: IndexedSeq[Double],
    val memoryBytesSpilled: IndexedSeq[Double],
    val diskBytesSpilled: IndexedSeq[Double],

    val inputMetrics: Option[InputMetricDistributions],
    val outputMetrics: Option[OutputMetricDistributions],
    val shuffleReadMetrics: Option[ShuffleReadMetricDistributions],
    val shuffleWriteMetrics: Option[ShuffleWriteMetricDistributions]
)

class InputMetricDistributions(
    val bytesRead: IndexedSeq[Double],
    val recordsRead: IndexedSeq[Double]
)

class OutputMetricDistributions(
    val bytesWritten: IndexedSeq[Double],
    val recordsWritten: IndexedSeq[Double]
)

class ShuffleReadMetricDistributions(
    val readBytes: IndexedSeq[Double],
    val readRecords: IndexedSeq[Double],
    val remoteBlocksFetched: IndexedSeq[Double],
    val localBlocksFetched: IndexedSeq[Double],
    val fetchWaitTime: IndexedSeq[Double],
    val remoteBytesRead: IndexedSeq[Double],
    val totalBlocksFetched: IndexedSeq[Double]
)

class ShuffleWriteMetricDistributions(
    val writeBytes: IndexedSeq[Double],
    val writeRecords: IndexedSeq[Double],
    val writeTime: IndexedSeq[Double]
)

class AccumulableInfo (
    val id: Long,
    val name: String,
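Each field in the distribution classes above is a vector aligned position-for-position with
the quantiles vector in TaskMetricDistributions. A small usage sketch under that reading
(the sample values are invented for illustration):

val quantiles = IndexedSeq(0.05, 0.25, 0.5, 0.75, 0.95)
val input = new InputMetricDistributions(
  bytesRead = IndexedSeq(128.0, 1024.0, 4096.0, 16384.0, 65536.0),
  recordsRead = IndexedSeq(1.0, 8.0, 32.0, 128.0, 512.0))

// The i-th entry of each metric vector is that metric's value at quantiles(i):
quantiles.zip(input.bytesRead).foreach { case (q, v) =>
  println(s"bytesRead at quantile $q: $v")
}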
