Tweaks to OpBinScoreEvaluator #233

Merged: 24 commits, Apr 3, 2019

Changes from all commits (24 commits)

e72d405  adding Lift enum to EvaluationMetrics (Feb 26, 2019)
155a113  rearchitecting LiftEvaluator for tmog (Feb 27, 2019)
0bd5e06  Adding tests (Feb 28, 2019)
3350d74  LiftEvaluator cleaning up docs and scalastyle (Feb 28, 2019)
b40e5f1  Merge branch 'master' into shae/LiftPlot (shaeselix, Feb 28, 2019)
cc65e50  Changing NaN to None for json serialization (Feb 28, 2019)
2002585  Merge branch 'master' into shae/LiftPlot (shaeselix, Feb 28, 2019)
b6b9e07  Refactoring LiftEvaluator to extend OpBinaryClassificationEvaluatorBase (Mar 22, 2019)
4dc5f92  Merge remote-tracking branch 'origin/shae/LiftPlot' into shae/LiftPlot (Mar 22, 2019)
03c5956  Merge branch 'master' into shae/LiftPlot (shaeselix, Mar 22, 2019)
8792ebd  Merge branch 'master' into shae/LiftPlot (shaeselix, Mar 27, 2019)
7102c4b  Adapting OpBinScoreEvaluator for LiftPlot needs (Mar 28, 2019)
92809a0  Fixing ModelInsightsTest (Mar 28, 2019)
f3eba65  Fixing scalastyle (Mar 28, 2019)
2d70401  Removing binmetrics from BinaryClassificationMetrics (Mar 28, 2019)
0d22997  Cleaning up some comments (Mar 28, 2019)
c768a0f  Merge branch 'master' into shae/LiftPlot (shaeselix, Mar 28, 2019)
0fa0178  Cleaning up some comments (Mar 29, 2019)
7297a83  Merge branch 'master' into shae/LiftPlot (shaeselix, Mar 29, 2019)
9a722f8  Merge branch 'master' into shae/LiftPlot (shaeselix, Apr 1, 2019)
75dcd59  `sumOfLabels` -> `numberOfPositiveLabels` (Apr 1, 2019)
db93eb6  Merge remote-tracking branch 'origin/shae/LiftPlot' into shae/LiftPlot (Apr 1, 2019)
fe94120  small comment fix (Apr 1, 2019)
e67646a  Merge branch 'master' into shae/LiftPlot (leahmcguire, Apr 2, 2019)

OpBinScoreEvaluator.scala
@@ -34,6 +34,7 @@ import com.fasterxml.jackson.databind.annotation.JsonDeserialize
import com.salesforce.op.UID
import com.twitter.algebird.Operators._
import com.twitter.algebird.Tuple4Semigroup
import org.apache.spark.rdd.RDD
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.types.DoubleType
@@ -46,8 +47,8 @@ import org.slf4j.LoggerFactory
* This evaluator creates the specified number of bins and computes the statistics for each bin
* and returns [[BinaryClassificationBinMetrics]].
*
* @param numOfBins number of bins to produce
* @param uid uid for instance
* @param numOfBins number of bins to produce
* @param uid uid for instance
*/
private[op] class OpBinScoreEvaluator
(
@@ -66,60 +67,69 @@ private[op] class OpBinScoreEvaluator
val labelColumnName = getLabelCol
val dataToUse = makeDataToUse(data, labelColumnName)
.select(col(getProbabilityCol), col(labelColumnName).cast(DoubleType)).rdd
val scoreAndLabels = dataToUse.map {
case Row(prob: Vector, label: Double) => (prob(1), label)
case Row(prob: Double, label: Double) => (prob, label)
}
evaluateScoreAndLabels(scoreAndLabels)
}

if (dataToUse.isEmpty()) {
log.warn("The dataset is empty. Returning empty metrics.")
BinaryClassificationBinMetrics(0.0, Seq(), Seq(), Seq(), Seq())
} else {
val scoreAndLabels = dataToUse.map {
case Row(prob: Vector, label: Double) => (prob(1), label)
case Row(prob: Double, label: Double) => (prob, label)
}
def evaluateScoreAndLabels(scoreAndLabels: RDD[(Double, Double)]): BinaryClassificationBinMetrics = {

val (maxScore, minScore) = scoreAndLabels.map {
case (score, _) => (score, score)
}.fold(1.0, 0.0) {
case ((maxVal, minVal), (scoreMax, scoreMin)) =>
(math.max(maxVal, scoreMax), math.min(minVal, scoreMin))
}
val (maxScore, minScore) = scoreAndLabels.map {
case (score, _) => (score, score)
}.fold(1.0, 0.0) {
case ((maxVal, minVal), (scoreMax, scoreMin)) =>
(math.max(maxVal, scoreMax), math.min(minVal, scoreMin))
}

// Finding stats per bin -> avg score, avg conv rate,
// total num of data points and overall brier score.
implicit val sg = new Tuple4Semigroup[Double, Double, Long, Double]()
val stats = scoreAndLabels.map {
case (score, label) =>
(getBinIndex(score, minScore, maxScore), (score, label, 1L, math.pow(score - label, 2)))
}.reduceByKey(_ + _).map {
case (bin, (scoreSum, labelSum, count, squaredError)) =>
(bin, scoreSum / count, labelSum / count, count, squaredError)
}.collect()

val zero = (new Array[Double](numOfBins), new Array[Double](numOfBins), new Array[Long](numOfBins), 0.0, 0L)
val (averageScore, averageConversionRate, numberOfDataPoints, brierScoreSum, numberOfPoints) =
stats.foldLeft(zero) {
case ((score, convRate, dataPoints, brierScoreSum, totalPoints),
(binIndex, avgScore, avgConvRate, counts, squaredError)) =>
score(binIndex) = avgScore
convRate(binIndex) = avgConvRate
dataPoints(binIndex) = counts
(score, convRate, dataPoints, brierScoreSum + squaredError, totalPoints + counts)
}

// binCenters is the center point in each bin.
// e.g., for bins [(0.0 - 0.5), (0.5 - 1.0)], bin centers are [0.25, 0.75].
val diff = maxScore - minScore
val binCenters = for {i <- 0 until numOfBins} yield minScore + ((diff * i) / numOfBins) + (diff / (2 * numOfBins))

val metrics = BinaryClassificationBinMetrics(
BrierScore = brierScoreSum / numberOfPoints,
binCenters = binCenters,
numberOfDataPoints = numberOfDataPoints,
averageScore = averageScore,
averageConversionRate = averageConversionRate
)

log.info("Evaluated metrics: {}", metrics.toString)
metrics
// Finding stats per bin -> avg score, avg conv rate,
// total num of data points and overall brier score.
implicit val sg = new Tuple4Semigroup[Double, Long, Long, Double]()
val stats = scoreAndLabels.map {
case (score, label) =>
(getBinIndex(score, minScore, maxScore),
(score, if (label > 0.0) 1L else 0L, 1L, math.pow(score - label, 2)))
}.reduceByKey(_ + _).map {
case (bin, (scoreSum, positiveCount, count, squaredError)) =>
(bin, scoreSum, positiveCount, count, squaredError)
}.collect()

stats.toList match {
case Nil => BinaryClassificationBinMetrics.empty
case _ => {
val zero = (new Array[Double](numOfBins), new Array[Double](numOfBins),
new Array[Long](numOfBins), new Array[Long](numOfBins), 0.0, 0L)
val (averageScore, averageConversionRate, numberOfDataPoints, positiveLabels, brierScoreSum, numberOfPoints) =
stats.foldLeft(zero) {
case ((score, convRate, dataPoints, positiveLabels, brierScoreSum, totalPoints),
(binIndex, scoreSum, positiveCount, counts, squaredError)) =>
score(binIndex) = scoreSum / counts
convRate(binIndex) = positiveCount.toDouble / counts
dataPoints(binIndex) = counts
positiveLabels(binIndex) = positiveCount
(score, convRate, dataPoints, positiveLabels, brierScoreSum + squaredError, totalPoints + counts)
}

// binCenters is the center point in each bin.
// e.g., for bins [(0.0 - 0.5), (0.5 - 1.0)], bin centers are [0.25, 0.75].
val diff = maxScore - minScore
val binCenters = for {i <- 0 until numOfBins}
yield minScore + ((diff * i) / numOfBins) + (diff / (2 * numOfBins))

val metrics = BinaryClassificationBinMetrics(
BrierScore = brierScoreSum / numberOfPoints,
binSize = diff / numOfBins,
binCenters = binCenters,
numberOfDataPoints = numberOfDataPoints,
numberOfPositiveLabels = positiveLabels,
averageScore = averageScore,
averageConversionRate = averageConversionRate
)

log.info("Evaluated metrics: {}", metrics.toString)
metrics
}
}
}

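The refactored evaluateScoreAndLabels above bins each score with a getBinIndex helper whose body is not part of this diff. A minimal sketch of what such a helper plausibly does, assuming scores are mapped onto numOfBins equal-width bins spanning [minScore, maxScore]; the signature and implementation here are hypothetical, not the merged code:

object BinIndexSketch {
  // Hypothetical sketch: the real getBinIndex is private to OpBinScoreEvaluator
  // and takes numOfBins from the evaluator instance rather than as a parameter.
  def getBinIndex(score: Double, minScore: Double, maxScore: Double, numOfBins: Int): Int = {
    val diff = maxScore - minScore
    if (diff <= 0.0) 0 // degenerate range: every score falls into bin 0
    else {
      // Linear mapping onto [0, numOfBins); clamp so that score == maxScore lands in the last bin.
      val raw = ((score - minScore) / diff * numOfBins).toInt
      math.min(raw, numOfBins - 1)
    }
  }
}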
@@ -133,21 +143,31 @@ private[op] class OpBinScoreEvaluator
/**
* Metrics of BinaryClassificationBinMetrics
*
* @param BrierScore brier score for overall dataset
* @param binCenters center of each bin
* @param numberOfDataPoints total number of data points in each bin
* @param averageScore average score in each bin
* @param averageConversionRate average conversion rate in each bin
* @param BrierScore brier score for overall dataset
* @param binSize size of each bin
* @param binCenters center of each bin
* @param numberOfDataPoints total number of data points in each bin
* @param numberOfPositiveLabels count of labels > 0 in each bin
* @param averageScore average score in each bin
* @param averageConversionRate average conversion rate in each bin
*/
case class BinaryClassificationBinMetrics
(
BrierScore: Double,
binSize: Double,
@JsonDeserialize(contentAs = classOf[java.lang.Double])
binCenters: Seq[Double],
@JsonDeserialize(contentAs = classOf[java.lang.Long])
numberOfDataPoints: Seq[Long],
@JsonDeserialize(contentAs = classOf[java.lang.Long])
numberOfPositiveLabels: Seq[Long],
@JsonDeserialize(contentAs = classOf[java.lang.Double])
averageScore: Seq[Double],
@JsonDeserialize(contentAs = classOf[java.lang.Double])
averageConversionRate: Seq[Double]
) extends EvaluationMetrics

object BinaryClassificationBinMetrics {
def empty: BinaryClassificationBinMetrics =
BinaryClassificationBinMetrics(0.0, 0.0, Seq(), Seq(), Seq(), Seq(), Seq())
}
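
For reference, the binSize, binCenters, and BrierScore values built above come down to simple arithmetic. A self-contained sketch, assuming four bins over scores in [0.0, 1.0] as in the first test case further down; the sample scoreAndLabels pairs are made up for illustration:

object BinMetricsArithmeticSketch extends App {
  val numOfBins = 4
  val (minScore, maxScore) = (0.0, 1.0)
  val diff = maxScore - minScore

  // Width of each bin and the midpoint of each bin, using the same formulas
  // as OpBinScoreEvaluator above.
  val binSize = diff / numOfBins
  val binCenters =
    for (i <- 0 until numOfBins)
      yield minScore + (diff * i) / numOfBins + diff / (2 * numOfBins)
  println(binSize)    // 0.25
  println(binCenters) // Vector(0.125, 0.375, 0.625, 0.875)

  // Brier score over the whole dataset: mean squared difference between the
  // predicted score and the 0/1 label. The data here is illustrative only.
  val scoreAndLabels = Seq((0.1, 0.0), (0.9, 1.0), (0.6, 0.0))
  val brierScore = scoreAndLabels.map { case (s, l) => math.pow(s - l, 2) }.sum / scoreAndLabels.size
  println(brierScore) // (0.01 + 0.01 + 0.36) / 3
}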

OpBinaryClassificationEvaluator.scala
@@ -48,9 +48,9 @@ import org.slf4j.LoggerFactory
* The metrics are AUROC, AUPR, Precision, Recall, F1 and Error Rate
* Default evaluation returns AUROC
*
* @param name name of default metric
* @param name name of default metric
* @param isLargerBetter is metric better if larger
* @param uid uid for instance
* @param uid uid for instance
*/

private[op] class OpBinaryClassificationEvaluator
@@ -123,7 +123,8 @@ private[op] class OpBinaryClassificationEvaluator
}
}

final protected def getBinaryEvaluatorMetric(
final protected def getBinaryEvaluatorMetric
(
metricName: ClassificationEvalMetric,
dataset: Dataset[_],
default: => Double

ModelInsightsTest.scala
@@ -328,7 +328,7 @@ class ModelInsightsTest extends FlatSpec with PassengerSparkFixtureTest {
pretty should not include m.modelName
}
}
pretty should include("area under precision-recall | 1.0")
pretty should include regex raw"area under precision-recall\s+|\s+1.0"
pretty should include("Model Evaluation Metrics")
pretty should include("Top Model Insights")
pretty should include("Top Positive Correlations")

OpWorkflowTest.scala
@@ -377,8 +377,8 @@ class OpWorkflowTest extends FlatSpec with PassengerSparkFixtureTest {

val prettySummary = fittedWorkflow.summaryPretty()
log.info(prettySummary)
prettySummary should include regex raw"area under precision-recall\s+|\s+1.0\s+|\s+0.0"
prettySummary should include("Selected Model - OpLogisticRegression")
prettySummary should include("area under precision-recall | 1.0 | 0.0")
prettySummary should include("Model Evaluation Metrics")
prettySummary should include("Top Model Insights")
prettySummary should include("Top Positive Correlations")

OpBinScoreEvaluatorTest.scala
@@ -75,8 +75,10 @@ class OpBinScoreEvaluatorTest extends FlatSpec with TestSparkContext {

metrics shouldBe BinaryClassificationBinMetrics(
0.09800605366,
0.25,
Seq(0.125, 0.375, 0.625, 0.875),
Seq(2, 0, 1, 2),
Seq(0, 0, 0, 2),
Seq(0.003205, 0.0, 0.7, 0.99999),
Seq(0.0, 0.0, 0.0, 1.0)
)
@@ -89,8 +91,10 @@ class OpBinScoreEvaluatorTest extends FlatSpec with TestSparkContext {

metrics shouldBe BinaryClassificationBinMetrics(
40.999986666733335,
3.2499975,
Seq(0.62500875, 3.87500625, 7.125003749999999, 10.37500125),
Seq(2, 0, 0, 1),
Seq(1, 0, 0, 1),
Seq(0.49999999999999994, 0.0, 0.0, 12.0),
Seq(0.5, 0.0, 0.0, 1.0)
)
@@ -107,7 +111,7 @@ class OpBinScoreEvaluatorTest extends FlatSpec with TestSparkContext {
val metrics = new OpBinScoreEvaluator(numOfBins = 10)
.setLabelCol(labelEmptyData.name).setPredictionCol(predictionEmptyData.name).evaluateAll(emptyData)

metrics shouldBe BinaryClassificationBinMetrics(0.0, Seq(), Seq(), Seq(), Seq())
metrics shouldBe BinaryClassificationBinMetrics(0.0, 0.0, Seq(), Seq(), Seq(), Seq(), Seq())
}

it should "evaluate bin metrics for skewed data" in {
@@ -116,8 +120,10 @@ class OpBinScoreEvaluatorTest extends FlatSpec with TestSparkContext {

metrics shouldBe BinaryClassificationBinMetrics(
7.294225500000013E-4,
0.2,
Seq(0.1, 0.30000000000000004, 0.5, 0.7, 0.9),
Seq(0, 0, 0, 0, 4),
Seq(0, 0, 0, 0, 4),
Seq(0.0, 0.0, 0.0, 0.0, 0.98617),
Seq(0.0, 0.0, 0.0, 0.0, 1.0)
)
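The per-bin counts and averages asserted above (numberOfDataPoints, numberOfPositiveLabels, averageScore, averageConversionRate) can be reproduced with plain Scala collections. A minimal sketch assuming four equal-width bins over scores in [0.0, 1.0]; the (score, label) pairs are illustrative, not the fixture data from OpBinScoreEvaluatorTest, and the Brier squared-error term is omitted:

object PerBinAggregationSketch extends App {
  val numOfBins = 4

  // Illustrative (score, label) pairs with scores already in [0.0, 1.0].
  val scoreAndLabels = Seq((0.003205, 0.0), (0.03, 0.0), (0.7, 0.0), (0.9999, 1.0), (0.99999, 1.0))

  // Equal-width binning over [0.0, 1.0]; clamp so a score of 1.0 lands in the last bin.
  def binIndex(score: Double): Int = math.min(numOfBins - 1, (score * numOfBins).toInt)

  // Per-bin statistics analogous to what the evaluator accumulates with a
  // Tuple4Semigroup over an RDD: score sum, positive-label count, row count.
  val stats = scoreAndLabels
    .groupBy { case (score, _) => binIndex(score) }
    .map { case (bin, rows) =>
      val scoreSum = rows.map(_._1).sum
      val positives = rows.count(_._2 > 0.0).toLong
      val count = rows.size.toLong
      bin -> (scoreSum / count, positives.toDouble / count, count, positives)
    }

  // averageScore, averageConversionRate, numberOfDataPoints, numberOfPositiveLabels per bin.
  (0 until numOfBins).foreach { bin =>
    val (avgScore, convRate, n, pos) = stats.getOrElse(bin, (0.0, 0.0, 0L, 0L))
    println(f"bin $bin: avgScore=$avgScore%.5f convRate=$convRate%.2f n=$n positives=$pos")
  }
}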

OpBinaryClassificationEvaluatorTest.scala
@@ -33,7 +33,6 @@ package com.salesforce.op.evaluators
import com.salesforce.op.evaluators.BinaryClassEvalMetrics._
import com.salesforce.op.features.types._
import com.salesforce.op.stages.impl.classification.{BinaryClassificationModelSelector, OpLogisticRegression}
import com.salesforce.op.stages.impl.selector.ModelSelectorNames.EstimatorType
import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext}
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.evaluation._
@@ -167,7 +166,7 @@ class OpBinaryClassificationEvaluatorTest extends FlatSpec with TestSparkContext

val (tp, tn, fp, fn, precision, recall, f1) = getPosNegValues(
transformedData2.select(prediction.name, test_label.name).rdd
.map( r => Row(r.getMap[String, Double](0).toMap.toPrediction.prediction, r.getDouble(1)) )
.map(r => Row(r.getMap[String, Double](0).toMap.toPrediction.prediction, r.getDouble(1)))
)

tp.toDouble shouldBe metrics.TP
@@ -197,7 +196,6 @@ class OpBinaryClassificationEvaluatorTest extends FlatSpec with TestSparkContext
}



it should "evaluate the metrics on dataset with only the label and prediction 1" in {
val transformedDataOne = model.setInput(one_label, one_features).transform(one_ds)
