Add metadata to OpStandardScaler to allow for descaling #378

Merged · 11 commits · Aug 5, 2019
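
This PR stores the fitted scaling parameters as `LinearScalerArgs(slope, intercept)` in the output feature's metadata. As a rough sketch of what that enables, the snippet below reads the metadata back and inverts the stored linear map by hand, assuming the map is `f(x) = slope * x + intercept` and relying only on the `ScalerMetadata` / `LinearScalerArgs` API exercised in the test below; the `descaleValue` helper is illustrative, not part of this PR, and the project-specific imports are omitted. In a real workflow you would wire a `DescalerTransformer`, as the new test does.

```scala
import scala.util.{Failure, Success}
import org.apache.spark.sql.types.Metadata
// ScalerMetadata and LinearScalerArgs come from this project (imports omitted here)

// Illustrative helper (assumption: the stored args describe f(x) = slope * x + intercept,
// so the inverse is (y - intercept) / slope).
def descaleValue(scaled: Double, metadata: Metadata): Double =
  ScalerMetadata(metadata) match {
    case Success(meta) =>
      val args = meta.scalingArgs.asInstanceOf[LinearScalerArgs]
      (scaled - args.intercept) / args.slope
    case Failure(err) => throw err
  }
```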
@@ -60,9 +60,17 @@ class OpScalarStandardScaler
val internalScaler = new MLLibStandardScaler(withMean = estimator.getWithMean, withStd = estimator.getWithStd)
val scalerModel = internalScaler.fit(vecData)

val std = scalerModel.std.toArray
val mean = scalerModel.mean.toArray
val stdMean = std.sum / std.length
val meanMean = mean.sum / mean.length
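// Record the scaling as linear scaler metadata (slope = 1 / stdMean, intercept = meanMean / stdMean)
// so downstream stages (e.g. a DescalerTransformer) can invert the transformation.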
val scalingArgs = LinearScalerArgs(1 / stdMean, meanMean / stdMean)
val meta = ScalerMetadata(ScalingType.Linear, scalingArgs).toMetadata()
setMetadata(meta)

new OpScalarStandardScalerModel(
-  std = scalerModel.std.toArray,
-  mean = scalerModel.mean.toArray,
+  std = std,
+  mean = mean,
withStd = scalerModel.withStd,
withMean = scalerModel.withMean,
operationName = operationName,
@@ -44,6 +44,8 @@ import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner

import scala.util.{Failure, Success}


@RunWith(classOf[JUnitRunner])
class OpScalarStandardScalerTest extends OpEstimatorSpec[RealNN, UnaryModel[RealNN, RealNN], OpScalarStandardScaler] {
@@ -148,6 +150,35 @@ class OpScalarStandardScalerTest extends OpEstimatorSpec[RealNN, UnaryModel[RealNN, RealNN], OpScalarStandardScaler] {
assert(sumSqDist <= 0.000001, "===> the sum of squared distances between actual and expected should be zero.")
}

it should "descale and work in standardized workflow" in {
val featureNormalizer = new OpScalarStandardScaler().setInput(testF)
val normedOutput = featureNormalizer.getOutput()
val metadata = featureNormalizer.fit(inputData).getMetadata()
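// Expected standard deviation and mean of the input feature, and the linear
// scaling parameters the scaler should record in its metadata.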
val expectedStd = 90.0 * math.sqrt(37.0)
val expectedMean = 370.0
val expectedSlope = 1 / expectedStd
val expectedIntercept = expectedMean / expectedStd
ScalerMetadata(metadata) match {
case Failure(err) => fail(err)
case Success(meta) =>
meta shouldBe a[ScalerMetadata]
meta.scalingType shouldBe ScalingType.Linear
meta.scalingArgs shouldBe a[LinearScalerArgs]
meta.scalingArgs.asInstanceOf[LinearScalerArgs].slope - expectedSlope should be < 0.001
meta.scalingArgs.asInstanceOf[LinearScalerArgs].intercept - expectedIntercept should be < 0.001
}

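// Descale the scaled feature using its own metadata via a DescalerTransformer,
// then score it through a workflow.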
val descaledResponse = new DescalerTransformer[RealNN, RealNN, RealNN]()
.setInput(normedOutput, normedOutput).getOutput()
val workflow = new OpWorkflow().setResultFeatures(descaledResponse)
val wfModel = workflow.setInputDataset(inputData).train()
val transformed = wfModel.score()

val actual = transformed.collect().map(_.getAs[Double](1))
val expected = Array(-730.0, -640.0, 260.0)
all(actual.zip(expected).map(x => math.abs(x._2 - x._1))) should be < 0.0001
}

private def validateDataframeDoubleColumn(normalizedFeatureDF: DataFrame, scaledFeatureName: String,
targetColumnName: String): Double = {
val sqDistUdf = udf { (leftCol: Double, rightCol: Double) => Math.pow(leftCol - rightCol, 2) }