
refactor + impute #282

Merged -- 17 commits, Nov 4, 2021
@@ -32,13 +32,17 @@ public enum ImputationMethod {
* last known value in each input dimension
*/
PREVIOUS,
/**
* next seen value in each input dimension
*/
NEXT,
/**
* linear interpolation
*/
LINEAR,
/**
* use the RCF imputation; but would often require a minimum number of
* observations and would uses a default (often LINEAR) till that point
* observations and currently defaults to LINEAR
sudiptoguha marked this conversation as resolved.
*/
RCF;
}
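The difference between these strategies can be illustrated with a standalone sketch of the LINEAR case; `linearImpute` below is an illustrative helper, not the library's internal API:

```java
public class ImputeSketch {
    // LINEAR imputation: interpolate between the last value seen before the gap
    // and the next value seen after it; fraction in (0, 1) is the relative
    // position of the missing sample inside the gap
    static double linearImpute(double previous, double next, double fraction) {
        return previous + fraction * (next - previous);
    }

    public static void main(String[] args) {
        // PREVIOUS would yield 1.0, NEXT would yield 3.0; LINEAR at the midpoint:
        System.out.println(linearImpute(1.0, 3.0, 0.5)); // prints 2.0
    }
}
```

PREVIOUS and NEXT fall out as the endpoints (fraction 0 and 1), which is why LINEAR is a natural default until RCF has seen enough observations.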
@@ -77,8 +77,8 @@ public void run() throws Exception {
AnomalyDescriptor result = forest.process(point, count);

if (keyCounter < dataWithKeys.changeIndices.length
&& result.getTimestamp() == dataWithKeys.changeIndices[keyCounter]) {
System.out.println("timestamp " + (result.getTimestamp()) + " CHANGE");
&& result.getInternalTimeStamp() == dataWithKeys.changeIndices[keyCounter]) {
System.out.println("timestamp " + (result.getInputTimestamp()) + " CHANGE");
Contributor:

Are all these print statements (here and elsewhere in the PR) intended to be in the final code, or were they used for debugging?

Contributor Author:

Here (in the examples package) the print statements are for the purposes of demonstration -- how do we use the different fields, as a basis of explanation. Of course, they also serve a dual purpose of debugging -- if we change the algorithm and get a bad value for the "expected/likely value", the output allows one to inspect what exactly happened.

++keyCounter;
}

@@ -90,7 +90,7 @@ public void run() throws Exception {
if (result.getAnomalyGrade() != 0) {
System.out.print("timestamp " + (count) + " RESULT value ");
for (int i = 0; i < baseDimensions; i++) {
System.out.print(result.getCurrentValues()[i] + ", ");
System.out.print(result.getCurrentInput()[i] + ", ");
}
System.out.print("score " + result.getRcfScore() + ", grade " + result.getAnomalyGrade() + ", ");

@@ -112,10 +112,9 @@ public void run() throws Exception {
System.out.print("expected ");
for (int i = 0; i < baseDimensions; i++) {
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
if (result.getCurrentValues()[i] != result.getExpectedValuesList()[0][i]) {
System.out.print(
"( " + (result.getCurrentValues()[i] - result.getExpectedValuesList()[0][i])
+ " ) ");
if (result.getCurrentInput()[i] != result.getExpectedValuesList()[0][i]) {
System.out.print("( "
+ (result.getCurrentInput()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
}
}
}
@@ -15,11 +15,14 @@

package com.amazon.randomcutforest.examples.parkservices;

import static com.amazon.randomcutforest.CommonUtils.checkArgument;

import java.util.Arrays;
import java.util.Random;

import com.amazon.randomcutforest.config.ForestMode;
import com.amazon.randomcutforest.config.Precision;
import com.amazon.randomcutforest.config.TransformMethod;
import com.amazon.randomcutforest.examples.Example;
import com.amazon.randomcutforest.parkservices.AnomalyDescriptor;
import com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest;
@@ -46,7 +49,7 @@ public String description() {
public void run() throws Exception {
// Create and populate a random cut forest

int shingleSize = 8;
int shingleSize = 4;
int numberOfTrees = 50;
int sampleSize = 256;
Precision precision = Precision.FLOAT_32;
@@ -57,15 +60,29 @@ public void run() throws Exception {
int baseDimensions = 1;

long count = 0;

int dimensions = baseDimensions * shingleSize;
TransformMethod transformMethod = TransformMethod.NONE;
ThresholdedRandomCutForest forest = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions)
.randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize)
.internalShinglingEnabled(true).precision(precision).anomalyRate(0.01).forestMode(ForestMode.STANDARD)
.outputAfter(32).initialAcceptFraction(0.125).adjustThreshold(true).build();
.weightTime(0).transformMethod(transformMethod).normalizeTime(true).outputAfter(32)
.initialAcceptFraction(0.125).build();
ThresholdedRandomCutForest second = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions)
.randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize)
.internalShinglingEnabled(true).precision(precision).anomalyRate(0.01)
.forestMode(ForestMode.TIME_AUGMENTED).weightTime(0).transformMethod(transformMethod)
.normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();

// ensuring that the parameters are the same; otherwise the grades/scores cannot
// be the same
// weightTime has to be 0
forest.setLowerThreshold(1.1);
second.setLowerThreshold(1.1);
forest.setHorizon(0.75);
second.setHorizon(0.75);

long seed = new Random().nextLong();
Random noise = new Random();
Random noise = new Random(0);

System.out.println("seed = " + seed);
// change the last argument seed for a different run
@@ -80,7 +97,11 @@ public void run() throws Exception {
// then the noise corresponds to a jitter; one can try TIME_AUGMENTED and
// .normalizeTime(true)

AnomalyDescriptor result = forest.process(point, 100 * count + noise.nextInt(10) - 5);
long timestamp = 100 * count + noise.nextInt(10) - 5;
AnomalyDescriptor result = forest.process(point, timestamp);
AnomalyDescriptor test = second.process(point, timestamp);
checkArgument(Math.abs(result.getRcfScore() - test.getRcfScore()) < 1e-10, " error");
checkArgument(Math.abs(result.getAnomalyGrade() - test.getAnomalyGrade()) < 1e-10, " error");

if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
System.out
@@ -89,9 +110,9 @@ public void run() throws Exception {
}

if (result.getAnomalyGrade() != 0) {
System.out.print("timestamp " + count + " RESULT value ");
System.out.print("timestamp " + count + " RESULT value " + result.getInternalTimeStamp() + " ");
for (int i = 0; i < baseDimensions; i++) {
System.out.print(result.getCurrentValues()[i] + ", ");
System.out.print(result.getCurrentInput()[i] + ", ");
}
System.out.print("score " + result.getRcfScore() + ", grade " + result.getAnomalyGrade() + ", ");
if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
@@ -115,10 +136,9 @@ public void run() throws Exception {
System.out.print("expected ");
for (int i = 0; i < baseDimensions; i++) {
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
if (result.getCurrentValues()[i] != result.getExpectedValuesList()[0][i]) {
System.out.print(
"( " + (result.getCurrentValues()[i] - result.getExpectedValuesList()[0][i])
+ " ) ");
if (result.getCurrentInput()[i] != result.getExpectedValuesList()[0][i]) {
System.out.print("( "
+ (result.getCurrentInput()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
}
}
}
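The assertions in the hunk above check that a STANDARD forest and a TIME_AUGMENTED forest with weightTime(0) produce identical scores and grades. A toy sketch of the underlying intuition (not the library's internals): a zero-weighted extra coordinate contributes nothing to distances, so the augmented point behaves like the original.

```java
import java.util.Arrays;

public class ZeroWeightSketch {
    // L1 distance between two points of equal dimension
    static double l1(double[] a, double[] b) {
        double sum = 0;
        for (int i = 0; i < a.length; i++) {
            sum += Math.abs(a[i] - b[i]);
        }
        return sum;
    }

    // append a time coordinate scaled by a weight; weight 0 erases it
    static double[] augment(double[] point, double time, double weight) {
        double[] out = Arrays.copyOf(point, point.length + 1);
        out[point.length] = weight * time;
        return out;
    }

    public static void main(String[] args) {
        double[] a = { 1.0, 2.0 };
        double[] b = { 4.0, 6.0 };
        System.out.println(l1(a, b)); // 7.0
        // with weight 0, wildly different timestamps do not change the distance
        System.out.println(l1(augment(a, 100, 0.0), augment(b, 2500, 0.0))); // 7.0
    }
}
```

With a nonzero weightTime the time coordinate would perturb distances, and the two forests could legitimately diverge.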
@@ -82,7 +82,7 @@ public void run() throws Exception {
if (result.getAnomalyGrade() != 0) {
System.out.print("timestamp " + (count + shingleSize - 1) + " RESULT value ");
for (int i = (shingleSize - 1) * baseDimensions; i < shingleSize * baseDimensions; i++) {
System.out.print(result.getCurrentValues()[i] + ", ");
System.out.print(result.getCurrentInput()[i] + ", ");
}
System.out.print("score " + result.getRcfScore() + ", grade " + result.getAnomalyGrade() + ", ");

@@ -104,10 +104,10 @@ public void run() throws Exception {
System.out.print("expected ");
for (int i = 0; i < baseDimensions; i++) {
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
if (result.getCurrentValues()[(shingleSize - 1) * baseDimensions
if (result.getCurrentInput()[(shingleSize - 1) * baseDimensions
+ i] != result.getExpectedValuesList()[0][i]) {
System.out
.print("( " + (result.getCurrentValues()[(shingleSize - 1) * baseDimensions + i]
.print("( " + (result.getCurrentInput()[(shingleSize - 1) * baseDimensions + i]
- result.getExpectedValuesList()[0][i]) + " ) ");
}
}
@@ -83,7 +83,7 @@ public void run() throws Exception {
AnomalyDescriptor result = forest.process(data, time);

if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
System.out.print("Sequence " + count + " stamp " + (result.getTimestamp()) + " CHANGE ");
System.out.print("Sequence " + count + " stamp " + (result.getInternalTimeStamp()) + " CHANGE ");
if (!anomalyState) {
System.out.println(" to Distribution 1 ");
} else {
@@ -94,7 +94,7 @@ public void run() throws Exception {
}

if (result.getAnomalyGrade() != 0) {
System.out.print("Sequence " + count + " stamp " + (result.getTimestamp()) + " RESULT ");
System.out.print("Sequence " + count + " stamp " + (result.getInternalTimeStamp()) + " RESULT ");
System.out.print("score " + result.getRcfScore() + ", grade " + result.getAnomalyGrade() + ", ");

if (result.isExpectedValuesPresent()) {
@@ -105,7 +105,7 @@ public void run() throws Exception {
+ (result.getOldTimeStamp() - result.getExpectedTimeStamp() + ")"));
} else {
System.out.print("expected " + result.getExpectedTimeStamp() + " ( "
+ (result.getTimestamp() - result.getExpectedTimeStamp() + ")"));
+ (result.getInternalTimeStamp() - result.getExpectedTimeStamp() + ")"));
}
}
System.out.println();
@@ -26,7 +26,7 @@

@Getter
@Setter
public class AnomalyDescriptor {
public class AnomalyDescriptor extends RCFComputeDescriptor {

public static int NUMBER_OF_EXPECTED_VALUES = 1;

@@ -40,30 +40,12 @@ public class AnomalyDescriptor {
// information
DiVector attribution;

// sequence index (the number of updates to RCF) -- it is possible in imputation
// that
// the number of updates more than the input tuples seen by the overall program
long totalUpdates;

// timestamp (basically a sequence index, but can be scaled and jittered as in
// the example);
// kept as long for potential future use
long timestamp;

// if the anomaly is due to timestamp when it is augmented only for current time
long expectedTimeStamp;

// confidence, for both anomalies/non-anomalies
double dataConfidence;

// number of trees in the forest
int forestSize;

// flag indicating of expected values are present -- one reason for them not
// being present
// is that forecasting can requires more values than anomaly detection,
boolean expectedValuesPresent;

// flag indicating if the anomaly is the start of an anomaly or part of a run of
// anomalies
boolean startOfAnomaly;
@@ -82,14 +64,11 @@ public class AnomalyDescriptor {
// a flattened version denoting the basic contribution of each input variable
// (not shingled) for the
// time slice indicated by relativeIndex
double[] currentTimeAttribution;
double[] relevantAttribution;

// when time is appended
// when time is appended for the anomalous time slice
double timeAttribution;

// current values
double[] currentValues;

// the values being replaced; may correspond to past
double[] oldValues;

@@ -102,22 +81,23 @@ public class AnomalyDescriptor {
// likelihood values for the list
double[] likelihoodOfValues;

// the threshold used in inference
double threshold;

public void setCurrentValues(double[] currentValues) {
this.currentValues = Arrays.copyOf(currentValues, currentValues.length);
}

public void setAttribution(DiVector attribution) {
this.attribution = new DiVector(attribution);
}

public void setOldValues(double[] values) {
this.oldValues = Arrays.copyOf(values, values.length);
oldValues = copyIfNotnull(values);
}

public void setCurrentTimeAttribution(double[] values) {
this.currentTimeAttribution = Arrays.copyOf(values, values.length);
public boolean isExpectedValuesPresent() {
return expectedValuesList != null;
}

public void setRelevantAttribution(double[] values) {
this.relevantAttribution = copyIfNotnull(values);
}

public void setExpectedValues(int position, double[] values, double likelihood) {
@@ -131,4 +111,5 @@ public void setExpectedValues(int position, double[] values, double likelihood)
expectedValuesList[position] = Arrays.copyOf(values, values.length);
likelihoodOfValues[position] = likelihood;
}

}
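The refactor above moves shared streaming state (totalUpdates, timestamps, current input) out of AnomalyDescriptor into a base RCFComputeDescriptor. A reduced sketch of the resulting shape (field names follow the diff; the class bodies are trimmed for illustration and are not the library's full classes):

```java
public class RefactorSketch {
    // base class: state common to any streaming RCF computation
    static class ComputeState {
        long totalUpdates;
        long internalTimeStamp;
        double[] currentInput;
    }

    // subclass: anomaly-specific additions
    static class AnomalyState extends ComputeState {
        double anomalyGrade;
        double[][] expectedValuesList;
    }

    public static void main(String[] args) {
        AnomalyState s = new AnomalyState();
        s.totalUpdates = 5;   // inherited from the base descriptor
        s.anomalyGrade = 0.8; // specific to the anomaly descriptor
        System.out.println(s.totalUpdates + " " + s.anomalyGrade); // prints 5 0.8
    }
}
```

Splitting the classes this way lets other park services reuse the compute state without carrying anomaly-only fields.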
@@ -0,0 +1,101 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.randomcutforest.parkservices;

import java.util.Arrays;

import lombok.Getter;
import lombok.Setter;

/**
* a basic class that is used to store the internal state of the streaming
* processing in ThresholdedRandomCutForest and others.
*/
@Getter
@Setter
public class RCFComputeDescriptor {

// sequence index (the number of updates to RCF) -- it is possible in imputation
Collaborator:

Could you explain why the number of updates can be more than the number of input tuples seen by the overall program? Can the following be a cause?

Say I have points 1-10 and point 15, internal shingling is enabled, and my shingle size is 6. To get the ball rolling, I need to impute points 11-14. Then I need to update the RCF with points 11-14, and the total updates count is 4 more than the input tuples seen by the overall program.

Also, can this happen (the number of updates exceeding the input tuples seen by the overall program) with external shingling?

Contributor Author:

One of the concerns related to any model is: would we update the model with values that were just imputed from the same model? There are some scenarios (especially when the number of imputations is low) where that makes sense. But if there are more errors/missing entries then it may make sense to control that -- this is done by useImputedFraction (we only admit points where the ratio of imputed entries to total entries is below useImputedFraction).
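The admission rule described in this reply can be sketched as follows; `admitForUpdate` and its parameters are illustrative names for the useImputedFraction check, not the library's exact internals:

```java
public class ImputedFractionSketch {
    // admit a shingle for a model update only when the fraction of imputed
    // entries stays below the configured threshold
    static boolean admitForUpdate(int imputedEntries, int totalEntries, double useImputedFraction) {
        return (double) imputedEntries / totalEntries < useImputedFraction;
    }

    public static void main(String[] args) {
        // shingle size 6, threshold 0.5: 4 imputed entries are too many...
        System.out.println(admitForUpdate(4, 6, 0.5)); // false
        // ...but 2 imputed entries are acceptable
        System.out.println(admitForUpdate(2, 6, 0.5)); // true
    }
}
```

In the reviewer's scenario above, whether points 11-14 feed back into the model would thus depend on how much of each shingle was imputed.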

// that
// the number of updates more than the input tuples seen by the overall program
long totalUpdates;

// internal timestamp (basically a sequence index, but can be scaled and
// jittered as in
// the example);
// kept as long for potential future use
long internalTimeStamp;

// number of trees in the forest
int numberOfTrees;

// current values
double[] currentInput;

// input timestamp
long inputTimestamp;

// potential number of imputes before processing current point
int numberOfImputes;

// actual, potentially transformed point on which compute occurs
double[] RCFPoint;

// expected RCFPoint for the current point
double[] expectedRCFPoint;

// internal timestamp of last anomaly
long lastAnomalyInternalTimestamp;

// expected point at anomaly
sudiptoguha marked this conversation as resolved.
double[] lastExpectedPoint;

public void setCurrentInput(double[] currentValues) {
this.currentInput = copyIfNotnull(currentValues);
}

public double[] getCurrentInput() {
return copyIfNotnull(currentInput);
}

public void setExpectedRCFPoint(double[] point) {
expectedRCFPoint = copyIfNotnull(point);
}

public double[] getExpectedRCFPoint() {
return copyIfNotnull(expectedRCFPoint);
}

public void setRCFPoint(double[] point) {
RCFPoint = copyIfNotnull(point);
}

public double[] getRCFPoint() {
return copyIfNotnull(RCFPoint);
}

public void setLastExpectedPoint(double[] point) {
lastExpectedPoint = copyIfNotnull(point);
}

public double[] getLastExpectedPoint() {
return copyIfNotnull(lastExpectedPoint);
}

protected double[] copyIfNotnull(double[] array) {
return array == null ? null : Arrays.copyOf(array, array.length);
}
}
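The copyIfNotnull pattern used by the getters and setters above is defensive copying: callers can neither mutate the descriptor's internal arrays nor be affected by later changes to them. A minimal standalone illustration (class and method names are for this sketch only):

```java
import java.util.Arrays;

public class DefensiveCopySketch {
    private double[] point;

    // store a copy so later mutation of the caller's array has no effect
    void setPoint(double[] values) {
        point = values == null ? null : Arrays.copyOf(values, values.length);
    }

    // return a copy so the caller cannot mutate internal state
    double[] getPoint() {
        return point == null ? null : Arrays.copyOf(point, point.length);
    }

    public static void main(String[] args) {
        DefensiveCopySketch d = new DefensiveCopySketch();
        double[] input = { 1.0, 2.0 };
        d.setPoint(input);
        input[0] = 99.0;                     // mutate the caller's array...
        System.out.println(d.getPoint()[0]); // ...the stored copy still prints 1.0
    }
}
```

The null handling matters here because several descriptor fields (expectedRCFPoint, lastExpectedPoint) are legitimately absent until an anomaly is observed.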