From 2b43c8abd3a3874360476804e36a7496500f5113 Mon Sep 17 00:00:00 2001 From: Ed Savage <32410745+edsavage@users.noreply.github.com> Date: Tue, 25 Sep 2018 15:30:10 +0100 Subject: [PATCH] [ML] Modify thresholds for normalization triggers (#33663) [ML] Modify thresholds for normalization triggers The (arbitrary) threshold factors used to judge if scores have changed significantly enough to trigger a look-back renormalization have been changed to values that reduce the frequency of such renormalizations. Added a clause to treat changes in scores as a 'big change' if it would result in a change of severity reported in the UI. Also altered the clause affecting small scores so that a change should be considered big if scores have changed by at least 1.5. Relates https://github.com/elastic/machine-learning-qa/issues/263 --- .../ml/job/process/normalizer/Normalizer.java | 21 ++++++++++++++----- .../process/normalizer/NormalizerTests.java | 2 +- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizer.java index 22e7d3ba99598..74eb01987c562 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizer.java @@ -190,23 +190,34 @@ private double mergeRecursively(Iterator scoresIter, Normaliza * Encapsulate the logic for deciding whether a change to a normalized score * is "big". *

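- * Current logic is that a big change is a change of at least 1 or more than - * than 50% of the higher of the two values. + * Current logic is that a change is considered big if any of the following criteria are met: + * <ul> + * <li>the change would result in a change of severity reported in the UI + * (i.e. the old and new scores fall into different bands of width 25)</li> + * <li>the absolute change is at least 1.5</li> + * <li>the lower of the two values is less than 33% of the higher of the two values</li> + * </ul>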

+ * These values have been chosen through a process of experimentation, in particular it was desired to reduce + * the number of updates written to the results index due to renormalization events for performance reasons + * while not changing the normalized scores greatly * * @param oldVal The old value of the normalized score * @param newVal The new value of the normalized score * @return true if the update is considered "big" */ private static boolean isBigUpdate(double oldVal, double newVal) { - if (Math.abs(oldVal - newVal) >= 1.0) { + if ((int) (oldVal / 25.0) != (int) (newVal / 25.0)) { + return true; + } + if (Math.abs(oldVal - newVal) >= 1.5) { return true; } if (oldVal > newVal) { - if (oldVal * 0.5 > newVal) { + if (oldVal * 0.33 > newVal) { return true; } } else { - if (newVal * 0.5 > oldVal) { + if (newVal * 0.33 > oldVal) { return true; } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerTests.java index d06146ad53fdb..661eeca98db8f 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerTests.java @@ -29,7 +29,7 @@ public class NormalizerTests extends ESTestCase { private static final String INDEX_NAME = "foo-index"; private static final String QUANTILES_STATE = "someState"; private static final int BUCKET_SPAN = 600; - private static final double INITIAL_SCORE = 2.0; + private static final double INITIAL_SCORE = 3.0; private static final double FACTOR = 2.0; private Bucket generateBucket(Date timestamp) throws IOException {