Merge pull request #62 from RoyalHaskoningDHV/backup

Backup from devops (missed 2 versions)
RoyalHaskoningDHV · Aug 23, 2022 · aa4dcf6 · aa4dcf6
2 parents d9a9ac7 + 4adb429
commit aa4dcf6
Show file tree

Hide file tree

Showing 20 changed files with 351 additions and 122 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,16 @@ Version X.Y.Z stands for:
 
 -------------
 
+## Version 3.0.4
+
+### Changes
+- Added `average_type` to `BaseTimeseriesRegressor.__init__()`.
+- `MLPTimeseriesRegressor.__init__()` now passes `average_type` to `BaseTimeseriesRegressor.__init__()`.
+- Updated `BaseTimeseriesRegressor.score()` to account for `self.average_type`: in case of "mean" it takes the MSE of the average predictions and in case of "median" it takes the MAE of the average predictions.
+- Fixed various spelling errors in `CHANGELOG.md` and `models`.
+- Updated package dependencies for scikit-learn.
+- Changed the DeepExplainer to the model-agnostic KernelExplainer, so we can remove all the v1 dependencies on tensorflow.
+- Fixed pytest MPL bug by temporarily setting it to a previous version.
 ## Version 3.0.3
 
 ### New features
@@ -57,6 +67,7 @@ No changes, version bump only.
 ### Changes
 - Added `.readthedocs.yml` and `docs/requirements.txt` to include requirements for readthedocs build.
 
+
 ## Version 2.10.2
 
 ### Changes
@@ -67,7 +78,7 @@ No changes, version bump only.
 ## Version 2.10.1
 
 ### Changes
-- Revert version changes in `scikit-learn` and `tenforflow` due to compatibility issues
+- Revert version changes in `scikit-learn` and `tensorflow` due to compatibility issues
 
 ## 2.10.0
 
@@ -296,7 +307,7 @@ the class. This was unwanted.
 - Fixed failing unit tests by removing tensorflow v1 code
 - Fixed QuantileMLP, where the target would stay an integer, which fails with our custom loss functions
 - Updated optional dependencies to everything we use
-- With the latest pandas version a UTC to string conversio has been fixed. Removed our fix, upped the pandas version
+- With the latest pandas version a UTC to string conversion has been fixed. Removed our fix, upped the pandas version
 - Updated scikit-learn to at least 0.21, which is required for the iterative imputer
 
 ### Development changes
@@ -324,10 +335,10 @@ the class. This was unwanted.
 - `sam.models.SamQuantileMLP.quantile_feature_importances`: now has argument sum_time_components that summarizes feature importances for different features generated for a single component (i.e. in onehot encoding).
 
 ### Changes
-- `sam.featurew_engineering.automatic_rolling_engineering`: `estimator_type` argument can now also be 'bayeslin', which should be used if one hot components are used
+- `sam.feature_engineering.automatic_rolling_engineering`: `estimator_type` argument can now also be 'bayeslin', which should be used if one hot components are used
 
 ### Bugfixes
-- `sam.featurew_engineering.automatic_rolling_engineering`: constant features are no longer deleted (broke one hot features)
+- `sam.feature_engineering.automatic_rolling_engineering`: constant features are no longer deleted (broke one hot features)
 
 ## Version 2.0.9
 

diff --git a/sam/exploration/find_incidents.py b/sam/exploration/find_incidents.py
@@ -307,7 +307,7 @@ def incident_curves_information(
             "OUTLIER_TYPE": lambda x: x.iloc[0],
         }
     )
-    streaks.columns = ["_".join(x) for x in streaks.columns.ravel()]
+    streaks.columns = ["_".join(x) for x in streaks.columns]
     streaks = streaks.rename(
         columns={
             "OUTLIER_CURVE_count": "OUTLIER_DURATION",

diff --git a/sam/feature_engineering/decompose_datetime.py b/sam/feature_engineering/decompose_datetime.py
@@ -57,10 +57,10 @@ def get_maxes_from_strings(cls, cyclicals: Sequence[str]) -> List[int]:
 
 def decompose_datetime(
     df: pd.DataFrame,
-    column: str = "TIME",
-    components: Optional[List[str]] = None,
-    cyclicals: Optional[List[str]] = None,
-    onehots: Optional[List[str]] = None,
+    column: Optional[str] = "TIME",
+    components: Optional[Sequence[str]] = None,
+    cyclicals: Optional[Sequence[str]] = None,
+    onehots: Optional[Sequence[str]] = None,
     remove_categorical: bool = True,
     keep_original: bool = True,
     cyclical_maxes: Optional[Sequence[int]] = None,

diff --git a/sam/feature_engineering/tests/test_decompose_datetime.py b/sam/feature_engineering/tests/test_decompose_datetime.py
@@ -410,7 +410,7 @@ def test_timezone_dutch_to_utc(self):
                 components=["day", "hour", "week", "secondofday"],
                 timezone="UTC",
             )
-        self.assertEquals(
+        self.assertEqual(
             str(exc.exception),
             "Data should either be in UTC timezone or it should have no"
             " timezone information (assumed to be in UTC)",

diff --git a/sam/metrics/__init__.py b/sam/metrics/__init__.py
@@ -14,13 +14,19 @@
 from .mase import mean_absolute_scaled_error
 from .quantile_evaluation import compute_quantile_crossings, compute_quantile_ratios
 from .r2_calculation import train_mean_r2, train_r2
-from .tilted_loss import tilted_loss
+from .tilted_loss_metrics import (
+    joint_mae_tilted_loss,
+    joint_mse_tilted_loss,
+    tilted_loss,
+)
 
 __all__ = [
     "incident_recall",
     "make_incident_recall_scorer",
     "precision_incident_recall_curve",
     "mean_absolute_scaled_error",
+    "joint_mae_tilted_loss",
+    "joint_mse_tilted_loss",
     "tilted_loss",
     "keras_tilted_loss",
     "keras_rmse",

diff --git a/sam/metrics/custom_callbacks.py b/sam/metrics/custom_callbacks.py
@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Dict, Sequence
 
 import numpy as np
 import pandas as pd
@@ -9,9 +9,9 @@
 class R2Evaluation(Callback):
     def __init__(
         self,
-        all_data: Dict[str, np.array],
+        all_data: Dict[str, np.ndarray],
         prediction_cols: list,
-        predict_ahead: int,
+        predict_ahead: Sequence[int],
     ):
         """
         Custom keras callback that computes r2 compared to the training mean.

diff --git a/sam/metrics/incident_recall.py b/sam/metrics/incident_recall.py
@@ -7,9 +7,9 @@
 
 
 def incident_recall(
-    y_incidents: np.array,
-    y_pred: np.array,
-    range_pred: Tuple[int] = (0, 0),
+    y_incidents: np.ndarray,
+    y_pred: np.ndarray,
+    range_pred: Tuple[int, int] = (0, 0),
 ):
     """
     Given `y_pred`, `y_incidents` and a prediction range, see what percentage of incidents in
@@ -105,10 +105,10 @@ def incident_recall_scorer(clf, X):
 
 
 def _merge_thresholds(
-    left_t: np.array,
-    right_t: np.array,
-    left_val: np.array,
-    right_val: np.array,
+    left_t: np.ndarray,
+    right_t: np.ndarray,
+    left_val: np.ndarray,
+    right_val: np.ndarray,
 ):
     """
     Helper function that merges two different thresholds. Does this by iterating over the
@@ -155,7 +155,7 @@ def step_ahead(new_t, new_val, saved_val, ix, old_t, old_val):
 
 
 def precision_incident_recall_curve(
-    y_incidents: np.array, y_pred: np.array, range_pred: Tuple[int, int] = (0, 0)
+    y_incidents: np.ndarray, y_pred: np.ndarray, range_pred: Tuple[int, int] = (0, 0)
 ):
     """
     Analogous to `sklearn.metrics.precision_recall_curve

diff --git a/sam/metrics/quantile_evaluation.py b/sam/metrics/quantile_evaluation.py
@@ -36,7 +36,7 @@ def compute_quantile_ratios(
     qs = [float(c.split("_")[-1]) for c in pred.columns if "mean" not in c]
 
     quantile_ratios = {
-        # mean here computes ratio (mean of True/Falses - 0/1s)
+        # mean here computes ratio (mean of True/False - 0/1s)
         q: (y < pred["predict_lead_%d_q_" % predict_ahead + str(q)]).mean()
         for q in qs
     }
@@ -96,7 +96,7 @@ def compute_quantile_crossings(
     # now replace the 'mean' part with 0.5 in the predictions
     pred.columns = [c.replace("mean", "q_0.5") for c in pred.columns]
 
-    # make sure quantiles are sorted if they arent already:
+    # make sure quantiles are sorted if they aren't already:
     qs = np.sort(qs)[::-1]
 
     # now compute the quantile crossings

diff --git a/sam/metrics/tests/test_incident_recall.py b/sam/metrics/tests/test_incident_recall.py
@@ -75,7 +75,9 @@ def testCurve(self):
         y_incidents = [0, 0, 0, 1]
         y_pred = [0.1, 0.2, 0.3, 0.4]
         p, r, t = precision_incident_recall_curve(y_incidents, y_pred, range_pred=(0, 1))
-        assert_array_almost_equal(p, np.array([0.5, 0.666667, 1, 1, 1]))
+        # Note: Behaviour of sklearn precision recall curve changed in version 1.1.1
+        # Now the number of thresholds = number of unique predictions
+        assert_array_almost_equal(p, np.array([0.5, 0.6666666666666666, 1, 1, 1]))
         assert_array_equal(r, np.array([1, 1, 1, 1, 0]))
         assert_array_equal(t, np.array([0.1, 0.2, 0.3, 0.4]))
 

diff --git a/sam/metrics/tests/test_keras_metrics.py b/sam/metrics/tests/test_keras_metrics.py
@@ -14,9 +14,6 @@
 skipkeras = False
 try:
     import tensorflow as tf  # noqa: F401
-
-    # Necessary for shap DeepExplainer, see: https://github.com/slundberg/shap/issues/2189
-    tf.compat.v1.disable_v2_behavior()
     import tensorflow.keras.backend as K
     from tensorflow.keras.layers import Input
 except ImportError:

diff --git a/sam/metrics/tests/test_tilted_loss.py b/sam/metrics/tests/test_tilted_loss.py
diff --git a/sam/metrics/tests/test_tilted_loss_metrics.py b/sam/metrics/tests/test_tilted_loss_metrics.py
@@ -0,0 +1,62 @@
+import unittest
+
+import numpy as np
+import pandas as pd
+from numpy.testing import assert_almost_equal
+from sam.metrics import joint_mae_tilted_loss, joint_mse_tilted_loss, tilted_loss
+
+
+class TestTiltedLoss(unittest.TestCase):
+    def setUp(self) -> None:
+        actual_data = {
+            "output_1": [1, 2, 3, 4, 5],
+            "output_2": [1, 3, 5, 7, 9],
+        }
+        self.y_true_df = pd.DataFrame(data=actual_data)
+
+        pred_data = {
+            "output_1_quantile_1": [0.9, 1.9, 2.9, 3.9, 4.9],
+            "output_2_quantile_1": [0.5, 2.5, 4.5, 6.5, 8.5],
+            "output_1_quantile_2": [1.1, 2.1, 3.1, 4.1, 5.1],
+            "output_2_quantile_2": [1.5, 3.5, 5.5, 7.5, 9.5],
+            "output_1_mean": [0.9, 2.1, 2.9, 4.1, 4.9],
+            "output_2_mean": [1.1, 2.9, 5.1, 6.9, 9.1],
+        }
+        self.y_pred_df = pd.DataFrame(data=pred_data)
+
+        self.quantiles = [0.3, 0.7]
+        self.n_targets = 2
+
+        return super().setUp()
+
+    def test_tilted_loss(self):
+        y_true = [1, 2, 3, 4, 5]
+        y_pred = [1.1, 2.1, 3.1, 3.9, 4.9]
+
+        assert_almost_equal(tilted_loss(y_true, y_pred, 0.1), 0.058)
+        assert_almost_equal(tilted_loss(y_true, y_pred, 0.5), 0.05)
+        assert_almost_equal(tilted_loss(y_true, y_pred, 0.9), 0.042)
+
+        # For quantile 0.5 the tilted loss should equal exactly half the MAE
+        mae = np.mean(np.abs(np.array(y_true) - np.array(y_pred)))
+        assert_almost_equal(mae / 2, tilted_loss(y_true, y_pred, 0.5))
+
+    def test_joint_mae_tilted_loss(self):
+        assert_almost_equal(
+            joint_mae_tilted_loss(
+                self.y_true_df, self.y_pred_df, quantiles=self.quantiles, n_targets=self.n_targets
+            ),
+            0.56,
+        )
+
+    def test_joint_mse_tilted_loss(self):
+        assert_almost_equal(
+            joint_mse_tilted_loss(
+                self.y_true_df, self.y_pred_df, quantiles=self.quantiles, n_targets=self.n_targets
+            ),
+            0.38,
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/sam/metrics/tilted_loss.py b/sam/metrics/tilted_loss.py