Skip to content

Commit

Permalink
Merge pull request #62 from RoyalHaskoningDHV/backup
Browse files Browse the repository at this point in the history
Backup from devops (missed 2 versions)
  • Loading branch information
abontsema authored Aug 23, 2022
2 parents d9a9ac7 + 4adb429 commit aa4dcf6
Show file tree
Hide file tree
Showing 20 changed files with 351 additions and 122 deletions.
19 changes: 15 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ Version X.Y.Z stands for:

-------------

## Version 3.0.4

### Changes
- Added `average_type` to `BaseTimeseriesRegressor.__init__()`.
- `MLPTimeseriesRegressor.__init__()` now passes `average_type` to `BaseTimeseriesRegressor.__init__()`.
- Updated `BaseTimeseriesRegressor.score()` to account for `self.average_type`: for "mean" the score uses the MSE of the average predictions, and for "median" it uses the MAE of the average predictions.
- Fixed various spelling errors in `CHANGELOG.md` and `models`.
- Updated package dependencies for scikit-learn
- Changed the DeepExplainer to the model agnostic KernelExplainer, so we can remove all the v1 dependencies on tensorflow
- Fixed pytest MPL bug by temporarily setting it to a previous version

## Version 3.0.3

### New features
Expand Down Expand Up @@ -57,6 +67,7 @@ No changes, version bump only.
### Changes
- Added `.readthedocs.yml` and `docs/requirements.txt` to include requirements for readthedocs build.


## Version 2.10.2

### Changes
Expand All @@ -67,7 +78,7 @@ No changes, version bump only.
## Version 2.10.1

### Changes
- Revert version changes in `scikit-learn` and `tenforflow` due to compatibility issues
- Revert version changes in `scikit-learn` and `tensorflow` due to compatibility issues

## 2.10.0

Expand Down Expand Up @@ -296,7 +307,7 @@ the class. This was unwanted.
- Fixed failing unit tests by removing tensorflow v1 code
- Fixed QuantileMLP, where the target would stay an integer, which fails with our custom loss functions
- Updated optional dependencies to everything we use
- With the latest pandas version a UTC to string conversio has been fixed. Removed our fix, upped the pandas version
- With the latest pandas version a UTC to string conversion has been fixed. Removed our fix, upped the pandas version
- Updated scikit-learn to at least 0.21, which is required for the iterative imputer

### Development changes
Expand Down Expand Up @@ -324,10 +335,10 @@ the class. This was unwanted.
- `sam.models.SamQuantileMLP.quantile_feature_importances`: now has argument sum_time_components that summarizes feature importances for different features generated for a single component (i.e. in onehot encoding).

### Changes
- `sam.featurew_engineering.automatic_rolling_engineering`: `estimator_type` argument can now also be 'bayeslin', which should be used if one hot components are used
- `sam.feature_engineering.automatic_rolling_engineering`: `estimator_type` argument can now also be 'bayeslin', which should be used if one hot components are used

### Bugfixes
- `sam.featurew_engineering.automatic_rolling_engineering`: constant features are no longer deleted (broke one hot features)
- `sam.feature_engineering.automatic_rolling_engineering`: constant features are no longer deleted (broke one hot features)

## Version 2.0.9

Expand Down
2 changes: 1 addition & 1 deletion sam/exploration/find_incidents.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def incident_curves_information(
"OUTLIER_TYPE": lambda x: x.iloc[0],
}
)
streaks.columns = ["_".join(x) for x in streaks.columns.ravel()]
streaks.columns = ["_".join(x) for x in streaks.columns]
streaks = streaks.rename(
columns={
"OUTLIER_CURVE_count": "OUTLIER_DURATION",
Expand Down
8 changes: 4 additions & 4 deletions sam/feature_engineering/decompose_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ def get_maxes_from_strings(cls, cyclicals: Sequence[str]) -> List[int]:

def decompose_datetime(
df: pd.DataFrame,
column: str = "TIME",
components: Optional[List[str]] = None,
cyclicals: Optional[List[str]] = None,
onehots: Optional[List[str]] = None,
column: Optional[str] = "TIME",
components: Optional[Sequence[str]] = None,
cyclicals: Optional[Sequence[str]] = None,
onehots: Optional[Sequence[str]] = None,
remove_categorical: bool = True,
keep_original: bool = True,
cyclical_maxes: Optional[Sequence[int]] = None,
Expand Down
2 changes: 1 addition & 1 deletion sam/feature_engineering/tests/test_decompose_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ def test_timezone_dutch_to_utc(self):
components=["day", "hour", "week", "secondofday"],
timezone="UTC",
)
self.assertEquals(
self.assertEqual(
str(exc.exception),
"Data should either be in UTC timezone or it should have no"
" timezone information (assumed to be in UTC)",
Expand Down
8 changes: 7 additions & 1 deletion sam/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,19 @@
from .mase import mean_absolute_scaled_error
from .quantile_evaluation import compute_quantile_crossings, compute_quantile_ratios
from .r2_calculation import train_mean_r2, train_r2
from .tilted_loss import tilted_loss
from .tilted_loss_metrics import (
joint_mae_tilted_loss,
joint_mse_tilted_loss,
tilted_loss,
)

__all__ = [
"incident_recall",
"make_incident_recall_scorer",
"precision_incident_recall_curve",
"mean_absolute_scaled_error",
"joint_mae_tilted_loss",
"joint_mse_tilted_loss",
"tilted_loss",
"keras_tilted_loss",
"keras_rmse",
Expand Down
6 changes: 3 additions & 3 deletions sam/metrics/custom_callbacks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict
from typing import Dict, Sequence

import numpy as np
import pandas as pd
Expand All @@ -9,9 +9,9 @@
class R2Evaluation(Callback):
def __init__(
self,
all_data: Dict[str, np.array],
all_data: Dict[str, np.ndarray],
prediction_cols: list,
predict_ahead: int,
predict_ahead: Sequence[int],
):
"""
Custom keras callback that computes r2 compared to the training mean.
Expand Down
16 changes: 8 additions & 8 deletions sam/metrics/incident_recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@


def incident_recall(
y_incidents: np.array,
y_pred: np.array,
range_pred: Tuple[int] = (0, 0),
y_incidents: np.ndarray,
y_pred: np.ndarray,
range_pred: Tuple[int, int] = (0, 0),
):
"""
Given `y_pred`, `y_incidents` and a prediction range, see what percentage of incidents in
Expand Down Expand Up @@ -105,10 +105,10 @@ def incident_recall_scorer(clf, X):


def _merge_thresholds(
left_t: np.array,
right_t: np.array,
left_val: np.array,
right_val: np.array,
left_t: np.ndarray,
right_t: np.ndarray,
left_val: np.ndarray,
right_val: np.ndarray,
):
"""
Helper function that merges two different thresholds. Does this by iterating over the
Expand Down Expand Up @@ -155,7 +155,7 @@ def step_ahead(new_t, new_val, saved_val, ix, old_t, old_val):


def precision_incident_recall_curve(
y_incidents: np.array, y_pred: np.array, range_pred: Tuple[int, int] = (0, 0)
y_incidents: np.ndarray, y_pred: np.ndarray, range_pred: Tuple[int, int] = (0, 0)
):
"""
Analogous to `sklearn.metrics.precision_recall_curve
Expand Down
4 changes: 2 additions & 2 deletions sam/metrics/quantile_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def compute_quantile_ratios(
qs = [float(c.split("_")[-1]) for c in pred.columns if "mean" not in c]

quantile_ratios = {
# mean here computes ratio (mean of True/Falses - 0/1s)
# mean here computes ratio (mean of True/False - 0/1s)
q: (y < pred["predict_lead_%d_q_" % predict_ahead + str(q)]).mean()
for q in qs
}
Expand Down Expand Up @@ -96,7 +96,7 @@ def compute_quantile_crossings(
# now replace the 'mean' part with 0.5 in the predictions
pred.columns = [c.replace("mean", "q_0.5") for c in pred.columns]

# make sure quantiles are sorted if they arent already:
# make sure quantiles are sorted if they aren't already:
qs = np.sort(qs)[::-1]

# now compute the quantile crossings
Expand Down
4 changes: 3 additions & 1 deletion sam/metrics/tests/test_incident_recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ def testCurve(self):
y_incidents = [0, 0, 0, 1]
y_pred = [0.1, 0.2, 0.3, 0.4]
p, r, t = precision_incident_recall_curve(y_incidents, y_pred, range_pred=(0, 1))
assert_array_almost_equal(p, np.array([0.5, 0.666667, 1, 1, 1]))
# Note: Behaviour of sklearn precision recall curve changed in version 1.1.1
# Now the number of thresholds = number of unique predictions
assert_array_almost_equal(p, np.array([0.5, 0.6666666666666666, 1, 1, 1]))
assert_array_equal(r, np.array([1, 1, 1, 1, 0]))
assert_array_equal(t, np.array([0.1, 0.2, 0.3, 0.4]))

Expand Down
3 changes: 0 additions & 3 deletions sam/metrics/tests/test_keras_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@
skipkeras = False
try:
import tensorflow as tf # noqa: F401

# Necessary for shap DeepExplainer, see: https://github.com/slundberg/shap/issues/2189
tf.compat.v1.disable_v2_behavior()
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Input
except ImportError:
Expand Down
18 changes: 0 additions & 18 deletions sam/metrics/tests/test_tilted_loss.py

This file was deleted.

62 changes: 62 additions & 0 deletions sam/metrics/tests/test_tilted_loss_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import unittest

import numpy as np
import pandas as pd
from numpy.testing import assert_almost_equal
from sam.metrics import joint_mae_tilted_loss, joint_mse_tilted_loss, tilted_loss


class TestTiltedLoss(unittest.TestCase):
    """Unit tests for the tilted (quantile) loss metrics exposed by ``sam.metrics``."""

    def setUp(self) -> None:
        """Build a two-target fixture: true values plus quantile and mean predictions."""
        self.y_true_df = pd.DataFrame(
            data={
                "output_1": [1, 2, 3, 4, 5],
                "output_2": [1, 3, 5, 7, 9],
            }
        )

        # Column order is kept exactly as declared here: first the columns of
        # quantile 1, then those of quantile 2, then the mean columns.
        self.y_pred_df = pd.DataFrame(
            data={
                "output_1_quantile_1": [0.9, 1.9, 2.9, 3.9, 4.9],
                "output_2_quantile_1": [0.5, 2.5, 4.5, 6.5, 8.5],
                "output_1_quantile_2": [1.1, 2.1, 3.1, 4.1, 5.1],
                "output_2_quantile_2": [1.5, 3.5, 5.5, 7.5, 9.5],
                "output_1_mean": [0.9, 2.1, 2.9, 4.1, 4.9],
                "output_2_mean": [1.1, 2.9, 5.1, 6.9, 9.1],
            }
        )

        self.quantiles = [0.3, 0.7]
        self.n_targets = 2

        return super().setUp()

    def test_tilted_loss(self):
        """Check ``tilted_loss`` at three quantiles and against half the MAE."""
        y_true = [1, 2, 3, 4, 5]
        y_pred = [1.1, 2.1, 3.1, 3.9, 4.9]

        expected_by_quantile = ((0.1, 0.058), (0.5, 0.05), (0.9, 0.042))
        for quantile, expected in expected_by_quantile:
            assert_almost_equal(tilted_loss(y_true, y_pred, quantile), expected)

        # At the median (quantile 0.5) the tilted loss is expected to equal
        # the mean absolute error divided by 2.
        errors = np.array(y_true) - np.array(y_pred)
        mae = np.mean(np.abs(errors))
        assert_almost_equal(mae / 2, tilted_loss(y_true, y_pred, 0.5))

    def test_joint_mae_tilted_loss(self):
        """Check ``joint_mae_tilted_loss`` on the fixture against the value 0.56."""
        loss = joint_mae_tilted_loss(
            self.y_true_df,
            self.y_pred_df,
            quantiles=self.quantiles,
            n_targets=self.n_targets,
        )
        assert_almost_equal(loss, 0.56)

    def test_joint_mse_tilted_loss(self):
        """Check ``joint_mse_tilted_loss`` on the fixture against the value 0.38."""
        loss = joint_mse_tilted_loss(
            self.y_true_df,
            self.y_pred_df,
            quantiles=self.quantiles,
            n_targets=self.n_targets,
        )
        assert_almost_equal(loss, 0.38)


if __name__ == "__main__":
unittest.main()
38 changes: 0 additions & 38 deletions sam/metrics/tilted_loss.py

This file was deleted.

Loading

0 comments on commit aa4dcf6

Please sign in to comment.