From 1e8a2673e1d5fe9e372fdaa52987259d7d304d34 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 19 Apr 2018 17:18:50 +0200 Subject: [PATCH 1/7] FIX use balanced accuracy from scikit-learn --- rampwf/score_types/balanced_accuracy.py | 30 +++++++++++++++---------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/rampwf/score_types/balanced_accuracy.py b/rampwf/score_types/balanced_accuracy.py index 8a7015ee..50269ce3 100644 --- a/rampwf/score_types/balanced_accuracy.py +++ b/rampwf/score_types/balanced_accuracy.py @@ -1,13 +1,21 @@ -"""Balanced accuracy. +from .classifier_base import ClassifierBaseScoreType +from sklearn.metrics import recall_score +from sklearn.metrics.classification import _check_targets -From https://github.com/ch-imad/AutoMl_Challenge/blob/2353ec0/Starting_kit/scoring_program/libscores.py#L187 # noqa -See the thread at -https://github.com/rhiever/tpot/issues/108#issuecomment-317067760 -about the different definitions. -""" -from .classifier_base import ClassifierBaseScoreType -from .macro_averaged_recall import MacroAveragedRecall +def _balanced_accuracy_score(y_true, y_pred, sample_weight=None): + """FIXME: port implementation of balanced accuracy from scikit-learn 0.20. + """ + y_type, y_true, y_pred = _check_targets(y_true, y_pred) + + if y_type != 'binary': + raise ValueError('Balanced accuracy is only meaningful ' + 'for binary classification problems.') + # simply wrap the ``recall_score`` function + return recall_score(y_true, y_pred, + pos_label=None, + average='macro', + sample_weight=sample_weight) class BalancedAccuracy(ClassifierBaseScoreType): @@ -20,8 +28,6 @@ def __init__(self, name='balanced_accuracy', precision=2): self.precision = precision def __call__(self, y_true_label_index, y_pred_label_index): - mac = MacroAveragedRecall() - tpr = mac(y_true_label_index, y_pred_label_index) - base_tpr = 1. / len(self.label_names) - score = (tpr - base_tpr) / (1 - base_tpr) + score = _balanced_accuracy_score(y_true_label_index, + y_pred_label_index) return score From 2c3622dbeae828cf468cb6ce3ce1ab6fea1b505f Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 19 Apr 2018 17:33:37 +0200 Subject: [PATCH 2/7] EHN multiclass balanced accuracy --- rampwf/score_types/balanced_accuracy.py | 31 ++++++++++++++----------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/rampwf/score_types/balanced_accuracy.py b/rampwf/score_types/balanced_accuracy.py index 50269ce3..4bc3eb73 100644 --- a/rampwf/score_types/balanced_accuracy.py +++ b/rampwf/score_types/balanced_accuracy.py @@ -1,22 +1,27 @@ -from .classifier_base import ClassifierBaseScoreType -from sklearn.metrics import recall_score +import numpy as np + +from sklearn.metrics import confusion_matrix from sklearn.metrics.classification import _check_targets +from .classifier_base import ClassifierBaseScoreType + def _balanced_accuracy_score(y_true, y_pred, sample_weight=None): """FIXME: port implementation of balanced accuracy from scikit-learn 0.20. """ - y_type, y_true, y_pred = _check_targets(y_true, y_pred) - - if y_type != 'binary': - raise ValueError('Balanced accuracy is only meaningful ' - 'for binary classification problems.') - # simply wrap the ``recall_score`` function - return recall_score(y_true, y_pred, - pos_label=None, - average='macro', - sample_weight=sample_weight) - + C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) + with np.errstate(divide='ignore', invalid='ignore'): + per_class = np.diag(C) / C.sum(axis=1) + if np.any(np.isnan(per_class)): + warnings.warn('y_pred contains classes not in y_true') + per_class = per_class[~np.isnan(per_class)] + score = np.mean(per_class) + if adjusted: + n_classes = len(per_class) + chance = 1 / n_classes + score -= chance + score /= 1 - chance + return score class BalancedAccuracy(ClassifierBaseScoreType): is_lower_the_better = False From 6cdb06bfa6395a2f4f05372c25eb04b05146b725 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 19 Apr 2018 17:35:40 +0200 Subject: [PATCH 3/7] fix parameters --- rampwf/score_types/balanced_accuracy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rampwf/score_types/balanced_accuracy.py b/rampwf/score_types/balanced_accuracy.py index 4bc3eb73..6a5ae61d 100644 --- a/rampwf/score_types/balanced_accuracy.py +++ b/rampwf/score_types/balanced_accuracy.py @@ -6,7 +6,8 @@ from .classifier_base import ClassifierBaseScoreType -def _balanced_accuracy_score(y_true, y_pred, sample_weight=None): +def _balanced_accuracy_score(y_true, y_pred, sample_weight=None, + adjusted=False): """FIXME: port implementation of balanced accuracy from scikit-learn 0.20. """ C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) From 5a497c76b1204ae014e98386ddda6540b7a404e2 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 19 Apr 2018 17:51:49 +0200 Subject: [PATCH 4/7] expose adjusted parameters --- rampwf/score_types/balanced_accuracy.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/rampwf/score_types/balanced_accuracy.py b/rampwf/score_types/balanced_accuracy.py index 6a5ae61d..4758a69d 100644 --- a/rampwf/score_types/balanced_accuracy.py +++ b/rampwf/score_types/balanced_accuracy.py @@ -7,7 +7,7 @@ def _balanced_accuracy_score(y_true, y_pred, sample_weight=None, - adjusted=False): + adjusted=True): """FIXME: port implementation of balanced accuracy from scikit-learn 0.20. """ C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) @@ -24,16 +24,19 @@ def _balanced_accuracy_score(y_true, y_pred, sample_weight=None, score /= 1 - chance return score + class BalancedAccuracy(ClassifierBaseScoreType): is_lower_the_better = False minimum = 0.0 maximum = 1.0 - def __init__(self, name='balanced_accuracy', precision=2): + def __init__(self, name='balanced_accuracy', precision=2, adjusted=True): self.name = name self.precision = precision + self.adjusted = adjusted def __call__(self, y_true_label_index, y_pred_label_index): score = _balanced_accuracy_score(y_true_label_index, - y_pred_label_index) + y_pred_label_index, + adjusted=self.adjusted) return score From ec151a4ad7c1bdc7a8d558fd593a183f6d84c267 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 20 Apr 2018 15:22:49 +0200 Subject: [PATCH 5/7] Update balanced_accuracy.py --- rampwf/score_types/balanced_accuracy.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rampwf/score_types/balanced_accuracy.py b/rampwf/score_types/balanced_accuracy.py index 4758a69d..998924a8 100644 --- a/rampwf/score_types/balanced_accuracy.py +++ b/rampwf/score_types/balanced_accuracy.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np from sklearn.metrics import confusion_matrix From 8f7fddeeb1118c28fa155afa1071918357056845 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 9 May 2018 11:19:25 +0200 Subject: [PATCH 6/7] MAINT update conda --- appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/appveyor.yml b/appveyor.yml index 46e025da..1bc6c57f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -22,6 +22,7 @@ install: # Add Library/bin directory to fix issue # https://github.com/conda/conda/issues/1753 - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PYTHON%\\Library\\bin;%PATH%" + - conda update conda -y - conda create -n testenv --yes ipykernel pip python=%PYTHON_VERSION_ENV% - activate testenv - pip install -r requirements.txt From 74887fb15fba9af375616bba4123825eadb33869 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 9 May 2018 11:27:09 +0200 Subject: [PATCH 7/7] fix --- appveyor.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 1bc6c57f..46e025da 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -22,7 +22,6 @@ install: # Add Library/bin directory to fix issue # https://github.com/conda/conda/issues/1753 - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PYTHON%\\Library\\bin;%PATH%" - - conda update conda -y - conda create -n testenv --yes ipykernel pip python=%PYTHON_VERSION_ENV% - activate testenv - pip install -r requirements.txt