calibrated-explanations v0.1.1

Moffran · Sep 14, 2023 · 0429194 · 0429194
1 parent 433c032
commit 0429194
Show file tree

Hide file tree

Showing 7 changed files with 38 additions and 49 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,8 +1,15 @@
 # Changelog
+
 ## [Unreleased]
 ### Features
 ### Fixes
-
+## [v0.1.1](https://github.com/Moffran/calibrated_explanations/releases/tag/v0.1.1) - 2023-09-14
+[Full changelog](https://github.com/Moffran/calibrated_explanations/compare/v0.1.0...v0.1.1)
+### Features
+- Exchanged the slow `VennABERS_by_def` function for the `VennAbers` class in the `venn-abers` package.
+### Fixes
+- Low and high weights are correctly assigned, so that low <= high is always the case.
+- Adjusted the number of decimals in counterfactual rules to 2.
 ## [v0.1.0](https://github.com/Moffran/calibrated_explanations/releases/tag/v0.1.0) - 2023-09-04
 
 [Full changelog](https://github.com/Moffran/calibrated_explanations/compare/v0.0.2...v0.1.0)

diff --git a/README.md b/README.md
@@ -22,6 +22,7 @@ Below is an example of a counterfactual explanation for an instance of the Diabe
 
 ![Counterfactual explanation for Diabetes](https://github.com/Moffran/calibrated_explanations/blob/main/docs/images/counterfactual_diabetes.png "Counterfactual explanation for Diabetes")
 
+- [Calibrated Explanations](#calibrated-explanations)
   - [Getting started](#getting-started)
     - [Classification](#classification)
       - [Factual Explanations](#factual-explanations)
@@ -318,11 +319,11 @@ This research is funded by the Swedish Knowledge Foundation together with indust
 
 [Rudy Matela](https://github.com/rudymatela) has been our git guru and has helped us with the release process.
 
-We have used code from [Paulo Toccaceli](https://github.com/ptocca)s implementation of [Venn-Abers](https://github.com/ptocca/VennABERS). We do not use the fast version yet but will likely add it in the future.
+We have used both the `ConformalPredictiveSystem` and `DifficultyEstimator` classes from [Henrik Boström](https://github.com/henrikbostrom)'s [crepes](https://github.com/henrikbostrom/crepes) package to provide support for regression.
 
-We have used both the `ConformalPredictiveSystem` and `DifficultyEstimator` classes from [Henrik Boström](https://github.com/henrikbostrom)s [crepes](https://github.com/henrikbostrom/crepes) to provide support for regression.
+We have used the `VennAbers` class from [Ivan Petej](https://github.com/ip200)'s [venn-abers](https://github.com/ip200/venn-abers) package to provide support for probabilistic explanations (both classification and probabilistic regression).
 
-We have used code from [Marco Tulio Correia Ribeiro](https://github.com/marcotcr)s [lime](https://github.com/marcotcr/lime) for the `Disccretizer` class.
+We have used code from [Marco Tulio Correia Ribeiro](https://github.com/marcotcr)'s [lime](https://github.com/marcotcr/lime) package for the `Discretizer` class.
 
 [build-log]:    https://github.com/Moffran/calibrated_explanations/actions/workflows/test.yml
 [build-status]: https://github.com/Moffran/calibrated_explanations/actions/workflows/test.yml/badge.svg

diff --git a/docs/conf.py b/docs/conf.py
@@ -28,7 +28,7 @@
 version = '0.1'
 
 # The full version, including alpha/beta/rc tags
-release = '0.1.0'
+release = '0.1.1'
 
 
 # -- General configuration ---------------------------------------------------

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "calibrated_explanations"
-version = "0.1.0"
+version = "0.1.1"
 authors = [
   { name="Helena Löfström", email="[email protected]" },
   { name="Tuwe Löfström", email="[email protected]" },
@@ -25,7 +25,7 @@ dependencies = [
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Programming Language :: Python :: 3",
-    "License :: OSI Approved :: BSD 3-Clause License",
+    "License :: OSI Approved :: BSD License",
     "Operating System :: OS Independent",
 ]
 

diff --git a/src/calibrated_explanations/VennAbers.py b/src/calibrated_explanations/VennAbers.py
@@ -4,42 +4,20 @@
 # pylint: disable=invalid-name, line-too-long
 # flake8: noqa: E501
 import numpy as np
-from sklearn.isotonic import IsotonicRegression
+import venn_abers as va
 
 
 
-def VennABERS_by_def(calibration, test):
-# Function copied from https://github.com/ptocca/VennABERS/blob/master/test/VennABERS_test.ipynb
-    """a function to compute the VennABERS score
-
-    Args:
-        calibration (n_calibration_samples,): the probabilities of the positive class for the calibration samples
-        test (n_test_samples,): the probabilities of the positive class for the test samples
-
-    Returns:
-        lower_bounds (n_test_samples,): lower bounds of the VennABERS interval for each test sample
-        upper_bounds (n_test_samples,): upper bounds of the VennABERS interval for each test sample
-    """
-    p0,p1 = [],[]
-    for x in test:
-        ds0 = calibration+[(x,0)]
-        iso0 = IsotonicRegression().fit(*zip(*ds0))
-        p0.append(iso0.predict([x]))
-
-        ds1 = calibration+[(x,1)]
-        iso1 = IsotonicRegression().fit(*zip(*ds1))
-        p1.append(iso1.predict([x]))
-    return np.array(p0).flatten(),np.array(p1).flatten()
-
 class VennAbers:
     """a class to calibrate the predictions of a model using the VennABERS method
     """
-    iso = IsotonicRegression(out_of_bounds="clip")
-
     def __init__(self, cal_probs, cal_y, model):
         self.cprobs = cal_probs
         self.ctargets = cal_y
         self.model = model
+        self.va = va.VennAbers()
+        cprobs, predict = self.get_p_value(self.cprobs)
+        self.va.fit(cprobs, np.multiply(predict == self.ctargets, 1) if self.is_multiclass() else self.ctargets)
 
     def predict(self, test_X):
         """a function to predict the class of the test samples
@@ -50,10 +28,9 @@ def predict(self, test_X):
         Returns:
             predicted classes (n_test_samples,): predicted classes based on the regularized VennABERS probabilities
         """
-        cprobs, predict = self.get_p_value(self.cprobs)
-        targets = np.multiply(predict == self.ctargets, 1) if self.is_multiclass() else self.ctargets
         tprobs, _ = self.get_p_value(self.model.predict_proba(test_X))
-        low,high = VennABERS_by_def(list(zip(cprobs,targets)),tprobs)
+        _, p0p1 = self.va.predict_proba(tprobs)
+        low, high = p0p1[:,0], p0p1[:,1]
         tmp = high / (1-low + high)
         return np.asarray(np.round(tmp))
 
@@ -72,10 +49,9 @@ def predict_proba(self, test_X, output_interval=False, classes=None):
                 high (n_test_samples,): upper bounds of the VennABERS interval for each test sample
         """
         va_proba = self.model.predict_proba(test_X)
-        cprobs, predict = self.get_p_value(self.cprobs)
-        targets = np.multiply(predict == self.ctargets, 1) if self.is_multiclass() else self.ctargets
         tprobs, classes = self.get_p_value(va_proba, classes)
-        low,high = VennABERS_by_def(list(zip(cprobs,targets)),tprobs)
+        _,p0p1 = self.va.predict_proba(tprobs)
+        low, high = p0p1[:,0], p0p1[:,1]
         tmp = high / (1-low + high)
         va_proba[:,0] = 1-tmp
         va_proba[:,1] = tmp
@@ -94,13 +70,16 @@ def get_p_value(self, proba, classes=None):
         probable class otherwise
         """
         if classes is None:
-            return np.max(proba, axis=1) if self.is_multiclass() else proba[:,1], np.argmax(proba, axis=1)
-        return proba[:,classes], classes
+            return proba, np.argmax(proba, axis=1)
+        proba_2 = np.zeros((proba.shape[0], 2))
+        proba_2[:,1] = proba[:,classes]
+        proba_2[:,0] = 1 - proba[:,classes]
+        return proba_2, classes
 
     def is_multiclass(self) -> bool:
         """returns true if more than two classes
 
         Returns:
             bool: true if more than two classes
         """
-        return len(self.cprobs[0,:]) > 2
+        return len(self.cprobs[0,:]) > 2
diff --git a/src/calibrated_explanations/_explanations.py b/src/calibrated_explanations/_explanations.py
@@ -776,15 +776,15 @@ def __plot_probabilistic(self, instance, predict, feature_weights, features_to_p
         if interval is True:
             assert idx is not None
         fig = plt.figure(figsize=(10,num_to_show*.5+2))
-        subfigs = fig.subfigures(4, 1, height_ratios=[1, 1, 1, num_to_show+2])
+        subfigs = fig.subfigures(3, 1, height_ratios=[1, 1, num_to_show+2])
 
         if interval and (self._is_one_sided()):
             raise Warning('Interval plot is not supported for one-sided explanations.')
 
         ax_positive = subfigs[0].add_subplot(111)
         ax_negative = subfigs[1].add_subplot(111)
 
-        ax_main = subfigs[3].add_subplot(111)
+        ax_main = subfigs[2].add_subplot(111)
 
         # plot the probabilities at the top
         x = np.linspace(0, 1, 2)
@@ -1107,7 +1107,7 @@ def _get_rules(self):
                     counterfactual['feature_value'].append(
                                     self.binned['rule_values'][f][0][0])
                     counterfactual['rule'].append(
-                                    f'{self._get_explainer().feature_names[f]} < {lesser}')
+                                    f'{self._get_explainer().feature_names[f]} < {lesser:.2f}')
                     counterfactual['is_conjunctive'].append(False)
                     value_bin = 1
 
@@ -1132,7 +1132,7 @@ def _get_rules(self):
                                     self.binned['rule_values'][f][0][1 \
                                     if len(self.binned['rule_values'][f][0]) == 3 else 0])
                     counterfactual['rule'].append(
-                                    f'{self._get_explainer().feature_names[f]} > {greater}')
+                                    f'{self._get_explainer().feature_names[f]} > {greater:.2f}')
                     counterfactual['is_conjunctive'].append(False)
 
         self.rules = counterfactual

diff --git a/src/calibrated_explanations/core.py b/src/calibrated_explanations/core.py
@@ -25,7 +25,7 @@
 from ._interval_regressor import IntervalRegressor
 from .utils import safe_isinstance
 
-__version__ = 'v0.1.0'
+__version__ = 'v0.1.1'
 
 
 
@@ -471,8 +471,10 @@ def __call__(self,
                     instance_predict['high'][f] = np.mean(high_predict[uncovered])
 
                     instance_weights['predict'][f] = self._assign_weight(instance_predict['predict'][f], prediction['predict'][-1], is_probabilistic)
-                    instance_weights['low'][f] = self._assign_weight(instance_predict['low'][f], prediction['predict'][-1], is_probabilistic)
-                    instance_weights['high'][f] = self._assign_weight(instance_predict['high'][f], prediction['predict'][-1], is_probabilistic)
+                    tmp_low = self._assign_weight(instance_predict['low'][f], prediction['predict'][-1], is_probabilistic)
+                    tmp_high = self._assign_weight(instance_predict['high'][f], prediction['predict'][-1], is_probabilistic)
+                    instance_weights['low'][f] = np.min([tmp_low, tmp_high])
+                    instance_weights['high'][f] = np.max([tmp_low, tmp_high])
 
             binned_predict['predict'].append(instance_binned['predict'])
             binned_predict['low'].append(instance_binned['low'])