
Commit

Merge branch 'master' into dependabot/pip/torch-gte-1.9.0-and-lt-3.0.0
jklaise authored Jun 19, 2023
2 parents 5ae8bc9 + c2da012 commit 7248eb7
Showing 40 changed files with 1,139 additions and 741 deletions.
40 changes: 29 additions & 11 deletions .github/workflows/ci.yml
@@ -69,28 +69,46 @@ jobs:
with:
limit-access-to-actor: true

- name: Lint with flake8
run: |
flake8 alibi
- name: Typecheck with mypy
run: |
mypy alibi
- name: Test with pytest
run: |
pytest -m tf1 alibi
pytest -m "not tf1" alibi
- name: Upload coverage to Codecov
if: ${{ success() }}
run: |
codecov -F ${{ matrix.os }}-${{ matrix.python-version }}
uses: codecov/codecov-action@v3
with:
directory: .
env_vars: ${{matrix.os}}, ${{matrix.python-version}}
fail_ci_if_error: false
verbose: true

- name: Build Python package
run: |
make build_pypi
code-quality:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up Python 3.x
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install --upgrade --upgrade-strategy eager -r requirements/dev.txt
python -m pip install --upgrade --upgrade-strategy eager .[all]
- name: Lint with flake8
run: |
flake8 alibi
- name: Typecheck with mypy
run: |
mypy alibi
docs:

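The pytest step above splits the suite on a `tf1` marker, so TF1-compatible tests run in a separate interpreter session from the rest. A minimal sketch of how such a marker split works (test names are illustrative, not from the repo):

```python
import pytest

@pytest.mark.tf1
def test_legacy_graph_mode():
    # collected only by `pytest -m tf1 alibi`
    ...

def test_eager_mode():
    # collected by `pytest -m "not tf1" alibi`
    ...
```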
14 changes: 5 additions & 9 deletions .pre-commit-config.yaml
@@ -1,17 +1,13 @@
repos:
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.4
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.4.0
hooks:
- id: flake8
- repo: https://github.com/nbQA-dev/nbqa
rev: 1.2.2
hooks:
- id: nbqa-pyupgrade
args: [--py37-plus]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.942
rev: v1.0.1
hooks:
- id: mypy
additional_dependencies: [
types-requests>=2.25.0,
types-requests~=2.25,
]
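The `types-requests` pin above changes from a floor-only constraint to a compatible-release one. A quick check of what each specifier accepts, using the `packaging` library:

```python
from packaging.specifiers import SpecifierSet

old, new = SpecifierSet(">=2.25.0"), SpecifierSet("~=2.25")
for version in ("2.25.0", "2.31.0", "3.0.0"):
    print(version, version in old, version in new)
# 2.25.0 True True
# 2.31.0 True True   (~=2.25 means >=2.25, <3.0)
# 3.0.0  True False  (the new pin excludes a future major release)
```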

32 changes: 32 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,37 @@
# Change Log

## [v0.9.2](https://github.com/SeldonIO/alibi/tree/v0.9.2) (2023-04-28)
[Full Changelog](https://github.com/SeldonIO/alibi/compare/v0.9.1...v0.9.2)

This is a patch release fixing several bugs, updating dependencies and adding some small extensions.

### Added
- Allow `IntegratedGradients` layer selection to be specified with a custom callable ([#894](https://github.com/SeldonIO/alibi/pull/894)).
- Implement `reset_predictor` method for `PartialDependence` explainer ([#897](https://github.com/SeldonIO/alibi/pull/897)).
- Extend `GradientSimilarity` explainer to allow models of any input type ([#912](https://github.com/SeldonIO/alibi/pull/912)).
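
A minimal sketch of the first entry above, assuming `layer` now also accepts a callable that takes the model and returns the layer to attribute against (the exact accepted signature may differ; see #894):

```python
import numpy as np
import tensorflow as tf
from alibi.explainers import IntegratedGradients

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(1000, 32, name="embedding"),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(2, activation="softmax"),
])
model(np.zeros((1, 10), dtype=np.int32))  # build the model once

# layer selected via a custom callable rather than a direct layer reference
ig = IntegratedGradients(model, layer=lambda m: m.get_layer("embedding"), n_steps=20)

X = np.random.randint(0, 1000, size=(4, 10))
explanation = ig.explain(X, target=np.zeros(4, dtype=int))
print(explanation.attributions[0].shape)
```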

### Fixed
- `AnchorText` auto-regressive language model sampler updating `input_ids` tensor ([#895](https://github.com/SeldonIO/alibi/pull/895)).
- `AnchorTabular` length discrepancy between `feature` and `names` fields ([#902](https://github.com/SeldonIO/alibi/pull/902)).
- `AnchorBaseBeam` unintended coverage update during the multi-armed bandit run ([#919](https://github.com/SeldonIO/alibi/pull/919), [#914](https://github.com/SeldonIO/alibi/issues/914)).

### Changed
- Maximum supported version of `tensorflow` bumped to `2.12.x` ([#896](https://github.com/SeldonIO/alibi/pull/896)).
- Supported version of `pandas` bumped to `>1.0.0, <3.0.0` ([#899](https://github.com/SeldonIO/alibi/pull/899)).
- Update notebooks to account for `pandas` version `2.x` deprecations ([#908](https://github.com/SeldonIO/alibi/pull/908), [#910](https://github.com/SeldonIO/alibi/pull/910)).
- Maximum supported version of `scikit-image` bumped to `0.20.x` ([#882](https://github.com/SeldonIO/alibi/pull/882)).
- Maximum supported version of `attrs` bumped to `23.x` ([#905](https://github.com/SeldonIO/alibi/pull/905)).
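
One concrete instance of the `pandas` 2.x deprecations the notebook updates above deal with (an assumed example, the PRs touch several call sites): `DataFrame.append` was removed in 2.0 in favour of `pd.concat`:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
row = pd.DataFrame({"a": [3]})

# pandas < 2.0: df = df.append(row, ignore_index=True)
df = pd.concat([df, row], ignore_index=True)  # pandas 2.x replacement
```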

### Development
- Migrate `codecov` to use GitHub Actions and don't fail CI on coverage report upload failure due to rate limiting ([#901](https://github.com/SeldonIO/alibi/pull/901), [#913](https://github.com/SeldonIO/alibi/pull/913)).
- Bump `mypy` version to `>=1.0, <2.0` ([#886](https://github.com/SeldonIO/alibi/pull/886)).
- Bump `sphinx` version to `6.x` ([#852](https://github.com/SeldonIO/alibi/pull/852)).
- Bump `sphinx-design` version to `0.4.1` ([#904](https://github.com/SeldonIO/alibi/pull/904)).
- Bump `nbsphinx` version to `0.9.x` ([#889](https://github.com/SeldonIO/alibi/pull/889)).
- Bump `myst-parser` version to `>=1.0, <2.0` ([#887](https://github.com/SeldonIO/alibi/pull/887)).
- Bump `twine` version to `4.x` ([#620](https://github.com/SeldonIO/alibi/pull/620)).
- Bump `pre-commit` version to `3.x` and update the config ([#866](https://github.com/SeldonIO/alibi/pull/866)).

## [v0.9.1](https://github.com/SeldonIO/alibi/tree/v0.9.1) (2023-03-13)
[Full Changelog](https://github.com/SeldonIO/alibi/compare/v0.9.0...v0.9.1)

4 changes: 2 additions & 2 deletions CITATION.cff
@@ -19,8 +19,8 @@ authors:
- family-names: "Athorne"
given-names: "Alex"
title: "Alibi Explain: Algorithms for Explaining Machine Learning Models"
version: 0.9.1
date-released: 2023-03-13
version: 0.9.2
date-released: 2023-04-28
url: "https://github.com/SeldonIO/alibi"
preferred-citation:
type: article
4 changes: 2 additions & 2 deletions alibi/confidence/trustscore.py
@@ -123,7 +123,7 @@ def fit(self, X: np.ndarray, Y: np.ndarray, classes: Optional[int] = None) -> No

# make sure Y represents predicted classes, not one-hot encodings
if len(Y.shape) > 1:
Y = np.argmax(Y, axis=1) # type: ignore
Y = np.argmax(Y, axis=1)

if self.filter == 'probability_knn':
X_filter, Y_filter = self.filter_by_probability_knn(X, Y)
@@ -170,7 +170,7 @@ def score(self, X: np.ndarray, Y: np.ndarray, k: int = 2, dist_type: str = 'poin
"""
# make sure Y represents predicted classes, not probabilities
if len(Y.shape) > 1:
Y = np.argmax(Y, axis=1) # type: ignore
Y = np.argmax(Y, axis=1)

# KDTree needs 2D data
if len(X.shape) > 2:
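The `np.argmax` lines above, now free of their `# type: ignore` comments, are the usual one-hot-to-label conversion. A quick illustration of the guard:

```python
import numpy as np

Y = np.array([[0, 1, 0],
              [1, 0, 0]])       # one-hot encoded predictions

if len(Y.shape) > 1:            # same check as in trustscore.py
    Y = np.argmax(Y, axis=1)

print(Y)  # [1 0] -- integer class labels, as TrustScore expects
```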
2 changes: 1 addition & 1 deletion alibi/datasets/default.py
@@ -151,7 +151,7 @@ def load_cats(target_size: tuple = (299, 299), return_X_y: bool = False) -> Unio
target_names.append('_'.join(name).split('.')[0])
tar.close()

images = np.concatenate(images, axis=0) # type: ignore[assignment]
images = np.concatenate(images, axis=0)
targets = np.asarray(target)
if return_X_y:
return images, targets # type: ignore[return-value] # TODO: allow redefiniton
2 changes: 0 additions & 2 deletions alibi/explainers/anchors/anchor_base.py
@@ -458,8 +458,6 @@ def update_state(self, covered_true: np.ndarray, covered_false: np.ndarray, labe
self.state['t_idx'][anchor].update(idxs)
self.state['t_nsamples'][anchor] += n_samples
self.state['t_positives'][anchor] += labels.sum()
if coverage > -1:
self.state['t_coverage'][anchor] = coverage
self.state['t_covered_true'][anchor] = covered_true
self.state['t_covered_false'][anchor] = covered_false
self.state['data'][idxs] = data
2 changes: 1 addition & 1 deletion alibi/explainers/anchors/anchor_image.py
@@ -142,7 +142,7 @@ def __call__(
covered_false = [scale_image(img) for img in covered_false]
# coverage set to -1.0 as we can't compute 'true' coverage for this model

return [covered_true, covered_false, labels.astype(int), data, -1.0, anchor[0]] # type: ignore
return [covered_true, covered_false, labels.astype(int), data, -1.0, anchor[0]]

else:
data = self._choose_superpixels(num_samples)
7 changes: 4 additions & 3 deletions alibi/explainers/anchors/anchor_tabular.py
@@ -399,7 +399,7 @@ def replace_features(self, samples: np.ndarray, allowed_rows: Dict[int, Any], un

# replace partial anchors with partial anchors drawn from the training dataset
# samp_idxs are arrays of training set row indices from where partial anchors are extracted for replacement
for idx, n_samp in enumerate(nb_partial_anchors[start_idx:end_idx + 1], start=start_idx): # type: ignore[misc]
for idx, n_samp in enumerate(nb_partial_anchors[start_idx:end_idx + 1], start=start_idx):
if num_samples >= n_samp:
samp_idxs = partial_anchor_rows[n_anchor_feats - idx - 1]
num_samples -= n_samp
@@ -912,9 +912,10 @@ def add_names_to_exp(self, explanation: dict) -> None:
"""

anchor_idxs = explanation['feature']
explanation['names'] = []
explanation['feature'] = [self.enc2feat_idx[idx] for idx in anchor_idxs]
ordinal_ranges = {self.enc2feat_idx[idx]: [float('-inf'), float('inf')] for idx in anchor_idxs}
explanation['feature'] = list(ordinal_ranges.keys())
explanation['names'] = []

for idx in set(anchor_idxs) - self.cat_lookup.keys():
feat_id = self.enc2feat_idx[idx] # feature col. id
if 0 in self.ord_lookup[idx]: # tells if the feature in X falls in a higher or lower bin
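A plausible reading of the reordering in `add_names_to_exp` above (the #902 fix): several encoded anchor ids can map to the same original feature column, and building `feature` from the deduplicated `ordinal_ranges` keys keeps it the same length as `names`. A toy illustration with a hypothetical `enc2feat_idx` map:

```python
enc2feat_idx = {0: 2, 1: 2, 2: 5}   # hypothetical encoded-id -> feature-column map
anchor_idxs = [0, 1, 2]

# old construction: one entry per encoded id
old_feature = [enc2feat_idx[idx] for idx in anchor_idxs]    # [2, 2, 5]

# new construction: dict keys deduplicate repeated feature columns
ordinal_ranges = {enc2feat_idx[idx]: [float('-inf'), float('inf')] for idx in anchor_idxs}
new_feature = list(ordinal_ranges.keys())                   # [2, 5]
```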
4 changes: 2 additions & 2 deletions alibi/explainers/backends/pytorch/cfrl_base.py
@@ -475,12 +475,12 @@ def update_actor_critic(encoder: nn.Module,

# Define state by concatenating the input embedding, the classification label, the target label, and optionally
# the conditional vector if exists.
state = [Z, Y_m, Y_t] + ([C] if (C is not None) else []) # type: ignore
state = [Z, Y_m, Y_t] + ([C] if (C is not None) else [])
state = torch.cat(state, dim=1).to(device) # type: ignore

# Define input for critic, compute q-values and append critic's loss.
input_critic = torch.cat([state, Z_cf_tilde], dim=1).float() # type: ignore
output_critic = critic(input_critic).squeeze(1) # type: ignore
output_critic = critic(input_critic).squeeze(1)
loss_critic = F.mse_loss(output_critic, R_tilde) # type: ignore
losses.update({"critic_loss": loss_critic.item()})

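The state construction in `update_actor_critic` above concatenates the input embedding, the model's label, the target label, and an optional conditioning vector along the feature axis. A self-contained sketch with made-up shapes:

```python
import torch

Z   = torch.randn(8, 16)   # input embeddings
Y_m = torch.randn(8, 3)    # model classification labels
Y_t = torch.randn(8, 3)    # target labels
C   = None                 # no conditional vector in this example

state = [Z, Y_m, Y_t] + ([C] if C is not None else [])
state = torch.cat(state, dim=1)
print(state.shape)  # torch.Size([8, 22])
```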
16 changes: 8 additions & 8 deletions alibi/explainers/cem.py
@@ -490,9 +490,9 @@ def compare(x: Union[float, int, np.ndarray], y: int) -> bool:
if not isinstance(x, (float, int, np.int64)):
x = np.copy(x)
if self.mode == "PP":
x[y] -= self.kappa # type:ignore
x[y] -= self.kappa
elif self.mode == "PN":
x[y] += self.kappa # type:ignore
x[y] += self.kappa
x = np.argmax(x) # type:ignore
if self.mode == "PP":
return x == y
@@ -554,8 +554,8 @@ def compare(x: Union[float, int, np.ndarray], y: int) -> bool:
grads_num = self.get_gradients(X_der_batch, Y) * c
grads_num_s = self.get_gradients(X_der_batch_s, Y) * c
# clip gradients
grads_num = np.clip(grads_num, self.clip[0], self.clip[1]) # type:ignore
grads_num_s = np.clip(grads_num_s, self.clip[0], self.clip[1]) # type: ignore
grads_num = np.clip(grads_num, self.clip[0], self.clip[1])
grads_num_s = np.clip(grads_num_s, self.clip[0], self.clip[1])
X_der_batch, X_der_batch_s = [], []

# compute and clip gradients defined in graph
@@ -601,12 +601,12 @@ def compare(x: Union[float, int, np.ndarray], y: int) -> bool:
nontarget_proba_max))
print('Gradient graph min/max: {:.3f}/{:.3f}'.format(grads_graph.min(), grads_graph.max()))
print('Gradient graph mean/abs mean: {:.3f}/{:.3f}'
.format(np.mean(grads_graph), np.mean(np.abs(grads_graph)))) # type: ignore
.format(np.mean(grads_graph), np.mean(np.abs(grads_graph))))
if not self.model:
print('Gradient numerical attack min/max: {:.3f}/{:.3f}'
.format(grads_num.min(), grads_num.max())) # type: ignore
print('Gradient numerical mean/abs mean: {:.3f}/{:.3f}'
.format(np.mean(grads_num), np.mean(np.abs(grads_num)))) # type: ignore
.format(grads_num.min(), grads_num.max()))
print('Gradient numerical mean/abs mean: {:.3f}/{:.3f}' # type: ignore[str-format]
.format(np.mean(grads_num), np.mean(np.abs(grads_num))))
sys.stdout.flush()

# update best perturbation (distance) and class probabilities
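The `compare` helper above (and its near-twin in `cfproto.py` below) encodes a margin test: with confidence parameter `kappa`, a pertinent positive requires the predicted class to keep winning even after a `kappa` handicap. A worked example of the PP branch:

```python
import numpy as np

def compare_pp(probs: np.ndarray, y: int, kappa: float) -> bool:
    x = np.copy(probs)
    x[y] -= kappa               # handicap the target class by the margin kappa
    return np.argmax(x) == y    # PP holds only if y still wins

print(compare_pp(np.array([0.2, 0.7, 0.1]), y=1, kappa=0.1))    # True: margin 0.5 > kappa
print(compare_pp(np.array([0.45, 0.5, 0.05]), y=1, kappa=0.1))  # False: margin 0.05 < kappa
```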
22 changes: 11 additions & 11 deletions alibi/explainers/cfproto.py
@@ -742,7 +742,7 @@ def fit(self,
if d_type == 'abdm':
d_pair = abdm(train_data_bin, self.cat_vars_ord, cat_vars_bin)
elif d_type == 'mvdm':
d_pair = mvdm(train_data_ord, preds, self.cat_vars_ord, alpha=1) # type: ignore
d_pair = mvdm(train_data_ord, preds, self.cat_vars_ord, alpha=1)

# combined distance measure
if d_type == 'abdm-mvdm':
@@ -752,7 +752,7 @@

# pairwise distances
d_abdm = abdm(train_data_bin, self.cat_vars_ord, cat_vars_bin)
d_mvdm = mvdm(train_data_ord, preds, self.cat_vars_ord, alpha=1) # type: ignore
d_mvdm = mvdm(train_data_ord, preds, self.cat_vars_ord, alpha=1)

# multidim scaled distances
d_abs_abdm, _ = multidim_scaling(d_abdm, n_components=2, use_metric=True,
@@ -810,7 +810,7 @@ def fit(self,
ts = TrustScore()
if self.is_cat: # map categorical to numerical data
train_data = ord_to_num(train_data_ord, self.d_abs)
ts.fit(train_data, preds, classes=self.classes) # type: ignore
ts.fit(train_data, preds, classes=self.classes)
self.kdtrees = ts.kdtrees
self.X_by_class = ts.X_kdtree

@@ -1007,8 +1007,8 @@ def compare(x: Union[float, int, np.ndarray], y: int) -> bool:
"""
if not isinstance(x, (float, int, np.int64)):
x = np.copy(x)
x[y] += self.kappa # type: ignore
x = np.argmax(x) # type: ignore
x[y] += self.kappa
x = np.argmax(x) # type: ignore[assignment]
return x != y

# define target classes for prototype if not specified yet
@@ -1131,8 +1131,8 @@ def compare(x: Union[float, int, np.ndarray], y: int) -> bool:
grads_num_s = self.get_gradients(X_der_batch_s, Y, cat_vars_ord=self.cat_vars_ord,
grads_shape=pert_shape[1:]) * c
# clip gradients
grads_num = np.clip(grads_num, self.clip[0], self.clip[1]) # type: ignore
grads_num_s = np.clip(grads_num_s, self.clip[0], self.clip[1]) # type: ignore
grads_num = np.clip(grads_num, self.clip[0], self.clip[1])
grads_num_s = np.clip(grads_num_s, self.clip[0], self.clip[1])
X_der_batch, X_der_batch_s = [], []

# compute and clip gradients defined in graph
@@ -1202,12 +1202,12 @@ def compare(x: Union[float, int, np.ndarray], y: int) -> bool:
nontarget_proba_max))
print('Gradient graph min/max: {:.3f}/{:.3f}'.format(grads_graph.min(), grads_graph.max()))
print('Gradient graph mean/abs mean: {:.3f}/{:.3f}'
.format(np.mean(grads_graph), np.mean(np.abs(grads_graph)))) # type: ignore
.format(np.mean(grads_graph), np.mean(np.abs(grads_graph))))
if not self.model:
print('Gradient numerical attack min/max: {:.3f}/{:.3f}'
.format(grads_num.min(), grads_num.max())) # type: ignore
print('Gradient numerical mean/abs mean: {:.3f}/{:.3f}'
.format(np.mean(grads_num), np.mean(np.abs(grads_num)))) # type: ignore
.format(grads_num.min(), grads_num.max()))
print('Gradient numerical mean/abs mean: {:.3f}/{:.3f}' # type: ignore[str-format]
.format(np.mean(grads_num), np.mean(np.abs(grads_num))))
sys.stdout.flush()

# update best perturbation (distance) and class probabilities
14 changes: 7 additions & 7 deletions alibi/explainers/integrated_gradients.py
@@ -941,7 +941,7 @@ def explain(self,

if self._is_list:
X = cast(List[np.ndarray], X) # help mypy out
self.orig_dummy_input = [np.zeros((1,) + xx.shape[1:], dtype=xx.dtype) for xx in X] # type: ignore
self.orig_dummy_input = [np.zeros((1,) + xx.shape[1:], dtype=xx.dtype) for xx in X]
nb_samples = len(X[0])
input_dtypes = [xx.dtype for xx in X]
# Formatting baselines in case of models with multiple inputs
@@ -967,9 +967,9 @@

elif self._is_np:
X = cast(np.ndarray, X) # help mypy out
self.orig_dummy_input = np.zeros((1,) + X.shape[1:], dtype=X.dtype) # type: ignore
self.orig_dummy_input = np.zeros((1,) + X.shape[1:], dtype=X.dtype)
nb_samples = len(X)
input_dtypes = [X.dtype] # type: ignore
input_dtypes = [X.dtype]
# Formatting baselines for models with a single input
baselines = _format_baseline(X, baselines) # type: ignore # TODO: validate/narrow baselines type

@@ -979,7 +979,7 @@
# defining integral method
step_sizes_func, alphas_func = approximation_parameters(self.method)
step_sizes, alphas = step_sizes_func(self.n_steps), alphas_func(self.n_steps)
target = _format_target(target, nb_samples) # type: ignore[assignment]
target = _format_target(target, nb_samples)

if self._is_list:
X = cast(List[np.ndarray], X) # help mypy out
@@ -990,7 +990,7 @@
inputs = [tf.keras.Input(shape=xx.shape[1:], dtype=xx.dtype) for xx in X]
self.model(inputs, **forward_kwargs)

_validate_output(self.model, target) # type: ignore[arg-type]
_validate_output(self.model, target)
_check_target(self.model.output_shape, target, nb_samples)
if self.layer is None:
# No layer passed, attributions computed with respect to the inputs
@@ -1176,7 +1176,7 @@ def _compute_attributions_list_input(self,
# define paths in features' space
paths = []
for i in range(len(X)):
x, baseline = X[i], baselines[i] # type: ignore
x, baseline = X[i], baselines[i]
# construct paths
path = np.concatenate([baseline + alphas[i] * (x - baseline) for i in range(self.n_steps)], axis=0)
paths.append(path)
@@ -1252,7 +1252,7 @@ def _compute_attributions_list_input(self,
target, target_paths,
self.n_steps, nb_samples,
step_sizes, j)
norm = X[j] - baselines[j] # type: ignore
norm = X[j] - baselines[j]
attribution = norm * sum_int
attributions.append(attribution)

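The path construction in `_compute_attributions_list_input` above is the standard integrated-gradients interpolation from baseline to input. A stripped-down version, using uniform steps where the real code takes its points from `approximation_parameters`:

```python
import numpy as np

x = np.array([[1.0, 2.0]])
baseline = np.zeros_like(x)
n_steps = 5
alphas = np.linspace(0, 1, n_steps)     # integration points along the path

# straight-line path: baseline + alpha * (x - baseline) for each alpha
path = np.concatenate([baseline + a * (x - baseline) for a in alphas], axis=0)
print(path[:, 0])  # [0.   0.25 0.5  0.75 1.  ] -- even steps toward x
```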
(Diff truncated: the remaining changed files are not shown.)