Change normalisation default, fix bug in normalise_by_negative, adapt citations, absolute imports #166

Merged Oct 18, 2022 (43 commits). The file changes shown below are from 10 of the 43 commits.

Commits
d1ebebe
Updated and Unified all Metric Citations. Wrote additional test cases…
leanderweber Oct 11, 2022
d190c86
Changed default normalisation function for all metrics to normalise_b…
leanderweber Oct 11, 2022
306976d
Changed all relative imports to absolute
leanderweber Oct 11, 2022
d12b912
Fixed undefined name in focus.py
leanderweber Oct 11, 2022
2e4c3a6
Merge branch 'main' into fix-normalisation-division
leanderweber Oct 11, 2022
14eb6b6
Edited Documentation
leanderweber Oct 12, 2022
a5f8e69
Updated docs
leanderweber Oct 12, 2022
a5f5faa
Updated docs
leanderweber Oct 13, 2022
7752c63
zennit test cases are not optional now. Resolved most * imports in qu…
leanderweber Oct 13, 2022
c507136
Updated tutorials with changes to helper hierarchy
leanderweber Oct 13, 2022
8f82724
Refactored installation options to allow for targeting single XAI pac…
leanderweber Oct 13, 2022
faef1fe
some fixes on documentation made
annahedstroem Oct 13, 2022
df80b16
tiny comments update
annahedstroem Oct 13, 2022
a918b41
minor change in docs
leanderweber Oct 14, 2022
82ef36e
Merge branch 'fix-normalisation-division' into installation-update
leanderweber Oct 14, 2022
dcca7c2
Merge pull request #172 from understandable-machine-intelligence-lab/…
leanderweber Oct 14, 2022
9b7e71c
Fixed Remaining blanket imports in tests (except fixtures)
leanderweber Oct 14, 2022
83ec610
Changed quantus/helpers/functions to quantus/functions
leanderweber Oct 14, 2022
4d8d6a6
updated docs and TODO
leanderweber Oct 14, 2022
665e81b
Updated Readme. Ran black.
leanderweber Oct 14, 2022
6281492
Fix error in tests
leanderweber Oct 14, 2022
9a821d9
Updated Getting Started
annahedstroem Oct 14, 2022
a14bddf
minor docs changes
leanderweber Oct 14, 2022
8f4f3b0
fix tests
leanderweber Oct 14, 2022
2972a63
Resolved merge conflicts
annahedstroem Oct 17, 2022
5477108
Fixed import metric file
annahedstroem Oct 17, 2022
de02a23
fixed import warn
annahedstroem Oct 17, 2022
508854d
Fixed import issues caused by merge
leanderweber Oct 18, 2022
81e4385
Small fixes to docs
annahedstroem Oct 18, 2022
e85a726
updated README.md
annahedstroem Oct 18, 2022
9b1c1bb
updated documentation
annahedstroem Oct 18, 2022
a0cef7b
Updated docs
annahedstroem Oct 18, 2022
5319b84
updated docs
annahedstroem Oct 18, 2022
6bca39c
updated docs
annahedstroem Oct 18, 2022
fbfb472
updated docs
annahedstroem Oct 18, 2022
fb12786
updated docs
annahedstroem Oct 18, 2022
8aeca45
updated docs
annahedstroem Oct 18, 2022
63e2214
updated docs
annahedstroem Oct 18, 2022
465e465
README.md update
annahedstroem Oct 18, 2022
be6c72b
README.md update
annahedstroem Oct 18, 2022
184bd98
README.md update and other docs
annahedstroem Oct 18, 2022
77e9ba0
README.md update and other docs
annahedstroem Oct 18, 2022
ce8bcec
README.md update and other docs
annahedstroem Oct 18, 2022
7 changes: 0 additions & 7 deletions __init__.py

This file was deleted.

2 changes: 1 addition & 1 deletion docs/source/docs_api/quantus.helpers.rst
@@ -28,4 +28,4 @@ Submodules
quantus.helpers.similarity_func
quantus.helpers.tf_model
quantus.helpers.utils
quantus.helpers.warn_func
quantus.helpers.warn
2 changes: 1 addition & 1 deletion docs/source/docs_api/quantus.helpers.warn_func.rst
@@ -1,7 +1,7 @@
quantus.helpers.warn\_func module
=================================

.. automodule:: quantus.helpers.warn_func
.. automodule:: quantus.helpers.warn
:members:
:undoc-members:
:show-inheritance:
281 changes: 212 additions & 69 deletions docs/source/getting_started/getting_started_example.md

Large diffs are not rendered by default.

53 changes: 43 additions & 10 deletions docs/source/getting_started/installation.md
@@ -1,33 +1,66 @@
## Quick Installation

Quantus can be installed from [PyPI](https://pypi.org/project/quantus/)
(this way assumes that you have either `torch` or `tensorflow` already installed on your machine).
### Installing from PyPI

If you already have [PyTorch](https://pytorch.org/) or [Tensorflow](https://www.tensorflow.org) installed on your machine,
Quantus can be obtained from [PyPI](https://pypi.org/project/quantus/) as follows:

```setup
pip install quantus
```

If you don't have `torch` or `tensorflow` installed, you can simply add the package you want and install it simultaneously.
Otherwise, you can simply add the desired framework in brackets, and it will be installed in addition to Quantus:

```setup
pip install "quantus[torch]"
pip install quantus[torch]
```
Or, alternatively for `tensorflow` you run:

OR

```setup
pip install "quantus[tensorflow]"
pip install quantus[tensorflow]
```

Review comment (Member): the " must be kept, else the pip command won't work! please add it to the others as well
Reply (Collaborator, author): done

Additionally, if you want to use the basic explainability functionality such as `quantus.explain` in your evaluations, you can run `pip install "quantus[extras]"` (this step requires that either `torch` or `tensorflow` is installed).
To use Quantus with `zennit` support, install in the following way: `pip install "quantus[zennit]"`.
### Installing from requirements.txt

Alternatively, simply install requirements.txt (again, this requires that either `torch` or `tensorflow` is installed and won't include the explainability functionality to the installation):
Alternatively, you can simply install from the requirements.txt found [here](https://github.com/understandable-machine-intelligence-lab/Quantus/blob/main/requirements.txt),
however, this only installs the default setup and requires either PyTorch or Tensorflow to be installed already:

```setup
pip install -r requirements.txt
```

**Package requirements**
### Installing XAI Library Support (PyPI only)

Most evaluation metrics in Quantus allow for a choice of either providing pre-computed explanations directly as an input,
or instead making use of the wrappers implemented in `quantus.explain` around common explainability libraries
(see the usage sketch at the end of this section). The following XAI libraries are currently supported:

**Captum**

To enable the use of wrappers around [Captum](https://captum.ai/), you need to have PyTorch already installed and can then run

```setup
pip install quantus[extras]
```

Review comment (Member): These should be captum, tensorflow? please also add quotation!
Reply (Collaborator, author): no, captum is based on pytorch, so I think it is correct as-is. However, with the new installation options, neither torch nor tensorflow will be required to be installed already, so I will just remove that part.

**tf-explain**

To enable the use of wrappers around [tf-explain](https://github.com/sicara/tf-explain), you need to have Tensorflow already installed and can then run

```setup
pip install quantus[extras]
```

**Zennit**

To use Quantus with support for the [Zennit](https://github.com/chr5tphr/zennit) library you need to have PyTorch already installed and can then run

```setup
pip install quantus[zennit]
```
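
To illustrate the two usage modes described at the top of this section (passing pre-computed attributions vs. letting the `quantus.explain` wrapper compute them), here is a minimal, self-contained sketch. It is not part of the installation instructions: the tiny untrained model, the random data, the choice of `PixelFlipping`, and keyword names such as `explain_func_kwargs` are illustrative assumptions based on the Quantus documentation and may differ slightly between versions.

```python
import numpy as np
import torch
import quantus

# A tiny, untrained stand-in classifier and random MNIST-shaped data, purely for illustration.
model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(28 * 28, 10)).eval()
x_batch = np.random.rand(8, 1, 28, 28).astype(np.float32)
y_batch = np.random.randint(0, 10, size=8)

metric = quantus.PixelFlipping()

# Option 1: pass pre-computed attributions directly (no explainability extras needed).
a_batch = np.random.rand(8, 1, 28, 28)
scores = metric(model=model, x_batch=x_batch, y_batch=y_batch, a_batch=a_batch, device="cpu")

# Option 2: let Quantus compute attributions via the quantus.explain wrapper
# (requires the [extras] install so that e.g. Captum is available for a torch model).
scores = metric(
    model=model,
    x_batch=x_batch,
    y_batch=y_batch,
    a_batch=None,
    explain_func=quantus.explain,
    explain_func_kwargs={"method": "Saliency"},
    device="cpu",
)
```

Pixel-Flipping is used here only because it accepts pre-computed attributions as-is; robustness metrics such as MaxSensitivity always need an `explain_func`, since they re-explain perturbed inputs.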

### Package Requirements

```
python>=3.7.0
83 changes: 65 additions & 18 deletions docs/source/guidelines/guidelines_and_disclaimers.md
@@ -1,40 +1,87 @@
## User guidelines

Just 'throwing' some metrics at your XAI explanations and consider the job done, is an approach not very productive.
Just 'throwing' some metrics at your explanations and considering the job done is not a very productive approach.
Before evaluating your explanations, make sure to:

* Always read the original publication to understand the context that the metric was introduced in - it may differ from your specific task and/ or data domain
* Spend time on understanding and investigate how the hyperparameters of the metrics influence the evaluation outcome; does changing the perturbation function fundamentally change scores?
* Establish evidence that your chosen metric is well-behaved in your specific setting e.g., include a random explanation (as a control variant) to verify the metric
* Reflect on the metric's underlying assumptions e.g., most perturbation-based metrics don't account for nonlinear interactions between features
* Ensure that your model is well-trained, a poor behaving model e.g., a non-robust model will have useless explanations
* Spend time on understanding and investigating how the hyperparameters of metrics can influence the evaluation outcome. Some parameters that usually influence results significantly include:
* the choice of perturbation function
* whether normalisation is applied and the choice of the normalisation function
* whether unsigned or signed attributions are considered
* Establish evidence that your chosen metric is well-behaved in your specific setting, e.g., include a random explanation (as a control variant) to verify the metric (see the sketch after this list)
* Reflect on the metric's underlying assumptions, e.g., most perturbation-based metrics don't account for nonlinear interactions between features
* Ensure that your model is well-trained, as a poorly behaving model, e.g., a non-robust model, will have useless explanations
* Each metric measures different properties of explanations, and especially the various categories (faithfulness, localisation, ...) can be viewed as different facets of evaluation,
but a single metric never suffices as a sole criterion for the quality of an explanation method
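
As a concrete illustration of the control-variant advice above, the sketch below compares a metric's scores for real attributions against random ones. This is a hedged sketch rather than Quantus documentation: it assumes that `model`, `x_batch`, `y_batch` and real attributions `a_batch` have already been prepared (e.g., as in the Getting Started example), and the metric choice and keyword names are placeholders that may differ between versions.

```python
import numpy as np
import quantus

# Assumes `model`, `x_batch`, `y_batch` and real attributions `a_batch` already exist,
# e.g. prepared as in the Getting Started example.
metric = quantus.FaithfulnessCorrelation(normalise=True)
# Hyperparameters such as the normalisation or perturbation function can likewise be
# varied via the constructor; see the API documentation for the exact keyword names.

scores_real = metric(model=model, x_batch=x_batch, y_batch=y_batch, a_batch=a_batch, device="cpu")

# Control variant: random attributions of the same shape should score clearly worse.
a_batch_random = np.random.uniform(size=np.asarray(a_batch).shape)
scores_random = metric(model=model, x_batch=x_batch, y_batch=y_batch, a_batch=a_batch_random, device="cpu")

print("real:", np.mean(scores_real), "random:", np.mean(scores_random))
```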


## Disclaimers

**1. Implementation may differ from the original author(s)**

Note that the implementations of metrics in this library have not been verified by the original authors. Thus any metric implementation in this library may differ from the original authors. It is moreover likely that differences exist since 1) the source code of original publication is most often not made publicly available, 2) sometimes the mathematical definition of the metric is missing and/ or 3) the description of hyperparameter choice was left out. This leaves room for (subjective) interpretations.
Note that the implementations of metrics in this library have not been verified by the original authors.
Thus, any metric implementation in this library may differ from the original authors' version.
It is moreover likely that differences exist, since
* the source code of the original publication is most often not made publicly available,
* sometimes the mathematical definition of the metric is missing, and
* the description of hyperparameter choices was left out.

This leaves room for (subjective) interpretations.

**2. Discrepancy in operationalisation is likely**

Metrics for XAI methods are often empirical interpretations (or translations) of qualities that researcher(s) stated were important for explanations to fulfil. Hence it may be a discrepancy between what the author claims to measure by the proposed metric and what is actually measured e.g., using entropy as an operationalisation of explanation complexity.
Metrics for XAI methods are often empirical interpretations (or translations) of qualities that researcher(s) stated
were important for explanations to fulfil. Hence there may be a discrepancy between what the author claims to measure by
the proposed metric and what is actually measured, e.g., using entropy as an operationalisation of explanation complexity.

**3. Hyperparameters may (and should) change depending on application/ task and dataset/ domain**
**3. Hyperparameters may (and should) change depending on the application/ task and dataset/ domain**

Metrics are often designed with a specific use case in mind e.g., in an image classification setting. Thus it is not always clear how to change the hyperparameters to make them suitable for another setting. Pay careful attention to how your hyperparameters should be tuned; what is a proper baseline value in your context i.e., that represents the notion of “missingness”?
Metrics are often designed with a specific use case in mind, most commonly for an image classification setting.
Thus it is not always clear how to change the hyperparameters to make them suitable for another setting.
Pay careful attention to how your hyperparameters should be tuned and what a proper baseline value could be in your context, i.e., one that represents the notion of “missingness”.

**4. Evaluation of explanations must be understood in its context; its application and of its kind**

What evaluation metric to use is completely dependent on: 1) the type of explanation (explanation by example cannot be evaluated the same way as attribution-based/ feature-importance methods), 2) the application/ task: we may not require the explanations to fulfil certain criteria in some context compared to others e.g., multi-label vs single label classification 3) the dataset/ domain: text vs images e.g, different dependency structures between features exist, and preprocessing of the data, leading to differences on what the model may perceive, and how attribution methods can react to that (prime example: MNIST in range [0,1] vs [-1,1] and any NN) and 4) the user (most evaluation metrics are founded from principles of what a user want from its explanation e.g., even in the seemingly objective measures we are enforcing our preferences e.g., in TCAV "explain in a language we can understand", object localisation "explain over objects we think are important", robustness "explain similarly over things we think looks similar" etc. Thus it is important to define what attribution quality means for each experimental setting.

**5. Evaluation (and explanations) will be unreliable if the model is not robust**

Evaluation will fail if you explain a poorly trained model. If the model is not robust, then explanations cannot be expected to be meaningful or interpretable [1, 2]. If the model achieves high predictive performance, but for the wrong reasons (e.g., Clever Hans, Backdoor issues) [3, 4], there is likely to be unexpected effects on the localisation metrics (which generally captures how well explanations are able to centre attributional evidence on the object of interest).

**6. Evaluation outcomes can be true to data or true to model**

Interpretation of evaluation outcome will differ depending on whether we prioritise that attributions are faithful to data or to the model [5, 6]. As explained in [5], imagine if a model is trained to use only one of two highly correlated features. The explanation might then rightly point out that this one feature is important (and that the other correlated feature is not). But if we were to re-train the model, the model might now pick the other feature as basis for prediction, for which the explanation will consequently tell another story --- that the other feature is important. Since the explanation function have returned conflicting information about what features are important --- we might now believe that the explanation function in itself is unstable. But this may not necessarily be true --- in this case, the explanation has remained faithful to the model but not the data. As such, in the context of evaluation, to avoid misinterpretation of results, it may therefore be important to articulate what you care most about explaining.
What evaluation metric to use can depend on the following factors:
* **The type of explanation:** e.g., an explanation by example cannot be evaluated
the same way as attribution-based or feature-importance methods
* **The application/ task:** we may not require the explanations to fulfil
certain criteria in some context compared to others, e.g., multi-label
vs. single label classification
* **The dataset/ domain:** e.g., text vs. images, or whether different dependency structures between features exist,
as well as the preprocessing of the data, leading to differences in what the model
may perceive and in how attribution methods can react to that
* **The user:** most evaluation metrics are founded on principles of what
a user may expect from explanations, even in the seemingly objective
measures. E.g., localisation asks for the explanation to be focused on objects expected to be important,
and may fail independently of the explanation if the model simply does not consider those objects,
while robustness asks for similar explanations over things we
think look similar, without considering how the model represents the data manifold, etc.
Thus it is important to define what attribution quality means for each experimental setting.

**5. Evaluation (and explanations) can be unreliable if the model is not robust**

Evaluation can fail (depending on the evaluation method) if you explain a poorly trained model.
If the model is not robust, then explanations cannot be expected to be meaningful or interpretable [1, 2].
If the model achieves high predictive performance, but for the wrong reasons (e.g., Clever Hans effects, Backdoor issues)
[3, 4], unexpected effects on localisation metrics are likely.

**6. Evaluation outcomes can be true to the data or true to the model**

Generally, explanations should depend on both the data and the model.
However, both are difficult to measure at the same time, and
the interpretation of evaluation outcomes will differ depending on whether we prioritise
that attributions are faithful to data or to the model [5, 6]. As explained in [5],
imagine if a model is trained to use only one of two highly correlated features.
The explanation might then rightly point out that this one feature is important
(and that the other correlated feature is not). But if we were to re-train the model,
the model might now pick the other feature as the basis for prediction, for which the explanation
will consequently tell another story --- that the other feature is important. Since the
explanation function has returned conflicting information about what features are important,
we might now believe that the explanation function itself is unstable. But this may
not necessarily be true --- in this case, the explanation has remained faithful to the model
but not to the data. As such, in the context of evaluation, to avoid misinterpretation of results,
it may be important to articulate what you care most about explaining.
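
The correlated-feature scenario above can be made concrete with a tiny, self-contained numerical sketch (plain NumPy, independent of Quantus; the two linear "models" and the gradient-times-input attribution are illustrative choices, not a prescribed method):

```python
import numpy as np

rng = np.random.default_rng(0)
x1 = rng.normal(size=1000)
X = np.stack([x1, x1], axis=1)   # two perfectly correlated features

# Two linear "models" with identical outputs but different internal reliance.
w_a = np.array([2.0, 0.0])       # relies only on feature 0
w_b = np.array([0.0, 2.0])       # relies only on feature 1
assert np.allclose(X @ w_a, X @ w_b)

# A simple gradient-times-input attribution tells two different stories,
# yet each explanation is faithful to its own model.
attr_a = w_a * X                 # all attribution mass on feature 0
attr_b = w_b * X                 # all attribution mass on feature 1
print(np.abs(attr_a).mean(axis=0), np.abs(attr_b).mean(axis=0))
```

Neither explanation is unstable here; each is faithful to its own model, and the two models simply happen to behave identically on this data.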

**References**

28 changes: 20 additions & 8 deletions docs/source/index.md
@@ -16,21 +16,32 @@ e.g. pixel replacement strategy of a faithfulness test influences the ranking of
[📑 Shortcut to paper!](https://arxiv.org/abs/2202.06861)


This documentation is complementary to Quantus repository's [README.md](https://github.com/understandable-machine-intelligence-lab/Quantus) and provides documentation
for how to install Quantus (**Installation**), how to contribute to the project (**Developer Documentation**) and on the interface (**API Documentation**).
For further guidance on what to think about when applying Quantus, please read the user guidelines (**Guidelines**).
This documentation is complementary to the [README.md](https://github.com/understandable-machine-intelligence-lab/Quantus) in the Quantus repository and provides documentation
for how to {doc}`install </getting_started/installation>` Quantus, how to {doc}`contribute </docs_dev/CONTRIBUTING>` to the project, and on the {doc}`interface </docs_api/modules>`.
For further guidance on what to think about when applying Quantus, please read the {doc}`user guidelines </guidelines/guidelines_and_disclaimers>`.

Review comment (Member): Once everything related to the Installation, Getting started etc is finished then we also need to update the README.md so that they match
Reply (Collaborator, author): updated README

Do you want to get started? Please have a look at our simple MNIST/torch/Saliency/IntGrad toy example (**Getting started**).
Do you want to get started? Please have a look at our simple {doc}`toy example </getting_started/getting_started_example>` with PyTorch using MNIST data.
For more examples, check the [tutorials](https://github.com/understandable-machine-intelligence-lab/Quantus/tree/main/tutorials) folder.

Quantus can be installed from [PyPI](https://pypi.org/project/quantus/)
(this way assumes that you have either `torch` or `tensorflow` already installed on your machine).
If you already have [PyTorch](https://pytorch.org/) or [Tensorflow](https://www.tensorflow.org) installed on your machine, Quantus can be obtained from [PyPI](https://pypi.org/project/quantus/) as follows:

```setup
pip install quantus
```

For alternative ways to install Quantus, read more under **Installation**.
Otherwise, you can simply add the desired framework in brackets, and it will be installed in addition to Quantus:

```setup
pip install quantus[torch]
```

OR

```setup
pip install quantus[tensorflow]
```

For a more in-depth guide on how to install Quantus, read more {doc}`here </getting_started/installation>`.

```{toctree}
:caption: Installation
@@ -72,7 +83,7 @@ guidelines/guidelines_and_disclaimers

If you find this toolkit or its companion paper
[**Quantus: An Explainable AI Toolkit for Responsible Evaluation of Neural Network Explanations**](https://arxiv.org/abs/2202.06861)
interesting or useful in your research, use following Bibtex annotation to cite us:
interesting or useful in your research, please use the following Bibtex annotation to cite us:

```bibtex
@article{hedstrom2022quantus,
@@ -92,3 +103,4 @@ interesting or useful in your research, use following Bibtex annotation to cite
```

When applying the individual metrics of Quantus, please make sure to also properly cite the work of the original authors.
You can find the relevant citations in the documentation of each respective metric {doc}`here </docs_api/modules>`.
14 changes: 11 additions & 3 deletions quantus/__init__.py
@@ -4,6 +4,14 @@
# You should have received a copy of the GNU Lesser General Public License along with Quantus. If not, see <https://www.gnu.org/licenses/>.
# Quantus project URL: <https://github.com/understandable-machine-intelligence-lab/Quantus>.

from .helpers import *
from .metrics import *
from .evaluation import *
# Enable quantus.evaluate call
from quantus.evaluation import evaluate

# Enable quantus.explain call
from quantus.helpers.functions.explanation_func import explain

# Enable quantus.<function-class>.<function-name> call
from quantus.helpers.functions import *

# Enable quantus.<metric> call
from quantus.metrics import *
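
For context, a minimal sketch of what these top-level imports enable after the refactor (assuming Quantus is installed with the `[extras]` option; `PixelFlipping` is just one example metric):

```python
import quantus

assert callable(quantus.evaluate)  # quantus.evaluate(...) for bulk evaluation
assert callable(quantus.explain)   # quantus.explain(...) wrapper; needs the extras install
metric = quantus.PixelFlipping()   # metrics are exposed directly, e.g. quantus.<Metric>()
```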
7 changes: 4 additions & 3 deletions quantus/evaluation.py
@@ -7,11 +7,12 @@
# Quantus project URL: <https://github.com/understandable-machine-intelligence-lab/Quantus>.

from typing import Union, Callable, Dict, Optional, List

import numpy as np

from .helpers import asserts
from .helpers import utils
from .helpers.model_interface import ModelInterface
from quantus.helpers import asserts
from quantus.helpers import utils
from quantus.helpers.model.model_interface import ModelInterface


def evaluate(
20 changes: 1 addition & 19 deletions quantus/helpers/__init__.py
@@ -6,24 +6,6 @@

from importlib import util

# Import files dependent on package installations.
__EXTRAS__ = util.find_spec("captum") or util.find_spec("tf_explain")
__MODELS__ = util.find_spec("torch") or util.find_spec("tensorflow")

from .asserts import *
from .constants import *
from .norm_func import *
from .normalise_func import *
from .mosaic_func import *
from .loss_func import *
from .discretise_func import *
from .perturb_func import *
from .plotting import *
from .similarity_func import *
from .utils import *
from .warn_func import *

# Import files dependent on package installations.
if __MODELS__:
from .models import *
if __EXTRAS__:
from .explanation_func import *
3 changes: 2 additions & 1 deletion quantus/helpers/asserts.py
@@ -6,9 +6,10 @@
# You should have received a copy of the GNU Lesser General Public License along with Quantus. If not, see <https://www.gnu.org/licenses/>.
# Quantus project URL: <https://github.com/understandable-machine-intelligence-lab/Quantus>.

import numpy as np
from typing import Callable, Tuple, Sequence

import numpy as np


def attributes_check(metric):
"""