% explanatory-supervision.bib

% ============================================================================
% Passive Learning
% ============================================================================

@inproceedings{lei2016rationalizing,
  author    = {Lei, Tao and Barzilay, Regina and Jaakkola, Tommi},
  title     = {Rationalizing Neural Predictions},
  booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing},
  pages     = {107--117},
  year      = {2016},
}

@inproceedings{ross2017right,
  author    = {Ross, Andrew Slavin and Hughes, Michael C and Doshi-Velez, Finale},
  title     = {Right for the right reasons: training differentiable models by constraining their explanations},
  booktitle = {Proceedings of the 26th International Joint Conference on Artificial Intelligence},
  pages     = {2662--2670},
  year      = {2017},
}

% NeurIPS paper: stored as a proceedings contribution, like the other NeurIPS entries in this file.
@inproceedings{camburu2018snli,
  author    = {Camburu, Oana-Maria and Rockt{\"a}schel, Tim and Lukasiewicz, Thomas and Blunsom, Phil},
  title     = {{e-SNLI}: Natural language inference with natural language explanations},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {31},
  year      = {2018},
}

@inproceedings{wang2018learning,
  author    = {Wang, Jiaxuan and Oh, Jeeheh and Wang, Haozhu and Wiens, Jenna},
  title     = {Learning credible models},
  booktitle = {Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining},
  pages     = {2417--2426},
  year      = {2018},
}

@inproceedings{li2018tell,
  author    = {Li, Kunpeng and Wu, Ziyan and Peng, Kuan-Chuan and Ernst, Jan and Fu, Yun},
  title     = {Tell me where to look: Guided attention inference network},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {9215--9223},
  year      = {2018},
}

@inproceedings{shetty2019not,
  author    = {Shetty, Rakshith and Schiele, Bernt and Fritz, Mario},
  title     = {Not Using the Car to See the Sidewalk--Quantifying and Controlling the Effects of Context in Classification and Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {8218--8226},
  year      = {2019},
}

% Only the acronym is brace-protected so bibliography styles can still apply their own title casing.
@inproceedings{selvaraju2019taking,
  author    = {Selvaraju, Ramprasaath R and Lee, Stefan and Shen, Yilin and Jin, Hongxia and Ghosh, Shalini and Heck, Larry and Batra, Dhruv and Parikh, Devi},
  title     = {Taking a {HINT}: Leveraging Explanations to Make Vision and Language Models More Grounded},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages     = {2591--2600},
  year      = {2019},
}

% IEEE is recorded as the publisher of the proceedings rather than as a sponsoring organization.
@inproceedings{du2019learning,
  author    = {Du, Mengnan and Liu, Ninghao and Yang, Fan and Hu, Xia},
  title     = {Learning credible deep neural networks with rationale regularization},
  booktitle = {2019 IEEE International Conference on Data Mining (ICDM)},
  pages     = {150--159},
  year      = {2019},
  publisher = {IEEE},
}

@inproceedings{bao2018deriving,
  author    = {Bao, Yujia and Chang, Shiyu and Yu, Mo and Barzilay, Regina},
  title     = {Deriving Machine Attention from Human Rationales},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  pages     = {1903--1913},
  year      = {2018},
}

% Acronyms are brace-protected against style recasing; the glued initials "CF" are split into "C. F.".
@inproceedings{hind2019ted,
  author    = {Hind, Michael and Wei, Dennis and Campbell, Murray and Codella, Noel C. F. and Dhurandhar, Amit and Mojsilovi{\'c}, Aleksandra and Natesan Ramamurthy, Karthikeyan and Varshney, Kush R},
  title     = {{TED}: Teaching {AI} to explain its decisions},
  booktitle = {Proceedings of the 2019 AAAI/ACM Conference on AI, Ethics, and Society},
  pages     = {123--129},
  year      = {2019},
}

@inproceedings{liu2019incorporating,
  author    = {Liu, Frederick and Avci, Besim},
  title     = {Incorporating Priors with Feature Attribution on Text Classification},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  pages     = {6274--6283},
  year      = {2019},
}

@inproceedings{ghaeini2019saliency,
  author    = {Ghaeini, Reza and Fern, Xiaoli and Shahbazi, Hamed and Tadepalli, Prasad},
  title     = {Saliency Learning: Teaching the Model Where to Pay Attention},
  booktitle = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  pages     = {4016--4025},
  year      = {2019},
}

% Only the acronym is brace-protected; PMLR is recorded as publisher rather than organization.
@inproceedings{zhuang2019care,
  author    = {Zhuang, Jiaxin and Cai, Jiabin and Wang, Ruixuan and Zhang, Jianguo and Zheng, Weishi},
  title     = {{CARE}: Class Attention to Regions of Lesion for Classification on Imbalanced Data},
  booktitle = {International Conference on Medical Imaging with Deep Learning},
  pages     = {588--597},
  year      = {2019},
  publisher = {PMLR},
}

% The camelCase method name is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{simpson2019gradmask,
  author    = {Simpson, Becks and Dutil, Francis and Bengio, Yoshua and Cohen, Joseph Paul},
  title     = {{GradMask}: Reduce Overfitting by Regularizing Saliency},
  booktitle = {International Conference on Medical Imaging with Deep Learning--Extended Abstract Track},
  year      = {2019},
}

@inproceedings{strout2019human,
  author    = {Strout, Julia and Zhang, Ye and Mooney, Raymond},
  title     = {Do Human Rationales Improve Machine Explanations?},
  booktitle = {Proceedings of the 2019 ACL Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP},
  pages     = {56--62},
  year      = {2019},
}

@inproceedings{pedapati2020learning,
  author    = {Pedapati, Tejaswini and Balakrishnan, Avinash and Shanmugam, Karthikeyan and Dhurandhar, Amit},
  title     = {Learning Global Transparent Models Consistent with Local Contrastive Explanations},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {33},
  year      = {2020},
}

@inproceedings{ramamurthy2020model,
  author    = {Natesan Ramamurthy, Karthikeyan and Vinzamuri, Bhanukiran and Zhang, Yunfeng and Dhurandhar, Amit},
  title     = {Model Agnostic Multilevel Explanations},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {33},
  year      = {2020},
}

% PMLR is recorded as the publisher of the proceedings rather than as a sponsoring organization.
@inproceedings{rieger2020interpretations,
  author    = {Rieger, Laura and Singh, Chandan and Murdoch, William and Yu, Bin},
  title     = {Interpretations are useful: penalizing explanations to align neural networks with prior knowledge},
  booktitle = {International Conference on Machine Learning},
  pages     = {8116--8126},
  year      = {2020},
  publisher = {PMLR},
}

% NOTE(review): "and others" replaced by the remaining author and year set to the ICLR 2021 edition -- verify against the proceedings.
@inproceedings{ebrahimi2020remembering,
  author    = {Ebrahimi, Sayna and Petryk, Suzanne and Gokul, Akash and Gan, William and Gonzalez, Joseph E and Rohrbach, Marcus and Darrell, Trevor},
  title     = {Remembering for the Right Reasons: Explanations Reduce Catastrophic Forgetting},
  booktitle = {International Conference on Learning Representations},
  year      = {2021},
}

@inproceedings{jain2020learning,
  author    = {Jain, Sarthak and Wiegreffe, Sarah and Pinter, Yuval and Wallace, Byron C},
  title     = {Learning to Faithfully Rationalize by Construction},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  pages     = {4459--4473},
  year      = {2020},
}

@article{schneider2020reflective,
  author  = {Schneider, Johannes and Vlachos, Michalis},
  title   = {Reflective-Net: Learning from Explanations},
  journal = {arXiv preprint arXiv:2011.13986},
  year    = {2020},
}

@article{lage2020learning,
  author  = {Lage, Isaac and Doshi-Velez, Finale},
  title   = {Learning Interpretable Concept-Based Models with Human Feedback},
  journal = {arXiv preprint arXiv:2012.02898},
  year    = {2020},
}

@article{erion2021improving,
  author    = {Erion, Gabriel and Janizek, Joseph D and Sturmfels, Pascal and Lundberg, Scott M and Lee, Su-In},
  title     = {Improving performance of deep learning models with axiomatic attribution priors and expected gradients},
  journal   = {Nature Machine Intelligence},
  pages     = {1--12},
  year      = {2021},
  publisher = {Nature Publishing Group},
}

% Method name and acronym are brace-protected; the dash after the method name is set off as a separator.
@article{setzu2021glocalx,
  author    = {Setzu, Mattia and Guidotti, Riccardo and Monreale, Anna and Turini, Franco and Pedreschi, Dino and Giannotti, Fosca},
  title     = {{GLocalX} -- From Local to Global Explanations of Black Box {AI} Models},
  journal   = {Artificial Intelligence},
  volume    = {294},
  pages     = {103457},
  year      = {2021},
  publisher = {Elsevier},
}

@inproceedings{bahadori2021debiasing,
  author    = {Bahadori, Mohammad Taha and Heckerman, David},
  title     = {Debiasing Concept-based Explanations with Causal Analysis},
  booktitle = {International Conference on Learning Representations},
  year      = {2021},
}

@inproceedings{raghu2021teaching,
  author    = {Raghu, Aniruddh and Raghu, Maithra and Kornblith, Simon and Duvenaud, David and Hinton, Geoffrey},
  title     = {Teaching with Commentaries},
  booktitle = {International Conference on Learning Representations},
  year      = {2021},
}

@inproceedings{viviano2021saliency,
  author    = {Viviano, Joseph D and Simpson, Becks and Dutil, Francis and Bengio, Yoshua and Cohen, Joseph Paul},
  title     = {Saliency is a possible red herring when diagnosing poor generalization},
  booktitle = {International Conference on Learning Representations},
  year      = {2021},
}

@inproceedings{chang2021towards,
  author    = {Chang, Chun-Hao and Adam, George Alexandru and Goldenberg, Anna},
  title     = {Towards Robust Classification Model by Counterfactual and Invariant Data Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {15212--15221},
  year      = {2021},
}

% PMLR is recorded as the publisher of the proceedings rather than as a sponsoring organization.
@inproceedings{nanfack2021global,
  author    = {Nanfack, G{\'e}raldin and Temple, Paul and Fr{\'e}nay, Beno{\^\i}t},
  title     = {Global explanations with decision rules: a co-learning approach},
  booktitle = {Uncertainty in Artificial Intelligence},
  pages     = {589--599},
  year      = {2021},
  publisher = {PMLR},
}

@inproceedings{zhang2021explain,
  author    = {Zhang, Zijian and Rudra, Koustav and Anand, Avishek},
  title     = {Explain and Predict, and then Predict again},
  booktitle = {Proceedings of the 14th ACM International Conference on Web Search and Data Mining},
  pages     = {418--426},
  year      = {2021},
}

% The acronym is brace-protected so sentence-casing styles do not lowercase it.
@article{lertvittayakumjorn2021explanation,
  author  = {Lertvittayakumjorn, Piyawat and Toni, Francesca},
  title   = {Explanation-Based Human Debugging of {NLP} Models: A Survey},
  journal = {arXiv preprint arXiv:2104.15135},
  year    = {2021},
}

% arXiv identifier 2102.02201 denotes a February 2021 submission, so the year is 2021.
@article{hase2021can,
  author  = {Hase, Peter and Bansal, Mohit},
  title   = {When Can Models Learn From Explanations? A Formal Framework for Understanding the Roles of Explanation Data},
  journal = {arXiv preprint arXiv:2102.02201},
  year    = {2021},
}

@article{barnett2021case,
  author    = {Barnett, Alina Jade and Schwartz, Fides Regina and Tao, Chaofan and Chen, Chaofan and Ren, Yinhao and Lo, Joseph Y and Rudin, Cynthia},
  title     = {A case-based interpretable deep learning model for classification of mass lesions in digital mammography},
  journal   = {Nature Machine Intelligence},
  volume    = {3},
  number    = {12},
  pages     = {1061--1070},
  year      = {2021},
  publisher = {Nature Publishing Group},
}

@article{chrysostomou2021enjoy,
  author  = {Chrysostomou, George and Aletras, Nikolaos},
  title   = {Enjoy the Salience: Towards Better Transformer-based Faithful Explanations with Word Salience},
  journal = {arXiv preprint arXiv:2108.13759},
  year    = {2021},
}

@article{han2021influence,
  author  = {Han, Xiaochuang and Tsvetkov, Yulia},
  title   = {Influence Tuning: Demoting Spurious Correlations via Instance Attribution and Instance-Driven Updates},
  journal = {arXiv preprint arXiv:2110.03212},
  year    = {2021},
}

@article{saha2021saliency,
  author  = {Saha, Gobinda and Roy, Kaushik},
  title   = {Saliency Guided Experience Packing for Replay in Continual Learning},
  journal = {arXiv preprint arXiv:2109.04954},
  year    = {2021},
}

@article{carton2021learn,
  author  = {Carton, Samuel and Kanoria, Surya and Tan, Chenhao},
  title   = {What to Learn, and How: Toward Effective Learning from Rationales},
  journal = {arXiv preprint arXiv:2112.00071},
  year    = {2021},
}

% Venue spelled like the other AAAI entries in this file; the thirty-sixth conference is recorded as volume 36.
@inproceedings{stacey2022supervising,
  author    = {Stacey, Joe and Belinkov, Yonatan and Rei, Marek},
  title     = {Supervising Model Attention with Human Explanations for Robust Natural Language Inference},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {36},
  year      = {2022},
}

% "Clever Hans" is a proper noun; it is capitalized and brace-protected against style recasing.
@article{anders2022finding,
  author    = {Anders, Christopher J and Weber, Leander and Neumann, David and Samek, Wojciech and M{\"u}ller, Klaus-Robert and Lapuschkin, Sebastian},
  title     = {Finding and removing {Clever Hans}: Using explanation methods to debug and improve deep models},
  journal   = {Information Fusion},
  volume    = {77},
  pages     = {261--295},
  year      = {2022},
  publisher = {Elsevier},
}

% PMLR is recorded as the publisher of the proceedings rather than as a sponsoring organization.
@inproceedings{wang2022toward,
  author    = {Wang, Haohan and Huang, Zeyi and Zhang, Hanlin and Lee, Yong Jae and Xing, Eric P},
  title     = {Toward learning human-aligned cross-domain robust models by countering misaligned features},
  booktitle = {Uncertainty in Artificial Intelligence},
  pages     = {2075--2084},
  year      = {2022},
  publisher = {PMLR},
}

% The acronym is brace-protected so sentence-casing styles do not lowercase it.
@article{hartmann2022survey,
  author  = {Hartmann, Mareike and Sonntag, Daniel},
  title   = {A survey on improving {NLP} models with human explanations},
  journal = {arXiv preprint arXiv:2204.08892},
  year    = {2022},
}

% The mixed-case method name is brace-protected so sentence-casing styles do not lowercase it.
@article{ying2022visfis,
  author  = {Ying, Zhuofan and Hase, Peter and Bansal, Mohit},
  title   = {{VisFIS}: Visual Feature Importance Supervision with Right-for-the-Right-Reason Objectives},
  journal = {arXiv preprint arXiv:2206.11212},
  year    = {2022},
}

@article{hagos2022identifying,
  author  = {Hagos, Misgina Tsighe and Curran, Kathleen M and Mac Namee, Brian},
  title   = {Identifying Spurious Correlations and Correcting them with an Explanation-based Learning},
  journal = {arXiv preprint arXiv:2211.08285},
  year    = {2022},
}

@inproceedings{rao2023studying,
  author    = {Rao, Sukrut and B{\"o}hle, Moritz and Parchami-Araghi, Amin and Schiele, Bernt},
  title     = {Studying How to Efficiently and Effectively Guide Models with Explanations},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages     = {1922--1933},
  year      = {2023},
}

@article{pukdee2023learning,
  author  = {Pukdee, Rattana and Sam, Dylan and Kolter, J Zico and Balcan, Maria-Florina and Ravikumar, Pradeep},
  title   = {Learning with Explanation Constraints},
  journal = {arXiv preprint arXiv:2303.14496},
  year    = {2023},
}

@article{eastwood2023spuriosity,
  author  = {Eastwood, Cian and Singh, Shashank and Nicolicioiu, Andrei Liviu and Vlastelica, Marin and von K{\"u}gelgen, Julius and Sch{\"o}lkopf, Bernhard},
  title   = {Spuriosity Didn't Kill the Classifier: Using Invariant Predictions to Harness Spurious Features},
  journal = {arXiv preprint arXiv:2307.09933},
  year    = {2023},
}

% The dataset name is capitalized and brace-protected; the subtitle dash is set off as a separator.
@inproceedings{neuhaus2023spurious,
  author    = {Neuhaus, Yannic and Augustin, Maximilian and Boreiko, Valentyn and Hein, Matthias},
  title     = {Spurious features everywhere -- large-scale detection of harmful spurious features in {ImageNet}},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year      = {2023},
}

% The acronym is brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{zhang2024targeted,
  author    = {Zhang, Dekai and Williams, Matt and Toni, Francesca},
  title     = {Targeted Activation Penalties Help {CNNs} Ignore Spurious Signals},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2024},
}


% ============================================================================
% Interactive Learning
% ============================================================================

@inproceedings{kulesza2015principles,
  author    = {Kulesza, Todd and Burnett, Margaret and Wong, Weng-Keen and Stumpf, Simone},
  title     = {Principles of explanatory debugging to personalize interactive machine learning},
  booktitle = {Proceedings of the 20th international conference on intelligent user interfaces},
  pages     = {126--137},
  year      = {2015},
}

@inproceedings{teso2019explanatory,
  author    = {Teso, Stefano and Kersting, Kristian},
  title     = {Explanatory interactive machine learning},
  booktitle = {Proceedings of the 2019 AAAI/ACM Conference on AI, Ethics, and Society},
  pages     = {239--245},
  year      = {2019},
}

@inproceedings{teso2019toward,
  author    = {Teso, Stefano},
  title     = {Toward Faithful Explanatory Active Learning with Self-explainable Neural Nets},
  booktitle = {Proceedings of the Workshop on Interactive Adaptive Learning (IAL 2019)},
  pages     = {4--16},
  year      = {2019},
}

@article{schramowski2020making,
  author    = {Schramowski, Patrick and Stammer, Wolfgang and Teso, Stefano and Brugger, Anna and Herbert, Franziska and Shao, Xiaoting and Luigs, Hans-Georg and Mahlein, Anne-Katrin and Kersting, Kristian},
  title     = {Making deep neural networks right for the right scientific reasons by interacting with their explanations},
  journal   = {Nature Machine Intelligence},
  volume    = {2},
  number    = {8},
  pages     = {476--486},
  year      = {2020},
  publisher = {Nature Publishing Group},
}

% PMLR is recorded as the publisher of the proceedings rather than as a sponsoring organization.
@inproceedings{heo2020cost,
  author    = {Heo, Jay and Park, Junhyeon and Jeong, Hyewon and Kim, Kwang Joon and Lee, Juho and Yang, Eunho and Hwang, Sung Ju},
  title     = {Cost-effective Interactive Attention Learning with Neural Attention Processes},
  booktitle = {International Conference on Machine Learning},
  pages     = {4228--4238},
  year      = {2020},
  publisher = {PMLR},
}

@inproceedings{honeycutt2020soliciting,
  author    = {Honeycutt, Donald and Nourani, Mahsan and Ragan, Eric},
  title     = {Soliciting human-in-the-loop user feedback for interactive machine learning reduces user trust and impressions of model accuracy},
  booktitle = {Proceedings of the AAAI Conference on Human Computation and Crowdsourcing},
  volume    = {8},
  number    = {1},
  pages     = {63--72},
  year      = {2020},
}

@article{mitsuhara2019embedding,
  author  = {Mitsuhara, Masahiro and Fukui, Hiroshi and Sakashita, Yusuke and Ogata, Takanori and Hirakawa, Tsubasa and Yamashita, Takayoshi and Fujiyoshi, Hironobu},
  title   = {Embedding Human Knowledge into Deep Neural Network via Attention Map},
  journal = {arXiv preprint arXiv:1905.03540},
  year    = {2019},
}

@article{sokol2020one,
  author    = {Sokol, Kacper and Flach, Peter},
  title     = {One explanation does not fit all},
  journal   = {KI-K{\"u}nstliche Intelligenz},
  pages     = {1--16},
  year      = {2020},
  publisher = {Springer},
}

% Acronym brace-protected; venue spelled exactly like the other EMNLP 2020 entry in this file.
@inproceedings{lertvittayakumjorn2020find,
  author    = {Lertvittayakumjorn, Piyawat and Specia, Lucia and Toni, Francesca},
  title     = {{FIND}: human-in-the-loop debugging deep text classifiers},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  pages     = {332--348},
  year      = {2020},
}

% Acronym brace-protected; the IJCAI Organization is recorded as publisher rather than organization.
@inproceedings{ciravegna2020human,
  author    = {Ciravegna, Gabriele and Giannini, Francesco and Gori, Marco and Maggini, Marco and Melacci, Stefano},
  title     = {Human-driven {FOL} explanations of deep learning},
  booktitle = {Twenty-Ninth International Joint Conference on Artificial Intelligence and Seventeenth Pacific Rim International Conference on Artificial Intelligence},
  pages     = {2234--2240},
  year      = {2020},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
}

% Only the acronym is brace-protected so bibliography styles can still apply their own title casing.
@inproceedings{liang2020alice,
  author    = {Liang, Weixin and Zou, James and Yu, Zhou},
  title     = {{ALICE}: Active Learning with Contrastive Natural Language Explanations},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  pages     = {4380--4391},
  year      = {2020},
}

% Title is no longer wrapped in a second brace pair, so bibliography styles can apply their own casing.
@article{popordanoska2020machine,
  author  = {Popordanoska, Teodora and Kumar, Mohit and Teso, Stefano},
  title   = {Machine Guides, Human Supervises: Interactive Learning with Global Explanations},
  journal = {arXiv preprint arXiv:2009.09723},
  year    = {2020},
}

% NeurIPS paper: stored as a proceedings contribution, like the other NeurIPS entries in this file.
@inproceedings{wang2021teaching,
  author    = {Wang, Chaoqi and Singla, Adish and Chen, Yuxin},
  title     = {Teaching an Active Learner with Contrastive Examples},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {34},
  pages     = {17968--17980},
  year      = {2021},
}

% Title is no longer wrapped in a second brace pair, so bibliography styles can apply their own casing.
@inproceedings{stammer2021right,
  author    = {Stammer, Wolfgang and Schramowski, Patrick and Kersting, Kristian},
  title     = {Right for the Right Concept: Revising Neuro-Symbolic Concepts by Interacting with their Explanations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {3619--3629},
  year      = {2021},
}

% Given names are spelled out as in the other entries of this file; venue follows the other AAAI entries (thirty-fifth conference recorded as volume 35).
@inproceedings{shao2021right,
  author    = {Shao, Xiaoting and Skryagin, Arseny and Schramowski, Patrick and Stammer, Wolfgang and Kersting, Kristian},
  title     = {Right for Better Reasons: Training Differentiable Models by Constraining their Influence Function},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {35},
  year      = {2021},
}

% Only the proper adjective "Boolean" is brace-protected so bibliography styles can still apply their own title casing.
@inproceedings{daly2021user,
  author    = {Daly, Elizabeth M and Mattetti, Massimiliano and Alkan, {\"O}znur and Nair, Rahul},
  title     = {User Driven Model Adjustment via {Boolean} Rule Explanations},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {35},
  number    = {7},
  pages     = {5896--5904},
  year      = {2021},
}

% Only the acronyms are brace-protected so bibliography styles can still apply their own title casing.
@article{ghai2021explainable,
  author    = {Ghai, Bhavya and Liao, Q Vera and Zhang, Yunfeng and Bellamy, Rachel and Mueller, Klaus},
  title     = {Explainable Active Learning ({XAL}): Toward {AI} Explanations as Interfaces for Machine Teachers},
  journal   = {Proceedings of the ACM on Human-Computer Interaction},
  volume    = {4},
  number    = {CSCW3},
  pages     = {1--28},
  year      = {2021},
  publisher = {ACM New York, NY, USA},
}

@article{behrens2021bandits,
  author  = {Behrens, Freya and Teso, Stefano and Mottin, Davide},
  title   = {Bandits for Learning to Explain from Explanations},
  journal = {arXiv preprint arXiv:2102.03815},
  year    = {2021},
}

% Workshop paper, so stored as a proceedings contribution rather than a journal article.
% NOTE(review): proceedings title and page range 1--6 taken from the ACL Anthology record -- verify.
@inproceedings{zylberajch2021hildif,
  author    = {Zylberajch, Hugo and Lertvittayakumjorn, Piyawat and Toni, Francesca},
  title     = {{HILDIF}: Interactive Debugging of {NLI} Models Using Influence Functions},
  booktitle = {Proceedings of the First Workshop on Interactive Learning for Natural Language Processing},
  pages     = {1--6},
  year      = {2021},
}

% Title is no longer wrapped in a second brace pair, so bibliography styles can apply their own casing.
@article{yao2021refining,
  author  = {Yao, Huihan and Chen, Ying and Ye, Qinyuan and Jin, Xisen and Ren, Xiang},
  title   = {Refining Neural Networks with Compositional Explanations},
  journal = {arXiv preprint arXiv:2103.10415},
  year    = {2021},
}

% Title is no longer wrapped in a second brace pair, so bibliography styles can apply their own casing.
@inproceedings{teso2021interactive,
  author    = {Teso, Stefano and Bontempelli, Andrea and Giunchiglia, Fausto and Passerini, Andrea},
  title     = {Interactive Label Cleaning with Example-based Explanations},
  booktitle = {Proceedings of the 35th International Conference on Neural Information Processing Systems},
  year      = {2021},
}

% Only the AI acronyms are brace-protected; venue follows the other AAAI entries (thirty-sixth conference recorded as volume 36).
@inproceedings{kambhampati2021symbols,
  author    = {Kambhampati, Subbarao and Sreedharan, Sarath and Verma, Mudit and Zha, Yantian and Guan, Lin},
  title     = {Symbols as a Lingua Franca for Bridging {Human-AI} Chasm for Explainable and Advisable {AI} Systems},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {36},
  year      = {2022},
}

% Title is no longer wrapped in a second brace pair, so bibliography styles can apply their own casing.
% NOTE(review): booktitle names the AAAI-22 workshop while year is 2021 -- confirm which year to cite.
@inproceedings{bontempelli2021toward,
  author    = {Bontempelli, Andrea and Giunchiglia, Fausto and Passerini, Andrea and Teso, Stefano},
  title     = {Toward a Unified Framework for Debugging Gray-box Models},
  booktitle = {The AAAI-22 Workshop on Interactive Machine Learning},
  year      = {2021},
}

@inproceedings{margatina2021active,
  author    = {Margatina, Katerina and Vernikos, Giorgos and Barrault, Lo{\"\i}c and Aletras, Nikolaos},
  title     = {Active Learning by Acquiring Contrastive Examples},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  pages     = {650--663},
  year      = {2021},
}

% Title is no longer wrapped in a second brace pair, so bibliography styles can apply their own casing.
@article{plumb2021finding,
  author  = {Plumb, Gregory and Ribeiro, Marco Tulio and Talwalkar, Ameet},
  title   = {Finding and Fixing Spurious Patterns with Explanations},
  journal = {arXiv preprint arXiv:2106.02112},
  year    = {2021},
}

% Title is no longer wrapped in a second brace pair, so bibliography styles can apply their own casing.
@article{schramowski2021interactively,
  author  = {Schramowski, Patrick and Friedrich, Felix and Tauchmann, Christopher and Kersting, Kristian},
  title   = {Interactively Generating Explanations for Transformer Language Models},
  journal = {arXiv preprint arXiv:2110.02058},
  year    = {2021},
}

% No journal or other venue is recorded for this work, so it is stored as misc (an article entry requires a journal field).
% NOTE(review): add the venue or a URL once known.
@misc{hartmanninteraction,
  author = {Hartmann, Mareike and Kruijff-Korbayov{\'a}, Ivana and Sonntag, Daniel},
  title  = {Interaction with Explanations in the {XAINES} Project},
  year   = {2021},
}

% Surname "Mac Namee" is written in full, matching the spelling used for this author elsewhere in this file.
@inproceedings{lu2022rationale,
  author    = {Lu, Jinghui and Yang, Linyi and Mac Namee, Brian and Zhang, Yue},
  title     = {A Rationale-Centric Framework for Human-in-the-loop Machine Learning},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  pages     = {6986--6996},
  year      = {2022},
}

@article{friedrich2022typology,
  author  = {Friedrich, Felix and Stammer, Wolfgang and Schramowski, Patrick and Kersting, Kristian},
  title   = {A Typology to Explore and Guide Explanatory Interactive Machine Learning},
  journal = {arXiv preprint arXiv:2203.03668},
  year    = {2022},
}

% Acronym brace-protected; Springer is recorded as publisher rather than organization.
@inproceedings{slany2022caipi,
  author    = {Slany, Emanuel and Ott, Yannik and Scheele, Stephan and Paulus, Jan and Schmid, Ute},
  title     = {{CAIPI} in Practice: Towards Explainable Interactive Medical Image Classification},
  booktitle = {IFIP International Conference on Artificial Intelligence Applications and Innovations},
  pages     = {389--400},
  year      = {2022},
  publisher = {Springer},
}

@article{kiefer2022semantic,
  author    = {Kiefer, Sebastian and Hoffmann, Mareike and Schmid, Ute},
  title     = {Semantic Interactive Learning for Text Classification: A Constructive Approach for Contextual Interactions},
  journal   = {Machine Learning and Knowledge Extraction},
  volume    = {4},
  number    = {4},
  pages     = {994--1010},
  year      = {2022},
  publisher = {MDPI},
}

% Springer is recorded as the publisher of the proceedings rather than as a sponsoring organization.
@inproceedings{hagos2022impact,
  author    = {Hagos, Misgina Tsighe and Curran, Kathleen M and Mac Namee, Brian},
  title     = {Impact of Feedback Type on Explanatory Interactive Learning},
  booktitle = {International Symposium on Methodologies for Intelligent Systems},
  pages     = {127--137},
  year      = {2022},
  publisher = {Springer},
}

% Given name spelled "Wolfgang", as in the other entries by this author in this file.
@article{teso2023leveraging,
  author  = {Teso, Stefano and Alkan, {\"O}znur and Stammer, Wolfgang and Daly, Elizabeth},
  title   = {Leveraging Explanations in Interactive Machine Learning: An Overview},
  journal = {Frontiers in Artificial Intelligence},
  year    = {2023},
}

@inproceedings{bontempelli2023concept,
  author    = {Bontempelli, Andrea and Teso, Stefano and Giunchiglia, Fausto and Passerini, Andrea},
  title     = {Concept-level debugging of part-prototype networks},
  booktitle = {International Conference on Learning Representations},
  year      = {2023},
}

@article{steinmann2023learning,
  author  = {Steinmann, David and Stammer, Wolfgang and Friedrich, Felix and Kersting, Kristian},
  title   = {Learning to Intervene on Concept Bottlenecks},
  journal = {arXiv preprint arXiv:2308.13453},
  year    = {2023},
}

@article{lalletti2024spurious,
  author  = {Lalletti, Cristiana and Teso, Stefano},
  title   = {Spurious Correlations in Concept Drift: Can Explanatory Interaction Help?},
  journal = {arXiv preprint arXiv:2407.16515},
  year    = {2024},
}


% ============================================================================
% Reinforcement Learning
% ============================================================================

@inproceedings{guan2020explanation,
  author    = {Guan, Lin and Verma, Mudit and Kambhampati, Subbarao},
  title     = {Explanation augmented feedback in human-in-the-loop reinforcement learning},
  booktitle = {Human And Machine in-the-Loop Evaluation and Learning Strategies},
  year      = {2020},
}

@inproceedings{tulli2020learning,
  author    = {Tulli, Silvia and Wallk{\"o}tter, Sebastian and Paiva, Ana and Melo, Francisco S and Chetouani, Mohamed},
  title     = {Learning from explanations and demonstrations: A pilot study},
  booktitle = {2nd Workshop on Interactive Natural Language Technology for Explainable Artificial Intelligence},
  pages     = {61--66},
  year      = {2020},
}

% Given name spelled "Subbarao", as in the other entries by this author in this file.
@inproceedings{guan2021widening,
  author    = {Guan, Lin and Verma, Mudit and Guo, Sihang and Zhang, Ruohan and Kambhampati, Subbarao},
  title     = {Widening the Pipeline in Human-Guided Reinforcement Learning with Explanation and Context-Aware Data Augmentation},
  booktitle = {Proceedings of the 35th International Conference on Neural Information Processing Systems},
  year      = {2021},
}


% ============================================================================
% Model Distillation
% ============================================================================

@inproceedings{milli2019model,
  author    = {Milli, Smitha and Schmidt, Ludwig and Dragan, Anca D and Hardt, Moritz},
  title     = {Model reconstruction from model explanations},
  booktitle = {Proceedings of the Conference on Fairness, Accountability, and Transparency},
  pages     = {1--9},
  year      = {2019},
}

@article{pruthi2020evaluating,
  author  = {Pruthi, Danish and Dhingra, Bhuwan and Soares, Livio Baldini and Collins, Michael and Lipton, Zachary C and Neubig, Graham and Cohen, William W},
  title   = {Evaluating Explanations: How much do explanations from the teacher aid students?},
  journal = {arXiv preprint arXiv:2012.00893},
  year    = {2020},
}


% ============================================================================
% Regularization without Supervision
% ============================================================================

@inproceedings{ross2018improving,
  author    = {Ross, Andrew and Doshi-Velez, Finale},
  title     = {Improving the adversarial robustness and interpretability of deep neural networks by regularizing their input gradients},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {32},
  number    = {1},
  year      = {2018},
}

@inproceedings{alvarez2018towards,
  author    = {Alvarez-Melis, David and Jaakkola, Tommi S},
  title     = {Towards robust interpretability with self-explaining neural networks},
  booktitle = {Proceedings of the 32nd International Conference on Neural Information Processing Systems},
  pages     = {7786--7795},
  year      = {2018},
}

@inproceedings{wu2018beyond,
  author    = {Wu, Mike and Hughes, Michael and Parbhoo, Sonali and Zazzi, Maurizio and Roth, Volker and Doshi-Velez, Finale},
  title     = {Beyond sparsity: Tree regularization of deep models for interpretability},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {32},
  number    = {1},
  year      = {2018},
}

@inproceedings{wu2020regional,
  author    = {Wu, Mike and Parbhoo, Sonali and Hughes, Michael and Kindle, Ryan and Celi, Leo and Zazzi, Maurizio and Roth, Volker and Doshi-Velez, Finale},
  title     = {Regional tree regularization for interpretability in deep neural networks},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {34},
  number    = {04},
  pages     = {6413--6421},
  year      = {2020},
}

@inproceedings{plumb2020regularizing,
  author    = {Plumb, Gregory and Al-Shedivat, Maruan and Cabrera, {\'A}ngel Alexander and Perer, Adam and Xing, Eric and Talwalkar, Ameet},
  title     = {Regularizing black-box models for improved interpretability},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {33},
  year      = {2020},
}

@inproceedings{singh2020don,
  author    = {Singh, Krishna Kumar and Mahajan, Dhruv and Grauman, Kristen and Lee, Yong Jae and Feiszli, Matt and Ghadiyaram, Deepti},
  title     = {Don't Judge an Object by Its Context: Learning to Overcome Contextual Bias},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {11070--11078},
  year      = {2020},
}

@article{halliwell2020trustworthy,
  author  = {Halliwell, Nicholas and Lecue, Freddy},
  title   = {Trustworthy convolutional neural networks: A gradient penalized-based approach},
  journal = {arXiv preprint arXiv:2009.14260},
  year    = {2020},
}

@inproceedings{pillai2021explainable,
  author    = {Pillai, Vipin and Pirsiavash, Hamed},
  title     = {Explainable Models with Consistent Interpretations},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2021},
}

@inproceedings{han2021explanation,
  author    = {Han, Tao and Tu, Wei-Wei and Li, Yu-Feng},
  title     = {Explanation Consistency Training: Facilitating Consistency-Based Semi-Supervised Learning with Interpretability},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2021},
}

% NeurIPS paper: stored as a proceedings contribution, like the other NeurIPS entries in this file.
@inproceedings{ismail2021improving,
  author    = {Ismail, Aya Abdelsalam and Corrada Bravo, Hector and Feizi, Soheil},
  title     = {Improving Deep Learning Interpretability by Saliency Guided Training},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {34},
  year      = {2021},
}

@inproceedings{zeng2021generating,
  title={Generating Deep Networks Explanations with Robust Attribution Alignment},
  author={Zeng, Guohang and Kowsar, Yousef and Erfani, Sarah and Bailey, James},
  booktitle={Asian Conference on Machine Learning},
  pages={753--768},
  year={2021},
  publisher={PMLR}
}

@article{stammer2023learning,
  author  = {Stammer, Wolfgang and Friedrich, Felix and Steinmann, David and Shindo, Hikaru and Kersting, Kristian},
  title   = {Learning by Self-Explaining},
  journal = {arXiv preprint arXiv:2309.08395},
  year    = {2023}
}


% ============================================================================
% Machine Teaching
% ============================================================================

@inproceedings{su2017interpretable,
  author    = {Su, Shihan and Chen, Yuxin and Mac Aodha, Oisin and Perona, Pietro and Yue, Yisong},
  title     = {Interpretable Machine Teaching via Feature Feedback},
  booktitle = {NIPS'17 Workshop on Teaching Machines, Robots, and Humans},
  year      = {2017},
}

@inproceedings{mac2018teaching,
  author    = {Mac Aodha, Oisin and Su, Shihan and Chen, Yuxin and Perona, Pietro and Yue, Yisong},
  title     = {Teaching categories to human learners with visual explanations},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {3820--3828},
  year      = {2018},
}


% ============================================================================
% Applications
% ============================================================================

@article{sefcik2021improving,
  title={Improving a neural network model by explanation-guided training for glioma classification based on {MRI} data},
  author={Sefcik, Frantisek and Benesova, Wanda},
  journal={arXiv preprint arXiv:2107.02008},
  year={2021}
}


% ============================================================================
% Related Works
% ============================================================================

% Explanation-based Learning

@article{mitchell1986explanation,
  title={Explanation-based generalization: A unifying view},
  author={Mitchell, Tom M and Keller, Richard M and Kedar-Cabelli, Smadar T},
  journal={Machine Learning},
  volume={1},
  number={1},
  pages={47--80},
  year={1986},
  publisher={Springer}
}

@article{dejong1986explanation,
  title={Explanation-based learning: An alternative view},
  author={DeJong, Gerald and Mooney, Raymond},
  journal={Machine Learning},
  volume={1},
  number={2},
  pages={145--176},
  year={1986},
  publisher={Springer}
}

@article{ellman1989explanation,
  title={Explanation-based learning: A survey of programs and perspectives},
  author={Ellman, Thomas},
  journal={ACM Computing Surveys (CSUR)},
  volume={21},
  number={2},
  pages={163--221},
  year={1989},
  publisher={ACM},
  address={New York, NY, USA}
}

@inproceedings{kimmig2007probabilistic,
  title={Probabilistic explanation based learning},
  author={Kimmig, Angelika and De Raedt, Luc and Toivonen, Hannu},
  booktitle={European Conference on Machine Learning},
  pages={176--187},
  year={2007},
  publisher={Springer}
}

% Injecting invariances / feature constraints into models

@inproceedings{simard1991tangent,
  title={{Tangent Prop} -- a formalism for specifying selected invariances in an adaptive network},
  author={Simard, Patrice and Victorri, Bernard and LeCun, Yann and Denker, John S},
  booktitle={Advances in Neural Information Processing Systems},
  volume={4},
  pages={895--903},
  year={1991}
}

@article{decoste2002training,
  title={Training invariant support vector machines},
  author={DeCoste, Dennis and Sch{\"o}lkopf, Bernhard},
  journal={Machine Learning},
  volume={46},
  number={1},
  pages={161--190},
  year={2002},
  publisher={Springer}
}

@inproceedings{small2011constrained,
  title={The constrained weight space {SVM}: learning with ranked features},
  author={Small, Kevin and Wallace, Byron C and Brodley, Carla E and Trikalinos, Thomas A},
  booktitle={Proceedings of the 28th International Conference on Machine Learning},
  pages={865--872},
  year={2011}
}

% Dual label-feature feedback

@article{raghavan2006active,
  title={Active learning with feedback on features and instances},
  author={Raghavan, Hema and Madani, Omid and Jones, Rosie},
  journal={The Journal of Machine Learning Research},
  volume={7},
  pages={1655--1686},
  year={2006},
  publisher={JMLR.org}
}

@inproceedings{raghavan2007interactive,
  title={An interactive algorithm for asking and incorporating feature feedback into support vector machines},
  author={Raghavan, Hema and Allan, James},
  booktitle={Proceedings of the 30th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages={79--86},
  year={2007}
}

@inproceedings{druck2008learning,
  title={Learning from labeled features using generalized expectation criteria},
  author={Druck, Gregory and Mann, Gideon and McCallum, Andrew},
  booktitle={Proceedings of the 31st Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages={595--602},
  year={2008}
}

@inproceedings{druck2009active,
  title={Active learning by labeling features},
  author={Druck, Gregory and Settles, Burr and McCallum, Andrew},
  booktitle={Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing},
  pages={81--90},
  year={2009}
}

@inproceedings{attenberg2010unified,
  title={A unified approach to active dual supervision for labeling features and examples},
  author={Attenberg, Josh and Melville, Prem and Provost, Foster},
  booktitle={Joint European Conference on Machine Learning and Knowledge Discovery in Databases},
  pages={40--55},
  year={2010},
  publisher={Springer}
}

@inproceedings{settles2011closing,
  author    = {Settles, Burr},
  title     = {Closing the loop: Fast, interactive semi-supervised annotation with queries on features and instances},
  booktitle = {Proceedings of the 2011 Conference on Empirical Methods in Natural Language Processing},
  pages     = {1467--1478},
  year      = {2011},
}

@inproceedings{dasgupta2018learning,
  author    = {Dasgupta, Sanjoy and Dey, Akansha and Roberts, Nicholas and Sabato, Sivan},
  title     = {Learning from discriminative feature feedback},
  booktitle = {Proceedings of the 32nd International Conference on Neural Information Processing Systems},
  pages     = {3959--3967},
  year      = {2018},
}

@inproceedings{dasgupta2020robust,
  title={Robust Learning from Discriminative Feature Feedback},
  author={Dasgupta, Sanjoy and Sabato, Sivan},
  booktitle={International Conference on Artificial Intelligence and Statistics},
  pages={973--982},
  year={2020},
  publisher={PMLR}
}

@article{katakkar2021practical,
  author  = {Katakkar, Anurag and Wang, Weiqin and Yoo, Clay H and Lipton, Zachary C and Kaushik, Divyansh},
  title   = {Practical Benefits of Feature Feedback Under Distribution Shift},
  journal = {arXiv preprint arXiv:2110.07566},
  year    = {2021},
}

% Annotator Rationales

@inproceedings{zaidan2007using,
  title={Using ``annotator rationales'' to improve machine learning for text categorization},
  author={Zaidan, Omar and Eisner, Jason and Piatko, Christine},
  booktitle={Human Language Technologies 2007: The Conference of the North American Chapter of the Association for Computational Linguistics; Proceedings of the Main Conference},
  pages={260--267},
  year={2007}
}

@inproceedings{zaidan2008modeling,
  title={Modeling annotators: A generative approach to learning from annotator rationales},
  author={Zaidan, Omar and Eisner, Jason},
  booktitle={Proceedings of the 2008 Conference on Empirical Methods in Natural Language Processing},
  pages={31--40},
  year={2008}
}

@inproceedings{sharma2015active,
  author    = {Sharma, Manali and Zhuang, Di and Bilgic, Mustafa},
  title     = {Active learning with rationales for text classification},
  booktitle = {Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  pages     = {441--451},
  year      = {2015},
}

% Counterfactual augmentation

@inproceedings{kaushik2019learning,
  title={Learning the Difference that Makes a Difference with Counterfactually-Augmented Data},
  author={Kaushik, Divyansh and Hovy, Eduard and Lipton, Zachary},
  booktitle={International Conference on Learning Representations},
  year={2020}
}

@inproceedings{kaushik2020explaining,
  title={Explaining the Efficacy of Counterfactually Augmented Data},
  author={Kaushik, Divyansh and Setlur, Amrith and Hovy, Eduard H and Lipton, Zachary Chase},
  booktitle={International Conference on Learning Representations},
  year={2021}
}

@article{joshi2021investigation,
  author  = {Joshi, Nitish and He, He},
  title   = {An investigation of the (in)effectiveness of counterfactually augmented data},
  journal = {arXiv preprint arXiv:2107.00753},
  year    = {2021},
}

% Critiquing in recommenders

@article{chen2012critiquing,
  author    = {Chen, Li and Pu, Pearl},
  title     = {Critiquing-based recommenders: survey and emerging trends},
  journal   = {User Modeling and User-Adapted Interaction},
  volume    = {22},
  number    = {1},
  pages     = {125--150},
  year      = {2012},
  publisher = {Springer},
}

@inproceedings{teso2017coactive,
  author    = {Teso, Stefano and Dragone, Paolo and Passerini, Andrea},
  title     = {Coactive critiquing: Elicitation of preferences and features},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {31},
  number    = {1},
  year      = {2017},
}

% Gray-box models

@inproceedings{koh2020concept,
  title={Concept bottleneck models},
  author={Koh, Pang Wei and Nguyen, Thao and Tang, Yew Siang and Mussmann, Stephen and Pierson, Emma and Kim, Been and Liang, Percy},
  booktitle={International Conference on Machine Learning},
  pages={5338--5348},
  year={2020},
  publisher={PMLR}
}


% ============================================================================
% Resources
% ============================================================================

@article{andrews1995survey,
  title={Survey and critique of techniques for extracting rules from trained artificial neural networks},
  author={Andrews, Robert and Diederich, Joachim and Tickle, Alan B},
  journal={Knowledge-Based Systems},
  volume={8},
  number={6},
  pages={373--389},
  year={1995},
  publisher={Elsevier}
}

@inproceedings{stumpf2007toward,
  title={Toward harnessing user feedback for machine learning},
  author={Stumpf, Simone and Rajaram, Vidya and Li, Lida and Burnett, Margaret and Dietterich, Thomas and Sullivan, Erin and Drummond, Russell and Herlocker, Jonathan},
  booktitle={Proceedings of the 12th International Conference on Intelligent User Interfaces},
  pages={82--91},
  year={2007}
}

@article{lipton2018mythos,
  title={The Mythos of Model Interpretability: In machine learning, the concept of interpretability is both important and slippery},
  author={Lipton, Zachary C},
  journal={Queue},
  volume={16},
  number={3},
  pages={31--57},
  year={2018},
  publisher={ACM},
  address={New York, NY, USA}
}

@article{guidotti2018survey,
  title={A survey of methods for explaining black box models},
  author={Guidotti, Riccardo and Monreale, Anna and Ruggieri, Salvatore and Turini, Franco and Giannotti, Fosca and Pedreschi, Dino},
  journal={ACM Computing Surveys (CSUR)},
  volume={51},
  number={5},
  pages={1--42},
  year={2018},
  publisher={ACM},
  address={New York, NY, USA}
}

@inproceedings{adebayo2018sanity,
  author    = {Adebayo, Julius and Gilmer, Justin and Muelly, Michael and Goodfellow, Ian and Hardt, Moritz and Kim, Been},
  title     = {Sanity checks for saliency maps},
  booktitle = {Proceedings of the 32nd International Conference on Neural Information Processing Systems},
  pages     = {9525--9536},
  year      = {2018},
}

@inproceedings{beery2018recognition,
  title={Recognition in terra incognita},
  author={Beery, Sara and Van Horn, Grant and Perona, Pietro},
  booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
  pages={456--473},
  year={2018}
}

@article{miller2019explanation,
  title={Explanation in artificial intelligence: Insights from the social sciences},
  author={Miller, Tim},
  journal={Artificial Intelligence},
  volume={267},
  pages={1--38},
  year={2019},
  publisher={Elsevier}
}

@article{lapuschkin2019unmasking,
  title={Unmasking {Clever Hans} predictors and assessing what machines really learn},
  author={Lapuschkin, Sebastian and W{\"a}ldchen, Stephan and Binder, Alexander and Montavon, Gr{\'e}goire and Samek, Wojciech and M{\"u}ller, Klaus-Robert},
  journal={Nature Communications},
  volume={10},
  number={1},
  pages={1--8},
  year={2019},
  publisher={Nature Publishing Group}
}

@inproceedings{ghorbani2019interpretation,
  author    = {Ghorbani, Amirata and Abid, Abubakar and Zou, James},
  title     = {Interpretation of neural networks is fragile},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {33},
  number    = {01},
  pages     = {3681--3688},
  year      = {2019},
}

@article{hooker2019benchmark,
  author  = {Hooker, Sara and Erhan, Dumitru and Kindermans, Pieter-Jan and Kim, Been},
  title   = {A Benchmark for Interpretability Methods in Deep Neural Networks},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {32},
  pages   = {9737--9748},
  year    = {2019},
}

@inproceedings{serrano2019attention,
  author    = {Serrano, Sofia and Smith, Noah A},
  title     = {Is Attention Interpretable?},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  pages     = {2931--2951},
  year      = {2019},
}

@inproceedings{jain2019attention,
  author    = {Jain, Sarthak and Wallace, Byron C},
  title     = {Attention is not Explanation},
  booktitle = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  pages     = {3543--3556},
  year      = {2019},
}

@inproceedings{wiegreffe2019attention,
  author    = {Wiegreffe, Sarah and Pinter, Yuval},
  title     = {Attention is not not Explanation},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)},
  pages     = {11--20},
  year      = {2019},
}

@incollection{kindermans2019reliability,
  title={The (un)reliability of saliency methods},
  author={Kindermans, Pieter-Jan and Hooker, Sara and Adebayo, Julius and Alber, Maximilian and Sch{\"u}tt, Kristof T and D{\"a}hne, Sven and Erhan, Dumitru and Kim, Been},
  booktitle={Explainable AI: Interpreting, Explaining and Visualizing Deep Learning},
  pages={267--280},
  year={2019},
  publisher={Springer}
}

@article{dombrowski2019explanations,
  author  = {Dombrowski, Ann-Kathrin and Alber, Maximillian and Anders, Christopher and Ackermann, Marcel and M{\"u}ller, Klaus-Robert and Kessel, Pan},
  title   = {Explanations can be manipulated and geometry is to blame},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {32},
  pages   = {13589--13600},
  year    = {2019},
}

@article{heo2019fooling,
  author  = {Heo, Juyeon and Joo, Sunghwan and Moon, Taesup},
  title   = {Fooling neural network interpretations via adversarial model manipulation},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {32},
  pages   = {2925--2936},
  year    = {2019},
}

@article{rudin2019stop,
  author    = {Rudin, Cynthia},
  title     = {Stop explaining black box machine learning models for high stakes decisions and use interpretable models instead},
  journal   = {Nature Machine Intelligence},
  volume    = {1},
  number    = {5},
  pages     = {206--215},
  year      = {2019},
  publisher = {Nature Publishing Group},
}

@article{green2019principles,
  title={The principles and limits of algorithm-in-the-loop decision making},
  author={Green, Ben and Chen, Yiling},
  journal={Proceedings of the ACM on Human-Computer Interaction},
  volume={3},
  number={CSCW},
  pages={1--24},
  year={2019},
  publisher={ACM},
  address={New York, NY, USA}
}

@article{geirhos2020shortcut,
  author    = {Geirhos, Robert and Jacobsen, J{\"o}rn-Henrik and Michaelis, Claudio and Zemel, Richard and Brendel, Wieland and Bethge, Matthias and Wichmann, Felix A},
  title     = {Shortcut learning in deep neural networks},
  journal   = {Nature Machine Intelligence},
  volume    = {2},
  number    = {11},
  pages     = {665--673},
  year      = {2020},
  publisher = {Nature Publishing Group},
}

@inproceedings{sixt2020explanations,
  title={When explanations lie: Why many modified {BP} attributions fail},
  author={Sixt, Leon and Granz, Maximilian and Landgraf, Tim},
  booktitle={International Conference on Machine Learning},
  pages={9046--9057},
  year={2020},
  publisher={PMLR}
}

@inproceedings{bastings2020elephant,
  author    = {Bastings, Jasmijn and Filippova, Katja},
  title     = {The elephant in the interpretability room: Why use attention as explanation when we have saliency methods?},
  booktitle = {Proceedings of the Third BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP},
  pages     = {149--155},
  year      = {2020},
}

@inproceedings{grimsley2020attention,
  title={Why Attention is Not Explanation: Surgical Intervention and Causal Reasoning about Neural Models},
  author={Grimsley, Christopher and Mayfield, Elijah and Bursten, Julia R. S.},
  booktitle={Proceedings of the 12th Language Resources and Evaluation Conference},
  pages={1780--1790},
  year={2020}
}

@article{degrave2021ai,
  title={{AI} for radiographic {COVID-19} detection selects shortcuts over signal},
  author={DeGrave, Alex J and Janizek, Joseph D and Lee, Su-In},
  journal={Nature Machine Intelligence},
  volume={3},
  number={7},
  pages={610--619},
  year={2021},
  publisher={Nature Publishing Group}
}

@article{zimmermann2021well,
  title={How Well do Feature Visualizations Support Causal Understanding of {CNN} Activations?},
  author={Zimmermann, Roland S and Borowski, Judy and Geirhos, Robert and Bethge, Matthias and Wallis, Thomas S. A. and Brendel, Wieland},
  journal={arXiv preprint arXiv:2106.12447},
  year={2021}
}

@inproceedings{adebayo2021post,
  author    = {Adebayo, Julius and Muelly, Michael and Abelson, Harold and Kim, Been},
  title     = {Post hoc explanations may be ineffective for detecting unknown spurious correlation},
  booktitle = {International Conference on Learning Representations},
  year      = {2022},
}

@article{busch2024truth,
  author  = {Busch, Florian Peter and Kamath, Roshni and Mitchell, Rupert and Stammer, Wolfgang and Kersting, Kristian and Mundt, Martin},
  title   = {Where is the Truth? The Risk of Getting Confounded in a Continual World},
  journal = {arXiv preprint arXiv:2402.06434},
  year    = {2024},
}

@article{steinmann2024navigating,
  author  = {Steinmann, David and Divo, Felix and Kraus, Maurice and W{\"u}st, Antonia and Struppek, Lukas and Friedrich, Felix and Kersting, Kristian},
  title   = {Navigating Shortcuts, Spurious Correlations, and Confounders: From Origins via Detection to Mitigation},
  journal = {arXiv preprint arXiv:2412.05152},
  year    = {2024},
}