diff --git a/CHANGELOG.md b/CHANGELOG.md
index ffb75b1..cf59913 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,8 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## Unreleased
+### Added
+- Added [FEQA](https://www.aclweb.org/anthology/2020.acl-main.454/)
 
 ## [v0.1.3](https://github.com/danieldeutsch/sacrerouge/releases/tag/v0.1.3) - 2020-11-25
 ### Added
diff --git a/doc/metrics/feqa.md b/doc/metrics/feqa.md
new file mode 100644
index 0000000..b6afada
--- /dev/null
+++ b/doc/metrics/feqa.md
@@ -0,0 +1,31 @@
+# FEQA
+FEQA [1] is a question-answering-based metric for evaluating the faithfulness of summaries.
+Our implementation uses our [fork](https://github.com/danieldeutsch/feqa) of the [original repository](https://github.com/esdurmus/feqa), which adds a `run.py` file to easily run FEQA with an input and output file.
+
+## Setting Up
+First, create an environment for FEQA to run in (see [here](../../environments/feqa.yml)).
+We had to manually install Cython and numpy before the other requirements because benepar requires both to be installed first.
+
+After the environment is created, install the spacy model and other resources:
+```
+python -m spacy download en_core_web_sm
+python
+>>> import benepar
+>>> import nltk
+>>> benepar.download('benepar_en2')
+>>> nltk.download('stopwords')
+```
+
+Then, set up the metric by cloning the repository and downloading the necessary pre-trained models:
+```
+sacrerouge setup-metric feqa
+```
+
+## Correlations
+This implementation achieves Pearson correlations near 0.0 with the data collected by [2] on both the CNN/DailyMail and XSUM splits.
+I am not sure why yet.
+The data from [1] has not been released yet, so we cannot reproduce the results from the paper.
+
+## References
+[1] Esin Durmus, He He, and Mona Diab. [FEQA: A Question Answering Evaluation Framework for Faithfulness Assessment in Abstractive Summarization](https://www.aclweb.org/anthology/2020.acl-main.454/). ACL 2020.
+
+[2] Alex Wang, Kyunghyun Cho, and Mike Lewis. [Asking and Answering Questions to Evaluate the Factual Consistency of Summaries](https://www.aclweb.org/anthology/2020.acl-main.450.pdf). ACL 2020.
\ No newline at end of file
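For reference, the fork's `run.py` communicates through JSONL files: the wrapper in `sacrerouge/metrics/feqa.py` (later in this diff) writes one `{"document": ..., "summary": ...}` object per line and reads a `score` field back for each line. A minimal sketch of driving the script by hand, assuming it is run from the fork's root with the feqa environment active (the document and summary strings are made-up examples):

```python
# Sketch: drive the fork's run.py directly. The input/output JSONL formats
# mirror what the sacrerouge wrapper writes and reads.
import json
import subprocess

pairs = [
    {'document': 'Document text goes here.', 'summary': 'Summary text goes here.'},
]

with open('input.jsonl', 'w') as out:
    for pair in pairs:
        out.write(json.dumps(pair) + '\n')

# The trailing 8 is the batch size, matching the wrapper's default
subprocess.run('python run.py input.jsonl output.jsonl 8', shell=True, check=True)

with open('output.jsonl') as f:
    scores = [json.loads(line)['score'] for line in f]
print(scores)
```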
diff --git a/doc/metrics/metrics.md b/doc/metrics/metrics.md
index 62e1697..29bb648 100644
--- a/doc/metrics/metrics.md
+++ b/doc/metrics/metrics.md
@@ -6,6 +6,7 @@
 The following metrics have been implemented:
 - [BERTScore](bertscore.md)
 - [BEwT-E](bewte.md)
 - [Decomposed ROUGE](decomposed-rouge.md)
+- [FEQA](feqa.md)
 - [METEOR](meteor.md)
 - [MoverScore](moverscore.md)
 - [Pyramid Score](pyramid-score.md)
diff --git a/environments/feqa.yml b/environments/feqa.yml
new file mode 100644
index 0000000..684df1c
--- /dev/null
+++ b/environments/feqa.yml
@@ -0,0 +1,14 @@
+name: feqa
+dependencies:
+  - python=3.6
+  - pip
+  - pip:
+    - Cython==0.29.15
+    - numpy==1.19.1
+    - benepar==0.1.2
+    - torch==1.5.0
+    - fairseq==0.9.0
+    - nltk==3.5
+    - spacy==2.3.2
+    - tensorflow==1.15.0
+    - transformers==2.8.0
\ No newline at end of file
diff --git a/experiments/feqa/.gitignore b/experiments/feqa/.gitignore
new file mode 100644
index 0000000..6caf68a
--- /dev/null
+++ b/experiments/feqa/.gitignore
@@ -0,0 +1 @@
+output
\ No newline at end of file
diff --git a/experiments/feqa/run.sh b/experiments/feqa/run.sh
new file mode 100644
index 0000000..3fac16f
--- /dev/null
+++ b/experiments/feqa/run.sh
@@ -0,0 +1,19 @@
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+set -e
+
+for dataset in cnndm xsum; do
+  python -m sacrerouge feqa score \
+    --input-files datasets/wang2020/${dataset}.summaries.jsonl \
+    --dataset-reader document-based \
+    --output-jsonl ${DIR}/output/${dataset}/scores.jsonl \
+    --environment_name /shared/ddeutsch/envs/feqa
+
+  python -m sacrerouge correlate \
+    --metrics-jsonl-files datasets/wang2020/${dataset}.metrics.jsonl ${DIR}/output/${dataset}/scores.jsonl \
+    --metrics wang2020_crowd_faithfulness FEQA \
+    --summarizer-type peer \
+    --skip-summary-level \
+    --skip-system-level \
+    --output-file ${DIR}/output/${dataset}/correlations.json
+done
\ No newline at end of file
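The scoring step in this script can also be run from Python through the `FEQA` class added later in this diff. A sketch, assuming a conda environment named `feqa` and a placeholder path to `conda.sh` (the wrapper requires `CONDA_INIT` whenever `environment_name` is given):

```python
# Sketch: Python equivalent of the `sacrerouge feqa score` call above.
import os

from sacrerouge.metrics import FEQA

# The wrapper shells out to a conda environment, so CONDA_INIT must point
# to conda.sh; this path is a placeholder.
os.environ['CONDA_INIT'] = '/opt/miniconda3/etc/profile.d/conda.sh'

metric = FEQA(environment_name='feqa')  # assumed environment name
documents_list = [['The text of the source document.']]
summaries = ['The summary to score for faithfulness.']

# Returns one MetricsDict per summary, e.g. [{'FEQA': 0.85}]
results = metric.score_all(summaries, documents_list)
print(results)
```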
diff --git a/sacrerouge/metrics/__init__.py b/sacrerouge/metrics/__init__.py
index 782757d..c153c58 100644
--- a/sacrerouge/metrics/__init__.py
+++ b/sacrerouge/metrics/__init__.py
@@ -3,6 +3,7 @@
 from sacrerouge.metrics.bertscore import BertScore
 from sacrerouge.metrics.bewte import BEwTE
 from sacrerouge.metrics.bleurt import Bleurt
+from sacrerouge.metrics.feqa import FEQA
 from sacrerouge.metrics.decomposed_rouge import DecomposedRouge
 from sacrerouge.metrics.meteor import Meteor
 from sacrerouge.metrics.moverscore import MoverScore
diff --git a/sacrerouge/metrics/feqa.py b/sacrerouge/metrics/feqa.py
new file mode 100644
index 0000000..719b85f
--- /dev/null
+++ b/sacrerouge/metrics/feqa.py
@@ -0,0 +1,128 @@
+import argparse
+import logging
+import os
+import shutil
+from overrides import overrides
+from subprocess import Popen, PIPE
+from typing import List
+
+from sacrerouge.commands import MetricSetupSubcommand
+from sacrerouge.common import DATA_ROOT, TemporaryDirectory
+from sacrerouge.common.util import download_file_from_google_drive
+from sacrerouge.data import MetricsDict
+from sacrerouge.data.types import DocumentType, SummaryType
+from sacrerouge.io import JsonlReader, JsonlWriter
+from sacrerouge.metrics import Metric, DocumentBasedMetric
+
+logger = logging.getLogger(__name__)
+
+
+@Metric.register('feqa')
+class FEQA(DocumentBasedMetric):
+    def __init__(self,
+                 environment_name: str = None,
+                 feqa_root: str = f'{DATA_ROOT}/metrics/feqa',
+                 batch_size: int = 8) -> None:
+        super().__init__(['summary'], ['documents'])
+        self.environment_name = environment_name
+        self.feqa_root = feqa_root
+        self.batch_size = batch_size
+
+        if self.environment_name is not None:
+            if 'CONDA_INIT' not in os.environ:
+                raise Exception('If `environment_name` is not None, environment variable "CONDA_INIT" must be set to the path to "conda.sh"')
+
+    def _ensure_single_document(self, documents_list: List[List[DocumentType]]):
+        # For now, the code only works if there is 1 input document. The QA model only evaluates against one document,
+        # so I think it may have to fundamentally change to support multi-document inputs
+        for documents in documents_list:
+            assert len(documents) == 1
+
+    def score_multi_all(self,
+                        summaries_list: List[List[SummaryType]],
+                        documents_list: List[List[DocumentType]]) -> List[List[MetricsDict]]:
+        self._ensure_single_document(documents_list)
+
+        with TemporaryDirectory() as temp_dir:
+            input_file = f'{temp_dir}/input.jsonl'
+            output_file = f'{temp_dir}/output.jsonl'
+            with JsonlWriter(input_file) as out:
+                for summaries, documents in zip(summaries_list, documents_list):
+                    assert len(documents) == 1
+                    document = documents[0]
+                    if isinstance(document, list):
+                        document = ' '.join(document)
+                    for summary in summaries:
+                        if isinstance(summary, list):
+                            summary = ' '.join(summary)
+                        out.write({'document': document, 'summary': summary})
+
+            commands = []
+            if self.environment_name is not None:
+                commands.append(f'source {os.environ["CONDA_INIT"]}')
+                commands.append(f'conda activate {self.environment_name}')
+            commands.append(f'cd {self.feqa_root}')
+            commands.append(f'python run.py {input_file} {output_file} {self.batch_size}')
+            command = ' && '.join(commands)
+            logger.info(f'Running FEQA command: "{command}"')
+            process = Popen(command, stdout=PIPE, stderr=PIPE, shell=True)
+            stdout, stderr = process.communicate()
+            logger.info(stdout.decode())
+            logger.error(stderr.decode())
+
+            scores = JsonlReader(output_file).read()
+            metrics_list = []
+            index = 0
+            for summaries in summaries_list:
+                metrics_list.append([])
+                for _ in summaries:
+                    metrics_list[-1].append(MetricsDict({'FEQA': scores[index]['score']}))
+                    index += 1
+            return metrics_list
+
+
+@MetricSetupSubcommand.register('feqa')
+class FEQASetupSubcommand(MetricSetupSubcommand):
+    @overrides
+    def add_subparser(self, parser: argparse._SubParsersAction):
+        description = 'Setup the FEQA metric'
+        self.parser = parser.add_parser('feqa', description=description, help=description)
+        self.parser.add_argument('--force', action='store_true', help='Force setting up the metric again')
+        self.parser.set_defaults(subfunc=self.run)
+
+    @overrides
+    def run(self, args):
+        if args.force and os.path.exists(f'{DATA_ROOT}/metrics/feqa'):
+            shutil.rmtree(f'{DATA_ROOT}/metrics/feqa')
+
+        # Clone the github repo
+        if not os.path.exists(f'{DATA_ROOT}/metrics/feqa'):
+            commands = [
+                f'mkdir -p {DATA_ROOT}/metrics',
+                f'cd {DATA_ROOT}/metrics',
+                f'git clone https://github.com/danieldeutsch/feqa',
+            ]
+            command = ' && '.join(commands)
+            process = Popen(command, shell=True)
+            process.communicate()
+
+        # Download the model files
+        gdrive_files = {
+            'qa_models/squad1.0/config.json': '1IwWhQf9MP2G-vOBsQD87kMMEBS0IvcXa',
+            'qa_models/squad1.0/dev-v1.1.json': '1tsWhCsXSxxgkBMBnGB9wkOliJH8K3Prs',
+            'qa_models/squad1.0/evaluate-v1.1.py': '1p-LlVVAGuMYjFckjK5HxdiK5xEuM-2Ev',
+            'qa_models/squad1.0/pytorch_model.bin': '1pWMsSTTwcoX0l75bzNFjvSC7firawp9M',
+            'qa_models/squad1.0/run_squad.py': '1yZKNFU7md4KPGmThPwsp4dt95HkKsArX',
+            'qa_models/squad1.0/special_tokens_map.json': '1rbv75oE5x0rXxtGGXETTvLBoHK5h3Lfj',
+            'qa_models/squad1.0/tokenizer_config.json': '1oPM62qOWofGnaLmlX_CWkYKbZ-KEMtym',
+            'qa_models/squad1.0/train-v1.1.json': '1y9_EgnoBbm0SJeCaNZFfjOyraeA-qfqP',
+            'qa_models/squad1.0/training_args.bin': '1r49Y1Cp2t6_II2xjOyxbvYVvp2EQj3zu',
+            'qa_models/squad1.0/vocab.txt': '1iGZrP6_3PiiH0pcF4zoSbqAsWdFvimfF',
+            'bart_qg/checkpoints/checkpoint_best.pt': '1GFnimonLFgGal1LT6KRgMJZLbxmNJvxF',
+            'bart_qg/checkpoints/dict.src.txt': '17CShx4cUEQTl_gpLapnbMsc7CmDAaV7r',
+            'bart_qg/checkpoints/dict.tgt.txt': '1_dUN7CQZdqPxoiezzWp5yByuEXVJFwce',
+        }
+        for file_path, file_id in gdrive_files.items():
+            download_file_from_google_drive(file_id, f'{DATA_ROOT}/metrics/feqa/{file_path}', force=args.force)
+
+        print('FEQA setup success')
\ No newline at end of file
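To make the expected shapes concrete: `score_multi_all` above takes exactly one document per instance (enforced by `_ensure_single_document`) but allows several summaries per instance, and returns a parallel list of `MetricsDict`s. A sketch with made-up inputs, reusing the placeholder `CONDA_INIT`/environment setup from the earlier sketch:

```python
# Sketch of the shapes score_multi_all expects and returns.
import os

from sacrerouge.metrics import FEQA

os.environ['CONDA_INIT'] = '/opt/miniconda3/etc/profile.d/conda.sh'  # placeholder path
metric = FEQA(environment_name='feqa')  # assumed environment name

documents_list = [
    ['The text of the first source document.'],   # instance 1: exactly one document
    ['The text of the second source document.'],  # instance 2
]
summaries_list = [
    ['First summary of doc 1.', 'Second summary of doc 1.'],  # two summaries for instance 1
    ['Only summary of doc 2.'],
]

metrics_list = metric.score_multi_all(summaries_list, documents_list)
# metrics_list[0] holds two MetricsDicts like {'FEQA': ...}; metrics_list[1] holds one
```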
diff --git a/sacrerouge/tests/metrics/feqa_test.py b/sacrerouge/tests/metrics/feqa_test.py
new file mode 100644
index 0000000..42d5843
--- /dev/null
+++ b/sacrerouge/tests/metrics/feqa_test.py
@@ -0,0 +1,53 @@
+import os
+import pytest
+
+from sacrerouge.common.testing.metric_test_cases import DocumentBasedMetricTestCase
+from sacrerouge.common.testing.util import sacrerouge_command_exists
+from sacrerouge.metrics import FEQA
+
+
+@pytest.mark.skipif('FEQA_ENV' not in os.environ, reason='FEQA_ENV environment variable not set')
+class TestFEQA(DocumentBasedMetricTestCase):
+    def test_example(self):
+        # Tests to make sure we get the same output as running the example in the original repository
+        documents_list = [
+            [
+                "The world's oldest person has died a \
+                few weeks after celebrating her 117th birthday. \
+                Born on March 5, 1898, the greatgrandmother had lived through two world \
+                wars, the invention of the television and the \
+                first successful powered aeroplane."
+            ],
+            [
+                "The world's oldest person has died a \
+                few weeks after celebrating her 117th birthday. \
+                Born on March 5, 1898, the greatgrandmother had lived through two world \
+                wars, the invention of the television and the \
+                first successful powered aeroplane."
+            ]
+        ]
+        summaries = [
+            "The world's oldest person died in 1898",
+            "The world's oldest person died after her 117th birthday"
+        ]
+
+        metric = FEQA(environment_name=os.environ['FEQA_ENV'])
+
+        # The original iPython notebook reports 0.8875 for the second score, but our first score matches theirs.
+        # I assume the difference is caused by some minor version change (e.g., the example uses spacy model
+        # 2.1.0 but ours uses 2.3.1) since the first score matches.
+        expected_output = [
+            {'FEQA': 0.674074074074074},
+            {'FEQA': 0.85},
+        ]
+        actual_output = metric.score_all(summaries, documents_list)
+
+        assert len(expected_output) == len(actual_output)
+        for expected, actual in zip(expected_output, actual_output):
+            assert expected == pytest.approx(actual, 1e-4)
+
+    def test_command_exists(self):
+        assert sacrerouge_command_exists(['feqa'])
+
+    def test_setup_command_exists(self):
+        assert sacrerouge_command_exists(['setup-metric', 'feqa'])
\ No newline at end of file
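The test above is skipped unless `FEQA_ENV` names the conda environment to use. A minimal sketch of running it programmatically, with an assumed environment name and a placeholder `conda.sh` path (the shell equivalent is `FEQA_ENV=feqa CONDA_INIT=/path/to/conda.sh pytest sacrerouge/tests/metrics/feqa_test.py`):

```python
# Sketch: run the gated FEQA test with the required environment variables set.
import os
import subprocess

env = dict(os.environ)
env['FEQA_ENV'] = 'feqa'  # assumed conda environment name
env['CONDA_INIT'] = '/opt/miniconda3/etc/profile.d/conda.sh'  # placeholder path

subprocess.run(['pytest', 'sacrerouge/tests/metrics/feqa_test.py'], env=env, check=True)
```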