diff --git a/docs/source/api/bluesearch.database.rst b/docs/source/api/bluesearch.database.rst
index 45a7e8f2e..73eb468dc 100644
--- a/docs/source/api/bluesearch.database.rst
+++ b/docs/source/api/bluesearch.database.rst
@@ -14,6 +14,7 @@ Submodules
    bluesearch.database.mining_cache
    bluesearch.database.pdf
    bluesearch.database.topic
+   bluesearch.database.topic_info
 
 Module contents
 ---------------
diff --git a/docs/source/api/bluesearch.database.topic_info.rst b/docs/source/api/bluesearch.database.topic_info.rst
new file mode 100644
index 000000000..dff7075e0
--- /dev/null
+++ b/docs/source/api/bluesearch.database.topic_info.rst
@@ -0,0 +1,7 @@
+bluesearch.database.topic\_info module
+======================================
+
+.. automodule:: bluesearch.database.topic_info
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/whatsnew.rst b/docs/source/whatsnew.rst
index 07c2b580f..6981547a5 100644
--- a/docs/source/whatsnew.rst
+++ b/docs/source/whatsnew.rst
@@ -29,6 +29,8 @@ Legend
 
 Latest
 ======
+- |Add| the :code:`bluesearch.database.topic_info.TopicInfo` class
+- |Add| the :code:`bluesearch.database.article.ArticleSource` enum class
 - |Add| extraction of journal and article topics for :code:`arxiv` papers
   through CLI command :code:`bbs_database topic-extract arxiv`.
 - |Add| extraction of journal and article topics for :code:`pubmed` papers
diff --git a/src/bluesearch/database/article.py b/src/bluesearch/database/article.py
index a374b7e5f..e7880ed4b 100644
--- a/src/bluesearch/database/article.py
+++ b/src/bluesearch/database/article.py
@@ -17,6 +17,7 @@
 """Abstraction of scientific article data and related tools."""
 from __future__ import annotations
 
+import enum
 import html
 import re
 import string
@@ -33,6 +34,17 @@
 from bluesearch.database.identifiers import generate_uid
 
 
+class ArticleSource(enum.Enum):
+    """The source of an article; member values are the lower-case source names."""
+
+    ARXIV = "arxiv"
+    BIORXIV = "biorxiv"
+    MEDRXIV = "medrxiv"
+    PMC = "pmc"
+    PUBMED = "pubmed"
+    UNKNOWN = "unknown"
+
+
 def get_arxiv_id(path: str | Path, with_prefix: bool = True) -> str:
     """Compute arXiv ID, including version, from file path.
 
diff --git a/src/bluesearch/database/topic.py b/src/bluesearch/database/topic.py
index f13a44c4e..ecb79340e 100644
--- a/src/bluesearch/database/topic.py
+++ b/src/bluesearch/database/topic.py
@@ -402,7 +402,7 @@ def extract_article_topics_from_medrxiv_article(
 
     Returns
     -------
-    topic : pathlib.Path or str
+    topic : str
         The subject area of the article.
     journal : str
         The journal the article was published in. Should be either
diff --git a/src/bluesearch/database/topic_info.py b/src/bluesearch/database/topic_info.py
new file mode 100644
index 000000000..10bf67628
--- /dev/null
+++ b/src/bluesearch/database/topic_info.py
@@ -0,0 +1,121 @@
+# Blue Brain Search is a text mining toolbox focused on scientific use cases.
+#
+# Copyright (C) 2020  Blue Brain Project, EPFL.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""Implementation of the TopicInfo data structure."""
+from __future__ import annotations
+
+import copy
+import datetime
+import pathlib
+from dataclasses import dataclass, field
+from typing import Any
+
+import bluesearch
+from bluesearch.database.article import ArticleSource
+
+
+@dataclass
+class TopicInfo:
+    """The topic information extracted from a journal article.
+
+    For the spec see the following GitHub issue/comment:
+    https://github.com/BlueBrain/Search/issues/518#issuecomment-985525160
+    """
+
+    source: ArticleSource
+    path: str | pathlib.Path  # converted to a resolved pathlib.Path on init
+    element_in_file: int | None = None  # position of the article within the file
+    article_topics: dict[str, list[str]] = field(init=False, default_factory=dict)
+    journal_topics: dict[str, list[str]] = field(init=False, default_factory=dict)
+
+    def __post_init__(self) -> None:
+        """Record the creation time and resolve ``path`` to an absolute path."""
+        self.creation_date = datetime.datetime.now()  # naive local time
+        self.path = pathlib.Path(self.path).resolve()
+
+    @staticmethod
+    def _add_topics(
+        mapping: dict[str, list[str]], kind: str, topics: list[str]
+    ) -> None:
+        """Merge topics into a mapping, keeping the result sorted and unique.
+
+        Parameters
+        ----------
+        mapping
+            A mapping of the form kind -> list-of-topics that shall be
+            updated in-place. For example ``{"MeSH": ["topic 1", "topic 2"]}``.
+        kind
+            The topic kind. Corresponds to a key in ``mapping``.
+        topics
+            The topics to add. Corresponds to a value in ``mapping``.
+        """
+        updated_topics = mapping.get(kind, []) + topics
+        mapping[kind] = sorted(set(updated_topics))
+
+    def add_article_topics(self, kind: str, topics: list[str]) -> None:
+        """Add article topics.
+
+        Parameters
+        ----------
+        kind
+            The topic kind. For example "MeSH" or "MAG".
+        topics
+            A list of the topics to add. Duplicates are dropped.
+        """
+        self._add_topics(self.article_topics, kind, topics)
+
+    def add_journal_topics(self, kind: str, topics: list[str]) -> None:
+        """Add journal topics.
+
+        Parameters
+        ----------
+        kind
+            The topic kind. For example "MeSH" or "MAG".
+        topics
+            A list of the topics to add. Duplicates are dropped.
+        """
+        self._add_topics(self.journal_topics, kind, topics)
+
+    def json(self) -> dict:
+        """Convert the contents of this class to a structured dictionary.
+
+        Apart from the source, path and (deep-copied) topic entries a
+        "metadata" top-level key is added with the entries "created-date",
+        "bbs-version" and, if it was set, "element_in_file".
+
+        Returns
+        -------
+        dict
+            The structured dictionary with all topic information.
+        """
+        metadata: dict[str, Any] = {
+            "created-date": self.creation_date.strftime("%Y-%m-%d %H:%M:%S"),
+            "bbs-version": bluesearch.__version__,
+        }
+        if self.element_in_file is not None:
+            metadata["element_in_file"] = self.element_in_file
+
+        json = {
+            "source": self.source.value,
+            "path": str(self.path),
+            "topics": {
+                "article": copy.deepcopy(self.article_topics),
+                "journal": copy.deepcopy(self.journal_topics),
+            },
+            "metadata": metadata,
+        }
+
+        return json
diff --git a/src/bluesearch/entrypoint/database/download.py b/src/bluesearch/entrypoint/database/download.py
index c3143789f..d25914ffd 100644
--- a/src/bluesearch/entrypoint/database/download.py
+++ b/src/bluesearch/entrypoint/database/download.py
@@ -23,6 +23,8 @@
 from itertools import chain
 from pathlib import Path
 
+from bluesearch.database.article import ArticleSource
+
 logger = logging.getLogger(__name__)
 
 # Data conventions and formats are different prior to these dates. We
@@ -30,17 +32,17 @@
 # respective threshold.
 MIN_DATE = {
     # https://arxiv.org/help/arxiv_identifier#old
-    "arxiv": datetime(2007, 4, 1),
+    ArticleSource.ARXIV: datetime(2007, 4, 1),
     # https://www.biorxiv.org/tdm + looked into Current Content folder on GPFS
-    "biorxiv": datetime(2018, 12, 1),
+    ArticleSource.BIORXIV: datetime(2018, 12, 1),
     # https://www.medrxiv.org/tdm + looked into Current Content folder on GPFS
-    "medrxiv": datetime(2020, 10, 1),
+    ArticleSource.MEDRXIV: datetime(2020, 10, 1),
     # This should change every year in December:
     # see https://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_bulk/oa_comm/xml/
-    "pmc": datetime(2021, 12, 1),
+    ArticleSource.PMC: datetime(2021, 12, 1),
     # This should change every year in December:
     # see https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/
-    "pubmed": datetime(2021, 12, 1),
+    ArticleSource.PUBMED: datetime(2021, 12, 1),
 }
 
 
@@ -88,7 +90,7 @@ def init_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
     parser.add_argument(
         "source",
         type=str,
-        choices=("arxiv", "biorxiv", "medrxiv", "pmc", "pubmed"),
+        choices=[member.value for member in MIN_DATE],  # excludes UNKNOWN
         help="Source of the download.",
     )
     parser.add_argument(
@@ -129,16 +131,17 @@ def run(source: str, from_month: datetime, output_dir: Path, dry_run: bool) -> i
         get_s3_urls,
     )
 
-    if from_month < MIN_DATE[source]:
+    article_source = ArticleSource(source)
+    if from_month < MIN_DATE[article_source]:
         logger.error(
-            f"The papers from before {MIN_DATE[source].strftime('%B %Y')} "
+            f"The papers from before {MIN_DATE[article_source].strftime('%B %Y')} "
             "follow a different format and can't be downloaded. "
             "Please contact the developers if you need them. "
             "To proceed please re-run the command with a different starting month."
         )
         return 1
 
-    if source == "pmc":
+    if article_source == ArticleSource.PMC:
         url_dict = {}
         for component in {"author_manuscript", "oa_comm", "oa_noncomm"}:
             url_dict[component] = generate_pmc_urls(component, from_month)
@@ -158,7 +161,7 @@ def run(source: str, from_month: datetime, output_dir: Path, dry_run: bool) -> i
             component_dir.mkdir(exist_ok=True, parents=True)
             download_articles(url_list, component_dir)
         return 0
-    elif source == "pubmed":
+    elif article_source == ArticleSource.PUBMED:
         url_list = get_pubmed_urls(from_month)
         if dry_run:
             print("URL requests from:")
@@ -169,7 +172,7 @@ def run(source: str, from_month: datetime, output_dir: Path, dry_run: bool) -> i
         output_dir.mkdir(exist_ok=True, parents=True)
         download_articles(url_list, output_dir)
         return 0
-    elif source in {"biorxiv", "medrxiv"}:
+    elif article_source in {ArticleSource.BIORXIV, ArticleSource.MEDRXIV}:
 
         key_id = getpass.getpass("aws_access_key_id: ")
         secret_access_key = getpass.getpass("aws_secret_access_key: ")
@@ -192,7 +195,7 @@ def run(source: str, from_month: datetime, output_dir: Path, dry_run: bool) -> i
         logger.info(f"Start downloading {source} papers.")
         download_s3_articles(bucket, url_dict, output_dir)
         return 0
-    elif source == "arxiv":
+    elif article_source == ArticleSource.ARXIV:
         logger.info("Loading libraries")
         from google.cloud.storage import Client
 
diff --git a/src/bluesearch/entrypoint/database/topic_extract.py b/src/bluesearch/entrypoint/database/topic_extract.py
index e2d4bb2de..1368147ae 100644
--- a/src/bluesearch/entrypoint/database/topic_extract.py
+++ b/src/bluesearch/entrypoint/database/topic_extract.py
@@ -18,11 +18,12 @@
 from __future__ import annotations
 
 import argparse
-import datetime
 import logging
 from pathlib import Path
 from typing import Any
 
+from bluesearch.database.article import ArticleSource
+
 logger = logging.getLogger(__name__)
 
 
@@ -44,14 +45,7 @@ def init_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
 
     parser.add_argument(
         "source",
-        type=str,
-        choices=(
-            "arxiv",
-            "biorxiv",
-            "medrxiv",
-            "pmc",
-            "pubmed",
-        ),
+        choices=[member.value for member in ArticleSource],
         help="""
         Format of the input.
         If extracting topic of several articles, all articles must have the same format.
@@ -129,7 +123,6 @@ def run(
     """
     from defusedxml import ElementTree
 
-    import bluesearch
     from bluesearch.database.topic import (
         extract_article_topics_for_pubmed_article,
         extract_article_topics_from_medrxiv_article,
@@ -137,6 +130,7 @@ def run(
         get_topics_for_arxiv_articles,
         get_topics_for_pmc_article,
     )
+    from bluesearch.database.topic_info import TopicInfo
     from bluesearch.utils import JSONL, find_files
 
     try:
@@ -153,102 +147,45 @@ def run(
         print(*inputs, sep="\n")
         return 0
 
+    article_source = ArticleSource(source)
     all_results: list[dict[str, Any]] = []
-
-    if source == "pmc":
+    if article_source is ArticleSource.PMC:
         for path in inputs:
             logger.info(f"Processing {path}")
+            topic_info = TopicInfo(source=article_source, path=path.resolve())
             journal_topics = get_topics_for_pmc_article(path)
-            all_results.append(
-                {
-                    "source": "pmc",
-                    "path": str(path.resolve()),
-                    "topics": {
-                        "journal": {
-                            "MeSH": journal_topics,
-                        },
-                    },
-                    "metadata": {
-                        "created-date": datetime.datetime.now().strftime(
-                            "%Y-%m-%d %H:%M:%S"
-                        ),
-                        "bbs-version": bluesearch.version.__version__,
-                    },
-                }
-            )
-
-    elif source == "arxiv":
-        all_results = [
-            {
-                "source": "arxiv",
-                "path": str(path.resolve()),
-                "topics": {
-                    "article": {
-                        "arXiv": article_topics,
-                    },
-                },
-                "metadata": {
-                    "created-date": datetime.datetime.now().strftime(
-                        "%Y-%m-%d %H:%M:%S"
-                    ),
-                    "bbs-version": bluesearch.version.__version__,
-                },
-            }
-            for path, article_topics in get_topics_for_arxiv_articles(inputs).items()
-        ]
-
-    elif source in {"biorxiv", "medrxiv"}:
-        for path in inputs:
-            logger.info(f"Processing {path}")
-            topic, journal = extract_article_topics_from_medrxiv_article(path)
-            all_results.append(
-                {
-                    "source": journal,
-                    "path": str(path.resolve()),
-                    "topics": {
-                        "article": {
-                            "Subject Area": topic,
-                        },
-                    },
-                    "metadata": {
-                        "created-date": datetime.datetime.now().strftime(
-                            "%Y-%m-%d %H:%M:%S"
-                        ),
-                        "bbs-version": bluesearch.version.__version__,
-                    },
-                }
-            )
-
-        pass
-
-    elif source == "pubmed":
+            if journal_topics:
+                topic_info.add_journal_topics("MeSH", journal_topics)
+            all_results.append(topic_info.json())
+    elif article_source is ArticleSource.PUBMED:
         for path in inputs:
             logger.info(f"Processing {path}")
             articles = ElementTree.parse(input_path)
             for i, article in enumerate(articles.iter("PubmedArticle")):
+                topic_info = TopicInfo(
+                    source=article_source,
+                    path=path.resolve(),
+                    element_in_file=i,
+                )
                 article_topics = extract_article_topics_for_pubmed_article(article)
                 journal_topics = extract_journal_topics_for_pubmed_article(article)
-                all_results.append(
-                    {
-                        "source": "pubmed",
-                        "path": str(path.resolve()),
-                        "topics": {
-                            "journal": {
-                                "MeSH": journal_topics,
-                            },
-                            "article": {
-                                "MeSH": article_topics,
-                            },
-                        },
-                        "metadata": {
-                            "created-date": datetime.datetime.now().strftime(
-                                "%Y-%m-%d %H:%M:%S"
-                            ),
-                            "bbs-version": bluesearch.version.__version__,
-                            "element_in_file": i,
-                        },
-                    }
-                )
+                if article_topics:
+                    topic_info.add_article_topics("MeSH", article_topics)
+                if journal_topics:
+                    topic_info.add_journal_topics("MeSH", journal_topics)
+                all_results.append(topic_info.json())
+    elif article_source is ArticleSource.ARXIV:
+        for path, article_topics in get_topics_for_arxiv_articles(inputs).items():
+            topic_info = TopicInfo(source=article_source, path=path)
+            topic_info.add_article_topics("arXiv", article_topics)
+            all_results.append(topic_info.json())
+    elif article_source in {ArticleSource.BIORXIV, ArticleSource.MEDRXIV}:
+        for path in inputs:
+            logger.info(f"Processing {path}")
+            topic, journal = extract_article_topics_from_medrxiv_article(path)
+            topic_info = TopicInfo(source=ArticleSource(journal), path=path)
+            topic_info.add_article_topics("Subject Area", [topic])
+            all_results.append(topic_info.json())
     else:
         logger.error(f"The source type {source!r} is not implemented yet")
         return 1
diff --git a/tests/unit/database/test_topic_info.py b/tests/unit/database/test_topic_info.py
new file mode 100644
index 000000000..1f57b2c71
--- /dev/null
+++ b/tests/unit/database/test_topic_info.py
@@ -0,0 +1,84 @@
+import datetime
+import pathlib
+
+import pytest
+
+import bluesearch
+from bluesearch.database.article import ArticleSource
+from bluesearch.database.topic_info import TopicInfo
+
+
+class TestTopicInfo:
+    def test_instantiation(self):
+        source = ArticleSource.ARXIV
+        path = pathlib.Path("/some/path.test")
+        topic_info = TopicInfo(source, path)
+
+        assert topic_info.source == source
+        assert topic_info.path == path
+
+    def test_relative_path_is_resolved(self):
+        source = ArticleSource.ARXIV
+        path = pathlib.Path("relative/path")
+        topic_info = TopicInfo(source, path)
+
+        assert topic_info.source == source
+        assert topic_info.path == pathlib.Path.cwd() / path
+
+    @pytest.mark.parametrize(
+        ("mapping", "kind", "topics", "result"),
+        (
+            ({}, "MeSH", ["topic 1"], {"MeSH": ["topic 1"]}),  # new kind
+            (
+                {"MeSH": ["topic 2"]},  # existing kind: topics get merged and sorted
+                "MeSH",
+                ["topic 1"],
+                {"MeSH": ["topic 1", "topic 2"]},
+            ),
+            ({"MeSH": ["topic 1"]}, "MeSH", ["topic 1"], {"MeSH": ["topic 1"]}),
+        ),
+    )
+    def test_add_topics(self, mapping, kind, topics, result):
+        TopicInfo._add_topics(mapping, kind, topics)
+        assert mapping == result
+
+    def test_add_article_journal_topics(self):
+        topic_info = TopicInfo(ArticleSource.UNKNOWN, "")
+        topic_info.add_article_topics("MeSH", ["AT 1", "AT 2", "AT 3"])
+        topic_info.add_journal_topics("MAP", ["JT 1", "JT 2"])
+
+        assert topic_info.article_topics == {"MeSH": ["AT 1", "AT 2", "AT 3"]}
+        assert topic_info.journal_topics == {"MAP": ["JT 1", "JT 2"]}
+
+    def test_json(self):
+        start = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        topic_info = TopicInfo(
+            source=ArticleSource.PUBMED,
+            path=pathlib.Path("/some/path.test"),
+            element_in_file=5,
+        )
+        topic_info.add_article_topics("MeSH", ["AT 1", "AT 2", "AT 3"])
+        topic_info.add_journal_topics("MAP", ["JT 1", "JT 2"])
+
+        end = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        json = topic_info.json()
+        metadata = json.pop("metadata")  # time-dependent, checked separately
+        assert json == {
+            "source": ArticleSource.PUBMED.value,
+            "path": "/some/path.test",
+            "topics": {
+                "article": {"MeSH": ["AT 1", "AT 2", "AT 3"]},
+                "journal": {"MAP": ["JT 1", "JT 2"]},
+            },
+        }
+        assert start <= metadata["created-date"] <= end  # string order == time order
+        assert metadata["bbs-version"] == bluesearch.__version__
+
+    def test_element_in_file(self):
+        json = TopicInfo(ArticleSource.UNKNOWN, "").json()
+        assert json["metadata"].get("element_in_file") is None  # key is omitted
+
+        json = TopicInfo(ArticleSource.UNKNOWN, "", element_in_file=5).json()
+        assert json["metadata"].get("element_in_file") == 5
diff --git a/tests/unit/entrypoint/database/test_download.py b/tests/unit/entrypoint/database/test_download.py
index 0511524ff..a317be77a 100644
--- a/tests/unit/entrypoint/database/test_download.py
+++ b/tests/unit/entrypoint/database/test_download.py
@@ -26,6 +26,7 @@
 import pytest
 from google.cloud.storage import Blob
 
+from bluesearch.database.article import ArticleSource
 from bluesearch.entrypoint.database import download
 
 DOWNLOAD_PARAMS = {"source", "from_month", "output_dir", "dry_run"}
@@ -268,11 +269,11 @@ def test_worker_errors_are_reported(self, caplog, tmp_path, mocked):
 @pytest.mark.parametrize(
     ("source", "expected_date"),
     [
-        ("arxiv", "April 2007"),
-        ("biorxiv", "December 2018"),
-        ("medrxiv", "October 2020"),
-        ("pmc", "December 2021"),
-        ("pubmed", "December 2021"),
+        (ArticleSource.ARXIV, "April 2007"),
+        (ArticleSource.BIORXIV, "December 2018"),
+        (ArticleSource.MEDRXIV, "October 2020"),
+        (ArticleSource.PMC, "December 2021"),
+        (ArticleSource.PUBMED, "December 2021"),
     ],
 )
 def test_structure_change(source, expected_date, tmp_path, caplog):
@@ -281,7 +282,7 @@ def test_structure_change(source, expected_date, tmp_path, caplog):
     fake_datetime = limit_datetime - datetime.timedelta(days=32)
 
     with caplog.at_level(logging.ERROR):
-        exit_code = download.run(source, fake_datetime, tmp_path, dry_run=False)
+        exit_code = download.run(source.value, fake_datetime, tmp_path, dry_run=False)
 
     assert exit_code == 1
     assert expected_date in caplog.text
diff --git a/tests/unit/entrypoint/database/test_topic_extract.py b/tests/unit/entrypoint/database/test_topic_extract.py
index ed9275a36..0a2d26acf 100644
--- a/tests/unit/entrypoint/database/test_topic_extract.py
+++ b/tests/unit/entrypoint/database/test_topic_extract.py
@@ -73,11 +73,11 @@ def test_input_path_not_correct(caplog):
     assert "Argument 'input_path'" in caplog.text
 
 
-def test_wrong_source(test_data_path, caplog, tmp_path):
+def test_source_type_not_implemented(test_data_path, caplog, tmp_path):
     pmc_path = test_data_path / "jats_article.xml"
     with caplog.at_level(logging.ERROR):
         exit_code = topic_extract.run(
-            source="wrong_type",
+            source="unknown",
             input_path=pmc_path,
             output_file=tmp_path,
             match_filename=None,
@@ -86,7 +86,7 @@ def test_wrong_source(test_data_path, caplog, tmp_path):
             dry_run=False,
         )
     assert exit_code == 1
-    assert "The source type" in caplog.text
+    assert "not implemented" in caplog.text
 
 
 def test_dry_run(test_data_path, capsys, tmp_path):
@@ -134,7 +134,6 @@ def test_pmc_source(test_data_path, capsys, monkeypatch, tmp_path):
     assert result["path"] == str(pmc_path)
     assert isinstance(result["topics"], dict)
     topics = result["topics"]
-    assert "article" not in topics
     assert "journal" in topics
     assert isinstance(topics["journal"], dict)
     assert topics["journal"]["MeSH"] == meshes
@@ -177,7 +176,7 @@ def test_medbiorxiv_source(capsys, monkeypatch, tmp_path, source):
 
     # Mocking
     fake_extract_article_topics_from_medrxiv_article = Mock(
-        side_effect=lambda p: ("TOPIC", "JOURNAL")
+        side_effect=lambda p: ("TOPIC", source)
     )
 
     monkeypatch.setattr(
@@ -201,8 +200,8 @@ def test_medbiorxiv_source(capsys, monkeypatch, tmp_path, source):
     result = JSONL.load_jsonl(output_file)
     assert len(result) == 1
 
-    assert result[0]["source"] == "JOURNAL"
-    assert result[0]["topics"]["article"]["Subject Area"] == "TOPIC"
+    assert result[0]["source"] == source
+    assert result[0]["topics"]["article"]["Subject Area"] == ["TOPIC"]
 
 
 def test_pubmed_source(test_data_path, capsys, monkeypatch, tmp_path):