From 8b0677274254a67ff0d20d5e45cd84c1a4ef7d35 Mon Sep 17 00:00:00 2001
From: Jochen Sieg <jochen.sieg@basf.com>
Date: Thu, 11 Jul 2024 10:52:44 +0200
Subject: [PATCH] notebooks: beautify advanced_02

    - Fix typos, remove wrong type hints,
      add more explanation
---
 ...nced_02_add_custom_pipeline_elements.ipynb | 25 +++++++++++--------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/notebooks/advanced_02_add_custom_pipeline_elements.ipynb b/notebooks/advanced_02_add_custom_pipeline_elements.ipynb
index 9f399afd..86ffe86d 100644
--- a/notebooks/advanced_02_add_custom_pipeline_elements.ipynb
+++ b/notebooks/advanced_02_add_custom_pipeline_elements.ipynb
@@ -20,7 +20,7 @@
     "\n",
     "## Example using `MolToDescriptorPipelineElement`\n",
     "\n",
-    "The `MolToDescriptorPipelineElement` is a specification of `MolToAnyPipelineElement` adding useful functionality to the interface, like the number of features, the names of features and optional feature normalization. Analogously, the `MolToFingerprintPipelineElement` provides some useful functions for molecular fingerprint featurizes.\n",
+    "The `MolToDescriptorPipelineElement` is a specialization of `MolToAnyPipelineElement` adding useful functionality to the interface, like the number of features, the names of features and optional feature normalization. Analogously, the `MolToFingerprintPipelineElement` provides some useful functions for molecular fingerprint featurization.\n",
     "\n",
     "In the following example, we demonstrate how to implement a new molecular descriptor representing the composition of a molecule using the counts of chemical element symbols. "
    ]
@@ -35,10 +35,7 @@
     "import numpy as np\n",
     "import numpy.typing as npt\n",
     "\n",
-    "from rdkit import Chem\n",
-    "\n",
     "from molpipeline.abstract_pipeline_elements.core import MolToAnyPipelineElement\n",
-    "from molpipeline.abstract_pipeline_elements.core import InvalidInstance\n",
     "from molpipeline.abstract_pipeline_elements.mol2any import (\n",
     "    MolToDescriptorPipelineElement,\n",
     ")\n",
@@ -46,7 +43,7 @@
     "\n",
     "\n",
     "class ElementCountDescriptor(MolToDescriptorPipelineElement):\n",
-    "    \"\"\"Element count descriptor\"\"\"\n",
+    "    \"\"\"Element count descriptor.\"\"\"\n",
     "\n",
     "    def __init__(\n",
     "        self,\n",
@@ -56,6 +53,7 @@
     "        n_jobs: int = 1,\n",
     "        uuid: str | None = None,\n",
     "    ) -> None:\n",
+    "        \"\"\"Construct a new ElementCountDescriptor.\"\"\"\n",
     "        super().__init__(\n",
     "            standardizer=standardizer,\n",
     "            name=name,\n",
@@ -77,11 +75,8 @@
     "        \"\"\"Return a copy of the descriptor list.\"\"\"\n",
     "        return [f\"atom_count_{atom_number}\" for atom_number in self.elements_dict]\n",
     "\n",
-    "    #\n",
-    "    def pretransform_single(\n",
-    "        self, value: RDKitMol\n",
-    "    ) -> npt.NDArray[np.float64] | InvalidInstance:\n",
-    "        \"\"\"Transform an RDKit molecule to the element count feature vector\"\"\"\n",
+    "    def pretransform_single(self, value: RDKitMol) -> npt.NDArray[np.float64]:\n",
+    "        \"\"\"Transform an RDKit molecule to the element count feature vector.\"\"\"\n",
     "        feature_vector = np.zeros(len(self.elements_dict))\n",
     "        for atom in value.GetAtoms():\n",
     "            atomic_number = atom.GetAtomicNum()\n",
@@ -108,6 +103,8 @@
     }
    ],
    "source": [
+    "from rdkit import Chem\n",
+    "\n",
     "# let's create a new ElementCountDescriptor counting carbon, nitrogen, oxygen and fluor atoms in the molecule\n",
     "counter = ElementCountDescriptor(elements_to_count=[6, 7, 8, 9])\n",
     "\n",
@@ -115,6 +112,14 @@
     "counter.transform([Chem.MolFromSmiles(\"CCO\")])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "867ce38d-ea34-4db3-8d79-b2637abacbee",
+   "metadata": {},
+   "source": [
+    "The resulting feature vector shows 2 carbons, 0 nitrogens, 1 oxygen and 0 fluorines."
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "caa290aa-fc19-4764-b19a-7d511d43f442",