From 8b0677274254a67ff0d20d5e45cd84c1a4ef7d35 Mon Sep 17 00:00:00 2001 From: Jochen Sieg Date: Thu, 11 Jul 2024 10:52:44 +0200 Subject: [PATCH] notebooks: beautify advanced_02 - Fix typos, remove wrong type hints, add more explanation --- ...nced_02_add_custom_pipeline_elements.ipynb | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/notebooks/advanced_02_add_custom_pipeline_elements.ipynb b/notebooks/advanced_02_add_custom_pipeline_elements.ipynb index 9f399afd..86ffe86d 100644 --- a/notebooks/advanced_02_add_custom_pipeline_elements.ipynb +++ b/notebooks/advanced_02_add_custom_pipeline_elements.ipynb @@ -20,7 +20,7 @@ "\n", "## Example using `MolToDescriptorPipelineElement`\n", "\n", - "The `MolToDescriptorPipelineElement` is a specification of `MolToAnyPipelineElement` adding useful functionality to the interface, like the number of features, the names of features and optional feature normalization. Analogously, the `MolToFingerprintPipelineElement` provides some useful functions for molecular fingerprint featurizes.\n", + "The `MolToDescriptorPipelineElement` is a specification of `MolToAnyPipelineElement` adding useful functionality to the interface, like the number of features, the names of features and optional feature normalization. Analogously, the `MolToFingerprintPipelineElement` provides some useful functions for molecular fingerprint featurization.\n", "\n", "In the following example, we demonstrate how to implement a new molecular descriptor representing the composition of a molecule using the counts of chemical element symbols. " ] @@ -35,10 +35,7 @@ "import numpy as np\n", "import numpy.typing as npt\n", "\n", - "from rdkit import Chem\n", - "\n", "from molpipeline.abstract_pipeline_elements.core import MolToAnyPipelineElement\n", - "from molpipeline.abstract_pipeline_elements.core import InvalidInstance\n", "from molpipeline.abstract_pipeline_elements.mol2any import (\n", " MolToDescriptorPipelineElement,\n", ")\n", @@ -46,7 +43,7 @@ "\n", "\n", "class ElementCountDescriptor(MolToDescriptorPipelineElement):\n", - " \"\"\"Element count descriptor\"\"\"\n", + " \"\"\"Element count descriptor.\"\"\"\n", "\n", " def __init__(\n", " self,\n", @@ -56,6 +53,7 @@ " n_jobs: int = 1,\n", " uuid: str | None = None,\n", " ) -> None:\n", + " \"\"\"Construct a new ElementCountDescriptor.\"\"\"\n", " super().__init__(\n", " standardizer=standardizer,\n", " name=name,\n", @@ -77,11 +75,8 @@ " \"\"\"Return a copy of the descriptor list.\"\"\"\n", " return [f\"atom_count_{atom_number}\" for atom_number in self.elements_dict]\n", "\n", - " #\n", - " def pretransform_single(\n", - " self, value: RDKitMol\n", - " ) -> npt.NDArray[np.float64] | InvalidInstance:\n", - " \"\"\"Transform an RDKit molecule to the element count feature vector\"\"\"\n", + " def pretransform_single(self, value: RDKitMol) -> npt.NDArray[np.float64]:\n", + " \"\"\"Transform an RDKit molecule to the element count feature vector.\"\"\"\n", " feature_vector = np.zeros(len(self.elements_dict))\n", " for atom in value.GetAtoms():\n", " atomic_number = atom.GetAtomicNum()\n", @@ -108,6 +103,8 @@ } ], "source": [ + "from rdkit import Chem\n", + "\n", "# let's create a new ElementCountDescriptor counting carbon, nitrogen, oxygen and fluor atoms in the molecule\n", "counter = ElementCountDescriptor(elements_to_count=[6, 7, 8, 9])\n", "\n", @@ -115,6 +112,14 @@ "counter.transform([Chem.MolFromSmiles(\"CCO\")])" ] }, + { + "cell_type": "markdown", + "id": "867ce38d-ea34-4db3-8d79-b2637abacbee", + "metadata": {}, + "source": [ + "The resulting feature vector shows 2 carbons, 0 nitrogens, 1 oxygen and 0 fluorines." + ] + }, { "cell_type": "markdown", "id": "caa290aa-fc19-4764-b19a-7d511d43f442",