From 8c282a44359085b8fd65d5d07a22c2cc6461983e Mon Sep 17 00:00:00 2001
From: Christian Feldmann <christian-wolfgang.feldmann@basf.com>
Date: Wed, 20 Nov 2024 18:27:15 +0100
Subject: [PATCH 1/6] add tests for occurring bug

---
 test_extras/test_chemprop/test_chemprop_pipeline.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/test_extras/test_chemprop/test_chemprop_pipeline.py b/test_extras/test_chemprop/test_chemprop_pipeline.py
index 32c4e677..07fbc1e5 100644
--- a/test_extras/test_chemprop/test_chemprop_pipeline.py
+++ b/test_extras/test_chemprop/test_chemprop_pipeline.py
@@ -307,6 +307,9 @@ def test_prediction(self) -> None:
         pred_copy = model_copy.predict(molecule_net_logd_df["smiles"].tolist())
         self.assertTrue(np.allclose(pred, pred_copy))
 
+        # Test single prediction, this was causing an error before
+        _ = regression_model.predict([molecule_net_logd_df["smiles"].iloc[0]])
+
 
 class TestClassificationPipeline(unittest.TestCase):
     """Test the Chemprop model pipeline for classification."""
@@ -341,6 +344,9 @@ def test_prediction(self) -> None:
         self.assertEqual(proba.shape, proba_copy.shape)
         self.assertTrue(np.allclose(proba[~nan_indices], proba_copy[~nan_indices]))
 
+        # Test single prediction, this was causing an error before
+        _ = classification_model.predict([molecule_net_bbbp_df["smiles"].iloc[0]])
+
 
 class TestMulticlassClassificationPipeline(unittest.TestCase):
     """Test the Chemprop model pipeline for multiclass classification."""

From e279563c13fe7c5750103febf48d6cffa0120501 Mon Sep 17 00:00:00 2001
From: Christian Feldmann <christian-wolfgang.feldmann@basf.com>
Date: Wed, 20 Nov 2024 19:00:10 +0100
Subject: [PATCH 2/6] don't squeeze to hard! Ensuring atleast1d

---
 molpipeline/estimators/chemprop/models.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py
index e720e029..2ea80a10 100644
--- a/molpipeline/estimators/chemprop/models.py
+++ b/molpipeline/estimators/chemprop/models.py
@@ -143,8 +143,7 @@ def _predict(
         test_data = build_dataloader(X, num_workers=self.n_jobs, shuffle=False)
         predictions = self.lightning_trainer.predict(self.model, test_data)
         prediction_array = np.vstack(predictions)  # type: ignore
-        prediction_array = prediction_array.squeeze()
-
+        prediction_array = np.atleast_1d(prediction_array.squeeze())
         # Check if the predictions have the same length as the input dataset
         if prediction_array.shape[0] != len(X):
             raise AssertionError(

From 8cfc27c61369cf5d277d4825e09f1f4463ffd41f Mon Sep 17 00:00:00 2001
From: Christian Feldmann <christian-wolfgang.feldmann@basf.com>
Date: Wed, 20 Nov 2024 19:00:47 +0100
Subject: [PATCH 3/6] finalize test

---
 test_extras/test_chemprop/test_chemprop_pipeline.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/test_extras/test_chemprop/test_chemprop_pipeline.py b/test_extras/test_chemprop/test_chemprop_pipeline.py
index 07fbc1e5..83789831 100644
--- a/test_extras/test_chemprop/test_chemprop_pipeline.py
+++ b/test_extras/test_chemprop/test_chemprop_pipeline.py
@@ -308,7 +308,8 @@ def test_prediction(self) -> None:
         self.assertTrue(np.allclose(pred, pred_copy))
 
         # Test single prediction, this was causing an error before
-        _ = regression_model.predict([molecule_net_logd_df["smiles"].iloc[0]])
+        single_mol_pred = regression_model.predict([molecule_net_logd_df["smiles"].iloc[0]])
+        self.assertEqual(single_mol_pred.shape, (1,))
 
 
 class TestClassificationPipeline(unittest.TestCase):
@@ -345,7 +346,10 @@ def test_prediction(self) -> None:
         self.assertTrue(np.allclose(proba[~nan_indices], proba_copy[~nan_indices]))
 
         # Test single prediction, this was causing an error before
-        _ = classification_model.predict([molecule_net_bbbp_df["smiles"].iloc[0]])
+        single_mol_pred = classification_model.predict([molecule_net_bbbp_df["smiles"].iloc[0]])
+        self.assertEqual(single_mol_pred.shape, (1,))
+        single_mol_proba = classification_model.predict_proba([molecule_net_bbbp_df["smiles"].iloc[0]])
+        self.assertEqual(single_mol_proba.shape, (1, 2))
 
 
 class TestMulticlassClassificationPipeline(unittest.TestCase):

From 967ce1750edd8eef5c60be478abf4b800cc2db28 Mon Sep 17 00:00:00 2001
From: Christian Feldmann <christian-wolfgang.feldmann@basf.com>
Date: Wed, 20 Nov 2024 19:02:45 +0100
Subject: [PATCH 4/6] black

---
 test_extras/test_chemprop/test_chemprop_pipeline.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/test_extras/test_chemprop/test_chemprop_pipeline.py b/test_extras/test_chemprop/test_chemprop_pipeline.py
index 83789831..b8bd8360 100644
--- a/test_extras/test_chemprop/test_chemprop_pipeline.py
+++ b/test_extras/test_chemprop/test_chemprop_pipeline.py
@@ -308,7 +308,9 @@ def test_prediction(self) -> None:
         self.assertTrue(np.allclose(pred, pred_copy))
 
         # Test single prediction, this was causing an error before
-        single_mol_pred = regression_model.predict([molecule_net_logd_df["smiles"].iloc[0]])
+        single_mol_pred = regression_model.predict(
+            [molecule_net_logd_df["smiles"].iloc[0]]
+        )
         self.assertEqual(single_mol_pred.shape, (1,))
 
 
@@ -346,9 +348,13 @@ def test_prediction(self) -> None:
         self.assertTrue(np.allclose(proba[~nan_indices], proba_copy[~nan_indices]))
 
         # Test single prediction, this was causing an error before
-        single_mol_pred = classification_model.predict([molecule_net_bbbp_df["smiles"].iloc[0]])
+        single_mol_pred = classification_model.predict(
+            [molecule_net_bbbp_df["smiles"].iloc[0]]
+        )
         self.assertEqual(single_mol_pred.shape, (1,))
-        single_mol_proba = classification_model.predict_proba([molecule_net_bbbp_df["smiles"].iloc[0]])
+        single_mol_proba = classification_model.predict_proba(
+            [molecule_net_bbbp_df["smiles"].iloc[0]]
+        )
         self.assertEqual(single_mol_proba.shape, (1, 2))
 
 

From 0d63aa564a8cd5daa1d34119ea117ad600dccbcc Mon Sep 17 00:00:00 2001
From: Christian Feldmann <christian-wolfgang.feldmann@basf.com>
Date: Thu, 21 Nov 2024 09:28:58 +0100
Subject: [PATCH 5/6] add tests for multiclass

---
 test_extras/test_chemprop/test_chemprop_pipeline.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/test_extras/test_chemprop/test_chemprop_pipeline.py b/test_extras/test_chemprop/test_chemprop_pipeline.py
index b8bd8360..c600a3a3 100644
--- a/test_extras/test_chemprop/test_chemprop_pipeline.py
+++ b/test_extras/test_chemprop/test_chemprop_pipeline.py
@@ -391,6 +391,16 @@ def test_prediction(self) -> None:
         self.assertEqual(pred.shape, pred_copy.shape)
         self.assertTrue(np.allclose(proba[~nan_mask], proba_copy[~nan_mask]))
 
+        # Test single prediction, this was causing an error before
+        single_mol_pred = classification_model.predict(
+            [test_data_df["Molecule"].iloc[0]]
+        )
+        self.assertEqual(single_mol_pred.shape, (1,))
+        single_mol_proba = classification_model.predict_proba(
+            [test_data_df["Molecule"].iloc[0]]
+        )
+        self.assertEqual(single_mol_proba.shape, (1, 3))
+
         with self.assertRaises(ValueError):
             classification_model.fit(
                 mols,

From 1963396d87badda77f7085eb40e4513f65fc4607 Mon Sep 17 00:00:00 2001
From: Christian Feldmann <christian-wolfgang.feldmann@basf.com>
Date: Thu, 21 Nov 2024 09:30:00 +0100
Subject: [PATCH 6/6] fix newly introduced bugs

---
 molpipeline/estimators/chemprop/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py
index 2ea80a10..47f5e59a 100644
--- a/molpipeline/estimators/chemprop/models.py
+++ b/molpipeline/estimators/chemprop/models.py
@@ -143,7 +143,7 @@ def _predict(
         test_data = build_dataloader(X, num_workers=self.n_jobs, shuffle=False)
         predictions = self.lightning_trainer.predict(self.model, test_data)
         prediction_array = np.vstack(predictions)  # type: ignore
-        prediction_array = np.atleast_1d(prediction_array.squeeze())
+        prediction_array = prediction_array.squeeze(axis=1)
         # Check if the predictions have the same length as the input dataset
         if prediction_array.shape[0] != len(X):
             raise AssertionError(