From 8c282a44359085b8fd65d5d07a22c2cc6461983e Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Wed, 20 Nov 2024 18:27:15 +0100 Subject: [PATCH 1/6] add tests for occurring bug --- test_extras/test_chemprop/test_chemprop_pipeline.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test_extras/test_chemprop/test_chemprop_pipeline.py b/test_extras/test_chemprop/test_chemprop_pipeline.py index 32c4e677..07fbc1e5 100644 --- a/test_extras/test_chemprop/test_chemprop_pipeline.py +++ b/test_extras/test_chemprop/test_chemprop_pipeline.py @@ -307,6 +307,9 @@ def test_prediction(self) -> None: pred_copy = model_copy.predict(molecule_net_logd_df["smiles"].tolist()) self.assertTrue(np.allclose(pred, pred_copy)) + # Test single prediction, this was causing an error before + _ = regression_model.predict([molecule_net_logd_df["smiles"].iloc[0]]) + class TestClassificationPipeline(unittest.TestCase): """Test the Chemprop model pipeline for classification.""" @@ -341,6 +344,9 @@ def test_prediction(self) -> None: self.assertEqual(proba.shape, proba_copy.shape) self.assertTrue(np.allclose(proba[~nan_indices], proba_copy[~nan_indices])) + # Test single prediction, this was causing an error before + _ = classification_model.predict([molecule_net_bbbp_df["smiles"].iloc[0]]) + class TestMulticlassClassificationPipeline(unittest.TestCase): """Test the Chemprop model pipeline for multiclass classification.""" From e279563c13fe7c5750103febf48d6cffa0120501 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Wed, 20 Nov 2024 19:00:10 +0100 Subject: [PATCH 2/6] don't squeeze to hard! Ensuring atleast1d --- molpipeline/estimators/chemprop/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index e720e029..2ea80a10 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -143,8 +143,7 @@ def _predict( test_data = build_dataloader(X, num_workers=self.n_jobs, shuffle=False) predictions = self.lightning_trainer.predict(self.model, test_data) prediction_array = np.vstack(predictions) # type: ignore - prediction_array = prediction_array.squeeze() - + prediction_array = np.atleast_1d(prediction_array.squeeze()) # Check if the predictions have the same length as the input dataset if prediction_array.shape[0] != len(X): raise AssertionError( From 8cfc27c61369cf5d277d4825e09f1f4463ffd41f Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Wed, 20 Nov 2024 19:00:47 +0100 Subject: [PATCH 3/6] finalize test --- test_extras/test_chemprop/test_chemprop_pipeline.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test_extras/test_chemprop/test_chemprop_pipeline.py b/test_extras/test_chemprop/test_chemprop_pipeline.py index 07fbc1e5..83789831 100644 --- a/test_extras/test_chemprop/test_chemprop_pipeline.py +++ b/test_extras/test_chemprop/test_chemprop_pipeline.py @@ -308,7 +308,8 @@ def test_prediction(self) -> None: self.assertTrue(np.allclose(pred, pred_copy)) # Test single prediction, this was causing an error before - _ = regression_model.predict([molecule_net_logd_df["smiles"].iloc[0]]) + single_mol_pred = regression_model.predict([molecule_net_logd_df["smiles"].iloc[0]]) + self.assertEqual(single_mol_pred.shape, (1,)) class TestClassificationPipeline(unittest.TestCase): @@ -345,7 +346,10 @@ def test_prediction(self) -> None: self.assertTrue(np.allclose(proba[~nan_indices], proba_copy[~nan_indices])) # Test single prediction, this was causing an error before - _ = classification_model.predict([molecule_net_bbbp_df["smiles"].iloc[0]]) + single_mol_pred = classification_model.predict([molecule_net_bbbp_df["smiles"].iloc[0]]) + self.assertEqual(single_mol_pred.shape, (1,)) + single_mol_proba = classification_model.predict_proba([molecule_net_bbbp_df["smiles"].iloc[0]]) + self.assertEqual(single_mol_proba.shape, (1, 2)) class TestMulticlassClassificationPipeline(unittest.TestCase): From 967ce1750edd8eef5c60be478abf4b800cc2db28 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Wed, 20 Nov 2024 19:02:45 +0100 Subject: [PATCH 4/6] black --- test_extras/test_chemprop/test_chemprop_pipeline.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/test_extras/test_chemprop/test_chemprop_pipeline.py b/test_extras/test_chemprop/test_chemprop_pipeline.py index 83789831..b8bd8360 100644 --- a/test_extras/test_chemprop/test_chemprop_pipeline.py +++ b/test_extras/test_chemprop/test_chemprop_pipeline.py @@ -308,7 +308,9 @@ def test_prediction(self) -> None: self.assertTrue(np.allclose(pred, pred_copy)) # Test single prediction, this was causing an error before - single_mol_pred = regression_model.predict([molecule_net_logd_df["smiles"].iloc[0]]) + single_mol_pred = regression_model.predict( + [molecule_net_logd_df["smiles"].iloc[0]] + ) self.assertEqual(single_mol_pred.shape, (1,)) @@ -346,9 +348,13 @@ def test_prediction(self) -> None: self.assertTrue(np.allclose(proba[~nan_indices], proba_copy[~nan_indices])) # Test single prediction, this was causing an error before - single_mol_pred = classification_model.predict([molecule_net_bbbp_df["smiles"].iloc[0]]) + single_mol_pred = classification_model.predict( + [molecule_net_bbbp_df["smiles"].iloc[0]] + ) self.assertEqual(single_mol_pred.shape, (1,)) - single_mol_proba = classification_model.predict_proba([molecule_net_bbbp_df["smiles"].iloc[0]]) + single_mol_proba = classification_model.predict_proba( + [molecule_net_bbbp_df["smiles"].iloc[0]] + ) self.assertEqual(single_mol_proba.shape, (1, 2)) From 0d63aa564a8cd5daa1d34119ea117ad600dccbcc Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Thu, 21 Nov 2024 09:28:58 +0100 Subject: [PATCH 5/6] add tests for multiclass --- test_extras/test_chemprop/test_chemprop_pipeline.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test_extras/test_chemprop/test_chemprop_pipeline.py b/test_extras/test_chemprop/test_chemprop_pipeline.py index b8bd8360..c600a3a3 100644 --- a/test_extras/test_chemprop/test_chemprop_pipeline.py +++ b/test_extras/test_chemprop/test_chemprop_pipeline.py @@ -391,6 +391,16 @@ def test_prediction(self) -> None: self.assertEqual(pred.shape, pred_copy.shape) self.assertTrue(np.allclose(proba[~nan_mask], proba_copy[~nan_mask])) + # Test single prediction, this was causing an error before + single_mol_pred = classification_model.predict( + [test_data_df["Molecule"].iloc[0]] + ) + self.assertEqual(single_mol_pred.shape, (1,)) + single_mol_proba = classification_model.predict_proba( + [test_data_df["Molecule"].iloc[0]] + ) + self.assertEqual(single_mol_proba.shape, (1, 3)) + with self.assertRaises(ValueError): classification_model.fit( mols, From 1963396d87badda77f7085eb40e4513f65fc4607 Mon Sep 17 00:00:00 2001 From: Christian Feldmann Date: Thu, 21 Nov 2024 09:30:00 +0100 Subject: [PATCH 6/6] fix newly introduced bugs --- molpipeline/estimators/chemprop/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/molpipeline/estimators/chemprop/models.py b/molpipeline/estimators/chemprop/models.py index 2ea80a10..47f5e59a 100644 --- a/molpipeline/estimators/chemprop/models.py +++ b/molpipeline/estimators/chemprop/models.py @@ -143,7 +143,7 @@ def _predict( test_data = build_dataloader(X, num_workers=self.n_jobs, shuffle=False) predictions = self.lightning_trainer.predict(self.model, test_data) prediction_array = np.vstack(predictions) # type: ignore - prediction_array = np.atleast_1d(prediction_array.squeeze()) + prediction_array = prediction_array.squeeze(axis=1) # Check if the predictions have the same length as the input dataset if prediction_array.shape[0] != len(X): raise AssertionError(